diff --git a/modules/statistics/bin/loganalyzer.php b/modules/statistics/bin/loganalyzer.php index 0995f789156f10b4d18e7f9629402bc8923f3620..00d911b2800e0358c6614f899a0f04f946ef1eee 100755 --- a/modules/statistics/bin/loganalyzer.php +++ b/modules/statistics/bin/loganalyzer.php @@ -2,19 +2,54 @@ <?php require_once( dirname(dirname(dirname(dirname(__FILE__)))) . '/www/_include.php'); -require_once('../extlibs/loganalyzer.php'); -echo 'Statistics directory : ' . $statdir . "\n"; -echo 'Input file : ' . $inputfile . "\n"; -echo 'Offset : ' . $offset . "\n"; +$progName = array_shift($argv); +$debug = FALSE; +$dryrun = FALSE; +foreach($argv as $a) { + if(strlen($a) === 0) continue; + + if(strpos($a, '=') !== FALSE) { + $p = strpos($a, '='); + $v = substr($a, $p + 1); + $a = substr($a, 0, $p); + } else { + $v = NULL; + } + + /* Map short options to long options. */ + $shortOptMap = array( + '-d' => '--debug', + ); + if(array_key_exists($a, $shortOptMap)) $a = $shortOptMap[$a]; + + switch($a) { + case '--help': + printHelp(); + exit(0); + case '--debug': + $debug = TRUE; + break; + case '--dry-run': + $dryrun = TRUE; + break; + default: + echo('Unknown option: ' . $a . "\n"); + echo('Please run `' . $progName . ' --help` for usage information.' . "\n"); + exit(1); + } +} + +$aggregator = new sspmod_statistics_Aggregator(); +$aggregator->dumpConfig(); +$results = $aggregator->aggregate($debug); + +if (!$dryrun) { + $aggregator->store($results); +} -// foreach ($results AS $rulename => $ruleresults) { -// foreach ($ruleresults AS $fileno => $fileres) { -// file_put_contents($statdir . $rulename . '-' . $fileno . '.stat', serialize($fileres) ); -// } -// } foreach ($results AS $slot => $val) { foreach ($val AS $sp => $no) { @@ -23,8 +58,24 @@ foreach ($results AS $slot => $val) { echo "\n"; } -echo "Results:\n"; -#print_r($results); -?> \ No newline at end of file + +/** + * This function prints the help output. 
+ */ +function printHelp() { + global $progName; + + /* '======================================================================' */ + echo('Usage: ' . $progName . ' [options] + +This program parses and aggregates SimpleSAMLphp log files. + +Options: + -d, --debug Used when configuring the log file syntax. See doc. + --dry-run Aggregate but do not store the results. + +'); +} + diff --git a/modules/statistics/docs/statistics.txt b/modules/statistics/docs/statistics.txt index 71f1d2489cfd3a7b9759ba7a6ef18377854213ff..90af693175e8337823a0b278a4f055089e372cba 100644 --- a/modules/statistics/docs/statistics.txt +++ b/modules/statistics/docs/statistics.txt @@ -37,9 +37,7 @@ With content that looks like this: Nov 15 13:16:39 www1 simplesamlphp-openwiki[2125]: 5 STAT [3493d5d87f] saml20-sp-SSO urn:mace:feide.no:services:no.feide.openwikicore sam.feide.no NA Nov 15 13:37:27 www1 simplesamlphp-foodle[3146]: 5 STAT [77013b4b6e] AUTH-login-admin OK -Here you can see that I collect statistics in one file for several -installations. You could easily separate each instance of -simpleSAMLphp into separate files (your preference). +Here you can see that I collect statistics in one file for several installations. You could easily separate each instance of simpleSAMLphp into separate files (your preference). ## Configure the statistics module @@ -57,18 +55,55 @@ Configure the path of the log file: 'inputfile' => '/tmp/stat', -and the syntax of the log file. You need to configure where the -date starts (usually 0), and how long the date is (usually 15), and -where the first columns starts (such that STAT is in column 3): +Make sure the stat dir is writable. SimpleSAMLphp will write data here: - 'datestart' => 1, + 'statdir' => '/tmp/stats/', + +### Configuring the syntax of the logfile + +Syslog uses different date formats on different environments, so you need to do some manual tweaking to make sure that simpleSAMLphp knows how to interpret the logs. 
+ +There are three parameter values you need to make sure are correct. + + 'datestart' => 1, 'datelength' => 15, 'offsetspan' => 21, -Make sure the stat dir is writable. SimpleSAMLphp will write data -here: +The first, `datestart`, is 1 when the date starts from the beginning of the line. The `datelength` parameter tells how many characters long the date is. + +The `offsetspan` parameter shows on which character the first column starts, such that the STAT keyword ends up in column number 3. + +Use the `loganalyzer` script with the `--debug` parameter to debug whether your configuration is correct. Then it is easy to see what is wrong, for example if the STAT keyword is not in column 3. + +Here is example output: + + + $ cd modules/statistics/bin + $ ./loganalyzer.php --debug + Statistics directory : /tmp/stats/ + Input file : /Users/andreas/Desktop/simplesamlphp.log + Offset : 4237200 + ---------------------------------------- + Log line: Feb 11 11:32:57 moria-app1 syslog_moria-app1[6630]: 5 STAT [2d41ee3f1e] AUTH-login-admin Failed + Date parse [Feb 11 11:32:57] to [Wed, 11 Feb 09 11:32:57 +0100] + Array + ( + [0] => moria-app1 + [1] => syslog_moria-app1[6630]: + [2] => 5 + [3] => STAT + [4] => [2d41ee3f1e] + [5] => AUTH-login-admin + [6] => Failed + ) + +In the debug output, please verify four things: + + 1. That the first field in the date parse line contains all the characters that are part of the timestamp, compare with the log line on the line above. + 2. Verify that the second field in the date parse line is correct - corresponding to the input timestamp. + 3. That the first `[0]` field contains all the characters from the first column. + 4. That column `[3]` is STAT. - 'statdir' => '/tmp/stats/', ### Setup cron @@ -84,10 +119,7 @@ script`statistics/bin/loganalyzer.php`. 
## Presentation of the statistics -At the installation page there will be a link "show statitics", go -there and if simpleSAMLphp finds the statistics files in the -`statdir` generated from cron or the script you will see -statistics. Enjoy. +At the installation page there will be a link "show statitics", go there and if simpleSAMLphp finds the statistics files in the `statdir` generated from cron or the script you will see statistics. Enjoy. Support ------- diff --git a/modules/statistics/extlibs/loganalyzer.php b/modules/statistics/extlibs/loganalyzer.php deleted file mode 100755 index fce6b9012033f880de9bd8b83fbe3094085c4eb3..0000000000000000000000000000000000000000 --- a/modules/statistics/extlibs/loganalyzer.php +++ /dev/null @@ -1,83 +0,0 @@ -<?php - -$config = SimpleSAML_Configuration::getInstance(); -$statconfig = $config->copyFromBase('statconfig', 'statistics.php'); - -$statdir = $statconfig->getValue('statdir'); -$inputfile = $statconfig->getValue('inputfile'); -$statrules = $statconfig->getValue('statrules'); -$offset = $statconfig->getValue('offset', 0); - -if (!is_dir($statdir)) - throw new Exception('Statistics module: output dir do not exists [' . $statdir . ']'); - -if (!file_exists($inputfile)) - throw new Exception('Statistics module: input file do not exists [' . $inputfile . ']'); - - -$file = fopen($inputfile, 'r'); -$logfile = file($inputfile, FILE_IGNORE_NEW_LINES ); - - -$logparser = new sspmod_statistics_LogParser( - $statconfig->getValue('datestart', 0), $statconfig->getValue('datelength', 15), $statconfig->getValue('offsetspan', 44) -); -$datehandler = new sspmod_statistics_DateHandler($statconfig->getValue('offset', 0)); - -$results = array(); - -// Parse through log file, line by line -foreach ($logfile AS $logline) { - - // Continue if STAT is not found on line. - if (!preg_match('/STAT/', $logline)) continue; - - // Parse log, and extract epoch time and rest of content. 
- $epoch = $logparser->parseEpoch($logline); - $content = $logparser->parseContent($logline); - $action = $content[4]; - - - // Iterate all the statrules from config. - foreach ($statrules AS $rulename => $rule) { - - // echo 'Comparing action: [' . $rule['action'] . '] with [' . $action . ']'; - - $timeslot = $datehandler->toSlot($epoch, $rule['slot']); - $fileslot = $datehandler->toSlot($epoch, $rule['fileslot']); //print_r($content); - if (isset($rule['action']) && ($action !== $rule['action'])) continue; - - $difcol = $content[$rule['col']]; // echo '[...' . $difcol . '...]'; - - if (!isset($results[$rulename][$fileslot][$timeslot]['_'])) $results[$rulename][$fileslot][$timeslot]['_'] = 0; - if (!isset($results[$rulename][$fileslot][$timeslot][$difcol])) $results[$rulename][$fileslot][$timeslot][$difcol] = 0; - - $results[$rulename][$fileslot][$timeslot]['_']++; - $results[$rulename][$fileslot][$timeslot][$difcol]++; - } -} - -// Iterate the first level of results, which is per rule, as defined in the config. -foreach ($results AS $rulename => $ruleresults) { - - // Iterate the second level of results, which is the fileslot. - foreach ($ruleresults AS $fileno => $fileres) { - - $slotlist = array_keys($fileres); - - // Get start and end slot number within the file, based on the fileslot. - $start = $datehandler->toSlot($datehandler->fromSlot($fileno, $statrules[$rulename]['fileslot']), $statrules[$rulename]['slot']); - $end = $datehandler->toSlot($datehandler->fromSlot($fileno+1, $statrules[$rulename]['fileslot']), $statrules[$rulename]['slot']); - - // Fill in missing entries and sort file results - $filledresult = array(); - for ($slot = $start; $slot < $end; $slot++) { - $filledresult[$slot] = (isset($fileres[$slot])) ? $fileres[$slot] : array('_' => 0); - } - - // store file - file_put_contents($statdir . '/' . $rulename . '-' . $fileno . 
'.stat', serialize($filledresult), LOCK_EX ); - } -} - -?> \ No newline at end of file diff --git a/modules/statistics/hooks/hook_cron.php b/modules/statistics/hooks/hook_cron.php index cd0edc555f8697cd8d44afb326133fea4d9857df..726169e8d89e9b582ad0beb774fcaf297931161d 100644 --- a/modules/statistics/hooks/hook_cron.php +++ b/modules/statistics/hooks/hook_cron.php @@ -16,7 +16,10 @@ function statistics_hook_cron(&$croninfo) { if ($statconfig->getValue('cron_tag', NULL) !== $croninfo['tag']) return; try { - require_once(SimpleSAML_Module::getModuleDir('statistics') . '/extlibs/loganalyzer.php'); + $aggregator = new sspmod_statistics_Aggregator(); + $results = $aggregator->aggregate(); + // aggregate() only computes the counters; store() writes the .stat files. + $aggregator->store($results); } catch (Exception $e) { $croninfo['summary'][] = 'Loganalyzer threw exception: ' . $e->getMessage(); } diff --git a/modules/statistics/lib/Aggregator.php b/modules/statistics/lib/Aggregator.php new file mode 100644 index 0000000000000000000000000000000000000000..3176ca3f81f5ff05ea54e355b88ed160cedf8cde --- /dev/null +++ b/modules/statistics/lib/Aggregator.php @@ -0,0 +1,148 @@ +<?php +/* + * @author Andreas Åkre Solberg <andreas.solberg@uninett.no> + * @package simpleSAMLphp + * @version $Id$ + */ +class sspmod_statistics_Aggregator { + + private $config; + private $statconfig; + private $statdir; + private $inputfile; + private $statrules; + private $offset; + + /** + * Constructor: read statdir, inputfile, statrules and offset from the statistics.php configuration. + */ + public function __construct() { + + $this->config = SimpleSAML_Configuration::getInstance(); + $this->statconfig = $this->config->copyFromBase('statconfig', 'statistics.php'); + + $this->statdir = $this->statconfig->getValue('statdir'); + $this->inputfile = $this->statconfig->getValue('inputfile'); + $this->statrules = $this->statconfig->getValue('statrules'); + $this->offset = $this->statconfig->getValue('offset', 0); + } + + /** + * Echo the configured statistics directory, input file and offset. + */ + public function dumpConfig() { + + echo 'Statistics directory : ' . $this->statdir . "\n"; + echo 'Input file : ' . $this->inputfile . "\n"; + echo 'Offset : ' . 
$this->offset . "\n"; + + } + + + + /** + * Parse the input log file and count STAT entries per rule, file slot and time slot. + * + * @param bool $debug When TRUE, echo the parsed date and columns of each STAT line and exit at the first STAT line found past the second line of the log. + * @return array Counters indexed by [rule name][file slot][time slot][column value]; the '_' key holds the total for the time slot. + * @throws Exception If the statistics directory or the input file does not exist. + */ + public function aggregate($debug = FALSE) { + + if (!is_dir($this->statdir)) + throw new Exception('Statistics module: output dir do not exists [' . $this->statdir . ']'); + + if (!file_exists($this->inputfile)) + throw new Exception('Statistics module: input file do not exists [' . $this->inputfile . ']'); + + + $logfile = file($this->inputfile, FILE_IGNORE_NEW_LINES ); + + + $logparser = new sspmod_statistics_LogParser( + $this->statconfig->getValue('datestart', 0), $this->statconfig->getValue('datelength', 15), $this->statconfig->getValue('offsetspan', 44) + ); + $datehandler = new sspmod_statistics_DateHandler($this->offset); + + $results = array(); + + $i = 0; + // Parse through log file, line by line + foreach ($logfile AS $logline) { + $i++; + // Continue if STAT is not found on line. + if (!preg_match('/STAT/', $logline)) continue; + + // Parse log, and extract epoch time and rest of content. + $epoch = $logparser->parseEpoch($logline); + $content = $logparser->parseContent($logline); + $action = $content[4]; + + if ($debug) { + echo("----------------------------------------\n"); + echo('Log line: ' . $logline . "\n"); + echo('Date parse [' . substr($logline, 0, $this->statconfig->getValue('datelength', 15)) . '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n"); + print_r($content); + if ($i > 2) exit; + } + + + // Iterate all the statrules from config. + foreach ($this->statrules AS $rulename => $rule) { + + #echo 'Comparing action: [' . $rule['action'] . '] with [' . $action . ']'; + + $timeslot = $datehandler->toSlot($epoch, $rule['slot']); + $fileslot = $datehandler->toSlot($epoch, $rule['fileslot']); //print_r($content); + if (isset($rule['action']) && ($action !== $rule['action'])) continue; + + $difcol = $content[$rule['col']]; // echo '[...' . $difcol . 
'...]'; + + if (!isset($results[$rulename][$fileslot][$timeslot]['_'])) $results[$rulename][$fileslot][$timeslot]['_'] = 0; + if (!isset($results[$rulename][$fileslot][$timeslot][$difcol])) $results[$rulename][$fileslot][$timeslot][$difcol] = 0; + + $results[$rulename][$fileslot][$timeslot]['_']++; + $results[$rulename][$fileslot][$timeslot][$difcol]++; + } + } + return $results; + } + + + /** + * Write the aggregated counters to serialized .stat files in the statistics directory, one file per rule and file slot, filling time slots without data with a zero count. + * + * @param array $results Counters as returned by aggregate(). + */ + public function store($results) { + + // aggregate() keeps its date handler local, so build one here for the slot arithmetic. + $datehandler = new sspmod_statistics_DateHandler($this->offset); + + // Iterate the first level of results, which is per rule, as defined in the config. + foreach ($results AS $rulename => $ruleresults) { + + // Iterate the second level of results, which is the fileslot. + foreach ($ruleresults AS $fileno => $fileres) { + + // Get start and end slot number within the file, based on the fileslot. + $start = $datehandler->toSlot($datehandler->fromSlot($fileno, $this->statrules[$rulename]['fileslot']), $this->statrules[$rulename]['slot']); + $end = $datehandler->toSlot($datehandler->fromSlot($fileno+1, $this->statrules[$rulename]['fileslot']), $this->statrules[$rulename]['slot']); + + // Fill in missing entries and sort file results + $filledresult = array(); + for ($slot = $start; $slot < $end; $slot++) { + $filledresult[$slot] = (isset($fileres[$slot])) ? $fileres[$slot] : array('_' => 0); + } + + // store file + file_put_contents($this->statdir . '/' . $rulename . '-' . $fileno . '.stat', serialize($filledresult), LOCK_EX ); + } + } + + } + + +} + +?> \ No newline at end of file