From fe801a33359c5a62b86a552136d27085ee6dd145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kre=20Solberg?= <andreas.solberg@uninett.no> Date: Fri, 6 Mar 2009 14:46:25 +0000 Subject: [PATCH] statistics: aggregator trims cols and more effective reading of large files... git-svn-id: https://simplesamlphp.googlecode.com/svn/trunk@1387 44740490-163a-0410-bde0-09ae8108e29a --- modules/statistics/lib/Aggregator.php | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/modules/statistics/lib/Aggregator.php b/modules/statistics/lib/Aggregator.php index 576e0227b..d5314759b 100644 --- a/modules/statistics/lib/Aggregator.php +++ b/modules/statistics/lib/Aggregator.php @@ -45,7 +45,7 @@ class sspmod_statistics_Aggregator { $file = fopen($this->inputfile, 'r'); - $logfile = file($this->inputfile, FILE_IGNORE_NEW_LINES ); + #$logfile = file($this->inputfile, FILE_IGNORE_NEW_LINES ); $logparser = new sspmod_statistics_LogParser( @@ -57,8 +57,10 @@ class sspmod_statistics_Aggregator { $i = 0; // Parse through log file, line by line - foreach ($logfile AS $logline) { - + while (!feof($file)) { + + $logline = fgets($file, 4096); + // Continue if STAT is not found on line. if (!preg_match('/STAT/', $logline)) continue; $i++; @@ -66,9 +68,16 @@ class sspmod_statistics_Aggregator { // Parse log, and extract epoch time and rest of content. $epoch = $logparser->parseEpoch($logline); $content = $logparser->parseContent($logline); - $action = $content[5]; + $action = trim($content[5]); + + if (($i % 10000) == 0) { + echo("Read line " . $i . "\n"); + } + if ($debug) { + + echo("----------------------------------------\n"); echo('Log line: ' . $logline . "\n"); echo('Date parse [' . substr($logline, 0, $this->statconfig->getValue('datelength', 15)) . '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n"); @@ -88,7 +97,7 @@ class sspmod_statistics_Aggregator { if (isset($rule['action']) && ($action !== $rule['action'])) continue; - $difcol = $content[$rule['col']]; // echo '[...' . $difcol . '...]'; + $difcol = trim($content[$rule['col']]); // echo '[...' . $difcol . '...]'; if (!isset($results[$rulename][$fileslot][$timeslot]['_'])) $results[$rulename][$fileslot][$timeslot]['_'] = 0; if (!isset($results[$rulename][$fileslot][$timeslot][$difcol])) $results[$rulename][$fileslot][$timeslot][$difcol] = 0; -- GitLab