Skip to content
Snippets Groups Projects
Commit a5f6fe7c authored by Andreas Åkre Solberg's avatar Andreas Åkre Solberg
Browse files

add logcleanerscript

git-svn-id: https://simplesamlphp.googlecode.com/svn/trunk@1390 44740490-163a-0410-bde0-09ae8108e29a
parent bcd3e33e
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env php
<?php
/* This is the base directory of the simpleSAMLphp installation. */
$baseDir = dirname(dirname(dirname(dirname(__FILE__))));
/* Add library autoloader. */
require_once($baseDir . '/lib/_autoload.php');
/* Initialize the configuration. */
SimpleSAML_Configuration::setConfigDir($baseDir . '/config');
/*
 * Command line handling for the log cleaner.
 *
 * Options are given either as bare flags ("--debug") or as
 * "--option=value" pairs ("--outfile=/path/to/file").
 */
$progName = array_shift($argv);
$debug = FALSE;
$dryrun = FALSE;
$output = '/tmp/simplesamlphp-new.log';

/* Map short options to long options. Built once, outside the parsing loop. */
$shortOptMap = array(
    '-d' => '--debug',
);

foreach ($argv as $a) {
    if (strlen($a) === 0) continue;

    /* Split "--option=value" into the option name and its value. */
    $p = strpos($a, '=');
    if ($p !== FALSE) {
        $v = substr($a, $p + 1);
        $a = substr($a, 0, $p);
    } else {
        $v = NULL;
    }

    if (array_key_exists($a, $shortOptMap)) $a = $shortOptMap[$a];

    switch ($a) {
        case '--help':
            printHelp();
            exit(0);
        case '--debug':
            $debug = TRUE;
            break;
        case '--dry-run':
            $dryrun = TRUE;
            break;
        case '--outfile':
            /*
             * Reject a missing or empty value here; otherwise the NULL would only
             * surface as a failed fopen() after the whole log has been scanned.
             */
            if ($v === NULL || $v === '') {
                echo('Option --outfile requires a value: --outfile=<file>' . "\n");
                echo('Please run `' . $progName . ' --help` for usage information.' . "\n");
                exit(1);
            }
            $output = $v;
            break;
        default:
            echo('Unknown option: ' . $a . "\n");
            echo('Please run `' . $progName . ' --help` for usage information.' . "\n");
            exit(1);
    }
}

/* First pass: find the trackIDs to remove. Second pass (unless dry-run): write the cleaned log. */
$cleaner = new sspmod_statistics_LogCleaner();
$cleaner->dumpConfig();
$todelete = $cleaner->clean($debug);
echo "Cleaning these trackIDs: " . join(', ', $todelete) . "\n";
if (!$dryrun) {
    $cleaner->store($todelete, $output);
}
/**
 * Print the usage text for this script to standard output.
 *
 * The program name shown in the usage line is read from the global
 * $progName, which the script sets from the first element of $argv.
 */
function printHelp() {
    global $progName;

    $lines = array(
        'Usage: ' . $progName . ' [options]',
        '',
        'This program cleans logs. This script is experimental. Do not run it unless you have talked to Andreas about it.',
        'The script deletes log lines related to sessions that produce more than 200 lines.',
        '',
        'Options:',
        '  -d, --debug          Used when configuring the log file syntax. See doc.',
        '      --dry-run        Find the sessions to delete, but do not write the cleaned log.',
        /* The parser only accepts the "=value" form, so show it that way. */
        '      --outfile=FILE   File to write the cleaned log to.',
        '      --help           Show this help text.',
    );

    echo(implode("\n", $lines) . "\n");
}
<?php
/**
 * Removes the log lines of unusually chatty sessions from the statistics log.
 *
 * Only lines containing the marker "STAT" are considered. Field 4 of the parsed
 * line content is used as the track ID of the line. clean() counts the lines per
 * track ID and returns the track IDs above a threshold; store() writes a copy of
 * the STAT lines with those track IDs left out.
 *
 * @author Andreas Åkre Solberg <andreas.solberg@uninett.no>
 * @package simpleSAMLphp
 * @version $Id$
 */
class sspmod_statistics_LogCleaner {

    /* Configuration object loaded from module_statistics.php. */
    private $statconfig;
    /* Value of the 'statdir' option; must be an existing directory. */
    private $statdir;
    /* Value of the 'inputfile' option; the log file that is read. */
    private $inputfile;
    /* Value of the 'statrules' option; not used by this class itself. */
    private $statrules;
    /* Value of the 'offset' option (default 0); only reported by dumpConfig(). */
    private $offset;

    /**
     * Constructor
     *
     * Loads the statistics module configuration and caches the options used here.
     */
    public function __construct() {
        $this->statconfig = SimpleSAML_Configuration::getConfig('module_statistics.php');

        $this->statdir = $this->statconfig->getValue('statdir');
        $this->inputfile = $this->statconfig->getValue('inputfile');
        $this->statrules = $this->statconfig->getValue('statrules');
        $this->offset = $this->statconfig->getValue('offset', 0);
    }

    /**
     * Print the statistics directory, input file and offset to standard output.
     */
    public function dumpConfig() {
        echo 'Statistics directory : ' . $this->statdir . "\n";
        echo 'Input file : ' . $this->inputfile . "\n";
        echo 'Offset : ' . $this->offset . "\n";
    }

    /**
     * Scan the input file and find the track IDs that occur on too many STAT lines.
     *
     * A progress message is printed for every 10000 STAT lines read.
     *
     * @param bool $debug      When TRUE, dump every parsed line and terminate the
     *                         whole script after 13 STAT lines (used to tune the
     *                         log format settings).
     * @param int  $threshold  A track ID is returned when it occurs on more than
     *                         this number of STAT lines. Defaults to 200.
     * @return array  List of track IDs to delete.
     * @throws Exception  If the statistics directory or the input file is missing,
     *                    or the input file cannot be opened.
     */
    public function clean($debug = FALSE, $threshold = 200) {
        $file = $this->openInputFile();
        $logparser = $this->createLogParser();
        $datelength = $this->statconfig->getValue('datelength', 15);

        $sessioncounter = array();
        $i = 0;

        // Parse through log file, line by line. No length limit is passed to
        // fgets(), so long lines are never split into several chunks.
        while (($logline = fgets($file)) !== FALSE) {

            // Continue if STAT is not found on line.
            if (strpos($logline, 'STAT') === FALSE) continue;
            $i++;

            $content = $logparser->parseContent($logline);

            if (($i % 10000) == 0) {
                echo("Read line " . $i . "\n");
            }

            // Lines without a track ID field cannot be attributed to a session.
            if (isset($content[4])) {
                $trackid = $content[4];
                if (!isset($sessioncounter[$trackid])) $sessioncounter[$trackid] = 0;
                $sessioncounter[$trackid]++;
            }

            if ($debug) {
                // The epoch is only needed for the debug dump.
                $epoch = $logparser->parseEpoch($logline);

                echo("----------------------------------------\n");
                echo('Log line: ' . $logline . "\n");
                echo('Date parse [' . substr($logline, 0, $datelength) . '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n");
                print_r($content);
                // Debug mode deliberately stops the script after a handful of lines.
                if ($i >= 13) exit;
            }
        }
        fclose($file);

        $todelete = array();
        foreach ($sessioncounter as $trackid => $count) {
            if ($count > $threshold) $todelete[] = $trackid;
        }

        return $todelete;
    }

    /**
     * Write the STAT lines of the input file to $outputfile, leaving out every
     * line whose track ID is listed in $todelete.
     *
     * Lines that do not contain "STAT" are not copied to the output.
     *
     * @param array  $todelete    Track IDs to remove, as returned by clean().
     * @param string $outputfile  Path of the file to write; it is overwritten.
     * @throws Exception  If the statistics directory or the input file is missing,
     *                    a file cannot be opened, or the output file is the input file.
     */
    public function store($todelete, $outputfile) {
        echo "Preparing to delete [" .count($todelete) . "] trackids\n";

        $file = $this->openInputFile();

        // Opening the output with 'w' truncates it, which would destroy the
        // input if both paths point at the same file.
        if (file_exists($outputfile) && realpath($outputfile) === realpath($this->inputfile)) {
            fclose($file);
            throw new Exception('Statistics module: output file must differ from input file [' . $outputfile . ']');
        }

        $outfile = fopen($outputfile, 'w');
        if ($outfile === FALSE) {
            fclose($file);
            throw new Exception('Statistics module: unable to open output file [' . $outputfile . ']');
        }

        $logparser = $this->createLogParser();

        // Index the track IDs by key: constant-time lookup per line, and no
        // loose comparison between numeric and non-numeric track IDs.
        $deleteSet = array();
        foreach ($todelete as $trackid) {
            $deleteSet[$trackid] = TRUE;
        }

        $i = 0;

        // Parse through log file, line by line
        while (($logline = fgets($file)) !== FALSE) {

            // Continue if STAT is not found on line.
            if (strpos($logline, 'STAT') === FALSE) continue;
            $i++;

            $content = $logparser->parseContent($logline);

            if (($i % 10000) == 0) {
                echo("Read line " . $i . "\n");
            }

            // Skip the lines that belong to a track ID marked for deletion.
            if (isset($content[4]) && isset($deleteSet[$content[4]])) continue;

            fputs($outfile, $logline);
        }

        fclose($file);
        fclose($outfile);
    }

    /**
     * Check that the statistics directory and input file exist, then open the
     * input file for reading.
     *
     * @return resource  Read handle on the input file.
     * @throws Exception  If a check fails or the file cannot be opened.
     */
    private function openInputFile() {
        if (!is_dir($this->statdir))
            throw new Exception('Statistics module: output dir do not exists [' . $this->statdir . ']');

        if (!file_exists($this->inputfile))
            throw new Exception('Statistics module: input file do not exists [' . $this->inputfile . ']');

        $file = fopen($this->inputfile, 'r');
        if ($file === FALSE)
            throw new Exception('Statistics module: unable to open input file [' . $this->inputfile . ']');

        return $file;
    }

    /**
     * Create a log parser from the 'datestart', 'datelength' and 'offsetspan'
     * options, with the defaults 0, 15 and 44.
     *
     * @return sspmod_statistics_LogParser
     */
    private function createLogParser() {
        return new sspmod_statistics_LogParser(
            $this->statconfig->getValue('datestart', 0),
            $this->statconfig->getValue('datelength', 15),
            $this->statconfig->getValue('offsetspan', 44)
        );
    }

}
?>
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment