diff options
author | fligtar@gmail.com <fligtar@gmail.com@4eb1ac78-321c-0410-a911-ec516a8615a5> | 2008-03-06 05:36:10 (GMT) |
---|---|---|
committer | fligtar@gmail.com <fligtar@gmail.com@4eb1ac78-321c-0410-a911-ec516a8615a5> | 2008-03-06 05:36:10 (GMT) |
commit | 2b18f536d50bce5eb3e6e933183bb64d5bba1341 (patch) | |
tree | cdefe56f2489a057149facdcf04e8cbad833f88b /bin | |
parent | 72a7edc227235d9c4fc6a49b4b9ec6eac8d69d77 (diff) |
add support for geography in log parser; bug 408234; r=morgamic
git-svn-id: http://svn.mozilla.org/addons/trunk@11027 4eb1ac78-321c-0410-a911-ec516a8615a5
Diffstat (limited to 'bin')
-rw-r--r-- | bin/parse_logs/count_downloads.class.php | 15 | ||||
-rw-r--r-- | bin/parse_logs/count_update_pings.class.php | 17 | ||||
-rw-r--r-- | bin/parse_logs/log_parser.class.php | 98 | ||||
-rw-r--r-- | bin/parse_logs/parse_logs.php | 7 |
4 files changed, 76 insertions, 61 deletions
diff --git a/bin/parse_logs/count_downloads.class.php b/bin/parse_logs/count_downloads.class.php index 81ceca6..c38544b 100644 --- a/bin/parse_logs/count_downloads.class.php +++ b/bin/parse_logs/count_downloads.class.php @@ -44,8 +44,7 @@ class Count_Downloads { var $countedIPs = array(); // list of counted IP addresses var $db; - var $totalSkipped = 0; - var $totalSkippedNL = 0; + var $totalSkipped = array('blacklist' => 0, 'SJ' => 0, 'NL' => 0, 'CN' => 0); var $totalCounted = 0; /** @@ -66,13 +65,9 @@ class Count_Downloads { // Make sure IP is not in blacklist and is not coming from Mozilla .nl if (isset($this->countedIPs[$details['ip']])) { - $this->totalSkipped++; + $this->totalSkipped['blacklist']++; outputIfVerbose("[DownloadCounter] IP ({$details['ip']}) in blacklist; skipped"); } - elseif (strpos($details['ip'], MOZILLA_NL_IP) !== false) { - $this->totalSkippedNL++; - outputIfVerbose("[DownloadCounter] IP ({$details['ip']}) from Mozilla .nl; skipped"); - } elseif (empty($details['fileid'])) { outputIfVerbose('[DownloadCounter] No file id found'); } @@ -121,6 +116,12 @@ class Count_Downloads { $this->db->query("UPDATE download_counts SET count=count+1 WHERE addon_id='{$addon_id}' AND date='{$date}'", true); } + /** + * Callback for after a logfile is parsed. + */ + function logfileParsedCallback() { + + } } ?> diff --git a/bin/parse_logs/count_update_pings.class.php b/bin/parse_logs/count_update_pings.class.php index 56fa089..84be36c 100644 --- a/bin/parse_logs/count_update_pings.class.php +++ b/bin/parse_logs/count_update_pings.class.php @@ -44,8 +44,7 @@ class Count_Update_Pings { var $db; var $guids = array(); // array of GUIDs and add-on ids var $counts = array(); // array of update ping counts - var $totalSkipped = 0; - var $totalSkippedNL = 0; + var $totalSkipped = array('unknown_guid' => 0, 'SJ' => 0, 'NL' => 0, 'CN' => 0); var $totalCounted = 0; /** @@ -75,15 +74,11 @@ class Count_Update_Pings { * @param array $details details from the parsed log line */ function count($details) { - // Make sure IP is not in blacklist and is not coming from Mozilla .nl + // Make sure GUID is known if (empty($this->guids[$details['addon']['guid']])) { - $this->totalSkipped++; + $this->totalSkipped['unknown_guid']++; outputIfVerbose("[UpdatePingCounter] Unknown GUID skipped: {$details['addon']['guid']}"); } - elseif(strpos($details['ip'], MOZILLA_NL_IP) !== false) { - $this->totalSkippedNL++; - outputIfVerbose("[UpdatePingCounter] IP ({$details['ip']}) from Mozilla .nl; skipped"); - } else { $addon_id = $this->guids[$details['addon']['guid']]; $date = date('Y-m-d', $details['unixtime']); @@ -261,6 +256,12 @@ class Count_Update_Pings { return $this->serializeCounts($totalCounts); } + /** + * Callback for after a logfile is parsed. + */ + function logfileParsedCallback() { + $this->updateCounts(); + } } ?> diff --git a/bin/parse_logs/log_parser.class.php b/bin/parse_logs/log_parser.class.php index b6996d3..fbce318 100644 --- a/bin/parse_logs/log_parser.class.php +++ b/bin/parse_logs/log_parser.class.php @@ -38,8 +38,11 @@ * * ***** END LICENSE BLOCK ***** */ -// Mozilla .nl IPs -define('MOZILLA_NL_IP', '63.245.213.'); +// Mozilla datacenter IPs to filter +$datacenters = array( + 'NL' => array('63.245.213.'), + 'CN' => array('59.151.50.') + ); // Include class files require_once('../database.class.php'); @@ -55,10 +58,8 @@ class Log_Parser { var $date = ''; // date of updateping logs to parse var $type; // type of parsing var $db; - var $countDownloads = false; - var $downloadCounter; // reference to download counting class object - var $countUpdatePings = false; - var $updatePingCounter; // reference to update ping counting class object + var $counter; // the counter class + var $geo = ''; /** * Initiates parser and database connection @@ -68,16 +69,16 @@ class Log_Parser { * @param string $parseType the type of parsing to be done * @param string $date the date of updateping logs to parse */ - function Log_Parser($logDir, $tmpDir, $parseType, $date = '') { + function Log_Parser($logDir, $tmpDir, $parseType, $geo, $date = '') { $this->tmpDir = $tmpDir; $this->logDir = $logDir; $this->type = $parseType; + $this->geo = $geo; $this->db =& new Database(); if ($parseType == 'downloads') { - $this->downloadCounter =& new Count_Downloads($this->db); - $this->countDownloads = true; + $this->counter =& new Count_Downloads($this->db); } elseif ($parseType == 'updatepings') { if (empty($date)) { @@ -86,8 +87,7 @@ class Log_Parser { } $this->date = $date; - $this->updatePingCounter =& new Count_Update_Pings($this->db); - $this->countUpdatePings = true; + $this->counter =& new Count_Update_Pings($this->db); } } @@ -96,20 +96,20 @@ class Log_Parser { * been parsed, passes to parsing function. */ function start() { - echo "\n---------- [ BEGIN ACCESS LOG PARSING ] ----------\n"; + echo "\n---------- [ BEGIN ACCESS LOG PARSING FOR {$this->geo}] ----------\n"; exec("find {$this->logDir} -name \"access_{$this->date}*.gz\" -type f", $loglist); if (!empty($loglist)) { foreach ($loglist as $logfile) { if (!empty($logfile)) { - $logfile_query = $this->db->query("SELECT * FROM logs_parsed WHERE name='".mysql_real_escape_string(basename($logfile))."'"); + $logfile_query = $this->db->query("SELECT * FROM logs_parsed WHERE name='".mysql_real_escape_string(basename($logfile))."' AND geo='".mysql_real_escape_string($this->geo)."'"); $logfile_result = mysql_fetch_array($logfile_query); if ($logfile_result["{$this->type}_done"] == 1) - echo basename($logfile)." has already been parsed!\n"; + echo basename($logfile)." has already been parsed for {$this->geo}!\n"; else { if (empty($logfile_result)) - $this->db->query("INSERT INTO logs_parsed (name) VALUES('".mysql_real_escape_string(basename($logfile))."')", true); + $this->db->query("INSERT INTO logs_parsed (name, geo) VALUES('".mysql_real_escape_string(basename($logfile))."', '".mysql_real_escape_string($this->geo)."')", true); $this->parse($logfile); } @@ -134,9 +134,9 @@ class Log_Parser { echo "\n---------- [ Copying {$logfile} ] ----------\n"; $tempFile = "{$this->tmpDir}/addon_log_file_".str_replace(' ', '_', microtime()); - if ($this->countDownloads) + if ($this->type == 'downloads') $pattern = 'downloads/file/'; - elseif ($this->countUpdatePings) + elseif ($this->type == 'updatepings') $pattern = 'VersionCheck.php'; // Strip relevant lines out of log file and write to a temp file @@ -144,7 +144,7 @@ class Log_Parser { if (!$fp = fopen($tempFile, 'r')) die('Failed to open temp file'); - echo "\n---------- [ Parsing {$logfile} ] ----------\n"; + echo "\n---------- [ Parsing {$logfile} in {$this->geo}] ----------\n"; while ($line = fgets($fp)) { // Match line patterns @@ -153,28 +153,27 @@ class Log_Parser { if (!empty($matches[0])) { $lineDetails = $this->getLineDetails($matches); - if (!is_array($lineDetails)) { + if (!is_array($lineDetails)) continue; - } - if ($this->countDownloads && $lineDetails['type'] == 'download') { - $this->downloadCounter->count($lineDetails); + if ($geoFound = $this->fromMozillaDatacenter($lineDetails['ip'])) { + $this->counter->totalSkipped[$geoFound]++; + outputIfVerbose("[{$this->type}Counter] IP ({$lineDetails['ip']}) from Mozilla {$geoFound}; skipped"); + continue; } - if ($this->countUpdatePings && $lineDetails['type'] == 'update') { - $this->updatePingCounter->count($lineDetails); - } + if ($this->type == $lineDetails['type']) + $this->counter->count($lineDetails); } else echo "Could not match log entry to pattern: {$line}\n"; } - // Write counts to DB - if ($this->countUpdatePings) - $this->updatePingCounter->updateCounts(); + // Logfile post-parse callback + $this->counter->logfileParsedCallback(); // Mark file as finished parsing - $this->db->query("UPDATE logs_parsed SET {$this->type}_done=1 WHERE name='".mysql_real_escape_string(basename($logfile))."'", true); + $this->db->query("UPDATE logs_parsed SET {$this->type}_done=1 WHERE name='".mysql_real_escape_string(basename($logfile))."' AND geo='".mysql_real_escape_string($this->geo)."'", true); fclose($fp); @@ -217,16 +216,16 @@ class Log_Parser { // Set request type if ($matches[1] == 'file') - $log_data['type'] = 'download'; + $log_data['type'] = 'downloads'; elseif ($matches[1] == 'VersionCheck.php') - $log_data['type'] = 'update'; + $log_data['type'] = 'updatepings'; // If a download, get the file id out - if ($log_data['type'] == 'download') { + if ($log_data['type'] == 'downloads') { $log_data['fileid'] = mysql_real_escape_string($matches[3]); } // If it's an update ping, get out all of the details - elseif ($log_data['type'] == 'update') { + elseif ($log_data['type'] == 'updatepings') { $log_data['addon']['reqVersion'] = !empty($matches[5]) ? $matches[5] : null; $log_data['addon']['guid'] = !empty($matches[6]) ? $matches[6] : null; $log_data['addon']['version'] = !empty($matches[8]) ? $matches[8] : null; @@ -241,21 +240,34 @@ class Log_Parser { return $log_data; } + /** + * Determines if an IP is from a Mozilla datacenter + */ + function fromMozillaDatacenter($ip) { + global $datacenters; + + if (empty($datacenters)) + return false; + + foreach ($datacenters as $geo => $datacenter_ips) { + foreach ($datacenter_ips as $datacenter_ip) { + if (strpos($ip, $datacenter_ip) !== false) + return $geo; + } + } + + return false; + } + /** * Called when finished parsing all logfiles to update counts in the database */ function finish() { - if ($this->countDownloads) { - echo "\nDownloads counted: {$this->downloadCounter->totalCounted}\n"; - echo "Downloads skipped (30s blacklist): {$this->downloadCounter->totalSkipped}\n"; - echo "Downloads skipped (Mozilla .nl): {$this->downloadCounter->totalSkippedNL}\n"; + echo "\n{$this->type} counted: {$this->counter->totalCounted}\n"; + foreach ($this->counter->totalSkipped as $skipped => $count) { + echo "\tSkipped because of {$skipped}: {$count}\n"; } - if ($this->countUpdatePings) { - echo "\nUpdate pings counted: {$this->updatePingCounter->totalCounted}\n"; - echo "Update pings skipped: {$this->updatePingCounter->totalSkipped}\n"; - echo "Downloads skipped (Mozilla .nl): {$this->updatePingCounter->totalSkippedNL}\n"; - } - + $this->db->close(); } } diff --git a/bin/parse_logs/parse_logs.php b/bin/parse_logs/parse_logs.php index 0383220..9f57f75 100644 --- a/bin/parse_logs/parse_logs.php +++ b/bin/parse_logs/parse_logs.php @@ -71,10 +71,10 @@ if ($argv) { $verbose = array_key_exists('v', $_GET); // Validate arguments -if (!empty($_GET['logs']) && !empty($_GET['temp']) && !empty($_GET['type'])) { +if (!empty($_GET['logs']) && !empty($_GET['temp']) && !empty($_GET['type']) && !empty($_GET['geo'])) { if (is_readable($_GET['logs']) && is_writable($_GET['temp'])) { if (in_array($_GET['type'], array('downloads', 'updatepings'))) { - $parser = new Log_Parser($_GET['logs'], $_GET['temp'], $_GET['type'], !empty($_GET['date']) ? $_GET['date'] : ''); + $parser = new Log_Parser($_GET['logs'], $_GET['temp'], $_GET['type'], $_GET['geo'], !empty($_GET['date']) ? $_GET['date'] : ''); $parser->start(); $finished = true; @@ -87,10 +87,11 @@ if (!empty($_GET['logs']) && !empty($_GET['temp']) && !empty($_GET['type'])) { if (empty($finished)) { // Output usage instructions print "usage:\n"; - print "php -f parse_logs.php logs=[log_dir] temp=[tmp_dir] type=[parse_type] date=[date] [v]\n"; + print "php -f parse_logs.php logs=[log_dir] temp=[tmp_dir] type=[parse_type] geo=[geo] date=[date] [v]\n"; print "\tlog_dir:\tDirectory with the access log files\n"; print "\ttmp_dir:\tDirectory for the temp file to be written\n"; print "\tparse_type:\tdownloads or updatepings\n"; + print "\tgeo:\tdatacenter from which logs are being parsed\n"; print "\tdate:\tsingle date for which to parse update pings, in YYYY-MM-DD format\n"; print "\tv:\tverbose output of progress\n"; print "sample usage:\n"; |