Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/bin
diff options
context:
space:
mode:
author fligtar@gmail.com <fligtar@gmail.com@4eb1ac78-321c-0410-a911-ec516a8615a5> 2008-03-06 05:36:10 (GMT)
committer fligtar@gmail.com <fligtar@gmail.com@4eb1ac78-321c-0410-a911-ec516a8615a5> 2008-03-06 05:36:10 (GMT)
commit 2b18f536d50bce5eb3e6e933183bb64d5bba1341 (patch)
tree cdefe56f2489a057149facdcf04e8cbad833f88b /bin
parent 72a7edc227235d9c4fc6a49b4b9ec6eac8d69d77 (diff)
add support for geography in log parser; bug 408234; r=morgamic
git-svn-id: http://svn.mozilla.org/addons/trunk@11027 4eb1ac78-321c-0410-a911-ec516a8615a5
Diffstat (limited to 'bin')
-rw-r--r-- bin/parse_logs/count_downloads.class.php 15
-rw-r--r-- bin/parse_logs/count_update_pings.class.php 17
-rw-r--r-- bin/parse_logs/log_parser.class.php 98
-rw-r--r-- bin/parse_logs/parse_logs.php 7
4 files changed, 76 insertions, 61 deletions
diff --git a/bin/parse_logs/count_downloads.class.php b/bin/parse_logs/count_downloads.class.php
index 81ceca6..c38544b 100644
--- a/bin/parse_logs/count_downloads.class.php
+++ b/bin/parse_logs/count_downloads.class.php
@@ -44,8 +44,7 @@
class Count_Downloads {
var $countedIPs = array(); // list of counted IP addresses
var $db;
- var $totalSkipped = 0;
- var $totalSkippedNL = 0;
+ var $totalSkipped = array('blacklist' => 0, 'SJ' => 0, 'NL' => 0, 'CN' => 0);
var $totalCounted = 0;
/**
@@ -66,13 +65,9 @@ class Count_Downloads {
// Make sure IP is not in blacklist and is not coming from Mozilla .nl
if (isset($this->countedIPs[$details['ip']])) {
- $this->totalSkipped++;
+ $this->totalSkipped['blacklist']++;
outputIfVerbose("[DownloadCounter] IP ({$details['ip']}) in blacklist; skipped");
}
- elseif (strpos($details['ip'], MOZILLA_NL_IP) !== false) {
- $this->totalSkippedNL++;
- outputIfVerbose("[DownloadCounter] IP ({$details['ip']}) from Mozilla .nl; skipped");
- }
elseif (empty($details['fileid'])) {
outputIfVerbose('[DownloadCounter] No file id found');
}
@@ -121,6 +116,12 @@ class Count_Downloads {
$this->db->query("UPDATE download_counts SET count=count+1 WHERE addon_id='{$addon_id}' AND date='{$date}'", true);
}
+ /**
+ * Callback for after a logfile is parsed.
+ */
+ function logfileParsedCallback() {
+
+ }
}
?>
diff --git a/bin/parse_logs/count_update_pings.class.php b/bin/parse_logs/count_update_pings.class.php
index 56fa089..84be36c 100644
--- a/bin/parse_logs/count_update_pings.class.php
+++ b/bin/parse_logs/count_update_pings.class.php
@@ -44,8 +44,7 @@ class Count_Update_Pings {
var $db;
var $guids = array(); // array of GUIDs and add-on ids
var $counts = array(); // array of update ping counts
- var $totalSkipped = 0;
- var $totalSkippedNL = 0;
+ var $totalSkipped = array('unknown_guid' => 0, 'SJ' => 0, 'NL' => 0, 'CN' => 0);
var $totalCounted = 0;
/**
@@ -75,15 +74,11 @@ class Count_Update_Pings {
* @param array $details details from the parsed log line
*/
function count($details) {
- // Make sure IP is not in blacklist and is not coming from Mozilla .nl
+ // Make sure GUID is known
if (empty($this->guids[$details['addon']['guid']])) {
- $this->totalSkipped++;
+ $this->totalSkipped['unknown_guid']++;
outputIfVerbose("[UpdatePingCounter] Unknown GUID skipped: {$details['addon']['guid']}");
}
- elseif(strpos($details['ip'], MOZILLA_NL_IP) !== false) {
- $this->totalSkippedNL++;
- outputIfVerbose("[UpdatePingCounter] IP ({$details['ip']}) from Mozilla .nl; skipped");
- }
else {
$addon_id = $this->guids[$details['addon']['guid']];
$date = date('Y-m-d', $details['unixtime']);
@@ -261,6 +256,12 @@ class Count_Update_Pings {
return $this->serializeCounts($totalCounts);
}
+ /**
+ * Callback for after a logfile is parsed.
+ */
+ function logfileParsedCallback() {
+ $this->updateCounts();
+ }
}
?>
diff --git a/bin/parse_logs/log_parser.class.php b/bin/parse_logs/log_parser.class.php
index b6996d3..fbce318 100644
--- a/bin/parse_logs/log_parser.class.php
+++ b/bin/parse_logs/log_parser.class.php
@@ -38,8 +38,11 @@
*
* ***** END LICENSE BLOCK ***** */
-// Mozilla .nl IPs
-define('MOZILLA_NL_IP', '63.245.213.');
+// Mozilla datacenter IPs to filter
+$datacenters = array(
+ 'NL' => array('63.245.213.'),
+ 'CN' => array('59.151.50.')
+ );
// Include class files
require_once('../database.class.php');
@@ -55,10 +58,8 @@ class Log_Parser {
var $date = ''; // date of updateping logs to parse
var $type; // type of parsing
var $db;
- var $countDownloads = false;
- var $downloadCounter; // reference to download counting class object
- var $countUpdatePings = false;
- var $updatePingCounter; // reference to update ping counting class object
+ var $counter; // the counter class
+ var $geo = '';
/**
* Initiates parser and database connection
@@ -68,16 +69,16 @@ class Log_Parser {
* @param string $parseType the type of parsing to be done
* @param string $date the date of updateping logs to parse
*/
- function Log_Parser($logDir, $tmpDir, $parseType, $date = '') {
+ function Log_Parser($logDir, $tmpDir, $parseType, $geo, $date = '') {
$this->tmpDir = $tmpDir;
$this->logDir = $logDir;
$this->type = $parseType;
+ $this->geo = $geo;
$this->db =& new Database();
if ($parseType == 'downloads') {
- $this->downloadCounter =& new Count_Downloads($this->db);
- $this->countDownloads = true;
+ $this->counter =& new Count_Downloads($this->db);
}
elseif ($parseType == 'updatepings') {
if (empty($date)) {
@@ -86,8 +87,7 @@ class Log_Parser {
}
$this->date = $date;
- $this->updatePingCounter =& new Count_Update_Pings($this->db);
- $this->countUpdatePings = true;
+ $this->counter =& new Count_Update_Pings($this->db);
}
}
@@ -96,20 +96,20 @@ class Log_Parser {
* been parsed, passes to parsing function.
*/
function start() {
- echo "\n---------- [ BEGIN ACCESS LOG PARSING ] ----------\n";
+ echo "\n---------- [ BEGIN ACCESS LOG PARSING FOR {$this->geo}] ----------\n";
exec("find {$this->logDir} -name \"access_{$this->date}*.gz\" -type f", $loglist);
if (!empty($loglist)) {
foreach ($loglist as $logfile) {
if (!empty($logfile)) {
- $logfile_query = $this->db->query("SELECT * FROM logs_parsed WHERE name='".mysql_real_escape_string(basename($logfile))."'");
+ $logfile_query = $this->db->query("SELECT * FROM logs_parsed WHERE name='".mysql_real_escape_string(basename($logfile))."' AND geo='".mysql_real_escape_string($this->geo)."'");
$logfile_result = mysql_fetch_array($logfile_query);
if ($logfile_result["{$this->type}_done"] == 1)
- echo basename($logfile)." has already been parsed!\n";
+ echo basename($logfile)." has already been parsed for {$this->geo}!\n";
else {
if (empty($logfile_result))
- $this->db->query("INSERT INTO logs_parsed (name) VALUES('".mysql_real_escape_string(basename($logfile))."')", true);
+ $this->db->query("INSERT INTO logs_parsed (name, geo) VALUES('".mysql_real_escape_string(basename($logfile))."', '".mysql_real_escape_string($this->geo)."')", true);
$this->parse($logfile);
}
@@ -134,9 +134,9 @@ class Log_Parser {
echo "\n---------- [ Copying {$logfile} ] ----------\n";
$tempFile = "{$this->tmpDir}/addon_log_file_".str_replace(' ', '_', microtime());
- if ($this->countDownloads)
+ if ($this->type == 'downloads')
$pattern = 'downloads/file/';
- elseif ($this->countUpdatePings)
+ elseif ($this->type == 'updatepings')
$pattern = 'VersionCheck.php';
// Strip relevant lines out of log file and write to a temp file
@@ -144,7 +144,7 @@ class Log_Parser {
if (!$fp = fopen($tempFile, 'r'))
die('Failed to open temp file');
- echo "\n---------- [ Parsing {$logfile} ] ----------\n";
+ echo "\n---------- [ Parsing {$logfile} in {$this->geo}] ----------\n";
while ($line = fgets($fp)) {
// Match line patterns
@@ -153,28 +153,27 @@ class Log_Parser {
if (!empty($matches[0])) {
$lineDetails = $this->getLineDetails($matches);
- if (!is_array($lineDetails)) {
+ if (!is_array($lineDetails))
continue;
- }
- if ($this->countDownloads && $lineDetails['type'] == 'download') {
- $this->downloadCounter->count($lineDetails);
+ if ($geoFound = $this->fromMozillaDatacenter($lineDetails['ip'])) {
+ $this->counter->totalSkipped[$geoFound]++;
+ outputIfVerbose("[{$this->type}Counter] IP ({$lineDetails['ip']}) from Mozilla {$geoFound}; skipped");
+ continue;
}
- if ($this->countUpdatePings && $lineDetails['type'] == 'update') {
- $this->updatePingCounter->count($lineDetails);
- }
+ if ($this->type == $lineDetails['type'])
+ $this->counter->count($lineDetails);
}
else
echo "Could not match log entry to pattern: {$line}\n";
}
- // Write counts to DB
- if ($this->countUpdatePings)
- $this->updatePingCounter->updateCounts();
+ // Logfile post-parse callback
+ $this->counter->logfileParsedCallback();
// Mark file as finished parsing
- $this->db->query("UPDATE logs_parsed SET {$this->type}_done=1 WHERE name='".mysql_real_escape_string(basename($logfile))."'", true);
+ $this->db->query("UPDATE logs_parsed SET {$this->type}_done=1 WHERE name='".mysql_real_escape_string(basename($logfile))."' AND geo='".mysql_real_escape_string($this->geo)."'", true);
fclose($fp);
@@ -217,16 +216,16 @@ class Log_Parser {
// Set request type
if ($matches[1] == 'file')
- $log_data['type'] = 'download';
+ $log_data['type'] = 'downloads';
elseif ($matches[1] == 'VersionCheck.php')
- $log_data['type'] = 'update';
+ $log_data['type'] = 'updatepings';
// If a download, get the file id out
- if ($log_data['type'] == 'download') {
+ if ($log_data['type'] == 'downloads') {
$log_data['fileid'] = mysql_real_escape_string($matches[3]);
}
// If it's an update ping, get out all of the details
- elseif ($log_data['type'] == 'update') {
+ elseif ($log_data['type'] == 'updatepings') {
$log_data['addon']['reqVersion'] = !empty($matches[5]) ? $matches[5] : null;
$log_data['addon']['guid'] = !empty($matches[6]) ? $matches[6] : null;
$log_data['addon']['version'] = !empty($matches[8]) ? $matches[8] : null;
@@ -241,21 +240,34 @@ class Log_Parser {
return $log_data;
}
+ /**
+ * Determines if an IP is from a Mozilla datacenter
+ */
+ function fromMozillaDatacenter($ip) {
+ global $datacenters;
+
+ if (empty($datacenters))
+ return false;
+
+ foreach ($datacenters as $geo => $datacenter_ips) {
+ foreach ($datacenter_ips as $datacenter_ip) {
+ if (strpos($ip, $datacenter_ip) !== false)
+ return $geo;
+ }
+ }
+
+ return false;
+ }
+
/**
* Called when finished parsing all logfiles to update counts in the database
*/
function finish() {
- if ($this->countDownloads) {
- echo "\nDownloads counted: {$this->downloadCounter->totalCounted}\n";
- echo "Downloads skipped (30s blacklist): {$this->downloadCounter->totalSkipped}\n";
- echo "Downloads skipped (Mozilla .nl): {$this->downloadCounter->totalSkippedNL}\n";
+ echo "\n{$this->type} counted: {$this->counter->totalCounted}\n";
+ foreach ($this->counter->totalSkipped as $skipped => $count) {
+ echo "\tSkipped because of {$skipped}: {$count}\n";
}
- if ($this->countUpdatePings) {
- echo "\nUpdate pings counted: {$this->updatePingCounter->totalCounted}\n";
- echo "Update pings skipped: {$this->updatePingCounter->totalSkipped}\n";
- echo "Downloads skipped (Mozilla .nl): {$this->updatePingCounter->totalSkippedNL}\n";
- }
-
+
$this->db->close();
}
}
diff --git a/bin/parse_logs/parse_logs.php b/bin/parse_logs/parse_logs.php
index 0383220..9f57f75 100644
--- a/bin/parse_logs/parse_logs.php
+++ b/bin/parse_logs/parse_logs.php
@@ -71,10 +71,10 @@ if ($argv) {
$verbose = array_key_exists('v', $_GET);
// Validate arguments
-if (!empty($_GET['logs']) && !empty($_GET['temp']) && !empty($_GET['type'])) {
+if (!empty($_GET['logs']) && !empty($_GET['temp']) && !empty($_GET['type']) && !empty($_GET['geo'])) {
if (is_readable($_GET['logs']) && is_writable($_GET['temp'])) {
if (in_array($_GET['type'], array('downloads', 'updatepings'))) {
- $parser = new Log_Parser($_GET['logs'], $_GET['temp'], $_GET['type'], !empty($_GET['date']) ? $_GET['date'] : '');
+ $parser = new Log_Parser($_GET['logs'], $_GET['temp'], $_GET['type'], $_GET['geo'], !empty($_GET['date']) ? $_GET['date'] : '');
$parser->start();
$finished = true;
@@ -87,10 +87,11 @@ if (!empty($_GET['logs']) && !empty($_GET['temp']) && !empty($_GET['type'])) {
if (empty($finished)) {
// Output usage instructions
print "usage:\n";
- print "php -f parse_logs.php logs=[log_dir] temp=[tmp_dir] type=[parse_type] date=[date] [v]\n";
+ print "php -f parse_logs.php logs=[log_dir] temp=[tmp_dir] type=[parse_type] geo=[geo] date=[date] [v]\n";
print "\tlog_dir:\tDirectory with the access log files\n";
print "\ttmp_dir:\tDirectory for the temp file to be written\n";
print "\tparse_type:\tdownloads or updatepings\n";
+ print "\tgeo:\tdatacenter from which logs are being parsed\n";
print "\tdate:\tsingle date for which to parse update pings, in YYYY-MM-DD format\n";
print "\tv:\tverbose output of progress\n";
print "sample usage:\n";