Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/bin/parse_logs
diff options
context:
space:
mode:
author wclouser@mozilla.com <wclouser@mozilla.com@4eb1ac78-321c-0410-a911-ec516a8615a5> 2008-12-03 21:18:17 (GMT)
committer wclouser@mozilla.com <wclouser@mozilla.com@4eb1ac78-321c-0410-a911-ec516a8615a5> 2008-12-03 21:18:17 (GMT)
commit db67bb2f3785dfd1fa17464d80f08abcaff5b268 (patch)
tree 05de797a7728f25b37659ebb1495272ecc084d97 /bin/parse_logs
parent 07175d5b9560bd99e0451e7f60720ee6f5facc99 (diff)
Bug 462814 - Stats collection for Collections
git-svn-id: http://svn.mozilla.org/addons/trunk@20420 4eb1ac78-321c-0410-a911-ec516a8615a5
Diffstat (limited to 'bin/parse_logs')
-rw-r--r-- bin/parse_logs/count_downloads.class.php 103
-rw-r--r-- bin/parse_logs/log_parser.class.php 84
-rw-r--r-- bin/parse_logs/parse_logs.php 11
3 files changed, 140 insertions, 58 deletions
diff --git a/bin/parse_logs/count_downloads.class.php b/bin/parse_logs/count_downloads.class.php
index 4eb0989..9db7d61 100644
--- a/bin/parse_logs/count_downloads.class.php
+++ b/bin/parse_logs/count_downloads.class.php
@@ -22,6 +22,7 @@
* Contributor(s):
* Justin Scott <fligtar@mozilla.com> (Original Author)
* Andrei Hajdukewycz <sancus@off.net>
+ * Wil Clouser <wclouser@mozilla.com>
*
*
* Alternatively, the contents of this file may be used under the terms of
@@ -46,7 +47,7 @@ class Count_Downloads {
var $db;
var $totalSkipped = array('blacklist' => 0, 'SJ' => 0, 'NL' => 0, 'CN' => 0);
var $totalCounted = 0;
- var $counts = array( 'totdown' => array(), 'perday' => array() );
+ var $counts = array( 'totdown' => array(), 'perday' => array(), 'collections_and_addons' => array() );
/**
* Initializes download count parser
@@ -58,6 +59,10 @@ class Count_Downloads {
/**
* Increment relevant properties of each add-on for each date
*
+ //@TODO XXX: Currently the collection ID isn't passed back to us so we have no way of knowing what collection an add-on came from.
+ // Conveniently, we only have one collection ID at the moment so this is hardcoded to 1 in this function. This needs to be fixed
+ // before bandwagon starts!
+ *
* @param array $details details from the parsed log line
*/
function count($details) {
@@ -69,43 +74,78 @@ class Count_Downloads {
$this->totalSkipped['blacklist']++;
outputIfVerbose("[DownloadCounter] IP ({$details['ip']}) in blacklist; skipped");
}
- elseif (empty($details['fileid'])) {
- outputIfVerbose('[DownloadCounter] No file id found');
- }
- else {
+
+ $_addon_ids = array();
+
+ if ($details['type'] == 'collections' && !empty($details['addon_ids'])) {
+ if (!empty($this->counts['collections_and_addons'][1]['total'])) {
+ $this->counts['collections_and_addons'][1]['total'] += 1;
+ } else {
+ $this->counts['collections_and_addons'][1]['total'] = 1;
+ }
+ $_addon_ids = $details['addon_ids'];
+ } elseif (!empty($details['fileid'])) {
$addon_id_result = $this->db->query("SELECT versions.addon_id FROM files LEFT JOIN versions ON files.version_id = versions.id WHERE files.id={$details['fileid']}");
$addon_id = mysql_fetch_array($addon_id_result);
-
+
if (!empty($addon_id['addon_id'])) {
- // update total downloads
- if (!empty($this->counts['totdown'][$addon_id['addon_id']])) {
- $this->counts['totdown'][$addon_id['addon_id']] += 1;
- } else {
- $this->counts['totdown'][$addon_id['addon_id']] = 1;
- }
+ $_addon_ids = array($addon_id['addon_id']);
+ }
+ }
- // update per-day
- if ( !isset( $this->counts['perday'][$addon_id['addon_id']] ) ) {
- $this->counts['perday'][$addon_id['addon_id']] = array();
- }
+ if (empty($_addon_ids)) {
+ outputIfVerbose("[DownloadCounter] No add-on ids found in path: {$details['path']}");
+ }
+
+ // Increment stats for each downloaded add-on
+ foreach ($_addon_ids as $_addon_id) {
- if (!empty($this->counts['perday'][$addon_id['addon_id']][date('Y-m-d', $details['unixtime'])])) {
- $this->counts['perday'][$addon_id['addon_id']][date('Y-m-d', $details['unixtime'])] += 1;
+ // If it's a collection it gets counted in that group too
+ if ($details['type'] == 'collections') {
+ // This array is in the format:
+ // [collections_and_addons] => Array
+ // (
+ // [$collection_id] => Array
+ // (
+ // [total] => 4
+ // [addon_ids] => Array
+ // (
+ // [$addon_id] => $total_downloads_of_this_addon_from_that_collection
+ // ...
+ // )
+ // )
+ // )
+ if (!empty($this->counts['collections_and_addons'][1]['addon_ids'][$_addon_id])) {
+ $this->counts['collections_and_addons'][1]['addon_ids'][$_addon_id] += 1;
} else {
- $this->counts['perday'][$addon_id['addon_id']][date('Y-m-d', $details['unixtime'])] = 1;
+ $this->counts['collections_and_addons'][1]['addon_ids'][$_addon_id] = 1;
}
+ }
+
+ // update total downloads
+ if (!empty($this->counts['totdown'][$_addon_id])) {
+ $this->counts['totdown'][$_addon_id] += 1;
+ } else {
+ $this->counts['totdown'][$_addon_id] = 1;
+ }
- $this->totalCounted++;
- outputIfVerbose("[DownloadCounter] Updated count for add-on id: {$addon_id['addon_id']}");
+ // update per-day
+ if ( !isset( $this->counts['perday'][$_addon_id] ) ) {
+ $this->counts['perday'][$_addon_id] = array();
}
- else {
- // Couldn't find add-on associated with the file id (disturbing)
- outputIfVerbose("[DownloadCounter] Could not retrieve add-on ID with file ID: {$details['fileid']}");
+
+ if (!empty($this->counts['perday'][$_addon_id][date('Y-m-d', $details['unixtime'])])) {
+ $this->counts['perday'][$_addon_id][date('Y-m-d', $details['unixtime'])] += 1;
+ } else {
+ $this->counts['perday'][$_addon_id][date('Y-m-d', $details['unixtime'])] = 1;
}
-
- // Blacklist IP from being counted again for 30 seconds
- $this->countedIPs[$details['ip']] = $details['unixtime'];
+
+ $this->totalCounted++;
+ outputIfVerbose("[DownloadCounter] Updated count for add-on id: {$_addon_id}");
}
+
+ // Blacklist IP from being counted again for 30 seconds
+ $this->countedIPs[$details['ip']] = $details['unixtime'];
}
/**
@@ -135,10 +175,19 @@ class Count_Downloads {
}
}
+ // now the collections
+ foreach ( $this->counts['collections_and_addons'] as $collection_id => $details) {
+ $this->db->query("UPDATE collections SET downloads = downloads + {$details['total']} WHERE id={$collection_id} LIMIT 1", true);
+ foreach ($details['addon_ids'] as $addon_id => $count ) {
+ $this->db->query("UPDATE addons_collections SET downloads = downloads + {$count} WHERE addon_id={$addon_id} AND collection_id={$collection_id} LIMIT 1", true);
+ }
+ }
+
// Garbage collection on counts array after the log file is parsed and
// database is updated.
$this->counts['totdown'] = array();
$this->counts['perday'] = array();
+ $this->counts['collections_and_addons'] = array();
}
}
diff --git a/bin/parse_logs/log_parser.class.php b/bin/parse_logs/log_parser.class.php
index 7598f4a..7f0dde7 100644
--- a/bin/parse_logs/log_parser.class.php
+++ b/bin/parse_logs/log_parser.class.php
@@ -22,6 +22,7 @@
* Contributor(s):
* Justin Scott <fligtar@mozilla.com> (Original Author)
* Andrei Hajdukewycz <sancus@off.net>
+ * Wil Clouser <wclouser@mozilla.com>
*
*
* Alternatively, the contents of this file may be used under the terms of
@@ -45,9 +46,20 @@ $datacenters = array(
);
// Include class files
-require_once('../database.class.php');
-require_once('count_downloads.class.php');
-require_once('count_update_pings.class.php');
+$root = dirname(dirname(dirname(__FILE__)));
+require_once "{$root}/bin/database.class.php";
+require_once "{$root}/bin/parse_logs/count_downloads.class.php";
+require_once "{$root}/bin/parse_logs/count_update_pings.class.php";
+
+/**
+ * Outputs message if script has been called with verbose flag
+ */
+function outputIfVerbose($message) {
+ global $verbose;
+
+ if ($verbose)
+ print "{$message}\n";
+}
/**
* Parses the access logs and hands off to counting classes
@@ -130,6 +142,7 @@ class Log_Parser {
* Extracts log to temp file and begins matching line patterns and hands off
* to counters.
*
+ * @todo this function should be split up because it's hard to test this way.
* @param string $logfile the name of the current logfile
*/
function parse($logfile) {
@@ -149,26 +162,20 @@ class Log_Parser {
echo "\n---------- [ Parsing {$logfile} in {$this->geo}] ----------\n";
while ($line = fgets($fp)) {
- // Match line patterns
- preg_match("/^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] \"(\S+) (.*?) (\S+)\" (\S+) (\S+) (\".*?\") (\".*?\") (\".*?\")$/", $line, $matches);
+
+ $lineDetails = $this->parseLine($line);
- if (!empty($matches[0])) {
- $lineDetails = $this->getLineDetails($matches);
-
- if (!is_array($lineDetails))
- continue;
+ if (!is_array($lineDetails))
+ continue;
- if ($geoFound = $this->fromMozillaDatacenter($lineDetails['ip'])) {
- $this->counter->totalSkipped[$geoFound]++;
- outputIfVerbose("[{$this->type}Counter] IP ({$lineDetails['ip']}) from Mozilla {$geoFound}; skipped");
- continue;
- }
-
- if ($this->type == $lineDetails['type'])
- $this->counter->count($lineDetails);
+ if ($geoFound = $this->fromMozillaDatacenter($lineDetails['ip'])) {
+ $this->counter->totalSkipped[$geoFound]++;
+ outputIfVerbose("[{$this->type}Counter] IP ({$lineDetails['ip']}) from Mozilla {$geoFound}; skipped");
+ continue;
}
- else
- echo "Could not match log entry to pattern: {$line}\n";
+
+ if ($this->type == $lineDetails['type'])
+ $this->counter->count($lineDetails);
}
// Logfile post-parse callback
@@ -186,6 +193,28 @@ class Log_Parser {
}
/**
+ * Split log line into the chunks we need
+ */
+ function parseLine($line) {
+
+ if (empty($line))
+ return false;
+
+ // Match line patterns
+ preg_match("/^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] \"(\S+) (.*?) (\S+)\" (\S+) (\S+) (\".*?\") (\".*?\") (\".*?\")$/", $line, $matches);
+
+ if (empty($matches[0])) {
+ outputIfVerbose("Could not match log entry to pattern: {$line}\n");
+ return false;
+ } else {
+
+ $lineDetails = log_parser::getLineDetails($matches);
+
+ return $lineDetails;
+ }
+ }
+
+ /**
* Breaks pattern matches into relevant descriptions
*/
function getLineDetails($matches) {
@@ -213,7 +242,11 @@ class Log_Parser {
preg_match("/(file|VersionCheck\.php)(\/([0-9]*))?(\?reqVersion=([^&]+)&id=([^&]+)(&version=([^&]+))?(&maxAppVersion=([^&]+))?(&status=([^&]+))?(&appID=([^&]+))?(&appVersion=([^&]+))?(&appOS=([^&]+))?(&appABI=(\S*))?)?/", $log_data['path'], $matches);
if (empty($matches)) {
- return false;
+ // If that first crazy regex fails, let's see if it's a collection
+ preg_match("/(collections)\/success\?i=([0-9,].+)/", $log_data['path'], $matches);
+
+ if (empty($matches))
+ return false;
}
// Set request type
@@ -221,6 +254,8 @@ class Log_Parser {
$log_data['type'] = 'downloads';
elseif ($matches[1] == 'VersionCheck.php')
$log_data['type'] = 'updatepings';
+ elseif ($matches[1] == 'collections')
+ $log_data['type'] = 'collections';
// If a download, get the file id out
if ($log_data['type'] == 'downloads') {
@@ -238,6 +273,13 @@ class Log_Parser {
$log_data['addon']['appOS'] = !empty($matches[18]) ? $matches[18] : null;
$log_data['addon']['appABI'] = !empty($matches[20]) ? $matches[20] : null;
}
+ // If it's a collection update, parse out the add-on ids
+ elseif ($log_data['type'] == 'collections') {
+ $_ids = explode(',', $matches[2]);
+
+ // Filter for numbers. We can use ctype_* because explode() returns strings
+ $log_data['addon_ids'] = array_filter($_ids, 'ctype_digit');
+ }
return $log_data;
}
diff --git a/bin/parse_logs/parse_logs.php b/bin/parse_logs/parse_logs.php
index 9f57f75..d0e80e2 100644
--- a/bin/parse_logs/parse_logs.php
+++ b/bin/parse_logs/parse_logs.php
@@ -22,6 +22,7 @@
* Contributor(s):
* Justin Scott <fligtar@mozilla.com> (Original Author)
* Andrei Hajdukewycz <sancus@off.net>
+ * Wil Clouser <wclouser@mozilla.com>
*
*
* Alternatively, the contents of this file may be used under the terms of
@@ -104,14 +105,4 @@ $end = $end[1] + $end[0];
echo "\nExecution time: ".($end - $start)."\n";
-/**
- * Outputs message if script has been called with verbose flag
- */
-function outputIfVerbose($message) {
- global $verbose;
-
- if ($verbose)
- print "{$message}\n";
-}
-
?>