diff options
author | wclouser@mozilla.com <wclouser@mozilla.com@4eb1ac78-321c-0410-a911-ec516a8615a5> | 2008-12-03 21:18:17 (GMT) |
---|---|---|
committer | wclouser@mozilla.com <wclouser@mozilla.com@4eb1ac78-321c-0410-a911-ec516a8615a5> | 2008-12-03 21:18:17 (GMT) |
commit | db67bb2f3785dfd1fa17464d80f08abcaff5b268 (patch) | |
tree | 05de797a7728f25b37659ebb1495272ecc084d97 /bin/parse_logs | |
parent | 07175d5b9560bd99e0451e7f60720ee6f5facc99 (diff) |
Bug 462814 - Stats collection for Collections
git-svn-id: http://svn.mozilla.org/addons/trunk@20420 4eb1ac78-321c-0410-a911-ec516a8615a5
Diffstat (limited to 'bin/parse_logs')
-rw-r--r-- | bin/parse_logs/count_downloads.class.php | 103 | ||||
-rw-r--r-- | bin/parse_logs/log_parser.class.php | 84 | ||||
-rw-r--r-- | bin/parse_logs/parse_logs.php | 11 |
3 files changed, 140 insertions, 58 deletions
diff --git a/bin/parse_logs/count_downloads.class.php b/bin/parse_logs/count_downloads.class.php index 4eb0989..9db7d61 100644 --- a/bin/parse_logs/count_downloads.class.php +++ b/bin/parse_logs/count_downloads.class.php @@ -22,6 +22,7 @@ * Contributor(s): * Justin Scott <fligtar@mozilla.com> (Original Author) * Andrei Hajdukewycz <sancus@off.net> + * Wil Clouser <wclouser@mozilla.com> * * * Alternatively, the contents of this file may be used under the terms of @@ -46,7 +47,7 @@ class Count_Downloads { var $db; var $totalSkipped = array('blacklist' => 0, 'SJ' => 0, 'NL' => 0, 'CN' => 0); var $totalCounted = 0; - var $counts = array( 'totdown' => array(), 'perday' => array() ); + var $counts = array( 'totdown' => array(), 'perday' => array(), 'collections_and_addons' => array() ); /** * Initializes download count parser @@ -58,6 +59,10 @@ class Count_Downloads { /** * Increment relevant properties of each add-on for each date * + //@TODO XXX: Currently the collection ID isn't passed back to us so we have no way of knowing what collection an add-on came from. + // Convienently, we only have one collection ID at the moment so this is hardcoded to 1 in this function. This needs to be fixed + // before bandwagon starts! + * * @param array $details details from the parsed log line */ function count($details) { @@ -69,43 +74,78 @@ class Count_Downloads { $this->totalSkipped['blacklist']++; outputIfVerbose("[DownloadCounter] IP ({$details['ip']}) in blacklist; skipped"); } - elseif (empty($details['fileid'])) { - outputIfVerbose('[DownloadCounter] No file id found'); - } - else { + + $_addon_ids = array(); + + if ($details['type'] == 'collections' && !empty($details['addon_ids'])) { + if (!empty($this->counts['collections_and_addons'][1]['total'])) { + $this->counts['collections_and_addons'][1]['total'] += 1; + } else { + $this->counts['collections_and_addons'][1]['total'] = 1; + } + $_addon_ids = $details['addon_ids']; + } elseif (!empty($details['fileid'])) { $addon_id_result = $this->db->query("SELECT versions.addon_id FROM files LEFT JOIN versions ON files.version_id = versions.id WHERE files.id={$details['fileid']}"); $addon_id = mysql_fetch_array($addon_id_result); - + if (!empty($addon_id['addon_id'])) { - // update total downloads - if (!empty($this->counts['totdown'][$addon_id['addon_id']])) { - $this->counts['totdown'][$addon_id['addon_id']] += 1; - } else { - $this->counts['totdown'][$addon_id['addon_id']] = 1; - } + $_addon_ids = array($addon_id['addon_id']); + } + } - // update per-day - if ( !isset( $this->counts['perday'][$addon_id['addon_id']] ) ) { - $this->counts['perday'][$addon_id['addon_id']] = array(); - } + if (empty($_addon_ids)) { + outputIfVerbose("[DownloadCounter] No add-on ids found in path: {$details['path']}"); + } + + // Increment stats for each downloaded add-on + foreach ($_addon_ids as $_addon_id) { - if (!empty($this->counts['perday'][$addon_id['addon_id']][date('Y-m-d', $details['unixtime'])])) { - $this->counts['perday'][$addon_id['addon_id']][date('Y-m-d', $details['unixtime'])] += 1; + // If it's a collection it gets counted in that group too + if ($details['type'] == 'collections') { + // This array is in the format: + // [collections_and_addons] => Array + // ( + // [$collection_id] => Array + // ( + // [total] => 4 + // [addon_ids] => Array + // ( + // [$addon_id] => $total_downloads_of_this_addon_from_that_collection + // ... + // ) + // ) + // ) + if (!empty($this->counts['collections_and_addons'][1]['addon_ids'][$_addon_id])) { + $this->counts['collections_and_addons'][1]['addon_ids'][$_addon_id] += 1; } else { - $this->counts['perday'][$addon_id['addon_id']][date('Y-m-d', $details['unixtime'])] = 1; + $this->counts['collections_and_addons'][1]['addon_ids'][$_addon_id] = 1; } + } + + // update total downloads + if (!empty($this->counts['totdown'][$_addon_id])) { + $this->counts['totdown'][$_addon_id] += 1; + } else { + $this->counts['totdown'][$_addon_id] = 1; + } - $this->totalCounted++; - outputIfVerbose("[DownloadCounter] Updated count for add-on id: {$addon_id['addon_id']}"); + // update per-day + if ( !isset( $this->counts['perday'][$_addon_id] ) ) { + $this->counts['perday'][$_addon_id] = array(); } - else { - // Couldn't find add-on associated with the file id (disturbing) - outputIfVerbose("[DownloadCounter] Could not retrieve add-on ID with file ID: {$details['fileid']}"); + + if (!empty($this->counts['perday'][$_addon_id][date('Y-m-d', $details['unixtime'])])) { + $this->counts['perday'][$_addon_id][date('Y-m-d', $details['unixtime'])] += 1; + } else { + $this->counts['perday'][$_addon_id][date('Y-m-d', $details['unixtime'])] = 1; } - - // Blacklist IP from being counted again for 30 seconds - $this->countedIPs[$details['ip']] = $details['unixtime']; + + $this->totalCounted++; + outputIfVerbose("[DownloadCounter] Updated count for add-on id: {$_addon_id}"); } + + // Blacklist IP from being counted again for 30 seconds + $this->countedIPs[$details['ip']] = $details['unixtime']; } /** @@ -135,10 +175,19 @@ class Count_Downloads { } } + // now the collections + foreach ( $this->counts['collections_and_addons'] as $collection_id => $details) { + $this->db->query("UPDATE collections SET downloads = downloads + {$details['total']} WHERE id={$collection_id} LIMIT 1", true); + foreach ($details['addon_ids'] as $addon_id => $count ) { + $this->db->query("UPDATE addons_collections SET downloads = downloads + {$count} WHERE addon_id={$addon_id} AND collection_id={$collection_id} LIMIT 1", true); + } + } + // Garbage collection on counts array after the log file is parsed and // database is updated. $this->counts['totdown'] = array(); $this->counts['perday'] = array(); + $this->counts['collections_and_addons'] = array(); } } diff --git a/bin/parse_logs/log_parser.class.php b/bin/parse_logs/log_parser.class.php index 7598f4a..7f0dde7 100644 --- a/bin/parse_logs/log_parser.class.php +++ b/bin/parse_logs/log_parser.class.php @@ -22,6 +22,7 @@ * Contributor(s): * Justin Scott <fligtar@mozilla.com> (Original Author) * Andrei Hajdukewycz <sancus@off.net> + * Wil Clouser <wclouser@mozilla.com> * * * Alternatively, the contents of this file may be used under the terms of @@ -45,9 +46,20 @@ $datacenters = array( ); // Include class files -require_once('../database.class.php'); -require_once('count_downloads.class.php'); -require_once('count_update_pings.class.php'); +$root = dirname(dirname(dirname(__FILE__))); +require_once "{$root}/bin/database.class.php"; +require_once "{$root}/bin/parse_logs/count_downloads.class.php"; +require_once "{$root}/bin/parse_logs/count_update_pings.class.php"; + +/** + * Outputs message if script has been called with verbose flag + */ +function outputIfVerbose($message) { + global $verbose; + + if ($verbose) + print "{$message}\n"; +} /** * Parses the access logs and hands off to counting classes @@ -130,6 +142,7 @@ class Log_Parser { * Extracts log to temp file and begins matching line patterns and hands off * to counters. * + * @todo this function should be split up because it's hard to test this way. * @param string $logfile the name of the current logfile */ function parse($logfile) { @@ -149,26 +162,20 @@ class Log_Parser { echo "\n---------- [ Parsing {$logfile} in {$this->geo}] ----------\n"; while ($line = fgets($fp)) { - // Match line patterns - preg_match("/^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] \"(\S+) (.*?) (\S+)\" (\S+) (\S+) (\".*?\") (\".*?\") (\".*?\")$/", $line, $matches); + + $lineDetails = $this->parseLine($line); - if (!empty($matches[0])) { - $lineDetails = $this->getLineDetails($matches); - - if (!is_array($lineDetails)) - continue; + if (!is_array($lineDetails)) + continue; - if ($geoFound = $this->fromMozillaDatacenter($lineDetails['ip'])) { - $this->counter->totalSkipped[$geoFound]++; - outputIfVerbose("[{$this->type}Counter] IP ({$lineDetails['ip']}) from Mozilla {$geoFound}; skipped"); - continue; - } - - if ($this->type == $lineDetails['type']) - $this->counter->count($lineDetails); + if ($geoFound = $this->fromMozillaDatacenter($lineDetails['ip'])) { + $this->counter->totalSkipped[$geoFound]++; + outputIfVerbose("[{$this->type}Counter] IP ({$lineDetails['ip']}) from Mozilla {$geoFound}; skipped"); + continue; } - else - echo "Could not match log entry to pattern: {$line}\n"; + + if ($this->type == $lineDetails['type']) + $this->counter->count($lineDetails); } // Logfile post-parse callback @@ -186,6 +193,28 @@ class Log_Parser { } /** + * Split log line into the chunks we need + */ + function parseLine($line) { + + if (empty($line)) + return false; + + // Match line patterns + preg_match("/^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] \"(\S+) (.*?) (\S+)\" (\S+) (\S+) (\".*?\") (\".*?\") (\".*?\")$/", $line, $matches); + + if (empty($matches[0])) { + outputIfVerbose("Could not match log entry to pattern: {$line}\n"); + return false; + } else { + + $lineDetails = log_parser::getLineDetails($matches); + + return $lineDetails; + } + } + + /** * Breaks pattern matches into relevant descriptions */ function getLineDetails($matches) { @@ -213,7 +242,11 @@ class Log_Parser { preg_match("/(file|VersionCheck\.php)(\/([0-9]*))?(\?reqVersion=([^&]+)&id=([^&]+)(&version=([^&]+))?(&maxAppVersion=([^&]+))?(&status=([^&]+))?(&appID=([^&]+))?(&appVersion=([^&]+))?(&appOS=([^&]+))?(&appABI=(\S*))?)?/", $log_data['path'], $matches); if (empty($matches)) { - return false; + // If that first crazy regex fails, let's see if it's a collection + preg_match("/(collections)\/success\?i=([0-9,].+)/", $log_data['path'], $matches); + + if (empty($matches)) + return false; } // Set request type @@ -221,6 +254,8 @@ class Log_Parser { $log_data['type'] = 'downloads'; elseif ($matches[1] == 'VersionCheck.php') $log_data['type'] = 'updatepings'; + elseif ($matches[1] == 'collections') + $log_data['type'] = 'collections'; // If a download, get the file id out if ($log_data['type'] == 'downloads') { @@ -238,6 +273,13 @@ class Log_Parser { $log_data['addon']['appOS'] = !empty($matches[18]) ? $matches[18] : null; $log_data['addon']['appABI'] = !empty($matches[20]) ? $matches[20] : null; } + // If it's a collection update, parse out the add-on ids + elseif ($log_data['type'] == 'collections') { + $_ids = explode(',', $matches[2]); + + // Filter for numbers. We can use ctype_* because explode() returns strings + $log_data['addon_ids'] = array_filter($_ids, 'ctype_digit'); + } return $log_data; } diff --git a/bin/parse_logs/parse_logs.php b/bin/parse_logs/parse_logs.php index 9f57f75..d0e80e2 100644 --- a/bin/parse_logs/parse_logs.php +++ b/bin/parse_logs/parse_logs.php @@ -22,6 +22,7 @@ * Contributor(s): * Justin Scott <fligtar@mozilla.com> (Original Author) * Andrei Hajdukewycz <sancus@off.net> + * Wil Clouser <wclouser@mozilla.com> * * * Alternatively, the contents of this file may be used under the terms of @@ -104,14 +105,4 @@ $end = $end[1] + $end[0]; echo "\nExecution time: ".($end - $start)."\n"; -/** - * Outputs message if script has been called with verbose flag - */ -function outputIfVerbose($message) { - global $verbose; - - if ($verbose) - print "{$message}\n"; -} - ?> |