Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/tools
diff options
context:
space:
mode:
author Chris Ball <cjb@laptop.org> 2008-06-11 02:56:29 (GMT)
committer Chris Ball <cjb@laptop.org> 2008-06-11 02:56:29 (GMT)
commit 58f194834480590586de93617f61fa8a14c81e85 (patch)
tree d07ff3042065e4ca023e8eb46696e38e53eeca48 /tools
parent 5ec1d3d4717116db66c3252548a907676533fecb (diff)
GetPageCounts.pl update.
Diffstat (limited to 'tools')
-rwxr-xr-x tools/GetPageCounts.pl 28
1 files changed, 13 insertions, 15 deletions
diff --git a/tools/GetPageCounts.pl b/tools/GetPageCounts.pl
index 7d9fd9b..5b29f9e 100755
--- a/tools/GetPageCounts.pl
+++ b/tools/GetPageCounts.pl
@@ -4,15 +4,18 @@ use URI::Escape;
use warnings;
use strict;
-
# using the traffic stats, get pagecounts for articles
# count pagecounts of redirect pages towards the main page,
# report counts for redirect as the counts of their main page
+# page list and redirect list generated from GetPages.pl
my $pagelist = $ARGV[0];
my $redirectlist = $ARGV[1];
+# traffic stats from file provided by Henrik
my $trafficlist = $ARGV[2];
+# read in page list, store as keys in a "page counts" hash
+# values initially zero, traffic numbers added to them
open(PAGES,$pagelist) or die;
my %pagecounts = ();
while (<PAGES>) {
@@ -21,8 +24,7 @@ while (<PAGES>) {
}
close(PAGES);
-#print "Done reading pages...\n";
-
+# read in redirect list, store as hash key, value pairs
open(REDIRECTS,$redirectlist) or die;
my %redirects = ();
while (<REDIRECTS>) {
@@ -32,38 +34,34 @@ while (<REDIRECTS>) {
}
close(REDIRECTS);
-#print "Done reading redirects...\n";
-
-
+# read traffic stats
open(TRAFFIC,$trafficlist) or die;
while (<TRAFFIC>) {
my @data = split;
my $page = $data[1];
- $page = uri_unescape($page);
- $page =~ s/_/ /g;
- if (exists $redirects{$page}) {
+ $page = uri_unescape($page); # pages need to be unescaped
+ $page =~ s/_/ /g; # and underscores converted
+ if (exists $redirects{$page}) { # if redirect, also add count towards main page
if (exists $pagecounts{$redirects{$page}}) {
$pagecounts{$redirects{$page}} += $data[2];
}
}
- if (exists $pagecounts{$page}) {
+ if (exists $pagecounts{$page}) { # add count to this page
$pagecounts{$page} += $data[2];
} else {
#print "$page doesn't exist on page list!\n";
}
}
+# now output traffic amounts, not ordered
foreach my $page (keys %pagecounts) {
+ # If redirect, print target page's traffic score
if (exists $redirects{$page}) {
if (exists $pagecounts{$redirects{$page}}) {
print "$pagecounts{$redirects{$page}}\t[[$page]]\n";
- } else {
-print "ERROR redirect page $page isn't on list??\n";
- }
+ }
} elsif (exists $pagecounts{$page}) {
print "$pagecounts{$page}\t[[$page]]\n";
- } else {
-print "ERROR page $page not on list??\n";
}
}