diff options
author | Chris Ball <cjb@laptop.org> | 2008-06-11 02:56:29 (GMT) |
---|---|---|
committer | Chris Ball <cjb@laptop.org> | 2008-06-11 02:56:29 (GMT) |
commit | 58f194834480590586de93617f61fa8a14c81e85 (patch) | |
tree | d07ff3042065e4ca023e8eb46696e38e53eeca48 /tools | |
parent | 5ec1d3d4717116db66c3252548a907676533fecb (diff) |
GetPageCounts.pl update.
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/GetPageCounts.pl | 28 |
1 files changed, 13 insertions, 15 deletions
```diff
diff --git a/tools/GetPageCounts.pl b/tools/GetPageCounts.pl
index 7d9fd9b..5b29f9e 100755
--- a/tools/GetPageCounts.pl
+++ b/tools/GetPageCounts.pl
@@ -4,15 +4,18 @@ use URI::Escape;
 use warnings;
 use strict;
 
-
 # using the traffic stats, get pagecounts for articles
 # count pagecounts of redirect pages towards the main page,
 # report counts for redirect as the counts of their main page
 
+# page list and redirect list generated from GetPages.pl
 my $pagelist = $ARGV[0];
 my $redirectlist = $ARGV[1];
+# traffic stats from file provided by Henrik
 my $trafficlist = $ARGV[2];
 
+# read in page list, store as keys in a "page counts" hash
+# values initally zero, traffic numbers added to them
 open(PAGES,$pagelist) or die;
 my %pagecounts = ();
 while (<PAGES>) {
@@ -21,8 +24,7 @@ while (<PAGES>) {
 }
 close(PAGES);
 
-#print "Done reading pages...\n";
-
+# read in redirect list, store as hash key, value pairs
 open(REDIRECTS,$redirectlist) or die;
 my %redirects = ();
 while (<REDIRECTS>) {
@@ -32,38 +34,34 @@ while (<REDIRECTS>) {
 }
 close(REDIRECTS);
 
-#print "Done reading redirects...\n";
-
-
+# read traffic stats
 open(TRAFFIC,$trafficlist) or die;
 while (<TRAFFIC>) {
     my @data = split;
     my $page = $data[1];
-    $page = uri_unescape($page);
-    $page =~ s/_/ /g;
-    if (exists $redirects{$page}) {
+    $page = uri_unescape($page); # pages need to be unescaped
+    $page =~ s/_/ /g; # and underscores converted
+    if (exists $redirects{$page}) { # if redirect, also add count towards main page
         if (exists $pagecounts{$redirects{$page}}) {
             $pagecounts{$redirects{$page}} += $data[2];
         }
     }
-    if (exists $pagecounts{$page}) {
+    if (exists $pagecounts{$page}) { # add count to this page
         $pagecounts{$page} += $data[2];
     } else {
         #print "$page doesn't exist on page list!\n";
     }
 }
 
+# now output traffic amounts, not ordered
 foreach my $page (keys %pagecounts) {
+    # If redirect, print target page's traffic score
     if (exists $redirects{$page}) {
         if (exists $pagecounts{$redirects{$page}}) {
             print
 "$pagecounts{$redirects{$page}}\t[[$page]]\n";
-        } else {
-print "ERROR redirect page $page isn't on list??\n";
-        }
+        }
     } elsif (exists $pagecounts{$page}) {
         print "$pagecounts{$page}\t[[$page]]\n";
-    } else {
-print "ERROR page $page not on list??\n";
     }
 }
```