blob: 5b29f9e3a75101b3994cf7f08f46e5940286cadb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
#!/usr/bin/perl
use URI::Escape;
use warnings;
use strict;
# Using the traffic stats, compute page-view counts for articles.
# Views of a redirect page are also credited to the page it redirects to,
# and a redirect is reported with the count of its target page.
# The page list and redirect list are generated by GetPages.pl.
#
# Usage: <script> <pagelist> <redirectlist> <trafficlist>
#
# Fail early with a usage message instead of letting a later open() die on
# an undefined file name with no explanation.
if (@ARGV < 3) {
    die "Usage: $0 <pagelist> <redirectlist> <trafficlist>\n";
}
my $pagelist = $ARGV[0];
my $redirectlist = $ARGV[1];
# traffic stats from file provided by Henrik
my $trafficlist = $ARGV[2];
# Read the page list (one title per line) and store every title as a key of
# the "page counts" hash.  Values start at zero; traffic numbers are added
# to them later.
#
# Three-argument open with a lexical handle is used so that the file name is
# never parsed for mode characters, and a failure names the file and the
# operating-system error.
open(my $pages_fh, '<', $pagelist)
    or die "Cannot open page list '$pagelist': $!\n";
my %pagecounts = ();
while (my $title = <$pages_fh>) {
    chomp $title;
    $pagecounts{$title} = 0;
}
close($pages_fh);
# Read the redirect list and store it as key/value pairs: the text inside
# the first [[...]] on a line is the key and the text inside the second
# [[...]] is the value it maps to.  Lines that do not contain two bracketed
# titles are ignored.
#
# Three-argument open with a lexical handle is used so that the file name is
# never parsed for mode characters, and a failure names the file and the
# operating-system error.
open(my $redirects_fh, '<', $redirectlist)
    or die "Cannot open redirect list '$redirectlist': $!\n";
my %redirects = ();
while (my $line = <$redirects_fh>) {
    if ($line =~ /\[\[(.*)\]\]\s*\[\[(.*)\]\]/) {
        $redirects{$1} = $2;
    }
}
close($redirects_fh);
# Read the traffic stats and accumulate them into %pagecounts.
# Each line is split on whitespace; field 1 is the URI-escaped page title and
# field 2 is the view count (field 0 is not used by this script).
#
# The file is opened with three-argument open so its name is never parsed
# for mode characters.  A name of "-" still means standard input, which the
# previous two-argument open also accepted.
my $traffic_fh;
if ($trafficlist eq '-') {
    $traffic_fh = \*STDIN;
}
else {
    open($traffic_fh, '<', $trafficlist)
        or die "Cannot open traffic list '$trafficlist': $!\n";
}
while (my $line = <$traffic_fh>) {
    my @data = split ' ', $line;
    # Skip blank or truncated lines instead of indexing undefined fields.
    next if @data < 3;
    my ($raw_page, $count) = @data[1, 2];
    # Skip lines whose count is not a plain non-negative integer.
    next unless $count =~ /\A[0-9]+\z/;
    my $page = uri_unescape($raw_page); # pages need to be unescaped
    $page =~ s/_/ /g;                   # and underscores converted
    # If this page is a redirect, also add the count towards its target,
    # provided the target is on the page list.
    if (exists $redirects{$page}) {
        my $target = $redirects{$page};
        $pagecounts{$target} += $count if exists $pagecounts{$target};
    }
    # Add the count to the page itself; titles that are not on the page list
    # are silently ignored.
    $pagecounts{$page} += $count if exists $pagecounts{$page};
}
# Only close a handle this script opened itself.
close($traffic_fh) unless $trafficlist eq '-';
# Emit one "<count><TAB>[[<title>]]" line for the pages on the page list.
# Hash iteration order is unspecified, so the output is not ordered.
for my $page (keys %pagecounts) {
    # A redirect is reported with its target's total; every other page is
    # reported with its own total.
    my $counted_as = exists $redirects{$page} ? $redirects{$page} : $page;
    # A redirect whose target is not on the page list produces no line.
    next unless exists $pagecounts{$counted_as};
    my $count = $pagecounts{$counted_as};
    print "$count\t[[$page]]\n";
}
|