Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/tools/templatestats.py
blob: 57ed5ffbb90b1b7bd8aa7fbf3e9b6a012e5ba816 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/python 
#
# Trivial script -- counts how often each {{template}} invocation is used
# (shows that template usage has a 'long tail')
# Usage:
#
#  bzcat -d -c es_PE/es_PE.xml.bz2.processed  | ./templatestats.py > templatestats.txt 
#
# Author: Martin Langhoff <martin@laptop.org>
#
import sys, re

# Non-greedy match of one '{{...}}' template invocation within a single line.
# Raw string so the brace escapes reach the regex engine untouched.
rx = re.compile(r'\{\{.+?\}\}')


def count_templates(lines):
    """Count how often each template invocation occurs in *lines*.

    lines -- any iterable of text lines (e.g. sys.stdin).

    Returns a dict mapping the text found between '{{' and '}}' (with every
    '{' character removed) to the number of times it was seen.
    """
    seen = {}
    for line in lines:
        for match in rx.findall(line):
            # Strip the enclosing curly braces, then drop any '{' that is
            # left over from nested or triple-brace markup.
            name = match[2:-2].replace('{', '')
            seen[name] = seen.get(name, 0) + 1
    return seen


def rank_templates(seen):
    """Return a list of (count, name) tuples, highest count first.

    seen -- dict of name -> count, as produced by count_templates().

    Only the count is compared; entries with equal counts keep the order in
    which the dict yields them (list.sort is stable, also with reverse=True).
    """
    order = [(count, name) for name, count in seen.items()]
    order.sort(key=lambda entry: entry[0], reverse=True)
    return order


def main(lines=None, out=None):
    """Read lines, count templates and write one 'COUNT : NAME' row each.

    lines -- iterable of input lines; defaults to sys.stdin.
    out   -- object with a write() method; defaults to sys.stdout.
    """
    if lines is None:
        lines = sys.stdin
    if out is None:
        out = sys.stdout
    for entry in rank_templates(count_templates(lines)):
        # write() instead of print so the script runs on Python 2 and 3.
        out.write("%i : %s\n" % entry)


if __name__ == '__main__':
    main()