diff options
author | Gonzalo Odiard <godiard@gmail.com> | 2013-04-11 03:18:30 (GMT) |
---|---|---|
committer | Gonzalo Odiard <godiard@gmail.com> | 2013-04-11 03:18:30 (GMT) |
commit | 8f2a9b96ee10e1c84ddc2587c1d3e4395c642bb4 (patch) | |
tree | 61851fcd06795c0691cc8e2bf6ab98a4594c3da6 | |
parent | e9d682735efc52790a07ee0bf012805d1b2065b7 (diff) |
Add a script to get stats parsing the pages from aslo
This first version writes ";"-separated values to the standard output.
The data are the activity code, name, last update date and uploader, and
the compatibility range of the latest version.
Signed-off-by: Gonzalo Odiard <gonzalo@laptop.org>
-rw-r--r-- | statistics/activities_aslo_stats.py | 58 |
1 files changed, 58 insertions, 0 deletions
# statistics/activities_aslo_stats.py
"""Print ';'-separated statistics about the activities listed on ASLO.

The script reads the directory listing at DOWNLOAD_INDEX_URL and, for every
entry whose link text contains a '/', scrapes activities.sugarlabs.org for
the activity name, its uploader and the Sugar compatibility range.  One line
per activity is written to standard output:

    code;listing date;name;uploader;compat from;compat to

Activities for which no name or no uploader could be found are not printed.
Problems while scraping a single activity are reported on standard error and
never abort the whole run.
"""

import sys
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

DOWNLOAD_INDEX_URL = 'http://download.sugarlabs.org/activities/'
ADDON_PATH = '/es-ES/sugar/addon/%s'
ADDON_URL = 'http://activities.sugarlabs.org' + ADDON_PATH
VERSIONS_URL = 'http://activities.sugarlabs.org/es-ES/sugar/addons/versions/%s'

# Seconds to wait for any single HTTP request, so one stalled connection
# cannot hang a run that visits one page (or two) per activity.
FETCH_TIMEOUT = 30

# The dash between the two versions may reach us either as the real
# character or as the still-escaped HTML entity; accept both.
_EN_DASH_FORMS = (u'\u2013', '&ndash;')


def parse_listing_date(text):
    """Return the first space-delimited token of a listing entry's text.

    *text* is the text that follows a link in the index listing; the result
    is everything before the first space once surrounding whitespace has been
    stripped.  ``None`` or an empty string yields ``''``.  When there is no
    space at all the whole stripped text is returned unchanged.
    """
    return (text or '').strip().partition(' ')[0]


def parse_compat(text):
    """Split a compatibility string into a ``(from, to)`` pair.

    For example ``'Sugar: 0.82 - 0.98'`` becomes ``('0,82', '0,98')``: the
    ``'Sugar:'`` label is removed, the text is split on the first dash and
    every '.' is replaced by ',' (presumably for spreadsheets that use a
    decimal comma -- confirm with the consumers of the output).

    When the text contains no dash the whole value is returned as ``from``
    and ``to`` is ``''``.
    """
    for dash in _EN_DASH_FORMS:
        text = text.replace(dash, '-')
    text = text.replace('Sugar:', '')
    first, _, last = text.partition('-')
    return (first.strip().replace('.', ','),
            last.strip().replace('.', ','))


def _warn(message):
    """Report a non-fatal problem on stderr, keeping stdout pure data."""
    sys.stderr.write('warning: %s\n' % message)


def _emit(line):
    """Write one output line to stdout."""
    if sys.version_info[0] < 3 and not isinstance(line, str):
        # Python 2 unicode text: encode explicitly, because stdout has no
        # encoding when it is redirected and would fall back to ASCII.
        line = line.encode(sys.stdout.encoding or 'utf-8')
    sys.stdout.write(line + '\n')


def _fetch_soup(url):
    """Download *url* and return the page parsed by BeautifulSoup."""
    # Imported here so the pure parsing helpers stay importable on machines
    # where BeautifulSoup is not installed.
    from BeautifulSoup import BeautifulSoup
    with closing(urlopen(url, timeout=FETCH_TIMEOUT)) as response:
        return BeautifulSoup(response.read())


def _text_of(node):
    """Return the text of a parsed node, or '' when the node was not found."""
    return node.text if node is not None else ''


def _find_uploader(versions_page, activity_code):
    """Return the uploader name for an activity, or '' when unknown.

    The versions page is tried first.  When it yields nothing, or only the
    value 'nickname', the profile link on the activity's main page is used
    instead; if that lookup fails the first result is kept.
    """
    uploader = ''
    uploader_div = versions_page.find('div', {'class': 'uploader'})
    if uploader_div is not None:
        uploader = _text_of(uploader_div.find('a'))
    if uploader not in ('nickname', ''):
        return uploader

    try:
        addon_page = _fetch_soup(ADDON_URL % activity_code)
    except Exception as error:  # best effort: keep what we already have
        _warn('%s: could not read the addon page (%s)'
              % (activity_code, error))
        return uploader
    profile_link = addon_page.find('a', {'class': 'profileLink'})
    if profile_link is None:
        return uploader
    return profile_link.text


def _scrape_activity(activity_code):
    """Return ``(name, uploader, compat_from, compat_to)`` for an activity.

    Every value that cannot be found on the versions page is ''.  Errors
    while downloading or parsing the versions page propagate to the caller.
    """
    versions_page = _fetch_soup(VERSIONS_URL % activity_code)
    uploader = _find_uploader(versions_page, activity_code)

    name = ''
    main_div = versions_page.find('div', role='main')
    if main_div is not None:
        name = _text_of(main_div.find('a', href=ADDON_PATH % activity_code))

    compat_from = compat_to = ''
    compat_div = versions_page.find('div', {'class': 'app_compat'})
    if compat_div is not None:
        compat_item = compat_div.find('li')
        if compat_item is not None:
            compat_from, compat_to = parse_compat(compat_item.text)

    return name, uploader, compat_from, compat_to


def main():
    """Walk the download index and print one line per resolved activity."""
    listing = _fetch_soup(DOWNLOAD_INDEX_URL).find('pre')
    if listing is None:
        sys.exit('No <pre> listing found at %s' % DOWNLOAD_INDEX_URL)

    for link in listing.findAll('a'):
        link_text = link.text
        # Only entries whose text contains a '/' are treated as activities.
        if '/' not in link_text:
            continue
        activity_code = link_text.replace('/', '')

        try:
            date = parse_listing_date(link.nextSibling)
            name, uploader, compat_from, compat_to = _scrape_activity(
                activity_code)
        except Exception as error:
            # 'Exception' (not a bare except) so that Ctrl-C still stops
            # the run; one broken page must not abort the whole report.
            _warn('%s: skipped (%s)' % (activity_code, error))
            continue

        if name != '' and uploader != '':
            _emit(';'.join((activity_code, date, name, uploader,
                            compat_from, compat_to)))


if __name__ == '__main__':
    main()