1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup as bs
# Scrape the Sugar Labs activity download index and, for every activity
# directory listed there, look up its name, uploader and Sugar compatibility
# range on activities.sugarlabs.org.  One semicolon-separated record is
# printed per activity:
#   activity_code;date;activity_name;uploader;compat_from;compat_to
# A record is printed only when both the name and the uploader were found.
html_act_index = urlopen('http://download.sugarlabs.org/activities/')
sopa_index = bs(html_act_index)
pre = sopa_index.find('pre')
for link in pre.findAll('a'):
    # Only links whose text contains a '/' are processed; the text with the
    # slashes removed is used as the activity code.
    activity_code = link.text.replace('/', '')
    if activity_code == link.text:
        continue

    # The text node right after the link is expected to start with the date.
    # Keep its first space-delimited token; split() (unlike slicing at
    # find(' ')) does not chop the last character when no space is present.
    date = link.nextSibling.strip().split(' ', 1)[0]

    act_url = 'http://activities.sugarlabs.org/es-ES/sugar/addons/versions/%s' % activity_code
    uploader = ''
    activity_name = ''
    compat_from = ''
    compat_to = ''

    # Every lookup below is best-effort.  `except Exception` (rather than a
    # bare `except`) keeps that behaviour while letting KeyboardInterrupt and
    # SystemExit propagate, so the crawl can be interrupted.
    try:
        sopa_act = bs(urlopen(act_url))
    except Exception:
        # Versions page could not be fetched/parsed: nothing to report.
        continue

    # Activity name: the link to the add-on page inside the main <div>.
    try:
        main_div = sopa_act.find('div', role='main')
        activity_name = main_div.find('a', href='/es-ES/sugar/addon/%s' % activity_code).text
    except Exception:
        pass
    if activity_name == '':
        # Without a name the record is never printed, so skip the remaining
        # lookups (including the extra HTTP request for the uploader).
        continue

    # Uploader: first try the versions page ...
    try:
        uploader_div = sopa_act.find('div', {'class': 'uploader'})
        uploader = uploader_div.find('a').text
    except Exception:
        pass
    if uploader in ('nickname', ''):
        # ... and fall back to the add-on main page
        # (http://activities.sugarlabs.org/es-ES/sugar/addon/%s) when the
        # versions page gave nothing or only the 'nickname' placeholder.
        try:
            act_main_url = 'http://activities.sugarlabs.org/es-ES/sugar/addon/%s' % activity_code
            sopa_act_main = bs(urlopen(act_main_url))
            uploader = sopa_act_main.find('a', {'class': 'profileLink'}).text
        except Exception:
            pass
    if uploader == '':
        continue

    # Compatibility range, taken from the first <li> of the 'app_compat' div
    # after removing the 'Sugar:' label; dots are replaced with commas.
    try:
        compat_div = sopa_act.find('div', {'class': 'app_compat'})
        # NOTE(review): under Python 2 this non-ASCII byte-string literal may
        # raise UnicodeDecodeError when .text is unicode (the error would be
        # swallowed below, leaving both fields empty) -- confirm the intended
        # literal and the file's coding declaration.
        compat = compat_div.find('li').text.replace('–', '-')
        compat = compat.replace('Sugar:', '')
        low, dash, high = compat.partition('-')
        if not dash:
            # No range separator: report the single value as both bounds
            # instead of truncating the lower bound by one character.
            high = low
        compat_from = low.strip().replace('.', ',')
        compat_to = high.strip().replace('.', ',')
    except Exception:
        pass

    # Single parenthesised argument: prints the same under the Python 2
    # print statement and the Python 3 print function.
    print("%s;%s;%s;%s;%s;%s" % (activity_code, date, activity_name, uploader, compat_from, compat_to))
|