blob: 97ee16f52d9799ec19d304a24ad4a2a131bdd907 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
# This example demonstrates how urllib2 can be used to open websites and read
# some data from them.
import urllib2
# define a function which will open a bunch of links we give it in a list
def open_sites(links):
    """Open every URL in `links` and return the ones that opened.

    Progress is printed as we go, and the user is asked to press enter
    after each attempt. A URL that cannot be opened is reported and
    skipped, so the returned list may be shorter than `links`.

    links -- a list (or any iterable) of URL strings.

    Returns a list with the objects urllib2.urlopen gave back, in the
    same order as their URLs appear in `links`.
    """
    sites = []
    # Loop over the argument we were given, not over the module-level
    # `urls` list, so the function works for any list a caller passes in.
    for url in links:
        print("Opening: " + url)
        # try to open that site
        try:
            site = urllib2.urlopen(url)
        except Exception:
            # `except Exception` still skips any site that fails to open,
            # but unlike a bare `except:` it lets Ctrl-C stop the program.
            # Does an error occur with any of the default urls?
            # Practice: If so, could you fix it?
            print("An error has occured, skipping " + url)
            print("")
            raw_input("...press enter key to continue...")
            continue
        # geturl() gives the address we really ended up at, which differs
        # from `url` when the server redirected us.
        if site.geturl() != url:
            print("Careful! Site " + url + " has redirected you to " + site.geturl())
        print("Site " + site.geturl() + " is now open.")
        print("")
        sites.append(site)
        raw_input("...press enter key to continue...")
        print("")
    return sites
url1 = "http://www.google.com"
url2 = "http://www.sugarlabs.org"
# url3 is left without "http://" on purpose: it is the faulty default url
# that the practice question inside open_sites asks about.
url3 = "www.wikipedia.org"
urls = [url1, url2, url3]
sites = open_sites(urls)
print("")
print("Let's read those sites and find their titles.")
print("")
raw_input("...press enter key to continue...")
print("")
for site in sites:
    # Remember the address before reading, then always close the
    # connection once we have the page content - we do not need it anymore.
    site_url = site.geturl()
    try:
        site_content = site.read()
    finally:
        site.close()
    # find() returns -1 when the text is not there, so remember where the
    # opening tag is before adding its length to get the title's start.
    open_tag = "<title>"
    tag_at = site_content.find(open_tag)
    title_at = tag_at + len(open_tag)
    title_ends = site_content.find("</title>", title_at)
    if tag_at == -1 or title_ends == -1:
        # Without both tags any "title" we cut out would be garbage.
        print("Could not find a title for the site at " + site_url)
    else:
        print("The title of site at " + site_url + " begins at its index " + str(title_at))
        title = site_content[title_at:title_ends]
        # In Python, \ is the so-called "escape" character. Since some characters have
        # special meanings, like " or ' opening and closing a string, we have to tell
        # the interpreter to ignore such meanings when we wish to put those precise
        # characters in a string (or print them). In the following line, we wish to
        # print the " character so we "escape" it - by putting \ in before it.
        # Practice: What would we have to do to print an escape character \ ?
        print("The title is: \"" + title + "\"")
    print("")
    # An index of -1 refers to the first element from the end. Thus, this
    # comparison checks whether the current element is the last one.
    # Practice: Why would we want that?
    if site == sites[-1]:
        raw_input("...press enter to finish..:")
    else:
        raw_input("...press enter key to continue...")
    print("")
|