Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/data/GSOC examples/Opening websites
blob: 97ee16f52d9799ec19d304a24ad4a2a131bdd907 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# This example demonstrates how urllib2 can be used to open websites and read
# some data from them.

import urllib2

# define a function which will open a bunch of links we give it in a list
def open_sites(links):
    """Open every url in ``links`` and return the sites that could be opened.

    Each url is announced, opened with ``urllib2.urlopen`` and, if the final
    address differs from the requested one, a redirect warning is printed.
    Urls that raise an error while opening are reported and skipped. After
    every url the function waits for the user to press enter.

    links -- an iterable of url strings
    Returns a list with the opened response objects, in the order of ``links``.
    """
    # NOTE: print(...) with a single argument behaves exactly like the
    # Python 2 print statement, and print("") prints an empty line.
    sites = []
    # Loop over the argument (not a module-level list) so that the function
    # works with whatever list of links the caller passes in.
    for url in links:
        print("Opening: " + url)
        # try to open that site
        try:
            site = urllib2.urlopen(url)
        except Exception:
            # "Exception" covers ordinary errors but, unlike a bare except,
            # still lets Ctrl-C (KeyboardInterrupt) stop the program.
            # Does an error occur with any of the default urls?
            # Practice: If so, could you fix it?
            print("An error has occured, skipping " + url)
            print("")
            raw_input("...press enter key to continue...")
            continue
        # the address we actually ended up at, after any redirects
        final_url = site.geturl()
        if final_url != url:
            print("Careful! Site " + url + " has redirected you to " + final_url)
        print("Site " + final_url + " is now open.")
        print("")
        sites.append(site)
        raw_input("...press enter key to continue...")
        print("")
    return sites

# The default urls that will be handed to open_sites.
url1 = "http://www.google.com"
url2 = "http://www.sugarlabs.org"
url3 = "www.wikipedia.org"
urls = [url1, url2, url3]

# Only the sites that opened without an error are returned.
sites = open_sites(urls)

# NOTE: print(...) with a single argument behaves exactly like the Python 2
# print statement, and print("") prints an empty line.
print("")
print("Let's read those sites and find their titles.")
print("")
raw_input("...press enter key to continue...")
print("")

for site in sites:
    site_url = site.geturl()
    site_content = site.read()
    # The whole page is in memory now, so the connection can be released.
    site.close()
    # find() returns the index where the text starts, or -1 if it is missing.
    tag_at = site_content.find("<title>")
    # The title itself starts right after the opening tag.
    title_at = tag_at + len("<title>")
    title_ends = site_content.find("</title>", title_at)
    if tag_at == -1 or title_ends == -1:
        # Without this check a missing tag would give us a meaningless slice.
        print("Could not find a title for the site at " + site_url)
    else:
        print("The title of site at " + site_url + " begins at its index " + str(title_at))
        title = site_content[title_at:title_ends]
        # In Python, \ is the so-called "escape" character. Since some characters have
        # special meanings, like " or ' opening and closing a string, we have to tell
        # the interpreter to ignore such meanings when we wish to put those precise
        # characters in a string (or print them). In the following line, we wish to
        # print the " character so we "escape" it - by putting \ in before it.
        # Practice: What would we have to do to print an escape character \ ?
        print("The title is: \"" + title + "\"")
    print("")
    # An index of -1 refers to the first element from the end. Thus, this
    # comparison checks whether the current element is the last one.
    # Practice: Why would we want that?
    if site == sites[-1]:
        raw_input("...press enter to finish..:")
    else:
        raw_input("...press enter key to continue...")
    print("")