Added a Python script which demonstrates the basic use of urllib2.

It opens a few sites, reads their content and locates their titles in that content.
author: Dinko Galetic <dgaletic@everflame.(none)> 2010-06-03 07:43:27 (GMT)
committer: Dinko Galetic <dgaletic@everflame.(none)> 2010-06-03 07:43:27 (GMT)
commit: 313518d63ff2f167069097fbb9f621ca8a6c62fa (patch)
tree: 5a73bd18cf61f0e0a99371794790214bf97bfed5
parent: b42aaca7f79dcd31b349791e1a13183758bf59a9 (diff)
1 files changed, 64 insertions, 0 deletions
diff --git a/data/GSOC examples/Opening websites b/data/GSOC examples/Opening websites
new file mode 100644
index 0000000..97ee16f
--- /dev/null
+++ b/data/GSOC examples/Opening websites
@@ -0,0 +1,64 @@
+# This example demonstrates how urllib2 can be used to open websites and read
+# some data from them.
+
+import urllib2
+
+# define a function which will open a bunch of links we give it in a list
+def open_sites(links):
+    sites = []
+    for url in urls:
+        print "Opening: " + url
+        # try to open that site
+        try:
+            site = urllib2.urlopen(url)
+        except:
+            # Does an error occur with any of the default urls? 
+            # Practice: If so, could you fix it?
+            print "An error has occured, skipping " + url
+            print
+            raw_input("...press enter key to continue...")
+            continue
+        if site.geturl() != url:
+            print "Careful! Site " + url + " has redirected you to " + site.geturl()
+        print "Site " + site.geturl() + " is now open."
+        print
+        sites.append(site)
+        raw_input("...press enter key to continue...")
+        print
+    return sites
+
+url1 = "http://www.google.com"
+url2 = "http://www.sugarlabs.org"
+url3 = "www.wikipedia.org"
+urls = [url1, url2, url3]
+
+sites = open_sites(urls)
+
+print
+print "Let's read those sites and find their titles."
+print
+raw_input("...press enter key to continue...")
+print
+
+for site in sites:
+    site_content = site.read()
+    title_at = site_content.find("<title>") + 7
+    print "The title of site at " + site.geturl() + " begins at its index " + str(title_at)
+    title_ends = site_content.find("</title>", title_at)
+    title = site_content[title_at:title_ends]
+    # In Python, \ is the so-called "escape" character. Since some characters have
+    # special meanings, like " or ' opening and closing a string, we have to tell
+    # the interpreter to ignore such meanings when we wish to put those precise
+    # characters in a string (or print them). In the following line, we wish to
+    # print the " character so we "escape" it - by putting \ in before it.
+    # Practice: What would we have to do to print an escape character \ ? 
+    print "The title is: \"" + title + "\""
+    print
+    # An index of -1 refers to the first element from the end. Thus, this 
+    # comparison checks whether the current element is the last one.
+    # Practice: Why would we want that?
+    if site == sites[-1]:
+        raw_input("...press enter to finish..:")
+    else:
+        raw_input("...press enter key to continue...")
+    print
author	Dinko Galetic <dgaletic@everflame.(none)>	2010-06-03 07:43:27 (GMT)
committer	Dinko Galetic <dgaletic@everflame.(none)>	2010-06-03 07:43:27 (GMT)
commit	313518d63ff2f167069097fbb9f621ca8a6c62fa (patch)
tree	5a73bd18cf61f0e0a99371794790214bf97bfed5
parent	b42aaca7f79dcd31b349791e1a13183758bf59a9 (diff)