Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dinko Galetic <dgaletic@everflame.(none)> 2010-06-03 07:43:27 (GMT)
committer Dinko Galetic <dgaletic@everflame.(none)> 2010-06-03 07:43:27 (GMT)
commit 313518d63ff2f167069097fbb9f621ca8a6c62fa (patch)
tree 5a73bd18cf61f0e0a99371794790214bf97bfed5
parent b42aaca7f79dcd31b349791e1a13183758bf59a9 (diff)
Added a Python script which demonstrates the basic use of urllib2.
It opens a few sites, reads their content and locates their titles in that content.
-rw-r--r-- data/GSOC examples/Opening websites 64
1 files changed, 64 insertions, 0 deletions
diff --git a/data/GSOC examples/Opening websites b/data/GSOC examples/Opening websites
new file mode 100644
index 0000000..97ee16f
--- /dev/null
+++ b/data/GSOC examples/Opening websites
@@ -0,0 +1,64 @@
+# This example demonstrates how urllib2 can be used to open websites and read
+# some data from them.
+
+import urllib2
+
+# define a function which will open a bunch of links we give it in a list
+def open_sites(links):
+ sites = []
+ for url in urls:
+ print "Opening: " + url
+ # try to open that site
+ try:
+ site = urllib2.urlopen(url)
+ except:
+ # Does an error occur with any of the default urls?
+ # Practice: If so, could you fix it?
+ print "An error has occured, skipping " + url
+ print
+ raw_input("...press enter key to continue...")
+ continue
+ if site.geturl() != url:
+ print "Careful! Site " + url + " has redirected you to " + site.geturl()
+ print "Site " + site.geturl() + " is now open."
+ print
+ sites.append(site)
+ raw_input("...press enter key to continue...")
+ print
+ return sites
+
+url1 = "http://www.google.com"
+url2 = "http://www.sugarlabs.org"
+url3 = "www.wikipedia.org"
+urls = [url1, url2, url3]
+
+sites = open_sites(urls)
+
+print
+print "Let's read those sites and find their titles."
+print
+raw_input("...press enter key to continue...")
+print
+
+for site in sites:
+ site_content = site.read()
+ title_at = site_content.find("<title>") + 7
+ print "The title of site at " + site.geturl() + " begins at its index " + str(title_at)
+ title_ends = site_content.find("</title>", title_at)
+ title = site_content[title_at:title_ends]
+ # In Python, \ is the so-called "escape" character. Since some characters have
+ # special meanings, like " or ' opening and closing a string, we have to tell
+ # the interpreter to ignore such meanings when we wish to put those precise
+ # characters in a string (or print them). In the following line, we wish to
+ # print the " character so we "escape" it - by putting \ in before it.
+ # Practice: What would we have to do to print an escape character \ ?
+ print "The title is: \"" + title + "\""
+ print
+ # An index of -1 refers to the first element from the end. Thus, this
+ # comparison checks whether the current element is the last one.
+ # Practice: Why would we want that?
+ if site == sites[-1]:
+ raw_input("...press enter to finish..:")
+ else:
+ raw_input("...press enter key to continue...")
+ print