author     Tony Anderson <tony_anderson@usa.net>   2011-04-25 10:33:11 (GMT)
committer  Tony Anderson <tony_anderson@usa.net>   2011-04-25 10:33:11 (GMT)
commit     b9a2719691b4c6cf83f31eb0b6c3e7e878524c0e (patch)
tree       f9c01373e56e9c1c749dfe9b1bf7c7482e05eb77
initial commit (HEAD, master)
-rwxr-xr-x  BeautifulSoup.py              2011
-rw-r--r--  ReadMe                          62
-rwxr-xr-x  cleanUp.py                      60
-rwxr-xr-x  cvtFile.py                     185
-rwxr-xr-x  cvtSiyavula.py                 157
-rwxr-xr-x  generate                       150
-rwxr-xr-x  generate_basic_lesson.py       407
-rwxr-xr-x  generate_karma_lesson.py       692
-rwxr-xr-x  html.py                        218
-rwxr-xr-x  lesson_html.py                 145
-rwxr-xr-x  makeActivities.py               44
-rwxr-xr-x  menus/activity.html             24
-rwxr-xr-x  menus/course.html               20
-rwxr-xr-x  menus/index.html                29
-rwxr-xr-x  menus/milestone.html            24
-rwxr-xr-x  menus/subject.html              39
-rwxr-xr-x  mo2js.py                        66
-rwxr-xr-x  path.py                        971
-rwxr-xr-x  zipper.py                      121
19 files changed, 5425 insertions, 0 deletions
diff --git a/BeautifulSoup.py b/BeautifulSoup.py
new file mode 100755
index 0000000..28e3e96
--- /dev/null
+++ b/BeautifulSoup.py
@@ -0,0 +1,2011 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses a (possibly invalid) XML or HTML document into a
+tree representation. It provides methods and Pythonic idioms that make
+it easy to navigate, search, and modify the tree.
+
+A well-formed XML/HTML document yields a well-formed data
+structure. An ill-formed XML/HTML document yields a correspondingly
+ill-formed data structure. If your document is only locally
+well-formed, you can use this library to find and process the
+well-formed part of it.
+
+Beautiful Soup works with Python 2.2 and up. It has no external
+dependencies, but you'll have more success at converting data to UTF-8
+if you also install these three packages:
+
+* chardet, for auto-detecting character encodings
+ http://chardet.feedparser.org/
+* cjkcodecs and iconv_codec, which add more encodings to the ones supported
+ by stock Python.
+ http://cjkpython.i18n.org/
+
+Beautiful Soup defines classes for two main parsing strategies:
+
+ * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
+ language that kind of looks like XML.
+
+ * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
+ or invalid. This class has web browser-like heuristics for
+ obtaining a sensible parse tree in the face of common HTML errors.
+
+Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
+the encoding of an HTML or XML document, and converting it to
+Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/documentation.html
+
+Here, have some legalese:
+
+Copyright (c) 2004-2009, Leonard Richardson
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the the Beautiful Soup Consortium and All
+ Night Kosher Bakery nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
+
+"""
+from __future__ import generators
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "3.0.8"
+__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson"
+__license__ = "New-style BSD"
+
+from sgmllib import SGMLParser, SGMLParseError
+import codecs
+import markupbase
+import types
+import re
+import sgmllib
+try:
+ from htmlentitydefs import name2codepoint
+except ImportError:
+ name2codepoint = {}
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+#These hacks make Beautiful Soup able to parse XML with namespaces
+sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
+markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
+
+DEFAULT_OUTPUT_ENCODING = "utf-8"
+
+def _match_css_class(str):
+ """Build a RE to match the given CSS class."""
+ return re.compile(r"(^|.*\s)%s($|\s)" % str)
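+
+# For instance (illustrative), _match_css_class("header") yields a pattern
+# that matches "header" as one of the space-separated words in a class
+# attribute value; SoupStrainer uses it when a bare string is passed as attrs:
+#     _match_css_class("header").match("page header")   # matches
+#     _match_css_class("header").match("subheader")     # does not match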
+
+# First, the classes that represent markup elements.
+
+class PageElement(object):
+ """Contains the navigational information for some part of the page
+ (either a tag or a piece of text)"""
+
+ def setup(self, parent=None, previous=None):
+ """Sets up the initial relations between this element and
+ other elements."""
+ self.parent = parent
+ self.previous = previous
+ self.next = None
+ self.previousSibling = None
+ self.nextSibling = None
+ if self.parent and self.parent.contents:
+ self.previousSibling = self.parent.contents[-1]
+ self.previousSibling.nextSibling = self
+
+ def replaceWith(self, replaceWith):
+ oldParent = self.parent
+ myIndex = self.parent.index(self)
+ if hasattr(replaceWith, "parent")\
+ and replaceWith.parent is self.parent:
+ # We're replacing this element with one of its siblings.
+ index = replaceWith.parent.index(replaceWith)
+ if index and index < myIndex:
+ # Furthermore, it comes before this element. That
+ # means that when we extract it, the index of this
+ # element will change.
+ myIndex = myIndex - 1
+ self.extract()
+ oldParent.insert(myIndex, replaceWith)
+
+ def replaceWithChildren(self):
+ myParent = self.parent
+ myIndex = self.parent.index(self)
+ self.extract()
+ reversedChildren = list(self.contents)
+ reversedChildren.reverse()
+ for child in reversedChildren:
+ myParent.insert(myIndex, child)
+
+ def extract(self):
+ """Destructively rips this element out of the tree."""
+ if self.parent:
+ try:
+ del self.parent.contents[self.parent.index(self)]
+ except ValueError:
+ pass
+
+ #Find the two elements that would be next to each other if
+ #this element (and any children) hadn't been parsed. Connect
+ #the two.
+ lastChild = self._lastRecursiveChild()
+ nextElement = lastChild.next
+
+ if self.previous:
+ self.previous.next = nextElement
+ if nextElement:
+ nextElement.previous = self.previous
+ self.previous = None
+ lastChild.next = None
+
+ self.parent = None
+ if self.previousSibling:
+ self.previousSibling.nextSibling = self.nextSibling
+ if self.nextSibling:
+ self.nextSibling.previousSibling = self.previousSibling
+ self.previousSibling = self.nextSibling = None
+ return self
+
+ def _lastRecursiveChild(self):
+ "Finds the last element beneath this object to be parsed."
+ lastChild = self
+ while hasattr(lastChild, 'contents') and lastChild.contents:
+ lastChild = lastChild.contents[-1]
+ return lastChild
+
+ def insert(self, position, newChild):
+ if isinstance(newChild, basestring) \
+ and not isinstance(newChild, NavigableString):
+ newChild = NavigableString(newChild)
+
+ position = min(position, len(self.contents))
+ if hasattr(newChild, 'parent') and newChild.parent is not None:
+ # We're 'inserting' an element that's already one
+ # of this object's children.
+ if newChild.parent is self:
+ index = self.index(newChild)
+ if index > position:
+ # Furthermore we're moving it further down the
+ # list of this object's children. That means that
+ # when we extract this element, our target index
+ # will jump down one.
+ position = position - 1
+ newChild.extract()
+
+ newChild.parent = self
+ previousChild = None
+ if position == 0:
+ newChild.previousSibling = None
+ newChild.previous = self
+ else:
+ previousChild = self.contents[position-1]
+ newChild.previousSibling = previousChild
+ newChild.previousSibling.nextSibling = newChild
+ newChild.previous = previousChild._lastRecursiveChild()
+ if newChild.previous:
+ newChild.previous.next = newChild
+
+ newChildsLastElement = newChild._lastRecursiveChild()
+
+ if position >= len(self.contents):
+ newChild.nextSibling = None
+
+ parent = self
+ parentsNextSibling = None
+ while not parentsNextSibling:
+ parentsNextSibling = parent.nextSibling
+ parent = parent.parent
+ if not parent: # This is the last element in the document.
+ break
+ if parentsNextSibling:
+ newChildsLastElement.next = parentsNextSibling
+ else:
+ newChildsLastElement.next = None
+ else:
+ nextChild = self.contents[position]
+ newChild.nextSibling = nextChild
+ if newChild.nextSibling:
+ newChild.nextSibling.previousSibling = newChild
+ newChildsLastElement.next = nextChild
+
+ if newChildsLastElement.next:
+ newChildsLastElement.next.previous = newChildsLastElement
+ self.contents.insert(position, newChild)
+
+ def append(self, tag):
+ """Appends the given tag to the contents of this tag."""
+ self.insert(len(self.contents), tag)
+
+ def findNext(self, name=None, attrs={}, text=None, **kwargs):
+ """Returns the first item that matches the given criteria and
+ appears after this Tag in the document."""
+ return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
+
+ def findAllNext(self, name=None, attrs={}, text=None, limit=None,
+ **kwargs):
+ """Returns all items that match the given criteria and appear
+ after this Tag in the document."""
+ return self._findAll(name, attrs, text, limit, self.nextGenerator,
+ **kwargs)
+
+ def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
+ """Returns the closest sibling to this Tag that matches the
+ given criteria and appears after this Tag in the document."""
+ return self._findOne(self.findNextSiblings, name, attrs, text,
+ **kwargs)
+
+ def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
+ **kwargs):
+ """Returns the siblings of this Tag that match the given
+ criteria and appear after this Tag in the document."""
+ return self._findAll(name, attrs, text, limit,
+ self.nextSiblingGenerator, **kwargs)
+ fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
+
+ def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
+ """Returns the first item that matches the given criteria and
+ appears before this Tag in the document."""
+ return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
+
+ def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
+ **kwargs):
+ """Returns all items that match the given criteria and appear
+ before this Tag in the document."""
+ return self._findAll(name, attrs, text, limit, self.previousGenerator,
+ **kwargs)
+ fetchPrevious = findAllPrevious # Compatibility with pre-3.x
+
+ def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
+ """Returns the closest sibling to this Tag that matches the
+ given criteria and appears before this Tag in the document."""
+ return self._findOne(self.findPreviousSiblings, name, attrs, text,
+ **kwargs)
+
+ def findPreviousSiblings(self, name=None, attrs={}, text=None,
+ limit=None, **kwargs):
+ """Returns the siblings of this Tag that match the given
+ criteria and appear before this Tag in the document."""
+ return self._findAll(name, attrs, text, limit,
+ self.previousSiblingGenerator, **kwargs)
+ fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
+
+ def findParent(self, name=None, attrs={}, **kwargs):
+ """Returns the closest parent of this Tag that matches the given
+ criteria."""
+ # NOTE: We can't use _findOne because findParents takes a different
+ # set of arguments.
+ r = None
+ l = self.findParents(name, attrs, 1)
+ if l:
+ r = l[0]
+ return r
+
+ def findParents(self, name=None, attrs={}, limit=None, **kwargs):
+ """Returns the parents of this Tag that match the given
+ criteria."""
+
+ return self._findAll(name, attrs, None, limit, self.parentGenerator,
+ **kwargs)
+ fetchParents = findParents # Compatibility with pre-3.x
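+
+ # A rough sketch of how the directional finders above are used (illustrative
+ # only; 'soup' stands for a document parsed by one of the classes below):
+ #     link = soup.find('a')
+ #     link.findNext('a')              # next <a> in document order
+ #     link.findAllNext(text=True)     # every text node after the link
+ #     link.findParent('div')          # nearest enclosing <div>
+ #     link.findPreviousSibling('p')   # closest earlier sibling <p>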
+
+ #These methods do the real heavy lifting.
+
+ def _findOne(self, method, name, attrs, text, **kwargs):
+ r = None
+ l = method(name, attrs, text, 1, **kwargs)
+ if l:
+ r = l[0]
+ return r
+
+ def _findAll(self, name, attrs, text, limit, generator, **kwargs):
+ "Iterates over a generator looking for things that match."
+
+ if isinstance(name, SoupStrainer):
+ strainer = name
+ # Special case some findAll* searches
+ # findAll*(True)
+ elif not limit and name is True and not attrs and not kwargs:
+ return [element for element in generator()
+ if isinstance(element, Tag)]
+
+ # findAll*('tag-name')
+ elif not limit and isinstance(name, basestring) and not attrs \
+ and not kwargs:
+ return [element for element in generator()
+ if isinstance(element, Tag) and element.name == name]
+
+ # Build a SoupStrainer
+ else:
+ strainer = SoupStrainer(name, attrs, text, **kwargs)
+ results = ResultSet(strainer)
+ g = generator()
+ while True:
+ try:
+ i = g.next()
+ except StopIteration:
+ break
+ if i:
+ found = strainer.search(i)
+ if found:
+ results.append(found)
+ if limit and len(results) >= limit:
+ break
+ return results
+
+ #These Generators can be used to navigate starting from both
+ #NavigableStrings and Tags.
+ def nextGenerator(self):
+ i = self
+ while i is not None:
+ i = i.next
+ yield i
+
+ def nextSiblingGenerator(self):
+ i = self
+ while i is not None:
+ i = i.nextSibling
+ yield i
+
+ def previousGenerator(self):
+ i = self
+ while i is not None:
+ i = i.previous
+ yield i
+
+ def previousSiblingGenerator(self):
+ i = self
+ while i is not None:
+ i = i.previousSibling
+ yield i
+
+ def parentGenerator(self):
+ i = self
+ while i is not None:
+ i = i.parent
+ yield i
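+
+ # Note: each generator above follows one link type (next, nextSibling,
+ # previous, previousSibling, parent); because it advances before yielding,
+ # its last yielded value is None, which _findAll() below filters out.
+ # Rough sketch (illustrative):
+ #     for el in tag.parentGenerator():
+ #         if el is None: break
+ #         print el.name            # parent, grandparent, ... '[document]'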
+
+ # Utility methods
+ def substituteEncoding(self, str, encoding=None):
+ encoding = encoding or "utf-8"
+ return str.replace("%SOUP-ENCODING%", encoding)
+
+ def toEncoding(self, s, encoding=None):
+ """Encodes an object to a string in some encoding, or to Unicode.
+ ."""
+ if isinstance(s, unicode):
+ if encoding:
+ s = s.encode(encoding)
+ elif isinstance(s, str):
+ if encoding:
+ s = s.encode(encoding)
+ else:
+ s = unicode(s)
+ else:
+ if encoding:
+ s = self.toEncoding(str(s), encoding)
+ else:
+ s = unicode(s)
+ return s
+
+class NavigableString(unicode, PageElement):
+
+ def __new__(cls, value):
+ """Create a new NavigableString.
+
+ When unpickling a NavigableString, this method is called with
+ the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
+ passed in to the superclass's __new__ or the superclass won't know
+ how to handle non-ASCII characters.
+ """
+ if isinstance(value, unicode):
+ return unicode.__new__(cls, value)
+ return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+
+ def __getnewargs__(self):
+ return (NavigableString.__str__(self),)
+
+ def __getattr__(self, attr):
+ """text.string gives you text. This is for backwards
+ compatibility for Navigable*String, but for CData* it lets you
+ get the string without the CData wrapper."""
+ if attr == 'string':
+ return self
+ else:
+ raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+
+ def __unicode__(self):
+ return str(self).decode(DEFAULT_OUTPUT_ENCODING)
+
+ def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ if encoding:
+ return self.encode(encoding)
+ else:
+ return self
+
+class CData(NavigableString):
+
+ def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
+
+class ProcessingInstruction(NavigableString):
+ def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ output = self
+ if "%SOUP-ENCODING%" in output:
+ output = self.substituteEncoding(output, encoding)
+ return "<?%s?>" % self.toEncoding(output, encoding)
+
+class Comment(NavigableString):
+ def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ return "<!--%s-->" % NavigableString.__str__(self, encoding)
+
+class Declaration(NavigableString):
+ def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ return "<!%s>" % NavigableString.__str__(self, encoding)
+
+class Tag(PageElement):
+
+ """Represents a found HTML tag with its attributes and contents."""
+
+ def _invert(h):
+ "Cheap function to invert a hash."
+ i = {}
+ for k,v in h.items():
+ i[v] = k
+ return i
+
+ XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
+ "quot" : '"',
+ "amp" : "&",
+ "lt" : "<",
+ "gt" : ">" }
+
+ XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
+
+ def _convertEntities(self, match):
+ """Used in a call to re.sub to replace HTML, XML, and numeric
+ entities with the appropriate Unicode characters. If HTML
+ entities are being converted, any unrecognized entities are
+ escaped."""
+ x = match.group(1)
+ if self.convertHTMLEntities and x in name2codepoint:
+ return unichr(name2codepoint[x])
+ elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
+ if self.convertXMLEntities:
+ return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
+ else:
+ return u'&%s;' % x
+ elif len(x) > 0 and x[0] == '#':
+ # Handle numeric entities
+ if len(x) > 1 and x[1] == 'x':
+ return unichr(int(x[2:], 16))
+ else:
+ return unichr(int(x[1:]))
+
+ elif self.escapeUnrecognizedEntities:
+ return u'&amp;%s;' % x
+ else:
+ return u'&%s;' % x
+
+ def __init__(self, parser, name, attrs=None, parent=None,
+ previous=None):
+ "Basic constructor."
+
+ # We don't actually store the parser object: that lets extracted
+ # chunks be garbage-collected
+ self.parserClass = parser.__class__
+ self.isSelfClosing = parser.isSelfClosingTag(name)
+ self.name = name
+ if attrs is None:
+ attrs = []
+ self.attrs = attrs
+ self.contents = []
+ self.setup(parent, previous)
+ self.hidden = False
+ self.containsSubstitutions = False
+ self.convertHTMLEntities = parser.convertHTMLEntities
+ self.convertXMLEntities = parser.convertXMLEntities
+ self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities
+
+ # Convert any HTML, XML, or numeric entities in the attribute values.
+ convert = lambda(k, val): (k,
+ re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
+ self._convertEntities,
+ val))
+ self.attrs = map(convert, self.attrs)
+
+ def getString(self):
+ if (len(self.contents) == 1
+ and isinstance(self.contents[0], NavigableString)):
+ return self.contents[0]
+
+ def setString(self, string):
+ """Replace the contents of the tag with a string"""
+ self.clear()
+ self.append(string)
+
+ string = property(getString, setString)
+
+ def getText(self, separator=u""):
+ if not len(self.contents):
+ return u""
+ stopNode = self._lastRecursiveChild().next
+ strings = []
+ current = self.contents[0]
+ while current is not stopNode:
+ if isinstance(current, NavigableString):
+ strings.append(current.strip())
+ current = current.next
+ return separator.join(strings)
+
+ text = property(getText)
+
+ def get(self, key, default=None):
+ """Returns the value of the 'key' attribute for the tag, or
+ the value given for 'default' if it doesn't have that
+ attribute."""
+ return self._getAttrMap().get(key, default)
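+
+ # A small sketch of attribute access (illustrative; uses the BeautifulSoup
+ # parser class defined later in this file):
+ #     tag = BeautifulSoup('<a href="/index.html">home</a>').a
+ #     tag['href']             # -> u'/index.html' (raises KeyError if absent)
+ #     tag.get('title', '')    # -> ''             (default when absent)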
+
+ def clear(self):
+ """Extract all children."""
+ for child in self.contents[:]:
+ child.extract()
+
+ def index(self, element):
+ for i, child in enumerate(self.contents):
+ if child is element:
+ return i
+ raise ValueError("Tag.index: element not in tag")
+
+ def has_key(self, key):
+ return self._getAttrMap().has_key(key)
+
+ def __getitem__(self, key):
+ """tag[key] returns the value of the 'key' attribute for the tag,
+ and throws an exception if it's not there."""
+ return self._getAttrMap()[key]
+
+ def __iter__(self):
+ "Iterating over a tag iterates over its contents."
+ return iter(self.contents)
+
+ def __len__(self):
+ "The length of a tag is the length of its list of contents."
+ return len(self.contents)
+
+ def __contains__(self, x):
+ return x in self.contents
+
+ def __nonzero__(self):
+ "A tag is non-None even if it has no contents."
+ return True
+
+ def __setitem__(self, key, value):
+ """Setting tag[key] sets the value of the 'key' attribute for the
+ tag."""
+ self._getAttrMap()
+ self.attrMap[key] = value
+ found = False
+ for i in range(0, len(self.attrs)):
+ if self.attrs[i][0] == key:
+ self.attrs[i] = (key, value)
+ found = True
+ if not found:
+ self.attrs.append((key, value))
+ self._getAttrMap()[key] = value
+
+ def __delitem__(self, key):
+ "Deleting tag[key] deletes all 'key' attributes for the tag."
+ for item in self.attrs:
+ if item[0] == key:
+ self.attrs.remove(item)
+ #We don't break because bad HTML can define the same
+ #attribute multiple times.
+ self._getAttrMap()
+ if self.attrMap.has_key(key):
+ del self.attrMap[key]
+
+ def __call__(self, *args, **kwargs):
+ """Calling a tag like a function is the same as calling its
+ findAll() method. Eg. tag('a') returns a list of all the A tags
+ found within this tag."""
+ return apply(self.findAll, args, kwargs)
+
+ def __getattr__(self, tag):
+ #print "Getattr %s.%s" % (self.__class__, tag)
+ if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
+ return self.find(tag[:-3])
+ elif tag.find('__') != 0:
+ return self.find(tag)
+ raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
+
+ def __eq__(self, other):
+ """Returns true iff this tag has the same name, the same attributes,
+ and the same contents (recursively) as the given tag.
+
+ NOTE: right now this will return false if two tags have the
+ same attributes in a different order. Should this be fixed?"""
+ if other is self:
+ return True
+ if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
+ return False
+ for i in range(0, len(self.contents)):
+ if self.contents[i] != other.contents[i]:
+ return False
+ return True
+
+ def __ne__(self, other):
+ """Returns true iff this tag is not identical to the other tag,
+ as defined in __eq__."""
+ return not self == other
+
+ def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ """Renders this tag as a string."""
+ return self.__str__(encoding)
+
+ def __unicode__(self):
+ return self.__str__(None)
+
+ BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+ + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+ + ")")
+
+ def _sub_entity(self, x):
+ """Used with a regular expression to substitute the
+ appropriate XML entity for an XML special character."""
+ return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
+
+ def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
+ prettyPrint=False, indentLevel=0):
+ """Returns a string or Unicode representation of this tag and
+ its contents. To get Unicode, pass None for encoding.
+
+ NOTE: since Python's HTML parser consumes whitespace, this
+ method is not certain to reproduce the whitespace present in
+ the original string."""
+
+ encodedName = self.toEncoding(self.name, encoding)
+
+ attrs = []
+ if self.attrs:
+ for key, val in self.attrs:
+ fmt = '%s="%s"'
+ if isinstance(val, basestring):
+ if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
+ val = self.substituteEncoding(val, encoding)
+
+ # The attribute value either:
+ #
+ # * Contains no embedded double quotes or single quotes.
+ # No problem: we enclose it in double quotes.
+ # * Contains embedded single quotes. No problem:
+ # double quotes work here too.
+ # * Contains embedded double quotes. No problem:
+ # we enclose it in single quotes.
+ # * Embeds both single _and_ double quotes. This
+ # can't happen naturally, but it can happen if
+ # you modify an attribute value after parsing
+ # the document. Now we have a bit of a
+ # problem. We solve it by enclosing the
+ # attribute in single quotes, and escaping any
+ # embedded single quotes to XML entities.
+ if '"' in val:
+ fmt = "%s='%s'"
+ if "'" in val:
+ # TODO: replace with apos when
+ # appropriate.
+ val = val.replace("'", "&squot;")
+
+ # Now we're okay w/r/t quotes. But the attribute
+ # value might also contain angle brackets, or
+ # ampersands that aren't part of entities. We need
+ # to escape those to XML entities too.
+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+
+ attrs.append(fmt % (self.toEncoding(key, encoding),
+ self.toEncoding(val, encoding)))
+ close = ''
+ closeTag = ''
+ if self.isSelfClosing:
+ close = ' /'
+ else:
+ closeTag = '</%s>' % encodedName
+
+ indentTag, indentContents = 0, 0
+ if prettyPrint:
+ indentTag = indentLevel
+ space = (' ' * (indentTag-1))
+ indentContents = indentTag + 1
+ contents = self.renderContents(encoding, prettyPrint, indentContents)
+ if self.hidden:
+ s = contents
+ else:
+ s = []
+ attributeString = ''
+ if attrs:
+ attributeString = ' ' + ' '.join(attrs)
+ if prettyPrint:
+ s.append(space)
+ s.append('<%s%s%s>' % (encodedName, attributeString, close))
+ if prettyPrint:
+ s.append("\n")
+ s.append(contents)
+ if prettyPrint and contents and contents[-1] != "\n":
+ s.append("\n")
+ if prettyPrint and closeTag:
+ s.append(space)
+ s.append(closeTag)
+ if prettyPrint and closeTag and self.nextSibling:
+ s.append("\n")
+ s = ''.join(s)
+ return s
+
+ def decompose(self):
+ """Recursively destroys the contents of this tree."""
+ self.extract()
+ if len(self.contents) == 0:
+ return
+ current = self.contents[0]
+ while current is not None:
+ next = current.next
+ if isinstance(current, Tag):
+ del current.contents[:]
+ current.parent = None
+ current.previous = None
+ current.previousSibling = None
+ current.next = None
+ current.nextSibling = None
+ current = next
+
+ def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ return self.__str__(encoding, True)
+
+ def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+ prettyPrint=False, indentLevel=0):
+ """Renders the contents of this tag as a string in the given
+ encoding. If encoding is None, returns a Unicode string."""
+ s=[]
+ for c in self:
+ text = None
+ if isinstance(c, NavigableString):
+ text = c.__str__(encoding)
+ elif isinstance(c, Tag):
+ s.append(c.__str__(encoding, prettyPrint, indentLevel))
+ if text and prettyPrint:
+ text = text.strip()
+ if text:
+ if prettyPrint:
+ s.append(" " * (indentLevel-1))
+ s.append(text)
+ if prettyPrint:
+ s.append("\n")
+ return ''.join(s)
+
+ #Soup methods
+
+ def find(self, name=None, attrs={}, recursive=True, text=None,
+ **kwargs):
+ """Return only the first child of this Tag matching the given
+ criteria."""
+ r = None
+ l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+ if l:
+ r = l[0]
+ return r
+ findChild = find
+
+ def findAll(self, name=None, attrs={}, recursive=True, text=None,
+ limit=None, **kwargs):
+ """Extracts a list of Tag objects that match the given
+ criteria. You can specify the name of the Tag and any
+ attributes you want the Tag to have.
+
+ The value of a key-value pair in the 'attrs' map can be a
+ string, a list of strings, a regular expression object, or a
+ callable that takes a string and returns whether or not the
+ string matches for some custom definition of 'matches'. The
+ same is true of the tag name."""
+ generator = self.recursiveChildGenerator
+ if not recursive:
+ generator = self.childGenerator
+ return self._findAll(name, attrs, text, limit, generator, **kwargs)
+ findChildren = findAll
+
+ # Pre-3.x compatibility methods
+ first = find
+ fetch = findAll
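+
+ # A sketch of the matching criteria described in findAll's docstring
+ # (illustrative only; 'soup' stands for a parsed document):
+ #     soup.findAll('a')                                   # by tag name
+ #     soup.findAll('a', {'class': 'external'})            # by attribute value
+ #     soup.findAll(re.compile('^h[1-6]$'))                # name by regexp
+ #     soup.findAll('img', src=lambda v: v and v.endswith('.png'))  # callable
+ #     soup.findAll(text='Next page', limit=1)             # by string contents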
+
+ def fetchText(self, text=None, recursive=True, limit=None):
+ return self.findAll(text=text, recursive=recursive, limit=limit)
+
+ def firstText(self, text=None, recursive=True):
+ return self.find(text=text, recursive=recursive)
+
+ #Private methods
+
+ def _getAttrMap(self):
+ """Initializes a map representation of this tag's attributes,
+ if not already initialized."""
+ if not getattr(self, 'attrMap'):
+ self.attrMap = {}
+ for (key, value) in self.attrs:
+ self.attrMap[key] = value
+ return self.attrMap
+
+ #Generator methods
+ def childGenerator(self):
+ # Just use the iterator from the contents
+ return iter(self.contents)
+
+ def recursiveChildGenerator(self):
+ if not len(self.contents):
+ raise StopIteration
+ stopNode = self._lastRecursiveChild().next
+ current = self.contents[0]
+ while current is not stopNode:
+ yield current
+ current = current.next
+
+
+# Next, a couple classes to represent queries and their results.
+class SoupStrainer:
+ """Encapsulates a number of ways of matching a markup element (tag or
+ text)."""
+
+ def __init__(self, name=None, attrs={}, text=None, **kwargs):
+ self.name = name
+ if isinstance(attrs, basestring):
+ kwargs['class'] = _match_css_class(attrs)
+ attrs = None
+ if kwargs:
+ if attrs:
+ attrs = attrs.copy()
+ attrs.update(kwargs)
+ else:
+ attrs = kwargs
+ self.attrs = attrs
+ self.text = text
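+
+ # A SoupStrainer is usually handed to the parser classes below as
+ # parseOnlyThese so that only matching elements are built; a rough sketch
+ # (illustrative; 'page' is an assumed string of markup):
+ #     links_only = SoupStrainer('a', href=re.compile('^https?:'))
+ #     soup = BeautifulSoup(page, parseOnlyThese=links_only)
+ # Passing a bare string as attrs is shorthand for a CSS class match:
+ #     SoupStrainer('div', 'article')    # <div> tags with class "article"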
+
+ def __str__(self):
+ if self.text:
+ return self.text
+ else:
+ return "%s|%s" % (self.name, self.attrs)
+
+ def searchTag(self, markupName=None, markupAttrs={}):
+ found = None
+ markup = None
+ if isinstance(markupName, Tag):
+ markup = markupName
+ markupAttrs = markup
+ callFunctionWithTagData = callable(self.name) \
+ and not isinstance(markupName, Tag)
+
+ if (not self.name) \
+ or callFunctionWithTagData \
+ or (markup and self._matches(markup, self.name)) \
+ or (not markup and self._matches(markupName, self.name)):
+ if callFunctionWithTagData:
+ match = self.name(markupName, markupAttrs)
+ else:
+ match = True
+ markupAttrMap = None
+ for attr, matchAgainst in self.attrs.items():
+ if not markupAttrMap:
+ if hasattr(markupAttrs, 'get'):
+ markupAttrMap = markupAttrs
+ else:
+ markupAttrMap = {}
+ for k,v in markupAttrs:
+ markupAttrMap[k] = v
+ attrValue = markupAttrMap.get(attr)
+ if not self._matches(attrValue, matchAgainst):
+ match = False
+ break
+ if match:
+ if markup:
+ found = markup
+ else:
+ found = markupName
+ return found
+
+ def search(self, markup):
+ #print 'looking for %s in %s' % (self, markup)
+ found = None
+ # If given a list of items, scan it for a text element that
+ # matches.
+ if hasattr(markup, "__iter__") \
+ and not isinstance(markup, Tag):
+ for element in markup:
+ if isinstance(element, NavigableString) \
+ and self.search(element):
+ found = element
+ break
+ # If it's a Tag, make sure its name or attributes match.
+ # Don't bother with Tags if we're searching for text.
+ elif isinstance(markup, Tag):
+ if not self.text:
+ found = self.searchTag(markup)
+ # If it's text, make sure the text matches.
+ elif isinstance(markup, NavigableString) or \
+ isinstance(markup, basestring):
+ if self._matches(markup, self.text):
+ found = markup
+ else:
+ raise Exception, "I don't know how to match against a %s" \
+ % markup.__class__
+ return found
+
+ def _matches(self, markup, matchAgainst):
+ #print "Matching %s against %s" % (markup, matchAgainst)
+ result = False
+ if matchAgainst is True:
+ result = markup is not None
+ elif callable(matchAgainst):
+ result = matchAgainst(markup)
+ else:
+ #Custom match methods take the tag as an argument, but all
+ #other ways of matching match the tag name as a string.
+ if isinstance(markup, Tag):
+ markup = markup.name
+ if markup and not isinstance(markup, basestring):
+ markup = unicode(markup)
+ #Now we know that chunk is either a string, or None.
+ if hasattr(matchAgainst, 'match'):
+ # It's a regexp object.
+ result = markup and matchAgainst.search(markup)
+ elif hasattr(matchAgainst, '__iter__'): # list-like
+ result = markup in matchAgainst
+ elif hasattr(matchAgainst, 'items'):
+ result = markup.has_key(matchAgainst)
+ elif matchAgainst and isinstance(markup, basestring):
+ if isinstance(markup, unicode):
+ matchAgainst = unicode(matchAgainst)
+ else:
+ matchAgainst = str(matchAgainst)
+
+ if not result:
+ result = matchAgainst == markup
+ return result
+
+class ResultSet(list):
+ """A ResultSet is just a list that keeps track of the SoupStrainer
+ that created it."""
+ def __init__(self, source):
+ list.__init__([])
+ self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+ """Turns a list of maps, lists, or scalars into a single map.
+ Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+ NESTING_RESET_TAGS maps out of lists and partial maps."""
+ built = {}
+ for portion in args:
+ if hasattr(portion, 'items'):
+ #It's a map. Merge it.
+ for k,v in portion.items():
+ built[k] = v
+ elif hasattr(portion, '__iter__'): # is a list
+ #It's a list. Map each item to the default.
+ for k in portion:
+ built[k] = default
+ else:
+ #It's a scalar. Map it to the default.
+ built[portion] = default
+ return built
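+
+# For instance (illustrative), buildTagMap(None, ('br', 'hr'), {'td': ['tr']}, 'p')
+# returns {'br': None, 'hr': None, 'td': ['tr'], 'p': None}: lists and scalars
+# map to the default, while partial maps are merged in unchanged.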
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+ """This class contains the basic parser and search code. It defines
+ a parser that knows nothing about tag behavior except for the
+ following:
+
+ You can't close a tag without closing all the tags it encloses.
+ That is, "<foo><bar></foo>" actually means
+ "<foo><bar></bar></foo>".
+
+ [Another possible explanation is "<foo><bar /></foo>", but since
+ this class defines no SELF_CLOSING_TAGS, it will never use that
+ explanation.]
+
+ This class is useful for parsing XML or made-up markup languages,
+ or when BeautifulSoup makes an assumption counter to what you were
+ expecting."""
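+
+ # A sketch of the closing rule above (illustrative):
+ #     str(BeautifulStoneSoup("<foo><bar>text</foo>"))
+ #     # -> '<foo><bar>text</bar></foo>'; the unclosed <bar> is closed when
+ #     # its enclosing <foo> closes, since no SELF_CLOSING_TAGS are defined.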
+
+ SELF_CLOSING_TAGS = {}
+ NESTABLE_TAGS = {}
+ RESET_NESTING_TAGS = {}
+ QUOTE_TAGS = {}
+ PRESERVE_WHITESPACE_TAGS = []
+
+ MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+ lambda x: x.group(1) + ' />'),
+ (re.compile('<!\s+([^<>]*)>'),
+ lambda x: '<!' + x.group(1) + '>')
+ ]
+
+ ROOT_TAG_NAME = u'[document]'
+
+ HTML_ENTITIES = "html"
+ XML_ENTITIES = "xml"
+ XHTML_ENTITIES = "xhtml"
+ # TODO: This only exists for backwards-compatibility
+ ALL_ENTITIES = XHTML_ENTITIES
+
+ # Used when determining whether a text node is all whitespace and
+ # can be replaced with a single space. A text node that contains
+ # fancy Unicode spaces (usually non-breaking) should be left
+ # alone.
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
+
+ def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
+ markupMassage=True, smartQuotesTo=XML_ENTITIES,
+ convertEntities=None, selfClosingTags=None, isHTML=False):
+ """The Soup object is initialized as the 'root tag', and the
+ provided markup (which can be a string or a file-like object)
+ is fed into the underlying parser.
+
+ sgmllib will process most bad HTML, and the BeautifulSoup
+ class has some tricks for dealing with some HTML that kills
+ sgmllib, but Beautiful Soup can nonetheless choke or lose data
+ if your data uses self-closing tags or declarations
+ incorrectly.
+
+ By default, Beautiful Soup uses regexes to sanitize input,
+ avoiding the vast majority of these problems. If the problems
+ don't apply to you, pass in False for markupMassage, and
+ you'll get better performance.
+
+ The default parser massage techniques fix the two most common
+ instances of invalid HTML that choke sgmllib:
+
+ <br/> (No space between name of closing tag and tag close)
+ <! --Comment--> (Extraneous whitespace in declaration)
+
+ You can pass in a custom list of (RE object, replace method)
+ tuples to get Beautiful Soup to scrub your input the way you
+ want."""
+
+ self.parseOnlyThese = parseOnlyThese
+ self.fromEncoding = fromEncoding
+ self.smartQuotesTo = smartQuotesTo
+ self.convertEntities = convertEntities
+ # Set the rules for how we'll deal with the entities we
+ # encounter
+ if self.convertEntities:
+ # It doesn't make sense to convert encoded characters to
+ # entities even while you're converting entities to Unicode.
+ # Just convert it all to Unicode.
+ self.smartQuotesTo = None
+ if convertEntities == self.HTML_ENTITIES:
+ self.convertXMLEntities = False
+ self.convertHTMLEntities = True
+ self.escapeUnrecognizedEntities = True
+ elif convertEntities == self.XHTML_ENTITIES:
+ self.convertXMLEntities = True
+ self.convertHTMLEntities = True
+ self.escapeUnrecognizedEntities = False
+ elif convertEntities == self.XML_ENTITIES:
+ self.convertXMLEntities = True
+ self.convertHTMLEntities = False
+ self.escapeUnrecognizedEntities = False
+ else:
+ self.convertXMLEntities = False
+ self.convertHTMLEntities = False
+ self.escapeUnrecognizedEntities = False
+
+ self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
+ SGMLParser.__init__(self)
+
+ if hasattr(markup, 'read'): # It's a file-type object.
+ markup = markup.read()
+ self.markup = markup
+ self.markupMassage = markupMassage
+ try:
+ self._feed(isHTML=isHTML)
+ except StopParsing:
+ pass
+ self.markup = None # The markup can now be GCed
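+
+ # The markupMassage behaviour documented above can be tuned per soup; a
+ # rough sketch (illustrative; 'markup_text' is an assumed variable):
+ #     BeautifulStoneSoup(markup_text, markupMassage=False)  # skip the regex scrub
+ #     custom = [(re.compile('<!-([^-])'), lambda m: '<!--' + m.group(1))]
+ #     BeautifulStoneSoup(markup_text, markupMassage=custom) # (RE, replacement) pairs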
+
+ def convert_charref(self, name):
+ """This method fixes a bug in Python's SGMLParser."""
+ try:
+ n = int(name)
+ except ValueError:
+ return
+ if not 0 <= n <= 127 : # ASCII ends at 127, not 255
+ return
+ return self.convert_codepoint(n)
+
+ def _feed(self, inDocumentEncoding=None, isHTML=False):
+ # Convert the document to Unicode.
+ markup = self.markup
+ if isinstance(markup, unicode):
+ if not hasattr(self, 'originalEncoding'):
+ self.originalEncoding = None
+ else:
+ dammit = UnicodeDammit\
+ (markup, [self.fromEncoding, inDocumentEncoding],
+ smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
+ markup = dammit.unicode
+ self.originalEncoding = dammit.originalEncoding
+ self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
+ if markup:
+ if self.markupMassage:
+ if not hasattr(self.markupMassage, "__iter__"):
+ self.markupMassage = self.MARKUP_MASSAGE
+ for fix, m in self.markupMassage:
+ markup = fix.sub(m, markup)
+ # TODO: We get rid of markupMassage so that the
+ # soup object can be deepcopied later on. Some
+ # Python installations can't copy regexes. If anyone
+ # was relying on the existence of markupMassage, this
+ # might cause problems.
+ del(self.markupMassage)
+ self.reset()
+
+ SGMLParser.feed(self, markup)
+ # Close out any unfinished strings and close all the open tags.
+ self.endData()
+ while self.currentTag.name != self.ROOT_TAG_NAME:
+ self.popTag()
+
+ def __getattr__(self, methodName):
+ """This method routes method call requests to either the SGMLParser
+ superclass or the Tag superclass, depending on the method name."""
+ #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
+
+ if methodName.startswith('start_') or methodName.startswith('end_') \
+ or methodName.startswith('do_'):
+ return SGMLParser.__getattr__(self, methodName)
+ elif not methodName.startswith('__'):
+ return Tag.__getattr__(self, methodName)
+ else:
+ raise AttributeError
+
+ def isSelfClosingTag(self, name):
+ """Returns true iff the given string is the name of a
+ self-closing tag according to this parser."""
+ return self.SELF_CLOSING_TAGS.has_key(name) \
+ or self.instanceSelfClosingTags.has_key(name)
+
+ def reset(self):
+ Tag.__init__(self, self, self.ROOT_TAG_NAME)
+ self.hidden = 1
+ SGMLParser.reset(self)
+ self.currentData = []
+ self.currentTag = None
+ self.tagStack = []
+ self.quoteStack = []
+ self.pushTag(self)
+
+ def popTag(self):
+ tag = self.tagStack.pop()
+
+ #print "Pop", tag.name
+ if self.tagStack:
+ self.currentTag = self.tagStack[-1]
+ return self.currentTag
+
+ def pushTag(self, tag):
+ #print "Push", tag.name
+ if self.currentTag:
+ self.currentTag.contents.append(tag)
+ self.tagStack.append(tag)
+ self.currentTag = self.tagStack[-1]
+
+ def endData(self, containerClass=NavigableString):
+ if self.currentData:
+ currentData = u''.join(self.currentData)
+ if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
+ not set([tag.name for tag in self.tagStack]).intersection(
+ self.PRESERVE_WHITESPACE_TAGS)):
+ if '\n' in currentData:
+ currentData = '\n'
+ else:
+ currentData = ' '
+ self.currentData = []
+ if self.parseOnlyThese and len(self.tagStack) <= 1 and \
+ (not self.parseOnlyThese.text or \
+ not self.parseOnlyThese.search(currentData)):
+ return
+ o = containerClass(currentData)
+ o.setup(self.currentTag, self.previous)
+ if self.previous:
+ self.previous.next = o
+ self.previous = o
+ self.currentTag.contents.append(o)
+
+
+ def _popToTag(self, name, inclusivePop=True):
+ """Pops the tag stack up to and including the most recent
+ instance of the given tag. If inclusivePop is false, pops the tag
+ stack up to but *not* including the most recent instance of
+ the given tag."""
+ #print "Popping to %s" % name
+ if name == self.ROOT_TAG_NAME:
+ return
+
+ numPops = 0
+ mostRecentTag = None
+ for i in range(len(self.tagStack)-1, 0, -1):
+ if name == self.tagStack[i].name:
+ numPops = len(self.tagStack)-i
+ break
+ if not inclusivePop:
+ numPops = numPops - 1
+
+ for i in range(0, numPops):
+ mostRecentTag = self.popTag()
+ return mostRecentTag
+
+ def _smartPop(self, name):
+
+ """We need to pop up to the previous tag of this type, unless
+ one of this tag's nesting reset triggers comes between this
+ tag and the previous tag of this type, OR unless this tag is a
+ generic nesting trigger and another generic nesting trigger
+ comes between this tag and the previous tag of this type.
+
+ Examples:
+ <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+ <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+ <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+ <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+ <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+ <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+ """
+
+ nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+ isNestable = nestingResetTriggers != None
+ isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+ popTo = None
+ inclusive = True
+ for i in range(len(self.tagStack)-1, 0, -1):
+ p = self.tagStack[i]
+ if (not p or p.name == name) and not isNestable:
+ #Non-nestable tags get popped to the top or to their
+ #last occurrence.
+ popTo = name
+ break
+ if (nestingResetTriggers is not None
+ and p.name in nestingResetTriggers) \
+ or (nestingResetTriggers is None and isResetNesting
+ and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+ #If we encounter one of the nesting reset triggers
+ #peculiar to this tag, or we encounter another tag
+ #that causes nesting to reset, pop up to but not
+ #including that tag.
+ popTo = p.name
+ inclusive = False
+ break
+ p = p.parent
+ if popTo:
+ self._popToTag(popTo, inclusive)
+
+ def unknown_starttag(self, name, attrs, selfClosing=0):
+ #print "Start tag %s: %s" % (name, attrs)
+ if self.quoteStack:
+ #This is not a real tag.
+ #print "<%s> is not real!" % name
+ attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+ self.handle_data('<%s%s>' % (name, attrs))
+ return
+ self.endData()
+
+ if not self.isSelfClosingTag(name) and not selfClosing:
+ self._smartPop(name)
+
+ if self.parseOnlyThese and len(self.tagStack) <= 1 \
+ and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+ return
+
+ tag = Tag(self, name, attrs, self.currentTag, self.previous)
+ if self.previous:
+ self.previous.next = tag
+ self.previous = tag
+ self.pushTag(tag)
+ if selfClosing or self.isSelfClosingTag(name):
+ self.popTag()
+ if name in self.QUOTE_TAGS:
+ #print "Beginning quote (%s)" % name
+ self.quoteStack.append(name)
+ self.literal = 1
+ return tag
+
+ def unknown_endtag(self, name):
+ #print "End tag %s" % name
+ if self.quoteStack and self.quoteStack[-1] != name:
+ #This is not a real end tag.
+ #print "</%s> is not real!" % name
+ self.handle_data('</%s>' % name)
+ return
+ self.endData()
+ self._popToTag(name)
+ if self.quoteStack and self.quoteStack[-1] == name:
+ self.quoteStack.pop()
+ self.literal = (len(self.quoteStack) > 0)
+
+ def handle_data(self, data):
+ self.currentData.append(data)
+
+ def _toStringSubclass(self, text, subclass):
+ """Adds a certain piece of text to the tree as a NavigableString
+ subclass."""
+ self.endData()
+ self.handle_data(text)
+ self.endData(subclass)
+
+ def handle_pi(self, text):
+ """Handle a processing instruction as a ProcessingInstruction
+ object, possibly one with a %SOUP-ENCODING% slot into which an
+ encoding will be plugged later."""
+ if text[:3] == "xml":
+ text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
+ self._toStringSubclass(text, ProcessingInstruction)
+
+ def handle_comment(self, text):
+ "Handle comments as Comment objects."
+ self._toStringSubclass(text, Comment)
+
+ def handle_charref(self, ref):
+ "Handle character references as data."
+ if self.convertEntities:
+ data = unichr(int(ref))
+ else:
+ data = '&#%s;' % ref
+ self.handle_data(data)
+
+ def handle_entityref(self, ref):
+ """Handle entity references as data, possibly converting known
+ HTML and/or XML entity references to the corresponding Unicode
+ characters."""
+ data = None
+ if self.convertHTMLEntities:
+ try:
+ data = unichr(name2codepoint[ref])
+ except KeyError:
+ pass
+
+ if not data and self.convertXMLEntities:
+ data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
+
+ if not data and self.convertHTMLEntities and \
+ not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
+ # TODO: We've got a problem here. We're told this is
+ # an entity reference, but it's not an XML entity
+ # reference or an HTML entity reference. Nonetheless,
+ # the logical thing to do is to pass it through as an
+ # unrecognized entity reference.
+ #
+ # Except: when the input is "&carol;" this function
+ # will be called with input "carol". When the input is
+ # "AT&T", this function will be called with input
+ # "T". We have no way of knowing whether a semicolon
+ # was present originally, so we don't know whether
+ # this is an unknown entity or just a misplaced
+ # ampersand.
+ #
+ # The more common case is a misplaced ampersand, so I
+ # escape the ampersand and omit the trailing semicolon.
+ data = "&amp;%s" % ref
+ if not data:
+ # This case is different from the one above, because we
+ # haven't already gone through a supposedly comprehensive
+ # mapping of entities to Unicode characters. We might not
+ # have gone through any mapping at all. So the chances are
+ # very high that this is a real entity, and not a
+ # misplaced ampersand.
+ data = "&%s;" % ref
+ self.handle_data(data)
+
+ def handle_decl(self, data):
+ "Handle DOCTYPEs and the like as Declaration objects."
+ self._toStringSubclass(data, Declaration)
+
+ def parse_declaration(self, i):
+ """Treat a bogus SGML declaration as raw data. Treat a CDATA
+ declaration as a CData object."""
+ j = None
+ if self.rawdata[i:i+9] == '<![CDATA[':
+ k = self.rawdata.find(']]>', i)
+ if k == -1:
+ k = len(self.rawdata)
+ data = self.rawdata[i+9:k]
+ j = k+3
+ self._toStringSubclass(data, CData)
+ else:
+ try:
+ j = SGMLParser.parse_declaration(self, i)
+ except SGMLParseError:
+ toHandle = self.rawdata[i:]
+ self.handle_data(toHandle)
+ j = i + len(toHandle)
+ return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+ """This parser knows the following facts about HTML:
+
+ * Some tags have no closing tag and should be interpreted as being
+ closed as soon as they are encountered.
+
+ * The text inside some tags (ie. 'script') may contain tags which
+ are not really part of the document and which should be parsed
+ as text, not tags. If you want to parse the text as tags, you can
+ always fetch it and parse it explicitly.
+
+ * Tag nesting rules:
+
+ Most tags can't be nested at all. For instance, the occurrence of
+ a <p> tag should implicitly close the previous <p> tag.
+
+ <p>Para1<p>Para2
+ should be transformed into:
+ <p>Para1</p><p>Para2
+
+ Some tags can be nested arbitrarily. For instance, the occurrence
+ of a <blockquote> tag should _not_ implicitly close the previous
+ <blockquote> tag.
+
+ Alice said: <blockquote>Bob said: <blockquote>Blah
+ should NOT be transformed into:
+ Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+ Some tags can be nested, but the nesting is reset by the
+ interposition of other tags. For instance, a <tr> tag should
+ implicitly close the previous <tr> tag within the same <table>,
+ but not close a <tr> tag in another table.
+
+ <table><tr>Blah<tr>Blah
+ should be transformed into:
+ <table><tr>Blah</tr><tr>Blah
+ but,
+ <tr>Blah<table><tr>Blah
+ should NOT be transformed into
+ <tr>Blah<table></tr><tr>Blah
+
+ Differing assumptions about tag nesting rules are a major source
+ of problems with the BeautifulSoup class. If BeautifulSoup is not
+ treating as nestable a tag your page author treats as nestable,
+ try ICantBelieveItsBeautifulSoup, MinimalSoup, or
+ BeautifulStoneSoup before writing your own subclass."""
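+
+ # A sketch of the nesting rules above (illustrative):
+ #     str(BeautifulSoup("<p>Para1<p>Para2"))
+ #     # -> '<p>Para1</p><p>Para2</p>'   (p is non-nestable)
+ #     str(BeautifulSoup("<table><tr>Blah<tr>Blah</table>"))
+ #     # the second <tr> implicitly closes the first, as described above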
+
+ def __init__(self, *args, **kwargs):
+ if not kwargs.has_key('smartQuotesTo'):
+ kwargs['smartQuotesTo'] = self.HTML_ENTITIES
+ kwargs['isHTML'] = True
+ BeautifulStoneSoup.__init__(self, *args, **kwargs)
+
+ SELF_CLOSING_TAGS = buildTagMap(None,
+ ('br' , 'hr', 'input', 'img', 'meta',
+ 'spacer', 'link', 'frame', 'base', 'col'))
+
+ PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
+
+ QUOTE_TAGS = {'script' : None, 'textarea' : None}
+
+ #According to the HTML standard, each of these inline tags can
+ #contain another tag of the same type. Furthermore, it's common
+ #to actually use these tags this way.
+ NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
+ 'center')
+
+ #According to the HTML standard, these block tags can contain
+ #another tag of the same type. Furthermore, it's common
+ #to actually use these tags this way.
+ NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
+
+ #Lists can contain other lists, but there are restrictions.
+ NESTABLE_LIST_TAGS = { 'ol' : [],
+ 'ul' : [],
+ 'li' : ['ul', 'ol'],
+ 'dl' : [],
+ 'dd' : ['dl'],
+ 'dt' : ['dl'] }
+
+ #Tables can contain other tables, but there are restrictions.
+ NESTABLE_TABLE_TAGS = {'table' : [],
+ 'tr' : ['table', 'tbody', 'tfoot', 'thead'],
+ 'td' : ['tr'],
+ 'th' : ['tr'],
+ 'thead' : ['table'],
+ 'tbody' : ['table'],
+ 'tfoot' : ['table'],
+ }
+
+ NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
+
+ #If one of these tags is encountered, all tags up to the next tag of
+ #this type are popped.
+ RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
+ NON_NESTABLE_BLOCK_TAGS,
+ NESTABLE_LIST_TAGS,
+ NESTABLE_TABLE_TAGS)
+
+ NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
+ NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
+
+ # Used to detect the charset in a META tag; see start_meta
+ CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
+
+ def start_meta(self, attrs):
+ """Beautiful Soup can detect a charset included in a META tag,
+ try to convert the document to that charset, and re-parse the
+ document from the beginning."""
+ httpEquiv = None
+ contentType = None
+ contentTypeIndex = None
+ tagNeedsEncodingSubstitution = False
+
+ for i in range(0, len(attrs)):
+ key, value = attrs[i]
+ key = key.lower()
+ if key == 'http-equiv':
+ httpEquiv = value
+ elif key == 'content':
+ contentType = value
+ contentTypeIndex = i
+
+ if httpEquiv and contentType: # It's an interesting meta tag.
+ match = self.CHARSET_RE.search(contentType)
+ if match:
+ if (self.declaredHTMLEncoding is not None or
+ self.originalEncoding == self.fromEncoding):
+ # An HTML encoding was sniffed while converting
+ # the document to Unicode, or an HTML encoding was
+ # sniffed during a previous pass through the
+ # document, or an encoding was specified
+ # explicitly and it worked. Rewrite the meta tag.
+ def rewrite(match):
+ return match.group(1) + "%SOUP-ENCODING%"
+ newAttr = self.CHARSET_RE.sub(rewrite, contentType)
+ attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
+ newAttr)
+ tagNeedsEncodingSubstitution = True
+ else:
+ # This is our first pass through the document.
+ # Go through it again with the encoding information.
+ newCharset = match.group(3)
+ if newCharset and newCharset != self.originalEncoding:
+ self.declaredHTMLEncoding = newCharset
+ self._feed(self.declaredHTMLEncoding)
+ raise StopParsing
+ pass
+ tag = self.unknown_starttag("meta", attrs)
+ if tag and tagNeedsEncodingSubstitution:
+ tag.containsSubstitutions = True
+
+class StopParsing(Exception):
+ pass
+
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+ """The BeautifulSoup class is oriented towards skipping over
+ common HTML errors like unclosed tags. However, sometimes it makes
+ errors of its own. For instance, consider this fragment:
+
+ <b>Foo<b>Bar</b></b>
+
+ This is perfectly valid (if bizarre) HTML. However, the
+ BeautifulSoup class will implicitly close the first b tag when it
+ encounters the second 'b'. It will think the author wrote
+ "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+ there's no real-world reason to bold something that's already
+ bold. When it encounters '</b></b>' it will close two more 'b'
+ tags, for a grand total of three tags closed instead of two. This
+ can throw off the rest of your document structure. The same is
+ true of a number of other tags, listed below.
+
+ It's much more common for someone to forget to close a 'b' tag
+ than to actually use nested 'b' tags, and the BeautifulSoup class
+ handles the common case. This class handles the not-so-common
+ case: where you can't believe someone wrote what they did, but
+ it's valid HTML and BeautifulSoup screwed up by assuming it
+ wouldn't be."""
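+
+ # A sketch of the difference described above (illustrative), for the markup
+ # '<b>Foo<b>Bar</b></b>':
+ #     BeautifulSoup(markup)                    # second <b> closes the first
+ #     ICantBelieveItsBeautifulSoup(markup)     # keeps the <b> tags nested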
+
+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+ ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+ 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+ 'big')
+
+ I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript')
+
+ NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+ I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class MinimalSoup(BeautifulSoup):
+ """The MinimalSoup class is for parsing HTML that contains
+ pathologically bad markup. It makes no assumptions about tag
+ nesting, but it does know which tags are self-closing, that
+ <script> tags contain Javascript and should not be parsed, that
+ META tags may contain encoding information, and so on.
+
+ This also makes it better for subclassing than BeautifulStoneSoup
+ or BeautifulSoup."""
+
+ RESET_NESTING_TAGS = buildTagMap('noscript')
+ NESTABLE_TAGS = {}
+
+class BeautifulSOAP(BeautifulStoneSoup):
+ """This class will push a tag with only a single string child into
+ the tag's parent as an attribute. The attribute's name is the tag
+ name, and the value is the string child. An example should give
+ the flavor of the change:
+
+ <foo><bar>baz</bar></foo>
+ =>
+ <foo bar="baz"><bar>baz</bar></foo>
+
+ You can then access fooTag['bar'] instead of fooTag.barTag.string.
+
+ This is, of course, useful for scraping structures that tend to
+ use subelements instead of attributes, such as SOAP messages. Note
+ that it modifies its input, so don't print the modified version
+ out.
+
+ I'm not sure how many people really want to use this class; let me
+ know if you do. Mainly I like the name."""
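+
+ # A sketch of the promotion described above (illustrative):
+ #     soup = BeautifulSOAP('<foo><bar>baz</bar></foo>')
+ #     soup.foo['bar']       # -> u'baz', instead of soup.foo.barTag.string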
+
+ def popTag(self):
+ if len(self.tagStack) > 1:
+ tag = self.tagStack[-1]
+ parent = self.tagStack[-2]
+ parent._getAttrMap()
+ if (isinstance(tag, Tag) and len(tag.contents) == 1 and
+ isinstance(tag.contents[0], NavigableString) and
+ not parent.attrMap.has_key(tag.name)):
+ parent[tag.name] = tag.contents[0]
+ BeautifulStoneSoup.popTag(self)
+
+#Enterprise class names! It has come to our attention that some people
+#think the names of the Beautiful Soup parser classes are too silly
+#and "unprofessional" for use in enterprise screen-scraping. We feel
+#your pain! For such-minded folk, the Beautiful Soup Consortium And
+#All-Night Kosher Bakery recommends renaming this file to
+#"RobustParser.py" (or, in cases of extreme enterprisiness,
+#"RobustParserBeanInterface.class") and using the following
+#enterprise-friendly class aliases:
+class RobustXMLParser(BeautifulStoneSoup):
+ pass
+class RobustHTMLParser(BeautifulSoup):
+ pass
+class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
+ pass
+class RobustInsanelyWackAssHTMLParser(MinimalSoup):
+ pass
+class SimplifyingSOAPParser(BeautifulSOAP):
+ pass
+
+######################################################
+#
+# Bonus library: Unicode, Dammit
+#
+# This class forces XML data into a standard format (usually to UTF-8
+# or Unicode). It is heavily based on code from Mark Pilgrim's
+# Universal Feed Parser. It does not rewrite the XML or HTML to
+# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
+# (XML) and BeautifulSoup.start_meta (HTML).
+
+# Autodetects character encodings.
+# Download from http://chardet.feedparser.org/
+try:
+ import chardet
+# import chardet.constants
+# chardet.constants._debug = 1
+except ImportError:
+ chardet = None
+
+# cjkcodecs and iconv_codec make Python know about more character encodings.
+# Both are available from http://cjkpython.i18n.org/
+# They're built in if you use Python 2.4.
+try:
+ import cjkcodecs.aliases
+except ImportError:
+ pass
+try:
+ import iconv_codec
+except ImportError:
+ pass
+
+class UnicodeDammit:
+ """A class for detecting the encoding of a *ML document and
+ converting it to a Unicode string. If the source encoding is
+ windows-1252, can replace MS smart quotes with their HTML or XML
+ equivalents."""
+
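+    # Illustrative usage (raw_bytes is any byte string holding the document):
+    #   converted = UnicodeDammit(raw_bytes, isHTML=True)
+    #   text = converted.unicode               # None if no conversion succeeded
+    #   encoding = converted.originalEncoding
+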
+ # This dictionary maps commonly seen values for "charset" in HTML
+ # meta tags to the corresponding Python codec names. It only covers
+ # values that aren't in Python's aliases and can't be determined
+ # by the heuristics in find_codec.
+ CHARSET_ALIASES = { "macintosh" : "mac-roman",
+ "x-sjis" : "shift-jis" }
+
+ def __init__(self, markup, overrideEncodings=[],
+ smartQuotesTo='xml', isHTML=False):
+ self.declaredHTMLEncoding = None
+ self.markup, documentEncoding, sniffedEncoding = \
+ self._detectEncoding(markup, isHTML)
+ self.smartQuotesTo = smartQuotesTo
+ self.triedEncodings = []
+ if markup == '' or isinstance(markup, unicode):
+ self.originalEncoding = None
+ self.unicode = unicode(markup)
+ return
+
+ u = None
+ for proposedEncoding in overrideEncodings:
+ u = self._convertFrom(proposedEncoding)
+ if u: break
+ if not u:
+ for proposedEncoding in (documentEncoding, sniffedEncoding):
+ u = self._convertFrom(proposedEncoding)
+ if u: break
+
+ # If no luck and we have auto-detection library, try that:
+ if not u and chardet and not isinstance(self.markup, unicode):
+ u = self._convertFrom(chardet.detect(self.markup)['encoding'])
+
+ # As a last resort, try utf-8 and windows-1252:
+ if not u:
+ for proposed_encoding in ("utf-8", "windows-1252"):
+ u = self._convertFrom(proposed_encoding)
+ if u: break
+
+ self.unicode = u
+ if not u: self.originalEncoding = None
+
+ def _subMSChar(self, orig):
+ """Changes a MS smart quote character to an XML or HTML
+ entity."""
+ sub = self.MS_CHARS.get(orig)
+ if isinstance(sub, tuple):
+ if self.smartQuotesTo == 'xml':
+ sub = '&#x%s;' % sub[1]
+ else:
+ sub = '&%s;' % sub[0]
+ return sub
+
+ def _convertFrom(self, proposed):
+ proposed = self.find_codec(proposed)
+ if not proposed or proposed in self.triedEncodings:
+ return None
+ self.triedEncodings.append(proposed)
+ markup = self.markup
+
+ # Convert smart quotes to HTML if coming from an encoding
+ # that might have them.
+ if self.smartQuotesTo and proposed.lower() in("windows-1252",
+ "iso-8859-1",
+ "iso-8859-2"):
+ markup = re.compile("([\x80-\x9f])").sub \
+ (lambda(x): self._subMSChar(x.group(1)),
+ markup)
+
+ try:
+ # print "Trying to convert document to %s" % proposed
+ u = self._toUnicode(markup, proposed)
+ self.markup = u
+ self.originalEncoding = proposed
+ except Exception, e:
+ # print "That didn't work!"
+ # print e
+ return None
+ #print "Correct encoding: %s" % proposed
+ return self.markup
+
+ def _toUnicode(self, data, encoding):
+ '''Given a string and its encoding, decodes the string into Unicode.
+ %encoding is a string recognized by encodings.aliases'''
+
+ # strip Byte Order Mark (if present)
+ if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
+ and (data[2:4] != '\x00\x00'):
+ encoding = 'utf-16be'
+ data = data[2:]
+ elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
+ and (data[2:4] != '\x00\x00'):
+ encoding = 'utf-16le'
+ data = data[2:]
+ elif data[:3] == '\xef\xbb\xbf':
+ encoding = 'utf-8'
+ data = data[3:]
+ elif data[:4] == '\x00\x00\xfe\xff':
+ encoding = 'utf-32be'
+ data = data[4:]
+ elif data[:4] == '\xff\xfe\x00\x00':
+ encoding = 'utf-32le'
+ data = data[4:]
+ newdata = unicode(data, encoding)
+ return newdata
+
+ def _detectEncoding(self, xml_data, isHTML=False):
+ """Given a document, tries to detect its XML encoding."""
+ xml_encoding = sniffed_xml_encoding = None
+ try:
+ if xml_data[:4] == '\x4c\x6f\xa7\x94':
+ # EBCDIC
+ xml_data = self._ebcdic_to_ascii(xml_data)
+ elif xml_data[:4] == '\x00\x3c\x00\x3f':
+ # UTF-16BE
+ sniffed_xml_encoding = 'utf-16be'
+ xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+ elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
+ and (xml_data[2:4] != '\x00\x00'):
+ # UTF-16BE with BOM
+ sniffed_xml_encoding = 'utf-16be'
+ xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+ elif xml_data[:4] == '\x3c\x00\x3f\x00':
+ # UTF-16LE
+ sniffed_xml_encoding = 'utf-16le'
+ xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+ elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
+ (xml_data[2:4] != '\x00\x00'):
+ # UTF-16LE with BOM
+ sniffed_xml_encoding = 'utf-16le'
+ xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+ elif xml_data[:4] == '\x00\x00\x00\x3c':
+ # UTF-32BE
+ sniffed_xml_encoding = 'utf-32be'
+ xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+ elif xml_data[:4] == '\x3c\x00\x00\x00':
+ # UTF-32LE
+ sniffed_xml_encoding = 'utf-32le'
+ xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+ elif xml_data[:4] == '\x00\x00\xfe\xff':
+ # UTF-32BE with BOM
+ sniffed_xml_encoding = 'utf-32be'
+ xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+ elif xml_data[:4] == '\xff\xfe\x00\x00':
+ # UTF-32LE with BOM
+ sniffed_xml_encoding = 'utf-32le'
+ xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+ elif xml_data[:3] == '\xef\xbb\xbf':
+ # UTF-8 with BOM
+ sniffed_xml_encoding = 'utf-8'
+ xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+ else:
+ sniffed_xml_encoding = 'ascii'
+ pass
+ except:
+ xml_encoding_match = None
+ xml_encoding_match = re.compile(
+ '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
+ if not xml_encoding_match and isHTML:
+ regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I)
+ xml_encoding_match = regexp.search(xml_data)
+ if xml_encoding_match is not None:
+ xml_encoding = xml_encoding_match.groups()[0].lower()
+ if isHTML:
+ self.declaredHTMLEncoding = xml_encoding
+ if sniffed_xml_encoding and \
+ (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
+ 'iso-10646-ucs-4', 'ucs-4', 'csucs4',
+ 'utf-16', 'utf-32', 'utf_16', 'utf_32',
+ 'utf16', 'u16')):
+ xml_encoding = sniffed_xml_encoding
+ return xml_data, xml_encoding, sniffed_xml_encoding
+
+
+ def find_codec(self, charset):
+ return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
+ or (charset and self._codec(charset.replace("-", ""))) \
+ or (charset and self._codec(charset.replace("-", "_"))) \
+ or charset
+
+ def _codec(self, charset):
+ if not charset: return charset
+ codec = None
+ try:
+ codecs.lookup(charset)
+ codec = charset
+ except (LookupError, ValueError):
+ pass
+ return codec
+
+ EBCDIC_TO_ASCII_MAP = None
+ def _ebcdic_to_ascii(self, s):
+ c = self.__class__
+ if not c.EBCDIC_TO_ASCII_MAP:
+ emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
+ 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
+ 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
+ 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
+ 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
+ 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
+ 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
+ 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
+ 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
+ 201,202,106,107,108,109,110,111,112,113,114,203,204,205,
+ 206,207,208,209,126,115,116,117,118,119,120,121,122,210,
+ 211,212,213,214,215,216,217,218,219,220,221,222,223,224,
+ 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
+ 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
+ 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
+ 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
+ 250,251,252,253,254,255)
+ import string
+ c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
+ ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ return s.translate(c.EBCDIC_TO_ASCII_MAP)
+
+ MS_CHARS = { '\x80' : ('euro', '20AC'),
+ '\x81' : ' ',
+ '\x82' : ('sbquo', '201A'),
+ '\x83' : ('fnof', '192'),
+ '\x84' : ('bdquo', '201E'),
+ '\x85' : ('hellip', '2026'),
+ '\x86' : ('dagger', '2020'),
+ '\x87' : ('Dagger', '2021'),
+ '\x88' : ('circ', '2C6'),
+ '\x89' : ('permil', '2030'),
+ '\x8A' : ('Scaron', '160'),
+ '\x8B' : ('lsaquo', '2039'),
+ '\x8C' : ('OElig', '152'),
+ '\x8D' : '?',
+ '\x8E' : ('#x17D', '17D'),
+ '\x8F' : '?',
+ '\x90' : '?',
+ '\x91' : ('lsquo', '2018'),
+ '\x92' : ('rsquo', '2019'),
+ '\x93' : ('ldquo', '201C'),
+ '\x94' : ('rdquo', '201D'),
+ '\x95' : ('bull', '2022'),
+ '\x96' : ('ndash', '2013'),
+ '\x97' : ('mdash', '2014'),
+ '\x98' : ('tilde', '2DC'),
+ '\x99' : ('trade', '2122'),
+ '\x9a' : ('scaron', '161'),
+ '\x9b' : ('rsaquo', '203A'),
+ '\x9c' : ('oelig', '153'),
+ '\x9d' : '?',
+ '\x9e' : ('#x17E', '17E'),
+ '\x9f' : ('Yuml', ''),}
+
+#######################################################################
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+ import sys
+ soup = BeautifulSoup(sys.stdin)
+ print soup.prettify()
diff --git a/ReadMe b/ReadMe
new file mode 100644
index 0000000..347d9ac
--- /dev/null
+++ b/ReadMe
@@ -0,0 +1,62 @@
+The Build directory contains scripts which are used to prepare milestones and activities from the original
+Siyavula content. It also contains scripts to generate 'basic' and 'Karma' activities from the source 'content' folder.
+
+The scripts assume the following directory structure:
+
+/library/
+/library/courseware
+/library/rwcd
+/library/rwcd/git
+/library/rwcd/git/content
+/library/rwcd/git/build
+/library/rwcd/Siyavula
+/library/rwcd/siyavula_science
+/library/rwcd/siyavula_technology
+/library/rwcd/siyavula_mathematics
+/library/rwcd/siyavula_english
+/library/rwcd/master
+/library/rwcd/trial
+
+Conversion of Siyavula Learner's Modules to 'basic' activities:
+
+(Assume conversion of Mathematics modules for P1)
+
+1. In siyavula_mathematics, create a folder for each course, e.g. za1.
+2. The original files are in the Siyavula folder: Ed Gd 1/Mathematics/Learner's Modules as MS Word documents.
+ Some of these modules are equivalent to a milestone. Others have enough material for multiple milestones. These
+ modules are often divided into Learning Units. These modules represent the course work for a complete year
+ (30 weeks). A typical milestone should contain enough material (activities) to require one to
+ two weeks for a student to complete. This means a year's course should have between 15 and 30 milestones. In Rwanda, the
+ year's course may be divided into three terms, each of which has from 5 to 10 milestones (typically 7-8).
+3. In this case there are eight modules (.doc files). Create a folder in za1 for each milestone,
+ e.g. map1m1, map1m2, .... In the current conversion each module is converted directly to a milestone.
+ Copy each module (.doc file) into the corresponding folder in siyavula_mathematics (map1m1 etc.).
+ At this point you should have eight folders in siyavula_mathematics, each containing a single .doc file.
+4. Open the .doc file in OpenOffice Writer. Save it as html (Save As, then select the html format at the bottom
+ right of the pop-up window). At this point the folder will contain the original .doc file, a .html file with the
+ same name, and the images for the module (.png and .gif).
+5. Look at the original doc file or at a printed copy. Decide where the module should be divided into activities. This
+ is art not science. There are several considerations in making this decision. First, the content is intended to be
+ printed on A4 paper. The html will be displayed on the OLPC laptop screen. Therefore an activity should fit on one
+ or more screens. Second, an activity should be a single task for the student. The student must be able to understand
+ clearly what he or she is to do and when he or she has finished. Students in Rwanda will normally have a forty-minute
+ period each day to work on the course. If a typical student can finish the milestone in seven periods and the
+ milestone has twelve activities, a typical activity would require roughly twenty to twenty-five minutes to complete. So the
+ process to identify activities within a milestone is to look at the original Siyavula module and decide how to divide
+ it into tasks. Some modules have activities, assignments, or a list (A., B., C.) that help identify the activities.
+6. Edit the .html file for the module in gedit or notepad. Enter a horizontal rule tag (<hr />) in the text to show
+ the separation between activities.
+7. On the command line, run the cleanUp.py script (./cleanUp.py siyavula_mathematics za1 map1m1). The cleanUp.py script will clean
+ up the html and write a file, source.txt, in the folder. This script operates on a single milestone and should be run
+ for each milestone to be converted. The source.txt file includes the <hr /> tags entered in the previous step.
+8. On the command line, run the cvtSiyavula.py script (./cvtSiyavula.py siyavula_mathematics za1). Note: the cvtSiyavula script
+ operates on all of the milestones in a course. The cvtSiyavula script creates folders in /library/rwcd/trial with
+ the top level named for the subject (siyavula_mathematics). In this folder, there is a folder per milestone. In the
+ milestone folder, there is a folder per activity. The cvtSiyavula script renames image files as img1.png, img2.png, ....
+ These images are resized to the height and width specified in the html <img> tag. Images in .gif format are
+ converted to .png format, retaining the size of the original.
+9. Copy the folders for the course (za1) to the content folder, replacing the ones already there, if any. In this case,
+ copy them to the /library/rwcd/git/content/Mathematics folder.
+10. In general, this conversion will be done only once for a module. Beyond that point, the content will be edited for
+ the OLPC laptop, to meet Rwanda curriculum requirements, and to improve the lesson. If the conversion process is
+ repeated, those edits will be lost.
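+
+In summary, converting one milestone of the example course looks like this (an illustrative
+sketch; the subject and course names follow the example above and may differ in your layout):
+
+    ./cleanUp.py siyavula_mathematics za1 map1m1
+    ./cvtSiyavula.py siyavula_mathematics za1
+
+followed by copying the generated course folder from the trial area into the content folder (step 9).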
diff --git a/cleanUp.py b/cleanUp.py
new file mode 100755
index 0000000..a9dc662
--- /dev/null
+++ b/cleanUp.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+#clean up tags in html conversion of Siyavula module
+#write cleaned up version as source.txt in source folder
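+#usage (illustrative): ./cleanUp.py siyavula_mathematics za1 map1m1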
+from path import path
+import os, sys, subprocess
+from BeautifulSoup import BeautifulSoup
+from optparse import OptionParser
+from cvtFile import Cvt
+
+SOURCE = path('../')
+
+def makesoup(txtin):
+ soup = BeautifulSoup(txtin,fromEncoding="utf-8")
+ return soup
+
+#use BeautifulSoup to clean up tags
+def applyFix(soup):
+ #remove <head>
+ try:
+ soup.head.extract()
+ except:
+ pass
+ #perform conversions
+ cvt = Cvt(soup)
+ for method in cvt.processlist:
+ cvt.methods[method]()
+ #set up txt
+ txtout = cvt.soup.prettify()
+ #return
+ return txtout
+
+parser = OptionParser(usage="Usage: %prog [options] file")
+(options, args) = parser.parse_args()
+if not args:
+    print 'Specify a subject, course, and module (e.g. siyavula_mathematics za1 map1m1) as arguments.'
+ parser.print_help()
+ sys.exit(1)
+
+SUBJECT = args[0]
+COURSE = args[1]
+MODULE = args[2]
+tag = 'page-break-before: always">'
+basepath = SOURCE / SUBJECT / COURSE / MODULE
+fin = open(basepath / MODULE + '.html','r')
+txt = fin.read()
+fin.close()
+txt = txt.replace('\n',' ')
+txtout = ''
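+#split the OpenOffice export at each page break, clean each chunk with BeautifulSoup,
+#and separate the cleaned chunks with <hr /> markers in the output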
+while txt.find(tag) > -1:
+ pos = txt.find(tag)
+ txtin = txt[:pos+len(tag)]
+ txt = txt[pos+len(tag):]
+ if txtout:
+ txtout += '\n\n<hr />\n\n'
+ #set up soup
+ soup = makesoup(txtin)
+ txtout += applyFix(soup)
+fout = open(basepath / 'source.txt','w')
+fout.write(txtout)
+fout.close()
diff --git a/cvtFile.py b/cvtFile.py
new file mode 100755
index 0000000..f70881a
--- /dev/null
+++ b/cvtFile.py
@@ -0,0 +1,185 @@
+#!/usr/bin/python
+"""
+This version acts as cgi script, applying selected conversions
+to a file supplied in the form
+
+get form parameters
+make soup
+apply conversions
+make txt
+return
+"""
+import os, sys
+import logging
+from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
+from path import path
+import subprocess
+from PIL import Image
+
+class Cvt():
+ def __init__(self, soup):
+ self.soup = soup
+ self.logfile = open('logfile', 'w')
+ self.kstrongs = 0
+ self.kfonts = 0
+ self.kspans = 0
+ #methods
+ #method_2 - remove style attributes from table tags
+ #method_3 - remove style attributes from td tags
+ #method_4 - remove style attributes from p tags
+ #method_5 - remove style attributes from tr tags
+ #method_6 - remove col tags
+ #method_9 - remove fonts retaining content
+ #method_10 - remove empty p tags (string == '&nbsp;')
+ #method_11 - remove spans retaining content
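+        #method_7 - remove attributes from span tags
+        #method_12 - remove strong tags retaining content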
+ self.processlist = [2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
+ self.iterlist = [9, 11, 12]
+ self.methods = { 1:self.method_1, 2:self.method_2, 3:self.method_3, 4:self.method_4, 5:self.method_5,
+ 6:self.method_6, 7:self.method_7, 8:self.method_8, 9:self.method_9, 10:self.method_10,
+ 11:self.method_11, 12:self.method_12,
+ }
+
+ def close_logfile(self):
+ self.logfile.close()
+
+ def method_1(self):
+ link = Tag(self.soup, 'link')
+ link['rel']="StyleSheet"
+ link['type']="text/css"
+ link['href']="../../css/activity.css"
+ meta = self.soup.find('meta')
+ meta.insert(0,link)
+
+ #remove style attributes from table tags
+ def method_2(self):
+ tblkeys = ['width', 'border', 'bordercolor', 'cellpadding', 'cellspacing', 'frame', 'rules', 'dir']
+ tbls = self.soup.findAll('table')
+ for tbl in tbls:
+ for key in tblkeys:
+ try:
+ del tbl[key]
+ except:
+ pass
+
+ #remove style attributes from td tags
+ def method_3(self):
+ tdkeys = ['width', 'height', 'bgcolor', 'valign']
+ tds = self.soup.findAll('td')
+ for td in tds:
+ for key in tdkeys:
+ try:
+ del td[key]
+ except:
+ pass
+
+ #remove style attributes from p tags
+ def method_4(self):
+ pkeys=['lang', 'align', 'style', 'class']
+ ps = self.soup.findAll('p')
+ for p in ps:
+ for key in pkeys:
+ centerflag = False
+ try:
+ if 'head' in p['class']:
+ centerflag = True
+ except:
+ pass
+ try:
+ del p[key]
+ except:
+ pass
+ if centerflag:
+ p['class'] = 'center'
+
+ #remove style attributes from tr tags
+ def method_5(self):
+ pkeys=['lang', 'align', 'style', 'class', 'valign']
+ ps = self.soup.findAll('tr')
+ for p in ps:
+ for key in pkeys:
+ try:
+ del p[key]
+ except:
+ pass
+
+ #remove col tags
+ def method_6(self):
+ cols = self.soup.findAll('col')
+ for col in cols:
+ col.extract()
+
+ #remove attributes from span tags
+ def method_7(self):
+ pkeys=['lang', 'align', 'style', 'class']
+ ps = self.soup.findAll('span')
+ for p in ps:
+ for key in pkeys:
+ try:
+ del p[key]
+ except:
+ pass
+
+
+ def method_8(self):
+ ps = self.soup.findAll('p')
+ for p in ps:
+ bs = p.findAll('b')
+ for b in bs:
+ try:
+ if 'LO' in b.string:
+ if p.findParents('h1'):
+ h1=p.findParent('h1')
+ h1.replaceWith('<p class="solid">' + str(b) + '</p>')
+ else:
+ p.replaceWith('<p class="solid">' + str(b) + '</p>')
+ except:
+ print 'b string not found', b.prettify()
+
+ #remove font tags retaining content
+ def method_9(self):
+ while self.soup.font:
+ if len(self.soup.font.contents) > 1:
+ self.soup.font.replaceWith(self.soup.font.contents[1])
+ elif len(self.soup.font.contents) > 0:
+ self.soup.font.replaceWith(self.soup.font.contents[0])
+ else:
+ self.soup.font.extract()
+
+ #remove empty paragraphs (used for spacing)
+ def method_10(self):
+ tags = self.soup.findAll('br')
+ for tag in tags:
+ if len(tag) == 0:
+ tag.extract()
+ tags = self.soup.findAll('p')
+ for tag in tags:
+ if len(tag)==0:
+ tag.extract()
+ if tag.string:
+ str = tag.string.strip()
+ if not str:
+ tag.extract()
+
+ #remove spans (replace with p tags) retaining content
+ def method_11(self):
+ while self.soup.span:
+ if len(self.soup.span.contents)>1:
+ self.soup.span.replaceWith(self.soup.span.contents[1])
+ elif len(self.soup.span.contents)>0:
+ self.soup.span.replaceWith(self.soup.span.contents[0])
+ else:
+ self.soup.span.extract()
+
+ #remove strong tags retaining content
+ def method_12(self):
+ while self.soup.strong:
+ if len(self.soup.strong.contents)>1:
+ self.soup.strong.replaceWith(self.soup.strong.contents[1])
+ elif len(self.soup.strong.contents)>0:
+                self.soup.strong.replaceWith(self.soup.strong.contents[0])
+ else:
+ self.soup.strong.extract()
+
+def makesoup(txtin):
+ soup = BeautifulSoup(txtin)
+ return soup
diff --git a/cvtSiyavula.py b/cvtSiyavula.py
new file mode 100755
index 0000000..6a6db14
--- /dev/null
+++ b/cvtSiyavula.py
@@ -0,0 +1,157 @@
+#!/usr/bin/python
+#create milestone of basic activities from Siyavula folder
+#each milestone folder in SOURCE contains a source.txt file (written by cleanUp.py) in which the
+#individual activities are separated by <hr /> tags; the folder also has all of the images for the milestone
+#
+#for each milestone folder in SOURCE create a milestone folder in TARGET
+#for each <hr />-delimited section of source.txt create an activity folder in TARGET / milestone
+#and add an entry for it in activities.js
+#copy the section to the activity folder as source.txt
+#for each image tag in the section, replace the tag with a comment of the form <!--I<n>_img<n>.png height <h> width <w> <align>-->
+#copy the source image to the activity folder, renaming it img<n>.png (where n is the number of the image in this activity)
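+#usage (illustrative): ./cvtSiyavula.py siyavula_mathematics za1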
+from path import path
+from PIL import Image
+import subprocess
+import sys
+from optparse import OptionParser
+
+SOURCE = path('../')
+TARGET = path('../trial/')
+
+def getField(element, tag):
+ pos1 = element.find(tag)
+ if pos1 < 0:
+ return ''
+ pos2 = element[pos1:].find(' ')
+ if pos2 < 0:
+ pos2 = len(element)
+ field = element[pos1+len(tag):pos1+pos2].replace('"','')
+ if tag == 'src=':
+ pos1 = field.find('html_')
+ field = '*' + field[pos1+len('html_'):]
+ return field
+
+def parseElement(count, element, folder, activityName):
+ # <img src="Math_Gr1_m1_NUMBER_FUN_html_m3e0e7122.png" border="1" alt="" hspace="12" width="100" height="115" align="LEFT" />
+ #need to build comment
+ tgtimg = 'img' + str(count) + '.png'
+ tgtpth = TARGET / folder /activityName / tgtimg
+ searchImg = getField(element, 'src=')
+ h = getField(element,'height=')
+ w = getField(element,'width=')
+ if len(h) < 1 or len(w) < 1:
+ print element, 'h:',h,'w:',w
+ align = getField(element,'align=')
+ comment = '<!--I' + str(count) +'_' + tgtimg + ' height ' + h + ' width ' + w + ' ' + align.lower() + '-->'
+ srcpth = SOURCE / folder
+ files = srcpth.files(searchImg)
+ srcimg = files[0].name
+ try:
+ im = Image.open(srcpth / srcimg)
+ try:
+ size = (int(w), int(h))
+ imt = im.resize(size,Image.ANTIALIAS)
+ imt.save(tgtpth)
+ except:
+ im.save(tgtpth,'png')
+ except IOError:
+ print 'cannot create thumbnail for', tgtpth, srcpth/srcimg
+ return comment
+
+def processImages(txt, folder, activityName):
+ tag = '<img '
+ endTag = '>'
+ count = 0
+ while tag in txt:
+ count += 1
+ start = txt.find(tag)
+ end = start + txt[start:].find(endTag)
+ element = txt[start+len(tag):end]
+ comment = parseElement(count, element, folder, activityName)
+ txt = txt[:start]+comment+txt[end+len(endTag):]
+ return txt
+
+#get command line options
+parser = OptionParser(usage="Usage: %prog [options] file")
+(options, args) = parser.parse_args()
+if not args:
+    print 'Specify a subject and course (e.g. siyavula_mathematics za1) as arguments.'
+ parser.print_help()
+ sys.exit(1)
+
+SUBJECT = path(args[0])
+COURSE = path(args[1])
+if SUBJECT == 'siyavula_science':
+ sbj = 'sci'
+elif SUBJECT == 'siyavula_technology':
+ sbj = 'tek'
+elif SUBJECT == 'siyavula_mathematics':
+ sbj = 'ma'
+elif SUBJECT == 'siyavula_english':
+ sbj = 'en'
+else:
+ print 'subject not known'
+ sys.exit()
+
+srcpth = SOURCE / SUBJECT / COURSE
+tgtpth = TARGET / SUBJECT / COURSE
+print 'pths', srcpth, tgtpth
+#create clean output folder
+subprocess.call('rm -rf ' + tgtpth, shell=True)
+subprocess.call('mkdir -p ' + tgtpth, shell=True)
+milestones = srcpth.dirs()
+milestones.sort()
+ms_entries = []
+mcount = 0
+for milestone in milestones:
+ subprocess.call('mkdir ' + tgtpth / milestone.namebase, shell=True)
+ fin = open(srcpth / milestone.namebase / 'source.txt', 'r')
+ txt = fin.read()
+ fin.close()
+ entry = ['0',str(mcount),sbj,'milestone',milestone.namebase,'cyan']
+ ms_entries.append(entry)
+ mcount += 1
+ acount = 0
+ entries = []
+ tag = '<hr />'
+ done = False
+ txtout = ''
+ while not done:
+ acount += 1
+ acnt = str(acount)
+ if len(acnt) < 2:
+ acnt = '0' + acnt
+ activityName = milestone.namebase + 'a' + acnt
+ entry = [str(acount),str(acount),'Technology','basic',activityName,'blue']
+ entries.append(entry)
+ tgt = tgtpth / milestone.namebase / activityName
+ print 'tgt', tgt
+ subprocess.call('mkdir ' + tgt, shell=True)
+ pos = txt.find(tag)
+ txtin = txt[:pos]
+ txt = txt[pos+len(tag):]
+ folder = SUBJECT / COURSE / milestone.namebase
+ txtout = processImages(txtin, folder, activityName)
+ fout = open(tgt / 'source.txt','w')
+ fout.write(txtout)
+ fout.close()
+ txtout = ''
+ if not tag in txt:
+ done = True
+ #write out activities.js
+ txtout = 'var activities = [\n'
+ for entry in entries:
+ txtout += str(entry) + ',\n'
+ txtout += ']\n\n'
+ fout=open(TARGET / folder / 'activities.js','w')
+ fout.write(txtout)
+ fout.close()
+#also write out milestones.js
+txtout = 'var activities = [\n'
+for entry in ms_entries:
+ txtout += str(entry) + ',\n'
+txtout += ']\n\n'
+fout = open(TARGET / SUBJECT / COURSE / 'milestones.js','w')
+fout.write(txtout)
+fout.close()
+
diff --git a/generate b/generate
new file mode 100755
index 0000000..a240300
--- /dev/null
+++ b/generate
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#this program generates activities for a course based on milestones.js and activities.js
+#it calls different generators depending on the activity type:
+#currently Karma, basic, Sugar or EPaath (Flash)
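+#usage: ./generate <Subject> <course>
+#e.g. (illustrative): ./generate English enp411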
+
+import sys, subprocess
+from optparse import OptionParser
+from path import path
+from generate_basic_lesson import generateBasicLesson
+from generate_karma_lesson import process_description
+from generate_karma_lesson import KarmaFramework
+#
+SOURCE = path('/home/tony/Desktop/git/newcontent')
+#SOURCE = path('/home/tony/testcontent')
+TARGET = path('/home/tony/Desktop/master')
+#TARGET = path('/home/tony/testmaster')
+MENUS = path('/home/tony/Desktop/git/generate/menus')
+
+def copyFiles(src, dst, files):
+ for file in files:
+ srcpth = path(src) / file
+ dstpth = path(dst) / file
+ rmcmd = 'rm -rf ' + dstpth
+ cpcmd = 'cp -r ' + srcpth + ' ' + dstpth
+ if srcpth.exists():
+ if dstpth.exists():
+ subprocess.call(rmcmd, shell=True)
+ subprocess.call(cpcmd, shell=True)
+
+def get_entries(srcpth):
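+    #each entry line of milestones.js / activities.js is expected to look like (illustrative values):
+    #  ['0', '1', 'ma', 'milestone', 'map1m1', 'cyan'],
+    #eval() of such a line gives a one-element tuple (because of the trailing comma), hence the [0];
+    #the len(entry) check falls back to eval(line) for lines written without a trailing comma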
+ fin = open(srcpth,'r')
+ txt = fin.read()
+ fin.close()
+ lines = txt.split('\n')
+ entries = []
+ for line in lines:
+ try:
+ entry = eval(line)[0]
+ except:
+ continue
+ if len(entry) < 3:
+ entry = eval(line)
+ entries.append(entry)
+ return entries
+
+parser = OptionParser(usage="Usage: %prog [options] file")
+(options, args) = parser.parse_args()
+if not args:
+    print 'Specify a subject and course (e.g. English enp411) as arguments.'
+ parser.print_help()
+ sys.exit(1)
+
+SUBJECT = args[0]
+COURSE = args[1].lower()
+#update version
+cmd = 'git shortlog'
+pipe = 'subprocess.PIPE'
+pid = subprocess.Popen(cmd, cwd=SOURCE, stdout = subprocess.PIPE, stderr = subprocess.PIPE, shell=True)
+pid.wait()
+(result,err) = pid.communicate()
+lines = result.split('\n')
+version = lines[len(lines)-3].strip()
+files = SOURCE.files('version*')
+for file in files:
+ subprocess.call('rm ' + file,shell=True)
+files = TARGET.files('version*')
+for file in files:
+ subprocess.call('rm ' + file,shell=True)
+fout = open(SOURCE / version,'w')
+fout.write('')
+fout.close()
+cmd = 'cp ' + SOURCE / version + ' ' + TARGET
+print cmd
+subprocess.call(cmd, shell=True)
+print 'reset basic files'
+#reset basic files
+cmd = 'cp ' + MENUS / 'subject.html ' + TARGET / 'index.html'
+subprocess.call(cmd, shell=True)
+fileList = ['subjects.js', 'karma']
+copyFiles(SOURCE, TARGET, fileList)
+subprocess.call('cp ' + SOURCE / 'version-* ' + TARGET,shell=True)
+#setup subjects
+subjects = get_entries(SOURCE / 'subjects.js')
+for entry in subjects:
+ subject = entry[1]
+ subprocess.call('mkdir -p ' + TARGET /subject, shell=True)
+ #copy icons
+ cmd = 'cp ' + SOURCE / subject / subject.lower() + '.png ' + TARGET / subject
+ subprocess.call(cmd, shell=True)
+ if subject == 'Library':
+ continue
+ #copy index.html
+ cmd = 'cp ' + MENUS / 'course.html ' + TARGET / subject / 'index.html'
+ subprocess.call(cmd, shell=True)
+ #copy courses.js
+ subprocess.call('cp ' + SOURCE / subject / 'courses.js ' + TARGET / subject, shell=True)
+#create folder in TARGET (master) for SUBJECT COURSE
+spth = SOURCE / SUBJECT / COURSE
+tpth = TARGET / SUBJECT / COURSE
+subprocess.call('rm -rf ' + tpth + '/*',shell=True)
+subprocess.call('mkdir -p ' + tpth, shell=True)
+#copy milestones.js to dst
+subprocess.call('cp ' + spth / 'milestones.js ' + tpth, shell=True)
+#copy index.html to dst
+subprocess.call('cp ' + MENUS / 'milestone.html ' + tpth / 'index.html',shell=True)
+#make list of milestones from milestones.js
+milestones = get_entries(spth / 'milestones.js')
+print 'milestones',len(milestones)
+count = 0
+for entry in milestones:
+ milestone = entry[4]
+ dstms = tpth / milestone
+ srcms = spth / milestone
+ #create target milestone folder
+ try:
+ subprocess.call('rm -rf ' + dstms, shell=True)
+ subprocess.call('mkdir ' + dstms, shell=True)
+ except:
+ print 'make',dstms,'failed',sys.exc_info()[:2]
+ #copy activities.js to activity folder
+ subprocess.call('cp '+ srcms / 'activities.js ' + dstms,shell=True)
+ #copy index.html to activity folder
+ cmd = 'cp ' + MENUS / 'activity.html ' + dstms / 'index.html'
+ subprocess.call(cmd ,shell=True)
+ #get list of activities in milestone
+ activities = get_entries(srcms / 'activities.js')
+ for entry in activities:
+ activity = entry[4]
+ activity_type = entry[3]
+ print 'milestone',milestone,'activity',activity,'activity_type', activity_type
+ #generate activities
+ src = srcms / activity
+ dst = dstms / activity
+ act = path('content') / SUBJECT / COURSE / milestone / activity
+ karmapth = SOURCE / 'karma'
+ subprocess.call('mkdir ' + dst,shell=True)
+ #switch based on activity type
+ if activity_type == 'basic':
+ generateBasicLesson(act, src,dst)
+ elif activity_type == 'Karma':
+ karma = KarmaFramework(karmapth)
+ process_description(karma,src / 'description.py', dst)
+ elif activity_type == 'EPaath':
+            #the EPaath generator is not imported in this script yet
+            generate_EPaath_lesson(src, dst)
+ else: #copy activity to master
+ subprocess.call('cp -r ' + src + '/* ' + dst, shell=True)
+
+
+
diff --git a/generate_basic_lesson.py b/generate_basic_lesson.py
new file mode 100755
index 0000000..8876c3f
--- /dev/null
+++ b/generate_basic_lesson.py
@@ -0,0 +1,407 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+import sys, subprocess
+from path import path
+
+MENUS = path('/home/tony/Desktop/git/generate/menus')
+#
+def getTag(txt, tag):
+ start = txt.find(tag)+len(tag)+1
+ end = txt[start:].find(' ')+start
+ if end < start:
+ end = len(txt)
+ return txt[start:end]
+
+def getImageData(image):
+ #parse image:format: 1_yyyy.png height xxx width yyy left
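+    #e.g. '1_img1.png height 115 width 100 left' -> ('1', 'img1.png', '115', '100', 'left')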
+ #get image number
+ end = image.find('_')
+ imgno = image[:end]
+ #get image name
+ start = image.find('_')
+ end = image.find(' ')
+ name = image[start+1:end]
+ #get height
+ height = getTag(image, 'height')
+ width = getTag(image, 'width')
+ if 'left' in image:
+ ifloat = 'left'
+ elif 'right'in image:
+ ifloat = 'right'
+ else:
+ ifloat = None
+ return (imgno, name, height, width, ifloat)
+
+def makeQuiz(screen):
+ #scan for quiz comments
+ tag = '<!--Q'
+ endTag = '-->'
+ quiz = []
+ if tag+endTag in screen:
+ return True, quiz
+ while tag in screen:
+ #comment = <!--Q:'question' A:'answer'-->
+ start = screen.find(tag)
+ end = screen.find(endTag)
+ question = screen[start+4:end]
+ if question:
+ quiz.append(question)
+ screen = screen[end+len(endTag):]
+ return False, quiz
+
+
+def findImages(screen):
+ #process imagelist
+ imageList = []
+ tag = '<!--I'
+ endTag = '-->'
+ while tag in screen:
+ start = screen.find(tag)
+ end = screen.find(endTag)
+ comment = screen[start+len(tag):end]
+ if len(comment) > 0:
+ imageList.append(comment)
+ screen = screen[end+len(endTag):]
+ return imageList
+
+
+def findAudio(screen):
+ #process audiolist
+ audioList = []
+ tag = '<!--'
+ tags = "ARSXH"
+ endTag = '-->'
+ while tag in screen:
+ start = screen.find(tag)
+ end = screen.find(endTag)
+ comment = screen[start+len(tag):end]
+ if len(comment) > 0 and comment[0] in tags:
+ audioList.append(comment)
+ screen = screen[end+len(endTag):]
+ return audioList
+
+def generate_image(imageList):
+    #format: <n>_img<n>.png height <h> width <w> [left|right]
+ txtout = ''
+ for line in imageList:
+ (imgno, name, height, width, ifloat) = getImageData(line)
+ real = path(name).namebase
+ txtout = txtout + " $('#I" + imgno + "')\n"
+ if ifloat:
+ txtout = txtout + " .addClass('image_"+ifloat+"')\n"
+ #txtout = txtout + " .append(karma.createImg('" + real + "'))\n"
+ txtout += " $('<img>',{\n"
+ txtout += " src:host+pth+'/"+name+"'\n })\n .appendTo('#I"+imgno+"')\n"
+ return txtout
+
+def generate_load(pth, screen):
+ lesson = ''
+ lesson = lesson + " $('<div id="
+ lesson = lesson + '"txtMain"/>'
+ lesson = lesson + "')\n"
+ lesson = lesson + " .appendTo('#content')\n"
+ lesson = lesson + " .load(host+'cgi-bin/getFile.py',\n"
+    if screen == 0:
+        lesson = lesson + " {'filename':'" + pth + "/a.txt'},\n"
+    else:
+        lesson = lesson + " {'filename':'" + pth + "/a" + str(screen) + ".txt'},\n"
+ lesson = lesson + " function(){\n"
+ return lesson
+
+def generate_audio(audiolist):
+ #format A1_clip.ogg or R1_clip.ogg or X1_clip.ogg or S1_clip.txt or H1_clip.ogg
+ txtout = ''
+ for line in audiolist:
+ typ = line[0]
+ pos = line.find('_')
+ clip = line[pos+1:]
+ if typ == 'A':
+ txtout += " playAudio(pth+'"+clip+"')\n"
+ elif typ == 'X':
+ txtout += " playXoAudio(pth+'"+clip+"')\n"
+ elif typ == 'H':
+ txtout += " playHelp(pth+'"+clip+"')\n"
+ elif typ == 'S' and line[1] != 'G':
+ txtout += " sayText(pth+'"+clip+"')\n"
+ elif typ == 'R':
+ txtout += " recordAudio(pth+'"+clip+"')\n"
+ return txtout
+
+def generateLessonCSS(imageList):
+ txtout = ''
+ for image in imageList:
+ (imgno, name, height, width, ifloat) = getImageData(image)
+ insert = "#I"+imgno+"{height:"+height+"px; width:"+width+"px; }\n\n"
+ txtout += insert
+ return txtout
+
+def generateLessonKarma(imageList):
+ txtout = 'function lesson_karma(){\n return Karma({\n image:[\n'
+ for image in imageList:
+ (imgno, name, height, width, float) = getImageData(image)
+ txtout += "{name:'"+path(name).namebase+"', file:'"+name+"'},\n"
+ txtout += ' ],\n audio:[\n ]})};\n'
+ return txtout
+
+def generateQuiz(activity, quizList):
+ for i in range(len(quizList)):
+ quiz = quizList[i]
+ if len(quizList) == 1:
+ txtout = 'var quiz = {\n'
+ else:
+ txtout = 'var quiz'+str(i+1)+' = {\n'
+ tflist = []
+ mclist = []
+ salist = []
+ #we need to make a list of the questions by type: multilist, tf, fill
+ #note: we should have cloze and sa should accept variant answers (e.g a/b where a, b are regular expressions
+ for question in quiz:
+ if "S:" in question:
+ mclist.append(question)
+ elif "T:" in question or 'F:' in question:
+ tflist.append(question)
+ else:
+ salist.append(question)
+ if mclist:
+ txtout += ' multiList:[\n'
+ for q in mclist:
+ #process question and add to txtout
+ apos = q.find('A:')
+ spos = q.find('S:')
+ ans = 'ans: ' + q[apos+2:spos] + ', '
+ sel = 'ansSel: [' + q[spos+2:] + ']'
+ ques = '{ques: ' + q[:apos] + ', '
+ txtout += ques + ans + sel + '},\n'
+ txtout += '],\n'
+ if tflist:
+ txtout += ' tf:[\n'
+ for q in tflist:
+ #process questionstring and add to quiztxt
+ apos1 = q.find('T:')
+ apos2 = q.find('F:')
+ ans = 'ans:' + q[apos1+2:apos2] + ', '
+ sel = 'ansSel:' + q[apos2+2:]
+ ques = '{ques: ' + q[:apos1] + ', '
+ txtout += ques + ans + sel + '},\n'
+ txtout += '],\n'
+ if salist:
+ txtout += ' fill:[\n'
+ for q in salist:
+ #process question string and add to quiztxt
+ apos = q.find('A:')
+ ans = 'ans: ' + q[apos + 2:]
+ ques = '{ques: ' + q[2:apos] + ','
+ txtout += ques + ans + '},\n'
+ txtout += ' ]\n'
+ txtout += '};\n\n'
+ txtout += 'var options = {\n'
+ txtout += " title: '" + activity.namebase + "',\n"
+ txtout += ' random: false,\n'
+ txtout += ' allrandom: false,\n'
+ txtout += '};\n'
+ return txtout
+
+def generateLesson(srcpth, pth, screens, imageList, audioList, quiz):
+ #generate lesson.js
+ tag = '<!--SG-->'
+ txtout = ''
+ lessonpth = srcpth / 'lesson.js'
+ if lessonpth.exists():
+ fin = open(lessonpth, 'r')
+ txtout += fin.read()
+ txtout += '\n'
+ fin.close()
+ if len(screens) == 1:
+ #single screen
+ txtout += 'function initialize(karma) {\n'
+ txtout += " host = 'http://localhost:8008/'\n"
+ txtout += " pth='"+pth+"/'\n"
+        txtout += " if(mode=='Faculty'){\n $('#linkEdit').addClass('linkEdit');\n"
+ txtout += " }else{\n"
+ txtout += " $('#linkApply')\n .addClass('linkApply')\n .attr('score','70');\n }\n"
+ txtout += " $('<div id="
+ txtout += '"txtMain"/>'
+ txtout += "')\n"
+ txtout += " .appendTo('#content')\n"
+ txtout += " .load(host+'cgi-bin/getFile.py',\n"
+ txtout += " {'filename':pth+'a.txt'},\n"
+ txtout += " function(){\n"
+ if imageList and imageList[0]:
+ txtout += generate_image(imageList[0])
+ if audioList and audioList[0]:
+ txtout += generate_audio(audioList[0])
+ txtout += ' });\n'
+ if tag in screens[0]:
+ txtout += " $('#linkStart')\n"
+ txtout += " .addClass('linkStart')\n"
+ txtout += " .click(function(){startGame(karma);\n"
+ txtout += " });\n"
+ txtout += '};\n\n'
+ if not 'startGame' in txtout:
+ txtout += 'function startGame(karma) {\n'
+ if quiz:
+ txtout += " $('#linkStart').addClass('linkStart');\n"
+ txtout += " $('<div id=" + '"quizArea"/>' + "')\n"
+ txtout += ' .appendTo("#content")\n'
+ txtout += ' $("#quizArea")\n'
+ txtout += ' .jQuizMe(quiz, options)\n'
+ txtout += '};\n\n'
+ txtout += 'setUpLesson(initialize, startGame);\n'
+ else:
+ #multiple screen
+ txtout += 'var currentScreen;\n\n'
+ for i in range(len(screens)):
+ screen = i+1
+ txtout += 'function generateScreen'+str(screen)+'(karma) {\n'
+ txtout += ' currentScreen = ' + str(i+1) + '\n'
+ txtout += " host = 'http://localhost:8008/'\n"
+ txtout += " pth='"+pth+"/'\n"
+            txtout += " if(mode=='Faculty'){\n $('#linkEdit').addClass('linkEdit');\n"
+ txtout += " }else{\n"
+ txtout += " $('#linkApply')\n .addClass('linkApply')\n .attr('score','70');\n }\n"
+ txtout += " $('<div id="
+ txtout += '"txtMain"/>'
+ txtout += "')\n"
+ txtout += " .appendTo('#content')\n"
+ txtout += " .load(host+'cgi-bin/getFile.py',\n"
+ txtout += " {'filename':pth+'a" + str(screen) + ".txt'},\n"
+ txtout += " function(){\n"
+ if imageList[i]:
+ txtout += generate_image(imageList[i])
+ txtout += ' });\n'
+ if audioList[i]:
+ txtout += generate_audio(audioList[i])
+ if tag in screens[i]:
+ txtout += " $('#linkStart')\n"
+ txtout += " .addClass('linkStart')\n"
+ txtout += " .click(function(){startGame(karma);});\n"
+ txtout += '};\n'
+ txtout += '\n'
+ txtout += 'function initialize() {};\n\n'
+ if not 'startGame' in txtout:
+ txtout += 'function startGame(karma){\n'
+ if quiz:
+                if not (srcpth / 'quiz.js').exists():
+ txtout += " url = window.location+'';\n"
+ txtout += " tmp = url.split('/');\n"
+ txtout += " tmp.pop();\n"
+ txtout += " tmp.shift();\n"
+ txtout += " tmp.shift();\n"
+ txtout += " tmp.shift();\n"
+ txtout += " pth = tmp.join('/');\n"
+ txtout += " $('div id = " + '"workArea"'+"'/>)\n"
+ txtout += " .load(host+'cgi/writeQuiz.py',\n"
+ txtout += " {'activity':pth,'text':quiztxt});\n"
+ txtout += " $('#linkStart').addClass('linkStart');\n"
+ txtout += " $('<div id=" + '"quizArea"/>' + "')\n"
+ txtout += ' .appendTo("#content")\n'
+ txtout += ' $("#quizArea")\n'
+ txtout += ' .jQuizMe(quiz[currentScreen], options)\n'
+ txtout += '};\n\n'
+ txtout += '\nsetUpMultiScreenLesson([\n'
+ for i in range(len(screens)):
+ txtout += ' generateScreen' + str(i+1) + ',\n'
+ txtout += ']);\n'
+ return txtout
+
+def makeScreens(txt):
+ screens = []
+ tag = '<hr />'
+ while tag in txt:
+ pos = txt.find(tag)
+ screen = txt[:pos]
+ txt = txt[pos+len(tag):]
+ screens.append(screen)
+ if len(txt)>0:
+ screens.append(txt)
+ return screens
+
+def addSpans(screen):
+ tag = '<!--I'
+ endTag = '-->'
+ start = 0
+ while screen[start:].find(tag)>-1:
+ pos1 = screen[start:].find(tag)
+ pos2 = screen[start:].find(endTag)
+ end = start + pos2
+ comment = screen[start+pos1+len(tag):end]
+ if len(comment) > 0:
+ imgno, name, height, width, ifloat = getImageData(comment)
+ span = "<span id = 'I" + imgno + "'></span>"
+ screen = screen[:end+len(endTag)] + span + screen[end+len(endTag):]
+ end = end + len(span)
+ start = end + len(endTag)
+ return screen
+
+def generateBasicLesson(actpth, srcpth, dstpth):
+ #read source.txt
+ txtpth = srcpth / 'source.txt'
+ fin = open(txtpth,'r')
+ txt = fin.read()
+ fin.close()
+ #copy index.html to dst
+ subprocess.call('cp ' + MENUS / 'index.html ' + dstpth, shell=True)
+ #copy source.txt to dst
+ subprocess.call('cp ' + srcpth / 'source.txt ' + dstpth, shell=True)
+ #copy assets to dst
+ images = path(srcpth).files('*.png')
+ for image in images:
+ subprocess.call('cp ' + image + ' ' + dstpth, shell=True)
+ images = path(srcpth).files('*.ogg')
+ for image in images:
+ subprocess.call('cp ' + image + ' ' + dstpth, shell=True)
+ #we don't copy gif because Karma doesn't handle them - they need to be converted to png
+ #subprocess.call('cp ' + srcpth / '*.gif ' + dstpth, shell = True)
+ #scan source text returning list of screens (a*.txt)
+ screens = makeScreens(txt)
+ quiz = []
+ imageList = []
+ audioList = []
+ quizFlag = False #True when activity contains generated quiz
+ for i in range(len(screens)):
+ screen = screens[i]
+ flag, quizText = makeQuiz(screen)
+ if flag:
+ quizFlag = True
+ if quizText:
+ quiz.append(quizText)
+ imageList.append(findImages(screen))
+ audioList.append(findAudio(screen))
+ #write screen
+ #but first add spans for images
+ screen = addSpans(screen)
+ if len(screens) == 1:
+ fout = open(dstpth / 'a.txt','w')
+ else:
+ fout = open(dstpth / 'a' + str(i+1) + '.txt','w')
+ fout.write(screen)
+ fout.close()
+ #create lesson.js
+ txtout = generateLesson(srcpth, actpth, screens, imageList, audioList, quiz)
+ fout = open(dstpth / 'lesson.js','w')
+ fout.write(txtout)
+ fout.close()
+ if imageList:
+ totalList = []
+ for list in imageList:
+ for image in list:
+ totalList.append(image)
+ #create lesson-karma.js
+ txtout = generateLessonKarma(totalList)
+ fout = open(dstpth / 'lesson-karma.js','w')
+ fout.write(txtout)
+ fout.close()
+ #create lesson.css
+ txtout = generateLessonCSS(totalList)
+ fout = open(dstpth / 'lesson.css','w')
+ fout.write(txtout)
+ fout.close()
+ if quiz and not quizFlag:
+ #create quiz.js
+ txtout = generateQuiz(dstpth, quiz)
+ fout = open(dstpth / 'quiz.js','w')
+ fout.write(txtout)
+ fout.close()
+
+
diff --git a/generate_karma_lesson.py b/generate_karma_lesson.py
new file mode 100755
index 0000000..e0b532e
--- /dev/null
+++ b/generate_karma_lesson.py
@@ -0,0 +1,692 @@
+#! /usr/bin/env python2.6
+# -*- coding: utf-8 -*-
+
+from html import HtmlDocument, HtmlElement
+import mo2js
+import codecs
+import os
+import shutil
+import string
+import sys
+import time
+import fnmatch
+from optparse import OptionParser
+from path import path
+
+class KarmaFramework():
+ def __init__(self, root_dir):
+ self.root_dir = root_dir
+ self.java_script_files = [
+ self._karma_file('js/external/jquery-1.4.2.js', 'jquery'),
+ self._karma_file('js/external/jquery-ui-1.8.2.js', 'jquery-ui'),
+ self._karma_file('js/external/jquery.ui.core.js', 'ui.core'),
+ self._karma_file('js/external/jquery.ui.mouse.js', 'ui.mouse'),
+ self._karma_file('js/external/jquery.ui.widget.js', 'ui.widget'),
+ self._karma_file('js/external/jquery.ui.position.js', 'ui.position'),
+ self._karma_file('js/external/jquery.ui.draggable.js', 'ui.draggable'),
+ self._karma_file('js/external/jquery.ui.droppable.js', 'ui.droppable'),
+ self._karma_file('js/jquery.watermarkinput.js', 'jquery.watermarkinput'),
+ self._karma_file('js/ui.scoreboard.js', 'ui.scoreboard'),
+ self._karma_file('js/jquery.svg.js', 'jquery.svg'),
+ self._karma_file('js/karma.js', 'karma'),
+ self._karma_file('js/global.js', 'global'),
+ self._karma_file('js/common.js', 'common'),
+ self._karma_file('js/jquery.clickable.js', 'jquery.clickable'),
+ self._karma_file('js/multiple-choice.js', 'multiple-choice'),
+ self._karma_file('js/clock.js', 'clock'),
+ self._karma_file('js/jquery.i18n.js', 'i18n'),
+ self._karma_file('js/jquery.strings.js', 'jquery.strings'),
+ self._karma_file('js/jquery.keyfilter.js', 'jquery.keyfilter'),
+ self._karma_file('js/kStart.js', 'kstart'),
+ self._karma_file('js/config.js', 'config'),
+ self._karma_file('js/base.js','base')
+ ]
+ self.css_files = [
+ self._karma_file('css/global.css', 'global'),
+ self._karma_file('css/ui.scoreboard.css', 'ui.scoreboard'),
+ self._karma_file('css/kStart.css', 'kstart')
+ ]
+ self.audio_files = [
+ self._karma_file('audio/en_correct.ogg', 'correct'),
+ self._karma_file('audio/en_incorrect.ogg', 'incorrect'),
+ self._karma_file('audio/ne_correct.ogg', 'ne_correct'),
+ self._karma_file('audio/ne_incorrect.ogg', 'ne_incorrect'),
+ self._karma_file('audio/byebye.ogg', 'byebye'),
+ self._karma_file('audio/trigger.ogg', 'trigger')
+ ]
+ self.image_files = [
+ self._karma_file('image/title_block_lt.png', 'title_block_lt'),
+ self._karma_file('image/title_block_rt.png', 'title_block_rt'),
+ self._karma_file('image/favicon.ico', 'favicon')
+ ]
+
+ def _karma_file(self, path, name, **kw):
+ kw['karma_root'] = self.root_dir
+ return KarmaFile(path, name, **kw)
+
+ def _find_file(self, name, files):
+ for f in files:
+ if f.name() == name:
+ return f
+ return None
+
+ def java_script(self, name):
+ return self._find_file(name, self.java_script_files)
+
+ def css(self, name):
+ return self._find_file(name, self.css_files)
+
+ def audio(self, name):
+ return self._find_file(name, self.audio_files)
+
+ def image(self, name):
+ return self._find_file(name, self.image_files)
+
+
+argv0 = sys.argv[0]
+now = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())
+warning_text_lines = ['This file was generated by %s on %s.' % (argv0, now),
+ 'Do not edit.',
+ 'cwd: %s' % os.getcwd(),
+ 'command: %s' % ' '.join(sys.argv)]
+
+
+theLesson = None
+
+include_stack = []
+script_root = os.path.abspath(os.path.dirname(argv0))
+
+
+class File():
+ _name = None
+ src = ''
+ lesson_deploy = True
+ create_file = False
+ data = ''
+
+ def to_string(self):
+ print 'name:', self._name
+ print 'src:', self.src
+ print 'lesson_deploy:', self.lesson_deploy
+ print 'create_file:', self.create_file
+ print 'data:', self.data
+ print
+
+ def __init__(self, pth, name=None, **kw):
+ self._name = name
+ self.src = pth
+
+ if 'karma' in kw and kw['karma']:
+ self.lesson_deploy = False
+ if 'generated' in kw and kw['generated']:
+ self.create_file = True
+
+ if not self.create_file:
+ # find the existing file
+ test_files = []
+ if self.lesson_deploy:
+ test_files.append(os.path.join(theLesson.src_directory, pth))
+ if len(include_stack) > 0:
+ test_files.append(os.path.join(os.path.dirname(include_stack[-1]), pth))
+ if 'karma_root' in kw:
+ test_files.append(os.path.join(kw['karma_root'], pth))
+
+ for f in test_files:
+ abs_path = os.path.abspath(f)
+ if os.path.isfile(abs_path):
+ self.src = abs_path
+ else:
+ self.src = self._absolute_path()
+
+ def _deploy_folder(self):
+ return os.path.abspath(path(theLesson.directory).parent)
+
+ def name(self):
+ return self._name
+
+ # only applicable for assets
+ def preload(self):
+ return self._name != None
+
+ def _src_path(self):
+ return self.src
+
+ def relative_path(self, start=None, **kw):
+ if start == None or start == '':
+ # default relative is to lesson output
+ start = theLesson.directory
+ elif start == 'deploy':
+ start = self._deploy_folder()
+
+ #rel_path = os.path.relpath(self._absolute_path(), start)
+ pth = str(self._absolute_path())
+ pos = pth.find('/karma/')
+ if pos > -1:
+ rel_path = path('../../../..' + pth[pos:])
+ else:
+ rel_path = path(pth).name
+ if 'web' in kw and kw['web']:
+ rel_path = string.replace(rel_path, '\\', '/')
+
+ return rel_path
+
+ def _absolute_path(self):
+ if self.lesson_deploy:
+ return os.path.join(self._deploy_folder(), self._basename())
+ else:
+ return self.src
+
+ def _basename(self):
+ return os.path.basename(self.src)
+
+ def make_available(self):
+ if self.create_file:
+ f = open(self._absolute_path(), 'w')
+ print >>f, self.data
+ f.close()
+ elif self.lesson_deploy:
+ check_file_exists(self._src_path())
+ shutil.copy(self._src_path(), self._absolute_path())
+
+ # only applicable for generated files
+ def write(self, x):
+ self.data = self.data + x
+
+
+class KarmaFile(File):
+ def __init__(self, path, name=None, **kw):
+ kw['karma'] = True
+ File.__init__(self, path, name, **kw)
+
+
+java_script_dependencies = [
+ ('effects.core', 'effects.blind'),
+ ('effects.core', 'effects.bounce'),
+ ('effects.core', 'effects.clip'),
+ ('effects.core', 'effects.drop'),
+ ('effects.core', 'effects.explode'),
+ ('effects.core', 'effects.fold'),
+ ('effects.core', 'effects.highlight'),
+ ('effects.core', 'effects.pulsate'),
+ ('effects.core', 'effects.scale'),
+ ('effects.core', 'effects.shake'),
+ ('effects.core', 'effects.slide'),
+ ('effects.core', 'effects.transfer'),
+ ('ui.core', 'ui.accordion'),
+ ('ui.widget', 'ui.accordion'),
+ ('ui.core', 'ui.autocomplete'),
+ ('ui.widget', 'ui.autocomplete'),
+ ('ui.position', 'ui.autocomplete'),
+ ('ui.core', 'ui.button'),
+ ('ui.widget', 'ui.button'),
+ ('ui.core', 'ui.datepicker'),
+ ('ui.core', 'ui.dialog'),
+ ('ui.widget', 'ui.dialog'),
+ ('ui.button', 'ui.dialog'),
+ ('ui.draggable', 'ui.dialog'),
+ ('ui.mouse', 'ui.dialog'),
+ ('ui.position', 'ui.dialog'),
+ ('ui.resizable', 'ui.dialog'),
+ ('ui.core', 'ui.draggable'),
+ ('ui.mouse', 'ui.draggable'),
+ ('ui.widget', 'ui.draggable'),
+ ('ui.core', 'ui.droppable'),
+ ('ui.widget', 'ui.droppable'),
+ ('ui.mouse', 'ui.droppable'),
+ ('ui.draggable', 'ui.droppable'),
+ ('ui.widget', 'ui.mouse'),
+ ('ui.core', 'ui.progressbar'),
+ ('ui.widget', 'ui.progressbar'),
+ ('ui.core', 'ui.resizable'),
+ ('ui.mouse', 'ui.resizable'),
+ ('ui.widget', 'ui.resizable'),
+ ('ui.core', 'ui.selectable'),
+ ('ui.mouse', 'ui.selectable'),
+ ('ui.widget', 'ui.selectable'),
+ ('ui.core', 'ui.slider'),
+ ('ui.mouse', 'ui.slider'),
+ ('ui.widget', 'ui.slider'),
+ ('ui.core', 'ui.sortable'),
+ ('ui.mouse', 'ui.sortable'),
+ ('ui.widget', 'ui.sortable'),
+ ('ui.core', 'ui.tabs'),
+ ('ui.widget', 'ui.tabs'),
+ # old stuff
+ ('jquery', 'jquery-ui'),
+ ('jquery', 'jquery.watermarkinput'),
+ ('jquery', 'jquery.clickable'),
+ ('ui.core', 'ui.scoreboard'),
+ ('jquery-ui', 'ui.scoreboard'),
+ ('jquery', 'jquery.svg'),
+ ('karma', 'common'),
+ ('common', 'multiple-choice'),
+ ('common', 'clock'),
+ ('common', 'base'),
+ ('jquery', 'clock'),
+ ('jquery', 'i18n')
+ ]
+
+#TBD: factor this out in a separate file, so it is easy to provide
+# your own header/footer
+#TBD: make header/footer customizable
+def generate_header(karma, dir, body, title):
+ header = body.div(id='header')
+
+ header.div(id='topbtn_left').div(id='linkBackLesson',
+ title='Back',
+ className='linkBack')
+
+ lesson_title = header.div(id='lesson_title')
+ lesson_title.img(src=karma.image('title_block_lt').relative_path(dir, web=True),
+ width=33, height=75, align='absmiddle')
+ lesson_title.text(title)
+ lesson_title.img(src=karma.image('title_block_rt').relative_path(dir, web=True),
+ width=33, height=75, align='absmiddle')
+
+
+ header.div(className='topbtn_right').div(title='Help', id='linkHelp')
+
+ header.div(className='topbtn_right').div(title=u'साझा शिक्षा ई-पाटी द्वारा निर्मित',
+ id='linkOle')
+
+
+def generate_footer(body):
+ footer = body.div(id='footer')
+
+ config = theLesson.footer_configuration
+
+ if config['link_next']:
+ footer.div(title='Next', id='linkNextLesson', className='linkNext')
+ if config['link_previous']:
+ footer.div(title='Previous', id='linkPrevLesson', className='linkBack')
+ if config['scoreboard']:
+ footer.div(id='score_box', display='none')
+
+ footer.div(className='botbtn_right').div(title='Play Again', id='linkPlayAgain')
+
+ if config['link_check_answer']:
+ footer.div(className='botbtn_right').div(title='Check Answer', id='linkCheck')
+
+
+def topological_sort(nodes, dependencies, key):
+ """Sort nodes topologically according to dependencies.
+ A dependency is a tuple (key(earlier_node), key(later_node)),
+ meaning that earlier_node should come before later_node in the
+ result."""
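+    # Example (illustrative): topological_sort(['b', 'a'], [('a', 'b')], key=lambda x: x)
+    # returns ['a', 'b'], since 'a' must come before 'b'.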
+ from collections import deque
+ successors = {}
+ predecessor_count = {}
+ node_map = {}
+ for node in nodes:
+ k = key(node)
+ node_map[k] = node
+ successors[k] = []
+ predecessor_count[k] = 0
+ for (dep0, dep1) in dependencies:
+ if dep0 in node_map and dep1 in node_map:
+ successors[dep0].append(dep1)
+ predecessor_count[dep1] = predecessor_count[dep1] + 1
+ next = deque()
+ for k,v in successors.items():
+ if predecessor_count[k] == 0:
+ next.append(k)
+ result = []
+ while len(next) != 0:
+ k = next.popleft()
+ result.append(node_map[k])
+ for successor in successors[k]:
+ predecessor_count[successor] = predecessor_count[successor] - 1
+ if predecessor_count[successor] == 0:
+ next.append(successor)
+ if len(result) != len(nodes):
+ print 'Error: dependency loop.'
+ sys.exit(1)
+ return result
+
+
+def include_dependencies(karma, files):
+ result = []
+ visited = set()
+ deps = {}
+ for dep in java_script_dependencies:
+ deps[dep[1]] = deps.setdefault(dep[1], []) + [dep[0]]
+ js_files = {}
+ for f in karma.java_script_files:
+ js_files[f.name()] = f
+ def add_dependencies(list):
+ for x in list:
+ if x not in visited:
+ add_dependencies([js_files[name] for name
+ in deps.setdefault(x.name(), [])])
+ result.append(x)
+ visited.add(x)
+ add_dependencies(files)
+ return result
+
+def sort_java_script_files(files):
+ karma_files = filter(lambda x: isinstance(x, KarmaFile), files)
+ other_files = filter(lambda x: not isinstance(x, KarmaFile), files)
+ result = topological_sort(karma_files,
+ java_script_dependencies,
+ lambda x: x.name()) + other_files
+ return result
+
+
+def createDiv(id):
+ return HtmlElement('body', 'div', True).attr(id=id)
+
+class Lesson():
+ def __init__(self, src_directory):
+ self.src_directory = src_directory
+ self.parent_directory = ''
+ self.directory = ''
+ self.title = ''
+ self.lesson_title = ''
+ self._grade = None;
+ self._subject = '';
+ self._week = None
+ self.summary = '';
+ self.java_script_files = []
+ self.css_files = []
+ self.image_files = []
+ self.audio_files = []
+ self.divs = [createDiv('content')]
+ self.footer_configuration = dict(link_previous=True,
+ link_next=True,
+ scoreboard=False,
+ link_check_answer=False,
+ audio_buttons=False)
+
+ def grade(self):
+ return self._grade
+
+ def subject(self):
+ return self._subject
+
+ def week(self):
+ return self._week
+
+ def copy_files(self):
+ def create_dir(d):
+ if not os.path.exists(d):
+ os.makedirs(d)
+ create_dir(self.parent_directory)
+ os.chdir(self.parent_directory)
+# map(create_dir, ['css', 'js', 'js/locale', 'assets/image', 'assets/audio', 'assets/video'])
+ for f in self.java_script_files + self.css_files:
+ f.make_available()
+ for f in self.image_files + self.audio_files:
+ f[1].make_available()
+
+ def copy_required(f):
+ src = os.path.join(self.src_directory, f)
+ if (os.path.exists(src)):
+ shutil.copy(src, self.directory)
+ else:
+ print 'Warning: missing ' + src
+
+ # if a screenshot.jpg exists in the source, copy it to the dest
+ screenshot_img = os.path.join(self.src_directory, 'screenshot.jpg')
+ if (os.path.exists(screenshot_img)):
+ shutil.copy(screenshot_img, os.path.join(self.directory, 'screenshot.jpg'))
+
+ self.compile_translations()
+
+ def name(self):
+ return self.deploy_name()
+
+ def deploy_name(self):
+ return os.path.basename(self.directory)
+
+ def generate(self):
+ print 'writing lesson to ' + self.deploy_name()
+ self.copy_files()
+ self.print_html_on(codecs.open('index.html', 'w', 'UTF-8'))
+ #self.print_start_html_on(codecs.open('start.html', 'w', 'UTF-8'))
+ #self.print_kdoc_html_on(codecs.open('kDoc.html', 'w', 'UTF-8'))
+ self.print_karma_js_on(open('lesson-karma.js', 'w'))
+
+ def compile_translations(self):
+ # compile translation JS files from MO files
+ for srcfile in os.listdir(self.src_directory):
+ if fnmatch.fnmatch(srcfile, '*.mo'):
+ lang = os.path.splitext(srcfile)[0]
+ srcpath = os.path.join(self.src_directory, srcfile)
+ targpath = os.path.join(self.directory, lang +'.js')
+                json_translations = mo2js.gettext_json(open(srcpath, 'rb'), True)
+
+                f = codecs.open(targpath, encoding='utf-8', mode='w+')
+                f.write('$.i18n.storeLocaleStrings("%s",\n' % lang)
+                f.write(json_translations)
+                f.write(');\n')
+                f.write('$.i18n.setLocale("%s");\n' % lang)
+                f.close()
+
+ def set_directory(self, dir):
+ self.directory = os.path.abspath(os.path.join(self.parent_directory, dir))
+
+ def print_html_on(self, stream):
+ doc = HtmlDocument()
+ html = doc.html()
+ head = html.head()
+ head.title().text(self.title)
+        head.meta(content='text/html; charset=utf-8', httpEquiv='Content-Type')
+ head.link(type='image/ico',
+ rel='icon',
+ href=self.karma.image('favicon').relative_path(None, web=True))
+ for file in self.css_files:
+ head.link(type='text/css',
+ rel='stylesheet',
+ href=file.relative_path(None, web=True))
+ all_java_script_files = include_dependencies(self.karma,
+ self.java_script_files)
+ for file in sort_java_script_files(all_java_script_files):
+ head.script(type='text/javascript',
+ src=file.relative_path(None, web=True))
+ head.script(type='text/javascript',
+ src='../../../../subjects.js')
+ body = html.body()
+ header = body.div(id='header')
+ body.children.extend(self.divs)
+ footer = body.div(id='footer')
+ doc.print_on(stream)
+
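+    # Editor's note: the generated lesson-karma.js file has roughly this shape
+    # (the asset names and paths below are illustrative):
+    #
+    #   function lesson_karma() {
+    #       return Karma({image: [{name:'cow', file:'assets/image/cow.png'}],
+    #                     audio: [{name:'moo', file:'assets/audio/moo.ogg'}]});
+    #   }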
+ def print_karma_js_on(self, stream):
+ print >>stream, '/*'
+ for l in warning_text_lines:
+ print >>stream, ' *', l
+ print >>stream, ' */'
+ def format_image(img):
+ return "{name:'%s', file:'%s'}" % (img[0], img[1].relative_path('deploy', web=True))
+ def format_audio(a):
+ return "{name:'%s', file:'%s'}" % (a[0], a[1].relative_path('deploy', web=True))
+ def format_assets(name, assets, format_asset, indentation):
+ prefix = '%s: [' % name
+ sep = ',\n' + (len(prefix) + indentation) * ' '
+ postfix = ']'
+ to_preload = filter(lambda asset: asset[1].preload(), assets)
+ return prefix + sep.join(map(format_asset, to_preload)) + postfix
+ print >>stream, 'function lesson_karma() {'
+ return_karma = ' return Karma({'
+ indentation = len(return_karma)
+ print >>stream, return_karma + (',\n' + indentation * ' ').join(
+ [format_assets('image',
+ self.image_files,
+ format_image,
+ indentation),
+ format_assets('audio',
+ self.audio_files,
+ format_audio,
+ indentation)]) + '});'
+ print >>stream, '}'
+
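+# Editor's note: a description.py typically calls lesson() first and then
+# registers assets and divs through the helpers below.  All names and values in
+# this sketch are illustrative:
+#
+#   lesson(2, 'Maths', 'Counting', 5, summary='Count from one to ten.')
+#   css('lesson.css')
+#   image('cow.png', 'cow')
+#   audio('moo.ogg', 'moo')
+#   div(id='question')
+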
+def lesson(grade, subject, title, week, browser_title=None, lesson_title=None, locale=None, summary=''):
+ def camelcase(str):
+ words = str.replace("'", '').split()
+ return ''.join([words[0].lower()] + [x.capitalize() for x in words[1:]])
+
+ #dirname = '%s_%s_%s_%s_K' % (grade, subject, camelcase(title), week);
+ title = path(theLesson.parent_directory).namebase
+ dirname = title
+ theLesson.set_directory(path(theLesson.parent_directory).namebase)
+ theLesson.start_title = title
+ theLesson.title = title
+ theLesson.lesson_title = lesson_title or title
+ theLesson._grade = grade
+ theLesson._subject = subject
+ theLesson._week = week
+ theLesson.summary = summary
+ java_script('jquery')
+ java_script('karma')
+ java_script('common')
+ java_script('i18n')
+ java_script('base')
+ # include the lesson.js if it exists
+ lesson_js = frob_path('lesson.js')
+ if os.path.exists(lesson_js):
+ java_script('lesson.js')
+ #add_help()
+ # include the locale strings too
+
+ if locale != None:
+ theLesson.java_script_files.append(File('jquery.i18n.'+ locale +'.js', type='js', karma=True))
+
+ locale_mo = frob_path(locale + '.mo')
+ if os.path.exists(locale_mo):
+ targpath = os.path.join(theLesson.directory, locale +'.js')
+ theLesson.java_script_files.append(File(targpath, None, type='js', karma=True))
+
+
+def java_script(name, **kw):
+ result = theLesson.karma.java_script(name)
+ if not result:
+ result = File(name, name, **kw)
+ if name in [f.name() for f in theLesson.java_script_files]:
+ print 'Warning: the java_script file \'' + name + '\' is included twice.'
+ else:
+ theLesson.java_script_files.append(result)
+ return result
+
+
+def css(name):
+ result = theLesson.karma.css(name)
+ if not result:
+ result = File(name, name)
+ theLesson.css_files.append(result)
+ return result
+
+
+def image(file, name=None):
+ result = None
+ if name == None:
+ name = file
+ result = theLesson.karma.image(name)
+ if not result:
+ result = File(file, name)
+ theLesson.image_files.append([name, result])
+ return result
+
+
+def audio(file, name=None):
+ result = None
+ if name == None:
+ name = file
+ result = theLesson.karma.audio(name)
+ if not result:
+ result = File(file, name)
+ theLesson.audio_files.append([name, result])
+ return result
+
+
+def div(**info):
+ if 'id' in info and info['id'] == 'content':
+ print 'Warning: div(id=\'content\') no longer needed (it\'s added automatically).'
+ return None
+ result = createDiv(info['id'])
+ theLesson.divs.append(result)
+ return result
+
+
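+# Editor's note, illustrative use from a description.py (values are examples):
+#   footer_configuration(scoreboard=True, link_check_answer=True,
+#                        link_previous=False)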
+def footer_configuration(**kw):
+ global theLesson
+ config = theLesson.footer_configuration
+ for k,v in kw.items():
+ if not k in config:
+ print 'Error: unsupported footer configuration option: ' + k + '.'
+ print 'Possible options:', ', '.join(config.keys())
+ sys.exit(1)
+ config[k] = v
+ if config['scoreboard']:
+ css('ui.scoreboard')
+ java_script('ui.scoreboard')
+
+
+def frob_path(path):
+ if not os.path.isabs(path):
+ return os.path.normpath(os.path.join(os.path.dirname(include_stack[-1]),
+ path))
+ else:
+ return os.path.abspath(path)
+
+
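+# Editor's note: include() lets a description.py pull in another fragment,
+# resolved relative to the file currently being processed, e.g. (illustrative
+# filename) include('common.py').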
+def include(pth):
+ pth = frob_path(pth)
+ include_stack.append(pth)
+ check_file_exists(pth)
+ execfile(pth, globals())
+ include_stack.pop()
+
+
+def add_help():
+ # add html help content if it exists, otherwise the help image
+ help_html = frob_path('help.html')
+ help_img = frob_path('help.png')
+ if (os.path.exists(help_html)):
+ f = codecs.open(help_html, 'r', 'UTF-8')
+ div(id='help').div(id='helpText').innerhtml(f.read())
+ elif (os.path.exists(help_img)):
+ img = image(help_img, 'help')
+ src = img.relative_path(None, web=True)
+ div(id='help').img(src=src)
+ else:
+        print 'Warning: neither ' + help_html + ' nor ' + help_img + ' exists.'
+
+
+def check_file_exists(path):
+ if not os.path.isfile(path):
+ print 'Error: the file ' + path + ' doesn\'t exist.'
+ sys.exit(1)
+
+
+def find_all_description_files():
+ result = []
+ lesson_folder = os.path.join(script_root, 'lessons')
+ for root, dirs, files in os.walk(lesson_folder):
+ if 'description.py' in files:
+ result.append(os.path.abspath(os.path.join(script_root, root, 'description.py')))
+ return result
+
+
+def constantly(x):
+ return lambda y: x
+
+
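+# Editor's note, illustrative driver usage: the generator apparently walks all
+# description files and renders each one (the 'deploy' directory is an example):
+#   for d in find_all_description_files():
+#       process_description(karma, d, 'deploy')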
+def process_description(karma, description, output_dir, lesson_filter=constantly(True)):
+ os.chdir(script_root)
+ description = os.path.abspath(description)
+
+ global theLesson
+ theLesson = Lesson(os.path.abspath(os.path.dirname(description)))
+    theLesson.karma = karma
+ theLesson.parent_directory = os.path.abspath(output_dir)
+ theLesson.java_script_files.append(File('lesson-karma.js', None, generated=True))
+ include_stack.append(description)
+ check_file_exists(description)
+ execfile(description, globals())
+ include_stack.pop()
+
+ if lesson_filter(theLesson):
+ theLesson.generate()
+ return theLesson
+ else:
+ return None
+
diff --git a/html.py b/html.py
new file mode 100755
index 0000000..5f90894
--- /dev/null
+++ b/html.py
@@ -0,0 +1,218 @@
+#! /usr/bin/env python2.6
+# -*- coding: utf-8 -*-
+
+import sys
+
+attribute_names = dict(
+ httpEquiv='http-equiv',
+ className='class'
+)
+
+
+html_escapes = [(u'&', u'&amp;'),
+ (u'>', u'&gt;'),
+ (u'<', u'&lt;'),
+ (u'"', u'&quot;'),
+ (u"'", u'&apos;')]
+
+
+def escape(string):
+ for (x, y) in html_escapes:
+ if x in string:
+ string = string.replace(x, y)
+ return string
+
+class HtmlFragment():
+ def __init__(self):
+ self.children = []
+
+ def append_child(self, child):
+ self.children.append(child)
+
+ def _create_element(self, tag, attrs, separate_closing_tag=True):
+ result = HtmlElement(self.document(), tag, separate_closing_tag)
+ self._hookup_to_parent(result)
+ result.attr(**attrs)
+ return result
+
+ def html(self, **attrs):
+ return self._create_element(u'html', attrs)
+
+ def head(self, **attrs):
+ return self._create_element(u'head', attrs)
+
+ def body(self, **attrs):
+ return self._create_element(u'body', attrs)
+
+ def meta(self, **attrs):
+ return self._create_element(u'meta', attrs, False)
+
+ def link(self, **attrs):
+ return self._create_element(u'link', attrs, False)
+
+ def title(self, **attrs):
+ return self._create_element(u'title', attrs)
+
+ def script(self, **attrs):
+ return self._create_element(u'script', attrs)
+
+ def a(self, **attrs):
+ return self._create_element(u'a', attrs)
+
+ def p(self, **attrs):
+ return self._create_element(u'p', attrs)
+
+ def br(self, **attrs):
+ return self._create_element(u'br', attrs, False)
+
+ def ul(self, **attrs):
+ return self._create_element(u'ul', attrs)
+
+ def li(self, **attrs):
+ return self._create_element(u'li', attrs)
+
+ def iframe(self, **attrs):
+ return self._create_element(u'iframe', attrs)
+
+ def div(self, **attrs):
+ return self._create_element(u'div', attrs)
+
+ def span(self, **attrs):
+ return self._create_element(u'span', attrs)
+
+ def img(self, **attrs):
+ return self._create_element(u'img', attrs, False)
+
+ def text(self, txt):
+ result = HtmlText(txt)
+ self._hookup_to_parent(result)
+ return result
+
+ def innerhtml(self, html):
+ result = HtmlSource(html)
+ self._hookup_to_parent(result)
+ return result
+
+ def comment(self, txt):
+ result = HtmlComment(txt)
+ self._hookup_to_parent(result)
+ return result
+
+ # Utility functions
+ def meta_utf8(self):
+        self.meta(content='text/html; charset=utf-8', httpEquiv='Content-Type')
+
+ def favicon(self, href):
+ self.link(type='image/ico', rel='icon', href=href)
+
+ def css(self, href):
+ self.link(type='text/css', rel='stylesheet', href=href)
+
+ def java_script(self, src):
+ self.script(type='text/javascript', src=src)
+
+ def print_on(self, stream):
+ for child in self.children:
+ child.print_on(stream)
+
+
+class HtmlDocument(HtmlFragment):
+ def __init__(self):
+ HtmlFragment.__init__(self)
+ self.stack = [self]
+
+ def document(self):
+ return self
+
+ def _hookup_to_parent(self, child):
+ self.document().stack[-1].append_child(child)
+
+ def print_on(self, stream):
+ print >>stream, '<!DOCTYPE html>'
+ HtmlFragment.print_on(self, stream)
+
+ def to_file(self, file_name):
+ import codecs
+ f = codecs.open(file_name, 'w', 'UTF-8')
+ self.print_on(f)
+ f.close()
+
+
+class HtmlElement(HtmlFragment):
+ def __init__(self, document, tag, separate_closing_tag):
+ HtmlFragment.__init__(self)
+ self._document = document
+ self.tag = tag
+ self._separate_closing_tag = separate_closing_tag
+
+ def _hookup_to_parent(self, child):
+ self.append_child(child)
+
+ def document(self):
+ return self._document
+
+ # __enter__ and __exit__ enable use of 'with' statement
+ def __enter__(self):
+ self.document().stack.append(self)
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.document().stack.pop()
+
+ def attr(self, **kw):
+ self.attributes = kw
+ return self
+
+ def print_on(self, stream):
+ def attribute_key_to_string(k):
+ return attribute_names[k] if k in attribute_names else k
+
+ def attribute_to_string(kv):
+ return u'%s="%s"' % (attribute_key_to_string(kv[0]),
+ unicode(kv[1]))
+
+ def attributes_to_string(attrs):
+ if len(attrs):
+ return u' ' + u' '.join([attribute_to_string(kv)
+ for kv in attrs.items()])
+ else:
+ return u''
+
+ attributes_string = attributes_to_string(self.attributes)
+
+ if self._separate_closing_tag:
+ tag_open = u'<%s%s>' % (self.tag, attributes_string)
+ tag_close = u'</%s>' % self.tag
+ if len(self.children):
+ print >>stream, tag_open
+ HtmlFragment.print_on(self, stream)
+ print >>stream, tag_close
+ else:
+ print >>stream, tag_open + tag_close
+ else:
+ print >>stream, u'<%s%s/>' % (self.tag, attributes_string)
+
+
+class HtmlText():
+ def __init__(self, txt):
+ self.text = txt
+
+ def print_on(self, stream):
+ print >>stream, escape(self.text)
+
+
+class HtmlComment():
+ def __init__(self, txt):
+ if (txt.find('-->') != -1):
+ print >>sys.stderr, 'Unable to deal with \'-->\' in comment string.'
+ sys.exit(1)
+ self.text = txt
+
+ def print_on(self, stream):
+ print >>stream, '<!--', self.text, '-->'
+
+
+class HtmlSource():
+ def __init__(self, txt):
+ self.text = txt
+
+ def print_on(self, stream):
+ print >>stream, self.text
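+
+
+# Editor's note, illustrative usage of this module: element nesting follows the
+# 'with' stack kept by HtmlDocument.
+#   doc = HtmlDocument()
+#   with doc.html():
+#       with doc.head():
+#           doc.title().text(u'Hello')
+#       with doc.body():
+#           doc.div(id='content').text(u'Hello, world')
+#   doc.to_file('hello.html')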
diff --git a/lesson_html.py b/lesson_html.py
new file mode 100755
index 0000000..847a00c
--- /dev/null
+++ b/lesson_html.py
@@ -0,0 +1,145 @@
+#! /usr/bin/env python2.6
+# -*- coding: utf-8 -*-
+
+import html
+import string
+
+def start_html(lesson):
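+    # The start page shows the grade as a Devanagari digit and the subject by
+    # its Nepali label ('English' stays in English).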
+    displayGrade = u'०१२३४५६७८९'[lesson.grade()]
+    displaySubject = {
+        'English': 'English',
+        'Maths': u'गणित',
+        'Nepali': u'नेपाली'}[lesson.subject()]
+ karma = lesson.karma
+
+ doc = html.HtmlDocument()
+ with doc.html():
+ with doc.head():
+ with doc.title():
+ doc.text(lesson.lesson_title)
+ doc.meta_utf8()
+ doc.favicon(karma.image('favicon').relative_path(None, web=True))
+ doc.css(karma.css('kstart').relative_path(None, web=True))
+ for f in ['jquery', 'kstart']:
+ doc.java_script(karma.java_script(f).relative_path(None, web=True))
+ with doc.body(id='kStart'):
+ with doc.div(id='top'):
+ doc.div(id='backBtn', title='Back')
+ with doc.div(id='topMiddle'):
+ doc.div(id='topDesc', className='center').text(u'साझा शिक्षा ई-पाटीद्वारा निर्मित')
+ doc.div(id='topE-Paath', className='center').text(u'ई-पाठ')
+ with doc.div(id='middle'):
+ with doc.div(id='grade', className='center'):
+ doc.span(id='gradeText').text(u'कक्षा:')
+ doc.span(id='gradeNum').text(displayGrade)
+ doc.div(id='subject', className='center').text(displaySubject)
+ with doc.div(id='lessonTitle', className='center'):
+ doc.a(href='./index.html').text(lesson.lesson_title)
+ doc.div(id='lessonDesc', className='center').text(lesson.summary)
+ with doc.div(id='teachersNoteBtn', className='button'):
+ with doc.a(href='./kDoc.html?back=start.html&doc=teachersNote'):
+ doc.div().text(u'Teacher\'s Note')
+ doc.div().text(u'पाठविवरण')
+ with doc.div(id='bottom'):
+ doc.div(id='logo', title=u'साझा शिक्षा ई-पाटी द्वारा निर्मित')
+ doc.div(id='logoHelp')
+ return doc
+
+def index_html_header(doc, karma, dir, body, titles):
+ def create_title(text):
+ def img(name):
+ return doc.img(src=karma.image(name).relative_path(dir, web=True),
+ width=33, height=75, align='absmiddle')
+
+ with doc.div(className='lesson_title'):
+ img('title_block_lt')
+ doc.text(text)
+ img('title_block_rt')
+
+ with doc.div(id='header'):
+ with doc.div(id='topbtn_left'):
+ doc.div(id='linkBackLesson', title='Back', className='linkBack')
+ for title in titles:
+ create_title(title)
+ with doc.div(className='topbtn_right'):
+ doc.div(title='Help', id='linkHelp')
+ with doc.div(className='topbtn_right'):
+ doc.div(id='linkOle', title=u'साझा शिक्षा ई-पाटी द्वारा निर्मित')
+
+
+def index_html_footer(doc, subject, body, config):
+ with doc.div(id='footer'):
+ if config['link_next']:
+ doc.div(title='Next', id='linkNextLesson', className='linkNext')
+ if config['link_previous']:
+ doc.div(title='Previous', id='linkPrevLesson', className='linkBack')
+ if config['scoreboard']:
+ doc.div(id='score_box', display='none')
+
+ with doc.div(className='botbtn_right'):
+ if subject == 'English':
+ doc.div(title='Play Again', id='linkPlayAgain', className='english').text('Play Again')
+ else:
+ doc.div(title='Play Again', id='linkPlayAgain', className='nepali').text(u'फेरी खेलौँ')
+
+ if config['link_check_answer']:
+ with doc.div(className='botbtn_right'):
+ doc.div(title='Check Answer', id='linkCheck')
+
+ if config['audio_buttons']:
+ with doc.div(className='botbtn_right'):
+ doc.div(id='linkPlayAudio', className='english audioButton').text('Play')
+ with doc.div(className='botbtn_right'):
+ doc.div(id='linkPlayRecordedAudio', className='english audioButton').text('Play')
+ with doc.div(className='botbtn_right'):
+ doc.div(id='linkRecordAudio', className='english audioButton').text('Record')
+
+
+def index_html(lesson, warning_text_lines):
+ karma = lesson.karma
+ doc = html.HtmlDocument()
+ for line in warning_text_lines:
+ doc.comment(string.replace(line, '--', '__'))
+ with doc.html():
+ with doc.head():
+ doc.title().text(lesson.title)
+ doc.meta_utf8()
+ for file in lesson.css_files:
+ doc.css(file.relative_path(None, web=True))
+ doc.favicon(karma.image('favicon').relative_path(None, web=True))
+ for file in lesson.all_java_script_files():
+ doc.java_script(file.relative_path(None, web=True))
+ with doc.body() as body:
+ titles = [lesson.lesson_title]
+ if lesson.subject() == 'Maths' and lesson.lesson_title != lesson.start_title:
+ titles.append(lesson.start_title)
+ index_html_header(doc, karma, lesson.directory, body, titles)
+ body.children.extend(lesson.divs)
+ index_html_footer(doc, lesson.subject(), body,
+ lesson.footer_configuration)
+ return doc
+
+def kdoc_html(lesson):
+ karma = lesson.karma
+ subject = unicode(lesson.subject())
+ title = unicode(lesson.title)
+ doc = html.HtmlDocument()
+ with doc.html():
+ with doc.head():
+ doc.title().text('Lesson Plan for {0} {1}'.format(subject,title))
+ doc.meta_utf8()
+ doc.favicon(karma.image('favicon').relative_path(None, web=True))
+ for css in ['karma', 'ui.kHeader', 'kDoc']:
+ doc.css(karma.css(css).relative_path(None, web=True))
+ for js in ['jquery', 'jquery-ui', 'karma', 'ui.kHeader', 'kDoc']:
+ doc.java_script(karma.java_script(js).relative_path(None, web=True))
+ with doc.body(id='kDoc'):
+ doc.div(id='kHeader')
+ with doc.div(id='kHelp', title='Help'):
+ doc.text(u'पाठविवरणर पाठयोजना सहज तरिकाले पढ्न तपाईले निम्न कार्य गर्न सक्नु हुन्छ ।')
+ doc.br()
+ doc.text(u'पाठ पृष्टको दाईने तर्फको ठाडो रेखा तल माथि गर्दा')
+ doc.br()
+ doc.text(u'तपाईले हाल पढिरहेको पृष्टलाई आवश्यकता अनुसार तल माथि गर्न सक्नुहुन्छ')
+ doc.iframe(id='iframeLessonPlan', src='')
+ return doc
diff --git a/makeActivities.py b/makeActivities.py
new file mode 100755
index 0000000..88b4a41
--- /dev/null
+++ b/makeActivities.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+#
+#utility to remove 'activity' from .html files and replace with horizontal rule tag
+from path import path
+import os, sys, subprocess
+from optparse import OptionParser
+
+SOURCE = path('../')
+
+parser = OptionParser(usage="Usage: %prog [options] file")
+(options, args) = parser.parse_args()
+if len(args) < 3:
+    print 'Specify a subject, a course and a milestone, e.g. <subject> zs5 zs5m08'
+ parser.print_help()
+ sys.exit(1)
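+# Illustrative invocation (the subject directory name is site-specific):
+#   python makeActivities.py <subject> zs5 zs5m08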
+
+SUBJECT = args[0]
+COURSE = args[1]
+MILESTONE = args[2]
+tag = '<hr />'
+basepath = SOURCE / SUBJECT / COURSE / MILESTONE
+files = basepath.files('*.html')
+file = files[0]
+fin = open(basepath / file.name,'r')
+txt = fin.read()
+fin.close()
+pos = txt.find('Activity')
+count = 0
+while pos > -1:
+ pos1 = txt.find('<CENTER>')
+ pos2 = txt.find('</CENTER>')
+ if pos1 < 0 or pos2 < 0:
+ pos = -1
+ continue
+ if count < 1:
+ txt = txt[:pos1]+txt[pos2+9:]
+ else:
+        txt = txt[:pos1] + tag + txt[pos2+9:]
+ count += 1
+ pos = txt.find('Activity')
+ print count, pos, len(txt)
+fout = open(basepath / MILESTONE+'.html','w')
+fout.write(txt)
+fout.close()
diff --git a/menus/activity.html b/menus/activity.html
new file mode 100755
index 0000000..9f7f515
--- /dev/null
+++ b/menus/activity.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>Activity Ladder </title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+ <link type="image/ico" rel="icon" href="../../../karma/image/favicon.ico" />
+ <link type="text/css" rel="stylesheet" href="../../../karma/css/global.css" />
+ <link type="text/css" rel="stylesheet" href="../../../karma/css/menu.css" />
+ <script type="text/javascript" src="../../../karma/js/external/jquery-1.4.2.js"></script>
+ <script type="text/javascript" src="../../../karma/js/external/jquery-ui-1.8.2.js"></script>
+ <script type="text/javascript" src="../../../karma/js/karma.js"></script>
+ <script type="text/javascript" src="../../../karma/js/global.js"></script>
+ <script type="text/javascript" src="../../../subjects.js"></script>
+ <script type="text/javascript" src="activities.js"></script>
+ <script type="text/javascript" src="../../../karma/js/lesson.js"></script>
+ </head>
+ <body>
+ <div id="header"></div>
+ <div id="content">
+ <div id="frame" style="position:absolute;top:0px;left:0px"></div>
+ <canvas id="ladder" style="position:absolute;top:0px;left:0px"></canvas>
+ </div>
+ </body>
+</html>
diff --git a/menus/course.html b/menus/course.html
new file mode 100755
index 0000000..30256dc
--- /dev/null
+++ b/menus/course.html
@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>Course </title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+ <link type="text/css" rel="stylesheet" href="../karma/css/global.css" />
+ <link type="text/css" rel="stylesheet" href="../karma/css/menu.css" />
+ <link type="image/ico" rel="icon" href="../karma/image/favicon.ico" />
+ <script type="text/javascript" src="../karma/js/external/jquery-1.4.2.js"></script>
+ <script type="text/javascript" src="../karma/js/external/jquery-ui-1.8.2.js"></script>
+ <script type="text/javascript" src="../karma/js/karma.js"></script>
+ <script type="text/javascript" src="../karma/js/global.js"></script>
+ <script type="text/javascript" src="courses.js"></script>
+ <script type="text/javascript" src="../karma/js/course.js"></script>
+ </head>
+ <body>
+ <div id="header"></div>
+ <div id='content'></div>
+ </body>
+</html>
diff --git a/menus/index.html b/menus/index.html
new file mode 100755
index 0000000..ff7290c
--- /dev/null
+++ b/menus/index.html
@@ -0,0 +1,29 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
+<link href="../../../../karma/image/favicon.ico" type="image/ico" rel="icon"/>
+<link href="../../../../karma/css/global.css" type="text/css" rel="stylesheet"/>
+<link href="../../../../karma/css/jquizme.css" type="text/css" rel="stylesheet"/>
+<link href="lesson.css" type="text/css" rel="stylesheet"/>
+<script src="../../../../karma/js/external/jquery-1.4.2.js" type="text/javascript"></script>
+<script src="../../../../karma/js/external/jquery-ui-1.8.2.js" type="text/javascript"></script>
+<script src="../../../../karma/js/karma.js" type="text/javascript"></script>
+<script src="../../../../karma/js/common.js" type="text/javascript"></script>
+<script src="../../../../karma/js/jquery.clickable.js" type="text/javascript"></script>
+<script src="../../../../karma/js/jquery.i18n.js" type="text/javascript"></script>
+<script src="../../../../karma/js/jquizme.js" type="text/javascript"></script>
+<script src="../../../../karma/js/math.js" type="text/javascript"></script>
+<script src="../../../../karma/js/global.js" type="text/javascript"></script>
+<script src="../../../../subjects.js" type="text/javascript"></script>
+<script src="../../../../karma/js/base.js" type="text/javascript"></script>
+<script src="quiz.js" type="text/javascript"></script>
+<script src="lesson-karma.js" type="text/javascript"></script>
+<script src="lesson.js" type="text/javascript"></script>
+</head>
+<body>
+<div id="header"</div>
+<div id="content"></div>
+<div id="footer"></div>
+</body>
+</html>
diff --git a/menus/milestone.html b/menus/milestone.html
new file mode 100755
index 0000000..690e2b2
--- /dev/null
+++ b/menus/milestone.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>Milestone Ladder </title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+ <link type="image/ico" rel="icon" href="../../karma/image/favicon.ico" />
+ <link type="text/css" rel="stylesheet" href="../../karma/css/global.css" />
+ <link type="text/css" rel="stylesheet" href="../../karma/css/menu.css" />
+ <script type="text/javascript" src="../../karma/js/external/jquery-1.4.2.js"></script>
+ <script type="text/javascript" src="../../karma/js/external/jquery-ui-1.8.2.js"></script>
+ <script type="text/javascript" src="../../karma/js/karma.js"></script>
+ <script type="text/javascript" src="../../karma/js/global.js"></script>
+ <script type="text/javascript" src="../../subjects.js"></script>
+ <script type="text/javascript" src="milestones.js"></script>
+ <script type="text/javascript" src="../../karma/js/unit.js"></script>
+ </head>
+ <body>
+ <div id="header"></div>
+ <div id="content">
+ <div id="frame" style="position:absolute;top:0px;left:0px"></div>
+ <canvas id="ladder" style="position:absolute;top:0px;left:0px"></canvas>
+ </div>
+ </body>
+</html>
diff --git a/menus/subject.html b/menus/subject.html
new file mode 100755
index 0000000..4b28c79
--- /dev/null
+++ b/menus/subject.html
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>Learn</title>
+<meta charset="UTF-8"/>
+<link type="image/ico" rel="icon" href="./karma/image/favicon.ico"/>
+<link rel="stylesheet" href="./karma/css/menu.css" type="text/css"/>
+<script type="text/javascript" src="./karma/js/external/jquery-1.4.2.js"></script>
+<script type="text/javascript" src="./karma/js/karma.js"></script>
+<script type="text/javascript" src="subjects.js"></script>
+<script type="text/javascript" src="./karma/js/main.js"></script>
+</head>
+<body>
+<div id="content">
+ <div class = 'c11' id="English">English
+ <img id="English_icon" alt="English" width=175 height=175 />
+ </div>
+ <div class = 'c31' id="Mathematics">Mathematics
+ <img id="Mathematics_icon" alt="Mathematics" width=175 height=175" />
+ </div>
+ <div class = 'c51' id ='Science'>Science
+    <img id='Science_icon' alt='Science' width=175 height=175 />
+ </div>
+ <div class = 'c23' id ="Library">Library
+ <img id="Library_icon" alt="Library" width=175 height=175>
+ </div>
+ <div class = 'c43' id="Explore">Laboratory
+ <img id="Explore_icon" alt="Laboratory" width=175 height=175>
+ </div>
+ <div class="footer">
+ Karma.js is licensed under the
+ <a href="http://www.opensource.org/licenses/mit-license.php">MIT License</a>.
+ Karma is a sub-project of
+ <a href="http://sugarlabs.org">SugarLabs</a>. <br />
+ Copyright (c) 2009 Bryan Willson Berry
+ </div>
+</div>
+</body>
+</html>
diff --git a/mo2js.py b/mo2js.py
new file mode 100755
index 0000000..ed8f0dc
--- /dev/null
+++ b/mo2js.py
@@ -0,0 +1,66 @@
+#! /usr/bin/env python
+
+import codecs
+import gettext
+import json
+
+# TBD: generate $.i18n.choose_pluralized_msg. Similar to how python
+# gettext does this.
+
+def context_and_key(key):
+ """Return a tuple containing the context (or None) and the message
+ key."""
+ # A context, if present, is prepended to the key, with a \x04
+ # character in between.
+ (context, separator, k) = key.partition(u'\x04')
+ if (separator != ''):
+ return (context, k)
+ else:
+ return (None, key)
+
+def group_pluralized_forms(dict):
+ """Return a dictionary where the pluralized forms from dict are
+ grouped. Elements of the form
+ (msg, i) -> tr1
+ ...
+ (msg, j) -> trn
+ are grouped into:
+ msg -> [tr1, ..., trn]
+ """
+ result = {}
+ keys = dict.keys()
+ keys.sort()
+ for k in keys:
+ translation = dict[k]
+ if type(k) is tuple:
+ # A pluralized form k = (msg, n)
+ k = k[0]
+ if k not in result:
+ result[k] = []
+ result[k].append(translation)
+ else:
+ result[k] = translation
+ return result
+
+def path(key):
+ """Return the path in the dictionary for key"""
+ (context, key) = context_and_key(key)
+ if context is not None:
+ return ['contextualized_strings', context, key]
+ else:
+ return ['strings', key]
+
+def store_translation(dictionary, key, translation):
+ p = path(key)
+ while len(p) > 1:
+ x = p.pop(0)
+ dictionary = dictionary.setdefault(x, {})
+ dictionary[p[0]] = translation
+
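+# Editor's note, illustrative usage:
+#   with open('ne.mo', 'rb') as fp:
+#       print gettext_json(fp, indent=True)
+# Plain messages land under "strings", context-qualified messages under
+# "contextualized_strings"/<context>, and plural forms become lists of
+# translations.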
+def gettext_json(fp, indent = False):
+    result = {}
+    tr = gettext.GNUTranslations(fp)
+    dictionary = group_pluralized_forms(tr._catalog)
+    for k, v in dictionary.items():
+        store_translation(result, k, v)
+    return json.dumps(result, ensure_ascii = False, indent = indent)
diff --git a/path.py b/path.py
new file mode 100755
index 0000000..01c2c04
--- /dev/null
+++ b/path.py
@@ -0,0 +1,971 @@
+""" path.py - An object representing a path to a file or directory.
+
+Example:
+
+from path import path
+d = path('/home/guido/bin')
+for f in d.files('*.py'):
+ f.chmod(0755)
+
+This module requires Python 2.2 or later.
+
+
+URL: http://www.jorendorff.com/articles/python/path
+Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!)
+Date: 9 Mar 2007
+"""
+
+
+# TODO
+# - Tree-walking functions don't avoid symlink loops. Matt Harrison
+# sent me a patch for this.
+# - Bug in write_text(). It doesn't support Universal newline mode.
+# - Better error message in listdir() when self isn't a
+# directory. (On Windows, the error message really sucks.)
+# - Make sure everything has a good docstring.
+# - Add methods for regex find and replace.
+# - guess_content_type() method?
+# - Perhaps support arguments to touch().
+
+from __future__ import generators
+
+import sys, warnings, os, fnmatch, glob, shutil, codecs, md5
+
+__version__ = '2.2'
+__all__ = ['path']
+
+# Platform-specific support for path.owner
+if os.name == 'nt':
+ try:
+ import win32security
+ except ImportError:
+ win32security = None
+else:
+ try:
+ import pwd
+ except ImportError:
+ pwd = None
+
+# Pre-2.3 support. Are unicode filenames supported?
+_base = str
+_getcwd = os.getcwd
+try:
+ if os.path.supports_unicode_filenames:
+ _base = unicode
+ _getcwd = os.getcwdu
+except AttributeError:
+ pass
+
+# Pre-2.3 workaround for booleans
+try:
+ True, False
+except NameError:
+ True, False = 1, 0
+
+# Pre-2.3 workaround for basestring.
+try:
+ basestring
+except NameError:
+ basestring = (str, unicode)
+
+# Universal newline support
+_textmode = 'r'
+if hasattr(file, 'newlines'):
+ _textmode = 'U'
+
+
+class TreeWalkWarning(Warning):
+ pass
+
+class path(_base):
+ """ Represents a filesystem path.
+
+ For documentation on individual methods, consult their
+ counterparts in os.path.
+ """
+
+ # --- Special Python methods.
+
+ def __repr__(self):
+ return 'path(%s)' % _base.__repr__(self)
+
+ # Adding a path and a string yields a path.
+ def __add__(self, more):
+ try:
+ resultStr = _base.__add__(self, more)
+ except TypeError: #Python bug
+ resultStr = NotImplemented
+ if resultStr is NotImplemented:
+ return resultStr
+ return self.__class__(resultStr)
+
+ def __radd__(self, other):
+ if isinstance(other, basestring):
+ return self.__class__(other.__add__(self))
+ else:
+ return NotImplemented
+
+ # The / operator joins paths.
+ def __div__(self, rel):
+ """ fp.__div__(rel) == fp / rel == fp.joinpath(rel)
+
+ Join two path components, adding a separator character if
+ needed.
+ """
+ return self.__class__(os.path.join(self, rel))
+
+ # Make the / operator work even when true division is enabled.
+ __truediv__ = __div__
+
+ def getcwd(cls):
+ """ Return the current working directory as a path object. """
+ return cls(_getcwd())
+ getcwd = classmethod(getcwd)
+
+
+ # --- Operations on path strings.
+
+ isabs = os.path.isabs
+ def abspath(self): return self.__class__(os.path.abspath(self))
+ def normcase(self): return self.__class__(os.path.normcase(self))
+ def normpath(self): return self.__class__(os.path.normpath(self))
+ def realpath(self): return self.__class__(os.path.realpath(self))
+ def expanduser(self): return self.__class__(os.path.expanduser(self))
+ def expandvars(self): return self.__class__(os.path.expandvars(self))
+ def dirname(self): return self.__class__(os.path.dirname(self))
+ basename = os.path.basename
+
+ def expand(self):
+ """ Clean up a filename by calling expandvars(),
+ expanduser(), and normpath() on it.
+
+ This is commonly everything needed to clean up a filename
+ read from a configuration file, for example.
+ """
+ return self.expandvars().expanduser().normpath()
+
+ def _get_namebase(self):
+ base, ext = os.path.splitext(self.name)
+ return base
+
+ def _get_ext(self):
+ f, ext = os.path.splitext(_base(self))
+ return ext
+
+ def _get_drive(self):
+ drive, r = os.path.splitdrive(self)
+ return self.__class__(drive)
+
+ parent = property(
+ dirname, None, None,
+ """ This path's parent directory, as a new path object.
+
+ For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
+ """)
+
+ name = property(
+ basename, None, None,
+ """ The name of this file or directory without the full path.
+
+ For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
+ """)
+
+ namebase = property(
+ _get_namebase, None, None,
+ """ The same as path.name, but with one file extension stripped off.
+
+ For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
+ but path('/home/guido/python.tar.gz').namebase == 'python.tar'
+ """)
+
+ ext = property(
+ _get_ext, None, None,
+ """ The file extension, for example '.py'. """)
+
+ drive = property(
+ _get_drive, None, None,
+ """ The drive specifier, for example 'C:'.
+ This is always empty on systems that don't use drive specifiers.
+ """)
+
+ def splitpath(self):
+ """ p.splitpath() -> Return (p.parent, p.name). """
+ parent, child = os.path.split(self)
+ return self.__class__(parent), child
+
+ def splitdrive(self):
+ """ p.splitdrive() -> Return (p.drive, <the rest of p>).
+
+ Split the drive specifier from this path. If there is
+ no drive specifier, p.drive is empty, so the return value
+ is simply (path(''), p). This is always the case on Unix.
+ """
+ drive, rel = os.path.splitdrive(self)
+ return self.__class__(drive), rel
+
+ def splitext(self):
+ """ p.splitext() -> Return (p.stripext(), p.ext).
+
+ Split the filename extension from this path and return
+ the two parts. Either part may be empty.
+
+ The extension is everything from '.' to the end of the
+ last path segment. This has the property that if
+ (a, b) == p.splitext(), then a + b == p.
+ """
+ filename, ext = os.path.splitext(self)
+ return self.__class__(filename), ext
+
+ def stripext(self):
+ """ p.stripext() -> Remove one file extension from the path.
+
+ For example, path('/home/guido/python.tar.gz').stripext()
+ returns path('/home/guido/python.tar').
+ """
+ return self.splitext()[0]
+
+ if hasattr(os.path, 'splitunc'):
+ def splitunc(self):
+ unc, rest = os.path.splitunc(self)
+ return self.__class__(unc), rest
+
+ def _get_uncshare(self):
+ unc, r = os.path.splitunc(self)
+ return self.__class__(unc)
+
+ uncshare = property(
+ _get_uncshare, None, None,
+ """ The UNC mount point for this path.
+ This is empty for paths on local drives. """)
+
+ def joinpath(self, *args):
+ """ Join two or more path components, adding a separator
+ character (os.sep) if needed. Returns a new path
+ object.
+ """
+ return self.__class__(os.path.join(self, *args))
+
+ def splitall(self):
+ r""" Return a list of the path components in this path.
+
+ The first item in the list will be a path. Its value will be
+ either os.curdir, os.pardir, empty, or the root directory of
+ this path (for example, '/' or 'C:\\'). The other items in
+ the list will be strings.
+
+ path.path.joinpath(*result) will yield the original path.
+ """
+ parts = []
+ loc = self
+ while loc != os.curdir and loc != os.pardir:
+ prev = loc
+ loc, child = prev.splitpath()
+ if loc == prev:
+ break
+ parts.append(child)
+ parts.append(loc)
+ parts.reverse()
+ return parts
+
+ def relpath(self):
+ """ Return this path as a relative path,
+ based from the current working directory.
+ """
+ cwd = self.__class__(os.getcwd())
+ return cwd.relpathto(self)
+
+ def relpathto(self, dest):
+ """ Return a relative path from self to dest.
+
+ If there is no relative path from self to dest, for example if
+ they reside on different drives in Windows, then this returns
+ dest.abspath().
+ """
+ origin = self.abspath()
+ dest = self.__class__(dest).abspath()
+
+ orig_list = origin.normcase().splitall()
+ # Don't normcase dest! We want to preserve the case.
+ dest_list = dest.splitall()
+
+ if orig_list[0] != os.path.normcase(dest_list[0]):
+ # Can't get here from there.
+ return dest
+
+ # Find the location where the two paths start to differ.
+ i = 0
+ for start_seg, dest_seg in zip(orig_list, dest_list):
+ if start_seg != os.path.normcase(dest_seg):
+ break
+ i += 1
+
+ # Now i is the point where the two paths diverge.
+ # Need a certain number of "os.pardir"s to work up
+ # from the origin to the point of divergence.
+ segments = [os.pardir] * (len(orig_list) - i)
+ # Need to add the diverging part of dest_list.
+ segments += dest_list[i:]
+ if len(segments) == 0:
+ # If they happen to be identical, use os.curdir.
+ relpath = os.curdir
+ else:
+ relpath = os.path.join(*segments)
+ return self.__class__(relpath)
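+    # Example (Unix): path('/a/b').relpathto('/a/c/d') yields path('../c/d').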
+
+ # --- Listing, searching, walking, and matching
+
+ def listdir(self, pattern=None):
+ """ D.listdir() -> List of items in this directory.
+
+ Use D.files() or D.dirs() instead if you want a listing
+ of just files or just subdirectories.
+
+ The elements of the list are path objects.
+
+ With the optional 'pattern' argument, this only lists
+ items whose names match the given pattern.
+ """
+ names = os.listdir(self)
+ if pattern is not None:
+ names = fnmatch.filter(names, pattern)
+ return [self / child for child in names]
+
+ def dirs(self, pattern=None):
+ """ D.dirs() -> List of this directory's subdirectories.
+
+ The elements of the list are path objects.
+ This does not walk recursively into subdirectories
+ (but see path.walkdirs).
+
+ With the optional 'pattern' argument, this only lists
+ directories whose names match the given pattern. For
+ example, d.dirs('build-*').
+ """
+ return [p for p in self.listdir(pattern) if p.isdir()]
+
+ def files(self, pattern=None):
+ """ D.files() -> List of the files in this directory.
+
+ The elements of the list are path objects.
+ This does not walk into subdirectories (see path.walkfiles).
+
+ With the optional 'pattern' argument, this only lists files
+ whose names match the given pattern. For example,
+ d.files('*.pyc').
+ """
+
+ return [p for p in self.listdir(pattern) if p.isfile()]
+
+ def walk(self, pattern=None, errors='strict'):
+ """ D.walk() -> iterator over files and subdirs, recursively.
+
+ The iterator yields path objects naming each child item of
+ this directory and its descendants. This requires that
+ D.isdir().
+
+ This performs a depth-first traversal of the directory tree.
+ Each directory is returned just before all its children.
+
+ The errors= keyword argument controls behavior when an
+ error occurs. The default is 'strict', which causes an
+ exception. The other allowed values are 'warn', which
+ reports the error via warnings.warn(), and 'ignore'.
+ """
+ if errors not in ('strict', 'warn', 'ignore'):
+ raise ValueError("invalid errors parameter")
+
+ try:
+ childList = self.listdir()
+ except Exception:
+ if errors == 'ignore':
+ return
+ elif errors == 'warn':
+ warnings.warn(
+ "Unable to list directory '%s': %s"
+ % (self, sys.exc_info()[1]),
+ TreeWalkWarning)
+ return
+ else:
+ raise
+
+ for child in childList:
+ if pattern is None or child.fnmatch(pattern):
+ yield child
+ try:
+ isdir = child.isdir()
+ except Exception:
+ if errors == 'ignore':
+ isdir = False
+ elif errors == 'warn':
+ warnings.warn(
+ "Unable to access '%s': %s"
+ % (child, sys.exc_info()[1]),
+ TreeWalkWarning)
+ isdir = False
+ else:
+ raise
+
+ if isdir:
+ for item in child.walk(pattern, errors):
+ yield item
+
+ def walkdirs(self, pattern=None, errors='strict'):
+ """ D.walkdirs() -> iterator over subdirs, recursively.
+
+ With the optional 'pattern' argument, this yields only
+ directories whose names match the given pattern. For
+ example, mydir.walkdirs('*test') yields only directories
+ with names ending in 'test'.
+
+ The errors= keyword argument controls behavior when an
+ error occurs. The default is 'strict', which causes an
+ exception. The other allowed values are 'warn', which
+ reports the error via warnings.warn(), and 'ignore'.
+ """
+ if errors not in ('strict', 'warn', 'ignore'):
+ raise ValueError("invalid errors parameter")
+
+ try:
+ dirs = self.dirs()
+ except Exception:
+ if errors == 'ignore':
+ return
+ elif errors == 'warn':
+ warnings.warn(
+ "Unable to list directory '%s': %s"
+ % (self, sys.exc_info()[1]),
+ TreeWalkWarning)
+ return
+ else:
+ raise
+
+ for child in dirs:
+ if pattern is None or child.fnmatch(pattern):
+ yield child
+ for subsubdir in child.walkdirs(pattern, errors):
+ yield subsubdir
+
+ def walkfiles(self, pattern=None, errors='strict'):
+ """ D.walkfiles() -> iterator over files in D, recursively.
+
+ The optional argument, pattern, limits the results to files
+ with names that match the pattern. For example,
+ mydir.walkfiles('*.tmp') yields only files with the .tmp
+ extension.
+ """
+ if errors not in ('strict', 'warn', 'ignore'):
+ raise ValueError("invalid errors parameter")
+
+ try:
+ childList = self.listdir()
+ except Exception:
+ if errors == 'ignore':
+ return
+ elif errors == 'warn':
+ warnings.warn(
+ "Unable to list directory '%s': %s"
+ % (self, sys.exc_info()[1]),
+ TreeWalkWarning)
+ return
+ else:
+ raise
+
+ for child in childList:
+ try:
+ isfile = child.isfile()
+ isdir = not isfile and child.isdir()
+ except:
+ if errors == 'ignore':
+ continue
+ elif errors == 'warn':
+ warnings.warn(
+ "Unable to access '%s': %s"
+ % (self, sys.exc_info()[1]),
+ TreeWalkWarning)
+ continue
+ else:
+ raise
+
+ if isfile:
+ if pattern is None or child.fnmatch(pattern):
+ yield child
+ elif isdir:
+ for f in child.walkfiles(pattern, errors):
+ yield f
+
+ def fnmatch(self, pattern):
+ """ Return True if self.name matches the given pattern.
+
+ pattern - A filename pattern with wildcards,
+ for example '*.py'.
+ """
+ return fnmatch.fnmatch(self.name, pattern)
+
+ def glob(self, pattern):
+ """ Return a list of path objects that match the pattern.
+
+ pattern - a path relative to this directory, with wildcards.
+
+ For example, path('/users').glob('*/bin/*') returns a list
+ of all the files users have in their bin directories.
+ """
+ cls = self.__class__
+ return [cls(s) for s in glob.glob(_base(self / pattern))]
+
+
+ # --- Reading or writing an entire file at once.
+
+ def open(self, mode='r'):
+ """ Open this file. Return a file object. """
+ return file(self, mode)
+
+ def bytes(self):
+ """ Open this file, read all bytes, return them as a string. """
+ f = self.open('rb')
+ try:
+ return f.read()
+ finally:
+ f.close()
+
+ def write_bytes(self, bytes, append=False):
+ """ Open this file and write the given bytes to it.
+
+ Default behavior is to overwrite any existing file.
+ Call p.write_bytes(bytes, append=True) to append instead.
+ """
+ if append:
+ mode = 'ab'
+ else:
+ mode = 'wb'
+ f = self.open(mode)
+ try:
+ f.write(bytes)
+ finally:
+ f.close()
+
+ def text(self, encoding=None, errors='strict'):
+ r""" Open this file, read it in, return the content as a string.
+
+ This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r'
+ are automatically translated to '\n'.
+
+ Optional arguments:
+
+ encoding - The Unicode encoding (or character set) of
+ the file. If present, the content of the file is
+ decoded and returned as a unicode object; otherwise
+ it is returned as an 8-bit str.
+ errors - How to handle Unicode errors; see help(str.decode)
+ for the options. Default is 'strict'.
+ """
+ if encoding is None:
+ # 8-bit
+ f = self.open(_textmode)
+ try:
+ return f.read()
+ finally:
+ f.close()
+ else:
+ # Unicode
+ f = codecs.open(self, 'r', encoding, errors)
+ # (Note - Can't use 'U' mode here, since codecs.open
+ # doesn't support 'U' mode, even in Python 2.3.)
+ try:
+ t = f.read()
+ finally:
+ f.close()
+ return (t.replace(u'\r\n', u'\n')
+ .replace(u'\r\x85', u'\n')
+ .replace(u'\r', u'\n')
+ .replace(u'\x85', u'\n')
+ .replace(u'\u2028', u'\n'))
+
+ def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False):
+ r""" Write the given text to this file.
+
+ The default behavior is to overwrite any existing file;
+ to append instead, use the 'append=True' keyword argument.
+
+ There are two differences between path.write_text() and
+ path.write_bytes(): newline handling and Unicode handling.
+ See below.
+
+ Parameters:
+
+ - text - str/unicode - The text to be written.
+
+ - encoding - str - The Unicode encoding that will be used.
+ This is ignored if 'text' isn't a Unicode string.
+
+ - errors - str - How to handle Unicode encoding errors.
+ Default is 'strict'. See help(unicode.encode) for the
+ options. This is ignored if 'text' isn't a Unicode
+ string.
+
+ - linesep - keyword argument - str/unicode - The sequence of
+ characters to be used to mark end-of-line. The default is
+ os.linesep. You can also specify None; this means to
+ leave all newlines as they are in 'text'.
+
+ - append - keyword argument - bool - Specifies what to do if
+ the file already exists (True: append to the end of it;
+ False: overwrite it.) The default is False.
+
+
+ --- Newline handling.
+
+ write_text() converts all standard end-of-line sequences
+ ('\n', '\r', and '\r\n') to your platform's default end-of-line
+ sequence (see os.linesep; on Windows, for example, the
+ end-of-line marker is '\r\n').
+
+ If you don't like your platform's default, you can override it
+ using the 'linesep=' keyword argument. If you specifically want
+ write_text() to preserve the newlines as-is, use 'linesep=None'.
+
+ This applies to Unicode text the same as to 8-bit text, except
+ there are three additional standard Unicode end-of-line sequences:
+ u'\x85', u'\r\x85', and u'\u2028'.
+
+ (This is slightly different from when you open a file for
+ writing with fopen(filename, "w") in C or file(filename, 'w')
+ in Python.)
+
+
+ --- Unicode
+
+ If 'text' isn't Unicode, then apart from newline handling, the
+ bytes are written verbatim to the file. The 'encoding' and
+ 'errors' arguments are not used and must be omitted.
+
+ If 'text' is Unicode, it is first converted to bytes using the
+ specified 'encoding' (or the default encoding if 'encoding'
+ isn't specified). The 'errors' argument applies only to this
+ conversion.
+
+ """
+ if isinstance(text, unicode):
+ if linesep is not None:
+ # Convert all standard end-of-line sequences to
+ # ordinary newline characters.
+ text = (text.replace(u'\r\n', u'\n')
+ .replace(u'\r\x85', u'\n')
+ .replace(u'\r', u'\n')
+ .replace(u'\x85', u'\n')
+ .replace(u'\u2028', u'\n'))
+ text = text.replace(u'\n', linesep)
+ if encoding is None:
+ encoding = sys.getdefaultencoding()
+ bytes = text.encode(encoding, errors)
+ else:
+ # It is an error to specify an encoding if 'text' is
+ # an 8-bit string.
+ assert encoding is None
+
+ if linesep is not None:
+ text = (text.replace('\r\n', '\n')
+ .replace('\r', '\n'))
+ bytes = text.replace('\n', linesep)
+
+ self.write_bytes(bytes, append)
+
+ def lines(self, encoding=None, errors='strict', retain=True):
+ r""" Open this file, read all lines, return them in a list.
+
+ Optional arguments:
+ encoding - The Unicode encoding (or character set) of
+ the file. The default is None, meaning the content
+ of the file is read as 8-bit characters and returned
+ as a list of (non-Unicode) str objects.
+ errors - How to handle Unicode errors; see help(str.decode)
+ for the options. Default is 'strict'
+ retain - If true, retain newline characters; but all newline
+ character combinations ('\r', '\n', '\r\n') are
+ translated to '\n'. If false, newline characters are
+ stripped off. Default is True.
+
+ This uses 'U' mode in Python 2.3 and later.
+ """
+ if encoding is None and retain:
+ f = self.open(_textmode)
+ try:
+ return f.readlines()
+ finally:
+ f.close()
+ else:
+ return self.text(encoding, errors).splitlines(retain)
+
+ def write_lines(self, lines, encoding=None, errors='strict',
+ linesep=os.linesep, append=False):
+ r""" Write the given lines of text to this file.
+
+ By default this overwrites any existing file at this path.
+
+ This puts a platform-specific newline sequence on every line.
+ See 'linesep' below.
+
+ lines - A list of strings.
+
+ encoding - A Unicode encoding to use. This applies only if
+ 'lines' contains any Unicode strings.
+
+ errors - How to handle errors in Unicode encoding. This
+ also applies only to Unicode strings.
+
+ linesep - The desired line-ending. This line-ending is
+ applied to every line. If a line already has any
+ standard line ending ('\r', '\n', '\r\n', u'\x85',
+ u'\r\x85', u'\u2028'), that will be stripped off and
+ this will be used instead. The default is os.linesep,
+ which is platform-dependent ('\r\n' on Windows, '\n' on
+ Unix, etc.) Specify None to write the lines as-is,
+ like file.writelines().
+
+ Use the keyword argument append=True to append lines to the
+ file. The default is to overwrite the file. Warning:
+ When you use this with Unicode data, if the encoding of the
+ existing data in the file is different from the encoding
+ you specify with the encoding= parameter, the result is
+ mixed-encoding data, which can really confuse someone trying
+ to read the file later.
+ """
+ if append:
+ mode = 'ab'
+ else:
+ mode = 'wb'
+ f = self.open(mode)
+ try:
+ for line in lines:
+ isUnicode = isinstance(line, unicode)
+ if linesep is not None:
+ # Strip off any existing line-end and add the
+ # specified linesep string.
+ if isUnicode:
+ if line[-2:] in (u'\r\n', u'\x0d\x85'):
+ line = line[:-2]
+ elif line[-1:] in (u'\r', u'\n',
+ u'\x85', u'\u2028'):
+ line = line[:-1]
+ else:
+ if line[-2:] == '\r\n':
+ line = line[:-2]
+ elif line[-1:] in ('\r', '\n'):
+ line = line[:-1]
+ line += linesep
+ if isUnicode:
+ if encoding is None:
+ encoding = sys.getdefaultencoding()
+ line = line.encode(encoding, errors)
+ f.write(line)
+ finally:
+ f.close()
+
+ def read_md5(self):
+ """ Calculate the md5 hash for this file.
+
+ This reads through the entire file.
+ """
+ f = self.open('rb')
+ try:
+ m = md5.new()
+ while True:
+ d = f.read(8192)
+ if not d:
+ break
+ m.update(d)
+ finally:
+ f.close()
+ return m.digest()
+
+ # --- Methods for querying the filesystem.
+
+ exists = os.path.exists
+ isdir = os.path.isdir
+ isfile = os.path.isfile
+ islink = os.path.islink
+ ismount = os.path.ismount
+
+ if hasattr(os.path, 'samefile'):
+ samefile = os.path.samefile
+
+ getatime = os.path.getatime
+ atime = property(
+ getatime, None, None,
+ """ Last access time of the file. """)
+
+ getmtime = os.path.getmtime
+ mtime = property(
+ getmtime, None, None,
+ """ Last-modified time of the file. """)
+
+ if hasattr(os.path, 'getctime'):
+ getctime = os.path.getctime
+ ctime = property(
+ getctime, None, None,
+ """ Creation time of the file. """)
+
+ getsize = os.path.getsize
+ size = property(
+ getsize, None, None,
+ """ Size of the file, in bytes. """)
+
+ if hasattr(os, 'access'):
+ def access(self, mode):
+ """ Return true if current user has access to this path.
+
+ mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK
+ """
+ return os.access(self, mode)
+
+ def stat(self):
+ """ Perform a stat() system call on this path. """
+ return os.stat(self)
+
+ def lstat(self):
+ """ Like path.stat(), but do not follow symbolic links. """
+ return os.lstat(self)
+
+ def get_owner(self):
+ r""" Return the name of the owner of this file or directory.
+
+ This follows symbolic links.
+
+ On Windows, this returns a name of the form ur'DOMAIN\User Name'.
+ On Windows, a group can own a file or directory.
+ """
+ if os.name == 'nt':
+ if win32security is None:
+ raise Exception("path.owner requires win32all to be installed")
+ desc = win32security.GetFileSecurity(
+ self, win32security.OWNER_SECURITY_INFORMATION)
+ sid = desc.GetSecurityDescriptorOwner()
+ account, domain, typecode = win32security.LookupAccountSid(None, sid)
+ return domain + u'\\' + account
+ else:
+ if pwd is None:
+ raise NotImplementedError("path.owner is not implemented on this platform.")
+ st = self.stat()
+ return pwd.getpwuid(st.st_uid).pw_name
+
+ owner = property(
+ get_owner, None, None,
+ """ Name of the owner of this file or directory. """)
+
+ if hasattr(os, 'statvfs'):
+ def statvfs(self):
+ """ Perform a statvfs() system call on this path. """
+ return os.statvfs(self)
+
+ if hasattr(os, 'pathconf'):
+ def pathconf(self, name):
+ return os.pathconf(self, name)
+
+
+ # --- Modifying operations on files and directories
+
+ def utime(self, times):
+ """ Set the access and modified times of this file. """
+ os.utime(self, times)
+
+ def chmod(self, mode):
+ os.chmod(self, mode)
+
+ if hasattr(os, 'chown'):
+ def chown(self, uid, gid):
+ os.chown(self, uid, gid)
+
+ def rename(self, new):
+ os.rename(self, new)
+
+ def renames(self, new):
+ os.renames(self, new)
+
+
+ # --- Create/delete operations on directories
+
+ def mkdir(self, mode=0777):
+ os.mkdir(self, mode)
+
+ def makedirs(self, mode=0777):
+ os.makedirs(self, mode)
+
+ def rmdir(self):
+ os.rmdir(self)
+
+ def removedirs(self):
+ os.removedirs(self)
+
+
+ # --- Modifying operations on files
+
+ def touch(self):
+ """ Set the access/modified times of this file to the current time.
+ Create the file if it does not exist.
+ """
+ fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666)
+ os.close(fd)
+ os.utime(self, None)
+
+ def remove(self):
+ os.remove(self)
+
+ def unlink(self):
+ os.unlink(self)
+
+
+ # --- Links
+
+ if hasattr(os, 'link'):
+ def link(self, newpath):
+ """ Create a hard link at 'newpath', pointing to this file. """
+ os.link(self, newpath)
+
+ if hasattr(os, 'symlink'):
+ def symlink(self, newlink):
+ """ Create a symbolic link at 'newlink', pointing here. """
+ os.symlink(self, newlink)
+
+ if hasattr(os, 'readlink'):
+ def readlink(self):
+ """ Return the path to which this symbolic link points.
+
+ The result may be an absolute or a relative path.
+ """
+ return self.__class__(os.readlink(self))
+
+ def readlinkabs(self):
+ """ Return the path to which this symbolic link points.
+
+ The result is always an absolute path.
+ """
+ p = self.readlink()
+ if p.isabs():
+ return p
+ else:
+ return (self.parent / p).abspath()
+
+
+ # --- High-level functions from shutil
+
+ copyfile = shutil.copyfile
+ copymode = shutil.copymode
+ copystat = shutil.copystat
+ copy = shutil.copy
+ copy2 = shutil.copy2
+ copytree = shutil.copytree
+ if hasattr(shutil, 'move'):
+ move = shutil.move
+ rmtree = shutil.rmtree
+
+
+ # --- Special stuff from os
+
+ if hasattr(os, 'chroot'):
+ def chroot(self):
+ os.chroot(self)
+
+ if hasattr(os, 'startfile'):
+ def startfile(self):
+ os.startfile(self)
+
+
diff --git a/zipper.py b/zipper.py
new file mode 100755
index 0000000..a9f82c1
--- /dev/null
+++ b/zipper.py
@@ -0,0 +1,121 @@
+#!/usr/bin/python
+
+#create courseware from master folder
+#proceed by level: subject, course, milestone, activity
+
+from path import path
+import subprocess, os, sys
+from optparse import OptionParser
+
+parser = OptionParser(usage="Usage: %prog [options] file")
+(options, args) = parser.parse_args()
+if not args:
+    SUBJECT = 'All'
+elif len(args) < 2:
+    print 'Specify both a subject and a course.'
+    parser.print_help()
+    sys.exit(1)
+else:
+    SUBJECT = args[0]
+    COURSE = args[1]
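+# Illustrative invocations (the courseware paths below are site-specific):
+#   python zipper.py                      -> rebuild the whole courseware tree
+#   python zipper.py <subject> <course>   -> re-zip the milestones of one course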
+MAINPATH = path('/home/tony/Desktop/master')
+TARGET = path('/home/tony/courseware')
+BACKUP = path('/home/tony/courseware.bak')
+if SUBJECT == 'All':
+ subprocess.call('rm -rf ' + BACKUP,shell=True)
+ subprocess.call('mv ' + TARGET + ' ' + BACKUP, shell=True)
+ subprocess.call('mkdir ' + TARGET, shell=True)
+ #copy version to TARGET
+ subprocess.call('cp ' + MAINPATH / 'version* ' + TARGET, shell=True)
+    #courseware folder needs subjects.js, the top-level index.html and karma.zip
+ subjectsfile = MAINPATH / 'subjects.js'
+ subprocess.call('cp '+subjectsfile+' '+TARGET,shell=True)
+ subprocess.call('cp '+ MAINPATH / 'index.html' + ' ' + TARGET,shell=True)
+ cwd = MAINPATH
+ cmd = 'zip -qr ' + TARGET / 'karma.zip' + ' karma'
+ print cmd
+ subprocess.call(cmd,cwd=cwd,shell=True)
+ #create subject folders based on subjects.js
+ fin = open(subjectsfile,'r')
+ txt = fin.read()
+ fin.close()
+ lines = txt.split('\n')
+ for line in lines:
+ try:
+ entry = eval(line)[0]
+ except:
+ continue
+ if len(entry) < 3:
+ entry = eval(line)
+ subject = entry[1]
+ sbj = entry[0]
+ src = MAINPATH / subject
+ if sbj == 'li':
+ subprocess.call('cp -r ' + src + ' ' + TARGET,shell=True)
+ continue
+ tpth = TARGET / subject
+ subprocess.call('mkdir ' + tpth, shell=True)
+ subprocess.call('cp ' + src / 'index.html' + ' ' + tpth,shell=True)
+ subprocess.call('cp ' + src / subject.lower()+'.png' + ' ' + tpth,shell=True)
+ subprocess.call('cp ' + src / 'courses.js' + ' ' + tpth,shell=True)
+ #create course folders for each course in courses.js
+ fin = open(MAINPATH / subject / 'courses.js')
+ txt = fin.read()
+ fin.close()
+ lines = txt.split('\n')
+ for line in lines:
+ try:
+ entry = eval(line)[0]
+ except:
+ continue
+ if len(entry)<4:
+ entry = eval(line)
+ coursename = entry[1]
+ course = entry[0].lower()
+ srcpth = MAINPATH / subject / course.lower()
+ tgtpth = TARGET / subject / course
+ print tgtpth
+ subprocess.call('mkdir ' + tgtpth, shell=True)
+ subprocess.call('cp ' + srcpth / 'index.html ' + tgtpth,shell=True)
+ subprocess.call('cp ' + srcpth / 'milestones.js ' + tgtpth,shell=True)
+ #now get milestones based on milestones.js
+ fin = open(srcpth / 'milestones.js','r')
+ txt = fin.read()
+ fin.close()
+ milestones = []
+ lines = txt.split('\n')
+ for line in lines:
+ try:
+ entry = eval(line)[0]
+ except:
+ continue
+ if len(entry)<3:
+ entry = eval(line)
+ milestones.append(entry)
+ milestones.sort()
+ for milestone in milestones:
+ print 'milestone', milestone[4]
+ cwd = srcpth
+ cmd = 'zip -qr ' + tgtpth / milestone[4] + '.msxo ' + milestone[4]
+ subprocess.call(cmd, cwd=cwd, shell=True)
+else: #we are doing one course
+ #now get milestones based on milestones.js
+ srcpth = MAINPATH / SUBJECT / COURSE
+ tgtpth = TARGET / SUBJECT / COURSE
+ #also copy to target folder
+ subprocess.call('cp ' + srcpth / 'milestones.js ' + tgtpth,shell=True)
+ fin = open(srcpth / 'milestones.js','r')
+ txt = fin.read()
+ fin.close()
+ milestones = []
+ lines = txt.split('\n')
+ for line in lines:
+ try:
+ entry = eval(line)[0]
+ except:
+ continue
+ if len(entry)<3:
+ entry = eval(line)
+ milestones.append(entry)
+ for milestone in milestones:
+ print 'milestone', milestone[4]
+ cwd = srcpth
+ cmd = 'zip -qr ' + tgtpth / milestone[4] + '.msxo ' + milestone[4]
+ subprocess.call(cmd, cwd=cwd, shell=True)
+