diff options
-rwxr-xr-x | BeautifulSoup.py | 2011 | ||||
-rw-r--r-- | ReadMe | 62 | ||||
-rwxr-xr-x | cleanUp.py | 60 | ||||
-rwxr-xr-x | cvtFile.py | 185 | ||||
-rwxr-xr-x | cvtSiyavula.py | 157 | ||||
-rwxr-xr-x | generate | 150 | ||||
-rwxr-xr-x | generate_basic_lesson.py | 407 | ||||
-rwxr-xr-x | generate_karma_lesson.py | 692 | ||||
-rwxr-xr-x | html.py | 218 | ||||
-rwxr-xr-x | lesson_html.py | 145 | ||||
-rwxr-xr-x | makeActivities.py | 44 | ||||
-rwxr-xr-x | menus/activity.html | 24 | ||||
-rwxr-xr-x | menus/course.html | 20 | ||||
-rwxr-xr-x | menus/index.html | 29 | ||||
-rwxr-xr-x | menus/milestone.html | 24 | ||||
-rwxr-xr-x | menus/subject.html | 39 | ||||
-rwxr-xr-x | mo2js.py | 66 | ||||
-rwxr-xr-x | path.py | 971 | ||||
-rwxr-xr-x | zipper.py | 121 |
19 files changed, 5425 insertions, 0 deletions
diff --git a/BeautifulSoup.py b/BeautifulSoup.py new file mode 100755 index 0000000..28e3e96 --- /dev/null +++ b/BeautifulSoup.py @@ -0,0 +1,2011 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2009, Leonard Richardson + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
+ +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.0.8" +__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. 
+ myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches 
the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # Special case some findAll* searches + # findAll*(True) + elif not limit and name is True and not attrs and not kwargs: + return [element for element in generator() + if isinstance(element, Tag)] + + # findAll*('tag-name') + elif not limit and isinstance(name, basestring) and not attrs \ + and not kwargs: + return [element for element in generator() + if isinstance(element, Tag) and element.name == name] + + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
+ def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "<?%s?>" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!--%s-->" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!%s>" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. 
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. 
+ # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + 
current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. 
+class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. 
+ elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. + elif isinstance(markup, NavigableString) or \ + isinstance(markup, basestring): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst is True: + result = markup is not None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup and not isinstance(markup, basestring): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif hasattr(matchAgainst, '__iter__'): # list-like + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isinstance(markup, basestring): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. 
+ for k,v in portion.items(): + built[k] = v + elif hasattr(portion, '__iter__'): # is a list + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class BeautifulStoneSoup(Tag, SGMLParser): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "<foo><bar></foo>" actually means + "<foo><bar></bar></foo>". + + [Another possible explanation is "<foo><bar /></foo>", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda x: '<!' + x.group(1) + '>') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. 
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. 
+ self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = 
[] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instance of + the given tag."""
+ popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print "</%s> is not real!" 
% name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". 
When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. 
For instance, the occurrence of
+ NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. 
+ match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. 
+ + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript') + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + <script> tags contain Javascript and should not be parsed, that + META tags may contain encoding information, and so on. + + This also makes it better for subclassing than BeautifulStoneSoup + or BeautifulSoup.""" + + RESET_NESTING_TAGS = buildTagMap('noscript') + NESTABLE_TAGS = {} + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + <foo><bar>baz</bar></foo> + => + <foo bar="baz"><bar>baz</bar></foo> + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. 
Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisiness, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class RobustInsanelyWackAssHTMLParser(MinimalSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +###################################################### +# +# Bonus library: Unicode, Dammit +# +# This class forces XML data into a standard format (usually to UTF-8 +# or Unicode). It is heavily based on code from Mark Pilgrim's +# Universal Feed Parser. It does not rewrite the XML or HTML to +# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi +# (XML) and BeautifulSoup.start_meta (HTML). + +# Autodetects character encodings. +# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. 
+try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, orig): + """Changes a MS smart quote character to an XML or HTML + entity.""" + sub = self.MS_CHARS.get(orig) + if isinstance(sub, tuple): + if self.smartQuotesTo == 'xml': + sub = '&#x%s;' % sub[1] + else: + sub = '&%s;' % 
sub[0] + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + markup = re.compile("([\x80-\x9f])").sub \ + (lambda(x): self._subMSChar(x.group(1)), + markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif 
(len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_match = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + if not xml_encoding_match and isHTML: + regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 
'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + 
'\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify() @@ -0,0 +1,62 @@ +The Build directory contains scripts which are used to prepare milestones and activities from the original +Siyavula content. It also contains scripts to generate 'basic' and 'Karma' activities from the source 'content' folder. + +The scripts assume the following directory structure: + +/library/ +/library/courseware +/library/rwcd +/library/rwcd/git +/library/rwcd/git/content +/library/rwcd/git/build +/library/rwcd/Siyavula +/library/rwcd/siyavula_science +/library/rwcd/siyavula_technology +/library/rwcd/siyavula_mathematics +/library/rwcd/siyavula_english +/library/rwcd/master +/library/rwcd/trial + +Conversion of Siyavula Learner's Modules to 'basic' activities: + +(Assume conversion of Mathematics modules for P1) + +1. In siyavula_english, create folders for each course: e.g. za1. +2. The original files are in the Siyavula folder: Ed Gd 1/Mathematics/Learner's Modules as MS Word documents. + Some of these modules are equivalent to a milestone. Others have enough material for multiple milestones. 
These + modules are often divided into Learning Units. These modules represent the course work for a complete year + (30 weeks). A typical milestone should contain enough material (activities) to require a student from one to + two weeks to complete. This means a year's course should have between 15 and 30 milestones. In Rwanda, the + year's course may be divided into three terms, each of which has from 5 to 10 milestones (typically 7-8). +3. In this case there are eight modules (.doc files) containing Create a folder in za1 for each milestone, + e.g. map1m1, map1m2, .... The current conversion in this case each module is converted directly to a milestone. + Copy each module (.doc file) into the corresponding folder in siyavula_mathematics (map1m1 etc.). + At this point you should have eight folders in siyavula_english each containing a single .doc file. +4. Open the .doc file in Open Office (Write). Save it as html (Save As and then select html format at the bottom + right of the pop-up window.). At this point the folder will contain the original .doc file, a .html file with the + same name and the images for the module (.png and .gif). +5. Look at the original doc file or at a printed copy. Decide where the module should be divided into activities. This + is art not science. There are several considerations in making this decision. First, the content is intended to be + printed on A4 paper. The html will be displayed on the OLPC laptop screen. Therefore an activity should fit on one + or more screens. Second, an activity should be a single task for the student. The student must be able to understand + clearly what he/she is to do and when he or she has finished. Students in Rwanda will normally have a forty minute + period each day to work on the course. If a typical student can finish the milestone in seven periods and the + milestone has twelve activities, a typical activity would require about twenty-five minutes to complete. 
So the
+   process to identify activities within a milestone is to look at the original Siyavula module and decide how to divide
+   it into tasks. Some modules have activities or assignments or a list (A., B., C.) to help identify activities.
+6. Edit the .html file for the module in gedit or notepad. Enter a horizontal rule tag (<hr />) in the text to show
+   the separation between activities.
+7. In the command line, run the cleanUp.py script (./cleanUp.py English za1 map1m1). The cleanUp.py script will clean
+   up the html and write a file: source.txt in the folder. This script operates on a single milestone and should be run
+   for each milestone to be converted. The source.txt file includes the <hr /> tags entered in the previous step.
+8. In the command line, run the cvtSiyavula.py script (./cvtSiyavula.py English za1). Note: The cvtSiyavula script
+   operates on all of the milestones in a course. The cvtSiyavula script creates folders in /library/rwcd/trial with
+   the top level named for the subject (siyavula_mathematics). In this folder, there is a folder per milestone. In the
+   milestone folder is a folder per activity. The cvtSiyavula script renames image files as img1.png, img2.png, ....
+   These images are resized to the height and width specified in the html <img> tag. Images in .gif format are
+   converted to .png format retaining the size of the original.
+9. Copy the folders for the course (za1) to the content folder replacing the one already there, if any. In this case,
+   the /library/git/content/Mathematics folder.
+10. In general, this conversion will be done only once for a module. Beyond that point, the content will be edited for
+   the OLPC laptop, to meet Rwanda curriculum requirements, and to improve the lesson. If the conversion process is
diff --git a/cleanUp.py b/cleanUp.py new file mode 100755 index 0000000..a9dc662 --- /dev/null +++ b/cleanUp.py @@ -0,0 +1,60 @@ +#!/usr/bin/python +#clean up tags in html conversion of Siyavula module +#write cleaned up version as source.txt in source folder +from path import path +import os, sys, subprocess +from BeautifulSoup import BeautifulSoup +from optparse import OptionParser +from cvtFile import Cvt + +SOURCE = path('../') + +def makesoup(txtin): + soup = BeautifulSoup(txtin,fromEncoding="utf-8") + return soup + +#use BeautifulSoup to clean up tags +def applyFix(soup): + #remove <head> + try: + soup.head.extract() + except: + pass + #perform conversions + cvt = Cvt(soup) + for method in cvt.processlist: + cvt.methods[method]() + #set up txt + txtout = cvt.soup.prettify() + #return + return txtout + +parser = OptionParser(usage="Usage: %prog [options] file") +(options, args) = parser.parse_args() +if not args: + print 'Specify a folder and module (e.g. Z4 z4m1 arguments.' + parser.print_help() + sys.exit(1) + +SUBJECT = args[0] +COURSE = args[1] +MODULE = args[2] +tag = 'page-break-before: always">' +basepath = SOURCE / SUBJECT / COURSE / MODULE +fin = open(basepath / MODULE + '.html','r') +txt = fin.read() +fin.close() +txt = txt.replace('\n',' ') +txtout = '' +while txt.find(tag) > -1: + pos = txt.find(tag) + txtin = txt[:pos+len(tag)] + txt = txt[pos+len(tag):] + if txtout: + txtout += '\n\n<hr />\n\n' + #set up soup + soup = makesoup(txtin) + txtout += applyFix(soup) +fout = open(basepath / 'source.txt','w') +fout.write(txtout) +fout.close diff --git a/cvtFile.py b/cvtFile.py new file mode 100755 index 0000000..f70881a --- /dev/null +++ b/cvtFile.py @@ -0,0 +1,185 @@ +#!/usr/bin/python +""" +This version acts as cgi script, applying selected conversions +to a file supplied in the form + +get form parameters +make soup +apply conversions +make txt +return +""" +import os, sys +import logging +from BeautifulSoup import BeautifulSoup, Tag, NavigableString, 
Comment +from path import path +import subprocess +from PIL import Image + +class Cvt(): + def __init__(self, soup): + self.soup = soup + self.logfile = open('logfile', 'w') + self.kstrongs = 0 + self.kfonts = 0 + self.kspans = 0 + #methods + #method_2 - remove style attributes from table tags + #method_3 - remove style attributes from td tags + #method_4 - remove style attributes from p tags + #method_5 - remove style attributes from tr tags + #method_6 - remove col tags + #method_9 - remove fonts retaining content + #method_10 - remove empty p tags (string == ' ') + #method_11 - remove spans retaining content + self.processlist = [2, 3, 4, 5, 6, 7, 9, 10, 11, 12] + self.iterlist = [9, 11, 12] + self.methods = { 1:self.method_1, 2:self.method_2, 3:self.method_3, 4:self.method_4, 5:self.method_5, + 6:self.method_6, 7:self.method_7, 8:self.method_8, 9:self.method_9, 10:self.method_10, + 11:self.method_11, 12:self.method_12, + } + + def close_logfile(self): + self.logfile.close() + + def method_1(self): + link = Tag(self.soup, 'link') + link['rel']="StyleSheet" + link['type']="text/css" + link['href']="../../css/activity.css" + meta = self.soup.find('meta') + meta.insert(0,link) + + #remove style attributes from table tags + def method_2(self): + tblkeys = ['width', 'border', 'bordercolor', 'cellpadding', 'cellspacing', 'frame', 'rules', 'dir'] + tbls = self.soup.findAll('table') + for tbl in tbls: + for key in tblkeys: + try: + del tbl[key] + except: + pass + + #remove style attributes from td tags + def method_3(self): + tdkeys = ['width', 'height', 'bgcolor', 'valign'] + tds = self.soup.findAll('td') + for td in tds: + for key in tdkeys: + try: + del td[key] + except: + pass + + #remove style attributes from p tags + def method_4(self): + pkeys=['lang', 'align', 'style', 'class'] + ps = self.soup.findAll('p') + for p in ps: + for key in pkeys: + centerflag = False + try: + if 'head' in p['class']: + centerflag = True + except: + pass + try: + del p[key] + except: 
+ pass + if centerflag: + p['class'] = 'center' + + #remove style attributes from tr tags + def method_5(self): + pkeys=['lang', 'align', 'style', 'class', 'valign'] + ps = self.soup.findAll('tr') + for p in ps: + for key in pkeys: + try: + del p[key] + except: + pass + + #remove col tags + def method_6(self): + cols = self.soup.findAll('col') + for col in cols: + col.extract() + + #remove attributes from span tags + def method_7(self): + pkeys=['lang', 'align', 'style', 'class'] + ps = self.soup.findAll('span') + for p in ps: + for key in pkeys: + try: + del p[key] + except: + pass + + + def method_8(self): + ps = self.soup.findAll('p') + for p in ps: + bs = p.findAll('b') + for b in bs: + try: + if 'LO' in b.string: + if p.findParents('h1'): + h1=p.findParent('h1') + h1.replaceWith('<p class="solid">' + str(b) + '</p>') + else: + p.replaceWith('<p class="solid">' + str(b) + '</p>') + except: + print 'b string not found', b.prettify() + + #remove font tags retaining content + def method_9(self): + while self.soup.font: + if len(self.soup.font.contents) > 1: + self.soup.font.replaceWith(self.soup.font.contents[1]) + elif len(self.soup.font.contents) > 0: + self.soup.font.replaceWith(self.soup.font.contents[0]) + else: + self.soup.font.extract() + + #remove empty paragraphs (used for spacing) + def method_10(self): + tags = self.soup.findAll('br') + for tag in tags: + if len(tag) == 0: + tag.extract() + tags = self.soup.findAll('p') + for tag in tags: + if len(tag)==0: + tag.extract() + if tag.string: + str = tag.string.strip() + if not str: + tag.extract() + + #remove spans (replace with p tags) retaining content + def method_11(self): + while self.soup.span: + if len(self.soup.span.contents)>1: + self.soup.span.replaceWith(self.soup.span.contents[1]) + elif len(self.soup.span.contents)>0: + self.soup.span.replaceWith(self.soup.span.contents[0]) + else: + self.soup.span.extract() + + #remove strong tags retaining content + def method_12(self): + while 
self.soup.strong: + if len(self.soup.strong.contents)>1: + self.soup.strong.replaceWith(self.soup.strong.contents[1]) + elif len(self.soup.strong.contents)>0: + self.soup.string.replaceWith(self.soup.strong.contents[0]) + else: + self.soup.strong.extract() + +def makesoup(txtin): + soup = BeautifulSoup(txtin) + return soup diff --git a/cvtSiyavula.py b/cvtSiyavula.py new file mode 100755 index 0000000..6a6db14 --- /dev/null +++ b/cvtSiyavula.py @@ -0,0 +1,157 @@ +#!/usr/bin/python +#create milestone of basic activities from Siyavula folder +#source folder has text files a01..a99.txt representing individual activities after conversion from +#doc format. Folder also has all of the images for the milestone. +# +#for each folder in SOURCE create a milestone folder in TARGET +#for each a*.txt file in SOURCE folder create an activity folder in TARGET / milestone +#also add an entry in activities.js +#copy the a*.txt file to the activity folder as source.txt +#for each image tag in a*.txt, replace the tag with a comment <!--In_imgn.png height xxx width xxx left--> +#copy the source image to the activity folder renaming it imgn (where n is the number of the image in this activity) +from path import path +from PIL import Image +import subprocess +import sys +from optparse import OptionParser + +SOURCE = path('../') +TARGET = path('../trial/') + +def getField(element, tag): + pos1 = element.find(tag) + if pos1 < 0: + return '' + pos2 = element[pos1:].find(' ') + if pos2 < 0: + pos2 = len(element) + field = element[pos1+len(tag):pos1+pos2].replace('"','') + if tag == 'src=': + pos1 = field.find('html_') + field = '*' + field[pos1+len('html_'):] + return field + +def parseElement(count, element, folder, activityName): + # <img src="Math_Gr1_m1_NUMBER_FUN_html_m3e0e7122.png" border="1" alt="" hspace="12" width="100" height="115" align="LEFT" /> + #need to build comment + tgtimg = 'img' + str(count) + '.png' + tgtpth = TARGET / folder /activityName / tgtimg + searchImg = 
getField(element, 'src=') + h = getField(element,'height=') + w = getField(element,'width=') + if len(h) < 1 or len(w) < 1: + print element, 'h:',h,'w:',w + align = getField(element,'align=') + comment = '<!--I' + str(count) +'_' + tgtimg + ' height ' + h + ' width ' + w + ' ' + align.lower() + '-->' + srcpth = SOURCE / folder + files = srcpth.files(searchImg) + srcimg = files[0].name + try: + im = Image.open(srcpth / srcimg) + try: + size = (int(w), int(h)) + imt = im.resize(size,Image.ANTIALIAS) + imt.save(tgtpth) + except: + im.save(tgtpth,'png') + except IOError: + print 'cannot create thumbnail for', tgtpth, srcpth/srcimg + return comment + +def processImages(txt, folder, activityName): + tag = '<img ' + endTag = '>' + count = 0 + while tag in txt: + count += 1 + start = txt.find(tag) + end = start + txt[start:].find(endTag) + element = txt[start+len(tag):end] + comment = parseElement(count, element, folder, activityName) + txt = txt[:start]+comment+txt[end+len(endTag):] + return txt + +#get command line options +parser = OptionParser(usage="Usage: %prog [options] file") +(options, args) = parser.parse_args() +if not args: + print 'Specify a course (e.g. zs4) as argument.' 
+ parser.print_help() + sys.exit(1) + +SUBJECT = path(args[0]) +COURSE = path(args[1]) +if SUBJECT == 'siyavula_science': + sbj = 'sci' +elif SUBJECT == 'siyavula_technology': + sbj = 'tek' +elif SUBJECT == 'siyavula_mathematics': + sbj = 'ma' +elif SUBJECT == 'siyavula_english': + sbj = 'en' +else: + print 'subject not known' + sys.exit() + +srcpth = SOURCE / SUBJECT / COURSE +tgtpth = TARGET / SUBJECT / COURSE +print 'pths', srcpth, tgtpth +#create clean output folder +subprocess.call('rm -rf ' + tgtpth, shell=True) +subprocess.call('mkdir -p ' + tgtpth, shell=True) +milestones = srcpth.dirs() +milestones.sort() +ms_entries = [] +mcount = 0 +for milestone in milestones: + subprocess.call('mkdir ' + tgtpth / milestone.namebase, shell=True) + fin = open(srcpth / milestone.namebase / 'source.txt', 'r') + txt = fin.read() + fin.close() + entry = ['0',str(mcount),sbj,'milestone',milestone.namebase,'cyan'] + ms_entries.append(entry) + mcount += 1 + acount = 0 + entries = [] + tag = '<hr />' + done = False + txtout = '' + while not done: + acount += 1 + acnt = str(acount) + if len(acnt) < 2: + acnt = '0' + acnt + activityName = milestone.namebase + 'a' + acnt + entry = [str(acount),str(acount),'Technology','basic',activityName,'blue'] + entries.append(entry) + tgt = tgtpth / milestone.namebase / activityName + print 'tgt', tgt + subprocess.call('mkdir ' + tgt, shell=True) + pos = txt.find(tag) + txtin = txt[:pos] + txt = txt[pos+len(tag):] + folder = SUBJECT / COURSE / milestone.namebase + txtout = processImages(txtin, folder, activityName) + fout = open(tgt / 'source.txt','w') + fout.write(txtout) + fout.close() + txtout = '' + if not tag in txt: + done = True + #write out activities.js + txtout = 'var activities = [\n' + for entry in entries: + txtout += str(entry) + ',\n' + txtout += ']\n\n' + fout=open(TARGET / folder / 'activities.js','w') + fout.write(txtout) + fout.close() +#also write out milestones.js +txtout = 'var activities = [\n' +for entry in ms_entries: + 
txtout += str(entry) + ',\n' +txtout += ']\n\n' +fout = open(TARGET / SUBJECT / COURSE / 'milestones.js','w') +fout.write(txtout) +fout.close() + diff --git a/generate b/generate new file mode 100755 index 0000000..a240300 --- /dev/null +++ b/generate @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +#this program generates activities for a course based on milestones.js and activities.js +#it calls different generators depending on the activity type: +#currently Karma, basic, Sugar or EPaath (Flash) + +import sys, subprocess +from optparse import OptionParser +from path import path +from generate_basic_lesson import generateBasicLesson +from generate_karma_lesson import process_description +from generate_karma_lesson import KarmaFramework +# +SOURCE = path('/home/tony/Desktop/git/newcontent') +#SOURCE = path('/home/tony/testcontent') +TARGET = path('/home/tony/Desktop/master') +#TARGET = path('/home/tony/testmaster') +MENUS = path('/home/tony/Desktop/git/generate/menus') + +def copyFiles(src, dst, files): + for file in files: + srcpth = path(src) / file + dstpth = path(dst) / file + rmcmd = 'rm -rf ' + dstpth + cpcmd = 'cp -r ' + srcpth + ' ' + dstpth + if srcpth.exists(): + if dstpth.exists(): + subprocess.call(rmcmd, shell=True) + subprocess.call(cpcmd, shell=True) + +def get_entries(srcpth): + fin = open(srcpth,'r') + txt = fin.read() + fin.close() + lines = txt.split('\n') + entries = [] + for line in lines: + try: + entry = eval(line)[0] + except: + continue + if len(entry) < 3: + entry = eval(line) + entries.append(entry) + return entries + +parser = OptionParser(usage="Usage: %prog [options] file") +(options, args) = parser.parse_args() +if not args: + print 'Specify a course (e.g. enp411 as an argument.' 
+ parser.print_help() + sys.exit(1) + +SUBJECT = args[0] +COURSE = args[1].lower() +#update version +cmd = 'git shortlog' +pipe = 'subprocess.PIPE' +pid = subprocess.Popen(cmd, cwd=SOURCE, stdout = subprocess.PIPE, stderr = subprocess.PIPE, shell=True) +pid.wait() +(result,err) = pid.communicate() +lines = result.split('\n') +version = lines[len(lines)-3].strip() +files = SOURCE.files('version*') +for file in files: + subprocess.call('rm ' + file,shell=True) +files = TARGET.files('version*') +for file in files: + subprocess.call('rm ' + file,shell=True) +fout = open(SOURCE / version,'w') +fout.write('') +fout.close() +cmd = 'cp ' + SOURCE / version + ' ' + TARGET +print cmd +subprocess.call(cmd, shell=True) +print 'reset basic files' +#reset basic files +cmd = 'cp ' + MENUS / 'subject.html ' + TARGET / 'index.html' +subprocess.call(cmd, shell=True) +fileList = ['subjects.js', 'karma'] +copyFiles(SOURCE, TARGET, fileList) +subprocess.call('cp ' + SOURCE / 'version-* ' + TARGET,shell=True) +#setup subjects +subjects = get_entries(SOURCE / 'subjects.js') +for entry in subjects: + subject = entry[1] + subprocess.call('mkdir -p ' + TARGET /subject, shell=True) + #copy icons + cmd = 'cp ' + SOURCE / subject / subject.lower() + '.png ' + TARGET / subject + subprocess.call(cmd, shell=True) + if subject == 'Library': + continue + #copy index.html + cmd = 'cp ' + MENUS / 'course.html ' + TARGET / subject / 'index.html' + subprocess.call(cmd, shell=True) + #copy courses.js + subprocess.call('cp ' + SOURCE / subject / 'courses.js ' + TARGET / subject, shell=True) +#create folder in TARGET (master) for SUBJECT COURSE +spth = SOURCE / SUBJECT / COURSE +tpth = TARGET / SUBJECT / COURSE +subprocess.call('rm -rf ' + tpth + '/*',shell=True) +subprocess.call('mkdir -p ' + tpth, shell=True) +#copy milestones.js to dst +subprocess.call('cp ' + spth / 'milestones.js ' + tpth, shell=True) +#copy index.html to dst +subprocess.call('cp ' + MENUS / 'milestone.html ' + tpth / 
'index.html',shell=True) +#make list of milestones from milestones.js +milestones = get_entries(spth / 'milestones.js') +print 'milestones',len(milestones) +count = 0 +for entry in milestones: + milestone = entry[4] + dstms = tpth / milestone + srcms = spth / milestone + #create target milestone folder + try: + subprocess.call('rm -rf ' + dstms, shell=True) + subprocess.call('mkdir ' + dstms, shell=True) + except: + print 'make',dstms,'failed',sys.exc_info()[:2] + #copy activities.js to activity folder + subprocess.call('cp '+ srcms / 'activities.js ' + dstms,shell=True) + #copy index.html to activity folder + cmd = 'cp ' + MENUS / 'activity.html ' + dstms / 'index.html' + subprocess.call(cmd ,shell=True) + #get list of activities in milestone + activities = get_entries(srcms / 'activities.js') + for entry in activities: + activity = entry[4] + activity_type = entry[3] + print 'milestone',milestone,'activity',activity,'activity_type', activity_type + #generate activities + src = srcms / activity + dst = dstms / activity + act = path('content') / SUBJECT / COURSE / milestone / activity + karmapth = SOURCE / 'karma' + subprocess.call('mkdir ' + dst,shell=True) + #switch based on activity type + if activity_type == 'basic': + generateBasicLesson(act, src,dst) + elif activity_type == 'Karma': + karma = KarmaFramework(karmapth) + process_description(karma,src / 'description.py', dst) + elif activity_type == 'EPaath': + generate-EPaath-lesson(src,dst) + else: #copy activity to master + subprocess.call('cp -r ' + src + '/* ' + dst, shell=True) + + + diff --git a/generate_basic_lesson.py b/generate_basic_lesson.py new file mode 100755 index 0000000..8876c3f --- /dev/null +++ b/generate_basic_lesson.py @@ -0,0 +1,407 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +import sys, subprocess +from path import path + +MENUS = path('/home/tony/Desktop/git/generate/menus') +# +def getTag(txt, tag): + start = txt.find(tag)+len(tag)+1 + end = txt[start:].find(' ')+start + if 
end < start: + end = len(txt) + return txt[start:end] + +def getImageData(image): + #parse image:format: 1_yyyy.png height xxx width yyy left + #get image number + end = image.find('_') + imgno = image[:end] + #get image name + start = image.find('_') + end = image.find(' ') + name = image[start+1:end] + #get height + height = getTag(image, 'height') + width = getTag(image, 'width') + if 'left' in image: + ifloat = 'left' + elif 'right'in image: + ifloat = 'right' + else: + ifloat = None + return (imgno, name, height, width, ifloat) + +def makeQuiz(screen): + #scan for quiz comments + tag = '<!--Q' + endTag = '-->' + quiz = [] + if tag+endTag in screen: + return True, quiz + while tag in screen: + #comment = <!--Q:'question' A:'answer'--> + start = screen.find(tag) + end = screen.find(endTag) + question = screen[start+4:end] + if question: + quiz.append(question) + screen = screen[end+len(endTag):] + return False, quiz + + +def findImages(screen): + #process imagelist + imageList = [] + tag = '<!--I' + endTag = '-->' + while tag in screen: + start = screen.find(tag) + end = screen.find(endTag) + comment = screen[start+len(tag):end] + if len(comment) > 0: + imageList.append(comment) + screen = screen[end+len(endTag):] + return imageList + + +def findAudio(screen): + #process audiolist + audioList = [] + tag = '<!--' + tags = "ARSXH" + endTag = '-->' + while tag in screen: + start = screen.find(tag) + end = screen.find(endTag) + comment = screen[start+len(tag):end] + if len(comment) > 0 and comment[0] in tags: + audioList.append(comment) + screen = screen[end+len(endTag):] + return audioList + +def generate_image(imageList): + #format: nn_img1.png height:xxx width:yyy left + txtout = '' + for line in imageList: + (imgno, name, height, width, ifloat) = getImageData(line) + real = path(name).namebase + txtout = txtout + " $('#I" + imgno + "')\n" + if ifloat: + txtout = txtout + " .addClass('image_"+ifloat+"')\n" + #txtout = txtout + " .append(karma.createImg('" + real 
+ "'))\n" + txtout += " $('<img>',{\n" + txtout += " src:host+pth+'/"+name+"'\n })\n .appendTo('#I"+imgno+"')\n" + return txtout + +def generate_load(pth): + lesson = '' + lesson = lesson + " $('<div id=" + lesson = lesson + '"txtMain"/>' + lesson = lesson + "')\n" + lesson = lesson + " .appendTo('#content')\n" + lesson = lesson + " .load(host+'cgi-bin/getFile.py',\n" + if screen == 0: + lesson = lesson + " {'filename':" + pth + "/a.txt'},\n" + else: + lesson = lesson + " {'filename':" + pth + "/a" + str(screen) + ".txt'},\n" + lesson = lesson + " function(){\n" + return lesson + +def generate_audio(audiolist): + #format A1_clip.ogg or R1_clip.ogg or X1_clip.ogg or S1_clip.txt or H1_clip.ogg + txtout = '' + for line in audiolist: + typ = line[0] + pos = line.find('_') + clip = line[pos+1:] + if typ == 'A': + txtout += " playAudio(pth+'"+clip+"')\n" + elif typ == 'X': + txtout += " playXoAudio(pth+'"+clip+"')\n" + elif typ == 'H': + txtout += " playHelp(pth+'"+clip+"')\n" + elif typ == 'S' and line[1] != 'G': + txtout += " sayText(pth+'"+clip+"')\n" + elif typ == 'R': + txtout += " recordAudio(pth+'"+clip+"')\n" + return txtout + +def generateLessonCSS(imageList): + txtout = '' + for image in imageList: + (imgno, name, height, width, ifloat) = getImageData(image) + insert = "#I"+imgno+"{height:"+height+"px; width:"+width+"px; }\n\n" + txtout += insert + return txtout + +def generateLessonKarma(imageList): + txtout = 'function lesson_karma(){\n return Karma({\n image:[\n' + for image in imageList: + (imgno, name, height, width, float) = getImageData(image) + txtout += "{name:'"+path(name).namebase+"', file:'"+name+"'},\n" + txtout += ' ],\n audio:[\n ]})};\n' + return txtout + +def generateQuiz(activity, quizList): + for i in range(len(quizList)): + quiz = quizList[i] + if len(quizList) == 1: + txtout = 'var quiz = {\n' + else: + txtout = 'var quiz'+str(i+1)+' = {\n' + tflist = [] + mclist = [] + salist = [] + #we need to make a list of the questions by type: 
multilist, tf, fill + #note: we should have cloze and sa should accept variant answers (e.g a/b where a, b are regular expressions + for question in quiz: + if "S:" in question: + mclist.append(question) + elif "T:" in question or 'F:' in question: + tflist.append(question) + else: + salist.append(question) + if mclist: + txtout += ' multiList:[\n' + for q in mclist: + #process question and add to txtout + apos = q.find('A:') + spos = q.find('S:') + ans = 'ans: ' + q[apos+2:spos] + ', ' + sel = 'ansSel: [' + q[spos+2:] + ']' + ques = '{ques: ' + q[:apos] + ', ' + txtout += ques + ans + sel + '},\n' + txtout += '],\n' + if tflist: + txtout += ' tf:[\n' + for q in tflist: + #process questionstring and add to quiztxt + apos1 = q.find('T:') + apos2 = q.find('F:') + ans = 'ans:' + q[apos1+2:apos2] + ', ' + sel = 'ansSel:' + q[apos2+2:] + ques = '{ques: ' + q[:apos1] + ', ' + txtout += ques + ans + sel + '},\n' + txtout += '],\n' + if salist: + txtout += ' fill:[\n' + for q in salist: + #process question string and add to quiztxt + apos = q.find('A:') + ans = 'ans: ' + q[apos + 2:] + ques = '{ques: ' + q[2:apos] + ',' + txtout += ques + ans + '},\n' + txtout += ' ]\n' + txtout += '};\n\n' + txtout += 'var options = {\n' + txtout += " title: '" + activity.namebase + "',\n" + txtout += ' random: false,\n' + txtout += ' allrandom: false,\n' + txtout += '};\n' + return txtout + +def generateLesson(srcpth, pth, screens, imageList, audioList, quiz): + #generate lesson.js + tag = '<!--SG-->' + txtout = '' + lessonpth = srcpth / 'lesson.js' + if lessonpth.exists(): + fin = open(lessonpth, 'r') + txtout += fin.read() + txtout += '\n' + fin.close() + if len(screens) == 1: + #single screen + txtout += 'function initialize(karma) {\n' + txtout += " host = 'http://localhost:8008/'\n" + txtout += " pth='"+pth+"/'\n" + txtout += " if(mode=='Faculty'){\n $('#ĺinkEdit').addClass('linkEdit');\n" + txtout += " }else{\n" + txtout += " $('#linkApply')\n .addClass('linkApply')\n 
.attr('score','70');\n }\n" + txtout += " $('<div id=" + txtout += '"txtMain"/>' + txtout += "')\n" + txtout += " .appendTo('#content')\n" + txtout += " .load(host+'cgi-bin/getFile.py',\n" + txtout += " {'filename':pth+'a.txt'},\n" + txtout += " function(){\n" + if imageList and imageList[0]: + txtout += generate_image(imageList[0]) + if audioList and audioList[0]: + txtout += generate_audio(audioList[0]) + txtout += ' });\n' + if tag in screens[0]: + txtout += " $('#linkStart')\n" + txtout += " .addClass('linkStart')\n" + txtout += " .click(function(){startGame(karma);\n" + txtout += " });\n" + txtout += '};\n\n' + if not 'startGame' in txtout: + txtout += 'function startGame(karma) {\n' + if quiz: + txtout += " $('#linkStart').addClass('linkStart');\n" + txtout += " $('<div id=" + '"quizArea"/>' + "')\n" + txtout += ' .appendTo("#content")\n' + txtout += ' $("#quizArea")\n' + txtout += ' .jQuizMe(quiz, options)\n' + txtout += '};\n\n' + txtout += 'setUpLesson(initialize, startGame);\n' + else: + #multiple screen + txtout += 'var currentScreen;\n\n' + for i in range(len(screens)): + screen = i+1 + txtout += 'function generateScreen'+str(screen)+'(karma) {\n' + txtout += ' currentScreen = ' + str(i+1) + '\n' + txtout += " host = 'http://localhost:8008/'\n" + txtout += " pth='"+pth+"/'\n" + txtout += " if(mode=='Faculty'){\n $('#ĺinkEdit').addClass('linkEdit');\n" + txtout += " }else{\n" + txtout += " $('#linkApply')\n .addClass('linkApply')\n .attr('score','70');\n }\n" + txtout += " $('<div id=" + txtout += '"txtMain"/>' + txtout += "')\n" + txtout += " .appendTo('#content')\n" + txtout += " .load(host+'cgi-bin/getFile.py',\n" + txtout += " {'filename':pth+'a" + str(screen) + ".txt'},\n" + txtout += " function(){\n" + if imageList[i]: + txtout += generate_image(imageList[i]) + txtout += ' });\n' + if audioList[i]: + txtout += generate_audio(audioList[i]) + if tag in screens[i]: + txtout += " $('#linkStart')\n" + txtout += " .addClass('linkStart')\n" + txtout += " 
def makeScreens(txt):
    """Split lesson source text into per-screen chunks on '<hr />' markers.

    Returns a list of screen strings.  A trailing chunk after the last
    marker is kept only if it is non-empty.
    """
    screens = []
    tag = '<hr />'
    while tag in txt:
        pos = txt.find(tag)
        screens.append(txt[:pos])
        txt = txt[pos + len(tag):]
    if len(txt) > 0:
        screens.append(txt)
    return screens


def addSpans(screen):
    """Insert a placeholder <span> after each '<!--I...-->' image comment.

    The span id is 'I' + the image number parsed by getImageData(), so the
    generated lesson JS can attach the image at that point in the text.
    Empty comments ('<!--I-->') are skipped.
    """
    tag = '<!--I'
    endTag = '-->'
    start = 0
    while screen[start:].find(tag) > -1:
        pos1 = screen[start:].find(tag)
        # BUG FIX: search for the closing '-->' *after* the opening tag.
        # The old code searched from `start`, so the '-->' of an unrelated
        # earlier comment produced a bogus (possibly negative) slice.
        pos2 = screen[start + pos1:].find(endTag)
        if pos2 < 0:
            break  # unterminated comment - stop rather than loop forever
        end = start + pos1 + pos2
        comment = screen[start + pos1 + len(tag):end]
        if len(comment) > 0:
            imgno, name, height, width, ifloat = getImageData(comment)
            span = "<span id = 'I" + imgno + "'></span>"
            screen = screen[:end + len(endTag)] + span + screen[end + len(endTag):]
            end = end + len(span)
        start = end + len(endTag)
    return screen


def generateBasicLesson(actpth, srcpth, dstpth):
    """Build a deployable Karma lesson in `dstpth` from the sources in `srcpth`.

    Reads source.txt, splits it into screens, extracts quizzes / images /
    audio, writes the per-screen a*.txt files and generates lesson.js,
    lesson-karma.js, lesson.css and (when needed) quiz.js.
    """
    # read source.txt (with-statement so the handle is not leaked)
    txtpth = srcpth / 'source.txt'
    with open(txtpth, 'r') as fin:
        txt = fin.read()
    # copy index.html and the source text to the destination.
    # NOTE(review): these shell out with shell=True on concatenated paths;
    # a path containing spaces or shell metacharacters would break/inject.
    subprocess.call('cp ' + MENUS / 'index.html ' + dstpth, shell=True)
    subprocess.call('cp ' + srcpth / 'source.txt ' + dstpth, shell=True)
    # copy assets to dst (png + ogg)
    for pattern in ('*.png', '*.ogg'):
        for asset in path(srcpth).files(pattern):
            subprocess.call('cp ' + asset + ' ' + dstpth, shell=True)
    # we don't copy gif because Karma doesn't handle them - they need to be
    # converted to png
    # scan source text returning list of screens (a*.txt)
    screens = makeScreens(txt)
    quiz = []
    imageList = []
    audioList = []
    quizFlag = False  # True when activity contains generated quiz
    for i in range(len(screens)):
        screen = screens[i]
        flag, quizText = makeQuiz(screen)
        if flag:
            quizFlag = True
        if quizText:
            quiz.append(quizText)
        imageList.append(findImages(screen))
        audioList.append(findAudio(screen))
        # write screen - but first add spans for images
        screen = addSpans(screen)
        if len(screens) == 1:
            outname = dstpth / 'a.txt'
        else:
            outname = dstpth / 'a' + str(i + 1) + '.txt'
        with open(outname, 'w') as fout:
            fout.write(screen)
    # create lesson.js
    txtout = generateLesson(srcpth, actpth, screens, imageList, audioList, quiz)
    with open(dstpth / 'lesson.js', 'w') as fout:
        fout.write(txtout)
    if imageList:
        # flatten the per-screen image lists
        # (loop variable renamed from `list`, which shadowed the builtin)
        totalList = []
        for perScreen in imageList:
            for image in perScreen:
                totalList.append(image)
        # create lesson-karma.js
        with open(dstpth / 'lesson-karma.js', 'w') as fout:
            fout.write(generateLessonKarma(totalList))
        # create lesson.css
        with open(dstpth / 'lesson.css', 'w') as fout:
            fout.write(generateLessonCSS(totalList))
    if quiz and not quizFlag:
        # create quiz.js
        with open(dstpth / 'quiz.js', 'w') as fout:
            fout.write(generateQuiz(dstpth, quiz))
class KarmaFramework():
    """Registry of the shared Karma framework assets.

    Wraps every framework JavaScript, CSS, audio and image file in a
    KarmaFile keyed by a short name, so lesson descriptions can refer to
    e.g. karma.java_script('jquery') without knowing file locations.
    """

    def __init__(self, root_dir):
        # root_dir: filesystem root of the Karma framework checkout;
        # every path below is resolved relative to it.
        self.root_dir = root_dir
        self.java_script_files = [
            self._karma_file('js/external/jquery-1.4.2.js', 'jquery'),
            self._karma_file('js/external/jquery-ui-1.8.2.js', 'jquery-ui'),
            self._karma_file('js/external/jquery.ui.core.js', 'ui.core'),
            self._karma_file('js/external/jquery.ui.mouse.js', 'ui.mouse'),
            self._karma_file('js/external/jquery.ui.widget.js', 'ui.widget'),
            self._karma_file('js/external/jquery.ui.position.js', 'ui.position'),
            self._karma_file('js/external/jquery.ui.draggable.js', 'ui.draggable'),
            self._karma_file('js/external/jquery.ui.droppable.js', 'ui.droppable'),
            self._karma_file('js/jquery.watermarkinput.js', 'jquery.watermarkinput'),
            self._karma_file('js/ui.scoreboard.js', 'ui.scoreboard'),
            self._karma_file('js/jquery.svg.js', 'jquery.svg'),
            self._karma_file('js/karma.js', 'karma'),
            self._karma_file('js/global.js', 'global'),
            self._karma_file('js/common.js', 'common'),
            self._karma_file('js/jquery.clickable.js', 'jquery.clickable'),
            self._karma_file('js/multiple-choice.js', 'multiple-choice'),
            self._karma_file('js/clock.js', 'clock'),
            self._karma_file('js/jquery.i18n.js', 'i18n'),
            self._karma_file('js/jquery.strings.js', 'jquery.strings'),
            self._karma_file('js/jquery.keyfilter.js', 'jquery.keyfilter'),
            self._karma_file('js/kStart.js', 'kstart'),
            self._karma_file('js/config.js', 'config'),
            self._karma_file('js/base.js','base')
            ]
        self.css_files = [
            self._karma_file('css/global.css', 'global'),
            self._karma_file('css/ui.scoreboard.css', 'ui.scoreboard'),
            self._karma_file('css/kStart.css', 'kstart')
            ]
        self.audio_files = [
            self._karma_file('audio/en_correct.ogg', 'correct'),
            self._karma_file('audio/en_incorrect.ogg', 'incorrect'),
            self._karma_file('audio/ne_correct.ogg', 'ne_correct'),
            self._karma_file('audio/ne_incorrect.ogg', 'ne_incorrect'),
            self._karma_file('audio/byebye.ogg', 'byebye'),
            self._karma_file('audio/trigger.ogg', 'trigger')
            ]
        self.image_files = [
            self._karma_file('image/title_block_lt.png', 'title_block_lt'),
            self._karma_file('image/title_block_rt.png', 'title_block_rt'),
            self._karma_file('image/favicon.ico', 'favicon')
            ]

    def _karma_file(self, path, name, **kw):
        # Build a KarmaFile resolved against the framework root.
        kw['karma_root'] = self.root_dir
        return KarmaFile(path, name, **kw)

    def _find_file(self, name, files):
        # Linear scan by registered short name; None when not registered.
        for f in files:
            if f.name() == name:
                return f
        return None

    def java_script(self, name):
        """Return the framework JS file registered as `name`, or None."""
        return self._find_file(name, self.java_script_files)

    def css(self, name):
        """Return the framework CSS file registered as `name`, or None."""
        return self._find_file(name, self.css_files)

    def audio(self, name):
        """Return the framework audio file registered as `name`, or None."""
        return self._find_file(name, self.audio_files)

    def image(self, name):
        """Return the framework image file registered as `name`, or None."""
        return self._find_file(name, self.image_files)
# The Lesson currently being generated; assigned by process_description().
theLesson = None

# Stack of description-file paths currently being exec'd; include()
# pushes/pops so relative paths resolve against the including file.
include_stack = []
script_root = os.path.abspath(os.path.dirname(argv0))


class File():
    """A file referenced by a lesson: either copied from the sources,
    taken from the Karma framework, or generated in memory and written
    out at deploy time."""

    # class-level defaults; instances overwrite them in __init__
    _name = None            # short reference name (None => not preloaded)
    src = ''                # resolved source path
    lesson_deploy = True    # False for framework (karma) files, which stay in place
    create_file = False     # True for files generated from `data`
    data = ''               # accumulated content for generated files

    def to_string(self):
        # Debug dump of all fields (Python 2 print statements).
        print 'name:', self._name
        print 'src:', self.src
        print 'lesson_deploy:', self.lesson_deploy
        print 'create_file:', self.create_file
        print 'data:', self.data
        print

    def __init__(self, pth, name=None, **kw):
        # kw flags: karma=True -> framework file (not copied into the lesson);
        # generated=True -> file is created from `data`; karma_root=<dir> ->
        # extra search root for framework files.
        self._name = name
        self.src = pth

        if 'karma' in kw and kw['karma']:
            self.lesson_deploy = False
        if 'generated' in kw and kw['generated']:
            self.create_file = True

        if not self.create_file:
            # find the existing file: probe the lesson sources, the directory
            # of the including description file, then the framework root;
            # the last match wins.
            test_files = []
            if self.lesson_deploy:
                test_files.append(os.path.join(theLesson.src_directory, pth))
            if len(include_stack) > 0:
                test_files.append(os.path.join(os.path.dirname(include_stack[-1]), pth))
            if 'karma_root' in kw:
                test_files.append(os.path.join(kw['karma_root'], pth))

            for f in test_files:
                abs_path = os.path.abspath(f)
                if os.path.isfile(abs_path):
                    self.src = abs_path
        else:
            self.src = self._absolute_path()

    def _deploy_folder(self):
        # Parent of the lesson output directory.
        return os.path.abspath(path(theLesson.directory).parent)

    def name(self):
        return self._name

    # only applicable for assets
    def preload(self):
        # Only named assets are listed in the Karma preload manifest.
        return self._name != None

    def _src_path(self):
        return self.src

    def relative_path(self, start=None, **kw):
        """Path to this file relative to the lesson output (default),
        or to the deploy folder when start == 'deploy'.

        Framework files are rewritten as '../../../..' + the '/karma/'
        suffix of their absolute path; everything else collapses to the
        bare filename.  kw: web=True converts backslashes to slashes.
        """
        if start == None or start == '':
            # default relative is to lesson output
            start = theLesson.directory
        elif start == 'deploy':
            start = self._deploy_folder()

        #rel_path = os.path.relpath(self._absolute_path(), start)
        pth = str(self._absolute_path())
        pos = pth.find('/karma/')
        if pos > -1:
            rel_path = path('../../../..' + pth[pos:])
        else:
            rel_path = path(pth).name
        if 'web' in kw and kw['web']:
            rel_path = string.replace(rel_path, '\\', '/')

        return rel_path

    def _absolute_path(self):
        # Deployed files live flat in the deploy folder; framework files
        # stay at their source location.
        if self.lesson_deploy:
            return os.path.join(self._deploy_folder(), self._basename())
        else:
            return self.src

    def _basename(self):
        return os.path.basename(self.src)

    def make_available(self):
        """Materialize the file at its deploy location: write generated
        data, or copy the source file; framework files are left alone."""
        if self.create_file:
            f = open(self._absolute_path(), 'w')
            print >>f, self.data
            f.close()
        elif self.lesson_deploy:
            check_file_exists(self._src_path())
            shutil.copy(self._src_path(), self._absolute_path())

    # only applicable for generated files
    def write(self, x):
        # Append `x` to the in-memory content of a generated file.
        self.data = self.data + x


class KarmaFile(File):
    """A File belonging to the Karma framework (never copied into the
    lesson; resolved against karma_root)."""
    def __init__(self, path, name=None, **kw):
        kw['karma'] = True
        File.__init__(self, path, name, **kw)
#TBD: factor this out in a separate file, so it is easy to provide
# your own header/footer
#TBD: make header/footer customizable
def generate_header(karma, dir, body, title):
    """Append the standard lesson header (back button, title block, help
    and OLE buttons) to the HtmlElement `body`."""
    header = body.div(id='header')

    header.div(id='topbtn_left').div(id='linkBackLesson',
                                     title='Back',
                                     className='linkBack')

    lesson_title = header.div(id='lesson_title')
    lesson_title.img(src=karma.image('title_block_lt').relative_path(dir, web=True),
                     width=33, height=75, align='absmiddle')
    lesson_title.text(title)
    lesson_title.img(src=karma.image('title_block_rt').relative_path(dir, web=True),
                     width=33, height=75, align='absmiddle')

    header.div(className='topbtn_right').div(title='Help', id='linkHelp')

    # Nepali tooltip ("produced by OLE Nepal") on the logo button
    header.div(className='topbtn_right').div(title=u'साझा शिक्षा ई-पाटी द्वारा निर्मित',
                                             id='linkOle')


def generate_footer(body):
    """Append the footer (prev/next, scoreboard, play-again, check-answer)
    to `body` according to theLesson.footer_configuration."""
    footer = body.div(id='footer')

    config = theLesson.footer_configuration

    if config['link_next']:
        footer.div(title='Next', id='linkNextLesson', className='linkNext')
    if config['link_previous']:
        footer.div(title='Previous', id='linkPrevLesson', className='linkBack')
    if config['scoreboard']:
        footer.div(id='score_box', display='none')

    footer.div(className='botbtn_right').div(title='Play Again', id='linkPlayAgain')

    if config['link_check_answer']:
        footer.div(className='botbtn_right').div(title='Check Answer', id='linkCheck')


def topological_sort(nodes, dependencies, key):
    """Sort nodes topologically according to dependencies.
    A dependency is a tuple (key(earlier_node), key(later_node)),
    meaning that earlier_node should come before later_node in the
    result.  Dependencies whose endpoints are not in `nodes` are
    ignored.  Exits the process when a dependency cycle is detected.
    """
    from collections import deque
    successors = {}
    predecessor_count = {}
    node_map = {}
    for node in nodes:
        k = key(node)
        node_map[k] = node
        successors[k] = []
        predecessor_count[k] = 0
    for (dep0, dep1) in dependencies:
        if dep0 in node_map and dep1 in node_map:
            successors[dep0].append(dep1)
            predecessor_count[dep1] = predecessor_count[dep1] + 1
    # Kahn's algorithm: repeatedly emit a node with no remaining
    # predecessors.  (Queue renamed from `next`, which shadowed the builtin.)
    ready = deque()
    for k, v in successors.items():
        if predecessor_count[k] == 0:
            ready.append(k)
    result = []
    while len(ready) != 0:
        k = ready.popleft()
        result.append(node_map[k])
        for successor in successors[k]:
            predecessor_count[successor] = predecessor_count[successor] - 1
            if predecessor_count[successor] == 0:
                ready.append(successor)
    if len(result) != len(nodes):
        # FIX: print as a function call (single-argument form behaves
        # identically on Python 2 and 3; the statement form was 2-only).
        print('Error: dependency loop.')
        sys.exit(1)
    return result
def include_dependencies(karma, files):
    """Return `files` plus every Karma JS file they transitively depend
    on (per java_script_dependencies), dependencies first, without
    duplicates."""
    result = []
    visited = set()
    deps = {}
    # invert the (before, after) pairs into after -> [befores]
    for dep in java_script_dependencies:
        deps[dep[1]] = deps.setdefault(dep[1], []) + [dep[0]]
    js_files = {}
    for f in karma.java_script_files:
        js_files[f.name()] = f
    def add_dependencies(wanted):
        # depth-first post-order insert; parameter renamed from `list`,
        # which shadowed the builtin.
        for x in wanted:
            if x not in visited:
                add_dependencies([js_files[name] for name
                                  in deps.setdefault(x.name(), [])])
                result.append(x)
                visited.add(x)
    add_dependencies(files)
    return result

def sort_java_script_files(files):
    """Order the Karma framework JS files topologically among themselves
    and append any non-Karma (lesson-local) files at the end."""
    karma_files = filter(lambda x: isinstance(x, KarmaFile), files)
    other_files = filter(lambda x: not isinstance(x, KarmaFile), files)
    result = topological_sort(karma_files,
                              java_script_dependencies,
                              lambda x: x.name()) + other_files
    return result


def createDiv(id):
    """Create a detached <div> HtmlElement with the given id."""
    return HtmlElement('body', 'div', True).attr(id=id)
    def name(self):
        """Alias for deploy_name()."""
        return self.deploy_name()

    def deploy_name(self):
        # The deployed lesson is named after its output directory.
        return os.path.basename(self.directory)

    def generate(self):
        """Write the lesson: copy files, emit index.html and lesson-karma.js.
        NOTE: relies on copy_files() having chdir'd into parent_directory,
        since the output streams are opened with relative paths."""
        print 'writing lesson to ' + self.deploy_name()
        self.copy_files()
        self.print_html_on(codecs.open('index.html', 'w', 'UTF-8'))
        #self.print_start_html_on(codecs.open('start.html', 'w', 'UTF-8'))
        #self.print_kdoc_html_on(codecs.open('kDoc.html', 'w', 'UTF-8'))
        self.print_karma_js_on(open('lesson-karma.js', 'w'))

    def compile_translations(self):
        # compile translation JS files from MO files: each <lang>.mo in the
        # sources becomes a <lang>.js that registers the strings with
        # $.i18n and selects that locale.
        for srcfile in os.listdir(self.src_directory):
            if fnmatch.fnmatch(srcfile, '*.mo'):
                lang = os.path.splitext(srcfile)[0]
                srcpath = os.path.join(self.src_directory, srcfile)
                targpath = os.path.join(self.directory, lang +'.js')
                json_translations = mo2js.gettext_json(open(srcpath, 'r'), True)

                f = codecs.open(targpath, encoding='utf-8', mode='w+')
                f.write('$.i18n.storeLocaleStrings("%s",\n' % lang);
                f.write(json_translations)
                f.write(');\n');
                f.write('$.i18n.setLocale("%s");\n' % lang);

    def set_directory(self, dir):
        # Output directory is resolved relative to parent_directory.
        self.directory = os.path.abspath(os.path.join(self.parent_directory, dir))

    def print_html_on(self, stream):
        """Emit the lesson's index.html: title, favicon, CSS links, all JS
        files in dependency order, then the registered content divs."""
        doc = HtmlDocument()
        html = doc.html()
        head = html.head()
        head.title().text(self.title)
        head.meta(content='text/html, charset=utf-8', httpEquiv='Content-Type')
        head.link(type='image/ico',
                  rel='icon',
                  href=self.karma.image('favicon').relative_path(None, web=True))
        for file in self.css_files:
            head.link(type='text/css',
                      rel='stylesheet',
                      href=file.relative_path(None, web=True))
        all_java_script_files = include_dependencies(self.karma,
                                                     self.java_script_files)
        for file in sort_java_script_files(all_java_script_files):
            head.script(type='text/javascript',
                        src=file.relative_path(None, web=True))
        head.script(type='text/javascript',
                    src='../../../../subjects.js')
        body = html.body()
        header = body.div(id='header')
        body.children.extend(self.divs)
        footer = body.div(id='footer')
        doc.print_on(stream)

    def print_karma_js_on(self, stream):
        """Emit lesson-karma.js: a lesson_karma() function returning a
        Karma() instance preloading all *named* image and audio assets."""
        print >>stream, '/*'
        for l in warning_text_lines:
            print >>stream, ' *', l
        print >>stream, ' */'
        def format_image(img):
            # img is a [name, File] pair
            return "{name:'%s', file:'%s'}" % (img[0], img[1].relative_path('deploy', web=True))
        def format_audio(a):
            return "{name:'%s', file:'%s'}" % (a[0], a[1].relative_path('deploy', web=True))
        def format_assets(name, assets, format_asset, indentation):
            # render one 'name: [...]' entry, aligning continuation lines
            prefix = '%s: [' % name
            sep = ',\n' + (len(prefix) + indentation) * ' '
            postfix = ']'
            to_preload = filter(lambda asset: asset[1].preload(), assets)
            return prefix + sep.join(map(format_asset, to_preload)) + postfix
        print >>stream, 'function lesson_karma() {'
        return_karma = '    return Karma({'
        indentation = len(return_karma)
        print >>stream, return_karma + (',\n' + indentation * ' ').join(
            [format_assets('image',
                           self.image_files,
                           format_image,
                           indentation),
             format_assets('audio',
                           self.audio_files,
                           format_audio,
                           indentation)]) + '});'
        print >>stream, '}'

def lesson(grade, subject, title, week, browser_title=None, lesson_title=None, locale=None, summary=''):
    """Description-file entry point: configure theLesson's metadata and
    register the standard JS files (plus lesson.js and locale strings
    when present).

    NOTE: the title/dirname arguments are currently overridden by the
    output directory's basename (see the reassignment of `title` below).
    """
    def camelcase(str):
        words = str.replace("'", '').split()
        return ''.join([words[0].lower()] + [x.capitalize() for x in words[1:]])

    #dirname = '%s_%s_%s_%s_K' % (grade, subject, camelcase(title), week);
    title = path(theLesson.parent_directory).namebase
    dirname = title
    theLesson.set_directory(path(theLesson.parent_directory).namebase)
    theLesson.start_title = title
    theLesson.title = title
    theLesson.lesson_title = lesson_title or title
    theLesson._grade = grade
    theLesson._subject = subject
    theLesson._week = week
    theLesson.summary = summary
    java_script('jquery')
    java_script('karma')
    java_script('common')
    java_script('i18n')
    java_script('base')
    # include the lesson.js if it exists
    lesson_js = frob_path('lesson.js')
    if os.path.exists(lesson_js):
        java_script('lesson.js')
    #add_help()
    # include the locale strings too

    if locale != None:
        theLesson.java_script_files.append(File('jquery.i18n.'+ locale +'.js', type='js', karma=True))

        locale_mo = frob_path(locale + '.mo')
        if os.path.exists(locale_mo):
            targpath = os.path.join(theLesson.directory, locale +'.js')
            theLesson.java_script_files.append(File(targpath, None, type='js', karma=True))


def java_script(name, **kw):
    """Register a JS file with the current lesson: a framework file when
    `name` is a known Karma short name, otherwise a lesson-local file.
    Warns (and skips the append) on duplicate registration."""
    result = theLesson.karma.java_script(name)
    if not result:
        result = File(name, name, **kw)
    if name in [f.name() for f in theLesson.java_script_files]:
        print 'Warning: the java_script file \'' + name + '\' is included twice.'
    else:
        theLesson.java_script_files.append(result)
    return result


def css(name):
    """Register a CSS file (framework or lesson-local) with the lesson."""
    result = theLesson.karma.css(name)
    if not result:
        result = File(name, name)
    theLesson.css_files.append(result)
    return result


def image(file, name=None):
    """Register an image asset; stored as a [name, File] pair so it can
    be preloaded by lesson-karma.js.  Defaults the name to the file path."""
    result = None
    if name == None:
        name = file
    result = theLesson.karma.image(name)
    if not result:
        result = File(file, name)
    theLesson.image_files.append([name, result])
    return result


def audio(file, name=None):
    """Register an audio asset; same contract as image()."""
    result = None
    if name == None:
        name = file
    result = theLesson.karma.audio(name)
    if not result:
        result = File(file, name)
    theLesson.audio_files.append([name, result])
    return result
def div(**info):
    """Register a content <div> with the lesson and return it.
    div(id='content') is rejected because that div is added automatically."""
    if 'id' in info and info['id'] == 'content':
        # single-argument print() is identical on Python 2 and 3
        print('Warning: div(id=\'content\') no longer needed (it\'s added automatically).')
        return None
    result = createDiv(info['id'])
    theLesson.divs.append(result)
    return result


def footer_configuration(**kw):
    """Override entries of theLesson.footer_configuration; exits on an
    unknown option.  Enabling the scoreboard pulls in its CSS/JS."""
    global theLesson
    config = theLesson.footer_configuration
    for k, v in kw.items():
        if not k in config:
            print('Error: unsupported footer configuration option: ' + k + '.')
            print('Possible options: ' + ', '.join(config.keys()))
            sys.exit(1)
        config[k] = v
    if config['scoreboard']:
        css('ui.scoreboard')
        java_script('ui.scoreboard')


def frob_path(path):
    """Resolve `path` to an absolute path; relative paths are taken
    relative to the description file currently being included."""
    if not os.path.isabs(path):
        return os.path.normpath(os.path.join(os.path.dirname(include_stack[-1]),
                                             path))
    else:
        return os.path.abspath(path)


def include(pth):
    """Execute another description fragment, tracking it on include_stack
    so its relative paths resolve correctly."""
    pth = frob_path(pth)
    include_stack.append(pth)
    check_file_exists(pth)
    execfile(pth, globals())
    include_stack.pop()


def add_help():
    """Attach help content to the lesson: prefer help.html, fall back to
    help.png, otherwise warn."""
    help_html = frob_path('help.html')
    help_img = frob_path('help.png')
    if (os.path.exists(help_html)):
        f = codecs.open(help_html, 'r', 'UTF-8')
        div(id='help').div(id='helpText').innerhtml(f.read())
        f.close()  # close the handle we opened (previously leaked)
    elif (os.path.exists(help_img)):
        img = image(help_img, 'help')
        src = img.relative_path(None, web=True)
        div(id='help').img(src=src)
    else:
        # BUG FIX: this branch referenced an undefined name `help_path`
        # (guaranteed NameError); report the files actually probed.
        print('Warning: neither ' + help_html + ' nor ' + help_img + ' exists.')


def check_file_exists(path):
    """Exit with an error message when `path` is not an existing file."""
    if not os.path.isfile(path):
        print('Error: the file ' + path + ' doesn\'t exist.')
        sys.exit(1)
def find_all_description_files():
    """Collect absolute paths of every lessons/**/description.py under
    the script root."""
    found = []
    lesson_folder = os.path.join(script_root, 'lessons')
    for root, dirs, files in os.walk(lesson_folder):
        if 'description.py' in files:
            full = os.path.join(script_root, root, 'description.py')
            found.append(os.path.abspath(full))
    return found


def constantly(x):
    """Return a one-argument function that ignores its argument and
    always returns x."""
    def always(_ignored):
        return x
    return always


def process_description(karma, description, output_dir, lesson_filter=constantly(True)):
    """Execute one description.py against a fresh global Lesson.

    Builds theLesson from the description's directory, runs the
    description, and generates the lesson when `lesson_filter` accepts
    it.  Returns the Lesson, or None when filtered out.
    """
    os.chdir(script_root)
    description = os.path.abspath(description)

    global theLesson
    theLesson = Lesson(os.path.abspath(os.path.dirname(description)))
    theLesson.karma = karma
    theLesson.parent_directory = os.path.abspath(output_dir)
    theLesson.java_script_files.append(File('lesson-karma.js', None, generated=True))
    include_stack.append(description)
    check_file_exists(description)
    execfile(description, globals())
    include_stack.pop()

    if not lesson_filter(theLesson):
        return None
    theLesson.generate()
    return theLesson
# Keyword-argument spellings for HTML attributes whose real names are not
# valid Python identifiers ('-' is illegal, 'class' is reserved).
attribute_names = dict(
    httpEquiv='http-equiv',
    className='class'
)


# Characters that must be entity-escaped in HTML text content.
# '&' must come first so entities produced by the later replacements are
# not themselves re-escaped.
# BUG FIX: the table had been round-tripped through an HTML renderer,
# collapsing every entity to its literal character (making escape() a
# no-op and the final u''' entry a syntax error); restored the entities.
html_escapes = [(u'&', u'&amp;'),
                (u'>', u'&gt;'),
                (u'<', u'&lt;'),
                (u'"', u'&quot;'),
                (u"'", u'&#39;')]


def escape(string):
    """Return `string` with the HTML-special characters &, >, <, " and '
    replaced by their character entities."""
    for (x, y) in html_escapes:
        if x in string:
            string = string.replace(x, y)
    return string
def innerhtml(self, html): + result = HtmlSource(html) + self._hookup_to_parent(result) + return result + + def comment(self, txt): + result = HtmlComment(txt) + self._hookup_to_parent(result) + return result + + # Utility functions + def meta_utf8(self): + self.meta(content='text/html, charset=utf-8', httpEquiv='Content-Type') + + def favicon(self, href): + self.link(type='image/ico', rel='icon', href=href) + + def css(self, href): + self.link(type='text/css', rel='stylesheet', href=href) + + def java_script(self, src): + self.script(type='text/javascript', src=src) + + def print_on(self, stream): + for child in self.children: + child.print_on(stream) + + +class HtmlDocument(HtmlFragment): + def __init__(self): + HtmlFragment.__init__(self) + self.stack = [self] + + def document(self): + return self + + def _hookup_to_parent(self, child): + self.document().stack[-1].append_child(child) + + def print_on(self, stream): + print >>stream, '<!DOCTYPE html>' + HtmlFragment.print_on(self, stream) + + def to_file(self, file_name): + import codecs + f = codecs.open(file_name, 'w', 'UTF-8') + self.print_on(f) + f.close() + + +class HtmlElement(HtmlFragment): + def __init__(self, document, tag, separate_closing_tag): + HtmlFragment.__init__(self) + self._document = document + self.tag = tag + self._separate_closing_tag = separate_closing_tag + + def _hookup_to_parent(self, child): + self.append_child(child) + + def document(self): + return self._document + + # __enter__ and __exit__ enable use of 'with' statement + def __enter__(self): + self.document().stack.append(self) + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.document().stack.pop() + + def attr(self, **kw): + self.attributes = kw + return self + + def print_on(self, stream): + def attribute_key_to_string(k): + return attribute_names[k] if k in attribute_names else k + + def attribute_to_string(kv): + return u'%s="%s"' % (attribute_key_to_string(kv[0]), + unicode(kv[1])) + + def 
attributes_to_string(attrs): + if len(attrs): + return u' ' + u' '.join([attribute_to_string(kv) + for kv in attrs.items()]) + else: + return u'' + + attributes_string = attributes_to_string(self.attributes) + + if self._separate_closing_tag: + tag_open = u'<%s%s>' % (self.tag, attributes_string) + tag_close = u'</%s>' % self.tag + if len(self.children): + print >>stream, tag_open + HtmlFragment.print_on(self, stream) + print >>stream, tag_close + else: + print >>stream, tag_open + tag_close + else: + print >>stream, u'<%s%s/>' % (self.tag, attributes_string) + + +class HtmlText(): + def __init__(self, txt): + self.text = txt + + def print_on(self, stream): + print >>stream, escape(self.text) + + +class HtmlComment(): + def __init__(self, txt): + if (txt.find('-->') != -1): + print >>sys.stderr, 'Unable to deal with \'-->\' in comment string.' + sys.exit(1) + self.text = txt + + def print_on(self, stream): + print >>stream, '<!--', self.text, '-->' + + +class HtmlSource(): + def __init__(self, txt): + self.text = txt + + def print_on(self, stream): + print >>stream, self.text diff --git a/lesson_html.py b/lesson_html.py new file mode 100755 index 0000000..847a00c --- /dev/null +++ b/lesson_html.py @@ -0,0 +1,145 @@ +#! 
#!/usr/bin/env python2.6
# -*- coding: utf-8 -*-
"""Builders for the HTML pages of a lesson: the start page, the main
index page and the teacher's-note (kDoc) page.

Each builder returns an html.HtmlDocument ready to be written out.
"""

import html


def start_html(lesson):
    """Return the start page document for *lesson*."""
    # Render the grade with the matching Devanagari digit (grades 0-9).
    displayGrade = u'०१२३४५६७८९'[lesson.grade()]
    displaySubject = {
        'English': 'English',
        'Maths': u'गणित',
        'Nepali': u'नेपाली'}[lesson.subject()]
    karma = lesson.karma

    doc = html.HtmlDocument()
    with doc.html():
        with doc.head():
            with doc.title():
                doc.text(lesson.lesson_title)
            doc.meta_utf8()
            doc.favicon(karma.image('favicon').relative_path(None, web=True))
            doc.css(karma.css('kstart').relative_path(None, web=True))
            for f in ['jquery', 'kstart']:
                doc.java_script(karma.java_script(f).relative_path(None, web=True))
        with doc.body(id='kStart'):
            with doc.div(id='top'):
                doc.div(id='backBtn', title='Back')
                with doc.div(id='topMiddle'):
                    doc.div(id='topDesc', className='center').text(u'साझा शिक्षा ई-पाटीद्वारा निर्मित')
                    doc.div(id='topE-Paath', className='center').text(u'ई-पाठ')
            with doc.div(id='middle'):
                with doc.div(id='grade', className='center'):
                    doc.span(id='gradeText').text(u'कक्षा:')
                    doc.span(id='gradeNum').text(displayGrade)
                doc.div(id='subject', className='center').text(displaySubject)
                with doc.div(id='lessonTitle', className='center'):
                    doc.a(href='./index.html').text(lesson.lesson_title)
                doc.div(id='lessonDesc', className='center').text(lesson.summary)
                with doc.div(id='teachersNoteBtn', className='button'):
                    with doc.a(href='./kDoc.html?back=start.html&doc=teachersNote'):
                        doc.div().text(u'Teacher\'s Note')
                        doc.div().text(u'पाठविवरण')
            with doc.div(id='bottom'):
                doc.div(id='logo', title=u'साझा शिक्षा ई-पाटी द्वारा निर्मित')
                doc.div(id='logoHelp')
    return doc


def index_html_header(doc, karma, dir, body, titles):
    """Emit the shared page header into *doc*: back button, one title
    block per entry of *titles*, help button and logo.

    *dir* is the directory the page lives in, used for relative image
    paths.  NOTE(review): *body* is unused; kept for signature
    compatibility with existing callers.
    """
    def create_title(text):
        def img(name):
            return doc.img(src=karma.image(name).relative_path(dir, web=True),
                           width=33, height=75, align='absmiddle')

        with doc.div(className='lesson_title'):
            img('title_block_lt')
            doc.text(text)
            img('title_block_rt')

    with doc.div(id='header'):
        with doc.div(id='topbtn_left'):
            doc.div(id='linkBackLesson', title='Back', className='linkBack')
        for title in titles:
            create_title(title)
        with doc.div(className='topbtn_right'):
            doc.div(title='Help', id='linkHelp')
        with doc.div(className='topbtn_right'):
            doc.div(id='linkOle', title=u'साझा शिक्षा ई-पाटी द्वारा निर्मित')


def index_html_footer(doc, subject, body, config):
    """Emit the page footer into *doc*: navigation, scoreboard,
    play-again and (optionally) check-answer / audio buttons.

    *config* is a dict of boolean flags: 'link_next', 'link_previous',
    'scoreboard', 'link_check_answer', 'audio_buttons'.  The play-again
    label is English or Nepali depending on *subject*.
    NOTE(review): *body* is unused; kept for signature compatibility.
    """
    with doc.div(id='footer'):
        if config['link_next']:
            doc.div(title='Next', id='linkNextLesson', className='linkNext')
        if config['link_previous']:
            doc.div(title='Previous', id='linkPrevLesson', className='linkBack')
        if config['scoreboard']:
            doc.div(id='score_box', display='none')

        with doc.div(className='botbtn_right'):
            if subject == 'English':
                doc.div(title='Play Again', id='linkPlayAgain', className='english').text('Play Again')
            else:
                doc.div(title='Play Again', id='linkPlayAgain', className='nepali').text(u'फेरी खेलौँ')

        if config['link_check_answer']:
            with doc.div(className='botbtn_right'):
                doc.div(title='Check Answer', id='linkCheck')

        if config['audio_buttons']:
            with doc.div(className='botbtn_right'):
                doc.div(id='linkPlayAudio', className='english audioButton').text('Play')
            with doc.div(className='botbtn_right'):
                doc.div(id='linkPlayRecordedAudio', className='english audioButton').text('Play')
            with doc.div(className='botbtn_right'):
                doc.div(id='linkRecordAudio', className='english audioButton').text('Record')


def index_html(lesson, warning_text_lines):
    """Return the main index page for *lesson*.

    *warning_text_lines* are embedded at the top as HTML comments;
    '--' is rewritten to '__' because it may not appear inside an HTML
    comment.
    """
    karma = lesson.karma
    doc = html.HtmlDocument()
    for line in warning_text_lines:
        # Use the str method instead of the deprecated string-module
        # function string.replace().
        doc.comment(line.replace('--', '__'))
    with doc.html():
        with doc.head():
            doc.title().text(lesson.title)
            doc.meta_utf8()
            for css_file in lesson.css_files:
                doc.css(css_file.relative_path(None, web=True))
            doc.favicon(karma.image('favicon').relative_path(None, web=True))
            for js_file in lesson.all_java_script_files():
                doc.java_script(js_file.relative_path(None, web=True))
        with doc.body() as body:
            titles = [lesson.lesson_title]
            if lesson.subject() == 'Maths' and lesson.lesson_title != lesson.start_title:
                titles.append(lesson.start_title)
            index_html_header(doc, karma, lesson.directory, body, titles)
            body.children.extend(lesson.divs)
            index_html_footer(doc, lesson.subject(), body,
                              lesson.footer_configuration)
    return doc


def kdoc_html(lesson):
    """Return the kDoc (teacher's note / lesson plan) page for *lesson*."""
    karma = lesson.karma
    subject = unicode(lesson.subject())
    title = unicode(lesson.title)
    doc = html.HtmlDocument()
    with doc.html():
        with doc.head():
            doc.title().text('Lesson Plan for {0} {1}'.format(subject, title))
            doc.meta_utf8()
            doc.favicon(karma.image('favicon').relative_path(None, web=True))
            for css in ['karma', 'ui.kHeader', 'kDoc']:
                doc.css(karma.css(css).relative_path(None, web=True))
            for js in ['jquery', 'jquery-ui', 'karma', 'ui.kHeader', 'kDoc']:
                doc.java_script(karma.java_script(js).relative_path(None, web=True))
        with doc.body(id='kDoc'):
            doc.div(id='kHeader')
            with doc.div(id='kHelp', title='Help'):
                doc.text(u'पाठविवरणर पाठयोजना सहज तरिकाले पढ्न तपाईले निम्न कार्य गर्न सक्नु हुन्छ ।')
                doc.br()
                doc.text(u'पाठ पृष्टको दाईने तर्फको ठाडो रेखा तल माथि गर्दा')
                doc.br()
                doc.text(u'तपाईले हाल पढिरहेको पृष्टलाई आवश्यकता अनुसार तल माथि गर्न सक्नुहुन्छ')
            doc.iframe(id='iframeLessonPlan', src='')
    return doc
zs5 zs5m08' + parser.print_help() + sys.exit(1) + +SUBJECT = args[0] +COURSE = args[1] +MILESTONE = args[2] +tag = '<hr />' +basepath = SOURCE / SUBJECT / COURSE / MILESTONE +files = basepath.files('*.html') +file = files[0] +fin = open(basepath / file.name,'r') +txt = fin.read() +fin.close() +pos = txt.find('Activity') +count = 0 +while pos > -1: + pos1 = txt.find('<CENTER>') + pos2 = txt.find('</CENTER>') + if pos1 < 0 or pos2 < 0: + pos = -1 + continue + if count < 1: + txt = txt[:pos1]+txt[pos2+9:] + else: + txt = txt[:pos1] + '<hr />' + txt[pos2+9:] + count += 1 + pos = txt.find('Activity') + print count, pos, len(txt) +fout = open(basepath / MILESTONE+'.html','w') +fout.write(txt) +fout.close diff --git a/menus/activity.html b/menus/activity.html new file mode 100755 index 0000000..9f7f515 --- /dev/null +++ b/menus/activity.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html> + <head> + <title>Activity Ladder </title> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> + <link type="image/ico" rel="icon" href="../../../karma/image/favicon.ico" /> + <link type="text/css" rel="stylesheet" href="../../../karma/css/global.css" /> + <link type="text/css" rel="stylesheet" href="../../../karma/css/menu.css" /> + <script type="text/javascript" src="../../../karma/js/external/jquery-1.4.2.js"></script> + <script type="text/javascript" src="../../../karma/js/external/jquery-ui-1.8.2.js"></script> + <script type="text/javascript" src="../../../karma/js/karma.js"></script> + <script type="text/javascript" src="../../../karma/js/global.js"></script> + <script type="text/javascript" src="../../../subjects.js"></script> + <script type="text/javascript" src="activities.js"></script> + <script type="text/javascript" src="../../../karma/js/lesson.js"></script> + </head> + <body> + <div id="header"></div> + <div id="content"> + <div id="frame" style="position:absolute;top:0px;left:0px"></div> + <canvas id="ladder" 
style="position:absolute;top:0px;left:0px"></canvas> + </div> + </body> +</html> diff --git a/menus/course.html b/menus/course.html new file mode 100755 index 0000000..30256dc --- /dev/null +++ b/menus/course.html @@ -0,0 +1,20 @@ +<!DOCTYPE html> +<html> + <head> + <title>Course </title> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> + <link type="text/css" rel="stylesheet" href="../karma/css/global.css" /> + <link type="text/css" rel="stylesheet" href="../karma/css/menu.css" /> + <link type="image/ico" rel="icon" href="../karma/image/favicon.ico" /> + <script type="text/javascript" src="../karma/js/external/jquery-1.4.2.js"></script> + <script type="text/javascript" src="../karma/js/external/jquery-ui-1.8.2.js"></script> + <script type="text/javascript" src="../karma/js/karma.js"></script> + <script type="text/javascript" src="../karma/js/global.js"></script> + <script type="text/javascript" src="courses.js"></script> + <script type="text/javascript" src="../karma/js/course.js"></script> + </head> + <body> + <div id="header"></div> + <div id='content'></div> + </body> +</html> diff --git a/menus/index.html b/menus/index.html new file mode 100755 index 0000000..ff7290c --- /dev/null +++ b/menus/index.html @@ -0,0 +1,29 @@ +<!DOCTYPE html> +<html> +<head> +<meta content="text/html, charset=utf-8" http-equiv="Content-Type"/> +<link href="../../../../karma/image/favicon.ico" type="image/ico" rel="icon"/> +<link href="../../../../karma/css/global.css" type="text/css" rel="stylesheet"/> +<link href="../../../../karma/css/jquizme.css" type="text/css" rel="stylesheet"/> +<link href="lesson.css" type="text/css" rel="stylesheet"/> +<script src="../../../../karma/js/external/jquery-1.4.2.js" type="text/javascript"></script> +<script src="../../../../karma/js/external/jquery-ui-1.8.2.js" type="text/javascript"></script> +<script src="../../../../karma/js/karma.js" type="text/javascript"></script> +<script src="../../../../karma/js/common.js" 
type="text/javascript"></script> +<script src="../../../../karma/js/jquery.clickable.js" type="text/javascript"></script> +<script src="../../../../karma/js/jquery.i18n.js" type="text/javascript"></script> +<script src="../../../../karma/js/jquizme.js" type="text/javascript"></script> +<script src="../../../../karma/js/math.js" type="text/javascript"></script> +<script src="../../../../karma/js/global.js" type="text/javascript"></script> +<script src="../../../../subjects.js" type="text/javascript"></script> +<script src="../../../../karma/js/base.js" type="text/javascript"></script> +<script src="quiz.js" type="text/javascript"></script> +<script src="lesson-karma.js" type="text/javascript"></script> +<script src="lesson.js" type="text/javascript"></script> +</head> +<body> +<div id="header"></div> +<div id="content"></div> +<div id="footer"></div> +</body> +</html> diff --git a/menus/milestone.html b/menus/milestone.html new file mode 100755 index 0000000..690e2b2 --- /dev/null +++ b/menus/milestone.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html> + <head> + <title>Milestone Ladder </title> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> + <link type="image/ico" rel="icon" href="../../karma/image/favicon.ico" /> + <link type="text/css" rel="stylesheet" href="../../karma/css/global.css" /> + <link type="text/css" rel="stylesheet" href="../../karma/css/menu.css" /> + <script type="text/javascript" src="../../karma/js/external/jquery-1.4.2.js"></script> + <script type="text/javascript" src="../../karma/js/external/jquery-ui-1.8.2.js"></script> + <script type="text/javascript" src="../../karma/js/karma.js"></script> + <script type="text/javascript" src="../../karma/js/global.js"></script> + <script type="text/javascript" src="../../subjects.js"></script> + <script type="text/javascript" src="milestones.js"></script> + <script type="text/javascript" src="../../karma/js/unit.js"></script> + </head> + <body> + <div id="header"></div> + <div 
id="content"> + <div id="frame" style="position:absolute;top:0px;left:0px"></div> + <canvas id="ladder" style="position:absolute;top:0px;left:0px"></canvas> + </div> + </body> +</html> diff --git a/menus/subject.html b/menus/subject.html new file mode 100755 index 0000000..4b28c79 --- /dev/null +++ b/menus/subject.html @@ -0,0 +1,39 @@ +<!DOCTYPE html> +<html> +<head> +<title>Learn</title> +<meta charset="UTF-8"/> +<link type="image/ico" rel="icon" href="./karma/image/favicon.ico"/> +<link rel="stylesheet" href="./karma/css/menu.css" type="text/css"/> +<script type="text/javascript" src="./karma/js/external/jquery-1.4.2.js"></script> +<script type="text/javascript" src="./karma/js/karma.js"></script> +<script type="text/javascript" src="subjects.js"></script> +<script type="text/javascript" src="./karma/js/main.js"></script> +</head> +<body> +<div id="content"> + <div class = 'c11' id="English">English + <img id="English_icon" alt="English" width=175 height=175 /> + </div> + <div class = 'c31' id="Mathematics">Mathematics + <img id="Mathematics_icon" alt="Mathematics" width=175 height=175" /> + </div> + <div class = 'c51' id ='Science'>Science + <img id='Science_icon' alt='Science' width=175 height=175' /> + </div> + <div class = 'c23' id ="Library">Library + <img id="Library_icon" alt="Library" width=175 height=175> + </div> + <div class = 'c43' id="Explore">Laboratory + <img id="Explore_icon" alt="Laboratory" width=175 height=175> + </div> + <div class="footer"> + Karma.js is licensed under the + <a href="http://www.opensource.org/licenses/mit-license.php">MIT License</a>. + Karma is a sub-project of + <a href="http://sugarlabs.org">SugarLabs</a>. <br /> + Copyright (c) 2009 Bryan Willson Berry + </div> +</div> +</body> +</html> diff --git a/mo2js.py b/mo2js.py new file mode 100755 index 0000000..ed8f0dc --- /dev/null +++ b/mo2js.py @@ -0,0 +1,66 @@ +#! 
#!/usr/bin/env python
"""Convert a gettext .mo catalog into the JSON format used by $.i18n."""

import codecs
import gettext
import json

# TBD: generate $.i18n.choose_pluralized_msg. Similar to how python
# gettext does this.

def context_and_key(key):
    """Return a tuple containing the context (or None) and the message
    key.

    A msgctxt, when present, is prepended to the key with a '\\x04'
    separator, the convention used inside GNU gettext catalogs.
    """
    (context, separator, k) = key.partition(u'\x04')
    if separator != '':
        return (context, k)
    return (None, key)

def group_pluralized_forms(catalog):
    """Return a dictionary where the pluralized forms of *catalog* are
    grouped.  Elements of the form
        (msg, 0) -> tr0
        ...
        (msg, n) -> trn
    are grouped into:
        msg -> [tr0, ..., trn]
    Plain (non-tuple) entries are copied through unchanged.
    """
    # Collect plural forms per message, then order each group by the
    # plural index.  This avoids sorting a mixed list of strings and
    # tuples, which the previous implementation relied on.
    result = {}
    plurals = {}
    for key, translation in catalog.items():
        if isinstance(key, tuple):
            # A pluralized form: key == (msg, plural_index).
            plurals.setdefault(key[0], []).append((key[1], translation))
        else:
            result[key] = translation
    for msg, forms in plurals.items():
        forms.sort()
        result[msg] = [translation for (_, translation) in forms]
    return result

def path(key):
    """Return the nested-dictionary path for *key* in the output JSON."""
    (context, key) = context_and_key(key)
    if context is not None:
        return ['contextualized_strings', context, key]
    return ['strings', key]

def store_translation(dictionary, key, translation):
    """Store *translation* in *dictionary* at the nested path computed
    for *key*, creating intermediate dictionaries as needed."""
    p = path(key)
    while len(p) > 1:
        x = p.pop(0)
        dictionary = dictionary.setdefault(x, {})
    dictionary[p[0]] = translation

def gettext_json(fp, indent = False):
    """Read a .mo catalog from the binary file object *fp* and return
    its contents serialized as a JSON string.

    *indent* is passed straight through to json.dumps().
    """
    result = {}
    tr = gettext.GNUTranslations(fp)
    # The previous version computed this grouping twice and left the
    # first result unused.
    for k, v in group_pluralized_forms(tr._catalog).items():
        store_translation(result, k, v)
    return json.dumps(result, ensure_ascii = False, indent = indent)
+ + +URL: http://www.jorendorff.com/articles/python/path +Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!) +Date: 9 Mar 2007 +""" + + +# TODO +# - Tree-walking functions don't avoid symlink loops. Matt Harrison +# sent me a patch for this. +# - Bug in write_text(). It doesn't support Universal newline mode. +# - Better error message in listdir() when self isn't a +# directory. (On Windows, the error message really sucks.) +# - Make sure everything has a good docstring. +# - Add methods for regex find and replace. +# - guess_content_type() method? +# - Perhaps support arguments to touch(). + +from __future__ import generators + +import sys, warnings, os, fnmatch, glob, shutil, codecs, md5 + +__version__ = '2.2' +__all__ = ['path'] + +# Platform-specific support for path.owner +if os.name == 'nt': + try: + import win32security + except ImportError: + win32security = None +else: + try: + import pwd + except ImportError: + pwd = None + +# Pre-2.3 support. Are unicode filenames supported? +_base = str +_getcwd = os.getcwd +try: + if os.path.supports_unicode_filenames: + _base = unicode + _getcwd = os.getcwdu +except AttributeError: + pass + +# Pre-2.3 workaround for booleans +try: + True, False +except NameError: + True, False = 1, 0 + +# Pre-2.3 workaround for basestring. +try: + basestring +except NameError: + basestring = (str, unicode) + +# Universal newline support +_textmode = 'r' +if hasattr(file, 'newlines'): + _textmode = 'U' + + +class TreeWalkWarning(Warning): + pass + +class path(_base): + """ Represents a filesystem path. + + For documentation on individual methods, consult their + counterparts in os.path. + """ + + # --- Special Python methods. + + def __repr__(self): + return 'path(%s)' % _base.__repr__(self) + + # Adding a path and a string yields a path. 
+ def __add__(self, more): + try: + resultStr = _base.__add__(self, more) + except TypeError: #Python bug + resultStr = NotImplemented + if resultStr is NotImplemented: + return resultStr + return self.__class__(resultStr) + + def __radd__(self, other): + if isinstance(other, basestring): + return self.__class__(other.__add__(self)) + else: + return NotImplemented + + # The / operator joins paths. + def __div__(self, rel): + """ fp.__div__(rel) == fp / rel == fp.joinpath(rel) + + Join two path components, adding a separator character if + needed. + """ + return self.__class__(os.path.join(self, rel)) + + # Make the / operator work even when true division is enabled. + __truediv__ = __div__ + + def getcwd(cls): + """ Return the current working directory as a path object. """ + return cls(_getcwd()) + getcwd = classmethod(getcwd) + + + # --- Operations on path strings. + + isabs = os.path.isabs + def abspath(self): return self.__class__(os.path.abspath(self)) + def normcase(self): return self.__class__(os.path.normcase(self)) + def normpath(self): return self.__class__(os.path.normpath(self)) + def realpath(self): return self.__class__(os.path.realpath(self)) + def expanduser(self): return self.__class__(os.path.expanduser(self)) + def expandvars(self): return self.__class__(os.path.expandvars(self)) + def dirname(self): return self.__class__(os.path.dirname(self)) + basename = os.path.basename + + def expand(self): + """ Clean up a filename by calling expandvars(), + expanduser(), and normpath() on it. + + This is commonly everything needed to clean up a filename + read from a configuration file, for example. 
+ """ + return self.expandvars().expanduser().normpath() + + def _get_namebase(self): + base, ext = os.path.splitext(self.name) + return base + + def _get_ext(self): + f, ext = os.path.splitext(_base(self)) + return ext + + def _get_drive(self): + drive, r = os.path.splitdrive(self) + return self.__class__(drive) + + parent = property( + dirname, None, None, + """ This path's parent directory, as a new path object. + + For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib') + """) + + name = property( + basename, None, None, + """ The name of this file or directory without the full path. + + For example, path('/usr/local/lib/libpython.so').name == 'libpython.so' + """) + + namebase = property( + _get_namebase, None, None, + """ The same as path.name, but with one file extension stripped off. + + For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz', + but path('/home/guido/python.tar.gz').namebase == 'python.tar' + """) + + ext = property( + _get_ext, None, None, + """ The file extension, for example '.py'. """) + + drive = property( + _get_drive, None, None, + """ The drive specifier, for example 'C:'. + This is always empty on systems that don't use drive specifiers. + """) + + def splitpath(self): + """ p.splitpath() -> Return (p.parent, p.name). """ + parent, child = os.path.split(self) + return self.__class__(parent), child + + def splitdrive(self): + """ p.splitdrive() -> Return (p.drive, <the rest of p>). + + Split the drive specifier from this path. If there is + no drive specifier, p.drive is empty, so the return value + is simply (path(''), p). This is always the case on Unix. + """ + drive, rel = os.path.splitdrive(self) + return self.__class__(drive), rel + + def splitext(self): + """ p.splitext() -> Return (p.stripext(), p.ext). + + Split the filename extension from this path and return + the two parts. Either part may be empty. + + The extension is everything from '.' to the end of the + last path segment. 
This has the property that if + (a, b) == p.splitext(), then a + b == p. + """ + filename, ext = os.path.splitext(self) + return self.__class__(filename), ext + + def stripext(self): + """ p.stripext() -> Remove one file extension from the path. + + For example, path('/home/guido/python.tar.gz').stripext() + returns path('/home/guido/python.tar'). + """ + return self.splitext()[0] + + if hasattr(os.path, 'splitunc'): + def splitunc(self): + unc, rest = os.path.splitunc(self) + return self.__class__(unc), rest + + def _get_uncshare(self): + unc, r = os.path.splitunc(self) + return self.__class__(unc) + + uncshare = property( + _get_uncshare, None, None, + """ The UNC mount point for this path. + This is empty for paths on local drives. """) + + def joinpath(self, *args): + """ Join two or more path components, adding a separator + character (os.sep) if needed. Returns a new path + object. + """ + return self.__class__(os.path.join(self, *args)) + + def splitall(self): + r""" Return a list of the path components in this path. + + The first item in the list will be a path. Its value will be + either os.curdir, os.pardir, empty, or the root directory of + this path (for example, '/' or 'C:\\'). The other items in + the list will be strings. + + path.path.joinpath(*result) will yield the original path. + """ + parts = [] + loc = self + while loc != os.curdir and loc != os.pardir: + prev = loc + loc, child = prev.splitpath() + if loc == prev: + break + parts.append(child) + parts.append(loc) + parts.reverse() + return parts + + def relpath(self): + """ Return this path as a relative path, + based from the current working directory. + """ + cwd = self.__class__(os.getcwd()) + return cwd.relpathto(self) + + def relpathto(self, dest): + """ Return a relative path from self to dest. + + If there is no relative path from self to dest, for example if + they reside on different drives in Windows, then this returns + dest.abspath(). 
+ """ + origin = self.abspath() + dest = self.__class__(dest).abspath() + + orig_list = origin.normcase().splitall() + # Don't normcase dest! We want to preserve the case. + dest_list = dest.splitall() + + if orig_list[0] != os.path.normcase(dest_list[0]): + # Can't get here from there. + return dest + + # Find the location where the two paths start to differ. + i = 0 + for start_seg, dest_seg in zip(orig_list, dest_list): + if start_seg != os.path.normcase(dest_seg): + break + i += 1 + + # Now i is the point where the two paths diverge. + # Need a certain number of "os.pardir"s to work up + # from the origin to the point of divergence. + segments = [os.pardir] * (len(orig_list) - i) + # Need to add the diverging part of dest_list. + segments += dest_list[i:] + if len(segments) == 0: + # If they happen to be identical, use os.curdir. + relpath = os.curdir + else: + relpath = os.path.join(*segments) + return self.__class__(relpath) + + # --- Listing, searching, walking, and matching + + def listdir(self, pattern=None): + """ D.listdir() -> List of items in this directory. + + Use D.files() or D.dirs() instead if you want a listing + of just files or just subdirectories. + + The elements of the list are path objects. + + With the optional 'pattern' argument, this only lists + items whose names match the given pattern. + """ + names = os.listdir(self) + if pattern is not None: + names = fnmatch.filter(names, pattern) + return [self / child for child in names] + + def dirs(self, pattern=None): + """ D.dirs() -> List of this directory's subdirectories. + + The elements of the list are path objects. + This does not walk recursively into subdirectories + (but see path.walkdirs). + + With the optional 'pattern' argument, this only lists + directories whose names match the given pattern. For + example, d.dirs('build-*'). + """ + return [p for p in self.listdir(pattern) if p.isdir()] + + def files(self, pattern=None): + """ D.files() -> List of the files in this directory. 
+ + The elements of the list are path objects. + This does not walk into subdirectories (see path.walkfiles). + + With the optional 'pattern' argument, this only lists files + whose names match the given pattern. For example, + d.files('*.pyc'). + """ + + return [p for p in self.listdir(pattern) if p.isfile()] + + def walk(self, pattern=None, errors='strict'): + """ D.walk() -> iterator over files and subdirs, recursively. + + The iterator yields path objects naming each child item of + this directory and its descendants. This requires that + D.isdir(). + + This performs a depth-first traversal of the directory tree. + Each directory is returned just before all its children. + + The errors= keyword argument controls behavior when an + error occurs. The default is 'strict', which causes an + exception. The other allowed values are 'warn', which + reports the error via warnings.warn(), and 'ignore'. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + childList = self.listdir() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in childList: + if pattern is None or child.fnmatch(pattern): + yield child + try: + isdir = child.isdir() + except Exception: + if errors == 'ignore': + isdir = False + elif errors == 'warn': + warnings.warn( + "Unable to access '%s': %s" + % (child, sys.exc_info()[1]), + TreeWalkWarning) + isdir = False + else: + raise + + if isdir: + for item in child.walk(pattern, errors): + yield item + + def walkdirs(self, pattern=None, errors='strict'): + """ D.walkdirs() -> iterator over subdirs, recursively. + + With the optional 'pattern' argument, this yields only + directories whose names match the given pattern. For + example, mydir.walkdirs('*test') yields only directories + with names ending in 'test'. 
+ + The errors= keyword argument controls behavior when an + error occurs. The default is 'strict', which causes an + exception. The other allowed values are 'warn', which + reports the error via warnings.warn(), and 'ignore'. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + dirs = self.dirs() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in dirs: + if pattern is None or child.fnmatch(pattern): + yield child + for subsubdir in child.walkdirs(pattern, errors): + yield subsubdir + + def walkfiles(self, pattern=None, errors='strict'): + """ D.walkfiles() -> iterator over files in D, recursively. + + The optional argument, pattern, limits the results to files + with names that match the pattern. For example, + mydir.walkfiles('*.tmp') yields only files with the .tmp + extension. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + childList = self.listdir() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in childList: + try: + isfile = child.isfile() + isdir = not isfile and child.isdir() + except: + if errors == 'ignore': + continue + elif errors == 'warn': + warnings.warn( + "Unable to access '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + continue + else: + raise + + if isfile: + if pattern is None or child.fnmatch(pattern): + yield child + elif isdir: + for f in child.walkfiles(pattern, errors): + yield f + + def fnmatch(self, pattern): + """ Return True if self.name matches the given pattern. + + pattern - A filename pattern with wildcards, + for example '*.py'. 
+ """ + return fnmatch.fnmatch(self.name, pattern) + + def glob(self, pattern): + """ Return a list of path objects that match the pattern. + + pattern - a path relative to this directory, with wildcards. + + For example, path('/users').glob('*/bin/*') returns a list + of all the files users have in their bin directories. + """ + cls = self.__class__ + return [cls(s) for s in glob.glob(_base(self / pattern))] + + + # --- Reading or writing an entire file at once. + + def open(self, mode='r'): + """ Open this file. Return a file object. """ + return file(self, mode) + + def bytes(self): + """ Open this file, read all bytes, return them as a string. """ + f = self.open('rb') + try: + return f.read() + finally: + f.close() + + def write_bytes(self, bytes, append=False): + """ Open this file and write the given bytes to it. + + Default behavior is to overwrite any existing file. + Call p.write_bytes(bytes, append=True) to append instead. + """ + if append: + mode = 'ab' + else: + mode = 'wb' + f = self.open(mode) + try: + f.write(bytes) + finally: + f.close() + + def text(self, encoding=None, errors='strict'): + r""" Open this file, read it in, return the content as a string. + + This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r' + are automatically translated to '\n'. + + Optional arguments: + + encoding - The Unicode encoding (or character set) of + the file. If present, the content of the file is + decoded and returned as a unicode object; otherwise + it is returned as an 8-bit str. + errors - How to handle Unicode errors; see help(str.decode) + for the options. Default is 'strict'. + """ + if encoding is None: + # 8-bit + f = self.open(_textmode) + try: + return f.read() + finally: + f.close() + else: + # Unicode + f = codecs.open(self, 'r', encoding, errors) + # (Note - Can't use 'U' mode here, since codecs.open + # doesn't support 'U' mode, even in Python 2.3.) 
+ try: + t = f.read() + finally: + f.close() + return (t.replace(u'\r\n', u'\n') + .replace(u'\r\x85', u'\n') + .replace(u'\r', u'\n') + .replace(u'\x85', u'\n') + .replace(u'\u2028', u'\n')) + + def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False): + r""" Write the given text to this file. + + The default behavior is to overwrite any existing file; + to append instead, use the 'append=True' keyword argument. + + There are two differences between path.write_text() and + path.write_bytes(): newline handling and Unicode handling. + See below. + + Parameters: + + - text - str/unicode - The text to be written. + + - encoding - str - The Unicode encoding that will be used. + This is ignored if 'text' isn't a Unicode string. + + - errors - str - How to handle Unicode encoding errors. + Default is 'strict'. See help(unicode.encode) for the + options. This is ignored if 'text' isn't a Unicode + string. + + - linesep - keyword argument - str/unicode - The sequence of + characters to be used to mark end-of-line. The default is + os.linesep. You can also specify None; this means to + leave all newlines as they are in 'text'. + + - append - keyword argument - bool - Specifies what to do if + the file already exists (True: append to the end of it; + False: overwrite it.) The default is False. + + + --- Newline handling. + + write_text() converts all standard end-of-line sequences + ('\n', '\r', and '\r\n') to your platform's default end-of-line + sequence (see os.linesep; on Windows, for example, the + end-of-line marker is '\r\n'). + + If you don't like your platform's default, you can override it + using the 'linesep=' keyword argument. If you specifically want + write_text() to preserve the newlines as-is, use 'linesep=None'. + + This applies to Unicode text the same as to 8-bit text, except + there are three additional standard Unicode end-of-line sequences: + u'\x85', u'\r\x85', and u'\u2028'. 
+ + (This is slightly different from when you open a file for + writing with fopen(filename, "w") in C or file(filename, 'w') + in Python.) + + + --- Unicode + + If 'text' isn't Unicode, then apart from newline handling, the + bytes are written verbatim to the file. The 'encoding' and + 'errors' arguments are not used and must be omitted. + + If 'text' is Unicode, it is first converted to bytes using the + specified 'encoding' (or the default encoding if 'encoding' + isn't specified). The 'errors' argument applies only to this + conversion. + + """ + if isinstance(text, unicode): + if linesep is not None: + # Convert all standard end-of-line sequences to + # ordinary newline characters. + text = (text.replace(u'\r\n', u'\n') + .replace(u'\r\x85', u'\n') + .replace(u'\r', u'\n') + .replace(u'\x85', u'\n') + .replace(u'\u2028', u'\n')) + text = text.replace(u'\n', linesep) + if encoding is None: + encoding = sys.getdefaultencoding() + bytes = text.encode(encoding, errors) + else: + # It is an error to specify an encoding if 'text' is + # an 8-bit string. + assert encoding is None + + if linesep is not None: + text = (text.replace('\r\n', '\n') + .replace('\r', '\n')) + bytes = text.replace('\n', linesep) + + self.write_bytes(bytes, append) + + def lines(self, encoding=None, errors='strict', retain=True): + r""" Open this file, read all lines, return them in a list. + + Optional arguments: + encoding - The Unicode encoding (or character set) of + the file. The default is None, meaning the content + of the file is read as 8-bit characters and returned + as a list of (non-Unicode) str objects. + errors - How to handle Unicode errors; see help(str.decode) + for the options. Default is 'strict' + retain - If true, retain newline characters; but all newline + character combinations ('\r', '\n', '\r\n') are + translated to '\n'. If false, newline characters are + stripped off. Default is True. + + This uses 'U' mode in Python 2.3 and later. 
+ """ + if encoding is None and retain: + f = self.open(_textmode) + try: + return f.readlines() + finally: + f.close() + else: + return self.text(encoding, errors).splitlines(retain) + + def write_lines(self, lines, encoding=None, errors='strict', + linesep=os.linesep, append=False): + r""" Write the given lines of text to this file. + + By default this overwrites any existing file at this path. + + This puts a platform-specific newline sequence on every line. + See 'linesep' below. + + lines - A list of strings. + + encoding - A Unicode encoding to use. This applies only if + 'lines' contains any Unicode strings. + + errors - How to handle errors in Unicode encoding. This + also applies only to Unicode strings. + + linesep - The desired line-ending. This line-ending is + applied to every line. If a line already has any + standard line ending ('\r', '\n', '\r\n', u'\x85', + u'\r\x85', u'\u2028'), that will be stripped off and + this will be used instead. The default is os.linesep, + which is platform-dependent ('\r\n' on Windows, '\n' on + Unix, etc.) Specify None to write the lines as-is, + like file.writelines(). + + Use the keyword argument append=True to append lines to the + file. The default is to overwrite the file. Warning: + When you use this with Unicode data, if the encoding of the + existing data in the file is different from the encoding + you specify with the encoding= parameter, the result is + mixed-encoding data, which can really confuse someone trying + to read the file later. + """ + if append: + mode = 'ab' + else: + mode = 'wb' + f = self.open(mode) + try: + for line in lines: + isUnicode = isinstance(line, unicode) + if linesep is not None: + # Strip off any existing line-end and add the + # specified linesep string. 
+ if isUnicode: + if line[-2:] in (u'\r\n', u'\x0d\x85'): + line = line[:-2] + elif line[-1:] in (u'\r', u'\n', + u'\x85', u'\u2028'): + line = line[:-1] + else: + if line[-2:] == '\r\n': + line = line[:-2] + elif line[-1:] in ('\r', '\n'): + line = line[:-1] + line += linesep + if isUnicode: + if encoding is None: + encoding = sys.getdefaultencoding() + line = line.encode(encoding, errors) + f.write(line) + finally: + f.close() + + def read_md5(self): + """ Calculate the md5 hash for this file. + + This reads through the entire file. + """ + f = self.open('rb') + try: + m = md5.new() + while True: + d = f.read(8192) + if not d: + break + m.update(d) + finally: + f.close() + return m.digest() + + # --- Methods for querying the filesystem. + + exists = os.path.exists + isdir = os.path.isdir + isfile = os.path.isfile + islink = os.path.islink + ismount = os.path.ismount + + if hasattr(os.path, 'samefile'): + samefile = os.path.samefile + + getatime = os.path.getatime + atime = property( + getatime, None, None, + """ Last access time of the file. """) + + getmtime = os.path.getmtime + mtime = property( + getmtime, None, None, + """ Last-modified time of the file. """) + + if hasattr(os.path, 'getctime'): + getctime = os.path.getctime + ctime = property( + getctime, None, None, + """ Creation time of the file. """) + + getsize = os.path.getsize + size = property( + getsize, None, None, + """ Size of the file, in bytes. """) + + if hasattr(os, 'access'): + def access(self, mode): + """ Return true if current user has access to this path. + + mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK + """ + return os.access(self, mode) + + def stat(self): + """ Perform a stat() system call on this path. """ + return os.stat(self) + + def lstat(self): + """ Like path.stat(), but do not follow symbolic links. """ + return os.lstat(self) + + def get_owner(self): + r""" Return the name of the owner of this file or directory. + + This follows symbolic links. 
+ + On Windows, this returns a name of the form ur'DOMAIN\User Name'. + On Windows, a group can own a file or directory. + """ + if os.name == 'nt': + if win32security is None: + raise Exception("path.owner requires win32all to be installed") + desc = win32security.GetFileSecurity( + self, win32security.OWNER_SECURITY_INFORMATION) + sid = desc.GetSecurityDescriptorOwner() + account, domain, typecode = win32security.LookupAccountSid(None, sid) + return domain + u'\\' + account + else: + if pwd is None: + raise NotImplementedError("path.owner is not implemented on this platform.") + st = self.stat() + return pwd.getpwuid(st.st_uid).pw_name + + owner = property( + get_owner, None, None, + """ Name of the owner of this file or directory. """) + + if hasattr(os, 'statvfs'): + def statvfs(self): + """ Perform a statvfs() system call on this path. """ + return os.statvfs(self) + + if hasattr(os, 'pathconf'): + def pathconf(self, name): + return os.pathconf(self, name) + + + # --- Modifying operations on files and directories + + def utime(self, times): + """ Set the access and modified times of this file. """ + os.utime(self, times) + + def chmod(self, mode): + os.chmod(self, mode) + + if hasattr(os, 'chown'): + def chown(self, uid, gid): + os.chown(self, uid, gid) + + def rename(self, new): + os.rename(self, new) + + def renames(self, new): + os.renames(self, new) + + + # --- Create/delete operations on directories + + def mkdir(self, mode=0777): + os.mkdir(self, mode) + + def makedirs(self, mode=0777): + os.makedirs(self, mode) + + def rmdir(self): + os.rmdir(self) + + def removedirs(self): + os.removedirs(self) + + + # --- Modifying operations on files + + def touch(self): + """ Set the access/modified times of this file to the current time. + Create the file if it does not exist. 
+ """ + fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666) + os.close(fd) + os.utime(self, None) + + def remove(self): + os.remove(self) + + def unlink(self): + os.unlink(self) + + + # --- Links + + if hasattr(os, 'link'): + def link(self, newpath): + """ Create a hard link at 'newpath', pointing to this file. """ + os.link(self, newpath) + + if hasattr(os, 'symlink'): + def symlink(self, newlink): + """ Create a symbolic link at 'newlink', pointing here. """ + os.symlink(self, newlink) + + if hasattr(os, 'readlink'): + def readlink(self): + """ Return the path to which this symbolic link points. + + The result may be an absolute or a relative path. + """ + return self.__class__(os.readlink(self)) + + def readlinkabs(self): + """ Return the path to which this symbolic link points. + + The result is always an absolute path. + """ + p = self.readlink() + if p.isabs(): + return p + else: + return (self.parent / p).abspath() + + + # --- High-level functions from shutil + + copyfile = shutil.copyfile + copymode = shutil.copymode + copystat = shutil.copystat + copy = shutil.copy + copy2 = shutil.copy2 + copytree = shutil.copytree + if hasattr(shutil, 'move'): + move = shutil.move + rmtree = shutil.rmtree + + + # --- Special stuff from os + + if hasattr(os, 'chroot'): + def chroot(self): + os.chroot(self) + + if hasattr(os, 'startfile'): + def startfile(self): + os.startfile(self) + + diff --git a/zipper.py b/zipper.py new file mode 100755 index 0000000..a9f82c1 --- /dev/null +++ b/zipper.py @@ -0,0 +1,121 @@ +#!/usr/bin/python + +#create courseware from master folder +#proceed by level: subject, course, milestone, activity + +from path import path +import subprocess, os, sys +from optparse import OptionParser + +parser = OptionParser(usage="Usage: %prog [options] file") +(options, args) = parser.parse_args() +if not args: + SUBJECT = 'All' +else: + SUBJECT = args[0] + COURSE = args[1] +MAINPATH = path('/home/tony/Desktop/master') +TARGET = path('/home/tony/courseware') 
+
+BACKUP = path('/home/tony/courseware.bak')
+# Build the courseware tree level by level (subject -> course -> milestone):
+# rotate the previous TARGET into BACKUP, then recreate TARGET from the
+# master folder.  All filesystem work is done by shelling out.
+# NOTE(review): every command below is a string run with shell=True; paths
+# containing spaces or shell metacharacters would break them -- confirm the
+# master-folder names are controlled.
+if SUBJECT == 'All':
+    subprocess.call('rm -rf ' + BACKUP,shell=True)
+    subprocess.call('mv ' + TARGET + ' ' + BACKUP, shell=True)
+    subprocess.call('mkdir ' + TARGET, shell=True)
+    #copy version to TARGET
+    subprocess.call('cp ' + MAINPATH / 'version* ' + TARGET, shell=True)
+    #courseware folder needs subjects.js, subject.html, and karma.zip
+    subjectsfile = MAINPATH / 'subjects.js'
+    subprocess.call('cp '+subjectsfile+' '+TARGET,shell=True)
+    subprocess.call('cp '+ MAINPATH / 'index.html' + ' ' + TARGET,shell=True)
+    # Zip the shared 'karma' runtime folder from the master tree into TARGET.
+    cwd = MAINPATH
+    cmd = 'zip -qr ' + TARGET / 'karma.zip' + ' karma'
+    print cmd
+    subprocess.call(cmd,cwd=cwd,shell=True)
+    #create subject folders based on subjects.js
+    fin = open(subjectsfile,'r')
+    txt = fin.read()
+    fin.close()
+    lines = txt.split('\n')
+    for line in lines:
+        # Each data line of subjects.js is evaluated as a Python literal.
+        # NOTE(review): eval() trusts the file content completely; acceptable
+        # for a local build script, unsafe on untrusted input.
+        try:
+            entry = eval(line)[0]
+        except:
+            # Bare except: lines that are not literals (blanks, JS syntax)
+            # are silently skipped.
+            continue
+        if len(entry) < 3:
+            # The line held a flat entry rather than a nested list;
+            # re-evaluate it without the [0] indexing.
+            entry = eval(line)
+        subject = entry[1]
+        sbj = entry[0]
+        src = MAINPATH / subject
+        if sbj == 'li':
+            # Special case: the 'li' subject is copied wholesale instead of
+            # being rebuilt course by course.
+            subprocess.call('cp -r ' + src + ' ' + TARGET,shell=True)
+            continue
+        tpth = TARGET / subject
+        subprocess.call('mkdir ' + tpth, shell=True)
+        subprocess.call('cp ' + src / 'index.html' + ' ' + tpth,shell=True)
+        subprocess.call('cp ' + src / subject.lower()+'.png' + ' ' + tpth,shell=True)
+        subprocess.call('cp ' + src / 'courses.js' + ' ' + tpth,shell=True)
+        #create course folders for each course in courses.js
+        # NOTE(review): this inner parse rebinds 'fin', 'txt', 'lines' and
+        # 'line' from the enclosing loop; the outer 'for' already holds its
+        # own iterator so rebinding does not break it, but the shadowing is
+        # fragile.
+        fin = open(MAINPATH / subject / 'courses.js')
+        txt = fin.read()
+        fin.close()
+        lines = txt.split('\n')
+        for line in lines:
+            try:
+                entry = eval(line)[0]
+            except:
+                continue
+            if len(entry)<4:
+                entry = eval(line)
+            coursename = entry[1]
+            course = entry[0].lower()
+            srcpth = MAINPATH / subject / course.lower()
+            tgtpth = TARGET / subject / course
+            print tgtpth
+            subprocess.call('mkdir ' + tgtpth, shell=True)
+            subprocess.call('cp ' + srcpth / 'index.html ' + tgtpth,shell=True)
+            subprocess.call('cp ' + srcpth / 'milestones.js ' + 
tgtpth,shell=True)
+            #now get milestones based on milestones.js
+            fin = open(srcpth / 'milestones.js','r')
+            txt = fin.read()
+            fin.close()
+            milestones = []
+            lines = txt.split('\n')
+            for line in lines:
+                try:
+                    entry = eval(line)[0]
+                except:
+                    continue
+                if len(entry)<3:
+                    entry = eval(line)
+                milestones.append(entry)
+            milestones.sort()
+            for milestone in milestones:
+                # milestone[4] names the milestone's directory inside the
+                # course source; it is zipped recursively (zip -qr, run with
+                # cwd=srcpth) into a .msxo bundle in the target course folder.
+                print 'milestone', milestone[4]
+                cwd = srcpth
+                cmd = 'zip -qr ' + tgtpth / milestone[4] + '.msxo ' + milestone[4]
+                subprocess.call(cmd, cwd=cwd, shell=True)
+else: #we are doing one course
+    #now get milestones based on milestones.js
+    # NOTE(review): unlike the 'All' branch this never creates tgtpth (no
+    # mkdir) and does not sort the milestones -- it appears to assume a
+    # previous full build already laid out TARGET/SUBJECT/COURSE.  Confirm.
+    srcpth = MAINPATH / SUBJECT / COURSE
+    tgtpth = TARGET / SUBJECT / COURSE
+    #also copy to target folder
+    subprocess.call('cp ' + srcpth / 'milestones.js ' + tgtpth,shell=True)
+    fin = open(srcpth / 'milestones.js','r')
+    txt = fin.read()
+    fin.close()
+    milestones = []
+    lines = txt.split('\n')
+    for line in lines:
+        try:
+            entry = eval(line)[0]
+        except:
+            continue
+        if len(entry)<3:
+            entry = eval(line)
+        milestones.append(entry)
+    for milestone in milestones:
+        print 'milestone', milestone[4]
+        cwd = srcpth
+        cmd = 'zip -qr ' + tgtpth / milestone[4] + '.msxo ' + milestone[4]
+        subprocess.call(cmd, cwd=cwd, shell=True)
+