diff options
28 files changed, 3580 insertions, 0 deletions
diff --git a/.hg/00changelog.i b/.hg/00changelog.i Binary files differnew file mode 100755 index 0000000..d3a8311 --- /dev/null +++ b/.hg/00changelog.i diff --git a/.hg/dirstate b/.hg/dirstate Binary files differnew file mode 100755 index 0000000..ea9d567 --- /dev/null +++ b/.hg/dirstate diff --git a/.hg/requires b/.hg/requires new file mode 100755 index 0000000..57601b5 --- /dev/null +++ b/.hg/requires @@ -0,0 +1,2 @@ +revlogv1 +store diff --git a/.hg/store/00changelog.i b/.hg/store/00changelog.i Binary files differnew file mode 100755 index 0000000..37b9876 --- /dev/null +++ b/.hg/store/00changelog.i diff --git a/.hg/store/00manifest.i b/.hg/store/00manifest.i Binary files differnew file mode 100755 index 0000000..738f818 --- /dev/null +++ b/.hg/store/00manifest.i diff --git a/.hg/store/data/_beautiful_soup.py.i b/.hg/store/data/_beautiful_soup.py.i Binary files differnew file mode 100755 index 0000000..6074600 --- /dev/null +++ b/.hg/store/data/_beautiful_soup.py.i diff --git a/.hg/store/data/activity/activity.info.i b/.hg/store/data/activity/activity.info.i Binary files differnew file mode 100755 index 0000000..fed86ea --- /dev/null +++ b/.hg/store/data/activity/activity.info.i diff --git a/.hg/store/data/activity/journalmanager-activity.svg.i b/.hg/store/data/activity/journalmanager-activity.svg.i Binary files differnew file mode 100755 index 0000000..cce66cd --- /dev/null +++ b/.hg/store/data/activity/journalmanager-activity.svg.i diff --git a/.hg/store/data/activity/permissions.info.i b/.hg/store/data/activity/permissions.info.i Binary files differnew file mode 100755 index 0000000..7900069 --- /dev/null +++ b/.hg/store/data/activity/permissions.info.i diff --git a/.hg/store/data/journalmanager.py.i b/.hg/store/data/journalmanager.py.i Binary files differnew file mode 100755 index 0000000..1f5a47a --- /dev/null +++ b/.hg/store/data/journalmanager.py.i diff --git a/.hg/store/data/listview.py.i b/.hg/store/data/listview.py.i Binary files differnew file mode 100755 index 0000000..f2a99b7 --- /dev/null +++ b/.hg/store/data/listview.py.i diff --git a/.hg/store/data/objectchooser.py.i b/.hg/store/data/objectchooser.py.i Binary files differnew file mode 100755 index 0000000..4d62422 --- /dev/null +++ b/.hg/store/data/objectchooser.py.i diff --git a/.hg/store/data/objectfetch.py.i b/.hg/store/data/objectfetch.py.i Binary files differnew file mode 100755 index 0000000..b2c77b6 --- /dev/null +++ b/.hg/store/data/objectfetch.py.i diff --git a/.hg/store/data/xoconfig.i b/.hg/store/data/xoconfig.i Binary files differnew file mode 100755 index 0000000..115635c --- /dev/null +++ b/.hg/store/data/xoconfig.i diff --git a/.hg/store/undo b/.hg/store/undo Binary files differnew file mode 100755 index 0000000..7b9bbbd --- /dev/null +++ b/.hg/store/undo diff --git a/.hg/undo.branch b/.hg/undo.branch new file mode 100755 index 0000000..331d858 --- /dev/null +++ b/.hg/undo.branch @@ -0,0 +1 @@ +default
\ No newline at end of file diff --git a/.hg/undo.dirstate b/.hg/undo.dirstate Binary files differnew file mode 100755 index 0000000..7365d63 --- /dev/null +++ b/.hg/undo.dirstate diff --git a/BeautifulSoup.py b/BeautifulSoup.py new file mode 100755 index 0000000..34204e7 --- /dev/null +++ b/BeautifulSoup.py @@ -0,0 +1,2000 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2009, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.1.0.1" +__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson" +__license__ = "New-style BSD" + +import codecs +import markupbase +import types +import re +from HTMLParser import HTMLParser, HTMLParseError +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +# First, the classes that represent markup elements. + +def sob(unicode, encoding): + """Returns either the given Unicode string or its encoding.""" + if encoding is None: + return unicode + else: + return unicode.encode(encoding) + +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.contents.index(self) + if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: + # We're replacing this element with one of its siblings. + index = self.parent.contents.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + self.parent.contents.remove(self) + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if (isinstance(newChild, basestring) + or isinstance(newChild, unicode)) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent != None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent == self: + index = self.find(newChild) + if index and index < position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. + r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + else: + # Build a SoupStrainer + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (unicode(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.decode().encode(encoding) + + def decodeGivenEventualEncoding(self, eventualEncoding): + return self + +class CData(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'<![CDATA[' + self + u']]>' + +class ProcessingInstruction(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + output = self + if u'%SOUP-ENCODING%' in output: + output = self.substituteEncoding(output, eventualEncoding) + return u'<?' + output + u'?>' + +class Comment(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'<!--' + self + u'-->' + +class Declaration(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'<!' + self + u'>' + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + def convert(kval): + "Converts HTML, XML and numeric entities in the attribute value." + k, val = kval + if val is None: + return kval + return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, val)) + self.attrs = map(convert, self.attrs) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.decode(eventualEncoding=encoding) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __unicode__(self): + return self.decode() + + def __str__(self): + return self.encode() + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + return self.decode(prettyPrint, indentLevel, encoding).encode(encoding) + + def decode(self, prettyPrint=False, indentLevel=0, + eventualEncoding=DEFAULT_OUTPUT_ENCODING): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding.""" + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isString(val): + if (self.containsSubstitutions + and eventualEncoding is not None + and '%SOUP-ENCODING%' in val): + val = self.substituteEncoding(val, eventualEncoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + if val is None: + # Handle boolean attributes. + decoded = key + else: + decoded = fmt % (key, val) + attrs.append(decoded) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % self.name + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.decodeContents(prettyPrint, indentContents, + eventualEncoding) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (self.name, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + contents = [i for i in self.contents] + for i in contents: + if isinstance(i, Tag): + i.decompose() + else: + i.extract() + self.extract() + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.encode(encoding, True) + + def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + return self.decodeContents(prettyPrint, indentLevel).encode(encoding) + + def decodeContents(self, prettyPrint=False, indentLevel=0, + eventualEncoding=DEFAULT_OUTPUT_ENCODING): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.decodeGivenEventualEncoding(eventualEncoding) + elif isinstance(c, Tag): + s.append(c.decode(prettyPrint, indentLevel, eventualEncoding)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods. Will go away in 4.0. + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + # 3.x compatibility methods. Will go away in 4.0. + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + if encoding is None: + return self.decodeContents(prettyPrint, indentLevel, encoding) + else: + return self.encodeContents(encoding, prettyPrint, indentLevel) + + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + def childGenerator(self): + if not len(self.contents): + raise StopIteration + current = self.contents[0] + while current: + yield current + current = current.nextSibling + raise StopIteration + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isString(attrs): + kwargs['class'] = attrs + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if isList(markup) and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. + elif isinstance(markup, NavigableString) or \ + isString(markup): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst == True and type(matchAgainst) == types.BooleanType: + result = markup != None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup is not None and not isString(markup): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif (isList(matchAgainst) + and (markup is not None or not isString(matchAgainst))): + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isString(markup): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return ((hasattr(l, '__iter__') and not isString(l)) + or (type(l) in (types.ListType, types.TupleType))) + +def isString(s): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is stringlike.""" + try: + return isinstance(s, unicode) or isinstance(s, basestring) + except NameError: + return isinstance(s, str) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion) and not isString(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class HTMLParserBuilder(HTMLParser): + + def __init__(self, soup): + HTMLParser.__init__(self) + self.soup = soup + + # We inherit feed() and reset(). + + def handle_starttag(self, name, attrs): + if name == 'meta': + self.soup.extractCharsetFromMeta(attrs) + else: + self.soup.unknown_starttag(name, attrs) + + def handle_endtag(self, name): + self.soup.unknown_endtag(name) + + def handle_data(self, content): + self.soup.handle_data(content) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.soup.endData() + self.handle_data(text) + self.soup.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.soup.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.soup.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.soup.convertXMLEntities: + data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.soup.convertHTMLEntities and \ + not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = HTMLParser.parse_declaration(self, i) + except HTMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + + +class BeautifulStoneSoup(Tag): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "<foo><bar></foo>" actually means + "<foo><bar></bar></foo>". + + [Another possible explanation is "<foo><bar /></foo>", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda x: '<!' + x.group(1) + '>') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. + STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False, + builder=HTMLParserBuilder): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + HTMLParser will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + HTMLParser, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke HTMLParser: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + self.builder = builder(self) + self.reset() + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed. + self.builder = None # So can the builder. + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not isList(self.markupMassage): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.builder.reset() + + self.builder.feed(markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + self.builder.reset() + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + # Tags with just one string-owning child get the child as a + # 'string' property, so that soup.tag.string is shorthand for + # soup.tag.contents[0] + if len(self.currentTag.contents) == 1 and \ + isinstance(self.currentTag.contents[0], NavigableString): + self.currentTag.string = self.currentTag.contents[0] + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: + <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'. + <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'. + <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'. + + <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. + <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' + <td><tr><td> *<td>* should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers != None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers == None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print "</%s> is not real!" % name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def extractCharsetFromMeta(self, attrs): + self.unknown_starttag('meta', attrs) + + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a <p> tag should implicitly close the previous <p> tag. + + <p>Para1<p>Para2 + should be transformed into: + <p>Para1</p><p>Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a <blockquote> tag should _not_ implicitly close the previous + <blockquote> tag. + + Alice said: <blockquote>Bob said: <blockquote>Blah + should NOT be transformed into: + Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a <tr> tag should + implicitly close the previous <tr> tag within the same <table>, + but not close a <tr> tag in another table. + + <table><tr>Blah<tr>Blah + should be transformed into: + <table><tr>Blah</tr><tr>Blah + but, + <tr>Blah<table><tr>Blah + should NOT be transformed into + <tr>Blah<table></tr><tr>Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def extractCharsetFromMeta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big'] + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + <script> tags contain Javascript and should not be parsed, that + META tags may contain encoding information, and so on. + + This also makes it better for subclassing than BeautifulStoneSoup + or BeautifulSoup.""" + + RESET_NESTING_TAGS = buildTagMap('noscript') + NESTABLE_TAGS = {} + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + <foo><bar>baz</bar></foo> + => + <foo bar="baz"><bar>baz</bar></foo> + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisiness, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class RobustInsanelyWackAssHTMLParser(MinimalSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +###################################################### +# +# Bonus library: Unicode, Dammit +# +# This class forces XML data into a standard format (usually to UTF-8 +# or Unicode). It is heavily based on code from Mark Pilgrim's +# Universal Feed Parser. It does not rewrite the XML or HTML to +# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi +# (XML) and BeautifulSoup.start_meta (HTML). + +# Autodetects character encodings. +# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. +try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, match): + """Changes a MS smart quote character to an XML or HTML + entity.""" + orig = match.group(1) + sub = self.MS_CHARS.get(orig) + if type(sub) == types.TupleType: + if self.smartQuotesTo == 'xml': + sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + else: + sub = '&'.encode() + sub[0].encode() + ';'.encode() + else: + sub = sub.encode() + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + smart_quotes_re = "([\x80-\x9f])" + smart_quotes_compiled = re.compile(smart_quotes_re) + markup = smart_quotes_compiled.sub(self._subMSChar, markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode() + xml_encoding_match = re.compile(xml_encoding_re).match(xml_data) + if not xml_encoding_match and isHTML: + meta_re = '<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode() + regexp = re.compile(meta_re, re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].decode( + 'ascii').lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + '\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify() diff --git a/activity/activity.info b/activity/activity.info new file mode 100755 index 0000000..23398d2 --- /dev/null +++ b/activity/activity.info @@ -0,0 +1,7 @@ +[Activity] +name = Data Manager +service_name = org.olenepal.DataManager +class = datamanager.DataManager +icon = datamanager-activity +activity_version = 1 +show_launcher = yes diff --git a/activity/datamanager-activity.svg b/activity/datamanager-activity.svg new file mode 100755 index 0000000..1ae35db --- /dev/null +++ b/activity/datamanager-activity.svg @@ -0,0 +1,11 @@ +<?xml version="1.0" ?><!DOCTYPE svg PUBLIC '-//W3C//DTD SVG 1.1//EN' 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd' [ + <!ENTITY stroke_color "#666666"> + <!ENTITY fill_color "#ffffff"> +]><svg enable-background="new 0 0 55 55" height="55px" id="Layer_1" version="1.1" viewBox="0 0 55 55" width="55px" x="0px" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" y="0px"><g display="block" id="activity-journal"> + <path d="M45.866,44.669 c0,2.511-1.528,4.331-4.332,4.331H12.077V6h29.458c2.15,0,4.332,2.154,4.332,4.33L45.866,44.669L45.866,44.669z" fill="&fill_color;" stroke="&stroke_color;" stroke-linecap="round" stroke-linejoin="round" stroke-width="3.5"/> + + <line fill="none" stroke="&stroke_color;" stroke-linecap="round" stroke-linejoin="round" stroke-width="3.5" x1="21.341" x2="21.341" y1="6.121" y2="48.881"/> + <path d="M7.384,14.464 c0,0,2.084,0.695,4.17,0.695c2.086,0,4.173-0.695,4.173-0.695" fill="none" stroke="&stroke_color;" stroke-linecap="round" stroke-linejoin="round" stroke-width="3.5"/> + <path d="M7.384,28.021 c0,0,1.912,0.695,4.345,0.695s3.999-0.695,3.999-0.695" fill="none" stroke="&stroke_color;" stroke-linecap="round" stroke-linejoin="round" stroke-width="3.5"/> + <path d="M7.384,41.232 c0,0,1.736,0.695,4.518,0.695c2.781,0,3.825-0.695,3.825-0.695" fill="none" stroke="&stroke_color;" stroke-linecap="round" stroke-linejoin="round" stroke-width="3.5"/> +</g></svg>
\ No newline at end of file diff --git a/activity/permissions.info b/activity/permissions.info new file mode 100755 index 0000000..585d713 --- /dev/null +++ b/activity/permissions.info @@ -0,0 +1 @@ +constant-uid diff --git a/datamanager.py b/datamanager.py new file mode 100755 index 0000000..0dc5ba4 --- /dev/null +++ b/datamanager.py @@ -0,0 +1,12 @@ +from sugar.activity import activity +import listview + +class DataManager(activity.Activity): + + def __init__(self, handle): + activity.Activity.__init__(self, handle) + toolbox = activity.ActivityToolbox(self) + self.set_toolbox(toolbox) + toolbox.show() + listview.pyApp(self) + diff --git a/filelisting.py b/filelisting.py new file mode 100755 index 0000000..f0a1aa0 --- /dev/null +++ b/filelisting.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python + +import os, stat, time +import pygtk +pygtk.require('2.0') +import gtk + +folderxpm = [ + "17 16 7 1", + " c #000000", + ". c #808000", + "X c yellow", + "o c #808080", + "O c #c0c0c0", + "+ c white", + "@ c None", + "@@@@@@@@@@@@@@@@@", + "@@@@@@@@@@@@@@@@@", + "@@+XXXX.@@@@@@@@@", + "@+OOOOOO.@@@@@@@@", + "@+OXOXOXOXOXOXO. ", + "@+XOXOXOXOXOXOX. ", + "@+OXOXOXOXOXOXO. ", + "@+XOXOXOXOXOXOX. ", + "@+OXOXOXOXOXOXO. ", + "@+XOXOXOXOXOXOX. ", + "@+OXOXOXOXOXOXO. ", + "@+XOXOXOXOXOXOX. ", + "@+OOOOOOOOOOOOO. ", + "@ ", + "@@@@@@@@@@@@@@@@@", + "@@@@@@@@@@@@@@@@@" + ] +folderpb = gtk.gdk.pixbuf_new_from_xpm_data(folderxpm) + +filexpm = [ + "12 12 3 1", + " c #000000", + ". c #ffff04", + "X c #b2c0dc", + "X XXX", + "X ...... XXX", + "X ...... X", + "X . ... X", + "X ........ X", + "X . .... X", + "X ........ X", + "X . .. X", + "X ........ X", + "X . .. X", + "X ........ X", + "X X" + ] +filepb = gtk.gdk.pixbuf_new_from_xpm_data(filexpm) + +class FileListingCellDataExample: + column_names = ['Name', 'Size', 'Mode', 'Last Changed'] + + def delete_event(self, widget, event, data=None): + gtk.main_quit() + return False + + def __init__(self, dname = None): + cell_data_funcs = (None, self.file_size, self.file_mode, + self.file_last_changed) + + # Create a new window + self.window = gtk.Window(gtk.WINDOW_TOPLEVEL) + + self.window.set_size_request(400, 300) + + self.window.connect("delete_event", self.delete_event) + + listmodel = self.make_list(dname) + + # create the TreeView + self.treeview = gtk.TreeView() + + # create the TreeViewColumns to display the data + self.tvcolumn = [None] * len(self.column_names) + cellpb = gtk.CellRendererPixbuf() + self.tvcolumn[0] = gtk.TreeViewColumn(self.column_names[0], cellpb) + self.tvcolumn[0].set_cell_data_func(cellpb, self.file_pixbuf) + cell = gtk.CellRendererText() + self.tvcolumn[0].pack_start(cell, False) + self.tvcolumn[0].set_cell_data_func(cell, self.file_name) + self.treeview.append_column(self.tvcolumn[0]) + for n in range(1, len(self.column_names)): + cell = gtk.CellRendererText() + self.tvcolumn[n] = gtk.TreeViewColumn(self.column_names[n], cell) + if n == 1: + cell.set_property('xalign', 1.0) + self.tvcolumn[n].set_cell_data_func(cell, cell_data_funcs[n]) + self.treeview.append_column(self.tvcolumn[n]) + + self.treeview.connect('row-activated', self.open_file) + self.scrolledwindow = gtk.ScrolledWindow() + self.scrolledwindow.add(self.treeview) + self.window.add(self.scrolledwindow) + self.treeview.set_model(listmodel) + + self.window.show_all() + return + + def make_list(self, dname=None): + if not dname: + self.dirname = os.path.expanduser('~') + else: + self.dirname = os.path.abspath(dname) + self.window.set_title(self.dirname) + files = [f for f in os.listdir(self.dirname) if f[0] <> '.'] + files.sort() + files = ['..'] + files + listmodel = gtk.ListStore(object) + for f in files: + listmodel.append([f]) + return listmodel + + def open_file(self, treeview, path, column): + model = treeview.get_model() + iter = model.get_iter(path) + filename = os.path.join(self.dirname, model.get_value(iter, 0)) + filestat = os.stat(filename) + if stat.S_ISDIR(filestat.st_mode): + new_model = self.make_list(filename) + treeview.set_model(new_model) + return + + def file_pixbuf(self, column, cell, model, iter): + filename = os.path.join(self.dirname, model.get_value(iter, 0)) + filestat = os.stat(filename) + if stat.S_ISDIR(filestat.st_mode): + pb = folderpb + else: + pb = filepb + cell.set_property('pixbuf', pb) + return + + def file_name(self, column, cell, model, iter): + cell.set_property('text', model.get_value(iter, 0)) + return + + def file_size(self, column, cell, model, iter): + filename = os.path.join(self.dirname, model.get_value(iter, 0)) + filestat = os.stat(filename) + cell.set_property('text', filestat.st_size) + return + + def file_mode(self, column, cell, model, iter): + filename = os.path.join(self.dirname, model.get_value(iter, 0)) + filestat = os.stat(filename) + cell.set_property('text', oct(stat.S_IMODE(filestat.st_mode))) + return + + + def file_last_changed(self, column, cell, model, iter): + filename = os.path.join(self.dirname, model.get_value(iter, 0)) + filestat = os.stat(filename) + cell.set_property('text', time.ctime(filestat.st_mtime)) + return + +def main(): + gtk.main() + +if __name__ == "__main__": + flcdexample = FileListingCellDataExample() + main() + diff --git a/listview.py b/listview.py new file mode 100755 index 0000000..67b6d6a --- /dev/null +++ b/listview.py @@ -0,0 +1,144 @@ +#!/usr/bin/python + +# ZetCode PyGTK tutorial +# +# This example shows a TreeView widget +# in a list view mode +# +# author: jan bodnar +# website: zetcode.com +# last edited: February 2009 + +import sys, os +import gtk +import objectfetch +import urllib2 +from BeautifulSoup import BeautifulSoup +from xml.dom.minidom import parse, parseString +from sugar.activity import activity +from sugar.datastore import datastore +from path import path + +ACTIVITYPATH = path(activity.get_bundle_path()) +global online +online = False + +class PyApp(gtk.Window): + def __init__(self): + super(PyApp, self).__init__() + self.set_size_request(350, 250) + self.set_position(gtk.WIN_POS_CENTER) + + self.connect("destroy", gtk.main_quit) + self.set_title("DataManager") + + vbox = gtk.VBox(False, 8) + + sw = gtk.ScrolledWindow() + sw.set_shadow_type(gtk.SHADOW_ETCHED_IN) + sw.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC) + + vbox.pack_start(sw, True, True, 0) + + store = self.create_model() + + treeView = gtk.TreeView(store) + treeView.connect("row-activated", self.on_activated) + treeView.set_rules_hint(True) + sw.add(treeView) + + self.create_columns(treeView) + self.statusbar = gtk.Statusbar() + + vbox.pack_start(self.statusbar, False, False, 0) + + self.add(vbox) + self.show_all() + + + def create_model(self): + global online + store = gtk.ListStore(str, str, str) + #let's display objects from the schoolserver + pth = ACTIVITYPATH / 'xoconfig' + configf = open(pth, "r") + config = configf.read() + configf.close() + pos = config.find('[Server]') + if pos < 0: + print 'schoolserver not found' + xoserial = None + store.append(["schoolserver not found", "",""]) + else: + pos = pos + 8 + print 'config', config[pos:] + pos1 = pos + config[pos:].find("= ") + 2 + pos2 = pos + config[pos:].find("@") + xoserial = config[pos1:pos2] + print 'xoserial', xoserial, config[pos1:pos2+1] + #temp for testing + xoserial = 'SHC84201F3B' + if xoserial: + url = "http://schoolserver/ds-restore/" + xoserial + "/datastore-latest" + print 'url', url + try: + response = urllib2.urlopen(url) + except: + print 'schoolserver not found' + response = None + if response: + soup = BeautifulSoup(response) + online = True + else: + print 'schoolserver not found at', url + if online: + print 'soup=', soup.prettify() + entries = soup.findAll("li") + print len(entries) + for i in range(10): + print len(entries[i].contents), entries[i].contents + colx = str(entries[i].contents[0]) + pos1 = colx.find('<') + pos2 = colx.find('>') + col0 = colx[pos1:pos2] + col1 = 'col1' + colx[pos2+1:] + temp = entries[i].contents[1].split(' ') + col2 = 'col2:' + temp[0] + col3 = 'col3:' + temp[1] + store.append([col1, col2, col3]) + #print 'sample', entries[0].toprettyxml() + #objects from the local datastore + results, count = datastore.find(dict()) + print 'number of datastore items', count + for f in results: + obj = datastore.get(f.object_id) + f.destroy() + store.append([obj.get_file_path(), obj.metadata['title'], obj.metadata['mime_type']]) + #ignore for the moment + return store + + + def create_columns(self, treeView): + + rendererText = gtk.CellRendererText() + column = gtk.TreeViewColumn("Title", rendererText, text=1) + column.set_sort_column_id(0) + treeView.append_column(column) + + rendererText = gtk.CellRendererText() + column = gtk.TreeViewColumn("Mime_type", rendererText, text=2) + column.set_sort_column_id(1) + treeView.append_column(column) + + + def on_activated(self, widget, row, col): + + model = widget.get_model() + text = model[row][0] + ", " + model[row][1] + ", " + model[row][2] + self.statusbar.push(0, text) + + + +PyApp() +gtk.main() + diff --git a/objectchooser.py b/objectchooser.py new file mode 100755 index 0000000..861d0ab --- /dev/null +++ b/objectchooser.py @@ -0,0 +1,118 @@ +# Copyright (C) 2007, One Laptop Per Child +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +import logging + +import gobject +import gtk +import dbus + +from sugar.datastore import datastore + +J_DBUS_SERVICE = 'org.laptop.Journal' +J_DBUS_INTERFACE = 'org.laptop.Journal' +J_DBUS_PATH = '/org/laptop/Journal' + +class ObjectChooser(object): + def __init__(self, title=None, parent=None, flags=None, buttons=None): + # For backwards compatibility: + # - We ignore title, flags and buttons. + # - 'parent' can be a xid or a gtk.Window + + if title is not None or flags is not None or buttons is not None: + logging.warning('Invocation of ObjectChooser() has deprecated ' + 'parameters.') + + if parent is None: + parent_xid = 0 + elif hasattr(parent, 'window') and hasattr(parent.window, 'xid'): + parent_xid = parent.window.xid + else: + parent_xid = parent + + self._parent_xid = parent_xid + self._main_loop = None + self._object_id = None + self._bus = None + self._chooser_id = None + self._response_code = gtk.RESPONSE_NONE + + def run(self): + self._object_id = None + + self._main_loop = gobject.MainLoop() + + self._bus = dbus.SessionBus(mainloop=self._main_loop) + self._bus.add_signal_receiver( + self.__name_owner_changed_cb, + signal_name="NameOwnerChanged", + dbus_interface="org.freedesktop.DBus", + arg0=J_DBUS_SERVICE) + + obj = self._bus.get_object(J_DBUS_SERVICE, J_DBUS_PATH) + journal = dbus.Interface(obj, J_DBUS_INTERFACE) + journal.connect_to_signal('ObjectChooserResponse', + self.__chooser_response_cb) + journal.connect_to_signal('ObjectChooserCancelled', + self.__chooser_cancelled_cb) + self._chooser_id = journal.ChooseObject(self._parent_xid) + + gtk.gdk.threads_leave() + try: + self._main_loop.run() + finally: + gtk.gdk.threads_enter() + self._main_loop = None + + return self._response_code + + def get_selected_object(self): + if self._object_id is None: + return None + else: + return datastore.get(self._object_id) + + def destroy(self): + self._cleanup() + + def _cleanup(self): + if self._main_loop is not None: + self._main_loop.quit() + self._main_loop = None + self._bus = None + + def __chooser_response_cb(self, chooser_id, object_id): + if chooser_id != self._chooser_id: + return + logging.debug('ObjectChooser.__chooser_response_cb: %r' % object_id) + self._response_code = gtk.RESPONSE_ACCEPT + self._object_id = object_id + self._cleanup() + + def __chooser_cancelled_cb(self, chooser_id): + if chooser_id != self._chooser_id: + return + logging.debug('ObjectChooser.__chooser_cancelled_cb: %r' % chooser_id) + self._response_code = gtk.RESPONSE_CANCEL + self._cleanup() + + def __name_owner_changed_cb(self, name, old, new): + logging.debug('ObjectChooser.__name_owner_changed_cb') + # Journal service disappeared from the bus + self._response_code = gtk.RESPONSE_CANCEL + self._cleanup() + diff --git a/objectfetch.py b/objectfetch.py new file mode 100755 index 0000000..6c3bb92 --- /dev/null +++ b/objectfetch.py @@ -0,0 +1,120 @@ +# Copyright (C) 2007, One Laptop Per Child +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +import logging + +import gobject +import gtk +import dbus + +from sugar.datastore import datastore + +J_DBUS_SERVICE = 'org.laptop.Journal' +J_DBUS_INTERFACE = 'org.laptop.Journal' +J_DBUS_PATH = '/org/laptop/Journal' + +class ObjectChooser(object): + def __init__(self, title=None, parent=None, flags=None, buttons=None): + # For backwards compatibility: + # - We ignore title, flags and buttons. + # - 'parent' can be a xid or a gtk.Window + + if title is not None or flags is not None or buttons is not None: + logging.warning('Invocation of ObjectChooser() has deprecated ' + 'parameters.') + + if parent is None: + parent_xid = 0 + elif hasattr(parent, 'window') and hasattr(parent.window, 'xid'): + parent_xid = parent.window.xid + else: + parent_xid = parent + + self._parent_xid = parent_xid + self._main_loop = None + self._object_id = None + self._bus = None + self._chooser_id = None + self._response_code = gtk.RESPONSE_NONE + + def run(self): + self._object_id = None + + self._main_loop = gobject.MainLoop() + + self._bus = dbus.SessionBus(mainloop=self._main_loop) + self._bus.add_signal_receiver( + self.__name_owner_changed_cb, + signal_name="NameOwnerChanged", + dbus_interface="org.freedesktop.DBus", + arg0=J_DBUS_SERVICE) + + obj = self._bus.get_object(J_DBUS_SERVICE, J_DBUS_PATH) + print 'obj', obj.get_metadata['title'] + print 'obj', obj.file_path + journal = dbus.Interface(obj, J_DBUS_INTERFACE) + journal.connect_to_signal('ObjectChooserResponse', + self.__chooser_response_cb) + journal.connect_to_signal('ObjectChooserCancelled', + self.__chooser_cancelled_cb) + self._chooser_id = journal.ChooseObject(self._parent_xid) + + gtk.gdk.threads_leave() + try: + self._main_loop.run() + finally: + gtk.gdk.threads_enter() + self._main_loop = None + + return self._response_code + + def get_selected_object(self): + if self._object_id is None: + return None + else: + return datastore.get(self._object_id) + + def destroy(self): + self._cleanup() + + def _cleanup(self): + if self._main_loop is not None: + self._main_loop.quit() + self._main_loop = None + self._bus = None + + def __chooser_response_cb(self, chooser_id, object_id): + if chooser_id != self._chooser_id: + return + logging.debug('ObjectChooser.__chooser_response_cb: %r' % object_id) + self._response_code = gtk.RESPONSE_ACCEPT + self._object_id = object_id + self._cleanup() + + def __chooser_cancelled_cb(self, chooser_id): + if chooser_id != self._chooser_id: + return + logging.debug('ObjectChooser.__chooser_cancelled_cb: %r' % chooser_id) + self._response_code = gtk.RESPONSE_CANCEL + self._cleanup() + + def __name_owner_changed_cb(self, name, old, new): + logging.debug('ObjectChooser.__name_owner_changed_cb') + # Journal service disappeared from the bus + self._response_code = gtk.RESPONSE_CANCEL + self._cleanup() + @@ -0,0 +1,971 @@ +""" path.py - An object representing a path to a file or directory. + +Example: + +from path import path +d = path('/home/guido/bin') +for f in d.files('*.py'): + f.chmod(0755) + +This module requires Python 2.2 or later. + + +URL: http://www.jorendorff.com/articles/python/path +Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!) +Date: 9 Mar 2007 +""" + + +# TODO +# - Tree-walking functions don't avoid symlink loops. Matt Harrison +# sent me a patch for this. +# - Bug in write_text(). It doesn't support Universal newline mode. +# - Better error message in listdir() when self isn't a +# directory. (On Windows, the error message really sucks.) +# - Make sure everything has a good docstring. +# - Add methods for regex find and replace. +# - guess_content_type() method? +# - Perhaps support arguments to touch(). + +from __future__ import generators + +import sys, warnings, os, fnmatch, glob, shutil, codecs, md5 + +__version__ = '2.2' +__all__ = ['path'] + +# Platform-specific support for path.owner +if os.name == 'nt': + try: + import win32security + except ImportError: + win32security = None +else: + try: + import pwd + except ImportError: + pwd = None + +# Pre-2.3 support. Are unicode filenames supported? +_base = str +_getcwd = os.getcwd +try: + if os.path.supports_unicode_filenames: + _base = unicode + _getcwd = os.getcwdu +except AttributeError: + pass + +# Pre-2.3 workaround for booleans +try: + True, False +except NameError: + True, False = 1, 0 + +# Pre-2.3 workaround for basestring. +try: + basestring +except NameError: + basestring = (str, unicode) + +# Universal newline support +_textmode = 'r' +if hasattr(file, 'newlines'): + _textmode = 'U' + + +class TreeWalkWarning(Warning): + pass + +class path(_base): + """ Represents a filesystem path. + + For documentation on individual methods, consult their + counterparts in os.path. + """ + + # --- Special Python methods. + + def __repr__(self): + return 'path(%s)' % _base.__repr__(self) + + # Adding a path and a string yields a path. + def __add__(self, more): + try: + resultStr = _base.__add__(self, more) + except TypeError: #Python bug + resultStr = NotImplemented + if resultStr is NotImplemented: + return resultStr + return self.__class__(resultStr) + + def __radd__(self, other): + if isinstance(other, basestring): + return self.__class__(other.__add__(self)) + else: + return NotImplemented + + # The / operator joins paths. + def __div__(self, rel): + """ fp.__div__(rel) == fp / rel == fp.joinpath(rel) + + Join two path components, adding a separator character if + needed. + """ + return self.__class__(os.path.join(self, rel)) + + # Make the / operator work even when true division is enabled. + __truediv__ = __div__ + + def getcwd(cls): + """ Return the current working directory as a path object. """ + return cls(_getcwd()) + getcwd = classmethod(getcwd) + + + # --- Operations on path strings. + + isabs = os.path.isabs + def abspath(self): return self.__class__(os.path.abspath(self)) + def normcase(self): return self.__class__(os.path.normcase(self)) + def normpath(self): return self.__class__(os.path.normpath(self)) + def realpath(self): return self.__class__(os.path.realpath(self)) + def expanduser(self): return self.__class__(os.path.expanduser(self)) + def expandvars(self): return self.__class__(os.path.expandvars(self)) + def dirname(self): return self.__class__(os.path.dirname(self)) + basename = os.path.basename + + def expand(self): + """ Clean up a filename by calling expandvars(), + expanduser(), and normpath() on it. + + This is commonly everything needed to clean up a filename + read from a configuration file, for example. + """ + return self.expandvars().expanduser().normpath() + + def _get_namebase(self): + base, ext = os.path.splitext(self.name) + return base + + def _get_ext(self): + f, ext = os.path.splitext(_base(self)) + return ext + + def _get_drive(self): + drive, r = os.path.splitdrive(self) + return self.__class__(drive) + + parent = property( + dirname, None, None, + """ This path's parent directory, as a new path object. + + For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib') + """) + + name = property( + basename, None, None, + """ The name of this file or directory without the full path. + + For example, path('/usr/local/lib/libpython.so').name == 'libpython.so' + """) + + namebase = property( + _get_namebase, None, None, + """ The same as path.name, but with one file extension stripped off. + + For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz', + but path('/home/guido/python.tar.gz').namebase == 'python.tar' + """) + + ext = property( + _get_ext, None, None, + """ The file extension, for example '.py'. """) + + drive = property( + _get_drive, None, None, + """ The drive specifier, for example 'C:'. + This is always empty on systems that don't use drive specifiers. + """) + + def splitpath(self): + """ p.splitpath() -> Return (p.parent, p.name). """ + parent, child = os.path.split(self) + return self.__class__(parent), child + + def splitdrive(self): + """ p.splitdrive() -> Return (p.drive, <the rest of p>). + + Split the drive specifier from this path. If there is + no drive specifier, p.drive is empty, so the return value + is simply (path(''), p). This is always the case on Unix. + """ + drive, rel = os.path.splitdrive(self) + return self.__class__(drive), rel + + def splitext(self): + """ p.splitext() -> Return (p.stripext(), p.ext). + + Split the filename extension from this path and return + the two parts. Either part may be empty. + + The extension is everything from '.' to the end of the + last path segment. This has the property that if + (a, b) == p.splitext(), then a + b == p. + """ + filename, ext = os.path.splitext(self) + return self.__class__(filename), ext + + def stripext(self): + """ p.stripext() -> Remove one file extension from the path. + + For example, path('/home/guido/python.tar.gz').stripext() + returns path('/home/guido/python.tar'). + """ + return self.splitext()[0] + + if hasattr(os.path, 'splitunc'): + def splitunc(self): + unc, rest = os.path.splitunc(self) + return self.__class__(unc), rest + + def _get_uncshare(self): + unc, r = os.path.splitunc(self) + return self.__class__(unc) + + uncshare = property( + _get_uncshare, None, None, + """ The UNC mount point for this path. + This is empty for paths on local drives. """) + + def joinpath(self, *args): + """ Join two or more path components, adding a separator + character (os.sep) if needed. Returns a new path + object. + """ + return self.__class__(os.path.join(self, *args)) + + def splitall(self): + r""" Return a list of the path components in this path. + + The first item in the list will be a path. Its value will be + either os.curdir, os.pardir, empty, or the root directory of + this path (for example, '/' or 'C:\\'). The other items in + the list will be strings. + + path.path.joinpath(*result) will yield the original path. + """ + parts = [] + loc = self + while loc != os.curdir and loc != os.pardir: + prev = loc + loc, child = prev.splitpath() + if loc == prev: + break + parts.append(child) + parts.append(loc) + parts.reverse() + return parts + + def relpath(self): + """ Return this path as a relative path, + based from the current working directory. + """ + cwd = self.__class__(os.getcwd()) + return cwd.relpathto(self) + + def relpathto(self, dest): + """ Return a relative path from self to dest. + + If there is no relative path from self to dest, for example if + they reside on different drives in Windows, then this returns + dest.abspath(). + """ + origin = self.abspath() + dest = self.__class__(dest).abspath() + + orig_list = origin.normcase().splitall() + # Don't normcase dest! We want to preserve the case. + dest_list = dest.splitall() + + if orig_list[0] != os.path.normcase(dest_list[0]): + # Can't get here from there. + return dest + + # Find the location where the two paths start to differ. + i = 0 + for start_seg, dest_seg in zip(orig_list, dest_list): + if start_seg != os.path.normcase(dest_seg): + break + i += 1 + + # Now i is the point where the two paths diverge. + # Need a certain number of "os.pardir"s to work up + # from the origin to the point of divergence. + segments = [os.pardir] * (len(orig_list) - i) + # Need to add the diverging part of dest_list. + segments += dest_list[i:] + if len(segments) == 0: + # If they happen to be identical, use os.curdir. + relpath = os.curdir + else: + relpath = os.path.join(*segments) + return self.__class__(relpath) + + # --- Listing, searching, walking, and matching + + def listdir(self, pattern=None): + """ D.listdir() -> List of items in this directory. + + Use D.files() or D.dirs() instead if you want a listing + of just files or just subdirectories. + + The elements of the list are path objects. + + With the optional 'pattern' argument, this only lists + items whose names match the given pattern. + """ + names = os.listdir(self) + if pattern is not None: + names = fnmatch.filter(names, pattern) + return [self / child for child in names] + + def dirs(self, pattern=None): + """ D.dirs() -> List of this directory's subdirectories. + + The elements of the list are path objects. + This does not walk recursively into subdirectories + (but see path.walkdirs). + + With the optional 'pattern' argument, this only lists + directories whose names match the given pattern. For + example, d.dirs('build-*'). + """ + return [p for p in self.listdir(pattern) if p.isdir()] + + def files(self, pattern=None): + """ D.files() -> List of the files in this directory. + + The elements of the list are path objects. + This does not walk into subdirectories (see path.walkfiles). + + With the optional 'pattern' argument, this only lists files + whose names match the given pattern. For example, + d.files('*.pyc'). + """ + + return [p for p in self.listdir(pattern) if p.isfile()] + + def walk(self, pattern=None, errors='strict'): + """ D.walk() -> iterator over files and subdirs, recursively. + + The iterator yields path objects naming each child item of + this directory and its descendants. This requires that + D.isdir(). + + This performs a depth-first traversal of the directory tree. + Each directory is returned just before all its children. + + The errors= keyword argument controls behavior when an + error occurs. The default is 'strict', which causes an + exception. The other allowed values are 'warn', which + reports the error via warnings.warn(), and 'ignore'. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + childList = self.listdir() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in childList: + if pattern is None or child.fnmatch(pattern): + yield child + try: + isdir = child.isdir() + except Exception: + if errors == 'ignore': + isdir = False + elif errors == 'warn': + warnings.warn( + "Unable to access '%s': %s" + % (child, sys.exc_info()[1]), + TreeWalkWarning) + isdir = False + else: + raise + + if isdir: + for item in child.walk(pattern, errors): + yield item + + def walkdirs(self, pattern=None, errors='strict'): + """ D.walkdirs() -> iterator over subdirs, recursively. + + With the optional 'pattern' argument, this yields only + directories whose names match the given pattern. For + example, mydir.walkdirs('*test') yields only directories + with names ending in 'test'. + + The errors= keyword argument controls behavior when an + error occurs. The default is 'strict', which causes an + exception. The other allowed values are 'warn', which + reports the error via warnings.warn(), and 'ignore'. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + dirs = self.dirs() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in dirs: + if pattern is None or child.fnmatch(pattern): + yield child + for subsubdir in child.walkdirs(pattern, errors): + yield subsubdir + + def walkfiles(self, pattern=None, errors='strict'): + """ D.walkfiles() -> iterator over files in D, recursively. + + The optional argument, pattern, limits the results to files + with names that match the pattern. For example, + mydir.walkfiles('*.tmp') yields only files with the .tmp + extension. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + childList = self.listdir() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in childList: + try: + isfile = child.isfile() + isdir = not isfile and child.isdir() + except: + if errors == 'ignore': + continue + elif errors == 'warn': + warnings.warn( + "Unable to access '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + continue + else: + raise + + if isfile: + if pattern is None or child.fnmatch(pattern): + yield child + elif isdir: + for f in child.walkfiles(pattern, errors): + yield f + + def fnmatch(self, pattern): + """ Return True if self.name matches the given pattern. + + pattern - A filename pattern with wildcards, + for example '*.py'. + """ + return fnmatch.fnmatch(self.name, pattern) + + def glob(self, pattern): + """ Return a list of path objects that match the pattern. + + pattern - a path relative to this directory, with wildcards. + + For example, path('/users').glob('*/bin/*') returns a list + of all the files users have in their bin directories. + """ + cls = self.__class__ + return [cls(s) for s in glob.glob(_base(self / pattern))] + + + # --- Reading or writing an entire file at once. + + def open(self, mode='r'): + """ Open this file. Return a file object. """ + return file(self, mode) + + def bytes(self): + """ Open this file, read all bytes, return them as a string. """ + f = self.open('rb') + try: + return f.read() + finally: + f.close() + + def write_bytes(self, bytes, append=False): + """ Open this file and write the given bytes to it. + + Default behavior is to overwrite any existing file. + Call p.write_bytes(bytes, append=True) to append instead. + """ + if append: + mode = 'ab' + else: + mode = 'wb' + f = self.open(mode) + try: + f.write(bytes) + finally: + f.close() + + def text(self, encoding=None, errors='strict'): + r""" Open this file, read it in, return the content as a string. + + This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r' + are automatically translated to '\n'. + + Optional arguments: + + encoding - The Unicode encoding (or character set) of + the file. If present, the content of the file is + decoded and returned as a unicode object; otherwise + it is returned as an 8-bit str. + errors - How to handle Unicode errors; see help(str.decode) + for the options. Default is 'strict'. + """ + if encoding is None: + # 8-bit + f = self.open(_textmode) + try: + return f.read() + finally: + f.close() + else: + # Unicode + f = codecs.open(self, 'r', encoding, errors) + # (Note - Can't use 'U' mode here, since codecs.open + # doesn't support 'U' mode, even in Python 2.3.) + try: + t = f.read() + finally: + f.close() + return (t.replace(u'\r\n', u'\n') + .replace(u'\r\x85', u'\n') + .replace(u'\r', u'\n') + .replace(u'\x85', u'\n') + .replace(u'\u2028', u'\n')) + + def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False): + r""" Write the given text to this file. + + The default behavior is to overwrite any existing file; + to append instead, use the 'append=True' keyword argument. + + There are two differences between path.write_text() and + path.write_bytes(): newline handling and Unicode handling. + See below. + + Parameters: + + - text - str/unicode - The text to be written. + + - encoding - str - The Unicode encoding that will be used. + This is ignored if 'text' isn't a Unicode string. + + - errors - str - How to handle Unicode encoding errors. + Default is 'strict'. See help(unicode.encode) for the + options. This is ignored if 'text' isn't a Unicode + string. + + - linesep - keyword argument - str/unicode - The sequence of + characters to be used to mark end-of-line. The default is + os.linesep. You can also specify None; this means to + leave all newlines as they are in 'text'. + + - append - keyword argument - bool - Specifies what to do if + the file already exists (True: append to the end of it; + False: overwrite it.) The default is False. + + + --- Newline handling. + + write_text() converts all standard end-of-line sequences + ('\n', '\r', and '\r\n') to your platform's default end-of-line + sequence (see os.linesep; on Windows, for example, the + end-of-line marker is '\r\n'). + + If you don't like your platform's default, you can override it + using the 'linesep=' keyword argument. If you specifically want + write_text() to preserve the newlines as-is, use 'linesep=None'. + + This applies to Unicode text the same as to 8-bit text, except + there are three additional standard Unicode end-of-line sequences: + u'\x85', u'\r\x85', and u'\u2028'. + + (This is slightly different from when you open a file for + writing with fopen(filename, "w") in C or file(filename, 'w') + in Python.) + + + --- Unicode + + If 'text' isn't Unicode, then apart from newline handling, the + bytes are written verbatim to the file. The 'encoding' and + 'errors' arguments are not used and must be omitted. + + If 'text' is Unicode, it is first converted to bytes using the + specified 'encoding' (or the default encoding if 'encoding' + isn't specified). The 'errors' argument applies only to this + conversion. + + """ + if isinstance(text, unicode): + if linesep is not None: + # Convert all standard end-of-line sequences to + # ordinary newline characters. + text = (text.replace(u'\r\n', u'\n') + .replace(u'\r\x85', u'\n') + .replace(u'\r', u'\n') + .replace(u'\x85', u'\n') + .replace(u'\u2028', u'\n')) + text = text.replace(u'\n', linesep) + if encoding is None: + encoding = sys.getdefaultencoding() + bytes = text.encode(encoding, errors) + else: + # It is an error to specify an encoding if 'text' is + # an 8-bit string. + assert encoding is None + + if linesep is not None: + text = (text.replace('\r\n', '\n') + .replace('\r', '\n')) + bytes = text.replace('\n', linesep) + + self.write_bytes(bytes, append) + + def lines(self, encoding=None, errors='strict', retain=True): + r""" Open this file, read all lines, return them in a list. + + Optional arguments: + encoding - The Unicode encoding (or character set) of + the file. The default is None, meaning the content + of the file is read as 8-bit characters and returned + as a list of (non-Unicode) str objects. + errors - How to handle Unicode errors; see help(str.decode) + for the options. Default is 'strict' + retain - If true, retain newline characters; but all newline + character combinations ('\r', '\n', '\r\n') are + translated to '\n'. If false, newline characters are + stripped off. Default is True. + + This uses 'U' mode in Python 2.3 and later. + """ + if encoding is None and retain: + f = self.open(_textmode) + try: + return f.readlines() + finally: + f.close() + else: + return self.text(encoding, errors).splitlines(retain) + + def write_lines(self, lines, encoding=None, errors='strict', + linesep=os.linesep, append=False): + r""" Write the given lines of text to this file. + + By default this overwrites any existing file at this path. + + This puts a platform-specific newline sequence on every line. + See 'linesep' below. + + lines - A list of strings. + + encoding - A Unicode encoding to use. This applies only if + 'lines' contains any Unicode strings. + + errors - How to handle errors in Unicode encoding. This + also applies only to Unicode strings. + + linesep - The desired line-ending. This line-ending is + applied to every line. If a line already has any + standard line ending ('\r', '\n', '\r\n', u'\x85', + u'\r\x85', u'\u2028'), that will be stripped off and + this will be used instead. The default is os.linesep, + which is platform-dependent ('\r\n' on Windows, '\n' on + Unix, etc.) Specify None to write the lines as-is, + like file.writelines(). + + Use the keyword argument append=True to append lines to the + file. The default is to overwrite the file. Warning: + When you use this with Unicode data, if the encoding of the + existing data in the file is different from the encoding + you specify with the encoding= parameter, the result is + mixed-encoding data, which can really confuse someone trying + to read the file later. + """ + if append: + mode = 'ab' + else: + mode = 'wb' + f = self.open(mode) + try: + for line in lines: + isUnicode = isinstance(line, unicode) + if linesep is not None: + # Strip off any existing line-end and add the + # specified linesep string. + if isUnicode: + if line[-2:] in (u'\r\n', u'\x0d\x85'): + line = line[:-2] + elif line[-1:] in (u'\r', u'\n', + u'\x85', u'\u2028'): + line = line[:-1] + else: + if line[-2:] == '\r\n': + line = line[:-2] + elif line[-1:] in ('\r', '\n'): + line = line[:-1] + line += linesep + if isUnicode: + if encoding is None: + encoding = sys.getdefaultencoding() + line = line.encode(encoding, errors) + f.write(line) + finally: + f.close() + + def read_md5(self): + """ Calculate the md5 hash for this file. + + This reads through the entire file. + """ + f = self.open('rb') + try: + m = md5.new() + while True: + d = f.read(8192) + if not d: + break + m.update(d) + finally: + f.close() + return m.digest() + + # --- Methods for querying the filesystem. + + exists = os.path.exists + isdir = os.path.isdir + isfile = os.path.isfile + islink = os.path.islink + ismount = os.path.ismount + + if hasattr(os.path, 'samefile'): + samefile = os.path.samefile + + getatime = os.path.getatime + atime = property( + getatime, None, None, + """ Last access time of the file. """) + + getmtime = os.path.getmtime + mtime = property( + getmtime, None, None, + """ Last-modified time of the file. """) + + if hasattr(os.path, 'getctime'): + getctime = os.path.getctime + ctime = property( + getctime, None, None, + """ Creation time of the file. """) + + getsize = os.path.getsize + size = property( + getsize, None, None, + """ Size of the file, in bytes. """) + + if hasattr(os, 'access'): + def access(self, mode): + """ Return true if current user has access to this path. + + mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK + """ + return os.access(self, mode) + + def stat(self): + """ Perform a stat() system call on this path. """ + return os.stat(self) + + def lstat(self): + """ Like path.stat(), but do not follow symbolic links. """ + return os.lstat(self) + + def get_owner(self): + r""" Return the name of the owner of this file or directory. + + This follows symbolic links. + + On Windows, this returns a name of the form ur'DOMAIN\User Name'. + On Windows, a group can own a file or directory. + """ + if os.name == 'nt': + if win32security is None: + raise Exception("path.owner requires win32all to be installed") + desc = win32security.GetFileSecurity( + self, win32security.OWNER_SECURITY_INFORMATION) + sid = desc.GetSecurityDescriptorOwner() + account, domain, typecode = win32security.LookupAccountSid(None, sid) + return domain + u'\\' + account + else: + if pwd is None: + raise NotImplementedError("path.owner is not implemented on this platform.") + st = self.stat() + return pwd.getpwuid(st.st_uid).pw_name + + owner = property( + get_owner, None, None, + """ Name of the owner of this file or directory. """) + + if hasattr(os, 'statvfs'): + def statvfs(self): + """ Perform a statvfs() system call on this path. """ + return os.statvfs(self) + + if hasattr(os, 'pathconf'): + def pathconf(self, name): + return os.pathconf(self, name) + + + # --- Modifying operations on files and directories + + def utime(self, times): + """ Set the access and modified times of this file. """ + os.utime(self, times) + + def chmod(self, mode): + os.chmod(self, mode) + + if hasattr(os, 'chown'): + def chown(self, uid, gid): + os.chown(self, uid, gid) + + def rename(self, new): + os.rename(self, new) + + def renames(self, new): + os.renames(self, new) + + + # --- Create/delete operations on directories + + def mkdir(self, mode=0777): + os.mkdir(self, mode) + + def makedirs(self, mode=0777): + os.makedirs(self, mode) + + def rmdir(self): + os.rmdir(self) + + def removedirs(self): + os.removedirs(self) + + + # --- Modifying operations on files + + def touch(self): + """ Set the access/modified times of this file to the current time. + Create the file if it does not exist. + """ + fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666) + os.close(fd) + os.utime(self, None) + + def remove(self): + os.remove(self) + + def unlink(self): + os.unlink(self) + + + # --- Links + + if hasattr(os, 'link'): + def link(self, newpath): + """ Create a hard link at 'newpath', pointing to this file. """ + os.link(self, newpath) + + if hasattr(os, 'symlink'): + def symlink(self, newlink): + """ Create a symbolic link at 'newlink', pointing here. """ + os.symlink(self, newlink) + + if hasattr(os, 'readlink'): + def readlink(self): + """ Return the path to which this symbolic link points. + + The result may be an absolute or a relative path. + """ + return self.__class__(os.readlink(self)) + + def readlinkabs(self): + """ Return the path to which this symbolic link points. + + The result is always an absolute path. + """ + p = self.readlink() + if p.isabs(): + return p + else: + return (self.parent / p).abspath() + + + # --- High-level functions from shutil + + copyfile = shutil.copyfile + copymode = shutil.copymode + copystat = shutil.copystat + copy = shutil.copy + copy2 = shutil.copy2 + copytree = shutil.copytree + if hasattr(shutil, 'move'): + move = shutil.move + rmtree = shutil.rmtree + + + # --- Special stuff from os + + if hasattr(os, 'chroot'): + def chroot(self): + os.chroot(self) + + if hasattr(os, 'startfile'): + def startfile(self): + os.startfile(self) + + diff --git a/xoconfig b/xoconfig new file mode 100755 index 0000000..733851b --- /dev/null +++ b/xoconfig @@ -0,0 +1,25 @@ +[Sound] +volume = 100 + +[Shell] +favoriteslayout = ring-layout + +[Buddy] +color = #FF8F00,#00B20D +nickname = GOV007 + +[Power] +automaticpm = False +extremepm = False + +[Frame] +hotcorners = 0.0 +warmedges = 1000.0 + +[Date] +timezone = UTC + +[Jabber] +registered = False +server = schoolserver.schoolnet.gov.np + |