diff options
author | Sebastian Silva <sebastian@sugarlabs.org> | 2011-09-28 00:19:33 (GMT) |
---|---|---|
committer | Sebastian Silva <sebastian@sugarlabs.org> | 2011-09-28 06:54:34 (GMT) |
commit | 5861585e94a32b3032ac473804bf90c6e1363940 (patch) | |
tree | fb3a5bab0d75bf8eb780e749737fea87369754db /websdk/genshi | |
parent | be7aa93d7ba3682d5189e1a7d72169c0b02a1ec1 (diff) |
Migrated to Flask, added JQuery sugar theme, fixed race condition
Diffstat (limited to 'websdk/genshi')
-rw-r--r-- | websdk/genshi/__init__.py | 26 | ||||
-rw-r--r-- | websdk/genshi/builder.py | 359 | ||||
-rw-r--r-- | websdk/genshi/core.py | 727 | ||||
-rw-r--r-- | websdk/genshi/filters/__init__.py | 20 | ||||
-rw-r--r-- | websdk/genshi/filters/html.py | 453 | ||||
-rw-r--r-- | websdk/genshi/filters/i18n.py | 1238 | ||||
-rw-r--r-- | websdk/genshi/filters/transform.py | 1310 | ||||
-rw-r--r-- | websdk/genshi/input.py | 443 | ||||
-rw-r--r-- | websdk/genshi/output.py | 838 | ||||
-rw-r--r-- | websdk/genshi/path.py | 1528 | ||||
-rw-r--r-- | websdk/genshi/template/__init__.py | 23 | ||||
-rw-r--r-- | websdk/genshi/template/_ast24.py | 446 | ||||
-rw-r--r-- | websdk/genshi/template/ast24.py | 505 | ||||
-rw-r--r-- | websdk/genshi/template/astutil.py | 784 | ||||
-rw-r--r-- | websdk/genshi/template/base.py | 634 | ||||
-rw-r--r-- | websdk/genshi/template/directives.py | 725 | ||||
-rw-r--r-- | websdk/genshi/template/eval.py | 629 | ||||
-rw-r--r-- | websdk/genshi/template/interpolation.py | 153 | ||||
-rw-r--r-- | websdk/genshi/template/loader.py | 344 | ||||
-rw-r--r-- | websdk/genshi/template/markup.py | 397 | ||||
-rw-r--r-- | websdk/genshi/template/plugin.py | 176 | ||||
-rw-r--r-- | websdk/genshi/template/text.py | 333 | ||||
-rw-r--r-- | websdk/genshi/util.py | 274 |
23 files changed, 12365 insertions, 0 deletions
diff --git a/websdk/genshi/__init__.py b/websdk/genshi/__init__.py new file mode 100644 index 0000000..02f4347 --- /dev/null +++ b/websdk/genshi/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""This package provides various means for generating and processing web markup +(XML or HTML). + +The design is centered around the concept of streams of markup events (similar +in concept to SAX parsing events) which can be processed in a uniform manner +independently of where or how they are produced. +""" + +__docformat__ = 'restructuredtext en' +__version__ = '0.6' + +from genshi.core import * +from genshi.input import ParseError, XML, HTML diff --git a/websdk/genshi/builder.py b/websdk/genshi/builder.py new file mode 100644 index 0000000..724e364 --- /dev/null +++ b/websdk/genshi/builder.py @@ -0,0 +1,359 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Support for programmatically generating markup streams from Python code using +a very simple syntax. 
The main entry point to this module is the `tag` object +(which is actually an instance of the ``ElementFactory`` class). You should +rarely (if ever) need to directly import and use any of the other classes in +this module. + +Elements can be created using the `tag` object using attribute access. For +example: + +>>> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.') +>>> doc +<Element "p"> + +This produces an `Element` instance which can be further modified to add child +nodes and attributes. This is done by "calling" the element: positional +arguments are added as child nodes (alternatively, the `Element.append` method +can be used for that purpose), whereas keywords arguments are added as +attributes: + +>>> doc(tag.br) +<Element "p"> +>>> print(doc) +<p>Some text and <a href="http://example.org/">a link</a>.<br/></p> + +If an attribute name collides with a Python keyword, simply append an underscore +to the name: + +>>> doc(class_='intro') +<Element "p"> +>>> print(doc) +<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p> + +As shown above, an `Element` can easily be directly rendered to XML text by +printing it or using the Python ``str()`` function. This is basically a +shortcut for converting the `Element` to a stream and serializing that +stream: + +>>> stream = doc.generate() +>>> stream #doctest: +ELLIPSIS +<genshi.core.Stream object at ...> +>>> print(stream) +<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p> + + +The `tag` object also allows creating "fragments", which are basically lists +of nodes (elements or text) that don't have a parent element. This can be useful +for creating snippets of markup that are attached to a parent element later (for +example in a template). 
Fragments are created by calling the `tag` object, which +returns an object of type `Fragment`: + +>>> fragment = tag('Hello, ', tag.em('world'), '!') +>>> fragment +<Fragment> +>>> print(fragment) +Hello, <em>world</em>! +""" + +from genshi.core import Attrs, Markup, Namespace, QName, Stream, \ + START, END, TEXT + +__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag'] +__docformat__ = 'restructuredtext en' + + +class Fragment(object): + """Represents a markup fragment, which is basically just a list of element + or text nodes. + """ + __slots__ = ['children'] + + def __init__(self): + """Create a new fragment.""" + self.children = [] + + def __add__(self, other): + return Fragment()(self, other) + + def __call__(self, *args): + """Append any positional arguments as child nodes. + + :see: `append` + """ + for arg in args: + self.append(arg) + return self + + def __iter__(self): + return self._generate() + + def __repr__(self): + return '<%s>' % type(self).__name__ + + def __str__(self): + return str(self.generate()) + + def __unicode__(self): + return unicode(self.generate()) + + def __html__(self): + return Markup(self.generate()) + + def append(self, node): + """Append an element or string as child node. 
+ + :param node: the node to append; can be an `Element`, `Fragment`, or a + `Stream`, or a Python string or number + """ + if isinstance(node, (Stream, Element, basestring, int, float, long)): + # For objects of a known/primitive type, we avoid the check for + # whether it is iterable for better performance + self.children.append(node) + elif isinstance(node, Fragment): + self.children.extend(node.children) + elif node is not None: + try: + for child in node: + self.append(child) + except TypeError: + self.children.append(node) + + def _generate(self): + for child in self.children: + if isinstance(child, Fragment): + for event in child._generate(): + yield event + elif isinstance(child, Stream): + for event in child: + yield event + else: + if not isinstance(child, basestring): + child = unicode(child) + yield TEXT, child, (None, -1, -1) + + def generate(self): + """Return a markup event stream for the fragment. + + :rtype: `Stream` + """ + return Stream(self._generate()) + + +def _kwargs_to_attrs(kwargs): + attrs = [] + names = set() + for name, value in kwargs.items(): + name = name.rstrip('_').replace('_', '-') + if value is not None and name not in names: + attrs.append((QName(name), unicode(value))) + names.add(name) + return Attrs(attrs) + + +class Element(Fragment): + """Simple XML output generator based on the builder pattern. + + Construct XML elements by passing the tag name to the constructor: + + >>> print(Element('strong')) + <strong/> + + Attributes can be specified using keyword arguments. The values of the + arguments will be converted to strings and any special XML characters + escaped: + + >>> print(Element('textarea', rows=10, cols=60)) + <textarea rows="10" cols="60"/> + >>> print(Element('span', title='1 < 2')) + <span title="1 &lt; 2"/> + >>> print(Element('span', title='"baz"')) + <span title="&#34;baz&#34;"/> + + The " character is escaped using a numerical entity. + The order in which attributes are rendered is undefined. 
+ + If an attribute value evaluates to `None`, that attribute is not included + in the output: + + >>> print(Element('a', name=None)) + <a/> + + Attribute names that conflict with Python keywords can be specified by + appending an underscore: + + >>> print(Element('div', class_='warning')) + <div class="warning"/> + + Nested elements can be added to an element using item access notation. + The call notation can also be used for this and for adding attributes + using keyword arguments, as one would do in the constructor. + + >>> print(Element('ul')(Element('li'), Element('li'))) + <ul><li/><li/></ul> + >>> print(Element('a')('Label')) + <a>Label</a> + >>> print(Element('a')('Label', href="target")) + <a href="target">Label</a> + + Text nodes can be nested in an element by adding strings instead of + elements. Any special characters in the strings are escaped automatically: + + >>> print(Element('em')('Hello world')) + <em>Hello world</em> + >>> print(Element('em')(42)) + <em>42</em> + >>> print(Element('em')('1 < 2')) + <em>1 &lt; 2</em> + + This technique also allows mixed content: + + >>> print(Element('p')('Hello ', Element('b')('world'))) + <p>Hello <b>world</b></p> + + Quotes are not escaped inside text nodes: + >>> print(Element('p')('"Hello"')) + <p>"Hello"</p> + + Elements can also be combined with other elements or strings using the + addition operator, which results in a `Fragment` object that contains the + operands: + + >>> print(Element('br') + 'some text' + Element('br')) + <br/>some text<br/> + + Elements with a namespace can be generated using the `Namespace` and/or + `QName` classes: + + >>> from genshi.core import Namespace + >>> xhtml = Namespace('http://www.w3.org/1999/xhtml') + >>> print(Element(xhtml.html, lang='en')) + <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/> + """ + __slots__ = ['tag', 'attrib'] + + def __init__(self, tag_, **attrib): + Fragment.__init__(self) + self.tag = QName(tag_) + self.attrib = _kwargs_to_attrs(attrib) + + 
def __call__(self, *args, **kwargs): + """Append any positional arguments as child nodes, and keyword arguments + as attributes. + + :return: the element itself so that calls can be chained + :rtype: `Element` + :see: `Fragment.append` + """ + self.attrib |= _kwargs_to_attrs(kwargs) + Fragment.__call__(self, *args) + return self + + def __repr__(self): + return '<%s "%s">' % (type(self).__name__, self.tag) + + def _generate(self): + yield START, (self.tag, self.attrib), (None, -1, -1) + for kind, data, pos in Fragment._generate(self): + yield kind, data, pos + yield END, self.tag, (None, -1, -1) + + def generate(self): + """Return a markup event stream for the fragment. + + :rtype: `Stream` + """ + return Stream(self._generate()) + + +class ElementFactory(object): + """Factory for `Element` objects. + + A new element is created simply by accessing a correspondingly named + attribute of the factory object: + + >>> factory = ElementFactory() + >>> print(factory.foo) + <foo/> + >>> print(factory.foo(id=2)) + <foo id="2"/> + + Markup fragments (lists of nodes without a parent element) can be created + by calling the factory: + + >>> print(factory('Hello, ', factory.em('world'), '!')) + Hello, <em>world</em>! + + A factory can also be bound to a specific namespace: + + >>> factory = ElementFactory('http://www.w3.org/1999/xhtml') + >>> print(factory.html(lang="en")) + <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/> + + The namespace for a specific element can be altered on an existing factory + by specifying the new namespace using item access: + + >>> factory = ElementFactory() + >>> print(factory.html(factory['http://www.w3.org/2000/svg'].g(id=3))) + <html><g xmlns="http://www.w3.org/2000/svg" id="3"/></html> + + Usually, the `ElementFactory` class is not used directly. Rather, the + `tag` instance should be used to create elements. + """ + + def __init__(self, namespace=None): + """Create the factory, optionally bound to the given namespace. 
+ + :param namespace: the namespace URI for any created elements, or `None` + for no namespace + """ + if namespace and not isinstance(namespace, Namespace): + namespace = Namespace(namespace) + self.namespace = namespace + + def __call__(self, *args): + """Create a fragment that has the given positional arguments as child + nodes. + + :return: the created `Fragment` + :rtype: `Fragment` + """ + return Fragment()(*args) + + def __getitem__(self, namespace): + """Return a new factory that is bound to the specified namespace. + + :param namespace: the namespace URI or `Namespace` object + :return: an `ElementFactory` that produces elements bound to the given + namespace + :rtype: `ElementFactory` + """ + return ElementFactory(namespace) + + def __getattr__(self, name): + """Create an `Element` with the given name. + + :param name: the tag name of the element to create + :return: an `Element` with the specified name + :rtype: `Element` + """ + return Element(self.namespace and self.namespace[name] or name) + + +tag = ElementFactory() +"""Global `ElementFactory` bound to the default namespace. + +:type: `ElementFactory` +""" diff --git a/websdk/genshi/core.py b/websdk/genshi/core.py new file mode 100644 index 0000000..f7cddff --- /dev/null +++ b/websdk/genshi/core.py @@ -0,0 +1,727 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. 
+ +"""Core classes for markup processing.""" + +try: + reduce # builtin in Python < 3 +except NameError: + from functools import reduce +from itertools import chain +import operator + +from genshi.util import plaintext, stripentities, striptags, stringrepr + +__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', + 'QName'] +__docformat__ = 'restructuredtext en' + + +class StreamEventKind(str): + """A kind of event on a markup stream.""" + __slots__ = [] + _instances = {} + + def __new__(cls, val): + return cls._instances.setdefault(val, str.__new__(cls, val)) + + +class Stream(object): + """Represents a stream of markup events. + + This class is basically an iterator over the events. + + Stream events are tuples of the form:: + + (kind, data, position) + + where ``kind`` is the event kind (such as `START`, `END`, `TEXT`, etc), + ``data`` depends on the kind of event, and ``position`` is a + ``(filename, line, offset)`` tuple that contains the location of the + original element or text in the input. If the original location is unknown, + ``position`` is ``(None, -1, -1)``. + + Also provided are ways to serialize the stream to text. The `serialize()` + method will return an iterator over generated strings, while `render()` + returns the complete generated text at once. Both accept various parameters + that impact the way the stream is serialized. 
+ """ + __slots__ = ['events', 'serializer'] + + START = StreamEventKind('START') #: a start tag + END = StreamEventKind('END') #: an end tag + TEXT = StreamEventKind('TEXT') #: literal text + XML_DECL = StreamEventKind('XML_DECL') #: XML declaration + DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration + START_NS = StreamEventKind('START_NS') #: start namespace mapping + END_NS = StreamEventKind('END_NS') #: end namespace mapping + START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section + END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section + PI = StreamEventKind('PI') #: processing instruction + COMMENT = StreamEventKind('COMMENT') #: comment + + def __init__(self, events, serializer=None): + """Initialize the stream with a sequence of markup events. + + :param events: a sequence or iterable providing the events + :param serializer: the default serialization method to use for this + stream + + :note: Changed in 0.5: added the `serializer` argument + """ + self.events = events #: The underlying iterable producing the events + self.serializer = serializer #: The default serializion method + + def __iter__(self): + return iter(self.events) + + def __or__(self, function): + """Override the "bitwise or" operator to apply filters or serializers + to the stream, providing a syntax similar to pipes on Unix shells. 
+ + Assume the following stream produced by the `HTML` function: + + >>> from genshi.input import HTML + >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''') + >>> print(html) + <p onclick="alert('Whoa')">Hello, world!</p> + + A filter such as the HTML sanitizer can be applied to that stream using + the pipe notation as follows: + + >>> from genshi.filters import HTMLSanitizer + >>> sanitizer = HTMLSanitizer() + >>> print(html | sanitizer) + <p>Hello, world!</p> + + Filters can be any function that accepts and produces a stream (where + a stream is anything that iterates over events): + + >>> def uppercase(stream): + ... for kind, data, pos in stream: + ... if kind is TEXT: + ... data = data.upper() + ... yield kind, data, pos + >>> print(html | sanitizer | uppercase) + <p>HELLO, WORLD!</p> + + Serializers can also be used with this notation: + + >>> from genshi.output import TextSerializer + >>> output = TextSerializer() + >>> print(html | sanitizer | uppercase | output) + HELLO, WORLD! + + Commonly, serializers should be used at the end of the "pipeline"; + using them somewhere in the middle may produce unexpected results. + + :param function: the callable object that should be applied as a filter + :return: the filtered stream + :rtype: `Stream` + """ + return Stream(_ensure(function(self)), serializer=self.serializer) + + def filter(self, *filters): + """Apply filters to the stream. + + This method returns a new stream with the given filters applied. The + filters must be callables that accept the stream object as parameter, + and return the filtered stream. 
+ + The call:: + + stream.filter(filter1, filter2) + + is equivalent to:: + + stream | filter1 | filter2 + + :param filters: one or more callable objects that should be applied as + filters + :return: the filtered stream + :rtype: `Stream` + """ + return reduce(operator.or_, (self,) + filters) + + def render(self, method=None, encoding='utf-8', out=None, **kwargs): + """Return a string representation of the stream. + + Any additional keyword arguments are passed to the serializer, and thus + depend on the `method` parameter value. + + :param method: determines how the stream is serialized; can be either + "xml", "xhtml", "html", "text", or a custom serializer + class; if `None`, the default serialization method of + the stream is used + :param encoding: how the output string should be encoded; if set to + `None`, this method returns a `unicode` object + :param out: a file-like object that the output should be written to + instead of being returned as one big string; note that if + this is a file or socket (or similar), the `encoding` must + not be `None` (that is, the output must be encoded) + :return: a `str` or `unicode` object (depending on the `encoding` + parameter), or `None` if the `out` parameter is provided + :rtype: `basestring` + + :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer + :note: Changed in 0.5: added the `out` parameter + """ + from genshi.output import encode + if method is None: + method = self.serializer or 'xml' + generator = self.serialize(method=method, **kwargs) + return encode(generator, method=method, encoding=encoding, out=out) + + def select(self, path, namespaces=None, variables=None): + """Return a new stream that contains the events matching the given + XPath expression. 
+ + >>> from genshi import HTML + >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>') + >>> print(stream.select('elem')) + <elem>foo</elem><elem>bar</elem> + >>> print(stream.select('elem/text()')) + foobar + + Note that the outermost element of the stream becomes the *context + node* for the XPath test. That means that the expression "doc" would + not match anything in the example above, because it only tests against + child elements of the outermost element: + + >>> print(stream.select('doc')) + <BLANKLINE> + + You can use the "." expression to match the context node itself + (although that usually makes little sense): + + >>> print(stream.select('.')) + <doc><elem>foo</elem><elem>bar</elem></doc> + + :param path: a string containing the XPath expression + :param namespaces: mapping of namespace prefixes used in the path + :param variables: mapping of variable names to values + :return: the selected substream + :rtype: `Stream` + :raises PathSyntaxError: if the given path expression is invalid or not + supported + """ + from genshi.path import Path + return Path(path).select(self, namespaces, variables) + + def serialize(self, method='xml', **kwargs): + """Generate strings corresponding to a specific serialization of the + stream. + + Unlike the `render()` method, this method is a generator that returns + the serialized output incrementally, as opposed to returning a single + string. + + Any additional keyword arguments are passed to the serializer, and thus + depend on the `method` parameter value. 
+ + :param method: determines how the stream is serialized; can be either + "xml", "xhtml", "html", "text", or a custom serializer + class; if `None`, the default serialization method of + the stream is used + :return: an iterator over the serialization results (`Markup` or + `unicode` objects, depending on the serialization method) + :rtype: ``iterator`` + :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer + """ + from genshi.output import get_serializer + if method is None: + method = self.serializer or 'xml' + return get_serializer(method, **kwargs)(_ensure(self)) + + def __str__(self): + return self.render() + + def __unicode__(self): + return self.render(encoding=None) + + def __html__(self): + return self + + +START = Stream.START +END = Stream.END +TEXT = Stream.TEXT +XML_DECL = Stream.XML_DECL +DOCTYPE = Stream.DOCTYPE +START_NS = Stream.START_NS +END_NS = Stream.END_NS +START_CDATA = Stream.START_CDATA +END_CDATA = Stream.END_CDATA +PI = Stream.PI +COMMENT = Stream.COMMENT + + +def _ensure(stream): + """Ensure that every item on the stream is actually a markup event.""" + stream = iter(stream) + event = stream.next() + + # Check whether the iterable is a real markup event stream by examining the + # first item it yields; if it's not we'll need to do some conversion + if type(event) is not tuple or len(event) != 3: + for event in chain([event], stream): + if hasattr(event, 'totuple'): + event = event.totuple() + else: + event = TEXT, unicode(event), (None, -1, -1) + yield event + return + + # This looks like a markup event stream, so we'll just pass it through + # unchanged + yield event + for event in stream: + yield event + + +class Attrs(tuple): + """Immutable sequence type that stores the attributes of an element. + + Ordering of the attributes is preserved, while access by name is also + supported. 
+ + >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) + >>> attrs + Attrs([('href', '#'), ('title', 'Foo')]) + + >>> 'href' in attrs + True + >>> 'tabindex' in attrs + False + >>> attrs.get('title') + 'Foo' + + Instances may not be manipulated directly. Instead, the operators ``|`` and + ``-`` can be used to produce new instances that have specific attributes + added, replaced or removed. + + To remove an attribute, use the ``-`` operator. The right hand side can be + either a string or a set/sequence of strings, identifying the name(s) of + the attribute(s) to remove: + + >>> attrs - 'title' + Attrs([('href', '#')]) + >>> attrs - ('title', 'href') + Attrs() + + The original instance is not modified, but the operator can of course be + used with an assignment: + + >>> attrs + Attrs([('href', '#'), ('title', 'Foo')]) + >>> attrs -= 'title' + >>> attrs + Attrs([('href', '#')]) + + To add a new attribute, use the ``|`` operator, where the right hand value + is a sequence of ``(name, value)`` tuples (which includes `Attrs` + instances): + + >>> attrs | [('title', 'Bar')] + Attrs([('href', '#'), ('title', 'Bar')]) + + If the attributes already contain an attribute with a given name, the value + of that attribute is replaced: + + >>> attrs | [('href', 'http://example.org/')] + Attrs([('href', 'http://example.org/')]) + """ + __slots__ = [] + + def __contains__(self, name): + """Return whether the list includes an attribute with the specified + name. + + :return: `True` if the list includes the attribute + :rtype: `bool` + """ + for attr, _ in self: + if attr == name: + return True + + def __getitem__(self, i): + """Return an item or slice of the attributes list. 
+ + >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) + >>> attrs[1] + ('title', 'Foo') + >>> attrs[1:] + Attrs([('title', 'Foo')]) + """ + items = tuple.__getitem__(self, i) + if type(i) is slice: + return Attrs(items) + return items + + def __getslice__(self, i, j): + """Return a slice of the attributes list. + + >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) + >>> attrs[1:] + Attrs([('title', 'Foo')]) + """ + return Attrs(tuple.__getslice__(self, i, j)) + + def __or__(self, attrs): + """Return a new instance that contains the attributes in `attrs` in + addition to any already existing attributes. + + :return: a new instance with the merged attributes + :rtype: `Attrs` + """ + repl = dict([(an, av) for an, av in attrs if an in self]) + return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] + + [(an, av) for an, av in attrs if an not in self]) + + def __repr__(self): + if not self: + return 'Attrs()' + return 'Attrs([%s])' % ', '.join([repr(item) for item in self]) + + def __sub__(self, names): + """Return a new instance with all attributes with a name in `names` are + removed. + + :param names: the names of the attributes to remove + :return: a new instance with the attribute removed + :rtype: `Attrs` + """ + if isinstance(names, basestring): + names = (names,) + return Attrs([(name, val) for name, val in self if name not in names]) + + def get(self, name, default=None): + """Return the value of the attribute with the specified name, or the + value of the `default` parameter if no such attribute is found. + + :param name: the name of the attribute + :param default: the value to return when the attribute does not exist + :return: the attribute value, or the `default` value if that attribute + does not exist + :rtype: `object` + """ + for attr, value in self: + if attr == name: + return value + return default + + def totuple(self): + """Return the attributes as a markup event. 
+ + The returned event is a `TEXT` event, the data is the value of all + attributes joined together. + + >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple() + ('TEXT', '#Foo', (None, -1, -1)) + + :return: a `TEXT` event + :rtype: `tuple` + """ + return TEXT, ''.join([x[1] for x in self]), (None, -1, -1) + + +class Markup(unicode): + """Marks a string as being safe for inclusion in HTML/XML output without + needing to be escaped. + """ + __slots__ = [] + + def __add__(self, other): + return Markup(unicode.__add__(self, escape(other))) + + def __radd__(self, other): + return Markup(unicode.__add__(escape(other), self)) + + def __mod__(self, args): + if isinstance(args, dict): + args = dict(zip(args.keys(), map(escape, args.values()))) + elif isinstance(args, (list, tuple)): + args = tuple(map(escape, args)) + else: + args = escape(args) + return Markup(unicode.__mod__(self, args)) + + def __mul__(self, num): + return Markup(unicode.__mul__(self, num)) + __rmul__ = __mul__ + + def __repr__(self): + return "<%s %s>" % (type(self).__name__, unicode.__repr__(self)) + + def join(self, seq, escape_quotes=True): + """Return a `Markup` object which is the concatenation of the strings + in the given sequence, where this `Markup` object is the separator + between the joined elements. + + Any element in the sequence that is not a `Markup` instance is + automatically escaped. + + :param seq: the sequence of strings to join + :param escape_quotes: whether double quote characters in the elements + should be escaped + :return: the joined `Markup` object + :rtype: `Markup` + :see: `escape` + """ + return Markup(unicode.join(self, [escape(item, quotes=escape_quotes) + for item in seq])) + + @classmethod + def escape(cls, text, quotes=True): + """Create a Markup instance from a string and escape special characters + it may contain (<, >, & and \"). + + >>> escape('"1 < 2"') + <Markup u'&#34;1 &lt; 2&#34;'> + + If the `quotes` parameter is set to `False`, the \" character is left + as is. 
Escaping quotes is generally only required for strings that are + to be used in attribute values. + + >>> escape('"1 < 2"', quotes=False) + <Markup u'"1 &lt; 2"'> + + :param text: the text to escape + :param quotes: if ``True``, double quote characters are escaped in + addition to the other special characters + :return: the escaped `Markup` string + :rtype: `Markup` + """ + if not text: + return cls() + if type(text) is cls: + return text + if hasattr(text, '__html__'): + return Markup(text.__html__()) + + text = text.replace('&', '&amp;') \ + .replace('<', '&lt;') \ + .replace('>', '&gt;') + if quotes: + text = text.replace('"', '&#34;') + return cls(text) + + def unescape(self): + """Reverse-escapes &, <, >, and \" and returns a `unicode` object. + + >>> Markup('1 &lt; 2').unescape() + u'1 < 2' + + :return: the unescaped string + :rtype: `unicode` + :see: `genshi.core.unescape` + """ + if not self: + return '' + return unicode(self).replace('&#34;', '"') \ + .replace('&gt;', '>') \ + .replace('&lt;', '<') \ + .replace('&amp;', '&') + + def stripentities(self, keepxmlentities=False): + """Return a copy of the text with any character or numeric entities + replaced by the equivalent UTF-8 characters. + + If the `keepxmlentities` parameter is provided and evaluates to `True`, + the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and + ``&quot;``) are not stripped. + + :return: a `Markup` instance with entities removed + :rtype: `Markup` + :see: `genshi.util.stripentities` + """ + return Markup(stripentities(self, keepxmlentities=keepxmlentities)) + + def striptags(self): + """Return a copy of the text with all XML/HTML tags removed. + + :return: a `Markup` instance with all tags removed + :rtype: `Markup` + :see: `genshi.util.striptags` + """ + return Markup(striptags(self)) + + +try: + from genshi._speedups import Markup +except ImportError: + pass # just use the Python implementation + + +escape = Markup.escape + + +def unescape(text): + """Reverse-escapes &, <, >, and \" and returns a `unicode` object. 
+ + >>> unescape(Markup('1 &lt; 2')) + u'1 < 2' + + If the provided `text` object is not a `Markup` instance, it is returned + unchanged. + + >>> unescape('1 &lt; 2') + '1 &lt; 2' + + :param text: the text to unescape + :return: the unescaped string + :rtype: `unicode` + """ + if not isinstance(text, Markup): + return text + return text.unescape() + + +class Namespace(object): + """Utility class creating and testing elements with a namespace. + + Internally, namespace URIs are encoded in the `QName` of any element or + attribute, the namespace URI being enclosed in curly braces. This class + helps create and test these strings. + + A `Namespace` object is instantiated with the namespace URI. + + >>> html = Namespace('http://www.w3.org/1999/xhtml') + >>> html + Namespace('http://www.w3.org/1999/xhtml') + >>> html.uri + u'http://www.w3.org/1999/xhtml' + + The `Namespace` object can then be used to generate `QName` objects with + that namespace: + + >>> html.body + QName('http://www.w3.org/1999/xhtml}body') + >>> html.body.localname + u'body' + >>> html.body.namespace + u'http://www.w3.org/1999/xhtml' + + The same works using item access notation, which is useful for element or + attribute names that are not valid Python identifiers: + + >>> html['body'] + QName('http://www.w3.org/1999/xhtml}body') + + A `Namespace` object can also be used to test whether a specific `QName` + belongs to that namespace using the ``in`` operator: + + >>> qname = html.body + >>> qname in html + True + >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2') + False + """ + def __new__(cls, uri): + if type(uri) is cls: + return uri + return object.__new__(cls) + + def __getnewargs__(self): + return (self.uri,) + + def __getstate__(self): + return self.uri + + def __setstate__(self, uri): + self.uri = uri + + def __init__(self, uri): + self.uri = unicode(uri) + + def __contains__(self, qname): + return qname.namespace == self.uri + + def __ne__(self, other): + return not self == other + + def 
__eq__(self, other): + if isinstance(other, Namespace): + return self.uri == other.uri + return self.uri == other + + def __getitem__(self, name): + return QName(self.uri + '}' + name) + __getattr__ = __getitem__ + + def __hash__(self): + return hash(self.uri) + + def __repr__(self): + return '%s(%s)' % (type(self).__name__, stringrepr(self.uri)) + + def __str__(self): + return self.uri.encode('utf-8') + + def __unicode__(self): + return self.uri + + +# The namespace used by attributes such as xml:lang and xml:space +XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace') + + +class QName(unicode): + """A qualified element or attribute name. + + The unicode value of instances of this class contains the qualified name of + the element or attribute, in the form ``{namespace-uri}local-name``. The + namespace URI can be obtained through the additional `namespace` attribute, + while the local name can be accessed through the `localname` attribute. + + >>> qname = QName('foo') + >>> qname + QName('foo') + >>> qname.localname + u'foo' + >>> qname.namespace + + >>> qname = QName('http://www.w3.org/1999/xhtml}body') + >>> qname + QName('http://www.w3.org/1999/xhtml}body') + >>> qname.localname + u'body' + >>> qname.namespace + u'http://www.w3.org/1999/xhtml' + """ + __slots__ = ['namespace', 'localname'] + + def __new__(cls, qname): + """Create the `QName` instance. 
+ + :param qname: the qualified name as a string of the form + ``{namespace-uri}local-name``, where the leading curly + brace is optional + """ + if type(qname) is cls: + return qname + + parts = qname.lstrip('{').split('}', 1) + if len(parts) > 1: + self = unicode.__new__(cls, '{%s' % qname) + self.namespace, self.localname = map(unicode, parts) + else: + self = unicode.__new__(cls, qname) + self.namespace, self.localname = None, unicode(qname) + return self + + def __getnewargs__(self): + return (self.lstrip('{'),) + + def __repr__(self): + return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{'))) diff --git a/websdk/genshi/filters/__init__.py b/websdk/genshi/filters/__init__.py new file mode 100644 index 0000000..efc2565 --- /dev/null +++ b/websdk/genshi/filters/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Implementation of a number of stream filters.""" + +from genshi.filters.html import HTMLFormFiller, HTMLSanitizer +from genshi.filters.i18n import Translator +from genshi.filters.transform import Transformer + +__docformat__ = 'restructuredtext en' diff --git a/websdk/genshi/filters/html.py b/websdk/genshi/filters/html.py new file mode 100644 index 0000000..d554a54 --- /dev/null +++ b/websdk/genshi/filters/html.py @@ -0,0 +1,453 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. 
The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Implementation of a number of stream filters.""" + +try: + any +except NameError: + from genshi.util import any +import re + +from genshi.core import Attrs, QName, stripentities +from genshi.core import END, START, TEXT, COMMENT + +__all__ = ['HTMLFormFiller', 'HTMLSanitizer'] +__docformat__ = 'restructuredtext en' + + +class HTMLFormFiller(object): + """A stream filter that can populate HTML forms from a dictionary of values. + + >>> from genshi.input import HTML + >>> html = HTML('''<form> + ... <p><input type="text" name="foo" /></p> + ... </form>''') + >>> filler = HTMLFormFiller(data={'foo': 'bar'}) + >>> print(html | filler) + <form> + <p><input type="text" name="foo" value="bar"/></p> + </form> + """ + # TODO: only select the first radio button, and the first select option + # (if not in a multiple-select) + # TODO: only apply to elements in the XHTML namespace (or no namespace)? + + def __init__(self, name=None, id=None, data=None, passwords=False): + """Create the filter. + + :param name: The name of the form that should be populated. If this + parameter is given, only forms where the ``name`` attribute + value matches the parameter are processed. + :param id: The ID of the form that should be populated. If this + parameter is given, only forms where the ``id`` attribute + value matches the parameter are processed. + :param data: The dictionary of form values, where the keys are the names + of the form fields, and the values are the values to fill + in. + :param passwords: Whether password input fields should be populated. 
+ This is off by default for security reasons (for + example, a password may end up in the browser cache) + :note: Changed in 0.5.2: added the `passwords` option + """ + self.name = name + self.id = id + if data is None: + data = {} + self.data = data + self.passwords = passwords + + def __call__(self, stream): + """Apply the filter to the given stream. + + :param stream: the markup event stream to filter + """ + in_form = in_select = in_option = in_textarea = False + select_value = option_value = textarea_value = None + option_start = None + option_text = [] + no_option_value = False + + for kind, data, pos in stream: + + if kind is START: + tag, attrs = data + tagname = tag.localname + + if tagname == 'form' and ( + self.name and attrs.get('name') == self.name or + self.id and attrs.get('id') == self.id or + not (self.id or self.name)): + in_form = True + + elif in_form: + if tagname == 'input': + type = attrs.get('type', '').lower() + if type in ('checkbox', 'radio'): + name = attrs.get('name') + if name and name in self.data: + value = self.data[name] + declval = attrs.get('value') + checked = False + if isinstance(value, (list, tuple)): + if declval: + checked = declval in [unicode(v) for v + in value] + else: + checked = any(value) + else: + if declval: + checked = declval == unicode(value) + elif type == 'checkbox': + checked = bool(value) + if checked: + attrs |= [(QName('checked'), 'checked')] + elif 'checked' in attrs: + attrs -= 'checked' + elif type in ('', 'hidden', 'text') \ + or type == 'password' and self.passwords: + name = attrs.get('name') + if name and name in self.data: + value = self.data[name] + if isinstance(value, (list, tuple)): + value = value[0] + if value is not None: + attrs |= [ + (QName('value'), unicode(value)) + ] + elif tagname == 'select': + name = attrs.get('name') + if name in self.data: + select_value = self.data[name] + in_select = True + elif tagname == 'textarea': + name = attrs.get('name') + if name in self.data: + 
textarea_value = self.data.get(name) + if isinstance(textarea_value, (list, tuple)): + textarea_value = textarea_value[0] + in_textarea = True + elif in_select and tagname == 'option': + option_start = kind, data, pos + option_value = attrs.get('value') + if option_value is None: + no_option_value = True + option_value = '' + in_option = True + continue + yield kind, (tag, attrs), pos + + elif in_form and kind is TEXT: + if in_select and in_option: + if no_option_value: + option_value += data + option_text.append((kind, data, pos)) + continue + elif in_textarea: + continue + yield kind, data, pos + + elif in_form and kind is END: + tagname = data.localname + if tagname == 'form': + in_form = False + elif tagname == 'select': + in_select = False + select_value = None + elif in_select and tagname == 'option': + if isinstance(select_value, (tuple, list)): + selected = option_value in [unicode(v) for v + in select_value] + else: + selected = option_value == unicode(select_value) + okind, (tag, attrs), opos = option_start + if selected: + attrs |= [(QName('selected'), 'selected')] + elif 'selected' in attrs: + attrs -= 'selected' + yield okind, (tag, attrs), opos + if option_text: + for event in option_text: + yield event + in_option = False + no_option_value = False + option_start = option_value = None + option_text = [] + elif tagname == 'textarea': + if textarea_value: + yield TEXT, unicode(textarea_value), pos + in_textarea = False + yield kind, data, pos + + else: + yield kind, data, pos + + +class HTMLSanitizer(object): + """A filter that removes potentially dangerous HTML tags and attributes + from the stream. + + >>> from genshi import HTML + >>> html = HTML('<div><script>alert(document.cookie)</script></div>') + >>> print(html | HTMLSanitizer()) + <div/> + + The default set of safe tags and attributes can be modified when the filter + is instantiated. 
For example, to allow inline ``style`` attributes, the + following instantation would work: + + >>> html = HTML('<div style="background: #000"></div>') + >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + >>> print(html | sanitizer) + <div style="background: #000"/> + + Note that even in this case, the filter *does* attempt to remove dangerous + constructs from style attributes: + + >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>') + >>> print(html | sanitizer) + <div style="color: #000"/> + + This handles HTML entities, unicode escapes in CSS and Javascript text, as + well as a lot of other things. However, the style tag is still excluded by + default because it is very hard for such sanitizing to be completely safe, + especially considering how much error recovery current web browsers perform. + + It also does some basic filtering of CSS properties that may be used for + typical phishing attacks. For more sophisticated filtering, this class + provides a couple of hooks that can be overridden in sub-classes. + + :warn: Note that this special processing of CSS is currently only applied to + style attributes, **not** style elements. 
+ """ + + SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b', + 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', + 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', + 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', + 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', + 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', + 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', + 'ul', 'var']) + + SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey', + 'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding', + 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class', + 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', + 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', + 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', + 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name', + 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', + 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', + 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', + 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) + + SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) + + URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc', + 'src']) + + def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS, + safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS): + """Create the sanitizer. + + The exact set of allowed elements and attributes can be configured. 
+ + :param safe_tags: a set of tag names that are considered safe + :param safe_attrs: a set of attribute names that are considered safe + :param safe_schemes: a set of URI schemes that are considered safe + :param uri_attrs: a set of names of attributes that contain URIs + """ + self.safe_tags = safe_tags + "The set of tag names that are considered safe." + self.safe_attrs = safe_attrs + "The set of attribute names that are considered safe." + self.uri_attrs = uri_attrs + "The set of names of attributes that may contain URIs." + self.safe_schemes = safe_schemes + "The set of URI schemes that are considered safe." + + def __call__(self, stream): + """Apply the filter to the given stream. + + :param stream: the markup event stream to filter + """ + waiting_for = None + + for kind, data, pos in stream: + if kind is START: + if waiting_for: + continue + tag, attrs = data + if not self.is_safe_elem(tag, attrs): + waiting_for = tag + continue + + new_attrs = [] + for attr, value in attrs: + value = stripentities(value) + if attr not in self.safe_attrs: + continue + elif attr in self.uri_attrs: + # Don't allow URI schemes such as "javascript:" + if not self.is_safe_uri(value): + continue + elif attr == 'style': + # Remove dangerous CSS declarations from inline styles + decls = self.sanitize_css(value) + if not decls: + continue + value = '; '.join(decls) + new_attrs.append((attr, value)) + + yield kind, (tag, Attrs(new_attrs)), pos + + elif kind is END: + tag = data + if waiting_for: + if waiting_for == tag: + waiting_for = None + else: + yield kind, data, pos + + elif kind is not COMMENT: + if not waiting_for: + yield kind, data, pos + + def is_safe_css(self, propname, value): + """Determine whether the given css property declaration is to be + considered safe for inclusion in the output. 
+ + :param propname: the CSS property name + :param value: the value of the property + :return: whether the property value should be considered safe + :rtype: bool + :since: version 0.6 + """ + if propname == 'position': + return False + if propname.startswith('margin') and '-' in value: + # Negative margins can be used for phishing + return False + return True + + def is_safe_elem(self, tag, attrs): + """Determine whether the given element should be considered safe for + inclusion in the output. + + :param tag: the tag name of the element + :type tag: QName + :param attrs: the element attributes + :type attrs: Attrs + :return: whether the element should be considered safe + :rtype: bool + :since: version 0.6 + """ + if tag not in self.safe_tags: + return False + if tag.localname == 'input': + input_type = attrs.get('type', '').lower() + if input_type == 'password': + return False + return True + + def is_safe_uri(self, uri): + """Determine whether the given URI is to be considered safe for + inclusion in the output. + + The default implementation checks whether the scheme of the URI is in + the set of allowed URIs (`safe_schemes`). + + >>> sanitizer = HTMLSanitizer() + >>> sanitizer.is_safe_uri('http://example.org/') + True + >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)') + False + + :param uri: the URI to check + :return: `True` if the URI can be considered safe, `False` otherwise + :rtype: `bool` + :since: version 0.4.3 + """ + if '#' in uri: + uri = uri.split('#', 1)[0] # Strip out the fragment identifier + if ':' not in uri: + return True # This is a relative URI + chars = [char for char in uri.split(':', 1)[0] if char.isalnum()] + return ''.join(chars).lower() in self.safe_schemes + + def sanitize_css(self, text): + """Remove potentially dangerous property declarations from CSS code. 
+ + In particular, properties using the CSS ``url()`` function with a scheme + that is not considered safe are removed: + + >>> sanitizer = HTMLSanitizer() + >>> sanitizer.sanitize_css(u''' + ... background: url(javascript:alert("foo")); + ... color: #000; + ... ''') + [u'color: #000'] + + Also, the proprietary Internet Explorer function ``expression()`` is + always stripped: + + >>> sanitizer.sanitize_css(u''' + ... background: #fff; + ... color: #000; + ... width: e/**/xpression(alert("foo")); + ... ''') + [u'background: #fff', u'color: #000'] + + :param text: the CSS text; this is expected to be `unicode` and to not + contain any character or numeric references + :return: a list of declarations that are considered safe + :rtype: `list` + :since: version 0.4.3 + """ + decls = [] + text = self._strip_css_comments(self._replace_unicode_escapes(text)) + for decl in text.split(';'): + decl = decl.strip() + if not decl: + continue + try: + propname, value = decl.split(':', 1) + except ValueError: + continue + if not self.is_safe_css(propname.strip().lower(), value.strip()): + continue + is_evil = False + if 'expression' in value: + is_evil = True + for match in re.finditer(r'url\s*\(([^)]+)', value): + if not self.is_safe_uri(match.group(1)): + is_evil = True + break + if not is_evil: + decls.append(decl.strip()) + return decls + + _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub + _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub + + def _replace_unicode_escapes(self, text): + def _repl(match): + return unichr(int(match.group(1), 16)) + return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text)) + + _CSS_COMMENTS = re.compile(r'/\*.*?\*/').sub + + def _strip_css_comments(self, text): + return self._CSS_COMMENTS('', text) diff --git a/websdk/genshi/filters/i18n.py b/websdk/genshi/filters/i18n.py new file mode 100644 index 0000000..7852875 --- /dev/null +++ b/websdk/genshi/filters/i18n.py @@ -0,0 +1,1238 @@ +# -*- coding: utf-8 -*- +# +# Copyright 
(C) 2007-2010 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Directives and utilities for internationalization and localization of +templates. + +:since: version 0.4 +:note: Directives support added since version 0.6 +""" + +try: + any +except NameError: + from genshi.util import any +from gettext import NullTranslations +import os +import re +from types import FunctionType + +from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \ + XML_NAMESPACE, _ensure, StreamEventKind +from genshi.template.eval import _ast +from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives +from genshi.template.directives import Directive, StripDirective +from genshi.template.markup import MarkupTemplate, EXEC + +__all__ = ['Translator', 'extract'] +__docformat__ = 'restructuredtext en' + + +I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') + +MSGBUF = StreamEventKind('MSGBUF') +SUB_START = StreamEventKind('SUB_START') +SUB_END = StreamEventKind('SUB_END') + +GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', + 'ugettext', 'ungettext') + + +class I18NDirective(Directive): + """Simple interface for i18n directives to support messages extraction.""" + + def __call__(self, stream, directives, ctxt, **vars): + return _apply_directives(stream, directives, ctxt, vars) + + +class ExtractableI18NDirective(I18NDirective): + """Simple interface for directives to support messages extraction.""" + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None): + raise 
NotImplementedError + + +class CommentDirective(I18NDirective): + """Implementation of the ``i18n:comment`` template directive which adds + translation comments. + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <p i18n:comment="As in Foo Bar">Foo</p> + ... </html>''') + >>> translator = Translator() + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) + [(2, None, u'Foo', [u'As in Foo Bar'])] + """ + __slots__ = ['comment'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.comment = value + + +class MsgDirective(ExtractableI18NDirective): + r"""Implementation of the ``i18n:msg`` directive which marks inner content + as translatable. Consider the following examples: + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:msg=""> + ... <p>Foo</p> + ... <p>Bar</p> + ... </div> + ... <p i18n:msg="">Foo <em>bar</em>!</p> + ... </html>''') + + >>> translator = Translator() + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) + [(2, None, u'[1:Foo]\n [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])] + >>> print(tmpl.generate().render()) + <html> + <div><p>Foo</p> + <p>Bar</p></div> + <p>Foo <em>bar</em>!</p> + </html> + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:msg="fname, lname"> + ... <p>First Name: ${fname}</p> + ... <p>Last Name: ${lname}</p> + ... </div> + ... <p i18n:msg="">Foo <em>bar</em>!</p> + ... </html>''') + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE + [(2, None, u'[1:First Name: %(fname)s]\n [2:Last Name: %(lname)s]', []), + (6, None, u'Foo [1:bar]!', [])] + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:msg="fname, lname"> + ... 
<p>First Name: ${fname}</p> + ... <p>Last Name: ${lname}</p> + ... </div> + ... <p i18n:msg="">Foo <em>bar</em>!</p> + ... </html>''') + >>> translator.setup(tmpl) + >>> print(tmpl.generate(fname='John', lname='Doe').render()) + <html> + <div><p>First Name: John</p> + <p>Last Name: Doe</p></div> + <p>Foo <em>bar</em>!</p> + </html> + + Starting and ending white-space is stripped of to make it simpler for + translators. Stripping it is not that important since it's on the html + source, the rendered output will remain the same. + """ + __slots__ = ['params', 'lineno'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.params = [param.strip() for param in value.split(',') if param] + self.lineno = lineno + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('params', '').strip() + return super(MsgDirective, cls).attach(template, stream, value.strip(), + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + gettext = ctxt.get('_i18n.gettext') + if ctxt.get('_i18n.domain'): + dgettext = ctxt.get('_i18n.dgettext') + assert hasattr(dgettext, '__call__'), \ + 'No domain gettext function passed' + gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) + + def _generate(): + msgbuf = MessageBuffer(self) + previous = stream.next() + if previous[0] is START: + yield previous + else: + msgbuf.append(*previous) + previous = stream.next() + for kind, data, pos in stream: + msgbuf.append(*previous) + previous = kind, data, pos + if previous[0] is not END: + msgbuf.append(*previous) + previous = None + for event in msgbuf.translate(gettext(msgbuf.format())): + yield event + if previous: + yield previous + + return _apply_directives(_generate(), directives, ctxt, vars) + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, 
comment_stack=None): + msgbuf = MessageBuffer(self) + strip = False + + stream = iter(stream) + previous = stream.next() + if previous[0] is START: + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + previous = stream.next() + strip = True + for event in stream: + if event[0] is START: + for message in translator._extract_attrs(event, + gettext_functions, + search_text=search_text): + yield message + msgbuf.append(*previous) + previous = event + if not strip: + msgbuf.append(*previous) + + yield self.lineno, None, msgbuf.format(), comment_stack[-1:] + + +class ChooseBranchDirective(I18NDirective): + __slots__ = ['params'] + + def __call__(self, stream, directives, ctxt, **vars): + self.params = ctxt.get('_i18n.choose.params', [])[:] + msgbuf = MessageBuffer(self) + stream = _apply_directives(stream, directives, ctxt, vars) + + previous = stream.next() + if previous[0] is START: + yield previous + else: + msgbuf.append(*previous) + + try: + previous = stream.next() + except StopIteration: + # For example <i18n:singular> or <i18n:plural> directives + yield MSGBUF, (), -1 # the place holder for msgbuf output + ctxt['_i18n.choose.%s' % self.tagname] = msgbuf + return + + for event in stream: + msgbuf.append(*previous) + previous = event + yield MSGBUF, (), -1 # the place holder for msgbuf output + + if previous[0] is END: + yield previous # the outer end tag + else: + msgbuf.append(*previous) + ctxt['_i18n.choose.%s' % self.tagname] = msgbuf + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None, msgbuf=None): + stream = iter(stream) + previous = stream.next() + + if previous[0] is START: + # skip the enclosing element + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + previous = stream.next() + + for event in stream: + if previous[0] is START: + for message in 
translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + msgbuf.append(*previous) + previous = event + + if previous[0] is not END: + msgbuf.append(*previous) + + +class SingularDirective(ChooseBranchDirective): + """Implementation of the ``i18n:singular`` directive to be used with the + ``i18n:choose`` directive.""" + + +class PluralDirective(ChooseBranchDirective): + """Implementation of the ``i18n:plural`` directive to be used with the + ``i18n:choose`` directive.""" + + +class ChooseDirective(ExtractableI18NDirective): + """Implementation of the ``i18n:choose`` directive which provides plural + internationalisation of strings. + + This directive requires at least one parameter, the one which evaluates to + an integer which will allow to choose the plural/singular form. If you also + have expressions inside the singular and plural version of the string you + also need to pass a name for those parameters. Consider the following + examples: + + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:choose="num; num"> + ... <p i18n:singular="">There is $num coin</p> + ... <p i18n:plural="">There are $num coins</p> + ... </div> + ... </html>''') + >>> translator = Translator() + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE + [(2, 'ngettext', (u'There is %(num)s coin', + u'There are %(num)s coins'), [])] + + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:choose="num; num"> + ... <p i18n:singular="">There is $num coin</p> + ... <p i18n:plural="">There are $num coins</p> + ... </div> + ... 
</html>''') + >>> translator.setup(tmpl) + >>> print(tmpl.generate(num=1).render()) + <html> + <div> + <p>There is 1 coin</p> + </div> + </html> + >>> print(tmpl.generate(num=2).render()) + <html> + <div> + <p>There are 2 coins</p> + </div> + </html> + + When used as a element and not as an attribute: + + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <i18n:choose numeral="num" params="num"> + ... <p i18n:singular="">There is $num coin</p> + ... <p i18n:plural="">There are $num coins</p> + ... </i18n:choose> + ... </html>''') + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE + [(2, 'ngettext', (u'There is %(num)s coin', + u'There are %(num)s coins'), [])] + """ + __slots__ = ['numeral', 'params', 'lineno'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + params = [v.strip() for v in value.split(';')] + self.numeral = self._parse_expr(params.pop(0), template, lineno, offset) + self.params = params and [name.strip() for name in + params[0].split(',') if name] or [] + self.lineno = lineno + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + numeral = value.get('numeral', '').strip() + assert numeral is not '', "at least pass the numeral param" + params = [v.strip() for v in value.get('params', '').split(',')] + value = '%s; ' % numeral + ', '.join(params) + return super(ChooseDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + ctxt.push({'_i18n.choose.params': self.params, + '_i18n.choose.singular': None, + '_i18n.choose.plural': None}) + + ngettext = ctxt.get('_i18n.ngettext') + assert hasattr(ngettext, '__call__'), 'No ngettext function available' + dngettext = ctxt.get('_i18n.dngettext') + if not dngettext: + dngettext = lambda d, s, 
p, n: ngettext(s, p, n) + + new_stream = [] + singular_stream = None + singular_msgbuf = None + plural_stream = None + plural_msgbuf = None + + numeral = self.numeral.evaluate(ctxt) + is_plural = self._is_plural(numeral, ngettext) + + for event in stream: + if event[0] is SUB and any(isinstance(d, ChooseBranchDirective) + for d in event[1][0]): + subdirectives, substream = event[1] + + if isinstance(subdirectives[0], SingularDirective): + singular_stream = list(_apply_directives(substream, + subdirectives, + ctxt, vars)) + new_stream.append((MSGBUF, None, (None, -1, -1))) + + elif isinstance(subdirectives[0], PluralDirective): + if is_plural: + plural_stream = list(_apply_directives(substream, + subdirectives, + ctxt, vars)) + + else: + new_stream.append(event) + + if ctxt.get('_i18n.domain'): + ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), + s, p, n) + + singular_msgbuf = ctxt.get('_i18n.choose.singular') + if is_plural: + plural_msgbuf = ctxt.get('_i18n.choose.plural') + msgbuf, choice = plural_msgbuf, plural_stream + else: + msgbuf, choice = singular_msgbuf, singular_stream + plural_msgbuf = MessageBuffer(self) + + for kind, data, pos in new_stream: + if kind is MSGBUF: + for event in choice: + if event[0] is MSGBUF: + translation = ngettext(singular_msgbuf.format(), + plural_msgbuf.format(), + numeral) + for subevent in msgbuf.translate(translation): + yield subevent + else: + yield event + else: + yield kind, data, pos + + ctxt.pop() + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None): + strip = False + stream = iter(stream) + previous = stream.next() + + if previous[0] is START: + # skip the enclosing element + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + previous = stream.next() + strip = True + + singular_msgbuf = MessageBuffer(self) + plural_msgbuf = MessageBuffer(self) + + for event in stream: + if 
class DomainDirective(I18NDirective):
    """Implementation of the ``i18n:domain`` directive, which selects the
    i18n domain (catalog) used to translate the content it encloses.

    An empty or missing domain name selects the ``'__DEFAULT__'`` domain.

    >>> from genshi.filters.tests.i18n import DummyTranslations
    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <p i18n:msg="">Bar</p>
    ...   <div i18n:domain="foo">
    ...     <p i18n:msg="">FooBar</p>
    ...     <p>Bar</p>
    ...     <p i18n:domain="bar" i18n:msg="">Bar</p>
    ...     <p i18n:domain="">Bar</p>
    ...   </div>
    ...   <p>Bar</p>
    ... </html>''')

    >>> translations = DummyTranslations({'Bar': 'Voh'})
    >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
    >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
    >>> translator = Translator(translations)
    >>> translator.setup(tmpl)

    >>> print(tmpl.generate().render())
    <html>
      <p>Voh</p>
      <div>
        <p>BarFoo</p>
        <p>foo_Bar</p>
        <p>bar_Bar</p>
        <p>Voh</p>
      </div>
      <p>Voh</p>
    </html>
    """
    __slots__ = ['domain']

    def __init__(self, value, template=None, namespaces=None, lineno=-1,
                 offset=-1):
        """Store the stripped domain name, or ``'__DEFAULT__'`` when the
        value is missing, empty, or whitespace only.
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        domain = value
        if domain:
            domain = domain.strip()
        if not domain:
            domain = '__DEFAULT__'
        self.domain = domain

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        """Attach the directive; a ``dict`` value is reduced to its
        ``'name'`` entry before delegating to the parent class.
        """
        if type(value) is dict:
            value = value.get('name')
        parent = super(DomainDirective, cls)
        return parent.attach(template, stream, value, namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        """Apply the remaining directives with ``_i18n.domain`` pushed onto
        the context, popping it once the substream is exhausted.
        """
        ctxt.push({'_i18n.domain': self.domain})
        processed = _apply_directives(stream, directives, ctxt, vars)
        for event in processed:
            yield event
        ctxt.pop()
}[string] + >>> translator = Translator(pseudo_gettext) + + Next, the translator needs to be prepended to any already defined filters + on the template: + + >>> tmpl.filters.insert(0, translator) + + When generating the template output, our hard-coded translations should be + applied as expected: + + >>> print(tmpl.generate(username='Hans', _=pseudo_gettext)) + <html> + <head> + <title>Beispiel</title> + </head> + <body> + <h1>Beispiel</h1> + <p>Hallo, Hans</p> + </body> + </html> + + Note that elements defining ``xml:lang`` attributes that do not contain + variable expressions are ignored by this filter. That can be used to + exclude specific parts of a template from being extracted and translated. + """ + + directives = [ + ('domain', DomainDirective), + ('comment', CommentDirective), + ('msg', MsgDirective), + ('choose', ChooseDirective), + ('singular', SingularDirective), + ('plural', PluralDirective) + ] + + IGNORE_TAGS = frozenset([ + QName('script'), QName('http://www.w3.org/1999/xhtml}script'), + QName('style'), QName('http://www.w3.org/1999/xhtml}style') + ]) + INCLUDE_ATTRS = frozenset([ + 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title' + ]) + NAMESPACE = I18N_NAMESPACE + + def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, + include_attrs=INCLUDE_ATTRS, extract_text=True): + """Initialize the translator. + + :param translate: the translation function, for example ``gettext`` or + ``ugettext``. 
def __call__(self, stream, ctxt=None, translate_text=True,
             translate_attrs=True):
    """Translate any localizable strings in the given stream.

    This function shouldn't be called directly. Instead, an instance of
    the `Translator` class should be registered as a filter with the
    `Template` or the `TemplateLoader`, or applied as a regular stream
    filter. If used as a template filter, it should be inserted in front of
    all the default filters.

    :param stream: the markup event stream
    :param ctxt: the template context; when given, the resolved gettext
                 functions are stored in it under ``_i18n.*`` keys and
                 ``_i18n.domain`` is read from it
    :param translate_text: whether text nodes should be translated (used
                           internally)
    :param translate_attrs: whether attribute values should be translated
                            (used internally)
    :return: the localized stream
    """
    # NOTE(review): this local is assigned but the loop below tests
    # ``self.ignore_tags`` directly.
    ignore_tags = self.ignore_tags
    include_attrs = self.include_attrs
    # Nesting depth inside an ignored element; 0 means "translate".
    skip = 0
    xml_lang = XML_NAMESPACE['lang']
    if not self.extract_text:
        translate_text = False
        translate_attrs = False

    if type(self.translate) is FunctionType:
        # A plain gettext-style function: only ``gettext`` is available.
        gettext = self.translate
        if ctxt:
            ctxt['_i18n.gettext'] = gettext
    else:
        # A translations object: use its unicode-returning methods.
        gettext = self.translate.ugettext
        ngettext = self.translate.ungettext
        try:
            dgettext = self.translate.dugettext
            dngettext = self.translate.dungettext
        except AttributeError:
            # No domain support: fall back to ignoring the domain argument.
            dgettext = lambda _, y: gettext(y)
            dngettext = lambda _, s, p, n: ngettext(s, p, n)
        if ctxt:
            ctxt['_i18n.gettext'] = gettext
            ctxt['_i18n.ngettext'] = ngettext
            ctxt['_i18n.dgettext'] = dgettext
            ctxt['_i18n.dngettext'] = dngettext

    if ctxt and ctxt.get('_i18n.domain'):
        # NOTE(review): ``dgettext`` is only bound in the ``else`` branch
        # above, so with a plain function translator and an active domain
        # this lambda would fail with a NameError when called -- confirm.
        gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)

    for kind, data, pos in stream:

        # skip chunks that should not be localized
        if skip:
            if kind is START:
                skip += 1
            elif kind is END:
                skip -= 1
            yield kind, data, pos
            continue

        # handle different events that can be localized
        if kind is START:
            tag, attrs = data
            # Ignored tags, and elements with a literal xml:lang value,
            # are passed through untouched together with their content.
            if tag in self.ignore_tags or \
                    isinstance(attrs.get(xml_lang), basestring):
                skip += 1
                yield kind, data, pos
                continue

            new_attrs = []
            changed = False

            for name, value in attrs:
                newval = value
                if isinstance(value, basestring):
                    if translate_attrs and name in include_attrs:
                        newval = gettext(value)
                else:
                    # Non-string attribute values are treated as event
                    # streams and filtered recursively, without
                    # translating their plain text nodes.
                    newval = list(
                        self(_ensure(value), ctxt, translate_text=False)
                    )
                if newval != value:
                    value = newval
                    changed = True
                new_attrs.append((name, value))
            # Only build a new Attrs object when something was translated.
            if changed:
                attrs = Attrs(new_attrs)

            yield kind, (tag, attrs), pos

        elif translate_text and kind is TEXT:
            # Translate the stripped text but keep the surrounding
            # whitespace of the original node.
            text = data.strip()
            if text:
                data = data.replace(text, unicode(gettext(text)))
            yield kind, data, pos

        elif kind is SUB:
            directives, substream = data
            current_domain = None
            for idx, directive in enumerate(directives):
                # Organize directives to make everything work
                # FIXME: There's got to be a better way to do this!
                if isinstance(directive, DomainDirective):
                    # Grab current domain and update context
                    # NOTE(review): ``ctxt`` defaults to None, in which
                    # case this ``push`` would raise AttributeError.
                    current_domain = directive.domain
                    ctxt.push({'_i18n.domain': current_domain})
                    # Put domain directive as the first one in order to
                    # update context before any other directives evaluation
                    directives.insert(0, directives.pop(idx))

            # If this is an i18n directive, no need to translate text
            # nodes here
            is_i18n_directive = any([
                isinstance(d, ExtractableI18NDirective)
                for d in directives
            ])
            substream = list(self(substream, ctxt,
                                  translate_text=not is_i18n_directive,
                                  translate_attrs=translate_attrs))
            yield kind, (directives, substream), pos

            # Undo the push made for the domain directive above.
            if current_domain:
                ctxt.pop()
        else:
            yield kind, data, pos
</head> + ... <body> + ... <h1>Example</h1> + ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> + ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> + ... </body> + ... </html>''', filename='example.html') + >>> for line, func, msg, comments in Translator().extract(tmpl.stream): + ... print('%d, %r, %r' % (line, func, msg)) + 3, None, u'Example' + 6, None, u'Example' + 7, '_', u'Hello, %(name)s' + 8, 'ngettext', (u'You have %d item', u'You have %d items', None) + + :param stream: the event stream to extract strings from; can be a + regular stream or a template stream + :param gettext_functions: a sequence of function names that should be + treated as gettext-style localization + functions + :param search_text: whether the content of text nodes should be + extracted (used internally) + + :note: Changed in 0.4.1: For a function with multiple string arguments + (such as ``ngettext``), a single item with a tuple of strings is + yielded, instead an item for each string argument. + :note: Changed in 0.6: The returned tuples now include a fourth + element, which is a list of comments for the translator. 
+ """ + if not self.extract_text: + search_text = False + if comment_stack is None: + comment_stack = [] + skip = 0 + + xml_lang = XML_NAMESPACE['lang'] + + for kind, data, pos in stream: + if skip: + if kind is START: + skip += 1 + if kind is END: + skip -= 1 + + if kind is START and not skip: + tag, attrs = data + if tag in self.ignore_tags or \ + isinstance(attrs.get(xml_lang), basestring): + skip += 1 + continue + + for message in self._extract_attrs((kind, data, pos), + gettext_functions, + search_text=search_text): + yield message + + elif not skip and search_text and kind is TEXT: + text = data.strip() + if text and [ch for ch in text if ch.isalpha()]: + yield pos[1], None, text, comment_stack[-1:] + + elif kind is EXPR or kind is EXEC: + for funcname, strings in extract_from_code(data, + gettext_functions): + # XXX: Do we need to grab i18n:comment from comment_stack ??? + yield pos[1], funcname, strings, [] + + elif kind is SUB: + directives, substream = data + in_comment = False + + for idx, directive in enumerate(directives): + # Do a first loop to see if there's a comment directive + # If there is update context and pop it from directives + if isinstance(directive, CommentDirective): + in_comment = True + comment_stack.append(directive.comment) + if len(directives) == 1: + # in case we're in the presence of something like: + # <p i18n:comment="foo">Foo</p> + for message in self.extract( + substream, gettext_functions, + search_text=search_text and not skip, + comment_stack=comment_stack): + yield message + directives.pop(idx) + elif not isinstance(directive, I18NDirective): + # Remove all other non i18n directives from the process + directives.pop(idx) + + if not directives and not in_comment: + # Extract content if there's no directives because + # strip was pop'ed and not because comment was pop'ed. + # Extraction in this case has been taken care of. 
def get_directive_index(self, dir_cls):
    """Return the ordering index for the directive class `dir_cls`.

    Classes listed in ``self._dir_order`` get a negative index (their
    position minus the list length); any other class gets the list length
    itself.
    """
    order = self._dir_order
    count = len(order)
    try:
        position = order.index(dir_cls)
    except ValueError:
        # Not one of the known directive classes.
        return count
    return position - count
def append(self, kind, data, pos):
    """Append a stream event to the buffer.

    Text and expressions are added to the message string; every event is
    also recorded in ``self.events`` under the order number of the element
    it belongs to, so that the translated stream can be rebuilt later.

    :param kind: the stream event kind
    :param data: the event data
    :param pos: the position of the event in the source
    :raises IndexError: if an expression is found but no parameter name is
                        left for it
    """
    if kind is SUB:
        # The order needs to be +1 because a new START kind event will
        # happen and we need to wrap those events into our custom kind(s)
        order = self.stack[-1] + 1
        subdirectives, substream = data
        # Store the directives that should be applied after translation
        self.subdirectives.setdefault(order, []).extend(subdirectives)
        self.events.setdefault(order, []).append((SUB_START, None, pos))
        # Buffer the substream's own events recursively, bracketed by the
        # SUB_START / SUB_END markers.
        for skind, sdata, spos in substream:
            self.append(skind, sdata, spos)
        self.events.setdefault(order, []).append((SUB_END, None, pos))
    elif kind is TEXT:
        if '[' in data or ']' in data:
            # Quote [ and ] if it ain't us adding it, ie, if the user is
            # using those chars in his templates, escape them
            data = data.replace('[', '\[').replace(']', '\]')
        self.string.append(data)
        self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
    elif kind is EXPR:
        if self.params:
            # Consume the next declared parameter name for this expression.
            param = self.params.pop(0)
        else:
            # More expressions than declared parameters: report how many
            # were given and where the extra expression was found.
            params = ', '.join(['"%s"' % p for p in self.orig_params if p])
            if params:
                params = "(%s)" % params
            raise IndexError("%d parameters%s given to 'i18n:%s' but "
                             "%d or more expressions used in '%s', line %s"
                             % (len(self.orig_params), params,
                                self.directive.tagname,
                                len(self.orig_params) + 1,
                                os.path.basename(pos[0] or
                                                 'In-memory Template'),
                                pos[1]))
        # The expression shows up in the message as a %(name)s placeholder.
        self.string.append('%%(%s)s' % param)
        self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
        self.values[param] = (kind, data, pos)
    else:
        if kind is START:
            # Open a numbered "[N:" group for the nested element.
            self.string.append('[%d:' % self.order)
            self.stack.append(self.order)
            self.events.setdefault(self.stack[-1],
                                   []).append((kind, data, pos))
            self.depth += 1
            self.order += 1
        elif kind is END:
            self.depth -= 1
            # An END that brings the depth to zero is neither recorded nor
            # closes a group (presumably the enclosing element's end tag
            # -- TODO confirm).
            if self.depth:
                self.events[self.stack[-1]].append((kind, data, pos))
                self.string.append(']')
                self.stack.pop()
def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
    """Parse a translated message using Genshi mixed content message
    formatting.

    >>> parse_msg("See [1:Help].")
    [(0, 'See '), (1, 'Help'), (0, '.')]

    >>> parse_msg("See [1:our [2:Help] page] for details.")
    [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]

    >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
    [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]

    >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
    [(1, ''), (0, ' Bilder pro Seite anzeigen.')]

    An unbalanced closing bracket stops the parsing; whatever follows it is
    returned unparsed as top-level text instead of raising ``IndexError``:

    >>> parse_msg("foo] bar")
    [(0, 'foo'), (0, ' bar')]

    :param string: the translated message string
    :param regex: compiled pattern matching a ``[N:`` group opener or an
                  unescaped ``]``
    :return: a list of ``(order, string)`` tuples
    :rtype: `list`
    """
    parts = []
    # Order numbers of the groups currently open; 0 is the top level.
    stack = [0]
    while True:
        mo = regex.search(string)
        if not mo:
            break

        # Text before the marker belongs to the innermost open group.
        # Empty leading text is only kept inside a numbered group.
        if mo.start() or stack[-1]:
            parts.append((stack[-1], string[:mo.start()]))
        string = string[mo.end():]

        orderno = mo.group(1)
        if orderno is not None:
            stack.append(int(orderno))
        else:
            stack.pop()
        if not stack:
            # A "]" without a matching opener closed the top level.
            break

    if string:
        # The stack is empty after an unbalanced "]"; treat the rest as
        # top-level text rather than indexing into an empty list.
        if stack:
            order = stack[-1]
        else:
            order = 0
        parts.append((order, string))

    return parts
def extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Genshi templates.

    Recognized options are ``template_class`` (a class or a
    ``module:ClassName`` string), ``encoding``, ``extract_text``,
    ``ignore_tags`` and ``include_attrs`` (the last two may be given as
    whitespace-separated strings).

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results (not used by this function)
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    def _qname_option(key, default):
        # Read an option that is either a sequence of names or a
        # whitespace-separated string, and turn every name into a QName.
        names = options.get(key, default)
        if isinstance(names, basestring):
            names = names.split()
        return [QName(item) for item in names]

    template_class = options.get('template_class', MarkupTemplate)
    if isinstance(template_class, basestring):
        # "package.module:ClassName" -> import the module, fetch the class.
        module_name, class_name = template_class.split(':', 1)
        loaded = __import__(module_name, {}, {}, [class_name])
        template_class = getattr(loaded, class_name)
    encoding = options.get('encoding', None)

    extract_text = options.get('extract_text', True)
    if isinstance(extract_text, basestring):
        extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')

    ignore_tags = _qname_option('ignore_tags', Translator.IGNORE_TAGS)
    include_attrs = _qname_option('include_attrs', Translator.INCLUDE_ATTRS)

    source_name = getattr(fileobj, 'name', None)
    tmpl = template_class(fileobj, filename=source_name, encoding=encoding)
    tmpl.loader = None

    translator = Translator(None, ignore_tags, include_attrs, extract_text)
    if hasattr(tmpl, 'add_directives'):
        tmpl.add_directives(Translator.NAMESPACE, translator)
    messages = translator.extract(tmpl.stream, gettext_functions=keywords)
    for message in messages:
        yield message
The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""A filter for functional-style transformations of markup streams. + +The `Transformer` filter provides a variety of transformations that can be +applied to parts of streams that match given XPath expressions. These +transformations can be chained to achieve results that would be comparitively +tedious to achieve by writing stream filters by hand. The approach of chaining +node selection and transformation has been inspired by the `jQuery`_ Javascript +library. + + .. _`jQuery`: http://jquery.com/ + +For example, the following transformation removes the ``<title>`` element from +the ``<head>`` of the input document: + +>>> from genshi.builder import tag +>>> html = HTML('''<html> +... <head><title>Some Title</title></head> +... <body> +... Some <em>body</em> text. +... </body> +... </html>''') +>>> print(html | Transformer('body/em').map(unicode.upper, TEXT) +... .unwrap().wrap(tag.u)) +<html> + <head><title>Some Title</title></head> + <body> + Some <u>BODY</u> text. + </body> +</html> + +The ``Transformer`` support a large number of useful transformations out of the +box, but custom transformations can be added easily. 
class PushBackStream(object):
    """Allows a single event to be pushed back onto the stream and re-consumed.

    Iterating yields the events of the wrapped stream; an event handed to
    `push()` during iteration is yielded again before the next event of the
    underlying stream.
    """
    def __init__(self, stream):
        """Wrap `stream`, an iterable of events."""
        self.stream = iter(stream)
        # The pushed-back event, or None when nothing is pending.
        self.peek = None

    def push(self, event):
        """Push `event` back so that it is yielded next.

        Only one event can be pending at a time.
        """
        assert self.peek is None
        self.peek = event

    def __iter__(self):
        """Yield pending pushed-back events and the wrapped stream's events
        until both are exhausted.
        """
        while True:
            if self.peek is not None:
                peek = self.peek
                self.peek = None
                yield peek
            else:
                # Pull exactly one event. The for/else form works on every
                # Python version, unlike the Python 2 only ``.next()``.
                for event in self.stream:
                    yield event
                    # Go back to the top to honour a push made by the
                    # consumer while it handled this event.
                    break
                else:
                    # Underlying stream exhausted. End the generator with a
                    # plain return; re-raising StopIteration here turns into
                    # a RuntimeError under PEP 479.
                    if self.peek is None:
                        return
def apply(self, function):
    """Return a new `Transformer` with `function` added to the chain.

    Transformations can be chained, similar to stream filters. Any callable
    accepting a marked stream can be used as a transform. Passing another
    `Transformer` appends all of its transforms instead. The receiver
    itself is left unmodified.

    As an example, here is a simple `TEXT` event upper-casing transform:

    >>> def upper(stream):
    ...     for mark, (kind, data, pos) in stream:
    ...         if mark and kind is TEXT:
    ...             yield mark, (kind, data.upper(), pos)
    ...         else:
    ...             yield mark, (kind, data, pos)
    >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
    >>> print(short_stream | Transformer('.//em/text()').apply(upper))
    <body>Some <em>TEST</em> text</body>

    :param function: a callable taking a marked stream, or a `Transformer`
    :rtype: `Transformer`
    """
    chained = Transformer()
    # Work on a copy so the receiver's chain is not shared.
    pipeline = self.transforms[:]
    if isinstance(function, Transformer):
        pipeline += function.transforms
    else:
        pipeline.append(function)
    chained.transforms = pipeline
    return chained
+ + >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> print(html | Transformer('//em').invert().trace()) + ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) + ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) + (None, ('START', (QName('em'), Attrs()), (None, 1, 11))) + (None, ('TEXT', u'test', (None, 1, 15))) + (None, ('END', QName('em'), (None, 1, 19))) + ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) + ('OUTSIDE', ('END', QName('body'), (None, 1, 29))) + <body>Some <em>test</em> text</body> + + :rtype: `Transformer` + """ + return self.apply(InvertTransformation()) + + def end(self): + """End current selection, allowing all events to be selected. + + Example: + + >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> print(html | Transformer('//em').end().trace()) + ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) + ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) + ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11))) + ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) + ('OUTSIDE', ('END', QName('em'), (None, 1, 19))) + ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) + ('OUTSIDE', ('END', QName('body'), (None, 1, 29))) + <body>Some <em>test</em> text</body> + + :return: the stream augmented by transformation marks + :rtype: `Transformer` + """ + return self.apply(EndTransformation()) + + #{ Deletion operations + + def empty(self): + """Empty selected elements of all content. + + Example: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em').empty()) + <html><head><title>Some Title</title></head><body>Some <em/> + text.</body></html> + + :rtype: `Transformer` + """ + return self.apply(EmptyTransformation()) + + def remove(self): + """Remove selection from the stream. + + Example: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... 
'<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em').remove()) + <html><head><title>Some Title</title></head><body>Some + text.</body></html> + + :rtype: `Transformer` + """ + return self.apply(RemoveTransformation()) + + #{ Direct element operations + + def unwrap(self): + """Remove outermost enclosing elements from selection. + + Example: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em').unwrap()) + <html><head><title>Some Title</title></head><body>Some body + text.</body></html> + + :rtype: `Transformer` + """ + return self.apply(UnwrapTransformation()) + + def wrap(self, element): + """Wrap selection in an element. + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em').wrap('strong')) + <html><head><title>Some Title</title></head><body>Some + <strong><em>body</em></strong> text.</body></html> + + :param element: either a tag name (as string) or an `Element` object + :rtype: `Transformer` + """ + return self.apply(WrapTransformation(element)) + + #{ Content insertion operations + + def replace(self, content): + """Replace selection with content. + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//title/text()').replace('New Title')) + <html><head><title>New Title</title></head><body>Some <em>body</em> + text.</body></html> + + :param content: Either a callable, an iterable of events, or a string + to insert. + :rtype: `Transformer` + """ + return self.apply(ReplaceTransformation(content)) + + def before(self, content): + """Insert content before selection. + + In this example we insert the word 'emphasised' before the <em> opening + tag: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... 
'<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em').before('emphasised ')) + <html><head><title>Some Title</title></head><body>Some emphasised + <em>body</em> text.</body></html> + + :param content: Either a callable, an iterable of events, or a string + to insert. + :rtype: `Transformer` + """ + return self.apply(BeforeTransformation(content)) + + def after(self, content): + """Insert content after selection. + + Here, we insert some text after the </em> closing tag: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em').after(' rock')) + <html><head><title>Some Title</title></head><body>Some <em>body</em> + rock text.</body></html> + + :param content: Either a callable, an iterable of events, or a string + to insert. + :rtype: `Transformer` + """ + return self.apply(AfterTransformation(content)) + + def prepend(self, content): + """Insert content after the ENTER event of the selection. + + Inserting some new text at the start of the <body>: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//body').prepend('Some new body text. ')) + <html><head><title>Some Title</title></head><body>Some new body text. + Some <em>body</em> text.</body></html> + + :param content: Either a callable, an iterable of events, or a string + to insert. + :rtype: `Transformer` + """ + return self.apply(PrependTransformation(content)) + + def append(self, content): + """Insert content before the END event of the selection. + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//body').append(' Some new body text.')) + <html><head><title>Some Title</title></head><body>Some <em>body</em> + text. 
Some new body text.</body></html> + + :param content: Either a callable, an iterable of events, or a string + to insert. + :rtype: `Transformer` + """ + return self.apply(AppendTransformation(content)) + + #{ Attribute manipulation + + def attr(self, name, value): + """Add, replace or delete an attribute on selected elements. + + If `value` evaulates to `None` the attribute will be deleted from the + element: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' + ... '</html>') + >>> print(html | Transformer('body/em').attr('class', None)) + <html><head><title>Some Title</title></head><body>Some <em>body</em> + <em>text</em>.</body></html> + + Otherwise the attribute will be set to `value`: + + >>> print(html | Transformer('body/em').attr('class', 'emphasis')) + <html><head><title>Some Title</title></head><body>Some <em + class="emphasis">body</em> <em class="emphasis">text</em>.</body></html> + + If `value` is a callable it will be called with the attribute name and + the `START` event for the matching element. Its return value will then + be used to set the attribute: + + >>> def print_attr(name, event): + ... attrs = event[1][1] + ... print(attrs) + ... return attrs.get(name) + >>> print(html | Transformer('body/em').attr('class', print_attr)) + Attrs([(QName('class'), u'before')]) + Attrs() + <html><head><title>Some Title</title></head><body>Some <em + class="before">body</em> <em>text</em>.</body></html> + + :param name: the name of the attribute + :param value: the value that should be set for the attribute. + :rtype: `Transformer` + """ + return self.apply(AttrTransformation(name, value)) + + #{ Buffer operations + + def copy(self, buffer, accumulate=False): + """Copy selection into buffer. + + The buffer is replaced by each *contiguous* selection before being passed + to the next transformation. 
If accumulate=True, further selections will + be appended to the buffer rather than replacing it. + + >>> from genshi.builder import tag + >>> buffer = StreamBuffer() + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('head/title/text()').copy(buffer) + ... .end().select('body').prepend(tag.h1(buffer))) + <html><head><title>Some Title</title></head><body><h1>Some + Title</h1>Some <em>body</em> text.</body></html> + + This example illustrates that only a single contiguous selection will + be buffered: + + >>> print(html | Transformer('head/title/text()').copy(buffer) + ... .end().select('body/em').copy(buffer).end().select('body') + ... .prepend(tag.h1(buffer))) + <html><head><title>Some Title</title></head><body><h1>Some + Title</h1>Some <em>body</em> text.</body></html> + >>> print(buffer) + <em>body</em> + + Element attributes can also be copied for later use: + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body><em>Some</em> <em class="before">body</em>' + ... '<em>text</em>.</body></html>') + >>> buffer = StreamBuffer() + >>> def apply_attr(name, entry): + ... return list(buffer)[0][1][1].get('class') + >>> print(html | Transformer('body/em[@class]/@class').copy(buffer) + ... .end().buffer().select('body/em[not(@class)]') + ... .attr('class', apply_attr)) + <html><head><title>Some Title</title></head><body><em + class="before">Some</em> <em class="before">body</em><em + class="before">text</em>.</body></html> + + + :param buffer: the `StreamBuffer` in which the selection should be + stored + :rtype: `Transformer` + :note: Copy (and cut) copy each individual selected object into the + buffer before passing to the next transform. For example, the + XPath ``*|text()`` will select all elements and text, each + instance of which will be copied to the buffer individually + before passing to the next transform. 
This has implications for + how ``StreamBuffer`` objects can be used, so some + experimentation may be required. + + """ + return self.apply(CopyTransformation(buffer, accumulate)) + + def cut(self, buffer, accumulate=False): + """Copy selection into buffer and remove the selection from the stream. + + >>> from genshi.builder import tag + >>> buffer = StreamBuffer() + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('.//em/text()').cut(buffer) + ... .end().select('.//em').after(tag.h1(buffer))) + <html><head><title>Some Title</title></head><body>Some + <em/><h1>body</h1> text.</body></html> + + Specifying accumulate=True, appends all selected intervals onto the + buffer. Combining this with the .buffer() operation allows us operate + on all copied events rather than per-segment. See the documentation on + buffer() for more information. + + :param buffer: the `StreamBuffer` in which the selection should be + stored + :rtype: `Transformer` + :note: this transformation will buffer the entire input stream + """ + return self.apply(CutTransformation(buffer, accumulate)) + + def buffer(self): + """Buffer the entire stream (can consume a considerable amount of + memory). + + Useful in conjunction with copy(accumulate=True) and + cut(accumulate=True) to ensure that all marked events in the entire + stream are copied to the buffer before further transformations are + applied. + + For example, to move all <note> elements inside a <notes> tag at the + top of the document: + + >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' + ... 'text <note>two</note>.</body></doc>') + >>> buffer = StreamBuffer() + >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) + ... 
.end().buffer().select('notes').prepend(buffer)) + <doc><notes><note>one</note><note>two</note></notes><body>Some text + .</body></doc> + + """ + return self.apply(list) + + #{ Miscellaneous operations + + def filter(self, filter): + """Apply a normal stream filter to the selection. The filter is called + once for each contiguous block of marked events. + + >>> from genshi.filters.html import HTMLSanitizer + >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' + ... '</script> and some more text</body></html>') + >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) + <html><body>Some text and some more text</body></html> + + :param filter: The stream filter to apply. + :rtype: `Transformer` + """ + return self.apply(FilterTransformation(filter)) + + def map(self, function, kind): + """Applies a function to the ``data`` element of events of ``kind`` in + the selection. + + >>> html = HTML('<html><head><title>Some Title</title></head>' + ... '<body>Some <em>body</em> text.</body></html>') + >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) + <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> + text.</body></html> + + :param function: the function to apply + :param kind: the kind of event the function should be applied to + :rtype: `Transformer` + """ + return self.apply(MapTransformation(function, kind)) + + def substitute(self, pattern, replace, count=1): + """Replace text matching a regular expression. + + Refer to the documentation for ``re.sub()`` for details. + + >>> html = HTML('<html><body>Some text, some more text and ' + ... '<b>some bold text</b>\\n' + ... '<i>some italicised text</i></body></html>') + >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) + <html><body>Some text, some more text and <b>SOME bold text</b> + <i>some italicised text</i></body></html> + >>> tags = tag.html(tag.body('Some text, some more text and\\n', + ... 
    def rename(self, name):
        """Rename matching elements.

        >>> html = HTML('<html><body>Some text, some more text and '
        ...             '<b>some bold text</b></body></html>')
        >>> print(html | Transformer('body/b').rename('strong'))
        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>

        :param name: the new element name
        :rtype: `Transformer`
        """
        return self.apply(RenameTransformation(name))

    def trace(self, prefix='', fileobj=None):
        """Print events as they pass through the transform.

        >>> html = HTML('<body>Some <em>test</em> text</body>')
        >>> print(html | Transformer('em').trace())
        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
        (None, ('TEXT', u'Some ', (None, 1, 6)))
        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
        ('EXIT', ('END', QName('em'), (None, 1, 19)))
        (None, ('TEXT', u' text', (None, 1, 24)))
        (None, ('END', QName('body'), (None, 1, 29)))
        <body>Some <em>test</em> text</body>

        :param prefix: a string to prefix each event with in the output
        :param fileobj: the writable file-like object to write to; defaults to
                        the standard output stream
        :rtype: `Transformer`
        """
        return self.apply(TraceTransformation(prefix, fileobj=fileobj))

    # Internal methods

    def _mark(self, stream):
        """Turn a plain event stream into a marked stream by pairing every
        event with the OUTSIDE mark.

        :param stream: the unmarked event stream
        """
        for event in stream:
            yield OUTSIDE, event

    def _unmark(self, stream):
        """Strip the marks off a marked stream, yielding the bare events.

        Events whose kind is None, ATTR or BREAK are dropped rather than
        yielded.

        :param stream: the marked event stream
        """
        for mark, event in stream:
            kind = event[0]
            if not (kind is None or kind is ATTR or kind is BREAK):
                yield event


class SelectTransformation(object):
    """Select and mark events that match an XPath expression."""

    def __init__(self, path):
        """Create selection.

        :param path: an XPath expression (as string) or a `Path` object
        """
        # Strings are compiled to a `Path`; `Path` instances are used as-is.
        if not isinstance(path, Path):
            path = Path(path)
        self.path = path

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Events carrying a null mark are passed through untouched; all other
        events are tested against the path and re-marked according to the
        test result.

        :param stream: the marked event stream to filter
        """
        namespaces = {}
        variables = {}
        test = self.path.test()
        # The same iterator is consumed by the outer ``for`` loop and, through
        # the bound ``next`` below, by the inner ``while`` loop that walks the
        # content of a matched element.
        stream = iter(stream)
        next = stream.next
        for mark, event in stream:
            if mark is None:
                yield mark, event
                continue
            result = test(event, namespaces, variables)
            # XXX This is effectively genshi.core._ensure() for transform
            # streams.
            if result is True:
                if event[0] is START:
                    yield ENTER, event
                    # Track nesting depth so the END that closes the matched
                    # START is the one marked EXIT.
                    depth = 1
                    while depth > 0:
                        mark, subevent = next()
                        if subevent[0] is START:
                            depth += 1
                        elif subevent[0] is END:
                            depth -= 1
                        if depth == 0:
                            yield EXIT, subevent
                        else:
                            yield INSIDE, subevent
                        # Sub-events are still fed to the path test, with
                        # ``updateonly=True``, and the result is discarded.
                        test(subevent, namespaces, variables, updateonly=True)
                else:
                    yield OUTSIDE, event
            elif isinstance(result, Attrs):
                # XXX  Selected *attributes* are given a "kind" of None to
                # indicate they are not really part of the stream.
                # NOTE(review): the synthetic event yielded here actually uses
                # ATTR as both mark and kind; the original event follows it
                # with a null mark.
                yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
                yield None, event
            elif isinstance(result, tuple):
                yield OUTSIDE, result
            elif result:
                # XXX Assume everything else is "text"?
                yield None, (TEXT, unicode(result), (None, -1, -1))
            else:
                yield None, event


class InvertTransformation(object):
    """Invert selection so that marked events become unmarked, and vice versa.

    Specifically, all input marks are converted to null marks, and all input
    null marks are converted to OUTSIDE marks.
    """

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark:
                yield None, event
            else:
                yield OUTSIDE, event
class EndTransformation(object):
    """Terminate the current selection."""

    def __call__(self, stream):
        """Re-mark every event of the marked stream as OUTSIDE.

        :param stream: the marked event stream to filter
        """
        for _old_mark, event in stream:
            yield OUTSIDE, event


class EmptyTransformation(object):
    """Remove all content from selected elements, keeping the elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Inside a selected element: swallow everything up to the EXIT
            # event, which is the only one passed on.
            for inner_mark, inner_event in stream:
                if inner_mark is EXIT:
                    yield inner_mark, inner_event
                    break


class RemoveTransformation(object):
    """Drop the selection from the stream."""

    def __call__(self, stream):
        """Pass on only the events that carry a null mark.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark is not None:
                continue
            yield mark, event


class UnwrapTransformation(object):
    """Remove outermost enclosing elements from selection."""

    def __call__(self, stream):
        """Drop the ENTER and EXIT events and pass everything else on.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark in (ENTER, EXIT):
                continue
            yield mark, event
class WrapTransformation(object):
    """Wrap selection in an element."""

    def __init__(self, element):
        """Create the transform.

        :param element: either an `Element` instance, or a value that is
                        passed to the `Element` constructor
        """
        if isinstance(element, Element):
            self.element = element
        else:
            self.element = Element(element)

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        for mark, event in stream:
            if mark:
                # Generate the wrapper's events afresh for every selection:
                # all but the last are emitted before the selection, the last
                # one after it.
                element = list(self.element.generate())
                for prefix in element[:-1]:
                    yield None, prefix
                yield mark, event
                start = mark
                # ``stopped`` records whether the selection was fully consumed
                # (EXIT seen or stream exhausted); if not, the event that
                # ended it must still be passed on after the closing event.
                stopped = False
                for mark, event in stream:
                    if start is ENTER and mark is EXIT:
                        yield mark, event
                        stopped = True
                        break
                    if not mark:
                        break
                    yield mark, event
                else:
                    # Stream ran out without a ``break``.
                    stopped = True
                yield None, element[-1]
                if not stopped:
                    yield mark, event
            else:
                yield mark, event


class TraceTransformation(object):
    """Print events as they pass through the transform."""

    def __init__(self, prefix='', fileobj=None):
        """Trace constructor.

        :param prefix: text to prefix each traced line with.
        :param fileobj: the writable file-like object to write to; falls back
                        to ``sys.stdout`` when not given
        """
        self.prefix = prefix
        self.fileobj = fileobj or sys.stdout

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        Each ``(mark, event)`` pair is written to the file object on its own
        line and then passed on unchanged.

        :param stream: the marked event stream to filter
        """
        for event in stream:
            self.fileobj.write('%s%s\n' % (self.prefix, event))
            yield event


class FilterTransformation(object):
    """Apply a normal stream filter to the selection. The filter is called once
    for each selection."""

    def __init__(self, filter):
        """Create the transform.

        :param filter: The stream filter to apply.
        """
        self.filter = filter

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        def flush(queue):
            # Run the queued (unmarked) events through the filter, re-mark the
            # results as OUTSIDE, then empty the queue in place.
            if queue:
                for event in self.filter(queue):
                    yield OUTSIDE, event
                del queue[:]

        queue = []
        for mark, event in stream:
            if mark is ENTER:
                # Element selection: queue everything up to and including the
                # EXIT event.
                queue.append(event)
                for mark, event in stream:
                    queue.append(event)
                    if mark is EXIT:
                        break
                for queue_event in flush(queue):
                    yield queue_event
            elif mark is OUTSIDE:
                # Contiguous run of OUTSIDE events: queue until the mark
                # changes or the stream ends.
                stopped = False
                queue.append(event)
                for mark, event in stream:
                    if mark is not OUTSIDE:
                        break
                    queue.append(event)
                else:
                    stopped = True
                for queue_event in flush(queue):
                    yield queue_event
                # The event that ended the run was not queued; pass it on.
                if not stopped:
                    yield mark, event
            else:
                yield mark, event
        for queue_event in flush(queue):
            yield queue_event
class MapTransformation(object):
    """Run a function over the `data` element of the selected events of a
    given ``kind``.
    """

    def __init__(self, function, kind):
        """Create the transform.

        :param function: the function to apply; the function must take one
                         argument, the `data` element of each selected event
        :param kind: the stream event ``kind`` to apply the `function` to
        """
        self.kind = kind
        self.function = function

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, (kind, data, pos) in stream:
            # A ``kind`` of None configured on the transform matches any
            # event kind.
            selected = mark and self.kind in (None, kind)
            if selected:
                data = self.function(data)
            yield mark, (kind, data, pos)


class SubstituteTransformation(object):
    """Replace text matching a regular expression.

    Refer to the documentation for ``re.sub()`` for details.
    """
    def __init__(self, pattern, replace, count=0):
        """Create the transform.

        :param pattern: A regular expression object, or string.
        :param replace: Replacement pattern.
        :param count: Number of replacements to make in each text fragment.
        """
        if not isinstance(pattern, basestring):
            compiled = pattern
        else:
            compiled = re.compile(pattern)
        self.pattern = compiled
        self.replace = replace
        self.count = count

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, (kind, data, pos) in stream:
            if mark is None or kind is not TEXT:
                yield mark, (kind, data, pos)
                continue
            replaced = self.pattern.sub(self.replace, data, self.count)
            # Keep `Markup` text as `Markup` after the substitution.
            if isinstance(data, Markup):
                replaced = Markup(replaced)
            yield mark, (kind, replaced, pos)
class RenameTransformation(object):
    """Give matching elements a new name."""
    def __init__(self, name):
        """Create the transform.

        :param name: New element name.
        """
        new_name = QName(name)
        self.name = new_name

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, (kind, data, pos) in stream:
            if mark is EXIT:
                # Closing event of a selected element: data is just the name.
                data = self.name
            elif mark is ENTER:
                # Opening event: keep the attributes, swap the name.
                data = self.name, data[1]
            yield mark, (kind, data, pos)


class InjectorTransformation(object):
    """Abstract base class for transformations that inject content into a
    stream.

    >>> class Top(InjectorTransformation):
    ...     def __call__(self, stream):
    ...         for event in self._inject():
    ...             yield event
    ...         for event in stream:
    ...             yield event
    >>> html = HTML('<body>Some <em>test</em> text</body>')
    >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
    Prefix <body>Some <em>test</em> text</body>
    """
    def __init__(self, content):
        """Create a new injector.

        :param content: An iterable of Genshi stream events, or a string to be
                        injected.
        """
        self.content = content

    def _inject(self):
        """Yield the content to inject as null-marked events.

        Callable content is called first and its result is injected.
        """
        payload = self.content
        if hasattr(payload, '__call__'):
            payload = payload()
        for injected in _ensure(payload):
            yield None, injected
class ReplaceTransformation(InjectorTransformation):
    """Replace selection with content."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        # Wrapped so that the event that ends a selection can be pushed back
        # and seen again by the outer loop.
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is not None:
                start = mark
                for subevent in self._inject():
                    yield subevent
                # Discard the rest of the selection: up to EXIT for an element
                # selection, otherwise until the mark changes.
                for mark, event in stream:
                    if start is ENTER:
                        if mark is EXIT:
                            break
                    elif mark != start:
                        stream.push((mark, event))
                        break
            else:
                yield mark, event


class BeforeTransformation(InjectorTransformation):
    """Insert content before selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        stream = PushBackStream(stream)
        for mark, event in stream:
            if mark is not None:
                start = mark
                # Injected content goes first, then the selection itself.
                for subevent in self._inject():
                    yield subevent
                yield mark, event
                for mark, event in stream:
                    # A non-element selection ends as soon as the mark
                    # changes; that event belongs to the outer loop.
                    if mark != start and start is not ENTER:
                        stream.push((mark, event))
                        break
                    yield mark, event
                    if start is ENTER and mark is EXIT:
                        break
            else:
                yield mark, event


class AfterTransformation(InjectorTransformation):
    """Insert content after selection."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        stream = PushBackStream(stream)
        for mark, event in stream:
            yield mark, event
            if mark:
                start = mark
                # Pass the rest of the selection through, then inject.
                for mark, event in stream:
                    if start is not ENTER and mark != start:
                        stream.push((mark, event))
                        break
                    yield mark, event
                    if start is ENTER and mark is EXIT:
                        break
                for subevent in self._inject():
                    yield subevent
class PrependTransformation(InjectorTransformation):
    """Insert content at the start of the inside of selected elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Directly after the opening event of a selected element.
            for injected in self._inject():
                yield injected


class AppendTransformation(InjectorTransformation):
    """Insert content at the end of the inside of selected elements."""

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        for mark, event in stream:
            yield mark, event
            if mark is not ENTER:
                continue
            # Pass the element's content through; the loop leaves ``mark`` and
            # ``event`` bound to the EXIT event, which is held back until the
            # injected content has been emitted.
            for mark, event in stream:
                if mark is EXIT:
                    break
                yield mark, event
            for injected in self._inject():
                yield injected
            yield mark, event


class AttrTransformation(object):
    """Set an attribute on selected elements."""

    def __init__(self, name, value):
        """Construct transform.

        :param name: name of the attribute that should be set
        :param value: the value to set
        """
        self.value = value
        self.name = name

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: The marked event stream to filter
        """
        value_is_callable = hasattr(self.value, '__call__')
        for mark, (kind, data, pos) in stream:
            if mark is not ENTER:
                yield mark, (kind, data, pos)
                continue
            if value_is_callable:
                new_value = self.value(self.name, (kind, data, pos))
            else:
                new_value = self.value
            # None removes the attribute; anything else sets or replaces it.
            if new_value is None:
                new_attrs = data[1] - [QName(self.name)]
            else:
                new_attrs = data[1] | [(QName(self.name), new_value)]
            yield mark, (kind, (data[0], new_attrs), pos)
class StreamBuffer(Stream):
    """Stream event buffer used for cut and copy transformations."""

    def __init__(self):
        """Create the buffer."""
        Stream.__init__(self, [])

    def append(self, event):
        """Add an event to the buffer.

        :param event: the markup event to add
        """
        self.events.append(event)

    def reset(self):
        """Empty the buffer of events."""
        del self.events[:]


class CopyTransformation(object):
    """Copy selected events into a buffer for later insertion."""

    def __init__(self, buffer, accumulate=False):
        """Create the copy transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, selections are appended to the buffer
                           instead of replacing its contents
        """
        if not accumulate:
            buffer.reset()
        self.buffer = buffer
        self.accumulate = accumulate

    def __call__(self, stream):
        """Apply the transformation to the marked stream.

        :param stream: the marked event stream to filter
        """
        stream = PushBackStream(stream)

        for mark, event in stream:
            if mark:
                if not self.accumulate:
                    self.buffer.reset()
                # The whole contiguous selection is copied into the buffer
                # before any of it is passed on to the next transformation.
                events = [(mark, event)]
                self.buffer.append(event)
                start = mark
                for mark, event in stream:
                    if start is not ENTER and mark != start:
                        stream.push((mark, event))
                        break
                    events.append((mark, event))
                    self.buffer.append(event)
                    if start is ENTER and mark is EXIT:
                        break
                for i in events:
                    yield i
            else:
                yield mark, event


class CutTransformation(object):
    """Cut selected events into a buffer for later insertion and remove the
    selection.
    """

    def __init__(self, buffer, accumulate=False):
        """Create the cut transformation.

        :param buffer: the `StreamBuffer` in which the selection should be
                       stored
        :param accumulate: if true, selections are appended to the buffer
                           instead of replacing its contents
        """
        self.buffer = buffer
        self.accumulate = accumulate

    def __call__(self, stream):
        """Apply the transform filter to the marked stream.

        :param stream: the marked event stream to filter
        """
        # Names of the attributes cut from the START event that follows the
        # current run of ATTR events.
        attributes = []
        stream = PushBackStream(stream)
        broken = False
        if not self.accumulate:
            self.buffer.reset()
        for mark, event in stream:
            if mark:
                # Send a BREAK event if there was no other event sent between
                if not self.accumulate:
                    if not broken and self.buffer:
                        yield BREAK, (BREAK, None, None)
                    self.buffer.reset()
                self.buffer.append(event)
                start = mark
                if mark is ATTR:
                    attributes.extend([name for name, _ in event[1][1]])
                for mark, event in stream:
                    if start is mark is ATTR:
                        attributes.extend([name for name, _ in event[1][1]])
                    # Handle non-element contiguous selection
                    if start is not ENTER and mark != start:
                        # Operating on the attributes of a START event
                        if start is ATTR:
                            kind, data, pos = event
                            assert kind is START
                            data = (data[0], data[1] - attributes)
                            # Start a fresh list for the next attribute
                            # selection. Resetting to None here made the
                            # ``attributes.extend`` calls above fail as soon
                            # as a second element's attributes were cut.
                            attributes = []
                            stream.push((mark, (kind, data, pos)))
                        else:
                            stream.push((mark, event))
                        break
                    self.buffer.append(event)
                    if start is ENTER and mark is EXIT:
                        break
                broken = False
            else:
                broken = True
                yield mark, event
        if not broken and self.buffer:
            yield BREAK, (BREAK, None, None)
def ET(element):
    """Convert a given ElementTree element to a markup stream.

    The element is walked recursively: a START event, the element's text,
    the events of each child, an END event, and finally the element's tail
    text. All positions are reported as ``(None, -1, -1)``.

    :param element: an ElementTree element
    :return: a markup stream
    """
    tag_name = QName(element.tag.lstrip('{'))
    attrs = Attrs([(QName(attr.lstrip('{')), value)
                   for attr, value in element.items()])

    yield START, (tag_name, attrs), (None, -1, -1)
    if element.text:
        yield TEXT, element.text, (None, -1, -1)
    # Iterate the element directly: ``getchildren()`` is deprecated and no
    # longer exists in Python 3.9+, while iteration yields the same children.
    for child in element:
        for item in ET(child):
            yield item
    yield END, tag_name, (None, -1, -1)
    if element.tail:
        yield TEXT, element.tail, (None, -1, -1)


class ParseError(Exception):
    """Exception raised when fatal syntax errors are found in the input being
    parsed.
    """

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Exception initializer.

        The bare parser message is kept in ``msg``; the exception's own
        message additionally names the file when one is given.

        :param message: the error message from the parser
        :param filename: the path to the file that was parsed
        :param lineno: the number of the line on which the error was encountered
        :param offset: the column number where the error was encountered
        """
        self.msg = message
        if filename:
            message += ', in ' + filename
        Exception.__init__(self, message)
        self.filename = filename or '<string>'
        self.lineno = lineno
        self.offset = offset
class XMLParser(object):
    """Generator-based XML parser based on roughly equivalent code in
    Kid/ElementTree.

    The parsing is initiated by iterating over the parser object:

    >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
    >>> for kind, data, pos in parser:
    ...     print('%s %s' % (kind, data))
    START (QName('root'), Attrs([(QName('id'), u'2')]))
    START (QName('child'), Attrs())
    TEXT Foo
    END child
    END root
    """

    # One <!ENTITY> declaration per named HTML entity; joined below into the
    # DTD text that `_build_foreign` feeds to Expat.
    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
                   entities.name2codepoint.items()]
    _external_dtd = '\n'.join(_entitydefs)

    def __init__(self, source, filename=None, encoding=None):
        """Initialize the parser for the given XML input.

        :param source: the XML text as a file-like object
        :param filename: the name of the file, if appropriate
        :param encoding: the encoding of the file; if not specified, the
                         encoding is assumed to be ASCII, UTF-8, or UTF-16, or
                         whatever the encoding specified in the XML declaration
                         (if any)
        """
        self.source = source
        self.filename = filename

        # Setup the Expat parser
        # The second argument is Expat's namespace separator.
        parser = expat.ParserCreate(encoding, '}')
        parser.buffer_text = True
        parser.returns_unicode = True
        parser.ordered_attributes = True

        parser.StartElementHandler = self._handle_start
        parser.EndElementHandler = self._handle_end
        parser.CharacterDataHandler = self._handle_data
        parser.StartDoctypeDeclHandler = self._handle_doctype
        parser.StartNamespaceDeclHandler = self._handle_start_ns
        parser.EndNamespaceDeclHandler = self._handle_end_ns
        parser.StartCdataSectionHandler = self._handle_start_cdata
        parser.EndCdataSectionHandler = self._handle_end_cdata
        parser.ProcessingInstructionHandler = self._handle_pi
        parser.XmlDeclHandler = self._handle_xml_decl
        parser.CommentHandler = self._handle_comment

        # Tell Expat that we'll handle non-XML entities ourselves
        # (in _handle_other)
        parser.DefaultHandler = self._handle_other
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.UseForeignDTD()
        parser.ExternalEntityRefHandler = self._build_foreign

        self.expat = parser
        # Events produced by the Expat callbacks, drained by `parse`.
        self._queue = []

    def parse(self):
        """Generator that parses the XML source, yielding markup events.

        :return: a markup event stream
        :raises ParseError: if the XML text is not well formed
        """
        def _generate():
            try:
                bufsize = 4 * 1024 # 4K
                done = False
                while 1:
                    # Feed Expat chunk by chunk until its callbacks have
                    # queued at least one event or the source is exhausted.
                    while not done and len(self._queue) == 0:
                        data = self.source.read(bufsize)
                        if data == '': # end of data
                            if hasattr(self, 'expat'):
                                self.expat.Parse('', True)
                                del self.expat # get rid of circular references
                            done = True
                        else:
                            # Unicode input is encoded to UTF-8 before being
                            # handed to Expat.
                            if isinstance(data, unicode):
                                data = data.encode('utf-8')
                            self.expat.Parse(data, False)
                    for event in self._queue:
                        yield event
                    self._queue = []
                    if done:
                        break
            except expat.ExpatError, e:
                # Re-raise as ParseError, carrying the file name and the
                # position reported by Expat.
                msg = str(e)
                raise ParseError(msg, self.filename, e.lineno, e.offset)
        return Stream(_generate()).filter(_coalesce)

    def __iter__(self):
        """Iterate over the events produced by `parse`."""
        return iter(self.parse())

    def _build_foreign(self, context, base, sysid, pubid):
        """External entity handler: parse the generated entity DTD with a
        sub-parser and report success to Expat."""
        parser = self.expat.ExternalEntityParserCreate(context)
        parser.ParseFile(StringIO(self._external_dtd))
        return 1

    def _enqueue(self, kind, data=None, pos=None):
        """Append an event to the queue.

        :param kind: the event kind
        :param data: the event data
        :param pos: the event position; defaults to the parser's current
                    position
        """
        if pos is None:
            pos = self._getpos()
        if kind is TEXT:
            # Expat reports the *end* of the text event as current position. We
            # try to fix that up here as much as possible. Unfortunately, the
            # offset is only valid for single-line text. For multi-line text,
            # it is apparently not possible to determine at what offset it
            # started
            if '\n' in data:
                lines = data.splitlines()
                lineno = pos[1] - len(lines) + 1
                offset = -1
            else:
                lineno = pos[1]
                offset = pos[2] - len(data)
            pos = (pos[0], lineno, offset)
        self._queue.append((kind, data, pos))

    def _getpos_unknown(self):
        """Return a position tuple with unknown line and column."""
        return (self.filename, -1, -1)

    def _getpos(self):
        """Return ``(filename, line, column)`` for Expat's current position."""
        return (self.filename, self.expat.CurrentLineNumber,
                self.expat.CurrentColumnNumber)

    def _handle_start(self, tag, attrib):
        # With ``ordered_attributes`` set, ``attrib`` is a flat list of
        # alternating names and values; the zip idiom pairs them up.
        attrs = Attrs([(QName(name), value) for name, value in
                       zip(*[iter(attrib)] * 2)])
        self._enqueue(START, (QName(tag), attrs))

    def _handle_end(self, tag):
        self._enqueue(END, QName(tag))

    def _handle_data(self, text):
        self._enqueue(TEXT, text)

    def _handle_xml_decl(self, version, encoding, standalone):
        self._enqueue(XML_DECL, (version, encoding, standalone))

    def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
        # Note the order: the event data is (name, pubid, sysid).
        self._enqueue(DOCTYPE, (name, pubid, sysid))

    def _handle_start_ns(self, prefix, uri):
        # A missing prefix is reported as the empty string.
        self._enqueue(START_NS, (prefix or '', uri))

    def _handle_end_ns(self, prefix):
        self._enqueue(END_NS, prefix or '')

    def _handle_start_cdata(self):
        self._enqueue(START_CDATA)

    def _handle_end_cdata(self):
        self._enqueue(END_CDATA)

    def _handle_pi(self, target, data):
        self._enqueue(PI, (target, data))

    def _handle_comment(self, text):
        self._enqueue(COMMENT, text)

    def _handle_other(self, text):
        """Default handler: resolve entity references by HTML entity name.

        Text that does not start with ``&`` is ignored. An unknown entity
        name raises an Expat error with code
        ``XML_ERROR_UNDEFINED_ENTITY``.
        """
        if text.startswith('&'):
            # deal with undefined entities
            try:
                # ``text[1:-1]`` strips the leading '&' and the last
                # character (presumably the trailing ';').
                text = unichr(entities.name2codepoint[text[1:-1]])
                self._enqueue(TEXT, text)
            except KeyError:
                filename, lineno, offset = self._getpos()
                error = expat.error('undefined entity "%s": line %d, column %d'
                                    % (text, lineno, offset))
                error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY
                error.lineno = lineno
                error.offset = offset
                raise error
markup stream. + + Unlike with `XMLParser`, the returned stream is reusable, meaning it can be + iterated over multiple times: + + >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>') + >>> print(xml) + <doc><elem>Foo</elem><elem>Bar</elem></doc> + >>> print(xml.select('elem')) + <elem>Foo</elem><elem>Bar</elem> + >>> print(xml.select('elem/text()')) + FooBar + + :param text: the XML source + :return: the parsed XML event stream + :raises ParseError: if the XML text is not well-formed + """ + return Stream(list(XMLParser(StringIO(text)))) + + +class HTMLParser(html.HTMLParser, object): + """Parser for HTML input based on the Python `HTMLParser` module. + + This class provides the same interface for generating stream events as + `XMLParser`, and attempts to automatically balance tags. + + The parsing is initiated by iterating over the parser object: + + >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>')) + >>> for kind, data, pos in parser: + ... print('%s %s' % (kind, data)) + START (QName('ul'), Attrs([(QName('compact'), u'compact')])) + START (QName('li'), Attrs()) + TEXT Foo + END li + END ul + """ + + _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', + 'hr', 'img', 'input', 'isindex', 'link', 'meta', + 'param']) + + def __init__(self, source, filename=None, encoding='utf-8'): + """Initialize the parser for the given HTML input. + + :param source: the HTML text as a file-like object + :param filename: the name of the file, if known + :param filename: encoding of the file; ignored if the input is unicode + """ + html.HTMLParser.__init__(self) + self.source = source + self.filename = filename + self.encoding = encoding + self._queue = [] + self._open_tags = [] + + def parse(self): + """Generator that parses the HTML source, yielding markup events. 
+ + :return: a markup event stream + :raises ParseError: if the HTML text is not well formed + """ + def _generate(): + try: + bufsize = 4 * 1024 # 4K + done = False + while 1: + while not done and len(self._queue) == 0: + data = self.source.read(bufsize) + if data == '': # end of data + self.close() + done = True + else: + self.feed(data) + for kind, data, pos in self._queue: + yield kind, data, pos + self._queue = [] + if done: + open_tags = self._open_tags + open_tags.reverse() + for tag in open_tags: + yield END, QName(tag), pos + break + except html.HTMLParseError, e: + msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) + raise ParseError(msg, self.filename, e.lineno, e.offset) + return Stream(_generate()).filter(_coalesce) + + def __iter__(self): + return iter(self.parse()) + + def _enqueue(self, kind, data, pos=None): + if pos is None: + pos = self._getpos() + self._queue.append((kind, data, pos)) + + def _getpos(self): + lineno, column = self.getpos() + return (self.filename, lineno, column) + + def handle_starttag(self, tag, attrib): + fixed_attrib = [] + for name, value in attrib: # Fixup minimized attributes + if value is None: + value = unicode(name) + elif not isinstance(value, unicode): + value = value.decode(self.encoding, 'replace') + fixed_attrib.append((QName(name), stripentities(value))) + + self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) + if tag in self._EMPTY_ELEMS: + self._enqueue(END, QName(tag)) + else: + self._open_tags.append(tag) + + def handle_endtag(self, tag): + if tag not in self._EMPTY_ELEMS: + while self._open_tags: + open_tag = self._open_tags.pop() + self._enqueue(END, QName(open_tag)) + if open_tag.lower() == tag.lower(): + break + + def handle_data(self, text): + if not isinstance(text, unicode): + text = text.decode(self.encoding, 'replace') + self._enqueue(TEXT, text) + + def handle_charref(self, name): + if name.lower().startswith('x'): + text = unichr(int(name[1:], 16)) + else: + text = unichr(int(name)) + 
self._enqueue(TEXT, text) + + def handle_entityref(self, name): + try: + text = unichr(entities.name2codepoint[name]) + except KeyError: + text = '&%s;' % name + self._enqueue(TEXT, text) + + def handle_pi(self, data): + target, data = data.split(None, 1) + if data.endswith('?'): + data = data[:-1] + self._enqueue(PI, (target.strip(), data.strip())) + + def handle_comment(self, text): + self._enqueue(COMMENT, text) + + +def HTML(text, encoding='utf-8'): + """Parse the given HTML source and return a markup stream. + + Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be + iterated over multiple times: + + >>> html = HTML('<body><h1>Foo</h1></body>') + >>> print(html) + <body><h1>Foo</h1></body> + >>> print(html.select('h1')) + <h1>Foo</h1> + >>> print(html.select('h1/text()')) + Foo + + :param text: the HTML source + :return: the parsed XML event stream + :raises ParseError: if the HTML text is not well-formed, and error recovery + fails + """ + return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) + + +def _coalesce(stream): + """Coalesces adjacent TEXT events into a single event.""" + textbuf = [] + textpos = None + for kind, data, pos in chain(stream, [(None, None, None)]): + if kind is TEXT: + textbuf.append(data) + if textpos is None: + textpos = pos + else: + if textbuf: + yield TEXT, ''.join(textbuf), textpos + del textbuf[:] + textpos = None + if kind: + yield kind, data, pos diff --git a/websdk/genshi/output.py b/websdk/genshi/output.py new file mode 100644 index 0000000..2ebb38b --- /dev/null +++ b/websdk/genshi/output.py @@ -0,0 +1,838 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. 
For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""This module provides different kinds of serialization methods for XML event +streams. +""" + +from itertools import chain +import re + +from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind +from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ + START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE + +__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', + 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] +__docformat__ = 'restructuredtext en' + + +def encode(iterator, method='xml', encoding='utf-8', out=None): + """Encode serializer output into a string. + + :param iterator: the iterator returned from serializing a stream (basically + any iterator that yields unicode objects) + :param method: the serialization method; determines how characters not + representable in the specified encoding are treated + :param encoding: how the output string should be encoded; if set to `None`, + this method returns a `unicode` object + :param out: a file-like object that the output should be written to + instead of being returned as one big string; note that if + this is a file or socket (or similar), the `encoding` must + not be `None` (that is, the output must be encoded) + :return: a `str` or `unicode` object (depending on the `encoding` + parameter), or `None` if the `out` parameter is provided + + :since: version 0.4.1 + :note: Changed in 0.5: added the `out` parameter + """ + if encoding is not None: + errors = 'replace' + if method != 'text' and not isinstance(method, TextSerializer): + errors = 'xmlcharrefreplace' + _encode = lambda string: string.encode(encoding, errors) + else: + _encode = lambda string: string + if out is None: + return _encode(''.join(list(iterator))) + for chunk in iterator: + out.write(_encode(chunk)) + + +def get_serializer(method='xml', **kwargs): + 
"""Return a serializer object for the given method. + + :param method: the serialization method; can be either "xml", "xhtml", + "html", "text", or a custom serializer class + + Any additional keyword arguments are passed to the serializer, and thus + depend on the `method` parameter value. + + :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer` + :since: version 0.4.1 + """ + if isinstance(method, basestring): + method = {'xml': XMLSerializer, + 'xhtml': XHTMLSerializer, + 'html': HTMLSerializer, + 'text': TextSerializer}[method.lower()] + return method(**kwargs) + + +class DocType(object): + """Defines a number of commonly used DOCTYPE declarations as constants.""" + + HTML_STRICT = ( + 'html', '-//W3C//DTD HTML 4.01//EN', + 'http://www.w3.org/TR/html4/strict.dtd' + ) + HTML_TRANSITIONAL = ( + 'html', '-//W3C//DTD HTML 4.01 Transitional//EN', + 'http://www.w3.org/TR/html4/loose.dtd' + ) + HTML_FRAMESET = ( + 'html', '-//W3C//DTD HTML 4.01 Frameset//EN', + 'http://www.w3.org/TR/html4/frameset.dtd' + ) + HTML = HTML_STRICT + + HTML5 = ('html', None, None) + + XHTML_STRICT = ( + 'html', '-//W3C//DTD XHTML 1.0 Strict//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' + ) + XHTML_TRANSITIONAL = ( + 'html', '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' + ) + XHTML_FRAMESET = ( + 'html', '-//W3C//DTD XHTML 1.0 Frameset//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd' + ) + XHTML = XHTML_STRICT + + XHTML11 = ( + 'html', '-//W3C//DTD XHTML 1.1//EN', + 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' + ) + + SVG_FULL = ( + 'svg', '-//W3C//DTD SVG 1.1//EN', + 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd' + ) + SVG_BASIC = ( + 'svg', '-//W3C//DTD SVG Basic 1.1//EN', + 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd' + ) + SVG_TINY = ( + 'svg', '-//W3C//DTD SVG Tiny 1.1//EN', + 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd' + ) + SVG = SVG_FULL + + 
@classmethod + def get(cls, name): + """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE`` + declaration for the specified name. + + The following names are recognized in this version: + * "html" or "html-strict" for the HTML 4.01 strict DTD + * "html-transitional" for the HTML 4.01 transitional DTD + * "html-frameset" for the HTML 4.01 frameset DTD + * "html5" for the ``DOCTYPE`` proposed for HTML5 + * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD + * "xhtml-transitional" for the XHTML 1.0 transitional DTD + * "xhtml-frameset" for the XHTML 1.0 frameset DTD + * "xhtml11" for the XHTML 1.1 DTD + * "svg" or "svg-full" for the SVG 1.1 DTD + * "svg-basic" for the SVG Basic 1.1 DTD + * "svg-tiny" for the SVG Tiny 1.1 DTD + + :param name: the name of the ``DOCTYPE`` + :return: the ``(name, pubid, sysid)`` tuple for the requested + ``DOCTYPE``, or ``None`` if the name is not recognized + :since: version 0.4.1 + """ + return { + 'html': cls.HTML, 'html-strict': cls.HTML_STRICT, + 'html-transitional': DocType.HTML_TRANSITIONAL, + 'html-frameset': DocType.HTML_FRAMESET, + 'html5': cls.HTML5, + 'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT, + 'xhtml-transitional': cls.XHTML_TRANSITIONAL, + 'xhtml-frameset': cls.XHTML_FRAMESET, + 'xhtml11': cls.XHTML11, + 'svg': cls.SVG, 'svg-full': cls.SVG_FULL, + 'svg-basic': cls.SVG_BASIC, + 'svg-tiny': cls.SVG_TINY + }.get(name.lower()) + + +class XMLSerializer(object): + """Produces XML text from an event stream. + + >>> from genshi.builder import tag + >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) + >>> print(''.join(XMLSerializer()(elem.generate()))) + <div><a href="foo"/><br/><hr noshade="True"/></div> + """ + + _PRESERVE_SPACE = frozenset() + + def __init__(self, doctype=None, strip_whitespace=True, + namespace_prefixes=None, cache=True): + """Initialize the XML serializer. 
+ + :param doctype: a ``(name, pubid, sysid)`` tuple that represents the + DOCTYPE declaration that should be included at the top + of the generated output, or the name of a DOCTYPE as + defined in `DocType.get` + :param strip_whitespace: whether extraneous whitespace should be + stripped from the output + :param cache: whether to cache the text output per event, which + improves performance for repetitive markup + :note: Changed in 0.4.2: The `doctype` parameter can now be a string. + :note: Changed in 0.6: The `cache` parameter was added + """ + self.filters = [EmptyTagFilter()] + if strip_whitespace: + self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) + self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes, + cache=cache)) + if doctype: + self.filters.append(DocTypeInserter(doctype)) + self.cache = cache + + def __call__(self, stream): + have_decl = have_doctype = False + in_cdata = False + + cache = {} + cache_get = cache.get + if self.cache: + def _emit(kind, input, output): + cache[kind, input] = output + return output + else: + def _emit(kind, input, output): + return output + + for filter_ in self.filters: + stream = filter_(stream) + for kind, data, pos in stream: + cached = cache_get((kind, data)) + if cached is not None: + yield cached + + elif kind is START or kind is EMPTY: + tag, attrib = data + buf = ['<', tag] + for attr, value in attrib: + buf += [' ', attr, '="', escape(value), '"'] + buf.append(kind is EMPTY and '/>' or '>') + yield _emit(kind, data, Markup(''.join(buf))) + + elif kind is END: + yield _emit(kind, data, Markup('</%s>' % data)) + + elif kind is TEXT: + if in_cdata: + yield _emit(kind, data, data) + else: + yield _emit(kind, data, escape(data, quotes=False)) + + elif kind is COMMENT: + yield _emit(kind, data, Markup('<!--%s-->' % data)) + + elif kind is XML_DECL and not have_decl: + version, encoding, standalone = data + buf = ['<?xml version="%s"' % version] + if encoding: + buf.append(' encoding="%s"' % 
encoding) + if standalone != -1: + standalone = standalone and 'yes' or 'no' + buf.append(' standalone="%s"' % standalone) + buf.append('?>\n') + yield Markup(''.join(buf)) + have_decl = True + + elif kind is DOCTYPE and not have_doctype: + name, pubid, sysid = data + buf = ['<!DOCTYPE %s'] + if pubid: + buf.append(' PUBLIC "%s"') + elif sysid: + buf.append(' SYSTEM') + if sysid: + buf.append(' "%s"') + buf.append('>\n') + yield Markup(''.join(buf)) % tuple([p for p in data if p]) + have_doctype = True + + elif kind is START_CDATA: + yield Markup('<![CDATA[') + in_cdata = True + + elif kind is END_CDATA: + yield Markup(']]>') + in_cdata = False + + elif kind is PI: + yield _emit(kind, data, Markup('<?%s %s?>' % data)) + + +class XHTMLSerializer(XMLSerializer): + """Produces XHTML text from an event stream. + + >>> from genshi.builder import tag + >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) + >>> print(''.join(XHTMLSerializer()(elem.generate()))) + <div><a href="foo"></a><br /><hr noshade="noshade" /></div> + """ + + _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', + 'hr', 'img', 'input', 'isindex', 'link', 'meta', + 'param']) + _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare', + 'defer', 'disabled', 'ismap', 'multiple', + 'nohref', 'noresize', 'noshade', 'nowrap']) + _PRESERVE_SPACE = frozenset([ + QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), + QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') + ]) + + def __init__(self, doctype=None, strip_whitespace=True, + namespace_prefixes=None, drop_xml_decl=True, cache=True): + super(XHTMLSerializer, self).__init__(doctype, False) + self.filters = [EmptyTagFilter()] + if strip_whitespace: + self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) + namespace_prefixes = namespace_prefixes or {} + namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' + self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes, + 
cache=cache)) + if doctype: + self.filters.append(DocTypeInserter(doctype)) + self.drop_xml_decl = drop_xml_decl + self.cache = cache + + def __call__(self, stream): + boolean_attrs = self._BOOLEAN_ATTRS + empty_elems = self._EMPTY_ELEMS + drop_xml_decl = self.drop_xml_decl + have_decl = have_doctype = False + in_cdata = False + + cache = {} + cache_get = cache.get + if self.cache: + def _emit(kind, input, output): + cache[kind, input] = output + return output + else: + def _emit(kind, input, output): + return output + + for filter_ in self.filters: + stream = filter_(stream) + for kind, data, pos in stream: + cached = cache_get((kind, data)) + if cached is not None: + yield cached + + elif kind is START or kind is EMPTY: + tag, attrib = data + buf = ['<', tag] + for attr, value in attrib: + if attr in boolean_attrs: + value = attr + elif attr == 'xml:lang' and 'lang' not in attrib: + buf += [' lang="', escape(value), '"'] + elif attr == 'xml:space': + continue + buf += [' ', attr, '="', escape(value), '"'] + if kind is EMPTY: + if tag in empty_elems: + buf.append(' />') + else: + buf.append('></%s>' % tag) + else: + buf.append('>') + yield _emit(kind, data, Markup(''.join(buf))) + + elif kind is END: + yield _emit(kind, data, Markup('</%s>' % data)) + + elif kind is TEXT: + if in_cdata: + yield _emit(kind, data, data) + else: + yield _emit(kind, data, escape(data, quotes=False)) + + elif kind is COMMENT: + yield _emit(kind, data, Markup('<!--%s-->' % data)) + + elif kind is DOCTYPE and not have_doctype: + name, pubid, sysid = data + buf = ['<!DOCTYPE %s'] + if pubid: + buf.append(' PUBLIC "%s"') + elif sysid: + buf.append(' SYSTEM') + if sysid: + buf.append(' "%s"') + buf.append('>\n') + yield Markup(''.join(buf)) % tuple([p for p in data if p]) + have_doctype = True + + elif kind is XML_DECL and not have_decl and not drop_xml_decl: + version, encoding, standalone = data + buf = ['<?xml version="%s"' % version] + if encoding: + buf.append(' encoding="%s"' % 
encoding) + if standalone != -1: + standalone = standalone and 'yes' or 'no' + buf.append(' standalone="%s"' % standalone) + buf.append('?>\n') + yield Markup(''.join(buf)) + have_decl = True + + elif kind is START_CDATA: + yield Markup('<![CDATA[') + in_cdata = True + + elif kind is END_CDATA: + yield Markup(']]>') + in_cdata = False + + elif kind is PI: + yield _emit(kind, data, Markup('<?%s %s?>' % data)) + + +class HTMLSerializer(XHTMLSerializer): + """Produces HTML text from an event stream. + + >>> from genshi.builder import tag + >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) + >>> print(''.join(HTMLSerializer()(elem.generate()))) + <div><a href="foo"></a><br><hr noshade></div> + """ + + _NOESCAPE_ELEMS = frozenset([ + QName('script'), QName('http://www.w3.org/1999/xhtml}script'), + QName('style'), QName('http://www.w3.org/1999/xhtml}style') + ]) + + def __init__(self, doctype=None, strip_whitespace=True, cache=True): + """Initialize the HTML serializer. + + :param doctype: a ``(name, pubid, sysid)`` tuple that represents the + DOCTYPE declaration that should be included at the top + of the generated output + :param strip_whitespace: whether extraneous whitespace should be + stripped from the output + :param cache: whether to cache the text output per event, which + improves performance for repetitive markup + :note: Changed in 0.6: The `cache` parameter was added + """ + super(HTMLSerializer, self).__init__(doctype, False) + self.filters = [EmptyTagFilter()] + if strip_whitespace: + self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, + self._NOESCAPE_ELEMS)) + self.filters.append(NamespaceFlattener(prefixes={ + 'http://www.w3.org/1999/xhtml': '' + }, cache=cache)) + if doctype: + self.filters.append(DocTypeInserter(doctype)) + self.cache = True + + def __call__(self, stream): + boolean_attrs = self._BOOLEAN_ATTRS + empty_elems = self._EMPTY_ELEMS + noescape_elems = self._NOESCAPE_ELEMS + have_doctype = False + noescape = False + 
+ cache = {} + cache_get = cache.get + if self.cache: + def _emit(kind, input, output): + cache[kind, input] = output + return output + else: + def _emit(kind, input, output): + return output + + for filter_ in self.filters: + stream = filter_(stream) + for kind, data, _ in stream: + output = cache_get((kind, data)) + if output is not None: + yield output + if (kind is START or kind is EMPTY) \ + and data[0] in noescape_elems: + noescape = True + elif kind is END: + noescape = False + + elif kind is START or kind is EMPTY: + tag, attrib = data + buf = ['<', tag] + for attr, value in attrib: + if attr in boolean_attrs: + if value: + buf += [' ', attr] + elif ':' in attr: + if attr == 'xml:lang' and 'lang' not in attrib: + buf += [' lang="', escape(value), '"'] + elif attr != 'xmlns': + buf += [' ', attr, '="', escape(value), '"'] + buf.append('>') + if kind is EMPTY: + if tag not in empty_elems: + buf.append('</%s>' % tag) + yield _emit(kind, data, Markup(''.join(buf))) + if tag in noescape_elems: + noescape = True + + elif kind is END: + yield _emit(kind, data, Markup('</%s>' % data)) + noescape = False + + elif kind is TEXT: + if noescape: + yield _emit(kind, data, data) + else: + yield _emit(kind, data, escape(data, quotes=False)) + + elif kind is COMMENT: + yield _emit(kind, data, Markup('<!--%s-->' % data)) + + elif kind is DOCTYPE and not have_doctype: + name, pubid, sysid = data + buf = ['<!DOCTYPE %s'] + if pubid: + buf.append(' PUBLIC "%s"') + elif sysid: + buf.append(' SYSTEM') + if sysid: + buf.append(' "%s"') + buf.append('>\n') + yield Markup(''.join(buf)) % tuple([p for p in data if p]) + have_doctype = True + + elif kind is PI: + yield _emit(kind, data, Markup('<?%s %s?>' % data)) + + +class TextSerializer(object): + """Produces plain text from an event stream. + + Only text events are included in the output. 
Unlike the other serializer, + special XML characters are not escaped: + + >>> from genshi.builder import tag + >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br) + >>> print(elem) + <div><a href="foo"><Hello!></a><br/></div> + >>> print(''.join(TextSerializer()(elem.generate()))) + <Hello!> + + If text events contain literal markup (instances of the `Markup` class), + that markup is by default passed through unchanged: + + >>> elem = tag.div(Markup('<a href="foo">Hello & Bye!</a><br/>')) + >>> print(elem.generate().render(TextSerializer, encoding=None)) + <a href="foo">Hello & Bye!</a><br/> + + You can use the ``strip_markup`` to change this behavior, so that tags and + entities are stripped from the output (or in the case of entities, + replaced with the equivalent character): + + >>> print(elem.generate().render(TextSerializer, strip_markup=True, + ... encoding=None)) + Hello & Bye! + """ + + def __init__(self, strip_markup=False): + """Create the serializer. + + :param strip_markup: whether markup (tags and encoded characters) found + in the text should be removed + """ + self.strip_markup = strip_markup + + def __call__(self, stream): + strip_markup = self.strip_markup + for event in stream: + if event[0] is TEXT: + data = event[1] + if strip_markup and type(data) is Markup: + data = data.striptags().stripentities() + yield unicode(data) + + +class EmptyTagFilter(object): + """Combines `START` and `STOP` events into `EMPTY` events for elements that + have no contents. 
+ """ + + EMPTY = StreamEventKind('EMPTY') + + def __call__(self, stream): + prev = (None, None, None) + for ev in stream: + if prev[0] is START: + if ev[0] is END: + prev = EMPTY, prev[1], prev[2] + yield prev + continue + else: + yield prev + if ev[0] is not START: + yield ev + prev = ev + + +EMPTY = EmptyTagFilter.EMPTY + + +class NamespaceFlattener(object): + r"""Output stream filter that removes namespace information from the stream, + instead adding namespace attributes and prefixes as needed. + + :param prefixes: optional mapping of namespace URIs to prefixes + + >>> from genshi.input import XML + >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2"> + ... <two:item/> + ... </doc>''') + >>> for kind, data, pos in NamespaceFlattener()(xml): + ... print('%s %r' % (kind, data)) + START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')])) + TEXT u'\n ' + START (u'two:item', Attrs()) + END u'two:item' + TEXT u'\n' + END u'doc' + """ + + def __init__(self, prefixes=None, cache=True): + self.prefixes = {XML_NAMESPACE.uri: 'xml'} + if prefixes is not None: + self.prefixes.update(prefixes) + self.cache = cache + + def __call__(self, stream): + cache = {} + cache_get = cache.get + if self.cache: + def _emit(kind, input, output, pos): + cache[kind, input] = output + return kind, output, pos + else: + def _emit(kind, input, output, pos): + return output + + prefixes = dict([(v, [k]) for k, v in self.prefixes.items()]) + namespaces = {XML_NAMESPACE.uri: ['xml']} + def _push_ns(prefix, uri): + namespaces.setdefault(uri, []).append(prefix) + prefixes.setdefault(prefix, []).append(uri) + cache.clear() + def _pop_ns(prefix): + uris = prefixes.get(prefix) + uri = uris.pop() + if not uris: + del prefixes[prefix] + if uri not in uris or uri != uris[-1]: + uri_prefixes = namespaces[uri] + uri_prefixes.pop() + if not uri_prefixes: + del namespaces[uri] + cache.clear() + return uri + + ns_attrs = [] + _push_ns_attr = ns_attrs.append + def _make_ns_attr(prefix, uri): + return 
'xmlns%s' % (prefix and ':%s' % prefix or ''), uri + + def _gen_prefix(): + val = 0 + while 1: + val += 1 + yield 'ns%d' % val + _gen_prefix = _gen_prefix().next + + for kind, data, pos in stream: + output = cache_get((kind, data)) + if output is not None: + yield kind, output, pos + + elif kind is START or kind is EMPTY: + tag, attrs = data + + tagname = tag.localname + tagns = tag.namespace + if tagns: + if tagns in namespaces: + prefix = namespaces[tagns][-1] + if prefix: + tagname = '%s:%s' % (prefix, tagname) + else: + _push_ns_attr(('xmlns', tagns)) + _push_ns('', tagns) + + new_attrs = [] + for attr, value in attrs: + attrname = attr.localname + attrns = attr.namespace + if attrns: + if attrns not in namespaces: + prefix = _gen_prefix() + _push_ns(prefix, attrns) + _push_ns_attr(('xmlns:%s' % prefix, attrns)) + else: + prefix = namespaces[attrns][-1] + if prefix: + attrname = '%s:%s' % (prefix, attrname) + new_attrs.append((attrname, value)) + + yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos) + del ns_attrs[:] + + elif kind is END: + tagname = data.localname + tagns = data.namespace + if tagns: + prefix = namespaces[tagns][-1] + if prefix: + tagname = '%s:%s' % (prefix, tagname) + yield _emit(kind, data, tagname, pos) + + elif kind is START_NS: + prefix, uri = data + if uri not in namespaces: + prefix = prefixes.get(uri, [prefix])[-1] + _push_ns_attr(_make_ns_attr(prefix, uri)) + _push_ns(prefix, uri) + + elif kind is END_NS: + if data in prefixes: + uri = _pop_ns(data) + if ns_attrs: + attr = _make_ns_attr(data, uri) + if attr in ns_attrs: + ns_attrs.remove(attr) + + else: + yield kind, data, pos + + +class WhitespaceFilter(object): + """A filter that removes extraneous ignorable white space from the + stream. + """ + + def __init__(self, preserve=None, noescape=None): + """Initialize the filter. 
class DocTypeInserter(object):
    """Stream filter that places a DOCTYPE event at the right spot.

    The DOCTYPE is emitted directly after a leading XML declaration when the
    stream starts with one, and as the very first event otherwise.
    """

    def __init__(self, doctype):
        """Set up the filter.

        :param doctype: the DOCTYPE, either as a string (resolved through
                        `DocType.get`) or as a ready-made DocType object
        """
        resolved = doctype
        if isinstance(resolved, basestring):
            resolved = DocType.get(resolved)
        # The event carries a synthetic position, as it has no source location
        self.doctype_event = (DOCTYPE, resolved, (None, -1, -1))

    def __call__(self, stream):
        """Yield the events of `stream` with the DOCTYPE event inserted."""
        events = iter(stream)

        # Only the first event decides where the DOCTYPE goes
        for kind, data, pos in events:
            if kind is XML_DECL:
                yield (kind, data, pos)
                yield self.doctype_event
            else:
                yield self.doctype_event
                yield (kind, data, pos)
            break
        else:
            # Empty stream: the DOCTYPE is the only output
            yield self.doctype_event
            return

        # Everything after the first event is passed through untouched
        for kind, data, pos in events:
            yield (kind, data, pos)
class Axis(object):
    """Namespace for the constants naming the supported XPath axes."""

    ATTRIBUTE = 'attribute'
    CHILD = 'child'
    DESCENDANT = 'descendant'
    DESCENDANT_OR_SELF = 'descendant-or-self'
    SELF = 'self'

    @classmethod
    def forname(cls, name):
        """Look up an axis constant by its XPath name.

        :param name: the axis name as written in an expression, for example
                     ``descendant-or-self``
        :return: the matching constant, or `None` if the class has no
                 attribute for that name
        """
        # Axis names use dashes, the class attributes use underscores
        attribute = name.replace('-', '_').upper()
        return getattr(cls, attribute, None)
+ kind, data, pos = event[:3] + retval = None + + # Manage the stack that tells us "where we are" in the stream + if kind is END: + if stack: + stack.pop() + return None + if kind is START_NS or kind is END_NS \ + or kind is START_CDATA or kind is END_CDATA: + # should we make namespaces work? + return None + + pos_queue = deque([(pos, cou, []) for pos, cou in stack[-1]]) + next_pos = [] + + # length of real part of path - we omit attribute axis + real_len = len(steps) - ((steps[-1][0] == ATTRIBUTE) or 1 and 0) + last_checked = -1 + + # places where we have to check for match, are these + # provided by parent + while pos_queue: + x, pcou, mcou = pos_queue.popleft() + axis, nodetest, predicates = steps[x] + + # we need to push descendant-like positions from parent + # further + if (axis is DESCENDANT or axis is DESCENDANT_OR_SELF) and pcou: + if next_pos and next_pos[-1][0] == x: + next_pos[-1][1].extend(pcou) + else: + next_pos.append((x, pcou)) + + # nodetest first + if not nodetest(kind, data, pos, namespaces, variables): + continue + + # counters packs that were already bad + missed = set() + counters_len = len(pcou) + len(mcou) + + # number of counters - we have to create one + # for every context position based predicate + cnum = 0 + + # tells if we have match with position x + matched = True + + if predicates: + for predicate in predicates: + pretval = predicate(kind, data, pos, + namespaces, + variables) + if type(pretval) is float: # FIXME <- need to check + # this for other types that + # can be coerced to float + + # each counter pack needs to be checked + for i, cou in enumerate(chain(pcou, mcou)): + # it was bad before + if i in missed: + continue + + if len(cou) < cnum + 1: + cou.append(0) + cou[cnum] += 1 + + # it is bad now + if cou[cnum] != int(pretval): + missed.add(i) + + # none of counters pack was good + if len(missed) == counters_len: + pretval = False + cnum += 1 + + if not pretval: + matched = False + break + + if not matched: + continue + + 
# counter for next position with current node as context node + child_counter = [] + + if x + 1 == real_len: + # we reached end of expression, because x + 1 + # is equal to the length of expression + matched = True + axis, nodetest, predicates = steps[-1] + if axis is ATTRIBUTE: + matched = nodetest(kind, data, pos, namespaces, + variables) + if matched: + retval = matched + else: + next_axis = steps[x + 1][0] + + # if next axis allows matching self we have + # to add next position to our queue + if next_axis is DESCENDANT_OR_SELF or next_axis is SELF: + if not pos_queue or pos_queue[0][0] > x + 1: + pos_queue.appendleft((x + 1, [], [child_counter])) + else: + pos_queue[0][2].append(child_counter) + + # if axis is not self we have to add it to child's list + if next_axis is not SELF: + next_pos.append((x + 1, [child_counter])) + + if kind is START: + stack.append(next_pos) + + return retval + + return _test + + +class SimplePathStrategy(object): + """Strategy for path with only local names, attributes and text nodes.""" + + @classmethod + def supports(cls, path): + if path[0][0] is ATTRIBUTE: + return False + allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest) + for _, nodetest, predicates in path: + if predicates: + return False + if not isinstance(nodetest, allowed_tests): + return False + return True + + def __init__(self, path): + # fragments is list of tuples (fragment, pi, attr, self_beginning) + # fragment is list of nodetests for fragment of path with only + # child:: axes between + # pi is KMP partial match table for this fragment + # attr is attribute nodetest if fragment ends with @ and None otherwise + # self_beginning is True if axis for first fragment element + # was self (first fragment) or descendant-or-self (farther fragment) + self.fragments = [] + + self_beginning = False + fragment = [] + + def nodes_equal(node1, node2): + """Tests if two node tests are equal""" + if type(node1) is not type(node2): + return False + if type(node1) == 
LocalNameTest: + return node1.name == node2.name + return True + + def calculate_pi(f): + """KMP prefix calculation for table""" + # the indexes in prefix table are shifted by one + # in comparision with common implementations + # pi[i] = NORMAL_PI[i + 1] + if len(f) == 0: + return [] + pi = [0] + s = 0 + for i in range(1, len(f)): + while s > 0 and not nodes_equal(f[s], f[i]): + s = pi[s-1] + if nodes_equal(f[s], f[i]): + s += 1 + pi.append(s) + return pi + + for axis in path: + if axis[0] is SELF: + if len(fragment) != 0: + # if element is not first in fragment it has to be + # the same as previous one + # for example child::a/self::b is always wrong + if axis[1] != fragment[-1][1]: + self.fragments = None + return + else: + self_beginning = True + fragment.append(axis[1]) + elif axis[0] is CHILD: + fragment.append(axis[1]) + elif axis[0] is ATTRIBUTE: + pi = calculate_pi(fragment) + self.fragments.append((fragment, pi, axis[1], self_beginning)) + # attribute has always to be at the end, so we can jump out + return + else: + pi = calculate_pi(fragment) + self.fragments.append((fragment, pi, None, self_beginning)) + fragment = [axis[1]] + if axis[0] is DESCENDANT: + self_beginning = False + else: # DESCENDANT_OR_SELF + self_beginning = True + pi = calculate_pi(fragment) + self.fragments.append((fragment, pi, None, self_beginning)) + + def test(self, ignore_context): + # stack of triples (fid, p, ic) + # fid is index of current fragment + # p is position in this fragment + # ic is if we ignore context in this fragment + stack = [] + stack_push = stack.append + stack_pop = stack.pop + frags = self.fragments + frags_len = len(frags) + + def _test(event, namespaces, variables, updateonly=False): + # expression found impossible during init + if frags is None: + return None + + kind, data, pos = event[:3] + + # skip events we don't care about + if kind is END: + if stack: + stack_pop() + return None + if kind is START_NS or kind is END_NS \ + or kind is START_CDATA or 
kind is END_CDATA: + return None + + if not stack: + # root node, nothing on stack, special case + fid = 0 + # skip empty fragments (there can be actually only one) + while not frags[fid][0]: + fid += 1 + p = 0 + # empty fragment means descendant node at beginning + ic = ignore_context or (fid > 0) + + # expression can match first node, if first axis is self::, + # descendant-or-self:: or if ignore_context is True and + # axis is not descendant:: + if not frags[fid][3] and (not ignore_context or fid > 0): + # axis is not self-beggining, we have to skip this node + stack_push((fid, p, ic)) + return None + else: + # take position of parent + fid, p, ic = stack[-1] + + if fid is not None and not ic: + # fragment not ignoring context - we can't jump back + frag, pi, attrib, _ = frags[fid] + frag_len = len(frag) + + if p == frag_len: + # that probably means empty first fragment + pass + elif frag[p](kind, data, pos, namespaces, variables): + # match, so we can go further + p += 1 + else: + # not matched, so there will be no match in subtree + fid, p = None, None + + if p == frag_len and fid + 1 != frags_len: + # we made it to end of fragment, we can go to following + fid += 1 + p = 0 + ic = True + + if fid is None: + # there was no match in fragment not ignoring context + if kind is START: + stack_push((fid, p, ic)) + return None + + if ic: + # we are in fragment ignoring context + while True: + frag, pi, attrib, _ = frags[fid] + frag_len = len(frag) + + # KMP new "character" + while p > 0 and (p >= frag_len or not \ + frag[p](kind, data, pos, namespaces, variables)): + p = pi[p-1] + if frag[p](kind, data, pos, namespaces, variables): + p += 1 + + if p == frag_len: + # end of fragment reached + if fid + 1 == frags_len: + # that was last fragment + break + else: + fid += 1 + p = 0 + ic = True + if not frags[fid][3]: + # next fragment not self-beginning + break + else: + break + + if kind is START: + # we have to put new position on stack, for children + + if not ic and 
fid + 1 == frags_len and p == frag_len: + # it is end of the only, not context ignoring fragment + # so there will be no matches in subtree + stack_push((None, None, ic)) + else: + stack_push((fid, p, ic)) + + # have we reached the end of the last fragment? + if fid + 1 == frags_len and p == frag_len: + if attrib: # attribute ended path, return value + return attrib(kind, data, pos, namespaces, variables) + return True + + return None + + return _test + + +class SingleStepStrategy(object): + + @classmethod + def supports(cls, path): + return len(path) == 1 + + def __init__(self, path): + self.path = path + + def test(self, ignore_context): + steps = self.path + if steps[0][0] is ATTRIBUTE: + steps = [_DOTSLASH] + steps + select_attr = steps[-1][0] is ATTRIBUTE and steps[-1][1] or None + + # for every position in expression stores counters' list + # it is used for position based predicates + counters = [] + depth = [0] + + def _test(event, namespaces, variables, updateonly=False): + kind, data, pos = event[:3] + + # Manage the stack that tells us "where we are" in the stream + if kind is END: + if not ignore_context: + depth[0] -= 1 + return None + elif kind is START_NS or kind is END_NS \ + or kind is START_CDATA or kind is END_CDATA: + # should we make namespaces work? 
+ return None + + if not ignore_context: + outside = (steps[0][0] is SELF and depth[0] != 0) \ + or (steps[0][0] is CHILD and depth[0] != 1) \ + or (steps[0][0] is DESCENDANT and depth[0] < 1) + if kind is START: + depth[0] += 1 + if outside: + return None + + axis, nodetest, predicates = steps[0] + if not nodetest(kind, data, pos, namespaces, variables): + return None + + if predicates: + cnum = 0 + for predicate in predicates: + pretval = predicate(kind, data, pos, namespaces, variables) + if type(pretval) is float: # FIXME <- need to check this + # for other types that can be + # coerced to float + if len(counters) < cnum + 1: + counters.append(0) + counters[cnum] += 1 + if counters[cnum] != int(pretval): + pretval = False + cnum += 1 + if not pretval: + return None + + if select_attr: + return select_attr(kind, data, pos, namespaces, variables) + + return True + + return _test + + +class Path(object): + """Implements basic XPath support on streams. + + Instances of this class represent a "compiled" XPath expression, and + provide methods for testing the path against a stream, as well as + extracting a substream matching that path. + """ + + STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy) + + def __init__(self, text, filename=None, lineno=-1): + """Create the path object from a string. 
+ + :param text: the path expression + :param filename: the name of the file in which the path expression was + found (used in error messages) + :param lineno: the line on which the expression was found + """ + self.source = text + self.paths = PathParser(text, filename, lineno).parse() + self.strategies = [] + for path in self.paths: + for strategy_class in self.STRATEGIES: + if strategy_class.supports(path): + self.strategies.append(strategy_class(path)) + break + else: + raise NotImplemented('No strategy found for path') + + def __repr__(self): + paths = [] + for path in self.paths: + steps = [] + for axis, nodetest, predicates in path: + steps.append('%s::%s' % (axis, nodetest)) + for predicate in predicates: + steps[-1] += '[%s]' % predicate + paths.append('/'.join(steps)) + return '<%s "%s">' % (type(self).__name__, '|'.join(paths)) + + def select(self, stream, namespaces=None, variables=None): + """Returns a substream of the given stream that matches the path. + + If there are no matches, this method returns an empty stream. 
+ + >>> from genshi.input import XML + >>> xml = XML('<root><elem><child>Text</child></elem></root>') + + >>> print(Path('.//child').select(xml)) + <child>Text</child> + + >>> print(Path('.//child/text()').select(xml)) + Text + + :param stream: the stream to select from + :param namespaces: (optional) a mapping of namespace prefixes to URIs + :param variables: (optional) a mapping of variable names to values + :return: the substream matching the path, or an empty stream + :rtype: `Stream` + """ + if namespaces is None: + namespaces = {} + if variables is None: + variables = {} + stream = iter(stream) + def _generate(stream=stream, ns=namespaces, vs=variables): + next = stream.next + test = self.test() + for event in stream: + result = test(event, ns, vs) + if result is True: + yield event + if event[0] is START: + depth = 1 + while depth > 0: + subevent = next() + if subevent[0] is START: + depth += 1 + elif subevent[0] is END: + depth -= 1 + yield subevent + test(subevent, ns, vs, updateonly=True) + elif result: + yield result + return Stream(_generate(), + serializer=getattr(stream, 'serializer', None)) + + def test(self, ignore_context=False): + """Returns a function that can be used to track whether the path matches + a specific stream event. + + The function returned expects the positional arguments ``event``, + ``namespaces`` and ``variables``. The first is a stream event, while the + latter two are a mapping of namespace prefixes to URIs, and a mapping + of variable names to values, respectively. In addition, the function + accepts an ``updateonly`` keyword argument that default to ``False``. If + it is set to ``True``, the function only updates its internal state, + but does not perform any tests or return a result. + + If the path matches the event, the function returns the match (for + example, a `START` or `TEXT` event.) Otherwise, it returns ``None``. 
class PathSyntaxError(Exception):
    """Raised for XPath expressions that are syntactically incorrect.

    The location details are kept on the instance as the ``filename``,
    ``lineno`` and ``offset`` attributes.
    """

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Build the error, adding the location to the message if known.

        :param message: description of the problem
        :param filename: name of the file containing the expression; when
                         given, file name and line are appended to the message
        :param lineno: line on which the expression was found
        :param offset: offset of the error, stored on the instance only
        """
        text = message
        if filename:
            text = '%s (%s, line %d)' % (message, filename, lineno)
        super(PathSyntaxError, self).__init__(text)
        self.filename, self.lineno, self.offset = filename, lineno, offset
self._tokenize(text)] if t] + self.pos = 0 + + # Tokenizer + + @property + def at_end(self): + return self.pos == len(self.tokens) - 1 + + @property + def cur_token(self): + return self.tokens[self.pos] + + def next_token(self): + self.pos += 1 + return self.tokens[self.pos] + + def peek_token(self): + if not self.at_end: + return self.tokens[self.pos + 1] + return None + + # Recursive descent parser + + def parse(self): + """Parses the XPath expression and returns a list of location path + tests. + + For union expressions (such as `*|text()`), this function returns one + test for each operand in the union. For patch expressions that don't + use the union operator, the function always returns a list of size 1. + + Each path test in turn is a sequence of tests that correspond to the + location steps, each tuples of the form `(axis, testfunc, predicates)` + """ + paths = [self._location_path()] + while self.cur_token == '|': + self.next_token() + paths.append(self._location_path()) + if not self.at_end: + raise PathSyntaxError('Unexpected token %r after end of expression' + % self.cur_token, self.filename, self.lineno) + return paths + + def _location_path(self): + steps = [] + while True: + if self.cur_token.startswith('/'): + if not steps: + if self.cur_token == '//': + # hack to make //* match every node - also root + self.next_token() + axis, nodetest, predicates = self._location_step() + steps.append((DESCENDANT_OR_SELF, nodetest, + predicates)) + if self.at_end or not self.cur_token.startswith('/'): + break + continue + else: + raise PathSyntaxError('Absolute location paths not ' + 'supported', self.filename, + self.lineno) + elif self.cur_token == '//': + steps.append((DESCENDANT_OR_SELF, NodeTest(), [])) + self.next_token() + + axis, nodetest, predicates = self._location_step() + if not axis: + axis = CHILD + steps.append((axis, nodetest, predicates)) + if self.at_end or not self.cur_token.startswith('/'): + break + + return steps + + def 
_location_step(self): + if self.cur_token == '@': + axis = ATTRIBUTE + self.next_token() + elif self.cur_token == '.': + axis = SELF + elif self.cur_token == '..': + raise PathSyntaxError('Unsupported axis "parent"', self.filename, + self.lineno) + elif self.peek_token() == '::': + axis = Axis.forname(self.cur_token) + if axis is None: + raise PathSyntaxError('Unsupport axis "%s"' % axis, + self.filename, self.lineno) + self.next_token() + self.next_token() + else: + axis = None + nodetest = self._node_test(axis or CHILD) + predicates = [] + while self.cur_token == '[': + predicates.append(self._predicate()) + return axis, nodetest, predicates + + def _node_test(self, axis=None): + test = prefix = None + next_token = self.peek_token() + if next_token in ('(', '()'): # Node type test + test = self._node_type() + + elif next_token == ':': # Namespace prefix + prefix = self.cur_token + self.next_token() + localname = self.next_token() + if localname == '*': + test = QualifiedPrincipalTypeTest(axis, prefix) + else: + test = QualifiedNameTest(axis, prefix, localname) + + else: # Name test + if self.cur_token == '*': + test = PrincipalTypeTest(axis) + elif self.cur_token == '.': + test = NodeTest() + else: + test = LocalNameTest(axis, self.cur_token) + + if not self.at_end: + self.next_token() + return test + + def _node_type(self): + name = self.cur_token + self.next_token() + + args = [] + if self.cur_token != '()': + # The processing-instruction() function optionally accepts the + # name of the PI as argument, which must be a literal string + self.next_token() # ( + if self.cur_token != ')': + string = self.cur_token + if (string[0], string[-1]) in self._QUOTES: + string = string[1:-1] + args.append(string) + + cls = _nodetest_map.get(name) + if not cls: + raise PathSyntaxError('%s() not allowed here' % name, self.filename, + self.lineno) + return cls(*args) + + def _predicate(self): + assert self.cur_token == '[' + self.next_token() + expr = self._or_expr() + if 
self.cur_token != ']': + raise PathSyntaxError('Expected "]" to close predicate, ' + 'but found "%s"' % self.cur_token, + self.filename, self.lineno) + if not self.at_end: + self.next_token() + return expr + + def _or_expr(self): + expr = self._and_expr() + while self.cur_token == 'or': + self.next_token() + expr = OrOperator(expr, self._and_expr()) + return expr + + def _and_expr(self): + expr = self._equality_expr() + while self.cur_token == 'and': + self.next_token() + expr = AndOperator(expr, self._equality_expr()) + return expr + + def _equality_expr(self): + expr = self._relational_expr() + while self.cur_token in ('=', '!='): + op = _operator_map[self.cur_token] + self.next_token() + expr = op(expr, self._relational_expr()) + return expr + + def _relational_expr(self): + expr = self._sub_expr() + while self.cur_token in ('>', '>=', '<', '>='): + op = _operator_map[self.cur_token] + self.next_token() + expr = op(expr, self._sub_expr()) + return expr + + def _sub_expr(self): + token = self.cur_token + if token != '(': + return self._primary_expr() + self.next_token() + expr = self._or_expr() + if self.cur_token != ')': + raise PathSyntaxError('Expected ")" to close sub-expression, ' + 'but found "%s"' % self.cur_token, + self.filename, self.lineno) + self.next_token() + return expr + + def _primary_expr(self): + token = self.cur_token + if len(token) > 1 and (token[0], token[-1]) in self._QUOTES: + self.next_token() + return StringLiteral(token[1:-1]) + elif token[0].isdigit() or token[0] == '.': + self.next_token() + return NumberLiteral(as_float(token)) + elif token == '$': + token = self.next_token() + self.next_token() + return VariableReference(token) + elif not self.at_end and self.peek_token().startswith('('): + return self._function_call() + else: + axis = None + if token == '@': + axis = ATTRIBUTE + self.next_token() + return self._node_test(axis) + + def _function_call(self): + name = self.cur_token + if self.next_token() == '()': + args = [] + else: 
+ assert self.cur_token == '(' + self.next_token() + args = [self._or_expr()] + while self.cur_token == ',': + self.next_token() + args.append(self._or_expr()) + if not self.cur_token == ')': + raise PathSyntaxError('Expected ")" to close function argument ' + 'list, but found "%s"' % self.cur_token, + self.filename, self.lineno) + self.next_token() + cls = _function_map.get(name) + if not cls: + raise PathSyntaxError('Unsupported function "%s"' % name, + self.filename, self.lineno) + return cls(*args) + + +# Type coercion + +def as_scalar(value): + """Convert value to a scalar. If a single element Attrs() object is passed + the value of the single attribute will be returned.""" + if isinstance(value, Attrs): + assert len(value) == 1 + return value[0][1] + else: + return value + +def as_float(value): + # FIXME - if value is a bool it will be coerced to 0.0 and consequently + # compared as a float. This is probably not ideal. + return float(as_scalar(value)) + +def as_long(value): + return long(as_scalar(value)) + +def as_string(value): + value = as_scalar(value) + if value is False: + return '' + return unicode(value) + +def as_bool(value): + return bool(as_scalar(value)) + + +# Node tests + +class PrincipalTypeTest(object): + """Node test that matches any event with the given principal type.""" + __slots__ = ['principal_type'] + def __init__(self, principal_type): + self.principal_type = principal_type + def __call__(self, kind, data, pos, namespaces, variables): + if kind is START: + if self.principal_type is ATTRIBUTE: + return data[1] or None + else: + return True + def __repr__(self): + return '*' + +class QualifiedPrincipalTypeTest(object): + """Node test that matches any event with the given principal type in a + specific namespace.""" + __slots__ = ['principal_type', 'prefix'] + def __init__(self, principal_type, prefix): + self.principal_type = principal_type + self.prefix = prefix + def __call__(self, kind, data, pos, namespaces, variables): + namespace = 
class QualifiedNameTest(object):
    """Node test that matches any event with the given principal type and
    qualified name.

    The prefix is resolved to a namespace URI through the ``namespaces``
    mapping passed in when the test is called.
    """
    __slots__ = ['principal_type', 'prefix', 'name']

    def __init__(self, principal_type, prefix, name):
        """Initialize the test.

        :param principal_type: the axis the test is used on
        :param prefix: the namespace prefix used in the expression
        :param name: the local name to match
        """
        self.principal_type = principal_type
        self.prefix = prefix
        self.name = name

    def __call__(self, kind, data, pos, namespaces, variables):
        """Test a stream event.

        :return: for `START` events on the attribute axis where the element
                 carries the qualified attribute, an `Attrs` object holding
                 just that attribute; for other `START` events whether the
                 tag equals the qualified name; `None` for any other event
        """
        qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
        if kind is START:
            if self.principal_type is ATTRIBUTE and qname in data[1]:
                # Look the value up by the qualified name that was just
                # tested for; the bare local name does not identify a
                # namespaced attribute and produced a None value
                return Attrs([(qname, data[1].get(qname))])
            else:
                return data[0] == qname

    def __repr__(self):
        return '%s:%s' % (self.prefix, self.name)
class CeilingFunction(Function):
    """The `ceiling` function, which returns the nearest higher integer number
    for the given number (a number that already is an integer is returned
    unchanged in value).
    """
    __slots__ = ['number']

    def __init__(self, number):
        """:param number: the expression producing the number to round up"""
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        """Evaluate the argument for the event and round the result up."""
        number = self.number(kind, data, pos, namespaces, variables)
        return ceil(as_float(number))

    def __repr__(self):
        return 'ceiling(%r)' % self.number
class MatchesFunction(Function):
    """The `matches` function, which returns whether a string matches a regular
    expression.

    The optional third argument is a string of flag letters: ``s``, ``m``,
    ``i`` and ``x``, mapped to the `re` flags of the same name. `re.U` is
    always set.
    """
    __slots__ = ['string1', 'string2', 'flags', 'flags_expr']
    flag_mapping = {'s': re.S, 'm': re.M, 'i': re.I, 'x': re.X}

    def __init__(self, string1, string2, flags=''):
        """Initialize the function.

        :param string1: expression producing the string to search in
        :param string2: expression producing the regular expression
        :param flags: the flag letters, either as a plain string or as an
                      expression node that is evaluated for every event
        """
        self.string1 = string1
        self.string2 = string2
        if callable(flags):
            # Arguments coming from the parser are expression nodes, which
            # can only be turned into a string once an event is available
            self.flags_expr = flags
            self.flags = re.U
        else:
            self.flags_expr = None
            self.flags = self._map_flags(flags)

    def __call__(self, kind, data, pos, namespaces, variables):
        """Search the first string for the pattern.

        :return: the result of `re.search`, that is a match object or `None`
        """
        string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
        string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
        flags = self.flags
        if self.flags_expr is not None:
            flags = self._map_flags(as_string(
                self.flags_expr(kind, data, pos, namespaces, variables)
            ))
        return re.search(string2, string1, flags)

    def _map_flags(self, flags):
        """Combine the `re` flags for the given flag letters with `re.U`.

        :raises KeyError: for a letter that is not in `flag_mapping`
        """
        # The table is called `flag_mapping`; the previous `flag_map` lookup
        # failed with an AttributeError as soon as any flag was given
        return reduce(operator.or_,
                      [self.flag_mapping[flag] for flag in flags], re.U)

    def __repr__(self):
        return 'matches(%r, %r)' % (self.string1, self.string2)
class NormalizeSpaceFunction(Function):
    """The `normalize-space` function: strips leading and trailing whitespace
    from the evaluated string and replaces every run of two or more
    whitespace characters with a single space.
    """
    __slots__ = ['expr']
    # Bound `sub` method of the pre-compiled pattern, shared by all instances.
    # NOTE(review): a lone tab or newline is left as is by this pattern --
    # confirm whether that is intended.
    _normalize = re.compile(r'\s{2,}').sub

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.expr(kind, data, pos, namespaces, variables)
        trimmed = as_string(value).strip()
        return self._normalize(' ', trimmed)

    def __repr__(self):
        return 'normalize-space(%r)' % (self.expr,)
class StartsWithFunction(Function):
    """The `starts-with` function: tells whether the string produced by the
    first argument begins with the string produced by the second argument.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        context = (kind, data, pos, namespaces, variables)
        # Evaluate both argument expressions first, then coerce and compare.
        subject = self.string1(*context)
        prefix = self.string2(*context)
        return as_string(subject).startswith(as_string(prefix))

    def __repr__(self):
        return 'starts-with(%r, %r)' % (self.string1, self.string2)
class SubstringBeforeFunction(Function):
    """The `substring-before` function that returns the part of a string that
    is found before the given substring.

    Returns the empty string when the substring does not occur.

    :param string1: expression node yielding the string to search in
    :param string2: expression node yielding the substring to look for
    """
    __slots__ = ['string1', 'string2']
    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2
    def __call__(self, kind, data, pos, namespaces, variables):
        string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
        string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
        index = string1.find(string2)
        if index >= 0:
            # Everything up to (not including) the first occurrence.
            return string1[:index]
        return ''
    def __repr__(self):
        # Previously printed 'substring-after', a copy-paste slip.
        return 'substring-before(%r, %r)' % (self.string1, self.string2)
class AndOperator(object):
    """The boolean operator `and`.

    The right operand is only evaluated when the left operand is true.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        context = (kind, data, pos, namespaces, variables)
        if not as_bool(self.lval(*context)):
            # Short-circuit: the right-hand side is never evaluated.
            return False
        return as_bool(self.rval(*context))

    def __repr__(self):
        return '%s and %s' % (self.lval, self.rval)
class NotEqualsOperator(object):
    """The inequality operator `!=`.

    Both operands are evaluated against the current event, reduced with
    `as_scalar`, and compared with Python's `!=`.
    """
    __slots__ = ['lval', 'rval']
    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval
    def __call__(self, kind, data, pos, namespaces, variables):
        lval = as_scalar(self.lval(kind, data, pos, namespaces, variables))
        rval = as_scalar(self.rval(kind, data, pos, namespaces, variables))
        return lval != rval
    def __repr__(self):
        return '%s!=%s' % (self.lval, self.rval)
# Maps each comparison operator token to the class implementing it.
# The key for LessThanOrEqualOperator was previously the duplicate '>=',
# which overwrote the GreaterThanOrEqualOperator entry and left '<=' unmapped.
_operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator,
                 '>': GreaterThanOperator, '>=': GreaterThanOrEqualOperator,
                 '<': LessThanOperator, '<=': LessThanOrEqualOperator}
For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Implementation of the template engine.""" + +from genshi.template.base import Context, Template, TemplateError, \ + TemplateRuntimeError, TemplateSyntaxError, \ + BadDirectiveError +from genshi.template.loader import TemplateLoader, TemplateNotFound +from genshi.template.markup import MarkupTemplate +from genshi.template.text import TextTemplate, OldTextTemplate, NewTextTemplate + +__docformat__ = 'restructuredtext en' diff --git a/websdk/genshi/template/_ast24.py b/websdk/genshi/template/_ast24.py new file mode 100644 index 0000000..05d241b --- /dev/null +++ b/websdk/genshi/template/_ast24.py @@ -0,0 +1,446 @@ +# Generated automatically, please do not edit +# Generator can be found in Genshi SVN, scripts/ast-generator.py + +__version__ = 43614 + +class AST(object): + _fields = None + __doc__ = None + +class operator(AST): + _fields = None + __doc__ = None + _attributes = [] +class Add(operator): + _fields = None + __doc__ = None + +class boolop(AST): + _fields = None + __doc__ = None + _attributes = [] +class And(boolop): + _fields = None + __doc__ = None + +class stmt(AST): + _fields = None + __doc__ = None + _attributes = ['lineno', 'col_offset'] +class Assert(stmt): + _fields = ('test', 'msg') + __doc__ = None + +class Assign(stmt): + _fields = ('targets', 'value') + __doc__ = None + +class expr(AST): + _fields = None + __doc__ = None + _attributes = ['lineno', 'col_offset'] +class Attribute(expr): + _fields = ('value', 'attr', 'ctx') + __doc__ = None + +class AugAssign(stmt): + _fields = ('target', 'op', 'value') + __doc__ = None + +class expr_context(AST): + _fields = None + __doc__ = None + _attributes = [] +class AugLoad(expr_context): + _fields = None + __doc__ = None + +class AugStore(expr_context): + _fields = None + __doc__ = None + +class BinOp(expr): + _fields = ('left', 'op', 'right') + __doc__ = None + +class 
BitAnd(operator): + _fields = None + __doc__ = None + +class BitOr(operator): + _fields = None + __doc__ = None + +class BitXor(operator): + _fields = None + __doc__ = None + +class BoolOp(expr): + _fields = ('op', 'values') + __doc__ = None + +class Break(stmt): + _fields = None + __doc__ = None + +class Call(expr): + _fields = ('func', 'args', 'keywords', 'starargs', 'kwargs') + __doc__ = None + +class ClassDef(stmt): + _fields = ('name', 'bases', 'body') + __doc__ = None + +class Compare(expr): + _fields = ('left', 'ops', 'comparators') + __doc__ = None + +class Continue(stmt): + _fields = None + __doc__ = None + +class Del(expr_context): + _fields = None + __doc__ = None + +class Delete(stmt): + _fields = ('targets',) + __doc__ = None + +class Dict(expr): + _fields = ('keys', 'values') + __doc__ = None + +class Div(operator): + _fields = None + __doc__ = None + +class slice(AST): + _fields = None + __doc__ = None + _attributes = [] +class Ellipsis(slice): + _fields = None + __doc__ = None + +class cmpop(AST): + _fields = None + __doc__ = None + _attributes = [] +class Eq(cmpop): + _fields = None + __doc__ = None + +class Exec(stmt): + _fields = ('body', 'globals', 'locals') + __doc__ = None + +class Expr(stmt): + _fields = ('value',) + __doc__ = None + +class mod(AST): + _fields = None + __doc__ = None + _attributes = [] +class Expression(mod): + _fields = ('body',) + __doc__ = None + +class ExtSlice(slice): + _fields = ('dims',) + __doc__ = None + +class FloorDiv(operator): + _fields = None + __doc__ = None + +class For(stmt): + _fields = ('target', 'iter', 'body', 'orelse') + __doc__ = None + +class FunctionDef(stmt): + _fields = ('name', 'args', 'body', 'decorators') + __doc__ = None + +class GeneratorExp(expr): + _fields = ('elt', 'generators') + __doc__ = None + +class Global(stmt): + _fields = ('names',) + __doc__ = None + +class Gt(cmpop): + _fields = None + __doc__ = None + +class GtE(cmpop): + _fields = None + __doc__ = None + +class If(stmt): + 
_fields = ('test', 'body', 'orelse') + __doc__ = None + +class IfExp(expr): + _fields = ('test', 'body', 'orelse') + __doc__ = None + +class Import(stmt): + _fields = ('names',) + __doc__ = None + +class ImportFrom(stmt): + _fields = ('module', 'names', 'level') + __doc__ = None + +class In(cmpop): + _fields = None + __doc__ = None + +class Index(slice): + _fields = ('value',) + __doc__ = None + +class Interactive(mod): + _fields = ('body',) + __doc__ = None + +class unaryop(AST): + _fields = None + __doc__ = None + _attributes = [] +class Invert(unaryop): + _fields = None + __doc__ = None + +class Is(cmpop): + _fields = None + __doc__ = None + +class IsNot(cmpop): + _fields = None + __doc__ = None + +class LShift(operator): + _fields = None + __doc__ = None + +class Lambda(expr): + _fields = ('args', 'body') + __doc__ = None + +class List(expr): + _fields = ('elts', 'ctx') + __doc__ = None + +class ListComp(expr): + _fields = ('elt', 'generators') + __doc__ = None + +class Load(expr_context): + _fields = None + __doc__ = None + +class Lt(cmpop): + _fields = None + __doc__ = None + +class LtE(cmpop): + _fields = None + __doc__ = None + +class Mod(operator): + _fields = None + __doc__ = None + +class Module(mod): + _fields = ('body',) + __doc__ = None + +class Mult(operator): + _fields = None + __doc__ = None + +class Name(expr): + _fields = ('id', 'ctx') + __doc__ = None + +class Not(unaryop): + _fields = None + __doc__ = None + +class NotEq(cmpop): + _fields = None + __doc__ = None + +class NotIn(cmpop): + _fields = None + __doc__ = None + +class Num(expr): + _fields = ('n',) + __doc__ = None + +class Or(boolop): + _fields = None + __doc__ = None + +class Param(expr_context): + _fields = None + __doc__ = None + +class Pass(stmt): + _fields = None + __doc__ = None + +class Pow(operator): + _fields = None + __doc__ = None + +class Print(stmt): + _fields = ('dest', 'values', 'nl') + __doc__ = None + +class RShift(operator): + _fields = None + __doc__ = None + +class 
Raise(stmt): + _fields = ('type', 'inst', 'tback') + __doc__ = None + +class Repr(expr): + _fields = ('value',) + __doc__ = None + +class Return(stmt): + _fields = ('value',) + __doc__ = None + +class Slice(slice): + _fields = ('lower', 'upper', 'step') + __doc__ = None + +class Store(expr_context): + _fields = None + __doc__ = None + +class Str(expr): + _fields = ('s',) + __doc__ = None + +class Sub(operator): + _fields = None + __doc__ = None + +class Subscript(expr): + _fields = ('value', 'slice', 'ctx') + __doc__ = None + +class Suite(mod): + _fields = ('body',) + __doc__ = None + +class TryExcept(stmt): + _fields = ('body', 'handlers', 'orelse') + __doc__ = None + +class TryFinally(stmt): + _fields = ('body', 'finalbody') + __doc__ = None + +class Tuple(expr): + _fields = ('elts', 'ctx') + __doc__ = None + +class UAdd(unaryop): + _fields = None + __doc__ = None + +class USub(unaryop): + _fields = None + __doc__ = None + +class UnaryOp(expr): + _fields = ('op', 'operand') + __doc__ = None + +class While(stmt): + _fields = ('test', 'body', 'orelse') + __doc__ = None + +class With(stmt): + _fields = ('context_expr', 'optional_vars', 'body') + __doc__ = None + +class Yield(expr): + _fields = ('value',) + __doc__ = None + +class alias(AST): + _fields = ('name', 'asname') + __doc__ = None + +class arguments(AST): + _fields = ('args', 'vararg', 'kwarg', 'defaults') + __doc__ = None + +class boolop(AST): + _fields = None + __doc__ = None + _attributes = [] + +class cmpop(AST): + _fields = None + __doc__ = None + _attributes = [] + +class comprehension(AST): + _fields = ('target', 'iter', 'ifs') + __doc__ = None + +class excepthandler(AST): + _fields = ('type', 'name', 'body', 'lineno', 'col_offset') + __doc__ = None + +class expr(AST): + _fields = None + __doc__ = None + _attributes = ['lineno', 'col_offset'] + +class expr_context(AST): + _fields = None + __doc__ = None + _attributes = [] + +class keyword(AST): + _fields = ('arg', 'value') + __doc__ = None + +class 
mod(AST): + _fields = None + __doc__ = None + _attributes = [] + +class operator(AST): + _fields = None + __doc__ = None + _attributes = [] + +class slice(AST): + _fields = None + __doc__ = None + _attributes = [] + +class stmt(AST): + _fields = None + __doc__ = None + _attributes = ['lineno', 'col_offset'] + +class unaryop(AST): + _fields = None + __doc__ = None + _attributes = [] + diff --git a/websdk/genshi/template/ast24.py b/websdk/genshi/template/ast24.py new file mode 100644 index 0000000..af6dce9 --- /dev/null +++ b/websdk/genshi/template/ast24.py @@ -0,0 +1,505 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Emulation of the proper abstract syntax tree API for Python 2.4.""" + +import compiler +import compiler.ast + +from genshi.template import _ast24 as _ast + +__all__ = ['_ast', 'parse'] +__docformat__ = 'restructuredtext en' + + +def _new(cls, *args, **kwargs): + ret = cls() + if ret._fields: + for attr, value in zip(ret._fields, args): + if attr in kwargs: + raise ValueError('Field set both in args and kwargs') + setattr(ret, attr, value) + for attr in kwargs: + if (getattr(ret, '_fields', None) and attr in ret._fields) \ + or (getattr(ret, '_attributes', None) and + attr in ret._attributes): + setattr(ret, attr, kwargs[attr]) + return ret + + +class ASTUpgrader(object): + """Transformer changing structure of Python 2.4 ASTs to + Python 2.5 ones. + + Transforms ``compiler.ast`` Abstract Syntax Tree to builtin ``_ast``. 
+ It can use fake`` _ast`` classes and this way allow ``_ast`` emulation + in Python 2.4. + """ + + def __init__(self): + self.out_flags = None + self.lines = [-1] + + def _new(self, *args, **kwargs): + return _new(lineno = self.lines[-1], *args, **kwargs) + + def visit(self, node): + if node is None: + return None + if type(node) is tuple: + return tuple([self.visit(n) for n in node]) + lno = getattr(node, 'lineno', None) + if lno is not None: + self.lines.append(lno) + visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None) + if visitor is None: + raise Exception('Unhandled node type %r' % type(node)) + + retval = visitor(node) + if lno is not None: + self.lines.pop() + return retval + + def visit_Module(self, node): + body = self.visit(node.node) + if node.doc: + body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body + return self._new(_ast.Module, body) + + def visit_Expression(self, node): + return self._new(_ast.Expression, self.visit(node.node)) + + def _extract_args(self, node): + tab = node.argnames[:] + if node.flags & compiler.ast.CO_VARKEYWORDS: + kwarg = tab[-1] + tab = tab[:-1] + else: + kwarg = None + + if node.flags & compiler.ast.CO_VARARGS: + vararg = tab[-1] + tab = tab[:-1] + else: + vararg = None + + def _tup(t): + if isinstance(t, str): + return self._new(_ast.Name, t, _ast.Store()) + elif isinstance(t, tuple): + elts = [_tup(x) for x in t] + return self._new(_ast.Tuple, elts, _ast.Store()) + else: + raise NotImplemented + + args = [] + for arg in tab: + if isinstance(arg, str): + args.append(self._new(_ast.Name, arg, _ast.Param())) + elif isinstance(arg, tuple): + args.append(_tup(arg)) + else: + assert False, node.__class__ + + defaults = [self.visit(d) for d in node.defaults] + return self._new(_ast.arguments, args, vararg, kwarg, defaults) + + + def visit_Function(self, node): + if getattr(node, 'decorators', ()): + decorators = [self.visit(d) for d in node.decorators.nodes] + else: + decorators = [] + + args = 
self._extract_args(node) + body = self.visit(node.code) + if node.doc: + body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body + return self._new(_ast.FunctionDef, node.name, args, body, decorators) + + def visit_Class(self, node): + #self.name_types.append(_ast.Load) + bases = [self.visit(b) for b in node.bases] + #self.name_types.pop() + body = self.visit(node.code) + if node.doc: + body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body + return self._new(_ast.ClassDef, node.name, bases, body) + + def visit_Return(self, node): + return self._new(_ast.Return, self.visit(node.value)) + + def visit_Assign(self, node): + #self.name_types.append(_ast.Store) + targets = [self.visit(t) for t in node.nodes] + #self.name_types.pop() + return self._new(_ast.Assign, targets, self.visit(node.expr)) + + aug_operators = { + '+=': _ast.Add, + '/=': _ast.Div, + '//=': _ast.FloorDiv, + '<<=': _ast.LShift, + '%=': _ast.Mod, + '*=': _ast.Mult, + '**=': _ast.Pow, + '>>=': _ast.RShift, + '-=': _ast.Sub, + } + + def visit_AugAssign(self, node): + target = self.visit(node.node) + + # Because it's AugAssign target can't be list nor tuple + # so we only have to change context of one node + target.ctx = _ast.Store() + op = self.aug_operators[node.op]() + return self._new(_ast.AugAssign, target, op, self.visit(node.expr)) + + def _visit_Print(nl): + def _visit(self, node): + values = [self.visit(v) for v in node.nodes] + return self._new(_ast.Print, self.visit(node.dest), values, nl) + return _visit + + visit_Print = _visit_Print(False) + visit_Printnl = _visit_Print(True) + del _visit_Print + + def visit_For(self, node): + return self._new(_ast.For, self.visit(node.assign), self.visit(node.list), + self.visit(node.body), self.visit(node.else_)) + + def visit_While(self, node): + return self._new(_ast.While, self.visit(node.test), self.visit(node.body), + self.visit(node.else_)) + + def visit_If(self, node): + def _level(tests, else_): + test = 
self.visit(tests[0][0]) + body = self.visit(tests[0][1]) + if len(tests) == 1: + orelse = self.visit(else_) + else: + orelse = [_level(tests[1:], else_)] + return self._new(_ast.If, test, body, orelse) + return _level(node.tests, node.else_) + + def visit_With(self, node): + return self._new(_ast.With, self.visit(node.expr), + self.visit(node.vars), self.visit(node.body)) + + def visit_Raise(self, node): + return self._new(_ast.Raise, self.visit(node.expr1), + self.visit(node.expr2), self.visit(node.expr3)) + + def visit_TryExcept(self, node): + handlers = [] + for type, name, body in node.handlers: + handlers.append(self._new(_ast.excepthandler, self.visit(type), + self.visit(name), self.visit(body))) + return self._new(_ast.TryExcept, self.visit(node.body), + handlers, self.visit(node.else_)) + + def visit_TryFinally(self, node): + return self._new(_ast.TryFinally, self.visit(node.body), + self.visit(node.final)) + + def visit_Assert(self, node): + return self._new(_ast.Assert, self.visit(node.test), self.visit(node.fail)) + + def visit_Import(self, node): + names = [self._new(_ast.alias, n[0], n[1]) for n in node.names] + return self._new(_ast.Import, names) + + def visit_From(self, node): + names = [self._new(_ast.alias, n[0], n[1]) for n in node.names] + return self._new(_ast.ImportFrom, node.modname, names, 0) + + def visit_Exec(self, node): + return self._new(_ast.Exec, self.visit(node.expr), + self.visit(node.locals), self.visit(node.globals)) + + def visit_Global(self, node): + return self._new(_ast.Global, node.names[:]) + + def visit_Discard(self, node): + return self._new(_ast.Expr, self.visit(node.expr)) + + def _map_class(to): + def _visit(self, node): + return self._new(to) + return _visit + + visit_Pass = _map_class(_ast.Pass) + visit_Break = _map_class(_ast.Break) + visit_Continue = _map_class(_ast.Continue) + + def _visit_BinOperator(opcls): + def _visit(self, node): + return self._new(_ast.BinOp, self.visit(node.left), + opcls(), 
self.visit(node.right)) + return _visit + visit_Add = _visit_BinOperator(_ast.Add) + visit_Div = _visit_BinOperator(_ast.Div) + visit_FloorDiv = _visit_BinOperator(_ast.FloorDiv) + visit_LeftShift = _visit_BinOperator(_ast.LShift) + visit_Mod = _visit_BinOperator(_ast.Mod) + visit_Mul = _visit_BinOperator(_ast.Mult) + visit_Power = _visit_BinOperator(_ast.Pow) + visit_RightShift = _visit_BinOperator(_ast.RShift) + visit_Sub = _visit_BinOperator(_ast.Sub) + del _visit_BinOperator + + def _visit_BitOperator(opcls): + def _visit(self, node): + def _make(nodes): + if len(nodes) == 1: + return self.visit(nodes[0]) + left = _make(nodes[:-1]) + right = self.visit(nodes[-1]) + return self._new(_ast.BinOp, left, opcls(), right) + return _make(node.nodes) + return _visit + visit_Bitand = _visit_BitOperator(_ast.BitAnd) + visit_Bitor = _visit_BitOperator(_ast.BitOr) + visit_Bitxor = _visit_BitOperator(_ast.BitXor) + del _visit_BitOperator + + def _visit_UnaryOperator(opcls): + def _visit(self, node): + return self._new(_ast.UnaryOp, opcls(), self.visit(node.expr)) + return _visit + + visit_Invert = _visit_UnaryOperator(_ast.Invert) + visit_Not = _visit_UnaryOperator(_ast.Not) + visit_UnaryAdd = _visit_UnaryOperator(_ast.UAdd) + visit_UnarySub = _visit_UnaryOperator(_ast.USub) + del _visit_UnaryOperator + + def _visit_BoolOperator(opcls): + def _visit(self, node): + values = [self.visit(n) for n in node.nodes] + return self._new(_ast.BoolOp, opcls(), values) + return _visit + visit_And = _visit_BoolOperator(_ast.And) + visit_Or = _visit_BoolOperator(_ast.Or) + del _visit_BoolOperator + + cmp_operators = { + '==': _ast.Eq, + '!=': _ast.NotEq, + '<': _ast.Lt, + '<=': _ast.LtE, + '>': _ast.Gt, + '>=': _ast.GtE, + 'is': _ast.Is, + 'is not': _ast.IsNot, + 'in': _ast.In, + 'not in': _ast.NotIn, + } + + def visit_Compare(self, node): + left = self.visit(node.expr) + ops = [] + comparators = [] + for optype, expr in node.ops: + ops.append(self.cmp_operators[optype]()) + 
comparators.append(self.visit(expr)) + return self._new(_ast.Compare, left, ops, comparators) + + def visit_Lambda(self, node): + args = self._extract_args(node) + body = self.visit(node.code) + return self._new(_ast.Lambda, args, body) + + def visit_IfExp(self, node): + return self._new(_ast.IfExp, self.visit(node.test), self.visit(node.then), + self.visit(node.else_)) + + def visit_Dict(self, node): + keys = [self.visit(x[0]) for x in node.items] + values = [self.visit(x[1]) for x in node.items] + return self._new(_ast.Dict, keys, values) + + def visit_ListComp(self, node): + generators = [self.visit(q) for q in node.quals] + return self._new(_ast.ListComp, self.visit(node.expr), generators) + + def visit_GenExprInner(self, node): + generators = [self.visit(q) for q in node.quals] + return self._new(_ast.GeneratorExp, self.visit(node.expr), generators) + + def visit_GenExpr(self, node): + return self.visit(node.code) + + def visit_GenExprFor(self, node): + ifs = [self.visit(i) for i in node.ifs] + return self._new(_ast.comprehension, self.visit(node.assign), + self.visit(node.iter), ifs) + + def visit_ListCompFor(self, node): + ifs = [self.visit(i) for i in node.ifs] + return self._new(_ast.comprehension, self.visit(node.assign), + self.visit(node.list), ifs) + + def visit_GenExprIf(self, node): + return self.visit(node.test) + visit_ListCompIf = visit_GenExprIf + + def visit_Yield(self, node): + return self._new(_ast.Yield, self.visit(node.value)) + + def visit_CallFunc(self, node): + args = [] + keywords = [] + for arg in node.args: + if isinstance(arg, compiler.ast.Keyword): + keywords.append(self._new(_ast.keyword, arg.name, + self.visit(arg.expr))) + else: + args.append(self.visit(arg)) + return self._new(_ast.Call, self.visit(node.node), args, keywords, + self.visit(node.star_args), self.visit(node.dstar_args)) + + def visit_Backquote(self, node): + return self._new(_ast.Repr, self.visit(node.expr)) + + def visit_Const(self, node): + if node.value is None: 
# appears in slices + return None + elif isinstance(node.value, basestring): + return self._new(_ast.Str, node.value) + else: + return self._new(_ast.Num, node.value) + + def visit_Name(self, node): + return self._new(_ast.Name, node.name, _ast.Load()) + + def visit_Getattr(self, node): + return self._new(_ast.Attribute, self.visit(node.expr), node.attrname, + _ast.Load()) + + def visit_Tuple(self, node): + nodes = [self.visit(n) for n in node.nodes] + return self._new(_ast.Tuple, nodes, _ast.Load()) + + def visit_List(self, node): + nodes = [self.visit(n) for n in node.nodes] + return self._new(_ast.List, nodes, _ast.Load()) + + def get_ctx(self, flags): + if flags == 'OP_DELETE': + return _ast.Del() + elif flags == 'OP_APPLY': + return _ast.Load() + elif flags == 'OP_ASSIGN': + return _ast.Store() + else: + # FIXME Exception here + assert False, repr(flags) + + def visit_AssName(self, node): + self.out_flags = node.flags + ctx = self.get_ctx(node.flags) + return self._new(_ast.Name, node.name, ctx) + + def visit_AssAttr(self, node): + self.out_flags = node.flags + ctx = self.get_ctx(node.flags) + return self._new(_ast.Attribute, self.visit(node.expr), + node.attrname, ctx) + + def _visit_AssCollection(cls): + def _visit(self, node): + flags = None + elts = [] + for n in node.nodes: + elts.append(self.visit(n)) + if flags is None: + flags = self.out_flags + else: + assert flags == self.out_flags + self.out_flags = flags + ctx = self.get_ctx(flags) + return self._new(cls, elts, ctx) + return _visit + + visit_AssList = _visit_AssCollection(_ast.List) + visit_AssTuple = _visit_AssCollection(_ast.Tuple) + del _visit_AssCollection + + def visit_Slice(self, node): + lower = self.visit(node.lower) + upper = self.visit(node.upper) + ctx = self.get_ctx(node.flags) + self.out_flags = node.flags + return self._new(_ast.Subscript, self.visit(node.expr), + self._new(_ast.Slice, lower, upper, None), ctx) + + def visit_Subscript(self, node): + ctx = self.get_ctx(node.flags) + 
subs = [self.visit(s) for s in node.subs] + + advanced = (_ast.Slice, _ast.Ellipsis) + slices = [] + nonindex = False + for sub in subs: + if isinstance(sub, advanced): + nonindex = True + slices.append(sub) + else: + slices.append(self._new(_ast.Index, sub)) + if len(slices) == 1: + slice = slices[0] + elif nonindex: + slice = self._new(_ast.ExtSlice, slices) + else: + slice = self._new(_ast.Tuple, slices, _ast.Load()) + + self.out_flags = node.flags + return self._new(_ast.Subscript, self.visit(node.expr), slice, ctx) + + def visit_Sliceobj(self, node): + a = [self.visit(n) for n in node.nodes + [None]*(3 - len(node.nodes))] + return self._new(_ast.Slice, a[0], a[1], a[2]) + + def visit_Ellipsis(self, node): + return self._new(_ast.Ellipsis) + + def visit_Stmt(self, node): + def _check_del(n): + # del x is just AssName('x', 'OP_DELETE') + # we want to transform it to Delete([Name('x', Del())]) + dcls = (_ast.Name, _ast.List, _ast.Subscript, _ast.Attribute) + if isinstance(n, dcls) and isinstance(n.ctx, _ast.Del): + return self._new(_ast.Delete, [n]) + elif isinstance(n, _ast.Tuple) and isinstance(n.ctx, _ast.Del): + # unpack last tuple to avoid making del (x, y, z,); + # out of del x, y, z; (there's no difference between + # this two in compiler.ast) + return self._new(_ast.Delete, n.elts) + else: + return n + def _keep(n): + if isinstance(n, _ast.Expr) and n.value is None: + return False + else: + return True + return [s for s in [_check_del(self.visit(n)) for n in node.nodes] + if _keep(s)] + + +def parse(source, mode): + node = compiler.parse(source, mode) + return ASTUpgrader().visit(node) diff --git a/websdk/genshi/template/astutil.py b/websdk/genshi/template/astutil.py new file mode 100644 index 0000000..c3ad107 --- /dev/null +++ b/websdk/genshi/template/astutil.py @@ -0,0 +1,784 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008-2010 Edgewall Software +# All rights reserved. 
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.

"""Support classes for generating code from abstract syntax trees."""

try:
    import _ast
except ImportError:
    from genshi.template.ast24 import _ast, parse
else:
    def parse(source, mode):
        return compile(source, '', mode, _ast.PyCF_ONLY_AST)


__docformat__ = 'restructuredtext en'


class ASTCodeGenerator(object):
    """Render an abstract syntax tree back into Python source code.

    The tree is walked as soon as the generator is instantiated. Afterwards
    the generated source is available as ``code``, and ``lines_info`` holds
    one entry per generated line: a list of ``(column, position)`` pairs,
    where ``position`` is the ``(lineno, col_offset)`` of the node that was
    being written from that column on, or ``None`` if no node carrying a
    position was being visited.
    """

    def __init__(self, tree):
        """Generate the source code for the given tree.

        :param tree: the root AST node (or tuple of nodes) to render
        """
        self.lines_info = []
        self.line_info = None
        self.code = ''
        self.line = None
        self.last = None
        self.indent = 0
        self.blame_stack = []
        self.visit(tree)
        # ``self.line`` is still None if the tree produced no output at all
        # (for example an empty module), so guard before flushing it.
        if self.line is not None and self.line.strip():
            self.code += self.line + '\n'
            self.lines_info.append(self.line_info)
        self.line = None
        self.line_info = None

    def _change_indent(self, delta):
        self.indent += delta

    def _new_line(self):
        """Flush the pending line and start a new one at the current indent."""
        if self.line is not None:
            self.code += self.line + '\n'
            self.lines_info.append(self.line_info)
        self.line = ' '*4*self.indent
        if len(self.blame_stack) == 0:
            self.line_info = []
            self.last = None
        else:
            self.line_info = [(0, self.blame_stack[-1],)]
            self.last = self.blame_stack[-1]

    def _write(self, s):
        """Append `s` to the pending line, recording a position change."""
        if len(s) == 0:
            return
        if len(self.blame_stack) == 0:
            if self.last is not None:
                self.last = None
                self.line_info.append((len(self.line), self.last))
        else:
            if self.last != self.blame_stack[-1]:
                self.last = self.blame_stack[-1]
                self.line_info.append((len(self.line), self.last))
        self.line += s

    def _visit_block(self, statements):
        """Visit `statements` one indentation level deeper."""
        self._change_indent(1)
        for statement in statements:
            self.visit(statement)
        self._change_indent(-1)

    def _visit_orelse(self, node):
        """Write the ``else:`` clause of `node`, if it has a non-empty one."""
        if getattr(node, 'orelse', None):
            self._new_line()
            self._write('else:')
            self._visit_block(node.orelse)

    def _visit_separated(self, nodes, separator=', '):
        """Visit the (non-empty) sequence `nodes`, writing `separator`
        between consecutive items."""
        self.visit(nodes[0])
        for node in nodes[1:]:
            self._write(separator)
            self.visit(node)

    def _visit_generators(self, generators):
        """Write the ``for ... in ... if ...`` clauses of a comprehension."""
        for generator in generators:
            # comprehension = (expr target, expr iter, expr* ifs)
            self._write(' for ')
            self.visit(generator.target)
            self._write(' in ')
            self.visit(generator.iter)
            for ifexpr in generator.ifs:
                self._write(' if ')
                self.visit(ifexpr)

    def visit(self, node):
        """Dispatch `node` to the matching ``visit_<ClassName>`` method.

        :raises Exception: if no visitor exists for the node's class
        """
        if node is None:
            return None
        if type(node) is tuple:
            return tuple([self.visit(n) for n in node])
        try:
            self.blame_stack.append((node.lineno, node.col_offset,))
            info = True
        except AttributeError:
            # Nodes without position information are not tracked.
            info = False
        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
        if visitor is None:
            raise Exception('Unhandled node type %r' % type(node))
        ret = visitor(node)
        if info:
            self.blame_stack.pop()
        return ret

    def visit_Module(self, node):
        for n in node.body:
            self.visit(n)
    visit_Interactive = visit_Module
    visit_Suite = visit_Module

    def visit_Expression(self, node):
        self._new_line()
        return self.visit(node.body)

    # arguments = (expr* args, identifier? vararg,
    #              identifier? kwarg, expr* defaults)
    def visit_arguments(self, node):
        first = True
        no_default_count = len(node.args) - len(node.defaults)
        for i, arg in enumerate(node.args):
            if not first:
                self._write(', ')
            else:
                first = False
            self.visit(arg)
            if i >= no_default_count:
                self._write('=')
                self.visit(node.defaults[i - no_default_count])
        if getattr(node, 'vararg', None):
            if not first:
                self._write(', ')
            else:
                first = False
            self._write('*' + node.vararg)
        if getattr(node, 'kwarg', None):
            if not first:
                self._write(', ')
            else:
                first = False
            self._write('**' + node.kwarg)

    # FunctionDef(identifier name, arguments args,
    #             stmt* body, expr* decorator_list)
    def visit_FunctionDef(self, node):
        if hasattr(node, 'decorator_list'):
            decorators = getattr(node, 'decorator_list')
        else: # different name in earlier Python versions
            decorators = getattr(node, 'decorators', ())
        for decorator in decorators:
            self._new_line()
            self._write('@')
            self.visit(decorator)
        self._new_line()
        self._write('def ' + node.name + '(')
        self.visit(node.args)
        self._write('):')
        self._visit_block(node.body)

    # ClassDef(identifier name, expr* bases, stmt* body)
    def visit_ClassDef(self, node):
        self._new_line()
        self._write('class ' + node.name)
        if node.bases:
            self._write('(')
            self._visit_separated(node.bases)
            self._write(')')
        self._write(':')
        self._visit_block(node.body)

    # Return(expr? value)
    def visit_Return(self, node):
        self._new_line()
        self._write('return')
        if getattr(node, 'value', None):
            self._write(' ')
            self.visit(node.value)

    # Delete(expr* targets)
    def visit_Delete(self, node):
        self._new_line()
        self._write('del ')
        self._visit_separated(node.targets)

    # Assign(expr* targets, expr value)
    def visit_Assign(self, node):
        self._new_line()
        for target in node.targets:
            self.visit(target)
            self._write(' = ')
        self.visit(node.value)

    # AugAssign(expr target, operator op, expr value)
    def visit_AugAssign(self, node):
        self._new_line()
        self.visit(node.target)
        self._write(' ' + self.binary_operators[node.op.__class__] + '= ')
        self.visit(node.value)

    # Print(expr? dest, expr* values, bool nl)
    def visit_Print(self, node):
        self._new_line()
        self._write('print')
        if getattr(node, 'dest', None):
            self._write(' >> ')
            self.visit(node.dest)
            if getattr(node, 'values', None):
                self._write(', ')
        else:
            self._write(' ')
        if getattr(node, 'values', None):
            self._visit_separated(node.values)
        if not node.nl:
            self._write(',')

    # For(expr target, expr iter, stmt* body, stmt* orelse)
    def visit_For(self, node):
        self._new_line()
        self._write('for ')
        self.visit(node.target)
        self._write(' in ')
        self.visit(node.iter)
        self._write(':')
        self._visit_block(node.body)
        self._visit_orelse(node)

    # While(expr test, stmt* body, stmt* orelse)
    def visit_While(self, node):
        self._new_line()
        self._write('while ')
        self.visit(node.test)
        self._write(':')
        self._visit_block(node.body)
        self._visit_orelse(node)

    # If(expr test, stmt* body, stmt* orelse)
    def visit_If(self, node):
        self._new_line()
        self._write('if ')
        self.visit(node.test)
        self._write(':')
        self._visit_block(node.body)
        self._visit_orelse(node)

    # With(expr context_expr, expr? optional_vars, stmt* body)
    def visit_With(self, node):
        self._new_line()
        self._write('with ')
        self.visit(node.context_expr)
        if getattr(node, 'optional_vars', None):
            self._write(' as ')
            self.visit(node.optional_vars)
        self._write(':')
        self._visit_block(node.body)

    # Raise(expr? type, expr? inst, expr? tback)
    def visit_Raise(self, node):
        self._new_line()
        self._write('raise')
        if not node.type:
            return
        self._write(' ')
        self.visit(node.type)
        if not node.inst:
            return
        self._write(', ')
        self.visit(node.inst)
        if not node.tback:
            return
        self._write(', ')
        self.visit(node.tback)

    # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
    def visit_TryExcept(self, node):
        self._new_line()
        self._write('try:')
        self._visit_block(node.body)
        if getattr(node, 'handlers', None):
            for handler in node.handlers:
                self.visit(handler)
        self._new_line()
        if getattr(node, 'orelse', None):
            self._write('else:')
            self._visit_block(node.orelse)

    # excepthandler = (expr? type, expr? name, stmt* body)
    def visit_ExceptHandler(self, node):
        self._new_line()
        self._write('except')
        if getattr(node, 'type', None):
            self._write(' ')
            self.visit(node.type)
        if getattr(node, 'name', None):
            self._write(', ')
            self.visit(node.name)
        self._write(':')
        self._visit_block(node.body)
    visit_excepthandler = visit_ExceptHandler

    # TryFinally(stmt* body, stmt* finalbody)
    def visit_TryFinally(self, node):
        self._new_line()
        self._write('try:')
        self._visit_block(node.body)

        if getattr(node, 'finalbody', None):
            self._new_line()
            self._write('finally:')
            self._visit_block(node.finalbody)

    # Assert(expr test, expr? msg)
    def visit_Assert(self, node):
        self._new_line()
        self._write('assert ')
        self.visit(node.test)
        if getattr(node, 'msg', None):
            self._write(', ')
            self.visit(node.msg)

    def visit_alias(self, node):
        self._write(node.name)
        if getattr(node, 'asname', None):
            self._write(' as ')
            self._write(node.asname)

    # Import(alias* names)
    def visit_Import(self, node):
        self._new_line()
        self._write('import ')
        self._visit_separated(node.names)

    # ImportFrom(identifier module, alias* names, int? level)
    def visit_ImportFrom(self, node):
        self._new_line()
        self._write('from ')
        if node.level:
            self._write('.' * node.level)
        # ``module`` is None for purely relative imports (``from . import x``)
        self._write(node.module or '')
        self._write(' import ')
        self._visit_separated(node.names)

    # Exec(expr body, expr? globals, expr? locals)
    def visit_Exec(self, node):
        self._new_line()
        self._write('exec ')
        self.visit(node.body)
        if not node.globals:
            return
        self._write(', ')
        self.visit(node.globals)
        if not node.locals:
            return
        self._write(', ')
        self.visit(node.locals)

    # Global(identifier* names)
    def visit_Global(self, node):
        self._new_line()
        self._write('global ')
        # The names are plain identifiers (strings), not AST nodes, so they
        # have to be written directly instead of being visited.
        self._write(', '.join(node.names))

    # Expr(expr value)
    def visit_Expr(self, node):
        self._new_line()
        self.visit(node.value)

    # Pass
    def visit_Pass(self, node):
        self._new_line()
        self._write('pass')

    # Break
    def visit_Break(self, node):
        self._new_line()
        self._write('break')

    # Continue
    def visit_Continue(self, node):
        self._new_line()
        self._write('continue')

    ### EXPRESSIONS
    def with_parens(f):
        def _f(self, node):
            self._write('(')
            f(self, node)
            self._write(')')
        return _f

    bool_operators = {_ast.And: 'and', _ast.Or: 'or'}

    # BoolOp(boolop op, expr* values)
    @with_parens
    def visit_BoolOp(self, node):
        joiner = ' ' + self.bool_operators[node.op.__class__] + ' '
        self._visit_separated(node.values, joiner)

    binary_operators = {
        _ast.Add: '+',
        _ast.Sub: '-',
        _ast.Mult: '*',
        _ast.Div: '/',
        _ast.Mod: '%',
        _ast.Pow: '**',
        _ast.LShift: '<<',
        _ast.RShift: '>>',
        _ast.BitOr: '|',
        _ast.BitXor: '^',
        _ast.BitAnd: '&',
        _ast.FloorDiv: '//'
    }

    # BinOp(expr left, operator op, expr right)
    @with_parens
    def visit_BinOp(self, node):
        self.visit(node.left)
        self._write(' ' + self.binary_operators[node.op.__class__] + ' ')
        self.visit(node.right)

    unary_operators = {
        _ast.Invert: '~',
        _ast.Not: 'not',
        _ast.UAdd: '+',
        _ast.USub: '-',
    }

    # UnaryOp(unaryop op, expr operand)
    # Parenthesized like the other operators: without the parentheses
    # ``(-a) ** b`` would be regenerated as ``- a ** b``, which binds
    # differently.
    @with_parens
    def visit_UnaryOp(self, node):
        self._write(self.unary_operators[node.op.__class__] + ' ')
        self.visit(node.operand)

    # Lambda(arguments args, expr body)
    @with_parens
    def visit_Lambda(self, node):
        self._write('lambda ')
        self.visit(node.args)
        self._write(': ')
        self.visit(node.body)

    # IfExp(expr test, expr body, expr orelse)
    @with_parens
    def visit_IfExp(self, node):
        self.visit(node.body)
        self._write(' if ')
        self.visit(node.test)
        self._write(' else ')
        self.visit(node.orelse)

    # Dict(expr* keys, expr* values)
    def visit_Dict(self, node):
        self._write('{')
        for key, value in zip(node.keys, node.values):
            self.visit(key)
            self._write(': ')
            self.visit(value)
            self._write(', ')
        self._write('}')

    # ListComp(expr elt, comprehension* generators)
    def visit_ListComp(self, node):
        self._write('[')
        self.visit(node.elt)
        self._visit_generators(node.generators)
        self._write(']')

    # GeneratorExp(expr elt, comprehension* generators)
    def visit_GeneratorExp(self, node):
        self._write('(')
        self.visit(node.elt)
        self._visit_generators(node.generators)
        self._write(')')

    # Yield(expr? value)
    def visit_Yield(self, node):
        self._write('yield')
        if getattr(node, 'value', None):
            self._write(' ')
            self.visit(node.value)

    comparision_operators = {
        _ast.Eq: '==',
        _ast.NotEq: '!=',
        _ast.Lt: '<',
        _ast.LtE: '<=',
        _ast.Gt: '>',
        _ast.GtE: '>=',
        _ast.Is: 'is',
        _ast.IsNot: 'is not',
        _ast.In: 'in',
        _ast.NotIn: 'not in',
    }

    # Compare(expr left, cmpop* ops, expr* comparators)
    @with_parens
    def visit_Compare(self, node):
        self.visit(node.left)
        for op, comparator in zip(node.ops, node.comparators):
            self._write(' ' + self.comparision_operators[op.__class__] + ' ')
            self.visit(comparator)

    # Call(expr func, expr* args, keyword* keywords,
    #      expr? starargs, expr? kwargs)
    def visit_Call(self, node):
        self.visit(node.func)
        self._write('(')
        first = True
        for arg in node.args:
            if not first:
                self._write(', ')
            first = False
            self.visit(arg)

        for keyword in node.keywords:
            if not first:
                self._write(', ')
            first = False
            # keyword = (identifier arg, expr value)
            self._write(keyword.arg)
            self._write('=')
            self.visit(keyword.value)
        if getattr(node, 'starargs', None):
            if not first:
                self._write(', ')
            first = False
            self._write('*')
            self.visit(node.starargs)

        if getattr(node, 'kwargs', None):
            if not first:
                self._write(', ')
            first = False
            self._write('**')
            self.visit(node.kwargs)
        self._write(')')

    # Repr(expr value)
    def visit_Repr(self, node):
        self._write('`')
        self.visit(node.value)
        self._write('`')

    # Num(object n)
    def visit_Num(self, node):
        self._write(repr(node.n))

    # Str(string s)
    def visit_Str(self, node):
        self._write(repr(node.s))

    # Attribute(expr value, identifier attr, expr_context ctx)
    def visit_Attribute(self, node):
        self.visit(node.value)
        self._write('.')
        self._write(node.attr)

    # Subscript(expr value, slice slice, expr_context ctx)
    def visit_Subscript(self, node):
        self.visit(node.value)
        self._write('[')
        def _process_slice(node):
            if isinstance(node, _ast.Ellipsis):
                self._write('...')
            elif isinstance(node, _ast.Slice):
                if getattr(node, 'lower', None):
                    self.visit(node.lower)
                self._write(':')
                if getattr(node, 'upper', None):
                    self.visit(node.upper)
                if getattr(node, 'step', None):
                    self._write(':')
                    self.visit(node.step)
            elif isinstance(node, _ast.Index):
                self.visit(node.value)
            elif isinstance(node, _ast.ExtSlice):
                # The dimensions are slice nodes themselves; this class has
                # no visit_Slice/visit_Index methods, so recurse here.
                _process_slice(node.dims[0])
                for dim in node.dims[1:]:
                    self._write(', ')
                    _process_slice(dim)
            else:
                raise NotImplementedError('Slice type not implemented')
        _process_slice(node.slice)
        self._write(']')

    # Name(identifier id, expr_context ctx)
    def visit_Name(self, node):
        self._write(node.id)

    # List(expr* elts, expr_context ctx)
    def visit_List(self, node):
        self._write('[')
        for elt in node.elts:
            self.visit(elt)
            self._write(', ')
        self._write(']')

    # Tuple(expr *elts, expr_context ctx)
    def visit_Tuple(self, node):
        self._write('(')
        for elt in node.elts:
            self.visit(elt)
            self._write(', ')
        self._write(')')
class ASTTransformer(object):
    """General purpose base class for AST transformations.

    Every visitor method can be overridden to return an AST node that has been
    altered or replaced in some way.
    """

    def visit(self, node):
        """Dispatch `node` to the matching ``visit_<ClassName>`` method.

        :param node: an AST node, a tuple of nodes, or ``None``
        :return: the result of the visitor; nodes for which no visitor is
                 defined are returned unchanged
        """
        if node is None:
            return None
        if type(node) is tuple:
            return tuple([self.visit(n) for n in node])
        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
        if visitor is None:
            return node
        return visitor(node)

    def _clone(self, node):
        """Return a copy of `node` whose child fields have been visited.

        :param node: the AST node to copy
        :return: a new node of the same class
        """
        clone = node.__class__()
        # Copy the positional attributes (such as ``lineno``) under their
        # own names so the clone keeps the location of the original node.
        for name in getattr(clone, '_attributes', ()):
            try:
                setattr(clone, name, getattr(node, name))
            except AttributeError:
                pass
        for name in clone._fields:
            try:
                value = getattr(node, name)
            except AttributeError:
                pass
            else:
                if value is None:
                    pass
                elif isinstance(value, list):
                    value = [self.visit(x) for x in value]
                elif isinstance(value, tuple):
                    value = tuple(self.visit(x) for x in value)
                else:
                    value = self.visit(value)
                setattr(clone, name, value)
        return clone

    visit_Module = _clone
    visit_Interactive = _clone
    visit_Expression = _clone
    visit_Suite = _clone

    visit_FunctionDef = _clone
    visit_ClassDef = _clone
    visit_Return = _clone
    visit_Delete = _clone
    visit_Assign = _clone
    visit_AugAssign = _clone
    visit_Print = _clone
    visit_For = _clone
    visit_While = _clone
    visit_If = _clone
    visit_With = _clone
    visit_Raise = _clone
    visit_TryExcept = _clone
    visit_TryFinally = _clone
    visit_Assert = _clone
    visit_ExceptHandler = _clone

    visit_Import = _clone
    visit_ImportFrom = _clone
    visit_Exec = _clone
    visit_Global = _clone
    visit_Expr = _clone
    # Pass, Break, Continue don't need to be copied

    visit_BoolOp = _clone
    visit_BinOp = _clone
    visit_UnaryOp = _clone
    visit_Lambda = _clone
    visit_IfExp = _clone
    visit_Dict = _clone
    visit_ListComp = _clone
    visit_GeneratorExp = _clone
    visit_Yield = _clone
    visit_Compare = _clone
    visit_Call = _clone
    visit_Repr = _clone
    # Num, Str don't need to be copied

    visit_Attribute = _clone
    visit_Subscript = _clone
    visit_Name = _clone
    visit_List = _clone
    visit_Tuple = _clone

    visit_comprehension = _clone
    visit_excepthandler = _clone
    visit_arguments = _clone
    visit_keyword = _clone
    visit_alias = _clone

    visit_Slice = _clone
    visit_ExtSlice = _clone
    visit_Index = _clone

    del _clone
class TemplateError(Exception):
    """Base exception class for errors related to template processing."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        :param message: the error message
        :param filename: the filename of the template
        :param lineno: the number of line in the template at which the error
                       occurred
        :param offset: the column number at which the error occurred
        """
        if filename is None:
            filename = '<string>'
        self.msg = message #: the error message string
        # Only decorate the message when there is a real location to report.
        if filename != '<string>' or lineno >= 0:
            message = '%s (%s, line %d)' % (self.msg, filename, lineno)
        Exception.__init__(self, message)
        self.filename = filename #: the name of the template file
        self.lineno = lineno #: the number of the line containing the error
        self.offset = offset #: the offset on the line


class TemplateSyntaxError(TemplateError):
    """Exception raised when an expression in a template causes a Python syntax
    error, or the template is not well-formed.
    """

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception

        :param message: the error message
        :param filename: the filename of the template
        :param lineno: the number of line in the template at which the error
                       occurred
        :param offset: the column number at which the error occurred
        """
        if isinstance(message, SyntaxError) and message.lineno is not None:
            # Strip the line reference of the Python error; the template
            # location is appended by the base class instead.
            message = str(message).replace(' (line %d)' % message.lineno, '')
        # Forward the offset as well, so that it is not silently reset to -1.
        TemplateError.__init__(self, message, filename, lineno, offset)


class BadDirectiveError(TemplateSyntaxError):
    """Exception raised when an unknown directive is encountered when parsing
    a template.

    An unknown directive is any attribute using the namespace for directives,
    with a local name that doesn't match any registered directive.
    """

    def __init__(self, name, filename=None, lineno=-1):
        """Create the exception

        :param name: the name of the directive
        :param filename: the filename of the template
        :param lineno: the number of line in the template at which the error
                       occurred
        """
        TemplateSyntaxError.__init__(self, 'bad directive "%s"' % name,
                                     filename, lineno)


class TemplateRuntimeError(TemplateError):
    """Exception raised when the evaluation of a Python expression in a
    template causes an error.
    """
+ Otherwise, return the provided default value, or ``None``.""" + return self.get(name, default) + data.setdefault('defined', defined) + data.setdefault('value_of', value_of) + + def __repr__(self): + return repr(list(self.frames)) + + def __contains__(self, key): + """Return whether a variable exists in any of the scopes. + + :param key: the name of the variable + """ + return self._find(key)[1] is not None + has_key = __contains__ + + def __delitem__(self, key): + """Remove a variable from all scopes. + + :param key: the name of the variable + """ + for frame in self.frames: + if key in frame: + del frame[key] + + def __getitem__(self, key): + """Get a variables's value, starting at the current scope and going + upward. + + :param key: the name of the variable + :return: the variable value + :raises KeyError: if the requested variable wasn't found in any scope + """ + value, frame = self._find(key) + if frame is None: + raise KeyError(key) + return value + + def __len__(self): + """Return the number of distinctly named variables in the context. + + :return: the number of variables in the context + """ + return len(self.items()) + + def __setitem__(self, key, value): + """Set a variable in the current scope. + + :param key: the name of the variable + :param value: the variable value + """ + self.frames[0][key] = value + + def _find(self, key, default=None): + """Retrieve a given variable's value and the frame it was found in. + + Intended primarily for internal use by directives. + + :param key: the name of the variable + :param default: the default value to return when the variable is not + found + """ + for frame in self.frames: + if key in frame: + return frame[key], frame + return default, None + + def get(self, key, default=None): + """Get a variable's value, starting at the current scope and going + upward. 
+ + :param key: the name of the variable + :param default: the default value to return when the variable is not + found + """ + for frame in self.frames: + if key in frame: + return frame[key] + return default + + def keys(self): + """Return the name of all variables in the context. + + :return: a list of variable names + """ + keys = [] + for frame in self.frames: + keys += [key for key in frame if key not in keys] + return keys + + def items(self): + """Return a list of ``(name, value)`` tuples for all variables in the + context. + + :return: a list of variables + """ + return [(key, self.get(key)) for key in self.keys()] + + def update(self, mapping): + """Update the context from the mapping provided.""" + self.frames[0].update(mapping) + + def push(self, data): + """Push a new scope on the stack. + + :param data: the data dictionary to push on the context stack. + """ + + def pop(self): + """Pop the top-most scope from the stack.""" + + +def _apply_directives(stream, directives, ctxt, vars): + """Apply the given directives to the stream. + + :param stream: the stream the directives should be applied to + :param directives: the list of directives to apply + :param ctxt: the `Context` + :param vars: additional variables that should be available when Python + code is executed + :return: the stream with the given directives applied + """ + if directives: + stream = directives[0](iter(stream), directives[1:], ctxt, **vars) + return stream + + +def _eval_expr(expr, ctxt, vars=None): + """Evaluate the given `Expression` object. + + :param expr: the expression to evaluate + :param ctxt: the `Context` + :param vars: additional variables that should be available to the + expression + :return: the result of the evaluation + """ + if vars: + ctxt.push(vars) + retval = expr.evaluate(ctxt) + if vars: + ctxt.pop() + return retval + + +def _exec_suite(suite, ctxt, vars=None): + """Execute the given `Suite` object. 
+ + :param suite: the code suite to execute + :param ctxt: the `Context` + :param vars: additional variables that should be available to the + code + """ + if vars: + ctxt.push(vars) + ctxt.push({}) + suite.execute(ctxt) + if vars: + top = ctxt.pop() + ctxt.pop() + ctxt.frames[0].update(top) + + +class DirectiveFactoryMeta(type): + """Meta class for directive factories.""" + + def __new__(cls, name, bases, d): + if 'directives' in d: + d['_dir_by_name'] = dict(d['directives']) + d['_dir_order'] = [directive[1] for directive in d['directives']] + + return type.__new__(cls, name, bases, d) + + +class DirectiveFactory(object): + """Base for classes that provide a set of template directives. + + :since: version 0.6 + """ + __metaclass__ = DirectiveFactoryMeta + + directives = [] + """A list of ``(name, cls)`` tuples that define the set of directives + provided by this factory. + """ + + def get_directive(self, name): + """Return the directive class for the given name. + + :param name: the directive name as used in the template + :return: the directive class + :see: `Directive` + """ + return self._dir_by_name.get(name) + + def get_directive_index(self, dir_cls): + """Return a key for the given directive class that should be used to + sort it among other directives on the same `SUB` event. + + The default implementation simply returns the index of the directive in + the `directives` list. + + :param dir_cls: the directive class + :return: the sort key + """ + if dir_cls in self._dir_order: + return self._dir_order.index(dir_cls) + return len(self._dir_order) + + +class Template(DirectiveFactory): + """Abstract template base class. + + This class implements most of the template processing model, but does not + specify the syntax of templates. 
+ """ + + EXEC = StreamEventKind('EXEC') + """Stream event kind representing a Python code suite to execute.""" + + EXPR = StreamEventKind('EXPR') + """Stream event kind representing a Python expression.""" + + INCLUDE = StreamEventKind('INCLUDE') + """Stream event kind representing the inclusion of another template.""" + + SUB = StreamEventKind('SUB') + """Stream event kind representing a nested stream to which one or more + directives should be applied. + """ + + serializer = None + _number_conv = unicode # function used to convert numbers to event data + + def __init__(self, source, filepath=None, filename=None, loader=None, + encoding=None, lookup='strict', allow_exec=True): + """Initialize a template from either a string, a file-like object, or + an already parsed markup stream. + + :param source: a string, file-like object, or markup stream to read the + template from + :param filepath: the absolute path to the template file + :param filename: the path to the template file relative to the search + path + :param loader: the `TemplateLoader` to use for loading included + templates + :param encoding: the encoding of the `source` + :param lookup: the variable lookup mechanism; either "strict" (the + default), "lenient", or a custom lookup class + :param allow_exec: whether Python code blocks in templates should be + allowed + + :note: Changed in 0.5: Added the `allow_exec` argument + """ + self.filepath = filepath or filename + self.filename = filename + self.loader = loader + self.lookup = lookup + self.allow_exec = allow_exec + self._init_filters() + self._init_loader() + self._prepared = False + + if isinstance(source, basestring): + source = StringIO(source) + else: + source = source + try: + self._stream = self._parse(source, encoding) + except ParseError, e: + raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset) + + def __getstate__(self): + state = self.__dict__.copy() + state['filters'] = [] + return state + + def __setstate__(self, state): 
+ self.__dict__ = state + self._init_filters() + + def __repr__(self): + return '<%s "%s">' % (type(self).__name__, self.filename) + + def _init_filters(self): + self.filters = [self._flatten, self._include] + + def _init_loader(self): + if self.loader is None: + from genshi.template.loader import TemplateLoader + if self.filename: + if self.filepath != self.filename: + basedir = os.path.normpath(self.filepath)[:-len( + os.path.normpath(self.filename)) + ] + else: + basedir = os.path.dirname(self.filename) + else: + basedir = '.' + self.loader = TemplateLoader([os.path.abspath(basedir)]) + + @property + def stream(self): + if not self._prepared: + self._stream = list(self._prepare(self._stream)) + self._prepared = True + return self._stream + + def _parse(self, source, encoding): + """Parse the template. + + The parsing stage parses the template and constructs a list of + directives that will be executed in the render stage. The input is + split up into literal output (text that does not depend on the context + data) and directives or expressions. + + :param source: a file-like object containing the XML source of the + template, or an XML event stream + :param encoding: the encoding of the `source` + """ + raise NotImplementedError + + def _prepare(self, stream): + """Call the `attach` method of every directive found in the template. 
+ + :param stream: the event stream of the template + """ + from genshi.template.loader import TemplateNotFound + + for kind, data, pos in stream: + if kind is SUB: + directives = [] + substream = data[1] + for _, cls, value, namespaces, pos in sorted(data[0]): + directive, substream = cls.attach(self, substream, value, + namespaces, pos) + if directive: + directives.append(directive) + substream = self._prepare(substream) + if directives: + yield kind, (directives, list(substream)), pos + else: + for event in substream: + yield event + else: + if kind is INCLUDE: + href, cls, fallback = data + if isinstance(href, basestring) and \ + not getattr(self.loader, 'auto_reload', True): + # If the path to the included template is static, and + # auto-reloading is disabled on the template loader, + # the template is inlined into the stream + try: + tmpl = self.loader.load(href, relative_to=pos[0], + cls=cls or self.__class__) + for event in tmpl.stream: + yield event + except TemplateNotFound: + if fallback is None: + raise + for event in self._prepare(fallback): + yield event + continue + elif fallback: + # Otherwise the include is performed at run time + data = href, cls, list(self._prepare(fallback)) + + yield kind, data, pos + + def generate(self, *args, **kwargs): + """Apply the template to the given context data. + + Any keyword arguments are made available to the template as context + data. + + Only one positional argument is accepted: if it is provided, it must be + an instance of the `Context` class, and keyword arguments are ignored. + This calling style is used for internal processing. + + :return: a markup event stream representing the result of applying + the template to the context data. 
+ """ + vars = {} + if args: + assert len(args) == 1 + ctxt = args[0] + if ctxt is None: + ctxt = Context(**kwargs) + else: + vars = kwargs + assert isinstance(ctxt, Context) + else: + ctxt = Context(**kwargs) + + stream = self.stream + for filter_ in self.filters: + stream = filter_(iter(stream), ctxt, **vars) + return Stream(stream, self.serializer) + + def _flatten(self, stream, ctxt, **vars): + number_conv = self._number_conv + stack = [] + push = stack.append + pop = stack.pop + stream = iter(stream) + + while 1: + for kind, data, pos in stream: + + if kind is START and data[1]: + # Attributes may still contain expressions in start tags at + # this point, so do some evaluation + tag, attrs = data + new_attrs = [] + for name, value in attrs: + if type(value) is list: # this is an interpolated string + values = [event[1] + for event in self._flatten(value, ctxt, **vars) + if event[0] is TEXT and event[1] is not None + ] + if not values: + continue + value = ''.join(values) + new_attrs.append((name, value)) + yield kind, (tag, Attrs(new_attrs)), pos + + elif kind is EXPR: + result = _eval_expr(data, ctxt, vars) + if result is not None: + # First check for a string, otherwise the iterable test + # below succeeds, and the string will be chopped up into + # individual characters + if isinstance(result, basestring): + yield TEXT, result, pos + elif isinstance(result, (int, float, long)): + yield TEXT, number_conv(result), pos + elif hasattr(result, '__iter__'): + push(stream) + stream = _ensure(result) + break + else: + yield TEXT, unicode(result), pos + + elif kind is SUB: + # This event is a list of directives and a list of nested + # events to which those directives should be applied + push(stream) + stream = _apply_directives(data[1], data[0], ctxt, vars) + break + + elif kind is EXEC: + _exec_suite(data, ctxt, vars) + + else: + yield kind, data, pos + + else: + if not stack: + break + stream = pop() + + def _include(self, stream, ctxt, **vars): + """Internal 
stream filter that performs inclusion of external + template files. + """ + from genshi.template.loader import TemplateNotFound + + for event in stream: + if event[0] is INCLUDE: + href, cls, fallback = event[1] + if not isinstance(href, basestring): + parts = [] + for subkind, subdata, subpos in self._flatten(href, ctxt, + **vars): + if subkind is TEXT: + parts.append(subdata) + href = ''.join([x for x in parts if x is not None]) + try: + tmpl = self.loader.load(href, relative_to=event[2][0], + cls=cls or self.__class__) + for event in tmpl.generate(ctxt, **vars): + yield event + except TemplateNotFound: + if fallback is None: + raise + for filter_ in self.filters: + fallback = filter_(iter(fallback), ctxt, **vars) + for event in fallback: + yield event + else: + yield event + + +EXEC = Template.EXEC +EXPR = Template.EXPR +INCLUDE = Template.INCLUDE +SUB = Template.SUB diff --git a/websdk/genshi/template/directives.py b/websdk/genshi/template/directives.py new file mode 100644 index 0000000..e2c9424 --- /dev/null +++ b/websdk/genshi/template/directives.py @@ -0,0 +1,725 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. 
+ +"""Implementation of the various template directives.""" + +from genshi.core import QName, Stream +from genshi.path import Path +from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \ + EXPR, _apply_directives, _eval_expr +from genshi.template.eval import Expression, ExpressionASTTransformer, \ + _ast, _parse + +__all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective', + 'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective', + 'OtherwiseDirective', 'ReplaceDirective', 'StripDirective', + 'WhenDirective', 'WithDirective'] +__docformat__ = 'restructuredtext en' + + +class DirectiveMeta(type): + """Meta class for template directives.""" + + def __new__(cls, name, bases, d): + d['tagname'] = name.lower().replace('directive', '') + return type.__new__(cls, name, bases, d) + + +class Directive(object): + """Abstract base class for template directives. + + A directive is basically a callable that takes three positional arguments: + ``ctxt`` is the template data context, ``stream`` is an iterable over the + events that the directive applies to, and ``directives`` is is a list of + other directives on the same stream that need to be applied. + + Directives can be "anonymous" or "registered". Registered directives can be + applied by the template author using an XML attribute with the + corresponding name in the template. Such directives should be subclasses of + this base class that can be instantiated with the value of the directive + attribute as parameter. + + Anonymous directives are simply functions conforming to the protocol + described above, and can only be applied programmatically (for example by + template filters). 
+ """ + __metaclass__ = DirectiveMeta + __slots__ = ['expr'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + self.expr = self._parse_expr(value, template, lineno, offset) + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + """Called after the template stream has been completely parsed. + + :param template: the `Template` object + :param stream: the event stream associated with the directive + :param value: the argument value for the directive; if the directive was + specified as an element, this will be an `Attrs` instance + with all specified attributes, otherwise it will be a + `unicode` object with just the attribute value + :param namespaces: a mapping of namespace URIs to prefixes + :param pos: a ``(filename, lineno, offset)`` tuple describing the + location where the directive was found in the source + + This class method should return a ``(directive, stream)`` tuple. If + ``directive`` is not ``None``, it should be an instance of the `Directive` + class, and gets added to the list of directives applied to the substream + at runtime. `stream` is an event stream that replaces the original + stream associated with the directive. + """ + return cls(value, template, namespaces, *pos[1:]), stream + + def __call__(self, stream, directives, ctxt, **vars): + """Apply the directive to the given stream. 
+ + :param stream: the event stream + :param directives: a list of the remaining directives that should + process the stream + :param ctxt: the context data + :param vars: additional variables that should be made available when + Python code is executed + """ + raise NotImplementedError + + def __repr__(self): + expr = '' + if getattr(self, 'expr', None) is not None: + expr = ' "%s"' % self.expr.source + return '<%s%s>' % (type(self).__name__, expr) + + @classmethod + def _parse_expr(cls, expr, template, lineno=-1, offset=-1): + """Parses the given expression, raising a useful error message when a + syntax error is encountered. + """ + try: + return expr and Expression(expr, template.filepath, lineno, + lookup=template.lookup) or None + except SyntaxError, err: + err.msg += ' in expression "%s" of "%s" directive' % (expr, + cls.tagname) + raise TemplateSyntaxError(err, template.filepath, lineno, + offset + (err.offset or 0)) + + +def _assignment(ast): + """Takes the AST representation of an assignment, and returns a + function that applies the assignment of a given value to a dictionary. + """ + def _names(node): + if isinstance(node, _ast.Tuple): + return tuple([_names(child) for child in node.elts]) + elif isinstance(node, _ast.Name): + return node.id + def _assign(data, value, names=_names(ast)): + if type(names) is tuple: + for idx in range(len(names)): + _assign(data, value[idx], names[idx]) + else: + data[names] = value + return _assign + + +class AttrsDirective(Directive): + """Implementation of the ``py:attrs`` template directive. + + The value of the ``py:attrs`` attribute should be a dictionary or a sequence + of ``(name, value)`` tuples. The items in that dictionary or sequence are + added as attributes to the element: + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> + ... <li py:attrs="foo">Bar</li> + ... 
</ul>''') + >>> print(tmpl.generate(foo={'class': 'collapse'})) + <ul> + <li class="collapse">Bar</li> + </ul> + >>> print(tmpl.generate(foo=[('class', 'collapse')])) + <ul> + <li class="collapse">Bar</li> + </ul> + + If the value evaluates to ``None`` (or any other non-truth value), no + attributes are added: + + >>> print(tmpl.generate(foo=None)) + <ul> + <li>Bar</li> + </ul> + """ + __slots__ = [] + + def __call__(self, stream, directives, ctxt, **vars): + def _generate(): + kind, (tag, attrib), pos = stream.next() + attrs = _eval_expr(self.expr, ctxt, vars) + if attrs: + if isinstance(attrs, Stream): + try: + attrs = iter(attrs).next() + except StopIteration: + attrs = [] + elif not isinstance(attrs, list): # assume it's a dict + attrs = attrs.items() + attrib -= [name for name, val in attrs if val is None] + attrib |= [(QName(name), unicode(val).strip()) for name, val + in attrs if val is not None] + yield kind, (tag, attrib), pos + for event in stream: + yield event + + return _apply_directives(_generate(), directives, ctxt, vars) + + +class ContentDirective(Directive): + """Implementation of the ``py:content`` template directive. + + This directive replaces the content of the element with the result of + evaluating the value of the ``py:content`` attribute: + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> + ... <li py:content="bar">Hello</li> + ... </ul>''') + >>> print(tmpl.generate(bar='Bye')) + <ul> + <li>Bye</li> + </ul> + """ + __slots__ = [] + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + raise TemplateSyntaxError('The content directive can not be used ' + 'as an element', template.filepath, + *pos[1:]) + expr = cls._parse_expr(value, template, *pos[1:]) + return None, [stream[0], (EXPR, expr, pos), stream[-1]] + + +class DefDirective(Directive): + """Implementation of the ``py:def`` template directive. 
+ + This directive can be used to create "Named Template Functions", which + are template snippets that are not actually output during normal + processing, but rather can be expanded from expressions in other places + in the template. + + A named template function can be used just like a normal Python function + from template expressions: + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <p py:def="echo(greeting, name='world')" class="message"> + ... ${greeting}, ${name}! + ... </p> + ... ${echo('Hi', name='you')} + ... </div>''') + >>> print(tmpl.generate(bar='Bye')) + <div> + <p class="message"> + Hi, you! + </p> + </div> + + If a function does not require parameters, the parenthesis can be omitted + in the definition: + + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <p py:def="helloworld" class="message"> + ... Hello, world! + ... </p> + ... ${helloworld()} + ... </div>''') + >>> print(tmpl.generate(bar='Bye')) + <div> + <p class="message"> + Hello, world! 
+ </p> + </div> + """ + __slots__ = ['name', 'args', 'star_args', 'dstar_args', 'defaults'] + + def __init__(self, args, template, namespaces=None, lineno=-1, offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + ast = _parse(args).body + self.args = [] + self.star_args = None + self.dstar_args = None + self.defaults = {} + if isinstance(ast, _ast.Call): + self.name = ast.func.id + for arg in ast.args: + # only names + self.args.append(arg.id) + for kwd in ast.keywords: + self.args.append(kwd.arg) + exp = Expression(kwd.value, template.filepath, + lineno, lookup=template.lookup) + self.defaults[kwd.arg] = exp + if getattr(ast, 'starargs', None): + self.star_args = ast.starargs.id + if getattr(ast, 'kwargs', None): + self.dstar_args = ast.kwargs.id + else: + self.name = ast.id + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('function') + return super(DefDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + stream = list(stream) + + def function(*args, **kwargs): + scope = {} + args = list(args) # make mutable + for name in self.args: + if args: + scope[name] = args.pop(0) + else: + if name in kwargs: + val = kwargs.pop(name) + else: + val = _eval_expr(self.defaults.get(name), ctxt, vars) + scope[name] = val + if not self.star_args is None: + scope[self.star_args] = args + if not self.dstar_args is None: + scope[self.dstar_args] = kwargs + ctxt.push(scope) + for event in _apply_directives(stream, directives, ctxt, vars): + yield event + ctxt.pop() + function.__name__ = self.name + + # Store the function reference in the bottom context frame so that it + # doesn't get popped off before processing the template has finished + # FIXME: this makes context data mutable as a side-effect + ctxt.frames[-1][self.name] = function + + return [] + + def __repr__(self): + return '<%s "%s">' % 
(type(self).__name__, self.name) + + +class ForDirective(Directive): + """Implementation of the ``py:for`` template directive for repeating an + element based on an iterable in the context data. + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> + ... <li py:for="item in items">${item}</li> + ... </ul>''') + >>> print(tmpl.generate(items=[1, 2, 3])) + <ul> + <li>1</li><li>2</li><li>3</li> + </ul> + """ + __slots__ = ['assign', 'filename'] + + def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): + if ' in ' not in value: + raise TemplateSyntaxError('"in" keyword missing in "for" directive', + template.filepath, lineno, offset) + assign, value = value.split(' in ', 1) + ast = _parse(assign, 'exec') + value = 'iter(%s)' % value.strip() + self.assign = _assignment(ast.body[0].value) + self.filename = template.filepath + Directive.__init__(self, value, template, namespaces, lineno, offset) + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('each') + return super(ForDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + iterable = _eval_expr(self.expr, ctxt, vars) + if iterable is None: + return + + assign = self.assign + scope = {} + stream = list(stream) + for item in iterable: + assign(scope, item) + ctxt.push(scope) + for event in _apply_directives(stream, directives, ctxt, vars): + yield event + ctxt.pop() + + def __repr__(self): + return '<%s>' % type(self).__name__ + + +class IfDirective(Directive): + """Implementation of the ``py:if`` template directive for conditionally + excluding elements from being output. + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <b py:if="foo">${bar}</b> + ... 
</div>''') + >>> print(tmpl.generate(foo=True, bar='Hello')) + <div> + <b>Hello</b> + </div> + """ + __slots__ = [] + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('test') + return super(IfDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + value = _eval_expr(self.expr, ctxt, vars) + if value: + return _apply_directives(stream, directives, ctxt, vars) + return [] + + +class MatchDirective(Directive): + """Implementation of the ``py:match`` template directive. + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <span py:match="greeting"> + ... Hello ${select('@name')} + ... </span> + ... <greeting name="Dude" /> + ... </div>''') + >>> print(tmpl.generate()) + <div> + <span> + Hello Dude + </span> + </div> + """ + __slots__ = ['path', 'namespaces', 'hints'] + + def __init__(self, value, template, hints=None, namespaces=None, + lineno=-1, offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.path = Path(value, template.filepath, lineno) + self.namespaces = namespaces or {} + self.hints = hints or () + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + hints = [] + if type(value) is dict: + if value.get('buffer', '').lower() == 'false': + hints.append('not_buffered') + if value.get('once', '').lower() == 'true': + hints.append('match_once') + if value.get('recursive', '').lower() == 'false': + hints.append('not_recursive') + value = value.get('path') + return cls(value, template, frozenset(hints), namespaces, *pos[1:]), \ + stream + + def __call__(self, stream, directives, ctxt, **vars): + ctxt._match_templates.append((self.path.test(ignore_context=True), + self.path, list(stream), self.hints, + self.namespaces, directives)) + return [] + + def __repr__(self): + return '<%s 
"%s">' % (type(self).__name__, self.path.source) + + +class ReplaceDirective(Directive): + """Implementation of the ``py:replace`` template directive. + + This directive replaces the element with the result of evaluating the + value of the ``py:replace`` attribute: + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <span py:replace="bar">Hello</span> + ... </div>''') + >>> print(tmpl.generate(bar='Bye')) + <div> + Bye + </div> + + This directive is equivalent to ``py:content`` combined with ``py:strip``, + providing a less verbose way to achieve the same effect: + + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <span py:content="bar" py:strip="">Hello</span> + ... </div>''') + >>> print(tmpl.generate(bar='Bye')) + <div> + Bye + </div> + """ + __slots__ = [] + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('value') + if not value: + raise TemplateSyntaxError('missing value for "replace" directive', + template.filepath, *pos[1:]) + expr = cls._parse_expr(value, template, *pos[1:]) + return None, [(EXPR, expr, pos)] + + +class StripDirective(Directive): + """Implementation of the ``py:strip`` template directive. + + When the value of the ``py:strip`` attribute evaluates to ``True``, the + element is stripped from the output + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <div py:strip="True"><b>foo</b></div> + ... </div>''') + >>> print(tmpl.generate()) + <div> + <b>foo</b> + </div> + + Leaving the attribute value empty is equivalent to a truth value. + + This directive is particulary interesting for named template functions or + match templates that do not generate a top-level element: + + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... 
<div py:def="echo(what)" py:strip=""> + ... <b>${what}</b> + ... </div> + ... ${echo('foo')} + ... </div>''') + >>> print(tmpl.generate()) + <div> + <b>foo</b> + </div> + """ + __slots__ = [] + + def __call__(self, stream, directives, ctxt, **vars): + def _generate(): + if not self.expr or _eval_expr(self.expr, ctxt, vars): + stream.next() # skip start tag + previous = stream.next() + for event in stream: + yield previous + previous = event + else: + for event in stream: + yield event + return _apply_directives(_generate(), directives, ctxt, vars) + + +class ChooseDirective(Directive): + """Implementation of the ``py:choose`` directive for conditionally selecting + one of several body elements to display. + + If the ``py:choose`` expression is empty the expressions of nested + ``py:when`` directives are tested for truth. The first true ``py:when`` + body is output. If no ``py:when`` directive is matched then the fallback + directive ``py:otherwise`` will be used. + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/" + ... py:choose=""> + ... <span py:when="0 == 1">0</span> + ... <span py:when="1 == 1">1</span> + ... <span py:otherwise="">2</span> + ... </div>''') + >>> print(tmpl.generate()) + <div> + <span>1</span> + </div> + + If the ``py:choose`` directive contains an expression, the nested + ``py:when`` directives are tested for equality to the ``py:choose`` + expression: + + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/" + ... py:choose="2"> + ... <span py:when="1">1</span> + ... <span py:when="2">2</span> + ... </div>''') + >>> print(tmpl.generate()) + <div> + <span>2</span> + </div> + + Behavior is undefined if a ``py:choose`` block contains content outside a + ``py:when`` or ``py:otherwise`` block. Behavior is also undefined if a + ``py:otherwise`` occurs before ``py:when`` blocks. 
+ """ + __slots__ = ['matched', 'value'] + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('test') + return super(ChooseDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + info = [False, bool(self.expr), None] + if self.expr: + info[2] = _eval_expr(self.expr, ctxt, vars) + ctxt._choice_stack.append(info) + for event in _apply_directives(stream, directives, ctxt, vars): + yield event + ctxt._choice_stack.pop() + + +class WhenDirective(Directive): + """Implementation of the ``py:when`` directive for nesting in a parent with + the ``py:choose`` directive. + + See the documentation of the `ChooseDirective` for usage. + """ + __slots__ = ['filename'] + + def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): + Directive.__init__(self, value, template, namespaces, lineno, offset) + self.filename = template.filepath + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('test') + return super(WhenDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + info = ctxt._choice_stack and ctxt._choice_stack[-1] + if not info: + raise TemplateRuntimeError('"when" directives can only be used ' + 'inside a "choose" directive', + self.filename, *stream.next()[2][1:]) + if info[0]: + return [] + if not self.expr and not info[1]: + raise TemplateRuntimeError('either "choose" or "when" directive ' + 'must have a test expression', + self.filename, *stream.next()[2][1:]) + if info[1]: + value = info[2] + if self.expr: + matched = value == _eval_expr(self.expr, ctxt, vars) + else: + matched = bool(value) + else: + matched = bool(_eval_expr(self.expr, ctxt, vars)) + info[0] = matched + if not matched: + return [] + + return _apply_directives(stream, directives, ctxt, vars) + + +class 
OtherwiseDirective(Directive): + """Implementation of the ``py:otherwise`` directive for nesting in a parent + with the ``py:choose`` directive. + + See the documentation of `ChooseDirective` for usage. + """ + __slots__ = ['filename'] + + def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.filename = template.filepath + + def __call__(self, stream, directives, ctxt, **vars): + info = ctxt._choice_stack and ctxt._choice_stack[-1] + if not info: + raise TemplateRuntimeError('an "otherwise" directive can only be ' + 'used inside a "choose" directive', + self.filename, *stream.next()[2][1:]) + if info[0]: + return [] + info[0] = True + + return _apply_directives(stream, directives, ctxt, vars) + + +class WithDirective(Directive): + """Implementation of the ``py:with`` template directive, which allows + shorthand access to variables and expressions. + + >>> from genshi.template import MarkupTemplate + >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> + ... <span py:with="y=7; z=x+10">$x $y $z</span> + ... 
</div>''') + >>> print(tmpl.generate(x=42)) + <div> + <span>42 7 52</span> + </div> + """ + __slots__ = ['vars'] + + def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.vars = [] + value = value.strip() + try: + ast = _parse(value, 'exec') + for node in ast.body: + if not isinstance(node, _ast.Assign): + raise TemplateSyntaxError('only assignment allowed in ' + 'value of the "with" directive', + template.filepath, lineno, offset) + self.vars.append(([_assignment(n) for n in node.targets], + Expression(node.value, template.filepath, + lineno, lookup=template.lookup))) + except SyntaxError, err: + err.msg += ' in expression "%s" of "%s" directive' % (value, + self.tagname) + raise TemplateSyntaxError(err, template.filepath, lineno, + offset + (err.offset or 0)) + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('vars') + return super(WithDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + frame = {} + ctxt.push(frame) + for targets, expr in self.vars: + value = _eval_expr(expr, ctxt, vars) + for assign in targets: + assign(frame, value) + for event in _apply_directives(stream, directives, ctxt, vars): + yield event + ctxt.pop() + + def __repr__(self): + return '<%s>' % (type(self).__name__) diff --git a/websdk/genshi/template/eval.py b/websdk/genshi/template/eval.py new file mode 100644 index 0000000..8593aaa --- /dev/null +++ b/websdk/genshi/template/eval.py @@ -0,0 +1,629 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2010 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. 
+# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Support for "safe" evaluation of Python expressions.""" + +import __builtin__ + +from textwrap import dedent +from types import CodeType + +from genshi.core import Markup +from genshi.template.astutil import ASTTransformer, ASTCodeGenerator, \ + _ast, parse +from genshi.template.base import TemplateRuntimeError +from genshi.util import flatten + +__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup', + 'Undefined', 'UndefinedError'] +__docformat__ = 'restructuredtext en' + + +# Check for a Python 2.4 bug in the eval loop +has_star_import_bug = False +try: + class _FakeMapping(object): + __getitem__ = __setitem__ = lambda *a: None + exec 'from sys import *' in {}, _FakeMapping() +except SystemError: + has_star_import_bug = True +del _FakeMapping + + +def _star_import_patch(mapping, modname): + """This function is used as helper if a Python version with a broken + star-import opcode is in use. + """ + module = __import__(modname, None, None, ['__all__']) + if hasattr(module, '__all__'): + members = module.__all__ + else: + members = [x for x in module.__dict__ if not x.startswith('_')] + mapping.update([(name, getattr(module, name)) for name in members]) + + +class Code(object): + """Abstract base class for the `Expression` and `Suite` classes.""" + __slots__ = ['source', 'code', 'ast', '_globals'] + + def __init__(self, source, filename=None, lineno=-1, lookup='strict', + xform=None): + """Create the code object, either from a string, or from an AST node. 
+ + :param source: either a string containing the source code, or an AST + node + :param filename: the (preferably absolute) name of the file containing + the code + :param lineno: the number of the line on which the code was found + :param lookup: the lookup class that defines how variables are looked + up in the context; can be either "strict" (the default), + "lenient", or a custom lookup class + :param xform: the AST transformer that should be applied to the code; + if `None`, the appropriate transformation is chosen + depending on the mode + """ + if isinstance(source, basestring): + self.source = source + node = _parse(source, mode=self.mode) + else: + assert isinstance(source, _ast.AST), \ + 'Expected string or AST node, but got %r' % source + self.source = '?' + if self.mode == 'eval': + node = _ast.Expression() + node.body = source + else: + node = _ast.Module() + node.body = [source] + + self.ast = node + self.code = _compile(node, self.source, mode=self.mode, + filename=filename, lineno=lineno, xform=xform) + if lookup is None: + lookup = LenientLookup + elif isinstance(lookup, basestring): + lookup = {'lenient': LenientLookup, 'strict': StrictLookup}[lookup] + self._globals = lookup.globals + + def __getstate__(self): + state = {'source': self.source, 'ast': self.ast, + 'lookup': self._globals.im_self} + c = self.code + state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code, + c.co_consts, c.co_names, c.co_varnames, c.co_filename, + c.co_name, c.co_firstlineno, c.co_lnotab, (), ()) + return state + + def __setstate__(self, state): + self.source = state['source'] + self.ast = state['ast'] + self.code = CodeType(0, *state['code']) + self._globals = state['lookup'].globals + + def __eq__(self, other): + return (type(other) == type(self)) and (self.code == other.code) + + def __hash__(self): + return hash(self.code) + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, 
self.source) + + +class Expression(Code): + """Evaluates Python expressions used in templates. + + >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'}) + >>> Expression('test').evaluate(data) + 'Foo' + + >>> Expression('items[0]').evaluate(data) + 1 + >>> Expression('items[-1]').evaluate(data) + 3 + >>> Expression('dict["some"]').evaluate(data) + 'thing' + + Similar to e.g. Javascript, expressions in templates can use the dot + notation for attribute access to access items in mappings: + + >>> Expression('dict.some').evaluate(data) + 'thing' + + This also works the other way around: item access can be used to access + any object attribute: + + >>> class MyClass(object): + ... myattr = 'Bar' + >>> data = dict(mine=MyClass(), key='myattr') + >>> Expression('mine.myattr').evaluate(data) + 'Bar' + >>> Expression('mine["myattr"]').evaluate(data) + 'Bar' + >>> Expression('mine[key]').evaluate(data) + 'Bar' + + All of the standard Python operators are available to template expressions. + Built-in functions such as ``len()`` are also available in template + expressions: + + >>> data = dict(items=[1, 2, 3]) + >>> Expression('len(items)').evaluate(data) + 3 + """ + __slots__ = [] + mode = 'eval' + + def evaluate(self, data): + """Evaluate the expression against the given data dictionary. + + :param data: a mapping containing the data to evaluate against + :return: the result of the evaluation + """ + __traceback_hide__ = 'before_and_this' + _globals = self._globals(data) + return eval(self.code, _globals, {'__data__': data}) + + +class Suite(Code): + """Executes Python statements used in templates. + + >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'}) + >>> Suite("foo = dict['some']").execute(data) + >>> data['foo'] + 'thing' + """ + __slots__ = [] + mode = 'exec' + + def execute(self, data): + """Execute the suite in the given data dictionary. 
+ + :param data: a mapping containing the data to execute in + """ + __traceback_hide__ = 'before_and_this' + _globals = self._globals(data) + exec self.code in _globals, data + + +UNDEFINED = object() + + +class UndefinedError(TemplateRuntimeError): + """Exception thrown when a template expression attempts to access a variable + not defined in the context. + + :see: `LenientLookup`, `StrictLookup` + """ + def __init__(self, name, owner=UNDEFINED): + if owner is not UNDEFINED: + message = '%s has no member named "%s"' % (repr(owner), name) + else: + message = '"%s" not defined' % name + TemplateRuntimeError.__init__(self, message) + + +class Undefined(object): + """Represents a reference to an undefined variable. + + Unlike the Python runtime, template expressions can refer to an undefined + variable without causing a `NameError` to be raised. The result will be an + instance of the `Undefined` class, which is treated the same as ``False`` in + conditions, but raise an exception on any other operation: + + >>> foo = Undefined('foo') + >>> bool(foo) + False + >>> list(foo) + [] + >>> print(foo) + undefined + + However, calling an undefined variable, or trying to access an attribute + of that variable, will raise an exception that includes the name used to + reference that undefined variable. + + >>> foo('bar') + Traceback (most recent call last): + ... + UndefinedError: "foo" not defined + + >>> foo.bar + Traceback (most recent call last): + ... + UndefinedError: "foo" not defined + + :see: `LenientLookup` + """ + __slots__ = ['_name', '_owner'] + + def __init__(self, name, owner=UNDEFINED): + """Initialize the object. 
+ + :param name: the name of the reference + :param owner: the owning object, if the variable is accessed as a member + """ + self._name = name + self._owner = owner + + def __iter__(self): + return iter([]) + + def __nonzero__(self): + return False + + def __repr__(self): + return '<%s %r>' % (type(self).__name__, self._name) + + def __str__(self): + return 'undefined' + + def _die(self, *args, **kwargs): + """Raise an `UndefinedError`.""" + __traceback_hide__ = True + raise UndefinedError(self._name, self._owner) + __call__ = __getattr__ = __getitem__ = _die + + # Hack around some behavior introduced in Python 2.6.2 + # http://genshi.edgewall.org/ticket/324 + __length_hint__ = None + + +class LookupBase(object): + """Abstract base class for variable lookup implementations.""" + + @classmethod + def globals(cls, data): + """Construct the globals dictionary to use as the execution context for + the expression or suite. + """ + return { + '__data__': data, + '_lookup_name': cls.lookup_name, + '_lookup_attr': cls.lookup_attr, + '_lookup_item': cls.lookup_item, + '_star_import_patch': _star_import_patch, + 'UndefinedError': UndefinedError, + } + + @classmethod + def lookup_name(cls, data, name): + __traceback_hide__ = True + val = data.get(name, UNDEFINED) + if val is UNDEFINED: + val = BUILTINS.get(name, val) + if val is UNDEFINED: + val = cls.undefined(name) + return val + + @classmethod + def lookup_attr(cls, obj, key): + __traceback_hide__ = True + try: + val = getattr(obj, key) + except AttributeError: + if hasattr(obj.__class__, key): + raise + else: + try: + val = obj[key] + except (KeyError, TypeError): + val = cls.undefined(key, owner=obj) + return val + + @classmethod + def lookup_item(cls, obj, key): + __traceback_hide__ = True + if len(key) == 1: + key = key[0] + try: + return obj[key] + except (AttributeError, KeyError, IndexError, TypeError), e: + if isinstance(key, basestring): + val = getattr(obj, key, UNDEFINED) + if val is UNDEFINED: + val = 
cls.undefined(key, owner=obj) + return val + raise + + @classmethod + def undefined(cls, key, owner=UNDEFINED): + """Can be overridden by subclasses to specify behavior when undefined + variables are accessed. + + :param key: the name of the variable + :param owner: the owning object, if the variable is accessed as a member + """ + raise NotImplementedError + + +class LenientLookup(LookupBase): + """Default variable lookup mechanism for expressions. + + When an undefined variable is referenced using this lookup style, the + reference evaluates to an instance of the `Undefined` class: + + >>> expr = Expression('nothing', lookup='lenient') + >>> undef = expr.evaluate({}) + >>> undef + <Undefined 'nothing'> + + The same will happen when a non-existing attribute or item is accessed on + an existing object: + + >>> expr = Expression('something.nil', lookup='lenient') + >>> expr.evaluate({'something': dict()}) + <Undefined 'nil'> + + See the documentation of the `Undefined` class for details on the behavior + of such objects. + + :see: `StrictLookup` + """ + + @classmethod + def undefined(cls, key, owner=UNDEFINED): + """Return an ``Undefined`` object.""" + __traceback_hide__ = True + return Undefined(key, owner=owner) + + +class StrictLookup(LookupBase): + """Strict variable lookup mechanism for expressions. + + Referencing an undefined variable using this lookup style will immediately + raise an ``UndefinedError``: + + >>> expr = Expression('nothing', lookup='strict') + >>> expr.evaluate({}) + Traceback (most recent call last): + ... + UndefinedError: "nothing" not defined + + The same happens when a non-existing attribute or item is accessed on an + existing object: + + >>> expr = Expression('something.nil', lookup='strict') + >>> expr.evaluate({'something': dict()}) + Traceback (most recent call last): + ... 
+ UndefinedError: {} has no member named "nil" + """ + + @classmethod + def undefined(cls, key, owner=UNDEFINED): + """Raise an ``UndefinedError`` immediately.""" + __traceback_hide__ = True + raise UndefinedError(key, owner=owner) + + +def _parse(source, mode='eval'): + source = source.strip() + if mode == 'exec': + lines = [line.expandtabs() for line in source.splitlines()] + if lines: + first = lines[0] + rest = dedent('\n'.join(lines[1:])).rstrip() + if first.rstrip().endswith(':') and not rest[0].isspace(): + rest = '\n'.join([' %s' % line for line in rest.splitlines()]) + source = '\n'.join([first, rest]) + if isinstance(source, unicode): + source = '\xef\xbb\xbf' + source.encode('utf-8') + return parse(source, mode) + + +def _compile(node, source=None, mode='eval', filename=None, lineno=-1, + xform=None): + if isinstance(filename, unicode): + # unicode file names not allowed for code objects + filename = filename.encode('utf-8', 'replace') + elif not filename: + filename = '<string>' + if lineno <= 0: + lineno = 1 + + if xform is None: + xform = { + 'eval': ExpressionASTTransformer + }.get(mode, TemplateASTTransformer) + tree = xform().visit(node) + + if mode == 'eval': + name = '<Expression %r>' % (source or '?') + else: + lines = source.splitlines() + if not lines: + extract = '' + else: + extract = lines[0] + if len(lines) > 1: + extract += ' ...' + name = '<Suite %r>' % (extract) + new_source = ASTCodeGenerator(tree).code + code = compile(new_source, filename, mode) + + try: + # We'd like to just set co_firstlineno, but it's readonly. 
So we need + # to clone the code object while adjusting the line number + return CodeType(0, code.co_nlocals, code.co_stacksize, + code.co_flags | 0x0040, code.co_code, code.co_consts, + code.co_names, code.co_varnames, filename, name, + lineno, code.co_lnotab, (), ()) + except RuntimeError: + return code + + +def _new(class_, *args, **kwargs): + ret = class_() + for attr, value in zip(ret._fields, args): + if attr in kwargs: + raise ValueError('Field set both in args and kwargs') + setattr(ret, attr, value) + for attr, value in kwargs: + setattr(ret, attr, value) + return ret + + +BUILTINS = __builtin__.__dict__.copy() +BUILTINS.update({'Markup': Markup, 'Undefined': Undefined}) +CONSTANTS = frozenset(['False', 'True', 'None', 'NotImplemented', 'Ellipsis']) + + +class TemplateASTTransformer(ASTTransformer): + """Concrete AST transformer that implements the AST transformations needed + for code embedded in templates. + """ + + def __init__(self): + self.locals = [CONSTANTS] + + def _extract_names(self, node): + names = set() + def _process(node): + if isinstance(node, _ast.Name): + names.add(node.id) + elif isinstance(node, _ast.alias): + names.add(node.asname or node.name) + elif isinstance(node, _ast.Tuple): + for elt in node.elts: + _process(elt) + if hasattr(node, 'args'): + for arg in node.args: + _process(arg) + if hasattr(node, 'vararg'): + names.add(node.vararg) + if hasattr(node, 'kwarg'): + names.add(node.kwarg) + elif hasattr(node, 'names'): + for elt in node.names: + _process(elt) + return names + + def visit_Str(self, node): + if isinstance(node.s, str): + try: # If the string is ASCII, return a `str` object + node.s.decode('ascii') + except ValueError: # Otherwise return a `unicode` object + return _new(_ast.Str, node.s.decode('utf-8')) + return node + + def visit_ClassDef(self, node): + if len(self.locals) > 1: + self.locals[-1].add(node.name) + self.locals.append(set()) + try: + return ASTTransformer.visit_ClassDef(self, node) + finally: + 
self.locals.pop() + + def visit_Import(self, node): + if len(self.locals) > 1: + self.locals[-1].update(self._extract_names(node)) + return ASTTransformer.visit_Import(self, node) + + def visit_ImportFrom(self, node): + if [a.name for a in node.names] == ['*']: + if has_star_import_bug: + # This is a Python 2.4 bug. Only if we have a broken Python + # version do we need to apply this hack + node = _new(_ast.Expr, _new(_ast.Call, + _new(_ast.Name, '_star_import_patch'), [ + _new(_ast.Name, '__data__'), + _new(_ast.Str, node.module) + ], (), ())) + return node + if len(self.locals) > 1: + self.locals[-1].update(self._extract_names(node)) + return ASTTransformer.visit_ImportFrom(self, node) + + def visit_FunctionDef(self, node): + if len(self.locals) > 1: + self.locals[-1].add(node.name) + + self.locals.append(self._extract_names(node.args)) + try: + return ASTTransformer.visit_FunctionDef(self, node) + finally: + self.locals.pop() + + # GeneratorExp(expr elt, comprehension* generators) + def visit_GeneratorExp(self, node): + gens = [] + for generator in node.generators: + # comprehension = (expr target, expr iter, expr* ifs) + self.locals.append(set()) + gen = _new(_ast.comprehension, self.visit(generator.target), + self.visit(generator.iter), + [self.visit(if_) for if_ in generator.ifs]) + gens.append(gen) + + # use node.__class__ to make it reusable as ListComp + ret = _new(node.__class__, self.visit(node.elt), gens) + #delete inserted locals + del self.locals[-len(node.generators):] + return ret + + # ListComp(expr elt, comprehension* generators) + visit_ListComp = visit_GeneratorExp + + def visit_Lambda(self, node): + self.locals.append(self._extract_names(node.args)) + try: + return ASTTransformer.visit_Lambda(self, node) + finally: + self.locals.pop() + + def visit_Name(self, node): + # If the name refers to a local inside a lambda, list comprehension, or + # generator expression, leave it alone + if isinstance(node.ctx, _ast.Load) and \ + node.id not in 
flatten(self.locals): + # Otherwise, translate the name ref into a context lookup + name = _new(_ast.Name, '_lookup_name', _ast.Load()) + namearg = _new(_ast.Name, '__data__', _ast.Load()) + strarg = _new(_ast.Str, node.id) + node = _new(_ast.Call, name, [namearg, strarg], []) + elif isinstance(node.ctx, _ast.Store): + if len(self.locals) > 1: + self.locals[-1].add(node.id) + + return node + + +class ExpressionASTTransformer(TemplateASTTransformer): + """Concrete AST transformer that implements the AST transformations needed + for code embedded in templates. + """ + + def visit_Attribute(self, node): + if not isinstance(node.ctx, _ast.Load): + return ASTTransformer.visit_Attribute(self, node) + + func = _new(_ast.Name, '_lookup_attr', _ast.Load()) + args = [self.visit(node.value), _new(_ast.Str, node.attr)] + return _new(_ast.Call, func, args, []) + + def visit_Subscript(self, node): + if not isinstance(node.ctx, _ast.Load) or \ + not isinstance(node.slice, _ast.Index): + return ASTTransformer.visit_Subscript(self, node) + + func = _new(_ast.Name, '_lookup_item', _ast.Load()) + args = [ + self.visit(node.value), + _new(_ast.Tuple, (self.visit(node.slice.value),), _ast.Load()) + ] + return _new(_ast.Call, func, args, []) diff --git a/websdk/genshi/template/interpolation.py b/websdk/genshi/template/interpolation.py new file mode 100644 index 0000000..1e1a385 --- /dev/null +++ b/websdk/genshi/template/interpolation.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. 
+ +"""String interpolation routines, i.e. the splitting up a given text into some +parts that are literal strings, and others that are Python expressions. +""" + +from itertools import chain +import os +import re +from tokenize import PseudoToken + +from genshi.core import TEXT +from genshi.template.base import TemplateSyntaxError, EXPR +from genshi.template.eval import Expression + +__all__ = ['interpolate'] +__docformat__ = 'restructuredtext en' + +NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' +NAMECHARS = NAMESTART + '.0123456789' +PREFIX = '$' + +token_re = re.compile('%s|%s(?s)' % ( + r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1', + PseudoToken +)) + + +def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'): + """Parse the given string and extract expressions. + + This function is a generator that yields `TEXT` events for literal strings, + and `EXPR` events for expressions, depending on the results of parsing the + string. + + >>> for kind, data, pos in interpolate("hey ${foo}bar"): + ... 
print('%s %r' % (kind, data)) + TEXT 'hey ' + EXPR Expression('foo') + TEXT 'bar' + + :param text: the text to parse + :param filepath: absolute path to the file in which the text was found + (optional) + :param lineno: the line number at which the text was found (optional) + :param offset: the column number at which the text starts in the source + (optional) + :param lookup: the variable lookup mechanism; either "lenient" (the + default), "strict", or a custom lookup class + :return: a list of `TEXT` and `EXPR` events + :raise TemplateSyntaxError: when a syntax error in an expression is + encountered + """ + pos = [filepath, lineno, offset] + + textbuf = [] + textpos = None + for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]): + if is_expr: + if textbuf: + yield TEXT, ''.join(textbuf), textpos + del textbuf[:] + textpos = None + if chunk: + try: + expr = Expression(chunk.strip(), pos[0], pos[1], + lookup=lookup) + yield EXPR, expr, tuple(pos) + except SyntaxError, err: + raise TemplateSyntaxError(err, filepath, pos[1], + pos[2] + (err.offset or 0)) + else: + textbuf.append(chunk) + if textpos is None: + textpos = tuple(pos) + + if '\n' in chunk: + lines = chunk.splitlines() + pos[1] += len(lines) - 1 + pos[2] += len(lines[-1]) + else: + pos[2] += len(chunk) + + +def lex(text, textpos, filepath): + offset = pos = 0 + end = len(text) + escaped = False + + while 1: + if escaped: + offset = text.find(PREFIX, offset + 2) + escaped = False + else: + offset = text.find(PREFIX, pos) + if offset < 0 or offset == end - 1: + break + next = text[offset + 1] + + if next == '{': + if offset > pos: + yield False, text[pos:offset] + pos = offset + 2 + level = 1 + while level: + match = token_re.match(text, pos) + if match is None: + raise TemplateSyntaxError('invalid syntax', filepath, + *textpos[1:]) + pos = match.end() + tstart, tend = match.regs[3] + token = text[tstart:tend] + if token == '{': + level += 1 + elif token == '}': + level -= 1 + yield True, 
text[offset + 2:pos - 1] + + elif next in NAMESTART: + if offset > pos: + yield False, text[pos:offset] + pos = offset + pos += 1 + while pos < end: + char = text[pos] + if char not in NAMECHARS: + break + pos += 1 + yield True, text[offset + 1:pos].strip() + + elif not escaped and next == PREFIX: + if offset > pos: + yield False, text[pos:offset] + escaped = True + pos = offset + 1 + + else: + yield False, text[pos:offset + 1] + pos = offset + 1 + + if pos < end: + yield False, text[pos:] diff --git a/websdk/genshi/template/loader.py b/websdk/genshi/template/loader.py new file mode 100644 index 0000000..0e7cda7 --- /dev/null +++ b/websdk/genshi/template/loader.py @@ -0,0 +1,344 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2010 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Template loading and caching.""" + +import os +try: + import threading +except ImportError: + import dummy_threading as threading + +from genshi.template.base import TemplateError +from genshi.util import LRUCache + +__all__ = ['TemplateLoader', 'TemplateNotFound', 'directory', 'package', + 'prefixed'] +__docformat__ = 'restructuredtext en' + + +class TemplateNotFound(TemplateError): + """Exception raised when a specific template file could not be found.""" + + def __init__(self, name, search_path): + """Create the exception. 
+ + :param name: the filename of the template + :param search_path: the search path used to lookup the template + """ + TemplateError.__init__(self, 'Template "%s" not found' % name) + self.search_path = search_path + + +class TemplateLoader(object): + """Responsible for loading templates from files on the specified search + path. + + >>> import tempfile + >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template') + >>> os.write(fd, '<p>$var</p>') + 11 + >>> os.close(fd) + + The template loader accepts a list of directory paths that are then used + when searching for template files, in the given order: + + >>> loader = TemplateLoader([os.path.dirname(path)]) + + The `load()` method first checks the template cache whether the requested + template has already been loaded. If not, it attempts to locate the + template file, and returns the corresponding `Template` object: + + >>> from genshi.template import MarkupTemplate + >>> template = loader.load(os.path.basename(path)) + >>> isinstance(template, MarkupTemplate) + True + + Template instances are cached: requesting a template with the same name + results in the same instance being returned: + + >>> loader.load(os.path.basename(path)) is template + True + + The `auto_reload` option can be used to control whether a template should + be automatically reloaded when the file it was loaded from has been + changed. Disable this automatic reloading to improve performance. + + >>> os.remove(path) + """ + def __init__(self, search_path=None, auto_reload=False, + default_encoding=None, max_cache_size=25, default_class=None, + variable_lookup='strict', allow_exec=True, callback=None): + """Create the template laoder. 
+ + :param search_path: a list of absolute path names that should be + searched for template files, or a string containing + a single absolute path; alternatively, any item on + the list may be a ''load function'' that is passed + a filename and returns a file-like object and some + metadata + :param auto_reload: whether to check the last modification time of + template files, and reload them if they have changed + :param default_encoding: the default encoding to assume when loading + templates; defaults to UTF-8 + :param max_cache_size: the maximum number of templates to keep in the + cache + :param default_class: the default `Template` subclass to use when + instantiating templates + :param variable_lookup: the variable lookup mechanism; either "strict" + (the default), "lenient", or a custom lookup + class + :param allow_exec: whether to allow Python code blocks in templates + :param callback: (optional) a callback function that is invoked after a + template was initialized by this loader; the function + is passed the template object as only argument. 
This + callback can be used for example to add any desired + filters to the template + :see: `LenientLookup`, `StrictLookup` + + :note: Changed in 0.5: Added the `allow_exec` argument + """ + from genshi.template.markup import MarkupTemplate + + self.search_path = search_path + if self.search_path is None: + self.search_path = [] + elif not isinstance(self.search_path, (list, tuple)): + self.search_path = [self.search_path] + + self.auto_reload = auto_reload + """Whether templates should be reloaded when the underlying file is + changed""" + + self.default_encoding = default_encoding + self.default_class = default_class or MarkupTemplate + self.variable_lookup = variable_lookup + self.allow_exec = allow_exec + if callback is not None and not hasattr(callback, '__call__'): + raise TypeError('The "callback" parameter needs to be callable') + self.callback = callback + self._cache = LRUCache(max_cache_size) + self._uptodate = {} + self._lock = threading.RLock() + + def __getstate__(self): + state = self.__dict__.copy() + state['_lock'] = None + return state + + def __setstate__(self, state): + self.__dict__ = state + self._lock = threading.RLock() + + def load(self, filename, relative_to=None, cls=None, encoding=None): + """Load the template with the given name. + + If the `filename` parameter is relative, this method searches the + search path trying to locate a template matching the given name. If the + file name is an absolute path, the search path is ignored. + + If the requested template is not found, a `TemplateNotFound` exception + is raised. Otherwise, a `Template` object is returned that represents + the parsed template. + + Template instances are cached to avoid having to parse the same + template file more than once. Thus, subsequent calls of this method + with the same template file name will return the same `Template` + object (unless the ``auto_reload`` option is enabled and the file was + changed since the last parse.) 
+ + If the `relative_to` parameter is provided, the `filename` is + interpreted as being relative to that path. + + :param filename: the relative path of the template file to load + :param relative_to: the filename of the template from which the new + template is being loaded, or ``None`` if the + template is being loaded directly + :param cls: the class of the template object to instantiate + :param encoding: the encoding of the template to load; defaults to the + ``default_encoding`` of the loader instance + :return: the loaded `Template` instance + :raises TemplateNotFound: if a template with the given name could not + be found + """ + if cls is None: + cls = self.default_class + search_path = self.search_path + + # Make the filename relative to the template file its being loaded + # from, but only if that file is specified as a relative path, or no + # search path has been set up + if relative_to and (not search_path or not os.path.isabs(relative_to)): + filename = os.path.join(os.path.dirname(relative_to), filename) + + filename = os.path.normpath(filename) + cachekey = filename + + self._lock.acquire() + try: + # First check the cache to avoid reparsing the same file + try: + tmpl = self._cache[cachekey] + if not self.auto_reload: + return tmpl + uptodate = self._uptodate[cachekey] + if uptodate is not None and uptodate(): + return tmpl + except (KeyError, OSError): + pass + + isabs = False + + if os.path.isabs(filename): + # Bypass the search path if the requested filename is absolute + search_path = [os.path.dirname(filename)] + isabs = True + + elif relative_to and os.path.isabs(relative_to): + # Make sure that the directory containing the including + # template is on the search path + dirname = os.path.dirname(relative_to) + if dirname not in search_path: + search_path = list(search_path) + [dirname] + isabs = True + + elif not search_path: + # Uh oh, don't know where to look for the template + raise TemplateError('Search path for templates not 
configured') + + for loadfunc in search_path: + if isinstance(loadfunc, basestring): + loadfunc = directory(loadfunc) + try: + filepath, filename, fileobj, uptodate = loadfunc(filename) + except IOError: + continue + else: + try: + if isabs: + # If the filename of either the included or the + # including template is absolute, make sure the + # included template gets an absolute path, too, + # so that nested includes work properly without a + # search path + filename = filepath + tmpl = self._instantiate(cls, fileobj, filepath, + filename, encoding=encoding) + if self.callback: + self.callback(tmpl) + self._cache[cachekey] = tmpl + self._uptodate[cachekey] = uptodate + finally: + if hasattr(fileobj, 'close'): + fileobj.close() + return tmpl + + raise TemplateNotFound(filename, search_path) + + finally: + self._lock.release() + + def _instantiate(self, cls, fileobj, filepath, filename, encoding=None): + """Instantiate and return the `Template` object based on the given + class and parameters. + + This function is intended for subclasses to override if they need to + implement special template instantiation logic. Code that just uses + the `TemplateLoader` should use the `load` method instead. + + :param cls: the class of the template object to instantiate + :param fileobj: a readable file-like object containing the template + source + :param filepath: the absolute path to the template file + :param filename: the path to the template file relative to the search + path + :param encoding: the encoding of the template to load; defaults to the + ``default_encoding`` of the loader instance + :return: the loaded `Template` instance + :rtype: `Template` + """ + if encoding is None: + encoding = self.default_encoding + return cls(fileobj, filepath=filepath, filename=filename, loader=self, + encoding=encoding, lookup=self.variable_lookup, + allow_exec=self.allow_exec) + + @staticmethod + def directory(path): + """Loader factory for loading templates from a local directory. 
+ + :param path: the path to the local directory containing the templates + :return: the loader function to load templates from the given directory + :rtype: ``function`` + """ + def _load_from_directory(filename): + filepath = os.path.join(path, filename) + fileobj = open(filepath, 'U') + mtime = os.path.getmtime(filepath) + def _uptodate(): + return mtime == os.path.getmtime(filepath) + return filepath, filename, fileobj, _uptodate + return _load_from_directory + + @staticmethod + def package(name, path): + """Loader factory for loading templates from egg package data. + + :param name: the name of the package containing the resources + :param path: the path inside the package data + :return: the loader function to load templates from the given package + :rtype: ``function`` + """ + from pkg_resources import resource_stream + def _load_from_package(filename): + filepath = os.path.join(path, filename) + return filepath, filename, resource_stream(name, filepath), None + return _load_from_package + + @staticmethod + def prefixed(**delegates): + """Factory for a load function that delegates to other loaders + depending on the prefix of the requested template path. + + The prefix is stripped from the filename when passing on the load + request to the delegate. + + >>> load = prefixed( + ... app1 = lambda filename: ('app1', filename, None, None), + ... app2 = lambda filename: ('app2', filename, None, None) + ... 
) + >>> print(load('app1/foo.html')) + ('app1', 'app1/foo.html', None, None) + >>> print(load('app2/bar.html')) + ('app2', 'app2/bar.html', None, None) + + :param delegates: mapping of path prefixes to loader functions + :return: the loader function + :rtype: ``function`` + """ + def _dispatch_by_prefix(filename): + for prefix, delegate in delegates.items(): + if filename.startswith(prefix): + if isinstance(delegate, basestring): + delegate = directory(delegate) + filepath, _, fileobj, uptodate = delegate( + filename[len(prefix):].lstrip('/\\') + ) + return filepath, filename, fileobj, uptodate + raise TemplateNotFound(filename, list(delegates.keys())) + return _dispatch_by_prefix + + +directory = TemplateLoader.directory +package = TemplateLoader.package +prefixed = TemplateLoader.prefixed diff --git a/websdk/genshi/template/markup.py b/websdk/genshi/template/markup.py new file mode 100644 index 0000000..0e31632 --- /dev/null +++ b/websdk/genshi/template/markup.py @@ -0,0 +1,397 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2010 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. 
+ +"""Markup templating engine.""" + +from itertools import chain + +from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind +from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT +from genshi.input import XMLParser +from genshi.template.base import BadDirectiveError, Template, \ + TemplateSyntaxError, _apply_directives, \ + EXEC, INCLUDE, SUB +from genshi.template.eval import Suite +from genshi.template.interpolation import interpolate +from genshi.template.directives import * +from genshi.template.text import NewTextTemplate + +__all__ = ['MarkupTemplate'] +__docformat__ = 'restructuredtext en' + + +class MarkupTemplate(Template): + """Implementation of the template language for XML-based templates. + + >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> + ... <li py:for="item in items">${item}</li> + ... </ul>''') + >>> print(tmpl.generate(items=[1, 2, 3])) + <ul> + <li>1</li><li>2</li><li>3</li> + </ul> + """ + + DIRECTIVE_NAMESPACE = 'http://genshi.edgewall.org/' + XINCLUDE_NAMESPACE = 'http://www.w3.org/2001/XInclude' + + directives = [('def', DefDirective), + ('match', MatchDirective), + ('when', WhenDirective), + ('otherwise', OtherwiseDirective), + ('for', ForDirective), + ('if', IfDirective), + ('choose', ChooseDirective), + ('with', WithDirective), + ('replace', ReplaceDirective), + ('content', ContentDirective), + ('attrs', AttrsDirective), + ('strip', StripDirective)] + serializer = 'xml' + _number_conv = Markup + + def __init__(self, source, filepath=None, filename=None, loader=None, + encoding=None, lookup='strict', allow_exec=True): + Template.__init__(self, source, filepath=filepath, filename=filename, + loader=loader, encoding=encoding, lookup=lookup, + allow_exec=allow_exec) + self.add_directives(self.DIRECTIVE_NAMESPACE, self) + + def _init_filters(self): + Template._init_filters(self) + # Make sure the include filter comes after the match filter + self.filters.remove(self._include) + 
self.filters += [self._match, self._include] + + def _parse(self, source, encoding): + if not isinstance(source, Stream): + source = XMLParser(source, filename=self.filename, + encoding=encoding) + stream = [] + + for kind, data, pos in source: + + if kind is TEXT: + for kind, data, pos in interpolate(data, self.filepath, pos[1], + pos[2], lookup=self.lookup): + stream.append((kind, data, pos)) + + elif kind is PI and data[0] == 'python': + if not self.allow_exec: + raise TemplateSyntaxError('Python code blocks not allowed', + self.filepath, *pos[1:]) + try: + suite = Suite(data[1], self.filepath, pos[1], + lookup=self.lookup) + except SyntaxError, err: + raise TemplateSyntaxError(err, self.filepath, + pos[1] + (err.lineno or 1) - 1, + pos[2] + (err.offset or 0)) + stream.append((EXEC, suite, pos)) + + elif kind is COMMENT: + if not data.lstrip().startswith('!'): + stream.append((kind, data, pos)) + + else: + stream.append((kind, data, pos)) + + return stream + + def _extract_directives(self, stream, namespace, factory): + depth = 0 + dirmap = {} # temporary mapping of directives to elements + new_stream = [] + ns_prefix = {} # namespace prefixes in use + + for kind, data, pos in stream: + + if kind is START: + tag, attrs = data + directives = [] + strip = False + + if tag.namespace == namespace: + cls = factory.get_directive(tag.localname) + if cls is None: + raise BadDirectiveError(tag.localname, + self.filepath, pos[1]) + args = dict([(name.localname, value) for name, value + in attrs if not name.namespace]) + directives.append((factory.get_directive_index(cls), cls, + args, ns_prefix.copy(), pos)) + strip = True + + new_attrs = [] + for name, value in attrs: + if name.namespace == namespace: + cls = factory.get_directive(name.localname) + if cls is None: + raise BadDirectiveError(name.localname, + self.filepath, pos[1]) + if type(value) is list and len(value) == 1: + value = value[0][1] + directives.append((factory.get_directive_index(cls), + cls, value, 
ns_prefix.copy(), pos)) + else: + new_attrs.append((name, value)) + new_attrs = Attrs(new_attrs) + + if directives: + directives.sort() + dirmap[(depth, tag)] = (directives, len(new_stream), + strip) + + new_stream.append((kind, (tag, new_attrs), pos)) + depth += 1 + + elif kind is END: + depth -= 1 + new_stream.append((kind, data, pos)) + + # If there have have directive attributes with the + # corresponding start tag, move the events inbetween into + # a "subprogram" + if (depth, data) in dirmap: + directives, offset, strip = dirmap.pop((depth, data)) + substream = new_stream[offset:] + if strip: + substream = substream[1:-1] + new_stream[offset:] = [ + (SUB, (directives, substream), pos) + ] + + elif kind is SUB: + directives, substream = data + substream = self._extract_directives(substream, namespace, + factory) + + if len(substream) == 1 and substream[0][0] is SUB: + added_directives, substream = substream[0][1] + directives += added_directives + + new_stream.append((kind, (directives, substream), pos)) + + elif kind is START_NS: + # Strip out the namespace declaration for template + # directives + prefix, uri = data + ns_prefix[prefix] = uri + if uri != namespace: + new_stream.append((kind, data, pos)) + + elif kind is END_NS: + uri = ns_prefix.pop(data, None) + if uri and uri != namespace: + new_stream.append((kind, data, pos)) + + else: + new_stream.append((kind, data, pos)) + + return new_stream + + def _extract_includes(self, stream): + streams = [[]] # stacked lists of events of the "compiled" template + prefixes = {} + fallbacks = [] + includes = [] + xinclude_ns = Namespace(self.XINCLUDE_NAMESPACE) + + for kind, data, pos in stream: + stream = streams[-1] + + if kind is START: + # Record any directive attributes in start tags + tag, attrs = data + if tag in xinclude_ns: + if tag.localname == 'include': + include_href = attrs.get('href') + if not include_href: + raise TemplateSyntaxError('Include misses required ' + 'attribute "href"', + self.filepath, 
*pos[1:]) + includes.append((include_href, attrs.get('parse'))) + streams.append([]) + elif tag.localname == 'fallback': + streams.append([]) + fallbacks.append(streams[-1]) + else: + stream.append((kind, (tag, attrs), pos)) + + elif kind is END: + if fallbacks and data == xinclude_ns['fallback']: + assert streams.pop() is fallbacks[-1] + elif data == xinclude_ns['include']: + fallback = None + if len(fallbacks) == len(includes): + fallback = fallbacks.pop() + streams.pop() # discard anything between the include tags + # and the fallback element + stream = streams[-1] + href, parse = includes.pop() + try: + cls = { + 'xml': MarkupTemplate, + 'text': NewTextTemplate + }.get(parse) or self.__class__ + except KeyError: + raise TemplateSyntaxError('Invalid value for "parse" ' + 'attribute of include', + self.filepath, *pos[1:]) + stream.append((INCLUDE, (href, cls, fallback), pos)) + else: + stream.append((kind, data, pos)) + + elif kind is START_NS and data[1] == xinclude_ns: + # Strip out the XInclude namespace + prefixes[data[0]] = data[1] + + elif kind is END_NS and data in prefixes: + prefixes.pop(data) + + else: + stream.append((kind, data, pos)) + + assert len(streams) == 1 + return streams[0] + + def _interpolate_attrs(self, stream): + for kind, data, pos in stream: + + if kind is START: + # Record any directive attributes in start tags + tag, attrs = data + new_attrs = [] + for name, value in attrs: + if value: + value = list(interpolate(value, self.filepath, pos[1], + pos[2], lookup=self.lookup)) + if len(value) == 1 and value[0][0] is TEXT: + value = value[0][1] + new_attrs.append((name, value)) + data = tag, Attrs(new_attrs) + + yield kind, data, pos + + def _prepare(self, stream): + return Template._prepare(self, + self._extract_includes(self._interpolate_attrs(stream)) + ) + + def add_directives(self, namespace, factory): + """Register a custom `DirectiveFactory` for a given namespace. 
+ + :param namespace: the namespace URI + :type namespace: `basestring` + :param factory: the directive factory to register + :type factory: `DirectiveFactory` + :since: version 0.6 + """ + assert not self._prepared, 'Too late for adding directives, ' \ + 'template already prepared' + self._stream = self._extract_directives(self._stream, namespace, + factory) + + def _match(self, stream, ctxt, start=0, end=None, **vars): + """Internal stream filter that applies any defined match templates + to the stream. + """ + match_templates = ctxt._match_templates + + tail = [] + def _strip(stream, append=tail.append): + depth = 1 + next = stream.next + while 1: + event = next() + if event[0] is START: + depth += 1 + elif event[0] is END: + depth -= 1 + if depth > 0: + yield event + else: + append(event) + break + + for event in stream: + + # We (currently) only care about start and end events for matching + # We might care about namespace events in the future, though + if not match_templates or (event[0] is not START and + event[0] is not END): + yield event + continue + + for idx, (test, path, template, hints, namespaces, directives) \ + in enumerate(match_templates): + if idx < start or end is not None and idx >= end: + continue + + if test(event, namespaces, ctxt) is True: + if 'match_once' in hints: + del match_templates[idx] + idx -= 1 + + # Let the remaining match templates know about the event so + # they get a chance to update their internal state + for test in [mt[0] for mt in match_templates[idx + 1:]]: + test(event, namespaces, ctxt, updateonly=True) + + # Consume and store all events until an end event + # corresponding to this start event is encountered + pre_end = idx + 1 + if 'match_once' not in hints and 'not_recursive' in hints: + pre_end -= 1 + inner = _strip(stream) + if pre_end > 0: + inner = self._match(inner, ctxt, start=start, + end=pre_end, **vars) + content = self._include(chain([event], inner, tail), ctxt) + if 'not_buffered' not in hints: + content 
= list(content) + content = Stream(content) + + # Make the select() function available in the body of the + # match template + selected = [False] + def select(path): + selected[0] = True + return content.select(path, namespaces, ctxt) + vars = dict(select=select) + + # Recursively process the output + template = _apply_directives(template, directives, ctxt, + vars) + for event in self._match(self._flatten(template, ctxt, + **vars), + ctxt, start=idx + 1, **vars): + yield event + + # If the match template did not actually call select to + # consume the matched stream, the original events need to + # be consumed here or they'll get appended to the output + if not selected[0]: + for event in content: + pass + + # Let the remaining match templates know about the last + # event in the matched content, so they can update their + # internal state accordingly + for test in [mt[0] for mt in match_templates[idx + 1:]]: + test(tail[0], namespaces, ctxt, updateonly=True) + + break + + else: # no matches + yield event diff --git a/websdk/genshi/template/plugin.py b/websdk/genshi/template/plugin.py new file mode 100644 index 0000000..70d56af --- /dev/null +++ b/websdk/genshi/template/plugin.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# Copyright (C) 2006 Matthew Good +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Basic support for the template engine plugin API used by TurboGears and +CherryPy/Buffet. 
+""" + +from genshi.input import ET, HTML, XML +from genshi.output import DocType +from genshi.template.base import Template +from genshi.template.loader import TemplateLoader +from genshi.template.markup import MarkupTemplate +from genshi.template.text import TextTemplate, NewTextTemplate + +__all__ = ['ConfigurationError', 'AbstractTemplateEnginePlugin', + 'MarkupTemplateEnginePlugin', 'TextTemplateEnginePlugin'] +__docformat__ = 'restructuredtext en' + + +class ConfigurationError(ValueError): + """Exception raised when invalid plugin options are encountered.""" + + +class AbstractTemplateEnginePlugin(object): + """Implementation of the plugin API.""" + + template_class = None + extension = None + + def __init__(self, extra_vars_func=None, options=None): + self.get_extra_vars = extra_vars_func + if options is None: + options = {} + self.options = options + + self.default_encoding = options.get('genshi.default_encoding', 'utf-8') + auto_reload = options.get('genshi.auto_reload', '1') + if isinstance(auto_reload, basestring): + auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true') + search_path = [p for p in + options.get('genshi.search_path', '').split(':') if p] + self.use_package_naming = not search_path + try: + max_cache_size = int(options.get('genshi.max_cache_size', 25)) + except ValueError: + raise ConfigurationError('Invalid value for max_cache_size: "%s"' % + options.get('genshi.max_cache_size')) + + loader_callback = options.get('genshi.loader_callback', None) + if loader_callback and not hasattr(loader_callback, '__call__'): + raise ConfigurationError('loader callback must be a function') + + lookup_errors = options.get('genshi.lookup_errors', 'strict') + if lookup_errors not in ('lenient', 'strict'): + raise ConfigurationError('Unknown lookup errors mode "%s"' % + lookup_errors) + + try: + allow_exec = bool(options.get('genshi.allow_exec', True)) + except ValueError: + raise ConfigurationError('Invalid value for allow_exec "%s"' % + 
options.get('genshi.allow_exec')) + + self.loader = TemplateLoader([p for p in search_path if p], + auto_reload=auto_reload, + max_cache_size=max_cache_size, + default_class=self.template_class, + variable_lookup=lookup_errors, + allow_exec=allow_exec, + callback=loader_callback) + + def load_template(self, templatename, template_string=None): + """Find a template specified in python 'dot' notation, or load one from + a string. + """ + if template_string is not None: + return self.template_class(template_string) + + if self.use_package_naming: + divider = templatename.rfind('.') + if divider >= 0: + from pkg_resources import resource_filename + package = templatename[:divider] + basename = templatename[divider + 1:] + self.extension + templatename = resource_filename(package, basename) + + return self.loader.load(templatename) + + def _get_render_options(self, format=None, fragment=False): + if format is None: + format = self.default_format + kwargs = {'method': format} + if self.default_encoding: + kwargs['encoding'] = self.default_encoding + return kwargs + + def render(self, info, format=None, fragment=False, template=None): + """Render the template to a string using the provided info.""" + kwargs = self._get_render_options(format=format, fragment=fragment) + return self.transform(info, template).render(**kwargs) + + def transform(self, info, template): + """Render the output to an event stream.""" + if not isinstance(template, Template): + template = self.load_template(template) + return template.generate(**info) + + +class MarkupTemplateEnginePlugin(AbstractTemplateEnginePlugin): + """Implementation of the plugin API for markup templates.""" + + template_class = MarkupTemplate + extension = '.html' + + def __init__(self, extra_vars_func=None, options=None): + AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options) + + default_doctype = self.options.get('genshi.default_doctype') + if default_doctype: + doctype = DocType.get(default_doctype) + if 
doctype is None: + raise ConfigurationError('Unknown doctype %r' % default_doctype) + self.default_doctype = doctype + else: + self.default_doctype = None + + format = self.options.get('genshi.default_format', 'html').lower() + if format not in ('html', 'xhtml', 'xml', 'text'): + raise ConfigurationError('Unknown output format %r' % format) + self.default_format = format + + def _get_render_options(self, format=None, fragment=False): + kwargs = super(MarkupTemplateEnginePlugin, + self)._get_render_options(format, fragment) + if self.default_doctype and not fragment: + kwargs['doctype'] = self.default_doctype + return kwargs + + def transform(self, info, template): + """Render the output to an event stream.""" + data = {'ET': ET, 'HTML': HTML, 'XML': XML} + if self.get_extra_vars: + data.update(self.get_extra_vars()) + data.update(info) + return super(MarkupTemplateEnginePlugin, self).transform(data, template) + + +class TextTemplateEnginePlugin(AbstractTemplateEnginePlugin): + """Implementation of the plugin API for text templates.""" + + template_class = TextTemplate + extension = '.txt' + default_format = 'text' + + def __init__(self, extra_vars_func=None, options=None): + if options is None: + options = {} + + new_syntax = options.get('genshi.new_text_syntax') + if isinstance(new_syntax, basestring): + new_syntax = new_syntax.lower() in ('1', 'on', 'yes', 'true') + if new_syntax: + self.template_class = NewTextTemplate + + AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options) diff --git a/websdk/genshi/template/text.py b/websdk/genshi/template/text.py new file mode 100644 index 0000000..746226c --- /dev/null +++ b/websdk/genshi/template/text.py @@ -0,0 +1,333 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. 
The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Plain text templating engine. + +This module implements two template language syntaxes, at least for a certain +transitional period. `OldTextTemplate` (aliased to just `TextTemplate`) defines +a syntax that was inspired by Cheetah/Velocity. `NewTextTemplate` on the other +hand is inspired by the syntax of the Django template language, which has more +explicit delimiting of directives, and is more flexible with regards to +white space and line breaks. + +In a future release, `OldTextTemplate` will be phased out in favor of +`NewTextTemplate`, as the names imply. Therefore the new syntax is strongly +recommended for new projects, and existing projects may want to migrate to the +new syntax to remain compatible with future Genshi releases. +""" + +import re + +from genshi.core import TEXT +from genshi.template.base import BadDirectiveError, Template, \ + TemplateSyntaxError, EXEC, INCLUDE, SUB +from genshi.template.eval import Suite +from genshi.template.directives import * +from genshi.template.directives import Directive +from genshi.template.interpolation import interpolate + +__all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate'] +__docformat__ = 'restructuredtext en' + + +class NewTextTemplate(Template): + r"""Implementation of a simple text-based template engine. This class will + replace `OldTextTemplate` in a future release. + + It uses a more explicit delimiting style for directives: instead of the old + style which required putting directives on separate lines that were prefixed + with a ``#`` sign, directives and commenbtsr are enclosed in delimiter pairs + (by default ``{% ... %}`` and ``{# ... #}``, respectively). 
+ + Variable substitution uses the same interpolation syntax as for markup + languages: simple references are prefixed with a dollar sign, more complex + expression enclosed in curly braces. + + >>> tmpl = NewTextTemplate('''Dear $name, + ... + ... {# This is a comment #} + ... We have the following items for you: + ... {% for item in items %} + ... * ${'Item %d' % item} + ... {% end %} + ... ''') + >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) + Dear Joe, + <BLANKLINE> + <BLANKLINE> + We have the following items for you: + <BLANKLINE> + * Item 1 + <BLANKLINE> + * Item 2 + <BLANKLINE> + * Item 3 + <BLANKLINE> + <BLANKLINE> + + By default, no spaces or line breaks are removed. If a line break should + not be included in the output, prefix it with a backslash: + + >>> tmpl = NewTextTemplate('''Dear $name, + ... + ... {# This is a comment #}\ + ... We have the following items for you: + ... {% for item in items %}\ + ... * $item + ... {% end %}\ + ... ''') + >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) + Dear Joe, + <BLANKLINE> + We have the following items for you: + * 1 + * 2 + * 3 + <BLANKLINE> + + Backslashes are also used to escape the start delimiter of directives and + comments: + + >>> tmpl = NewTextTemplate('''Dear $name, + ... + ... \{# This is a comment #} + ... We have the following items for you: + ... {% for item in items %}\ + ... * $item + ... {% end %}\ + ... 
''') + >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) + Dear Joe, + <BLANKLINE> + {# This is a comment #} + We have the following items for you: + * 1 + * 2 + * 3 + <BLANKLINE> + + :since: version 0.5 + """ + directives = [('def', DefDirective), + ('when', WhenDirective), + ('otherwise', OtherwiseDirective), + ('for', ForDirective), + ('if', IfDirective), + ('choose', ChooseDirective), + ('with', WithDirective)] + serializer = 'text' + + _DIRECTIVE_RE = r'((?<!\\)%s\s*(\w+)\s*(.*?)\s*%s|(?<!\\)%s.*?%s)' + _ESCAPE_RE = r'\\\n|\\(\\)|\\(%s)|\\(%s)' + + def __init__(self, source, filepath=None, filename=None, loader=None, + encoding=None, lookup='strict', allow_exec=False, + delims=('{%', '%}', '{#', '#}')): + self.delimiters = delims + Template.__init__(self, source, filepath=filepath, filename=filename, + loader=loader, encoding=encoding, lookup=lookup) + + def _get_delims(self): + return self._delims + def _set_delims(self, delims): + if len(delims) != 4: + raise ValueError('delimiers tuple must have exactly four elements') + self._delims = delims + self._directive_re = re.compile(self._DIRECTIVE_RE % tuple( + [re.escape(d) for d in delims] + ), re.DOTALL) + self._escape_re = re.compile(self._ESCAPE_RE % tuple( + [re.escape(d) for d in delims[::2]] + )) + delimiters = property(_get_delims, _set_delims, """\ + The delimiters for directives and comments. This should be a four item tuple + of the form ``(directive_start, directive_end, comment_start, + comment_end)``, where each item is a string. 
+ """) + + def _parse(self, source, encoding): + """Parse the template from text input.""" + stream = [] # list of events of the "compiled" template + dirmap = {} # temporary mapping of directives to elements + depth = 0 + + source = source.read() + if isinstance(source, str): + source = source.decode(encoding or 'utf-8', 'replace') + offset = 0 + lineno = 1 + + _escape_sub = self._escape_re.sub + def _escape_repl(mo): + groups = [g for g in mo.groups() if g] + if not groups: + return '' + return groups[0] + + for idx, mo in enumerate(self._directive_re.finditer(source)): + start, end = mo.span(1) + if start > offset: + text = _escape_sub(_escape_repl, source[offset:start]) + for kind, data, pos in interpolate(text, self.filepath, lineno, + lookup=self.lookup): + stream.append((kind, data, pos)) + lineno += len(text.splitlines()) + + lineno += len(source[start:end].splitlines()) + command, value = mo.group(2, 3) + + if command == 'include': + pos = (self.filename, lineno, 0) + value = list(interpolate(value, self.filepath, lineno, 0, + lookup=self.lookup)) + if len(value) == 1 and value[0][0] is TEXT: + value = value[0][1] + stream.append((INCLUDE, (value, None, []), pos)) + + elif command == 'python': + if not self.allow_exec: + raise TemplateSyntaxError('Python code blocks not allowed', + self.filepath, lineno) + try: + suite = Suite(value, self.filepath, lineno, + lookup=self.lookup) + except SyntaxError, err: + raise TemplateSyntaxError(err, self.filepath, + lineno + (err.lineno or 1) - 1) + pos = (self.filename, lineno, 0) + stream.append((EXEC, suite, pos)) + + elif command == 'end': + depth -= 1 + if depth in dirmap: + directive, start_offset = dirmap.pop(depth) + substream = stream[start_offset:] + stream[start_offset:] = [(SUB, ([directive], substream), + (self.filepath, lineno, 0))] + + elif command: + cls = self.get_directive(command) + if cls is None: + raise BadDirectiveError(command) + directive = 0, cls, value, None, (self.filepath, lineno, 0) + 
dirmap[depth] = (directive, len(stream)) + depth += 1 + + offset = end + + if offset < len(source): + text = _escape_sub(_escape_repl, source[offset:]) + for kind, data, pos in interpolate(text, self.filepath, lineno, + lookup=self.lookup): + stream.append((kind, data, pos)) + + return stream + + +class OldTextTemplate(Template): + """Legacy implementation of the old syntax text-based templates. This class + is provided in a transition phase for backwards compatibility. New code + should use the `NewTextTemplate` class and the improved syntax it provides. + + >>> tmpl = OldTextTemplate('''Dear $name, + ... + ... We have the following items for you: + ... #for item in items + ... * $item + ... #end + ... + ... All the best, + ... Foobar''') + >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) + Dear Joe, + <BLANKLINE> + We have the following items for you: + * 1 + * 2 + * 3 + <BLANKLINE> + All the best, + Foobar + """ + directives = [('def', DefDirective), + ('when', WhenDirective), + ('otherwise', OtherwiseDirective), + ('for', ForDirective), + ('if', IfDirective), + ('choose', ChooseDirective), + ('with', WithDirective)] + serializer = 'text' + + _DIRECTIVE_RE = re.compile(r'(?:^[ \t]*(?<!\\)#(end).*\n?)|' + r'(?:^[ \t]*(?<!\\)#((?:\w+|#).*)\n?)', + re.MULTILINE) + + def _parse(self, source, encoding): + """Parse the template from text input.""" + stream = [] # list of events of the "compiled" template + dirmap = {} # temporary mapping of directives to elements + depth = 0 + + source = source.read() + if isinstance(source, str): + source = source.decode(encoding or 'utf-8', 'replace') + offset = 0 + lineno = 1 + + for idx, mo in enumerate(self._DIRECTIVE_RE.finditer(source)): + start, end = mo.span() + if start > offset: + text = source[offset:start] + for kind, data, pos in interpolate(text, self.filepath, lineno, + lookup=self.lookup): + stream.append((kind, data, pos)) + lineno += len(text.splitlines()) + + text = 
source[start:end].lstrip()[1:] + lineno += len(text.splitlines()) + directive = text.split(None, 1) + if len(directive) > 1: + command, value = directive + else: + command, value = directive[0], None + + if command == 'end': + depth -= 1 + if depth in dirmap: + directive, start_offset = dirmap.pop(depth) + substream = stream[start_offset:] + stream[start_offset:] = [(SUB, ([directive], substream), + (self.filepath, lineno, 0))] + elif command == 'include': + pos = (self.filename, lineno, 0) + stream.append((INCLUDE, (value.strip(), None, []), pos)) + elif command != '#': + cls = self.get_directive(command) + if cls is None: + raise BadDirectiveError(command) + directive = 0, cls, value, None, (self.filepath, lineno, 0) + dirmap[depth] = (directive, len(stream)) + depth += 1 + + offset = end + + if offset < len(source): + text = source[offset:].replace('\\#', '#') + for kind, data, pos in interpolate(text, self.filepath, lineno, + lookup=self.lookup): + stream.append((kind, data, pos)) + + return stream + + +TextTemplate = OldTextTemplate diff --git a/websdk/genshi/util.py b/websdk/genshi/util.py new file mode 100644 index 0000000..b964a01 --- /dev/null +++ b/websdk/genshi/util.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. 
"""Various utility classes and functions."""

try:
    import htmlentitydefs as entities
except ImportError:  # Python 3 renamed the module to ``html.entities``
    from html import entities
import re

try:
    unichr
except NameError:  # Python 3 has no ``unichr``; ``chr`` does the same job
    unichr = chr

__docformat__ = 'restructuredtext en'


class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items, and
    discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the least
    recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most recently
    used:

    >>> for key in cache:
    ...     print(key)
    D
    A
    C

    This code is based on the LRUCache class from ``myghtyutils.util``, written
    by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    # NOTE: the entries live in ``self._dict`` (key -> ``_Item``), threaded on
    # a doubly linked list running from ``head`` (most recently used) to
    # ``tail`` (least recently used). Only the methods overridden below
    # consult that storage; ``dict`` methods that are merely inherited (such
    # as ``get`` or ``keys``) operate on the base dictionary, which this class
    # never populates.

    class _Item(object):
        """Linked-list node holding one cached key/value pair."""
        def __init__(self, key, value):
            self.prv = self.nxt = None
            self.key = key
            self.value = value
        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache.

        :param capacity: the maximum number of items to keep
        """
        self._dict = dict()
        self.capacity = capacity
        self.head = None
        self.tail = None

    def __contains__(self, key):
        # Membership tests do not count as a "use" of the item
        return key in self._dict

    def __iter__(self):
        # Walk the list from the most to the least recently used key
        cur = self.head
        while cur:
            yield cur.key
            cur = cur.nxt

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        """Return the value for `key` and mark it as most recently used.

        :raises KeyError: if the key is not in the cache
        """
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        """Store `value` under `key` as the most recently used item, evicting
        the least recently used item(s) if the capacity is exceeded.
        """
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            self._insert_item(item)
        else:
            item.value = value
            self._update_item(item)
        self._manage_size()

    def __repr__(self):
        return repr(self._dict)

    def _insert_item(self, item):
        """Link a new item in at the head of the list."""
        item.prv = None
        item.nxt = self.head
        if self.head is not None:
            self.head.prv = item
        else:
            # First item in an empty list is both head and tail
            self.tail = item
        self.head = item
        self._manage_size()

    def _manage_size(self):
        """Drop items from the tail until the cache fits its capacity."""
        while len(self._dict) > self.capacity:
            del self._dict[self.tail.key]
            if self.tail is not self.head:
                self.tail = self.tail.prv
                self.tail.nxt = None
            else:
                # That was the only item left
                self.head = self.tail = None

    def _update_item(self, item):
        """Move an already linked item to the head of the list."""
        if self.head is item:
            return

        # Unlink the item; it is not the head, so ``item.prv`` exists
        prv = item.prv
        prv.nxt = item.nxt
        if item.nxt is not None:
            item.nxt.prv = prv
        else:
            self.tail = prv

        # Relink it in front of the current head
        item.prv = None
        item.nxt = self.head
        self.head.prv = item
        self.head = item


def flatten(items):
    """Flattens a potentially nested sequence into a flat list.

    :param items: the sequence to flatten
    :return: a new list with the items of all nested lists, tuples, sets and
             frozensets inlined in order

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    retval = []
    for item in items:
        if isinstance(item, (frozenset, list, set, tuple)):
            retval += flatten(item)
        else:
            retval.append(item)
    return retval


def plaintext(text, keeplinebreaks=True):
    """Return the text with all entities and tags removed.

    >>> plaintext('<b>1 &lt; 2</b>')
    u'1 < 2'

    The `keeplinebreaks` parameter can be set to ``False`` to replace any line
    breaks by simple spaces:

    >>> plaintext('''<b>1
    ... &lt;
    ... 2</b>''', keeplinebreaks=False)
    u'1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept intact
    :return: the text with tags and entities removed
    """
    text = stripentities(striptags(text))
    if not keeplinebreaks:
        text = text.replace('\n', ' ')
    return text


_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    >>> stripentities('1 &lt; 2')
    u'1 < 2'
    >>> stripentities('more &hellip;')
    u'more \u2026'
    >>> stripentities('&#8230;')
    u'\u2026'
    >>> stripentities('&#x2026;')
    u'\u2026'
    >>> stripentities('&#X2026;')
    u'\u2026'

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.

    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
    u'1 &lt; 2 \u2026'

    Named entities that are not known are reduced to their bare name, unless
    `keepxmlentities` is set, in which case they are left untouched.

    :param text: the text to process
    :param keepxmlentities: whether the core XML entities (and unknown named
                            entities) should be left intact
    :return: the text with the entities replaced
    """
    def _replace_entity(match):
        if match.group(1): # numeric entity
            ref = match.group(1)
            # The pattern accepts both ``&#x..;`` and ``&#X..;``, so the hex
            # prefix has to be recognized in either case
            if ref[:1] in ('x', 'X'):
                ref = int(ref[1:], 16)
            else:
                ref = int(ref, 10)
            return unichr(ref)
        else: # character entity
            ref = match.group(2)
            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
                return '&%s;' % ref
            try:
                return unichr(entities.name2codepoint[ref])
            except KeyError:
                if keepxmlentities:
                    return '&amp;%s;' % ref
                else:
                    return ref
    return _STRIPENTITIES_RE.sub(_replace_entity, text)


_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)')
def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)


def stringrepr(string):
    """Return a single-quoted, ASCII-only literal representation of a string.

    Characters outside of ASCII are written as backslash escapes; if any were
    escaped the literal gets a ``u`` prefix. Only single quotes are escaped
    in addition to that.

    :param string: the string to represent
    :return: the quoted representation
    """
    encoded = string.encode('ascii', 'backslashreplace')
    if not isinstance(encoded, str):
        # Python 3: ``encode`` produced bytes, turn them back into text
        encoded = encoded.decode('ascii')
    quoted = "'" + encoded.replace("'", "\\'") + "'"
    if len(encoded) > len(string):
        # Something was escaped, so the input contained non-ASCII characters
        return 'u' + quoted
    return quoted


# Compatibility fallback implementations for older Python versions

try:
    all = all
    any = any
except NameError:
    def any(S):
        for x in S:
            if x:
                return True
        return False

    def all(S):
        for x in S:
            if not x:
                return False
        return True