Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/genshi
diff options
context:
space:
mode:
Diffstat (limited to 'genshi')
-rw-r--r--genshi/__init__.py26
-rw-r--r--genshi/builder.py359
-rw-r--r--genshi/core.py727
-rw-r--r--genshi/filters/__init__.py20
-rw-r--r--genshi/filters/html.py453
-rw-r--r--genshi/filters/i18n.py1238
-rw-r--r--genshi/filters/transform.py1310
-rw-r--r--genshi/input.py443
-rw-r--r--genshi/output.py838
-rw-r--r--genshi/path.py1528
-rw-r--r--genshi/template/__init__.py23
-rw-r--r--genshi/template/_ast24.py446
-rw-r--r--genshi/template/ast24.py505
-rw-r--r--genshi/template/astutil.py784
-rw-r--r--genshi/template/base.py634
-rw-r--r--genshi/template/directives.py725
-rw-r--r--genshi/template/eval.py629
-rw-r--r--genshi/template/interpolation.py153
-rw-r--r--genshi/template/loader.py344
-rw-r--r--genshi/template/markup.py397
-rw-r--r--genshi/template/plugin.py176
-rw-r--r--genshi/template/text.py333
-rw-r--r--genshi/util.py274
23 files changed, 12365 insertions, 0 deletions
diff --git a/genshi/__init__.py b/genshi/__init__.py
new file mode 100644
index 0000000..02f4347
--- /dev/null
+++ b/genshi/__init__.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""This package provides various means for generating and processing web markup
+(XML or HTML).
+
+The design is centered around the concept of streams of markup events (similar
+in concept to SAX parsing events) which can be processed in a uniform manner
+independently of where or how they are produced.
+"""
+
+__docformat__ = 'restructuredtext en'
+__version__ = '0.6'
+
+from genshi.core import *
+from genshi.input import ParseError, XML, HTML
diff --git a/genshi/builder.py b/genshi/builder.py
new file mode 100644
index 0000000..724e364
--- /dev/null
+++ b/genshi/builder.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support for programmatically generating markup streams from Python code using
+a very simple syntax. The main entry point to this module is the `tag` object
+(which is actually an instance of the ``ElementFactory`` class). You should
+rarely (if ever) need to directly import and use any of the other classes in
+this module.
+
+Elements can be created using the `tag` object using attribute access. For
+example:
+
+>>> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.')
+>>> doc
+<Element "p">
+
+This produces an `Element` instance which can be further modified to add child
+nodes and attributes. This is done by "calling" the element: positional
+arguments are added as child nodes (alternatively, the `Element.append` method
+can be used for that purpose), whereas keyword arguments are added as
+attributes:
+
+>>> doc(tag.br)
+<Element "p">
+>>> print(doc)
+<p>Some text and <a href="http://example.org/">a link</a>.<br/></p>
+
+If an attribute name collides with a Python keyword, simply append an underscore
+to the name:
+
+>>> doc(class_='intro')
+<Element "p">
+>>> print(doc)
+<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
+
+As shown above, an `Element` can easily be directly rendered to XML text by
+printing it or using the Python ``str()`` function. This is basically a
+shortcut for converting the `Element` to a stream and serializing that
+stream:
+
+>>> stream = doc.generate()
+>>> stream #doctest: +ELLIPSIS
+<genshi.core.Stream object at ...>
+>>> print(stream)
+<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
+
+
+The `tag` object also allows creating "fragments", which are basically lists
+of nodes (elements or text) that don't have a parent element. This can be useful
+for creating snippets of markup that are attached to a parent element later (for
+example in a template). Fragments are created by calling the `tag` object, which
+returns an object of type `Fragment`:
+
+>>> fragment = tag('Hello, ', tag.em('world'), '!')
+>>> fragment
+<Fragment>
+>>> print(fragment)
+Hello, <em>world</em>!
+"""
+
+from genshi.core import Attrs, Markup, Namespace, QName, Stream, \
+ START, END, TEXT
+
+__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag']
+__docformat__ = 'restructuredtext en'
+
+
+class Fragment(object):
+    """A markup fragment: an ordered list of child nodes (elements, streams
+    or text) that has no parent element of its own.
+    """
+    __slots__ = ['children']
+
+    def __init__(self):
+        """Create a new, empty fragment."""
+        self.children = []
+
+    def __add__(self, other):
+        """Return a new `Fragment` holding this node followed by `other`."""
+        combined = Fragment()
+        combined.append(self)
+        combined.append(other)
+        return combined
+
+    def __call__(self, *args):
+        """Append every positional argument as a child node.
+
+        :return: the fragment itself
+        :see: `append`
+        """
+        for node in args:
+            self.append(node)
+        return self
+
+    def __iter__(self):
+        return self._generate()
+
+    def __repr__(self):
+        return '<%s>' % (type(self).__name__,)
+
+    def __str__(self):
+        stream = self.generate()
+        return str(stream)
+
+    def __unicode__(self):
+        stream = self.generate()
+        return unicode(stream)
+
+    def __html__(self):
+        return Markup(self.generate())
+
+    def append(self, node):
+        """Append an element or string as child node.
+
+        :param node: the node to append; can be an `Element`, `Fragment`, or a
+                     `Stream`, or a Python string or number; `None` is
+                     ignored, and any other iterable has its items appended
+                     one by one
+        """
+        if isinstance(node, (Stream, Element, basestring, int, float, long)):
+            # Known/primitive types are stored directly, skipping the
+            # (slower) iterability probe below
+            self.children.append(node)
+            return
+        if isinstance(node, Fragment):
+            # A plain fragment is flattened into this one
+            self.children.extend(node.children)
+            return
+        if node is None:
+            return
+        try:
+            for item in node:
+                self.append(item)
+        except TypeError:
+            # Not iterable after all: keep the object itself as a child
+            self.children.append(node)
+
+    def _generate(self):
+        """Yield the markup events of all children, in order."""
+        for child in self.children:
+            if isinstance(child, Fragment):
+                substream = child._generate()
+            elif isinstance(child, Stream):
+                substream = child
+            else:
+                # Anything else is emitted as a single text event
+                text = child
+                if not isinstance(text, basestring):
+                    text = unicode(text)
+                yield TEXT, text, (None, -1, -1)
+                continue
+            for event in substream:
+                yield event
+
+    def generate(self):
+        """Return a markup event stream for the fragment.
+
+        :rtype: `Stream`
+        """
+        return Stream(self._generate())
+
+
+def _kwargs_to_attrs(kwargs):
+    """Convert a dict of keyword arguments into an `Attrs` instance.
+
+    Trailing underscores are removed from each name and the remaining
+    underscores become hyphens. Entries whose value is `None` are dropped, and
+    only the first entry seen for a given converted name is kept.
+    """
+    pairs = []
+    seen = set()
+    for key, value in kwargs.items():
+        attr_name = key.rstrip('_').replace('_', '-')
+        if value is None or attr_name in seen:
+            continue
+        pairs.append((QName(attr_name), unicode(value)))
+        seen.add(attr_name)
+    return Attrs(pairs)
+
+
+class Element(Fragment):
+    """Simple XML output generator based on the builder pattern.
+
+    Construct XML elements by passing the tag name to the constructor:
+
+    >>> print(Element('strong'))
+    <strong/>
+
+    Attributes can be specified using keyword arguments. The values of the
+    arguments will be converted to strings and any special XML characters
+    escaped:
+
+    >>> print(Element('textarea', rows=10, cols=60))
+    <textarea rows="10" cols="60"/>
+    >>> print(Element('span', title='1 < 2'))
+    <span title="1 &lt; 2"/>
+    >>> print(Element('span', title='"baz"'))
+    <span title="&#34;baz&#34;"/>
+
+    The " character is escaped using a numerical entity.
+    The order in which attributes are rendered is undefined.
+
+    If an attribute value evaluates to `None`, that attribute is not included
+    in the output:
+
+    >>> print(Element('a', name=None))
+    <a/>
+
+    Attribute names that conflict with Python keywords can be specified by
+    appending an underscore:
+
+    >>> print(Element('div', class_='warning'))
+    <div class="warning"/>
+
+    Nested elements can be added to an element using the call notation, which
+    also accepts keyword arguments for adding attributes, as one would do in
+    the constructor.
+
+    >>> print(Element('ul')(Element('li'), Element('li')))
+    <ul><li/><li/></ul>
+    >>> print(Element('a')('Label'))
+    <a>Label</a>
+    >>> print(Element('a')('Label', href="target"))
+    <a href="target">Label</a>
+
+    Text nodes can be nested in an element by adding strings instead of
+    elements. Any special characters in the strings are escaped automatically:
+
+    >>> print(Element('em')('Hello world'))
+    <em>Hello world</em>
+    >>> print(Element('em')(42))
+    <em>42</em>
+    >>> print(Element('em')('1 < 2'))
+    <em>1 &lt; 2</em>
+
+    This technique also allows mixed content:
+
+    >>> print(Element('p')('Hello ', Element('b')('world')))
+    <p>Hello <b>world</b></p>
+
+    Quotes are not escaped inside text nodes:
+    >>> print(Element('p')('"Hello"'))
+    <p>"Hello"</p>
+
+    Elements can also be combined with other elements or strings using the
+    addition operator, which results in a `Fragment` object that contains the
+    operands:
+
+    >>> print(Element('br') + 'some text' + Element('br'))
+    <br/>some text<br/>
+
+    Elements with a namespace can be generated using the `Namespace` and/or
+    `QName` classes:
+
+    >>> from genshi.core import Namespace
+    >>> xhtml = Namespace('http://www.w3.org/1999/xhtml')
+    >>> print(Element(xhtml.html, lang='en'))
+    <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
+    """
+    __slots__ = ['tag', 'attrib']
+
+    def __init__(self, tag_, **attrib):
+        """Create the element.
+
+        :param tag_: the tag name, as a string or `QName`
+        :param attrib: the initial attributes, given as keyword arguments
+        """
+        Fragment.__init__(self)
+        self.tag = QName(tag_)
+        self.attrib = _kwargs_to_attrs(attrib)
+
+    def __call__(self, *args, **kwargs):
+        """Add keyword arguments as attributes and positional arguments as
+        child nodes.
+
+        :return: the element itself so that calls can be chained
+        :rtype: `Element`
+        :see: `Fragment.append`
+        """
+        # Attributes are merged first, then the children are appended
+        self.attrib = self.attrib | _kwargs_to_attrs(kwargs)
+        for node in args:
+            self.append(node)
+        return self
+
+    def __repr__(self):
+        return '<%s "%s">' % (type(self).__name__, self.tag)
+
+    def _generate(self):
+        """Yield the start tag, the events of all children, and the end tag."""
+        yield START, (self.tag, self.attrib), (None, -1, -1)
+        for kind, data, pos in Fragment._generate(self):
+            yield kind, data, pos
+        yield END, self.tag, (None, -1, -1)
+
+    def generate(self):
+        """Return a markup event stream for the element.
+
+        :rtype: `Stream`
+        """
+        events = self._generate()
+        return Stream(events)
+
+
+class ElementFactory(object):
+    """Factory for `Element` objects.
+
+    A new element is created simply by accessing a correspondingly named
+    attribute of the factory object:
+
+    >>> factory = ElementFactory()
+    >>> print(factory.foo)
+    <foo/>
+    >>> print(factory.foo(id=2))
+    <foo id="2"/>
+
+    Markup fragments (lists of nodes without a parent element) can be created
+    by calling the factory:
+
+    >>> print(factory('Hello, ', factory.em('world'), '!'))
+    Hello, <em>world</em>!
+
+    A factory can also be bound to a specific namespace:
+
+    >>> factory = ElementFactory('http://www.w3.org/1999/xhtml')
+    >>> print(factory.html(lang="en"))
+    <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
+
+    The namespace for a specific element can be altered on an existing factory
+    by specifying the new namespace using item access:
+
+    >>> factory = ElementFactory()
+    >>> print(factory.html(factory['http://www.w3.org/2000/svg'].g(id=3)))
+    <html><g xmlns="http://www.w3.org/2000/svg" id="3"/></html>
+
+    Usually, the `ElementFactory` class is not used directly. Rather, the
+    `tag` instance should be used to create elements.
+    """
+
+    def __init__(self, namespace=None):
+        """Create the factory, optionally bound to the given namespace.
+
+        :param namespace: the namespace URI for any created elements, or `None`
+                          for no namespace
+        """
+        if not namespace or isinstance(namespace, Namespace):
+            # Nothing to bind, or already a `Namespace` object
+            self.namespace = namespace
+        else:
+            self.namespace = Namespace(namespace)
+
+    def __call__(self, *args):
+        """Create a fragment that has the given positional arguments as child
+        nodes.
+
+        :return: the created `Fragment`
+        :rtype: `Fragment`
+        """
+        fragment = Fragment()
+        return fragment(*args)
+
+    def __getitem__(self, namespace):
+        """Return a new factory that is bound to the specified namespace.
+
+        :param namespace: the namespace URI or `Namespace` object
+        :return: an `ElementFactory` that produces elements bound to the given
+                 namespace
+        :rtype: `ElementFactory`
+        """
+        return ElementFactory(namespace)
+
+    def __getattr__(self, name):
+        """Create an `Element` with the given name.
+
+        :param name: the tag name of the element to create
+        :return: an `Element` with the specified name
+        :rtype: `Element`
+        """
+        namespace = self.namespace
+        if namespace:
+            qualified = namespace[name]
+            if qualified:
+                return Element(qualified)
+        # No namespace bound: use the plain tag name
+        return Element(name)
+
+
+tag = ElementFactory()
+"""Global `ElementFactory` bound to the default namespace.
+
+:type: `ElementFactory`
+"""
diff --git a/genshi/core.py b/genshi/core.py
new file mode 100644
index 0000000..f7cddff
--- /dev/null
+++ b/genshi/core.py
@@ -0,0 +1,727 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Core classes for markup processing."""
+
+try:
+ reduce # builtin in Python < 3
+except NameError:
+ from functools import reduce
+from itertools import chain
+import operator
+
+from genshi.util import plaintext, stripentities, striptags, stringrepr
+
+__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
+ 'QName']
+__docformat__ = 'restructuredtext en'
+
+
+class StreamEventKind(str):
+    """A kind of event on a markup stream.
+
+    Instances are cached per value, so constructing the same kind twice
+    returns the very same object.
+    """
+    __slots__ = []
+    _instances = {}
+
+    def __new__(cls, val):
+        candidate = str.__new__(cls, val)
+        # Hand back the cached instance when one exists; otherwise the
+        # candidate is stored and returned
+        return cls._instances.setdefault(val, candidate)
+
+
+class Stream(object):
+ """Represents a stream of markup events.
+
+ This class is basically an iterator over the events.
+
+ Stream events are tuples of the form::
+
+ (kind, data, position)
+
+ where ``kind`` is the event kind (such as `START`, `END`, `TEXT`, etc),
+ ``data`` depends on the kind of event, and ``position`` is a
+ ``(filename, line, offset)`` tuple that contains the location of the
+ original element or text in the input. If the original location is unknown,
+ ``position`` is ``(None, -1, -1)``.
+
+ Also provided are ways to serialize the stream to text. The `serialize()`
+ method will return an iterator over generated strings, while `render()`
+ returns the complete generated text at once. Both accept various parameters
+ that impact the way the stream is serialized.
+ """
+ __slots__ = ['events', 'serializer']
+
+ START = StreamEventKind('START') #: a start tag
+ END = StreamEventKind('END') #: an end tag
+ TEXT = StreamEventKind('TEXT') #: literal text
+ XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
+ DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
+ START_NS = StreamEventKind('START_NS') #: start namespace mapping
+ END_NS = StreamEventKind('END_NS') #: end namespace mapping
+ START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
+ END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
+ PI = StreamEventKind('PI') #: processing instruction
+ COMMENT = StreamEventKind('COMMENT') #: comment
+
+ def __init__(self, events, serializer=None):
+ """Initialize the stream with a sequence of markup events.
+
+ :param events: a sequence or iterable providing the events
+ :param serializer: the default serialization method to use for this
+ stream
+
+ :note: Changed in 0.5: added the `serializer` argument
+ """
+ self.events = events #: The underlying iterable producing the events
+ self.serializer = serializer #: The default serialization method
+
+ def __iter__(self):
+ # Iterates the underlying events object directly; no copy is made
+ return iter(self.events)
+
+ def __or__(self, function):
+ """Override the "bitwise or" operator to apply filters or serializers
+ to the stream, providing a syntax similar to pipes on Unix shells.
+
+ Assume the following stream produced by the `HTML` function:
+
+ >>> from genshi.input import HTML
+ >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
+ >>> print(html)
+ <p onclick="alert('Whoa')">Hello, world!</p>
+
+ A filter such as the HTML sanitizer can be applied to that stream using
+ the pipe notation as follows:
+
+ >>> from genshi.filters import HTMLSanitizer
+ >>> sanitizer = HTMLSanitizer()
+ >>> print(html | sanitizer)
+ <p>Hello, world!</p>
+
+ Filters can be any function that accepts and produces a stream (where
+ a stream is anything that iterates over events):
+
+ >>> def uppercase(stream):
+ ... for kind, data, pos in stream:
+ ... if kind is TEXT:
+ ... data = data.upper()
+ ... yield kind, data, pos
+ >>> print(html | sanitizer | uppercase)
+ <p>HELLO, WORLD!</p>
+
+ Serializers can also be used with this notation:
+
+ >>> from genshi.output import TextSerializer
+ >>> output = TextSerializer()
+ >>> print(html | sanitizer | uppercase | output)
+ HELLO, WORLD!
+
+ Commonly, serializers should be used at the end of the "pipeline";
+ using them somewhere in the middle may produce unexpected results.
+
+ :param function: the callable object that should be applied as a filter
+ :return: the filtered stream
+ :rtype: `Stream`
+ """
+ # `_ensure` turns any non-event items produced by the filter into
+ # events; the serializer setting of this stream is carried over
+ return Stream(_ensure(function(self)), serializer=self.serializer)
+
+ def filter(self, *filters):
+ """Apply filters to the stream.
+
+ This method returns a new stream with the given filters applied. The
+ filters must be callables that accept the stream object as parameter,
+ and return the filtered stream.
+
+ The call::
+
+ stream.filter(filter1, filter2)
+
+ is equivalent to::
+
+ stream | filter1 | filter2
+
+ :param filters: one or more callable objects that should be applied as
+ filters
+ :return: the filtered stream
+ :rtype: `Stream`
+ """
+ # Folds the filters from left to right through the `|` operator,
+ # starting with this stream
+ return reduce(operator.or_, (self,) + filters)
+
+ def render(self, method=None, encoding='utf-8', out=None, **kwargs):
+ """Return a string representation of the stream.
+
+ Any additional keyword arguments are passed to the serializer, and thus
+ depend on the `method` parameter value.
+
+ :param method: determines how the stream is serialized; can be either
+ "xml", "xhtml", "html", "text", or a custom serializer
+ class; if `None`, the default serialization method of
+ the stream is used
+ :param encoding: how the output string should be encoded; if set to
+ `None`, this method returns a `unicode` object
+ :param out: a file-like object that the output should be written to
+ instead of being returned as one big string; note that if
+ this is a file or socket (or similar), the `encoding` must
+ not be `None` (that is, the output must be encoded)
+ :return: a `str` or `unicode` object (depending on the `encoding`
+ parameter), or `None` if the `out` parameter is provided
+ :rtype: `basestring`
+
+ :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
+ :note: Changed in 0.5: added the `out` parameter
+ """
+ # NOTE(review): imported at call time rather than at module level,
+ # presumably to avoid a circular import with genshi.output -- confirm
+ from genshi.output import encode
+ if method is None:
+ # Fall back to the stream's own default, and to XML if it has none
+ method = self.serializer or 'xml'
+ generator = self.serialize(method=method, **kwargs)
+ return encode(generator, method=method, encoding=encoding, out=out)
+
+ def select(self, path, namespaces=None, variables=None):
+ """Return a new stream that contains the events matching the given
+ XPath expression.
+
+ >>> from genshi import HTML
+ >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
+ >>> print(stream.select('elem'))
+ <elem>foo</elem><elem>bar</elem>
+ >>> print(stream.select('elem/text()'))
+ foobar
+
+ Note that the outermost element of the stream becomes the *context
+ node* for the XPath test. That means that the expression "doc" would
+ not match anything in the example above, because it only tests against
+ child elements of the outermost element:
+
+ >>> print(stream.select('doc'))
+ <BLANKLINE>
+
+ You can use the "." expression to match the context node itself
+ (although that usually makes little sense):
+
+ >>> print(stream.select('.'))
+ <doc><elem>foo</elem><elem>bar</elem></doc>
+
+ :param path: a string containing the XPath expression
+ :param namespaces: mapping of namespace prefixes used in the path
+ :param variables: mapping of variable names to values
+ :return: the selected substream
+ :rtype: `Stream`
+ :raises PathSyntaxError: if the given path expression is invalid or not
+ supported
+ """
+ from genshi.path import Path
+ return Path(path).select(self, namespaces, variables)
+
+ def serialize(self, method='xml', **kwargs):
+ """Generate strings corresponding to a specific serialization of the
+ stream.
+
+ Unlike the `render()` method, this method is a generator that returns
+ the serialized output incrementally, as opposed to returning a single
+ string.
+
+ Any additional keyword arguments are passed to the serializer, and thus
+ depend on the `method` parameter value.
+
+ :param method: determines how the stream is serialized; can be either
+ "xml", "xhtml", "html", "text", or a custom serializer
+ class; if `None`, the default serialization method of
+ the stream is used
+ :return: an iterator over the serialization results (`Markup` or
+ `unicode` objects, depending on the serialization method)
+ :rtype: ``iterator``
+ :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
+ """
+ from genshi.output import get_serializer
+ if method is None:
+ # Fall back to the stream's own default, and to XML if it has none
+ method = self.serializer or 'xml'
+ return get_serializer(method, **kwargs)(_ensure(self))
+
+ def __str__(self):
+ # Uses the defaults of `render()`: the stream's default method and
+ # 'utf-8' encoding
+ return self.render()
+
+ def __unicode__(self):
+ # `encoding=None` asks `render()` for an unencoded `unicode` object
+ return self.render(encoding=None)
+
+ def __html__(self):
+ # The stream itself is handed back, without rendering it
+ return self
+
+
+START = Stream.START
+END = Stream.END
+TEXT = Stream.TEXT
+XML_DECL = Stream.XML_DECL
+DOCTYPE = Stream.DOCTYPE
+START_NS = Stream.START_NS
+END_NS = Stream.END_NS
+START_CDATA = Stream.START_CDATA
+END_CDATA = Stream.END_CDATA
+PI = Stream.PI
+COMMENT = Stream.COMMENT
+
+
+def _ensure(stream):
+    """Ensure that every item on the stream is actually a markup event.
+
+    The first item decides how the rest is treated: if it is a 3-tuple the
+    stream is passed through unchanged; otherwise every item is converted,
+    either via its ``totuple()`` method or into a `TEXT` event holding its
+    unicode representation. An empty stream yields nothing.
+
+    :param stream: an iterable of markup events or of arbitrary objects
+    :return: a generator over markup events
+    """
+    stream = iter(stream)
+    try:
+        event = stream.next()
+    except StopIteration:
+        # Empty stream: finish explicitly instead of letting StopIteration
+        # escape from the generator body (an error under PEP 479)
+        return
+
+    # Check whether the iterable is a real markup event stream by examining the
+    # first item it yields; if it's not we'll need to do some conversion
+    if type(event) is not tuple or len(event) != 3:
+        for event in chain([event], stream):
+            if hasattr(event, 'totuple'):
+                event = event.totuple()
+            else:
+                event = TEXT, unicode(event), (None, -1, -1)
+            yield event
+        return
+
+    # This looks like a markup event stream, so we'll just pass it through
+    # unchanged
+    yield event
+    for event in stream:
+        yield event
+
+
+class Attrs(tuple):
+    """Immutable sequence type that stores the attributes of an element.
+
+    Ordering of the attributes is preserved, while access by name is also
+    supported.
+
+    >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+    >>> attrs
+    Attrs([('href', '#'), ('title', 'Foo')])
+
+    >>> 'href' in attrs
+    True
+    >>> 'tabindex' in attrs
+    False
+    >>> attrs.get('title')
+    'Foo'
+
+    Instances may not be manipulated directly. Instead, the operators ``|`` and
+    ``-`` can be used to produce new instances that have specific attributes
+    added, replaced or removed.
+
+    To remove an attribute, use the ``-`` operator. The right hand side can be
+    either a string or a set/sequence of strings, identifying the name(s) of
+    the attribute(s) to remove:
+
+    >>> attrs - 'title'
+    Attrs([('href', '#')])
+    >>> attrs - ('title', 'href')
+    Attrs()
+
+    The original instance is not modified, but the operator can of course be
+    used with an assignment:
+
+    >>> attrs
+    Attrs([('href', '#'), ('title', 'Foo')])
+    >>> attrs -= 'title'
+    >>> attrs
+    Attrs([('href', '#')])
+
+    To add a new attribute, use the ``|`` operator, where the right hand value
+    is a sequence of ``(name, value)`` tuples (which includes `Attrs`
+    instances):
+
+    >>> attrs | [('title', 'Bar')]
+    Attrs([('href', '#'), ('title', 'Bar')])
+
+    If the attributes already contain an attribute with a given name, the value
+    of that attribute is replaced:
+
+    >>> attrs | [('href', 'http://example.org/')]
+    Attrs([('href', 'http://example.org/')])
+    """
+    __slots__ = []
+
+    def __contains__(self, name):
+        """Return whether the list includes an attribute with the specified
+        name.
+
+        :return: `True` if the list includes the attribute, `False` otherwise
+        :rtype: `bool`
+        """
+        for attr, _ in self:
+            if attr == name:
+                return True
+        # Return a real boolean rather than falling off the end with `None`
+        return False
+
+    def __getitem__(self, i):
+        """Return an item or slice of the attributes list.
+
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1]
+        ('title', 'Foo')
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
+        items = tuple.__getitem__(self, i)
+        if type(i) is slice:
+            # Slices stay `Attrs` instances instead of decaying to tuples
+            return Attrs(items)
+        return items
+
+    def __getslice__(self, i, j):
+        """Return a slice of the attributes list.
+
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
+        return Attrs(tuple.__getslice__(self, i, j))
+
+    def __or__(self, attrs):
+        """Return a new instance that contains the attributes in `attrs` in
+        addition to any already existing attributes.
+
+        Values of attributes that already exist are replaced in place (the
+        original position is kept); new attributes are added at the end.
+
+        :param attrs: a sequence of ``(name, value)`` tuples
+        :return: a new instance with the merged attributes
+        :rtype: `Attrs`
+        """
+        repl = dict([(an, av) for an, av in attrs if an in self])
+        return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] +
+                     [(an, av) for an, av in attrs if an not in self])
+
+    def __repr__(self):
+        if not self:
+            return 'Attrs()'
+        return 'Attrs([%s])' % ', '.join([repr(item) for item in self])
+
+    def __sub__(self, names):
+        """Return a new instance with all attributes with a name in `names`
+        removed.
+
+        :param names: the names of the attributes to remove; either a single
+                      string or a set/sequence of strings
+        :return: a new instance with the attribute removed
+        :rtype: `Attrs`
+        """
+        if isinstance(names, basestring):
+            # A single name is wrapped so the membership test below does not
+            # degrade into a substring test
+            names = (names,)
+        return Attrs([(name, val) for name, val in self if name not in names])
+
+    def get(self, name, default=None):
+        """Return the value of the attribute with the specified name, or the
+        value of the `default` parameter if no such attribute is found.
+
+        :param name: the name of the attribute
+        :param default: the value to return when the attribute does not exist
+        :return: the attribute value, or the `default` value if that attribute
+                 does not exist
+        :rtype: `object`
+        """
+        for attr, value in self:
+            if attr == name:
+                return value
+        return default
+
+    def totuple(self):
+        """Return the attributes as a markup event.
+
+        The returned event is a `TEXT` event, the data is the value of all
+        attributes joined together.
+
+        >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
+        ('TEXT', '#Foo', (None, -1, -1))
+
+        :return: a `TEXT` event
+        :rtype: `tuple`
+        """
+        return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
+
+
+class Markup(unicode):
+ """Marks a string as being safe for inclusion in HTML/XML output without
+ needing to be escaped.
+ """
+ __slots__ = []
+
+ def __add__(self, other):
+ # The right-hand operand is escaped before it is appended
+ return Markup(unicode.__add__(self, escape(other)))
+
+ def __radd__(self, other):
+ # The left-hand operand is escaped before this markup is appended to it
+ return Markup(unicode.__add__(escape(other), self))
+
+ def __mod__(self, args):
+ # Every interpolated value is escaped first: the values of a dict, the
+ # items of a list/tuple, or the single argument otherwise
+ if isinstance(args, dict):
+ args = dict(zip(args.keys(), map(escape, args.values())))
+ elif isinstance(args, (list, tuple)):
+ args = tuple(map(escape, args))
+ else:
+ args = escape(args)
+ return Markup(unicode.__mod__(self, args))
+
+ def __mul__(self, num):
+ return Markup(unicode.__mul__(self, num))
+ __rmul__ = __mul__
+
+ def __repr__(self):
+ return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))
+
+ def join(self, seq, escape_quotes=True):
+ """Return a `Markup` object which is the concatenation of the strings
+ in the given sequence, where this `Markup` object is the separator
+ between the joined elements.
+
+ Any element in the sequence that is not a `Markup` instance is
+ automatically escaped.
+
+ :param seq: the sequence of strings to join
+ :param escape_quotes: whether double quote characters in the elements
+ should be escaped
+ :return: the joined `Markup` object
+ :rtype: `Markup`
+ :see: `escape`
+ """
+ return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
+ for item in seq]))
+
+ @classmethod
+ def escape(cls, text, quotes=True):
+ """Create a Markup instance from a string and escape special characters
+ it may contain (<, >, & and \").
+
+ >>> escape('"1 < 2"')
+ <Markup u'&#34;1 &lt; 2&#34;'>
+
+ If the `quotes` parameter is set to `False`, the \" character is left
+ as is. Escaping quotes is generally only required for strings that are
+ to be used in attribute values.
+
+ >>> escape('"1 < 2"', quotes=False)
+ <Markup u'"1 &lt; 2"'>
+
+ :param text: the text to escape
+ :param quotes: if ``True``, double quote characters are escaped in
+ addition to the other special characters
+ :return: the escaped `Markup` string
+ :rtype: `Markup`
+ """
+ # Any falsy input (not only the empty string) yields an empty instance
+ if not text:
+ return cls()
+ # Text that is already an instance of this class is passed through
+ if type(text) is cls:
+ return text
+ # Objects providing `__html__` supply their own markup, which is
+ # wrapped without further escaping
+ if hasattr(text, '__html__'):
+ return Markup(text.__html__())
+
+ # '&' is replaced first, so that the ampersands introduced by the
+ # following replacements are not escaped a second time
+ text = text.replace('&', '&amp;') \
+ .replace('<', '&lt;') \
+ .replace('>', '&gt;')
+ if quotes:
+ text = text.replace('"', '&#34;')
+ return cls(text)
+
+ def unescape(self):
+ """Reverse-escapes &, <, >, and \" and returns a `unicode` object.
+
+ >>> Markup('1 &lt; 2').unescape()
+ u'1 < 2'
+
+ :return: the unescaped string
+ :rtype: `unicode`
+ :see: `genshi.core.unescape`
+ """
+ if not self:
+ return ''
+ # '&amp;' is replaced last, so that text such as '&amp;lt;' comes out
+ # as '&lt;' rather than '<'
+ return unicode(self).replace('&#34;', '"') \
+ .replace('&gt;', '>') \
+ .replace('&lt;', '<') \
+ .replace('&amp;', '&')
+
+ def stripentities(self, keepxmlentities=False):
+ """Return a copy of the text with any character or numeric entities
+ replaced by the equivalent UTF-8 characters.
+
+ If the `keepxmlentities` parameter is provided and evaluates to `True`,
+ the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
+ ``&quot;``) are not stripped.
+
+ :return: a `Markup` instance with entities removed
+ :rtype: `Markup`
+ :see: `genshi.util.stripentities`
+ """
+ # Delegates to the module-level function imported from genshi.util
+ return Markup(stripentities(self, keepxmlentities=keepxmlentities))
+
+ def striptags(self):
+ """Return a copy of the text with all XML/HTML tags removed.
+
+ :return: a `Markup` instance with all tags removed
+ :rtype: `Markup`
+ :see: `genshi.util.striptags`
+ """
+ # Delegates to the module-level function imported from genshi.util
+ return Markup(striptags(self))
+
+
+try:
+ from genshi._speedups import Markup
+except ImportError:
+ pass # just use the Python implementation
+
+
+escape = Markup.escape
+
+
+def unescape(text):
+    """Undo the escaping of &, <, >, and \" and return a `unicode` object.
+
+    >>> unescape(Markup('1 &lt; 2'))
+    u'1 < 2'
+
+    Objects that are not `Markup` instances are handed back untouched:
+
+    >>> unescape('1 &lt; 2')
+    '1 &lt; 2'
+
+    :param text: the text to unescape
+    :return: the unescaped string
+    :rtype: `unicode`
+    """
+    if isinstance(text, Markup):
+        return text.unescape()
+    return text
+
+
+class Namespace(object):
+    """Utility class creating and testing elements with a namespace.
+
+    Internally, namespace URIs are encoded in the `QName` of any element or
+    attribute, the namespace URI being enclosed in curly braces. This class
+    helps create and test these strings.
+
+    A `Namespace` object is instantiated with the namespace URI.
+
+    >>> html = Namespace('http://www.w3.org/1999/xhtml')
+    >>> html
+    Namespace('http://www.w3.org/1999/xhtml')
+    >>> html.uri
+    u'http://www.w3.org/1999/xhtml'
+
+    The `Namespace` object can then be used to generate `QName` objects with
+    that namespace:
+
+    >>> html.body
+    QName('http://www.w3.org/1999/xhtml}body')
+    >>> html.body.localname
+    u'body'
+    >>> html.body.namespace
+    u'http://www.w3.org/1999/xhtml'
+
+    The same works using item access notation, which is useful for element or
+    attribute names that are not valid Python identifiers:
+
+    >>> html['body']
+    QName('http://www.w3.org/1999/xhtml}body')
+
+    A `Namespace` object can also be used to test whether a specific `QName`
+    belongs to that namespace using the ``in`` operator:
+
+    >>> qname = html.body
+    >>> qname in html
+    True
+    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
+    False
+    """
+    def __new__(cls, uri):
+        # An object that already is an instance of exactly this class is
+        # reused instead of being wrapped again
+        if type(uri) is not cls:
+            return object.__new__(cls)
+        return uri
+
+    def __getnewargs__(self):
+        return (self.uri,)
+
+    def __getstate__(self):
+        return self.uri
+
+    def __setstate__(self, uri):
+        self.uri = uri
+
+    def __init__(self, uri):
+        self.uri = unicode(uri)
+
+    def __contains__(self, qname):
+        return qname.namespace == self.uri
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __eq__(self, other):
+        # Compare against the URI of another namespace, or against the other
+        # object itself (such as a plain string)
+        if isinstance(other, Namespace):
+            return self.uri == other.uri
+        return self.uri == other
+
+    def __getitem__(self, name):
+        qualified = self.uri + '}' + name
+        return QName(qualified)
+    # Attribute access creates qualified names the same way item access does
+    __getattr__ = __getitem__
+
+    def __hash__(self):
+        return hash(self.uri)
+
+    def __repr__(self):
+        class_name = type(self).__name__
+        return '%s(%s)' % (class_name, stringrepr(self.uri))
+
+    def __str__(self):
+        return self.uri.encode('utf-8')
+
+    def __unicode__(self):
+        return self.uri
+
+
+# The namespace used by attributes such as xml:lang and xml:space
+XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
+
+
+class QName(unicode):
+    """A qualified element or attribute name.
+
+    The unicode value of instances of this class contains the qualified name of
+    the element or attribute, in the form ``{namespace-uri}local-name``. The
+    namespace URI can be obtained through the additional `namespace` attribute,
+    while the local name can be accessed through the `localname` attribute.
+
+    >>> qname = QName('foo')
+    >>> qname
+    QName('foo')
+    >>> qname.localname
+    u'foo'
+    >>> qname.namespace
+
+    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
+    >>> qname
+    QName('http://www.w3.org/1999/xhtml}body')
+    >>> qname.localname
+    u'body'
+    >>> qname.namespace
+    u'http://www.w3.org/1999/xhtml'
+
+    The leading curly brace is optional; both spellings produce equal names:
+
+    >>> QName('{http://www.w3.org/1999/xhtml}body') == qname
+    True
+    """
+    __slots__ = ['namespace', 'localname']
+
+    def __new__(cls, qname):
+        """Create the `QName` instance.
+
+        :param qname: the qualified name as a string of the form
+                      ``{namespace-uri}local-name``, where the leading curly
+                      brace is optional
+        """
+        if type(qname) is cls:
+            return qname
+
+        # Strip the optional leading brace before splitting, and build the
+        # stored value from the stripped form so that a name given with the
+        # brace does not end up with a doubled ``{{`` prefix
+        stripped = qname.lstrip('{')
+        parts = stripped.split('}', 1)
+        if len(parts) > 1:
+            self = unicode.__new__(cls, '{%s' % stripped)
+            self.namespace, self.localname = map(unicode, parts)
+        else:
+            # No namespace part: the name is stored exactly as given
+            self = unicode.__new__(cls, qname)
+            self.namespace, self.localname = None, unicode(qname)
+        return self
+
+    def __getnewargs__(self):
+        return (self.lstrip('{'),)
+
+    def __repr__(self):
+        return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
diff --git a/genshi/filters/__init__.py b/genshi/filters/__init__.py
new file mode 100644
index 0000000..efc2565
--- /dev/null
+++ b/genshi/filters/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of a number of stream filters."""
+
+from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
+from genshi.filters.i18n import Translator
+from genshi.filters.transform import Transformer
+
+__docformat__ = 'restructuredtext en'
diff --git a/genshi/filters/html.py b/genshi/filters/html.py
new file mode 100644
index 0000000..d554a54
--- /dev/null
+++ b/genshi/filters/html.py
@@ -0,0 +1,453 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of a number of stream filters."""
+
+try:
+ any
+except NameError:
+ from genshi.util import any
+import re
+
+from genshi.core import Attrs, QName, stripentities
+from genshi.core import END, START, TEXT, COMMENT
+
+__all__ = ['HTMLFormFiller', 'HTMLSanitizer']
+__docformat__ = 'restructuredtext en'
+
+
class HTMLFormFiller(object):
    """A stream filter that can populate HTML forms from a dictionary of values.

    >>> from genshi.input import HTML
    >>> html = HTML('''<form>
    ...   <p><input type="text" name="foo" /></p>
    ... </form>''')
    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
    >>> print(html | filler)
    <form>
      <p><input type="text" name="foo" value="bar"/></p>
    </form>
    """
    # TODO: only select the first radio button, and the first select option
    #       (if not in a multiple-select)
    # TODO: only apply to elements in the XHTML namespace (or no namespace)?

    def __init__(self, name=None, id=None, data=None, passwords=False):
        """Create the filter.

        :param name: The name of the form that should be populated. If this
                     parameter is given, only forms where the ``name`` attribute
                     value matches the parameter are processed.
        :param id: The ID of the form that should be populated. If this
                   parameter is given, only forms where the ``id`` attribute
                   value matches the parameter are processed.
        :param data: The dictionary of form values, where the keys are the names
                     of the form fields, and the values are the values to fill
                     in.
        :param passwords: Whether password input fields should be populated.
                          This is off by default for security reasons (for
                          example, a password may end up in the browser cache)
        :note: Changed in 0.5.2: added the `passwords` option
        """
        self.name = name
        self.id = id
        if data is None:
            data = {}
        self.data = data
        self.passwords = passwords

    def __call__(self, stream):
        """Apply the filter to the given stream.

        ``input``, ``select``/``option`` and ``textarea`` elements found
        inside a matching form are filled from `self.data`; all other events
        are passed through unchanged.

        :param stream: the markup event stream to filter
        """
        in_form = in_select = in_option = in_textarea = False
        select_value = option_value = textarea_value = None
        option_start = None
        option_text = []
        no_option_value = False

        for kind, data, pos in stream:

            if kind is START:
                tag, attrs = data
                tagname = tag.localname

                if tagname == 'form' and (
                        self.name and attrs.get('name') == self.name or
                        self.id and attrs.get('id') == self.id or
                        not (self.id or self.name)):
                    in_form = True

                elif in_form:
                    if tagname == 'input':
                        input_type = attrs.get('type', '').lower()
                        if input_type in ('checkbox', 'radio'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                declval = attrs.get('value')
                                checked = False
                                if isinstance(value, (list, tuple)):
                                    if declval:
                                        checked = declval in [unicode(v) for v
                                                              in value]
                                    else:
                                        checked = any(value)
                                else:
                                    if declval:
                                        checked = declval == unicode(value)
                                    elif input_type == 'checkbox':
                                        checked = bool(value)
                                if checked:
                                    attrs |= [(QName('checked'), 'checked')]
                                elif 'checked' in attrs:
                                    attrs -= 'checked'
                        elif input_type in ('', 'hidden', 'text') \
                                or input_type == 'password' and self.passwords:
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                if isinstance(value, (list, tuple)):
                                    value = value[0]
                                if value is not None:
                                    attrs |= [
                                        (QName('value'), unicode(value))
                                    ]
                    elif tagname == 'select':
                        name = attrs.get('name')
                        if name in self.data:
                            select_value = self.data[name]
                            in_select = True
                    elif tagname == 'textarea':
                        name = attrs.get('name')
                        if name in self.data:
                            textarea_value = self.data.get(name)
                            if isinstance(textarea_value, (list, tuple)):
                                textarea_value = textarea_value[0]
                            in_textarea = True
                    elif in_select and tagname == 'option':
                        # Hold the start tag back: whether the option is
                        # selected is only known once its end tag is seen,
                        # because the value may come from the text content.
                        option_start = kind, data, pos
                        option_value = attrs.get('value')
                        if option_value is None:
                            no_option_value = True
                            option_value = ''
                        in_option = True
                        continue
                yield kind, (tag, attrs), pos

            elif in_form and kind is TEXT:
                if in_select and in_option:
                    if no_option_value:
                        option_value += data
                    option_text.append((kind, data, pos))
                    continue
                elif in_textarea:
                    # The existing content is replaced by the supplied value
                    continue
                yield kind, data, pos

            elif in_form and kind is END:
                tagname = data.localname
                if tagname == 'form':
                    in_form = False
                elif tagname == 'select':
                    in_select = False
                    select_value = None
                elif in_select and tagname == 'option':
                    if isinstance(select_value, (tuple, list)):
                        selected = option_value in [unicode(v) for v
                                                    in select_value]
                    else:
                        selected = option_value == unicode(select_value)
                    okind, (tag, attrs), opos = option_start
                    if selected:
                        attrs |= [(QName('selected'), 'selected')]
                    elif 'selected' in attrs:
                        attrs -= 'selected'
                    yield okind, (tag, attrs), opos
                    if option_text:
                        for event in option_text:
                            yield event
                    in_option = False
                    no_option_value = False
                    option_start = option_value = None
                    option_text = []
                elif in_textarea and tagname == 'textarea':
                    # Only textareas that were matched against the data get a
                    # value, and the value is forgotten afterwards so that it
                    # cannot leak into a later, unrelated textarea.
                    if textarea_value:
                        yield TEXT, unicode(textarea_value), pos
                    textarea_value = None
                    in_textarea = False
                yield kind, data, pos

            else:
                yield kind, data, pos
+
+
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    >>> from genshi import HTML
    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
    >>> print(html | HTMLSanitizer())
    <div/>

    The default set of safe tags and attributes can be modified when the filter
    is instantiated. For example, to allow inline ``style`` attributes, the
    following instantiation would work:

    >>> html = HTML('<div style="background: #000"></div>')
    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    >>> print(html | sanitizer)
    <div style="background: #000"/>

    Note that even in this case, the filter *does* attempt to remove dangerous
    constructs from style attributes:

    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
    >>> print(html | sanitizer)
    <div style="color: #000"/>

    This handles HTML entities, unicode escapes in CSS and Javascript text, as
    well as a lot of other things. However, the style tag is still excluded by
    default because it is very hard for such sanitizing to be completely safe,
    especially considering how much error recovery current web browsers perform.

    It also does some basic filtering of CSS properties that may be used for
    typical phishing attacks. For more sophisticated filtering, this class
    provides a couple of hooks that can be overridden in sub-classes.

    :warn: Note that this special processing of CSS is currently only applied to
           style attributes, **not** style elements.
    """

    SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
        'type', 'usemap', 'valign', 'value', 'vspace', 'width'])

    SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])

    def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
                 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
        """Create the sanitizer.

        The exact set of allowed elements and attributes can be configured.

        :param safe_tags: a set of tag names that are considered safe
        :param safe_attrs: a set of attribute names that are considered safe
        :param safe_schemes: a set of URI schemes that are considered safe
        :param uri_attrs: a set of names of attributes that contain URIs
        """
        self.safe_tags = safe_tags
        "The set of tag names that are considered safe."
        self.safe_attrs = safe_attrs
        "The set of attribute names that are considered safe."
        self.uri_attrs = uri_attrs
        "The set of names of attributes that may contain URIs."
        self.safe_schemes = safe_schemes
        "The set of URI schemes that are considered safe."

    def __call__(self, stream):
        """Apply the filter to the given stream.

        Unsafe elements are dropped together with everything they contain,
        unsafe attributes are dropped from the remaining elements, and
        comments are always removed.

        :param stream: the markup event stream to filter
        """
        # The tag of the unsafe element currently being skipped, if any
        waiting_for = None

        for kind, data, pos in stream:
            if kind is START:
                if waiting_for:
                    continue
                tag, attrs = data
                if not self.is_safe_elem(tag, attrs):
                    waiting_for = tag
                    continue

                new_attrs = []
                for attr, value in attrs:
                    value = stripentities(value)
                    if attr not in self.safe_attrs:
                        continue
                    elif attr in self.uri_attrs:
                        # Don't allow URI schemes such as "javascript:"
                        if not self.is_safe_uri(value):
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        decls = self.sanitize_css(value)
                        if not decls:
                            continue
                        value = '; '.join(decls)
                    new_attrs.append((attr, value))

                yield kind, (tag, Attrs(new_attrs)), pos

            elif kind is END:
                tag = data
                if waiting_for:
                    if waiting_for == tag:
                        waiting_for = None
                else:
                    yield kind, data, pos

            elif kind is not COMMENT:
                if not waiting_for:
                    yield kind, data, pos

    def is_safe_css(self, propname, value):
        """Determine whether the given css property declaration is to be
        considered safe for inclusion in the output.

        :param propname: the CSS property name
        :param value: the value of the property
        :return: whether the property value should be considered safe
        :rtype: bool
        :since: version 0.6
        """
        if propname == 'position':
            return False
        if propname.startswith('margin') and '-' in value:
            # Negative margins can be used for phishing
            return False
        return True

    def is_safe_elem(self, tag, attrs):
        """Determine whether the given element should be considered safe for
        inclusion in the output.

        :param tag: the tag name of the element
        :type tag: QName
        :param attrs: the element attributes
        :type attrs: Attrs
        :return: whether the element should be considered safe
        :rtype: bool
        :since: version 0.6
        """
        if tag not in self.safe_tags:
            return False
        if tag.localname == 'input':
            input_type = attrs.get('type', '').lower()
            if input_type == 'password':
                return False
        return True

    def is_safe_uri(self, uri):
        """Determine whether the given URI is to be considered safe for
        inclusion in the output.

        The default implementation checks whether the scheme of the URI is in
        the set of allowed URIs (`safe_schemes`).

        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.is_safe_uri('http://example.org/')
        True
        >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
        False

        :param uri: the URI to check
        :return: `True` if the URI can be considered safe, `False` otherwise
        :rtype: `bool`
        :since: version 0.4.3
        """
        if '#' in uri:
            uri = uri.split('#', 1)[0] # Strip out the fragment identifier
        if ':' not in uri:
            return True # This is a relative URI
        # Non-alphanumeric characters are dropped from the scheme before it
        # is compared against the whitelist
        chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower() in self.safe_schemes

    def sanitize_css(self, text):
        """Remove potentially dangerous property declarations from CSS code.

        In particular, properties using the CSS ``url()`` function with a scheme
        that is not considered safe are removed:

        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.sanitize_css(u'''
        ...   background: url(javascript:alert("foo"));
        ...   color: #000;
        ... ''')
        [u'color: #000']

        Also, the proprietary Internet Explorer function ``expression()`` is
        always stripped:

        >>> sanitizer.sanitize_css(u'''
        ...   background: #fff;
        ...   color: #000;
        ...   width: e/**/xpression(alert("foo"));
        ... ''')
        [u'background: #fff', u'color: #000']

        Both checks ignore the case of the function name, and comments are
        removed even when they span several lines.

        :param text: the CSS text; this is expected to be `unicode` and to not
                     contain any character or numeric references
        :return: a list of declarations that are considered safe
        :rtype: `list`
        :since: version 0.4.3
        """
        decls = []
        text = self._strip_css_comments(self._replace_unicode_escapes(text))
        for decl in text.split(';'):
            decl = decl.strip()
            if not decl:
                continue
            try:
                propname, value = decl.split(':', 1)
            except ValueError:
                continue
            if not self.is_safe_css(propname.strip().lower(), value.strip()):
                continue
            is_evil = False
            # CSS function names are case-insensitive, so ``EXPRESSION(`` and
            # ``URL(`` have to be caught just like their lower-case spelling
            if 'expression' in value.lower():
                is_evil = True
            for match in self._URL_FINDITER(value):
                if not self.is_safe_uri(match.group(1)):
                    is_evil = True
                    break
            if not is_evil:
                decls.append(decl.strip())
        return decls

    _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
    _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub
    _URL_FINDITER = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE).finditer

    def _replace_unicode_escapes(self, text):
        # Turn CSS escapes such as ``\65`` into the character they stand for
        def _repl(match):
            return unichr(int(match.group(1), 16))
        return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text))

    # DOTALL: a comment may contain line breaks, and one that is left in
    # place could be used to hide a keyword from the checks above
    _CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.DOTALL).sub

    def _strip_css_comments(self, text):
        return self._CSS_COMMENTS('', text)
diff --git a/genshi/filters/i18n.py b/genshi/filters/i18n.py
new file mode 100644
index 0000000..7852875
--- /dev/null
+++ b/genshi/filters/i18n.py
@@ -0,0 +1,1238 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Directives and utilities for internationalization and localization of
+templates.
+
+:since: version 0.4
+:note: Directives support added since version 0.6
+"""
+
+try:
+ any
+except NameError:
+ from genshi.util import any
+from gettext import NullTranslations
+import os
+import re
+from types import FunctionType
+
+from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
+ XML_NAMESPACE, _ensure, StreamEventKind
+from genshi.template.eval import _ast
+from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
+from genshi.template.directives import Directive, StripDirective
+from genshi.template.markup import MarkupTemplate, EXEC
+
+__all__ = ['Translator', 'extract']
+__docformat__ = 'restructuredtext en'
+
+
+I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
+
+MSGBUF = StreamEventKind('MSGBUF')
+SUB_START = StreamEventKind('SUB_START')
+SUB_END = StreamEventKind('SUB_END')
+
+GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
+ 'ugettext', 'ungettext')
+
+
class I18NDirective(Directive):
    """Common base class of the i18n directives.

    Applying the directive itself changes nothing: the stream is simply
    handed on to the remaining directives.
    """

    def __call__(self, stream, directives, ctxt, **vars):
        remaining = _apply_directives(stream, directives, ctxt, vars)
        return remaining
+
+
class ExtractableI18NDirective(I18NDirective):
    """Base class of the i18n directives that take part in message
    extraction; sub-classes have to implement `extract`."""

    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True, comment_stack=None):
        # No default behaviour: every extractable directive provides its own
        raise NotImplementedError
+
+
class CommentDirective(I18NDirective):
    """The ``i18n:comment`` template directive, which attaches a comment for
    the translators to a message.

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <p i18n:comment="As in Foo Bar">Foo</p>
    ... </html>''')
    >>> translator = Translator()
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream))
    [(2, None, u'Foo', [u'As in Foo Bar'])]
    """
    __slots__ = ['comment']

    def __init__(self, value, template=None, namespaces=None, lineno=-1,
                 offset=-1):
        # The attribute value is plain comment text, so ``None`` is passed
        # to the base class in its place and the text is stored separately.
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.comment = value
+
+
class MsgDirective(ExtractableI18NDirective):
    r"""Implementation of the ``i18n:msg`` directive which marks inner content
    as translatable. Consider the following examples:

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:msg="">
    ...     <p>Foo</p>
    ...     <p>Bar</p>
    ...   </div>
    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
    ... </html>''')

    >>> translator = Translator()
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream))
    [(2, None, u'[1:Foo]\n    [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
    >>> print(tmpl.generate().render())
    <html>
      <div><p>Foo</p>
        <p>Bar</p></div>
      <p>Foo <em>bar</em>!</p>
    </html>

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:msg="fname, lname">
    ...     <p>First Name: ${fname}</p>
    ...     <p>Last Name: ${lname}</p>
    ...   </div>
    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
    [(2, None, u'[1:First Name: %(fname)s]\n    [2:Last Name: %(lname)s]', []),
    (6, None, u'Foo [1:bar]!', [])]

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:msg="fname, lname">
    ...     <p>First Name: ${fname}</p>
    ...     <p>Last Name: ${lname}</p>
    ...   </div>
    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> print(tmpl.generate(fname='John', lname='Doe').render())
    <html>
      <div><p>First Name: John</p>
        <p>Last Name: Doe</p></div>
      <p>Foo <em>bar</em>!</p>
    </html>

    Leading and trailing white-space is stripped off the message to make it
    simpler for translators. As that white-space only exists in the HTML
    source, stripping it does not change the rendered output.
    """
    __slots__ = ['params', 'lineno']

    def __init__(self, value, template=None, namespaces=None, lineno=-1,
                 offset=-1):
        """Create the directive.

        :param value: a comma-separated list of the names to use for the
                      expressions inside the message
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.params = [param.strip() for param in value.split(',') if param]
        self.lineno = lineno

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        # When used as an element (``<i18n:msg params="...">``) the value is
        # a dictionary of the element attributes instead of a string
        if isinstance(value, dict):
            value = value.get('params', '').strip()
        return super(MsgDirective, cls).attach(template, stream, value.strip(),
                                               namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        """Replace the content of the stream by its translation.

        The translation function is looked up in the context; when a domain
        is active, the domain-aware function is used with that domain.
        """
        gettext = ctxt.get('_i18n.gettext')
        if ctxt.get('_i18n.domain'):
            dgettext = ctxt.get('_i18n.dgettext')
            assert hasattr(dgettext, '__call__'), \
                'No domain gettext function passed'
            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)

        def _generate():
            msgbuf = MessageBuffer(self)
            previous = stream.next()
            if previous[0] is START:
                # The enclosing start tag is not part of the message
                yield previous
            else:
                msgbuf.append(*previous)
            previous = stream.next()
            # Buffer with a lag of one event, so that the very last event can
            # be inspected before deciding whether it belongs to the message
            for kind, data, pos in stream:
                msgbuf.append(*previous)
                previous = kind, data, pos
            if previous[0] is not END:
                msgbuf.append(*previous)
                previous = None
            for event in msgbuf.translate(gettext(msgbuf.format())):
                yield event
            if previous:
                # The enclosing end tag
                yield previous

        return _apply_directives(_generate(), directives, ctxt, vars)

    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True, comment_stack=None):
        """Yield the messages found in the stream.

        Messages found in attributes of the contained start tags are yielded
        first, the message built from the content itself comes last, as a
        ``(lineno, None, message, comments)`` tuple.

        :param comment_stack: the list of pending translator comments, of
                              which only the last one is attached to the
                              message; may be `None`
        """
        msgbuf = MessageBuffer(self)
        strip = False

        stream = iter(stream)
        previous = stream.next()
        if previous[0] is START:
            for message in translator._extract_attrs(previous,
                                                     gettext_functions,
                                                     search_text=search_text):
                yield message
            previous = stream.next()
            strip = True
        for event in stream:
            if event[0] is START:
                for message in translator._extract_attrs(event,
                                                         gettext_functions,
                                                         search_text=search_text):
                    yield message
            msgbuf.append(*previous)
            previous = event
        if not strip:
            msgbuf.append(*previous)

        # `comment_stack` defaults to None, which cannot be sliced
        yield self.lineno, None, msgbuf.format(), (comment_stack or [])[-1:]
+
+
class ChooseBranchDirective(I18NDirective):
    """Shared implementation of the branches of an ``i18n:choose``.

    The content of the branch is collected in a message buffer which is
    stored in the context under ``_i18n.choose.<tagname>``; in the output a
    single `MSGBUF` event stands in for the buffered content.
    """
    __slots__ = ['params']

    def __call__(self, stream, directives, ctxt, **vars):
        self.params = ctxt.get('_i18n.choose.params', [])[:]
        buf = MessageBuffer(self)
        events = _apply_directives(stream, directives, ctxt, vars)

        pending = events.next()
        if pending[0] is START:
            yield pending
        else:
            buf.append(*pending)

        try:
            pending = events.next()
        except StopIteration:
            # For example <i18n:singular> or <i18n:plural> directives
            yield MSGBUF, (), -1 # the place holder for msgbuf output
            ctxt['_i18n.choose.%s' % self.tagname] = buf
            return

        # Buffer with a lag of one event, the final event is handled below
        for upcoming in events:
            buf.append(*pending)
            pending = upcoming
        yield MSGBUF, (), -1 # the place holder for msgbuf output

        if pending[0] is END:
            yield pending # the outer end tag
        else:
            buf.append(*pending)
        ctxt['_i18n.choose.%s' % self.tagname] = buf

    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True, comment_stack=None, msgbuf=None):
        """Append the content of the branch to `msgbuf`, and yield the
        messages found in attributes of the start tags seen on the way."""
        events = iter(stream)
        pending = events.next()

        if pending[0] is START:
            # skip the enclosing element
            for message in translator._extract_attrs(pending,
                                                     gettext_functions,
                                                     search_text=search_text):
                yield message
            pending = events.next()

        for upcoming in events:
            if pending[0] is START:
                for message in translator._extract_attrs(pending,
                                                         gettext_functions,
                                                         search_text=search_text):
                    yield message
            msgbuf.append(*pending)
            pending = upcoming

        if pending[0] is not END:
            msgbuf.append(*pending)
+
+
class SingularDirective(ChooseBranchDirective):
    """The ``i18n:singular`` directive, which marks the singular branch
    inside an ``i18n:choose`` directive."""
+
+
class PluralDirective(ChooseBranchDirective):
    """The ``i18n:plural`` directive, which marks the plural branch inside
    an ``i18n:choose`` directive."""
+
+
class ChooseDirective(ExtractableI18NDirective):
    """Implementation of the ``i18n:choose`` directive which provides plural
    internationalisation of strings.

    This directive requires at least one parameter, the one which evaluates to
    an integer which will allow to choose the plural/singular form. If you also
    have expressions inside the singular and plural version of the string you
    also need to pass a name for those parameters. Consider the following
    examples:

    >>> tmpl = MarkupTemplate('''\
        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:choose="num; num">
    ...     <p i18n:singular="">There is $num coin</p>
    ...     <p i18n:plural="">There are $num coins</p>
    ...   </div>
    ... </html>''')
    >>> translator = Translator()
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
    [(2, 'ngettext', (u'There is %(num)s coin',
                      u'There are %(num)s coins'), [])]

    >>> tmpl = MarkupTemplate('''\
        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:choose="num; num">
    ...     <p i18n:singular="">There is $num coin</p>
    ...     <p i18n:plural="">There are $num coins</p>
    ...   </div>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> print(tmpl.generate(num=1).render())
    <html>
      <div>
        <p>There is 1 coin</p>
      </div>
    </html>
    >>> print(tmpl.generate(num=2).render())
    <html>
      <div>
        <p>There are 2 coins</p>
      </div>
    </html>

    When used as an element and not as an attribute:

    >>> tmpl = MarkupTemplate('''\
        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <i18n:choose numeral="num" params="num">
    ...     <p i18n:singular="">There is $num coin</p>
    ...     <p i18n:plural="">There are $num coins</p>
    ...   </i18n:choose>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
    [(2, 'ngettext', (u'There is %(num)s coin',
                      u'There are %(num)s coins'), [])]
    """
    __slots__ = ['numeral', 'params', 'lineno']

    def __init__(self, value, template=None, namespaces=None, lineno=-1,
                 offset=-1):
        """Create the directive.

        :param value: a string of the form ``numeral; name1, name2``: the
                      expression selecting the plural form, optionally
                      followed by the names of the message parameters
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        params = [v.strip() for v in value.split(';')]
        self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
        self.params = params and [name.strip() for name in
                                  params[0].split(',') if name] or []
        self.lineno = lineno

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        # When used as an element, the attributes arrive as a dictionary and
        # are folded into the ``numeral; params`` form of the attribute
        if isinstance(value, dict):
            numeral = value.get('numeral', '').strip()
            # Compare by value: whether two empty strings are the same
            # object is an implementation detail
            assert numeral != '', "at least pass the numeral param"
            params = [v.strip() for v in value.get('params', '').split(',')]
            value = '%s; ' % numeral + ', '.join(params)
        return super(ChooseDirective, cls).attach(template, stream, value,
                                                  namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        """Render the branch selected by the numeral, translated.

        The singular and plural branches are taken out of the stream; the
        translation of the selected one is emitted at the position of the
        singular branch.
        """
        ctxt.push({'_i18n.choose.params': self.params,
                   '_i18n.choose.singular': None,
                   '_i18n.choose.plural': None})

        ngettext = ctxt.get('_i18n.ngettext')
        assert hasattr(ngettext, '__call__'), 'No ngettext function available'
        dngettext = ctxt.get('_i18n.dngettext')
        if not dngettext:
            dngettext = lambda d, s, p, n: ngettext(s, p, n)

        new_stream = []
        singular_stream = None
        plural_stream = None

        numeral = self.numeral.evaluate(ctxt)
        is_plural = self._is_plural(numeral, ngettext)

        for event in stream:
            if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
                                       for d in event[1][0]):
                subdirectives, substream = event[1]

                if isinstance(subdirectives[0], SingularDirective):
                    singular_stream = list(_apply_directives(substream,
                                                             subdirectives,
                                                             ctxt, vars))
                    new_stream.append((MSGBUF, None, (None, -1, -1)))

                elif isinstance(subdirectives[0], PluralDirective):
                    if is_plural:
                        plural_stream = list(_apply_directives(substream,
                                                               subdirectives,
                                                               ctxt, vars))

            else:
                new_stream.append(event)

        # A separate name is used for the domain-aware function: rebinding
        # `ngettext` here would make the `dngettext` fallback above, which
        # refers to `ngettext`, call itself endlessly.
        translate = ngettext
        if ctxt.get('_i18n.domain'):
            translate = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
                                                  s, p, n)

        singular_msgbuf = ctxt.get('_i18n.choose.singular')
        if is_plural:
            plural_msgbuf = ctxt.get('_i18n.choose.plural')
            msgbuf, choice = plural_msgbuf, plural_stream
        else:
            msgbuf, choice = singular_msgbuf, singular_stream
            # The plural branch was not rendered, an empty buffer stands in
            plural_msgbuf = MessageBuffer(self)

        for kind, data, pos in new_stream:
            if kind is MSGBUF:
                for event in choice:
                    if event[0] is MSGBUF:
                        translation = translate(singular_msgbuf.format(),
                                                plural_msgbuf.format(),
                                                numeral)
                        for subevent in msgbuf.translate(translation):
                            yield subevent
                    else:
                        yield event
            else:
                yield kind, data, pos

        ctxt.pop()

    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True, comment_stack=None):
        """Yield the messages found in the stream.

        Messages found in attributes come first; the last item is the
        ``(lineno, 'ngettext', (singular, plural), comments)`` tuple built
        from the two branches.

        :param comment_stack: the list of pending translator comments, of
                              which only the last one is attached to the
                              message; may be `None`
        """
        strip = False
        stream = iter(stream)
        previous = stream.next()

        if previous[0] is START:
            # skip the enclosing element
            for message in translator._extract_attrs(previous,
                                                     gettext_functions,
                                                     search_text=search_text):
                yield message
            previous = stream.next()
            strip = True

        singular_msgbuf = MessageBuffer(self)
        plural_msgbuf = MessageBuffer(self)

        for event in stream:
            if previous[0] is SUB:
                directives, substream = previous[1]
                for directive in directives:
                    if isinstance(directive, SingularDirective):
                        for message in directive.extract(translator,
                                substream, gettext_functions, search_text,
                                comment_stack, msgbuf=singular_msgbuf):
                            yield message
                    elif isinstance(directive, PluralDirective):
                        for message in directive.extract(translator,
                                substream, gettext_functions, search_text,
                                comment_stack, msgbuf=plural_msgbuf):
                            yield message
                    elif not isinstance(directive, StripDirective):
                        singular_msgbuf.append(*previous)
                        plural_msgbuf.append(*previous)
            else:
                if previous[0] is START:
                    for message in translator._extract_attrs(previous,
                                                             gettext_functions,
                                                             search_text):
                        yield message
                singular_msgbuf.append(*previous)
                plural_msgbuf.append(*previous)
            previous = event

        if not strip:
            singular_msgbuf.append(*previous)
            plural_msgbuf.append(*previous)

        # `comment_stack` defaults to None, which cannot be sliced
        yield self.lineno, 'ngettext', \
            (singular_msgbuf.format(), plural_msgbuf.format()), \
            (comment_stack or [])[-1:]

    def _is_plural(self, numeral, ngettext):
        # XXX: should we test which form was chosen like this!?!?!?
        # There should be no match in any catalogue for these singular and
        # plural test strings
        singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
        plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
        return ngettext(singular, plural, numeral) == plural
+
+
class DomainDirective(I18NDirective):
    """The ``i18n:domain`` directive, which selects the i18n domain (that is,
    the catalog) the enclosed content is translated from.

    >>> from genshi.filters.tests.i18n import DummyTranslations
    >>> tmpl = MarkupTemplate('''\
        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <p i18n:msg="">Bar</p>
    ...   <div i18n:domain="foo">
    ...     <p i18n:msg="">FooBar</p>
    ...     <p>Bar</p>
    ...     <p i18n:domain="bar" i18n:msg="">Bar</p>
    ...     <p i18n:domain="">Bar</p>
    ...   </div>
    ...   <p>Bar</p>
    ... </html>''')

    >>> translations = DummyTranslations({'Bar': 'Voh'})
    >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
    >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
    >>> translator = Translator(translations)
    >>> translator.setup(tmpl)

    >>> print(tmpl.generate().render())
    <html>
      <p>Voh</p>
      <div>
        <p>BarFoo</p>
        <p>foo_Bar</p>
        <p>bar_Bar</p>
        <p>Voh</p>
      </div>
      <p>Voh</p>
    </html>
    """
    __slots__ = ['domain']

    def __init__(self, value, template=None, namespaces=None, lineno=-1,
                 offset=-1):
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        # A missing or blank name is stored as '__DEFAULT__'
        domain = value and value.strip()
        if not domain:
            domain = '__DEFAULT__'
        self.domain = domain

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        # In element form the domain is given by the ``name`` attribute
        if type(value) is dict:
            value = value.get('name')
        parent = super(DomainDirective, cls)
        return parent.attach(template, stream, value, namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        # The domain is only in effect while the enclosed events are produced
        ctxt.push({'_i18n.domain': self.domain})
        inner = _apply_directives(stream, directives, ctxt, vars)
        for event in inner:
            yield event
        ctxt.pop()
+
+
+class Translator(DirectiveFactory):
+ """Can extract and translate localizable strings from markup streams and
+ templates.
+
+ For example, assume the following template:
+
+ >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
+ ... <head>
+ ... <title>Example</title>
+ ... </head>
+ ... <body>
+ ... <h1>Example</h1>
+ ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+ ... </body>
+ ... </html>''', filename='example.html')
+
+ For demonstration, we define a dummy ``gettext``-style function with a
+ hard-coded translation table, and pass that to the `Translator` initializer:
+
+ >>> def pseudo_gettext(string):
+ ... return {
+ ... 'Example': 'Beispiel',
+ ... 'Hello, %(name)s': 'Hallo, %(name)s'
+ ... }[string]
+ >>> translator = Translator(pseudo_gettext)
+
+ Next, the translator needs to be prepended to any already defined filters
+ on the template:
+
+ >>> tmpl.filters.insert(0, translator)
+
+ When generating the template output, our hard-coded translations should be
+ applied as expected:
+
+ >>> print(tmpl.generate(username='Hans', _=pseudo_gettext))
+ <html>
+ <head>
+ <title>Beispiel</title>
+ </head>
+ <body>
+ <h1>Beispiel</h1>
+ <p>Hallo, Hans</p>
+ </body>
+ </html>
+
+ Note that elements defining ``xml:lang`` attributes that do not contain
+ variable expressions are ignored by this filter. That can be used to
+ exclude specific parts of a template from being extracted and translated.
+ """
+
+ # (name, class) pairs of the directives offered in the i18n namespace
+ directives = [
+ ('domain', DomainDirective),
+ ('comment', CommentDirective),
+ ('msg', MsgDirective),
+ ('choose', ChooseDirective),
+ ('singular', SingularDirective),
+ ('plural', PluralDirective)
+ ]
+
+ # Elements whose content is neither translated nor extracted by default
+ IGNORE_TAGS = frozenset([
+ QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+ QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+ ])
+ # Attributes whose plain-string values are translated/extracted by default
+ INCLUDE_ATTRS = frozenset([
+ 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
+ ])
+ NAMESPACE = I18N_NAMESPACE
+
+ # NOTE: the default ``NullTranslations()`` instance is created once, when
+ # the function is defined, and is shared by every Translator that does not
+ # pass its own ``translate`` argument.
+ def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
+ include_attrs=INCLUDE_ATTRS, extract_text=True):
+ """Initialize the translator.
+
+ :param translate: the translation function, for example ``gettext`` or
+ ``ugettext``.
+ :param ignore_tags: a set of tag names that should not be localized
+ :param include_attrs: a set of attribute names that should be localized
+ :param extract_text: whether the content of text nodes should be
+ extracted, or only text in explicit ``gettext``
+ function calls
+
+ :note: Changed in 0.6: the `translate` parameter can now be either
+ a ``gettext``-style function, or an object compatible with the
+ ``NullTranslations`` or ``GNUTranslations`` interface
+ """
+ self.translate = translate
+ self.ignore_tags = ignore_tags
+ self.include_attrs = include_attrs
+ self.extract_text = extract_text
+
+ def __call__(self, stream, ctxt=None, translate_text=True,
+ translate_attrs=True):
+ """Translate any localizable strings in the given stream.
+
+ This function shouldn't be called directly. Instead, an instance of
+ the `Translator` class should be registered as a filter with the
+ `Template` or the `TemplateLoader`, or applied as a regular stream
+ filter. If used as a template filter, it should be inserted in front of
+ all the default filters.
+
+ :param stream: the markup event stream
+ :param ctxt: the template context; when given, the resolved translation
+ functions are stored in it under ``_i18n.*`` keys and
+ ``_i18n.domain`` is read from it
+ :param translate_text: whether text nodes should be translated (used
+ internally)
+ :param translate_attrs: whether attribute values should be translated
+ (used internally)
+ :return: the localized stream
+ """
+ ignore_tags = self.ignore_tags
+ include_attrs = self.include_attrs
+ # Nesting depth inside an element that must not be localized (0 = none)
+ skip = 0
+ xml_lang = XML_NAMESPACE['lang']
+ # With text extraction disabled neither text nodes nor attribute values
+ # are translated by this filter.
+ if not self.extract_text:
+ translate_text = False
+ translate_attrs = False
+
+ # A plain function is used directly as ``gettext``; anything else is
+ # treated as a translations object exposing the ``u*gettext`` methods.
+ if type(self.translate) is FunctionType:
+ gettext = self.translate
+ if ctxt:
+ ctxt['_i18n.gettext'] = gettext
+ else:
+ gettext = self.translate.ugettext
+ ngettext = self.translate.ungettext
+ try:
+ dgettext = self.translate.dugettext
+ dngettext = self.translate.dungettext
+ except AttributeError:
+ # No domain support: ignore the domain argument
+ dgettext = lambda _, y: gettext(y)
+ dngettext = lambda _, s, p, n: ngettext(s, p, n)
+ if ctxt:
+ ctxt['_i18n.gettext'] = gettext
+ ctxt['_i18n.ngettext'] = ngettext
+ ctxt['_i18n.dgettext'] = dgettext
+ ctxt['_i18n.dngettext'] = dngettext
+
+ # NOTE(review): ``dgettext`` is only bound in the translations-object
+ # branch above; with a plain function translator and a domain set in the
+ # context, calling this lambda looks like it would raise NameError --
+ # confirm.
+ if ctxt and ctxt.get('_i18n.domain'):
+ gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
+
+ for kind, data, pos in stream:
+
+ # skip chunks that should not be localized
+ if skip:
+ if kind is START:
+ skip += 1
+ elif kind is END:
+ skip -= 1
+ yield kind, data, pos
+ continue
+
+ # handle different events that can be localized
+ if kind is START:
+ tag, attrs = data
+ # Ignored tags and elements with a literal xml:lang value start
+ # a skipped region
+ if tag in self.ignore_tags or \
+ isinstance(attrs.get(xml_lang), basestring):
+ skip += 1
+ yield kind, data, pos
+ continue
+
+ new_attrs = []
+ changed = False
+
+ for name, value in attrs:
+ newval = value
+ if isinstance(value, basestring):
+ if translate_attrs and name in include_attrs:
+ newval = gettext(value)
+ else:
+ # Non-string attribute values are run through this filter
+ # recursively with text translation switched off
+ newval = list(
+ self(_ensure(value), ctxt, translate_text=False)
+ )
+ if newval != value:
+ value = newval
+ changed = True
+ new_attrs.append((name, value))
+ if changed:
+ attrs = Attrs(new_attrs)
+
+ yield kind, (tag, attrs), pos
+
+ elif translate_text and kind is TEXT:
+ # Only the stripped text is looked up; surrounding whitespace is
+ # kept by substituting the translation back into the original data
+ text = data.strip()
+ if text:
+ data = data.replace(text, unicode(gettext(text)))
+ yield kind, data, pos
+
+ elif kind is SUB:
+ directives, substream = data
+ current_domain = None
+ # NOTE(review): ``directives`` is reordered in place while being
+ # enumerated, and ``ctxt.push`` is called without checking that
+ # ``ctxt`` is not None -- confirm both are safe for all callers.
+ for idx, directive in enumerate(directives):
+ # Organize directives to make everything work
+ # FIXME: There's got to be a better way to do this!
+ if isinstance(directive, DomainDirective):
+ # Grab current domain and update context
+ current_domain = directive.domain
+ ctxt.push({'_i18n.domain': current_domain})
+ # Put domain directive as the first one in order to
+ # update context before any other directives evaluation
+ directives.insert(0, directives.pop(idx))
+
+ # If this is an i18n directive, no need to translate text
+ # nodes here
+ is_i18n_directive = any([
+ isinstance(d, ExtractableI18NDirective)
+ for d in directives
+ ])
+ substream = list(self(substream, ctxt,
+ translate_text=not is_i18n_directive,
+ translate_attrs=translate_attrs))
+ yield kind, (directives, substream), pos
+
+ # Undo the context frame pushed for the domain directive above
+ if current_domain:
+ ctxt.pop()
+ else:
+ yield kind, data, pos
+
+ def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
+ search_text=True, comment_stack=None):
+ """Extract localizable strings from the given template stream.
+
+ For every string found, this function yields a ``(lineno, function,
+ message, comments)`` tuple, where:
+
+ * ``lineno`` is the number of the line on which the string was found,
+ * ``function`` is the name of the ``gettext`` function used (if the
+ string was extracted from embedded Python code), and
+ * ``message`` is the string itself (a ``unicode`` object, or a tuple
+ of ``unicode`` objects for functions with multiple string
+ arguments).
+ * ``comments`` is a list of comments related to the message, extracted
+ from ``i18n:comment`` attributes found in the markup
+
+ >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
+ ... <head>
+ ... <title>Example</title>
+ ... </head>
+ ... <body>
+ ... <h1>Example</h1>
+ ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+ ... <p>${ngettext("You have %d item", "You have %d items", num)}</p>
+ ... </body>
+ ... </html>''', filename='example.html')
+ >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
+ ... print('%d, %r, %r' % (line, func, msg))
+ 3, None, u'Example'
+ 6, None, u'Example'
+ 7, '_', u'Hello, %(name)s'
+ 8, 'ngettext', (u'You have %d item', u'You have %d items', None)
+
+ :param stream: the event stream to extract strings from; can be a
+ regular stream or a template stream
+ :param gettext_functions: a sequence of function names that should be
+ treated as gettext-style localization
+ functions
+ :param search_text: whether the content of text nodes should be
+ extracted (used internally)
+ :param comment_stack: list of pending translator comments; its last
+ entry is attached to extracted text (used
+ internally)
+
+ :note: Changed in 0.4.1: For a function with multiple string arguments
+ (such as ``ngettext``), a single item with a tuple of strings is
+ yielded, instead of an item for each string argument.
+ :note: Changed in 0.6: The returned tuples now include a fourth
+ element, which is a list of comments for the translator.
+ """
+ if not self.extract_text:
+ search_text = False
+ if comment_stack is None:
+ comment_stack = []
+ # Nesting depth inside an element that must not be extracted (0 = none)
+ skip = 0
+
+ xml_lang = XML_NAMESPACE['lang']
+
+ for kind, data, pos in stream:
+ if skip:
+ if kind is START:
+ skip += 1
+ if kind is END:
+ skip -= 1
+
+ if kind is START and not skip:
+ tag, attrs = data
+ if tag in self.ignore_tags or \
+ isinstance(attrs.get(xml_lang), basestring):
+ skip += 1
+ continue
+
+ for message in self._extract_attrs((kind, data, pos),
+ gettext_functions,
+ search_text=search_text):
+ yield message
+
+ elif not skip and search_text and kind is TEXT:
+ text = data.strip()
+ # Only text containing at least one alphabetic character is
+ # reported
+ if text and [ch for ch in text if ch.isalpha()]:
+ yield pos[1], None, text, comment_stack[-1:]
+
+ elif kind is EXPR or kind is EXEC:
+ for funcname, strings in extract_from_code(data,
+ gettext_functions):
+ # XXX: Do we need to grab i18n:comment from comment_stack ???
+ yield pos[1], funcname, strings, []
+
+ elif kind is SUB:
+ directives, substream = data
+ in_comment = False
+
+ # NOTE(review): ``directives`` is the list carried by the stream
+ # event and entries are popped from it while it is being
+ # enumerated; this may skip entries and alters the stream's own
+ # directive list -- confirm this is intended.
+ for idx, directive in enumerate(directives):
+ # Do a first loop to see if there's a comment directive
+ # If there is update context and pop it from directives
+ if isinstance(directive, CommentDirective):
+ in_comment = True
+ comment_stack.append(directive.comment)
+ if len(directives) == 1:
+ # in case we're in the presence of something like:
+ # <p i18n:comment="foo">Foo</p>
+ for message in self.extract(
+ substream, gettext_functions,
+ search_text=search_text and not skip,
+ comment_stack=comment_stack):
+ yield message
+ directives.pop(idx)
+ elif not isinstance(directive, I18NDirective):
+ # Remove all other non i18n directives from the process
+ directives.pop(idx)
+
+ if not directives and not in_comment:
+ # Extract content if there's no directives because
+ # strip was pop'ed and not because comment was pop'ed.
+ # Extraction in this case has been taken care of.
+ for message in self.extract(
+ substream, gettext_functions,
+ search_text=search_text and not skip):
+ yield message
+
+ # Remaining directives: extractable ones do their own extraction,
+ # for the others the substream is scanned by this method
+ for directive in directives:
+ if isinstance(directive, ExtractableI18NDirective):
+ for message in directive.extract(self,
+ substream, gettext_functions,
+ search_text=search_text and not skip,
+ comment_stack=comment_stack):
+ yield message
+ else:
+ for message in self.extract(
+ substream, gettext_functions,
+ search_text=search_text and not skip,
+ comment_stack=comment_stack):
+ yield message
+
+ if in_comment:
+ comment_stack.pop()
+
+ def get_directive_index(self, dir_cls):
+ """Return a sort index for the given directive class.
+
+ Classes found in ``self._dir_order`` get a negative index (their
+ position minus the list length); any other class gets the list length.
+ """
+ # ``_dir_order`` is not assigned in this class body; presumably it is
+ # provided by `DirectiveFactory` -- TODO confirm.
+ total = len(self._dir_order)
+ if dir_cls in self._dir_order:
+ return self._dir_order.index(dir_cls) - total
+ return total
+
+ def setup(self, template):
+ """Convenience function to register the `Translator` filter and the
+ related directives with the given template.
+
+ :param template: a `Template` instance
+ """
+ template.filters.insert(0, self)
+ # Only templates offering ``add_directives`` get the i18n directives
+ if hasattr(template, 'add_directives'):
+ template.add_directives(Translator.NAMESPACE, self)
+
+ def _extract_attrs(self, event, gettext_functions, search_text):
+ """Yield extraction tuples for the attributes of a ``START`` event.
+
+ :param event: a ``(kind, data, pos)`` tuple whose data is
+ ``(tag, attrs)``
+ :param gettext_functions: names of the gettext-style functions
+ :param search_text: whether plain string attribute values should be
+ extracted
+ """
+ # NOTE(review): the ``else`` below presumably pairs with the outer
+ # ``if`` (non-string values are scanned recursively) -- indentation is
+ # not visible here, confirm against the original file.
+ for name, value in event[1][1]:
+ if search_text and isinstance(value, basestring):
+ if name in self.include_attrs:
+ text = value.strip()
+ if text:
+ yield event[2][1], None, text, []
+ else:
+ for message in self.extract(_ensure(value), gettext_functions,
+ search_text=False):
+ yield message
+
+
+class MessageBuffer(object):
+    """Helper class for managing internationalized mixed content.
+
+    Events are fed in through `append`, which builds up a message string
+    (see `format`) and remembers the events per element so that `translate`
+    can later re-assemble a stream from a translated message string.
+
+    :since: version 0.5
+    """
+
+    def __init__(self, directive=None):
+        """Initialize the message buffer.
+
+        :param directive: the directive owning the buffer
+        :type directive: I18NDirective
+        """
+        # params list needs to be copied so that directives can be evaluated
+        # more than once.  ``orig_params`` and ``params`` must be two
+        # separate lists: ``params`` is consumed by ``append`` while
+        # ``orig_params`` is only read when building the error message, so
+        # sharing one list would make that message always report zero
+        # parameters.
+        self.orig_params = directive.params[:]
+        self.params = self.orig_params[:]
+        self.directive = directive
+        self.string = []          # pieces of the message identifier
+        self.events = {}          # order number -> buffered events
+        self.values = {}          # parameter name -> EXPR event
+        self.depth = 1
+        self.order = 1            # number given to the next START event
+        self.stack = [0]          # order numbers of the open elements
+        self.subdirectives = {}   # order number -> directives of SUB events
+
+    def append(self, kind, data, pos):
+        """Append a stream event to the buffer.
+
+        :param kind: the stream event kind
+        :param data: the event data
+        :param pos: the position of the event in the source
+        :raise IndexError: if more expressions are encountered than
+                           parameter names were given to the directive
+        """
+        if kind is SUB:
+            # The order needs to be +1 because a new START kind event will
+            # happen and we we need to wrap those events into our custom kind(s)
+            order = self.stack[-1] + 1
+            subdirectives, substream = data
+            # Store the directives that should be applied after translation
+            self.subdirectives.setdefault(order, []).extend(subdirectives)
+            self.events.setdefault(order, []).append((SUB_START, None, pos))
+            for skind, sdata, spos in substream:
+                self.append(skind, sdata, spos)
+            self.events.setdefault(order, []).append((SUB_END, None, pos))
+        elif kind is TEXT:
+            if '[' in data or ']' in data:
+                # Quote [ and ] if it ain't us adding it, ie, if the user is
+                # using those chars in his templates, escape them
+                data = data.replace('[', '\[').replace(']', '\]')
+            self.string.append(data)
+            self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
+        elif kind is EXPR:
+            if self.params:
+                param = self.params.pop(0)
+            else:
+                params = ', '.join(['"%s"' % p for p in self.orig_params if p])
+                if params:
+                    params = "(%s)" % params
+                raise IndexError("%d parameters%s given to 'i18n:%s' but "
+                                 "%d or more expressions used in '%s', line %s"
+                                 % (len(self.orig_params), params,
+                                    self.directive.tagname,
+                                    len(self.orig_params) + 1,
+                                    os.path.basename(pos[0] or
+                                                     'In-memory Template'),
+                                    pos[1]))
+            # Expressions show up in the message as named placeholders
+            self.string.append('%%(%s)s' % param)
+            self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
+            self.values[param] = (kind, data, pos)
+        else:
+            if kind is START:
+                # Elements are represented as ``[N:...]`` in the message
+                self.string.append('[%d:' % self.order)
+                self.stack.append(self.order)
+                self.events.setdefault(self.stack[-1],
+                                       []).append((kind, data, pos))
+                self.depth += 1
+                self.order += 1
+            elif kind is END:
+                self.depth -= 1
+                if self.depth:
+                    self.events[self.stack[-1]].append((kind, data, pos))
+                    self.string.append(']')
+                    self.stack.pop()
+
+    def format(self):
+        """Return a message identifier representing the content in the
+        buffer.
+        """
+        return ''.join(self.string).strip()
+
+    def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
+        """Interpolate the given message translation with the events in the
+        buffer and return the translated stream.
+
+        :param string: the translated message string
+        :param regex: pattern matching the ``%(name)s`` placeholders (not
+                      meant to be overridden)
+        :return: a generator over the resulting stream events
+        """
+        substream = None
+
+        def yield_parts(string):
+            # Odd indexes of the split result are placeholder names, even
+            # indexes the literal text in between
+            for idx, part in enumerate(regex.split(string)):
+                if idx % 2:
+                    yield self.values[part]
+                elif part:
+                    yield (TEXT,
+                           part.replace('\[', '[').replace('\]', ']'),
+                           (None, -1, -1)
+                    )
+
+        parts = parse_msg(string)
+        # Count how many message parts refer to each order number
+        parts_counter = {}
+        for order, string in parts:
+            parts_counter.setdefault(order, []).append(None)
+
+        while parts:
+            order, string = parts.pop(0)
+            if len(parts_counter[order]) == 1:
+                # Last (or only) part for this order: emit what is left
+                events = self.events[order]
+            else:
+                events = [self.events[order].pop(0)]
+            parts_counter[order].pop()
+
+            for event in events:
+                if event[0] is SUB_START:
+                    substream = []
+                elif event[0] is SUB_END:
+                    # Yield a substream which might have directives to be
+                    # applied to it (after translation events)
+                    yield SUB, (self.subdirectives[order], substream), event[2]
+                    substream = None
+                elif event[0] is TEXT:
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                elif event[0] is START:
+                    if substream is not None:
+                        substream.append(event)
+                    else:
+                        yield event
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                elif event[0] is END:
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                    if substream is not None:
+                        substream.append(event)
+                    else:
+                        yield event
+                elif event[0] is EXPR:
+                    # These are handled on the strings itself
+                    continue
+                else:
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                    if substream is not None:
+                        substream.append(event)
+                    else:
+                        yield event
+
+
+def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
+    """Parse a translated message using Genshi mixed content message
+    formatting.
+
+    >>> parse_msg("See [1:Help].")
+    [(0, 'See '), (1, 'Help'), (0, '.')]
+
+    >>> parse_msg("See [1:our [2:Help] page] for details.")
+    [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
+
+    >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
+    [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
+
+    >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
+    [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
+
+    An unescaped closing bracket without a matching opener is dropped and
+    the text after it stays on the top level instead of raising:
+
+    >>> parse_msg("foo] bar")
+    [(0, 'foo'), (0, ' bar')]
+
+    :param string: the translated message string
+    :param regex: pattern matching ``[N:`` openers and unescaped ``]``
+                  closers (not meant to be overridden)
+    :return: a list of ``(order, string)`` tuples
+    :rtype: `list`
+    """
+    parts = []
+    stack = [0]  # order numbers of the currently open groups; 0 = top level
+    while True:
+        mo = regex.search(string)
+        if not mo:
+            break
+
+        # Text before the bracket belongs to the innermost open group; an
+        # empty top-level prefix is not recorded
+        if mo.start() or stack[-1]:
+            parts.append((stack[-1], string[:mo.start()]))
+        string = string[mo.end():]
+
+        orderno = mo.group(1)
+        if orderno is not None:
+            stack.append(int(orderno))
+        elif len(stack) > 1:
+            stack.pop()
+        # else: stray ``]`` on the top level.  The base entry is kept so
+        # that the ``stack[-1]`` lookups stay valid for the remaining text.
+
+    if string:
+        parts.append((stack[-1], string))
+
+    return parts
+
+
+def extract_from_code(code, gettext_functions):
+    """Extract strings from the syntax tree of compiled template code.
+
+    The tree in ``code.ast`` is walked recursively.  For every call to a
+    plain name listed in `gettext_functions`, a ``(funcname, strings)`` pair
+    is yielded; nodes below such a call are not searched any further.
+    String literal arguments are reported as ``unicode`` objects, any other
+    argument as `None`.  A call with exactly one argument yields the bare
+    value instead of a tuple.
+
+    >>> from genshi.template.eval import Expression
+    >>> expr = Expression('_("Hello")')
+    >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
+    [('_', u'Hello')]
+
+    >>> expr = Expression('ngettext("You have %(num)s item", '
+    ...                   '"You have %(num)s items", num)')
+    >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
+    [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
+
+    :param code: the `Code` object
+    :type code: `genshi.template.eval.Code`
+    :param gettext_functions: a sequence of function names
+    :return: a generator over ``(funcname, strings)`` tuples
+    :since: version 0.5
+    """
+    def _walk(node):
+        if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \
+                and node.func.id in gettext_functions:
+            strings = []
+            def _add(arg):
+                if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring):
+                    text = arg.s
+                    # Only byte strings are decoded; calling
+                    # ``unicode(x, 'utf-8')`` on a value that already is
+                    # unicode raises TypeError
+                    if not isinstance(text, unicode):
+                        text = unicode(text, 'utf-8')
+                    strings.append(text)
+                elif arg:
+                    # Non-literal argument: keep its position with None
+                    strings.append(None)
+            for arg in node.args:
+                _add(arg)
+            _add(node.starargs)
+            _add(node.kwargs)
+            if len(strings) == 1:
+                strings = strings[0]
+            else:
+                strings = tuple(strings)
+            yield node.func.id, strings
+        elif node._fields:
+            # Collect the child nodes of all fields, then descend into them
+            children = []
+            for field in node._fields:
+                child = getattr(node, field, None)
+                if isinstance(child, list):
+                    for elem in child:
+                        children.append(elem)
+                elif isinstance(child, _ast.AST):
+                    children.append(child)
+            for child in children:
+                for funcname, strings in _walk(child):
+                    yield funcname, strings
+    return _walk(code.ast)
+
+
+def extract(fileobj, keywords, comment_tags, options):
+ """Babel extraction method for Genshi templates.
+
+ The template is parsed with the configured template class and handed to a
+ `Translator`, whose extracted messages are yielded unchanged.
+
+ Recognized ``options`` keys: ``template_class`` (a class or a
+ ``"module:ClassName"`` string), ``encoding``, ``extract_text``,
+ ``ignore_tags`` and ``include_attrs`` (the latter two may be given as
+ whitespace-separated strings).
+
+ :param fileobj: the file-like object the messages should be extracted from
+ :param keywords: a list of keywords (i.e. function names) that should be
+ recognized as translation functions
+ :param comment_tags: a list of translator tags to search for and include
+ in the results (not used by this function)
+ :param options: a dictionary of additional options (optional)
+ :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
+ :rtype: ``iterator``
+ """
+ template_class = options.get('template_class', MarkupTemplate)
+ if isinstance(template_class, basestring):
+ # "package.module:ClassName" -> import the module, fetch the class
+ module, clsname = template_class.split(':', 1)
+ template_class = getattr(__import__(module, {}, {}, [clsname]), clsname)
+ encoding = options.get('encoding', None)
+
+ extract_text = options.get('extract_text', True)
+ if isinstance(extract_text, basestring):
+ # String values are interpreted as booleans
+ extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')
+
+ ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS)
+ if isinstance(ignore_tags, basestring):
+ ignore_tags = ignore_tags.split()
+ ignore_tags = [QName(tag) for tag in ignore_tags]
+
+ include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS)
+ if isinstance(include_attrs, basestring):
+ include_attrs = include_attrs.split()
+ include_attrs = [QName(attr) for attr in include_attrs]
+
+ tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
+ encoding=encoding)
+ tmpl.loader = None
+
+ # No translation function is passed; the translator is only used for
+ # extraction here
+ translator = Translator(None, ignore_tags, include_attrs, extract_text)
+ if hasattr(tmpl, 'add_directives'):
+ tmpl.add_directives(Translator.NAMESPACE, translator)
+ for message in translator.extract(tmpl.stream, gettext_functions=keywords):
+ yield message
diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py
new file mode 100644
index 0000000..9b75b06
--- /dev/null
+++ b/genshi/filters/transform.py
@@ -0,0 +1,1310 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""A filter for functional-style transformations of markup streams.
+
+The `Transformer` filter provides a variety of transformations that can be
+applied to parts of streams that match given XPath expressions. These
+transformations can be chained to achieve results that would be comparatively
+tedious to achieve by writing stream filters by hand. The approach of chaining
+node selection and transformation has been inspired by the `jQuery`_ Javascript
+library.
+
+ .. _`jQuery`: http://jquery.com/
+
+For example, the following transformation removes the ``<title>`` element from
+the ``<head>`` of the input document:
+
+>>> from genshi.builder import tag
+>>> html = HTML('''<html>
+... <head><title>Some Title</title></head>
+... <body>
+... Some <em>body</em> text.
+... </body>
+... </html>''')
+>>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
+... .unwrap().wrap(tag.u))
+<html>
+ <head><title>Some Title</title></head>
+ <body>
+ Some <u>BODY</u> text.
+ </body>
+</html>
+
+The ``Transformer`` supports a large number of useful transformations out of the
+box, but custom transformations can be added easily.
+
+:since: version 0.5
+"""
+
+import re
+import sys
+
+from genshi.builder import Element
+from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
+from genshi.path import Path
+
+__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
+ 'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
+
+
+class TransformMark(str):
+    """Interned marker string used to tag events on a transformation stream.
+
+    Creating a mark with a value that has been used before returns the
+    instance registered first, so equal marks are the same object.
+    """
+    __slots__ = []
+    _instances = {}
+
+    def __new__(cls, val):
+        # A candidate is always built; ``setdefault`` hands back the
+        # instance already registered for ``val`` if there is one.
+        candidate = str.__new__(cls, val)
+        registry = cls._instances
+        return registry.setdefault(val, candidate)
+
+
+ENTER = TransformMark('ENTER')
+"""Stream augmentation mark indicating that a selected element is being
+entered."""
+
+INSIDE = TransformMark('INSIDE')
+"""Stream augmentation mark indicating that processing is currently inside a
+selected element."""
+
+OUTSIDE = TransformMark('OUTSIDE')
+"""Stream augmentation mark indicating that a match occurred outside a selected
+element."""
+
+ATTR = TransformMark('ATTR')
+"""Stream augmentation mark indicating a selected element attribute."""
+
+EXIT = TransformMark('EXIT')
+"""Stream augmentation mark indicating that a selected element is being
+exited."""
+
+BREAK = TransformMark('BREAK')
+"""Stream augmentation mark indicating a break between two otherwise contiguous
+blocks of marked events.
+
+This is used primarily by the cut() transform to provide later transforms with
+an opportunity to operate on the cut buffer.
+"""
+
+
+class PushBackStream(object):
+    """Allows a single event to be pushed back onto the stream and re-consumed.
+
+    Iterating yields the events of the wrapped stream.  An event handed to
+    `push` while iterating is yielded next, before the wrapped stream is
+    advanced any further.  `None` marks the empty slot, so `None` itself
+    cannot be pushed back.
+    """
+    def __init__(self, stream):
+        """Wrap the given iterable of events.
+
+        :param stream: the event stream to wrap
+        """
+        self.stream = iter(stream)
+        self.peek = None
+
+    def push(self, event):
+        """Push `event` back so that it is the next one yielded.
+
+        Only one event can be pending at a time.
+        """
+        assert self.peek is None
+        self.peek = event
+
+    def __iter__(self):
+        while True:
+            if self.peek is not None:
+                peek = self.peek
+                self.peek = None
+                yield peek
+                continue
+            # Take exactly one event from the wrapped iterator, then go back
+            # to the top so that an event pushed by the consumer is served
+            # first.  A ``for`` loop is used instead of calling ``.next()``
+            # so the code does not depend on the Python 2 iterator method.
+            for event in self.stream:
+                yield event
+                break
+            else:
+                # Wrapped stream exhausted.  Finish with a plain ``return``
+                # rather than by raising StopIteration, which is turned into
+                # a RuntimeError inside generators under PEP 479.
+                if self.peek is None:
+                    return
+
+
+class Transformer(object):
+ """Stream filter that can apply a variety of different transformations to
+ a stream.
+
+ This is achieved by selecting the events to be transformed using XPath,
+ then applying the transformations to the events matched by the path
+ expression. Each marked event is in the form (mark, (kind, data, pos)),
+ where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.
+
+ The first three marks match `START` and `END` events, and any events
+ contained `INSIDE` any selected XML/HTML element. A non-element match
+ outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE`
+ mark.
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+
+ Transformations act on selected stream events matching an XPath expression.
+ Here's an example of removing some markup (the title, in this case)
+ selected by an expression:
+
+ >>> print(html | Transformer('head/title').remove())
+ <html><head/><body>Some <em>body</em> text.</body></html>
+
+ Inserted content can be passed in the form of a string, or a markup event
+ stream, which includes streams generated programmatically via the
+ `builder` module:
+
+ >>> from genshi.builder import tag
+ >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
+ <html><head><title>Some Title</title></head><body><h1>Document
+ Title</h1>Some <em>body</em> text.</body></html>
+
+ Each XPath expression determines the set of tags that will be acted upon by
+ subsequent transformations. In this example we select the ``<title>`` text,
+ copy it into a buffer, then select the ``<body>`` element and paste the
+ copied text into the body as ``<h1>`` enclosed text:
+
+ >>> buffer = StreamBuffer()
+ >>> print(html | Transformer('head/title/text()').copy(buffer)
+ ... .end().select('body').prepend(tag.h1(buffer)))
+ <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
+ <em>body</em> text.</body></html>
+
+ Transformations can also be assigned and reused, although care must be
+ taken when using buffers, to ensure that buffers are cleared between
+ transforms:
+
+ >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
+ >>> print(html | emphasis)
+ <html><head><title>Some Title</title></head><body>Some <em
+ class="emphasis">body</em> text.</body></html>
+ """
+
+ __slots__ = ['transforms']
+
+ def __init__(self, path='.'):
+ """Construct a new transformation filter.
+
+ :param path: an XPath expression (as string) or a `Path` instance
+ """
+ self.transforms = [SelectTransformation(path)]
+
+ def __call__(self, stream, keep_marks=False):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ :param keep_marks: Do not strip transformer selection marks from the
+ stream. Useful for testing.
+ :return: the transformed stream
+ :rtype: `Stream`
+ """
+ transforms = self._mark(stream)
+ for link in self.transforms:
+ transforms = link(transforms)
+ if not keep_marks:
+ transforms = self._unmark(transforms)
+ return Stream(transforms,
+ serializer=getattr(stream, 'serializer', None))
+
+ def apply(self, function):
+ """Apply a transformation to the stream.
+
+ Transformations can be chained, similar to stream filters. Any callable
+ accepting a marked stream can be used as a transform.
+
+ As an example, here is a simple `TEXT` event upper-casing transform:
+
+ >>> def upper(stream):
+ ... for mark, (kind, data, pos) in stream:
+ ... if mark and kind is TEXT:
+ ... yield mark, (kind, data.upper(), pos)
+ ... else:
+ ... yield mark, (kind, data, pos)
+ >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
+ >>> print(short_stream | Transformer('.//em/text()').apply(upper))
+ <body>Some <em>TEST</em> text</body>
+ """
+ transformer = Transformer()
+ transformer.transforms = self.transforms[:]
+ if isinstance(function, Transformer):
+ transformer.transforms.extend(function.transforms)
+ else:
+ transformer.transforms.append(function)
+ return transformer
+
+ #{ Selection operations
+
+ def select(self, path):
+ """Mark events matching the given XPath expression, within the current
+ selection.
+
+ >>> html = HTML('<body>Some <em>test</em> text</body>')
+ >>> print(html | Transformer().select('.//em').trace())
+ (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
+ (None, ('TEXT', u'Some ', (None, 1, 6)))
+ ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+ ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
+ ('EXIT', ('END', QName('em'), (None, 1, 19)))
+ (None, ('TEXT', u' text', (None, 1, 24)))
+ (None, ('END', QName('body'), (None, 1, 29)))
+ <body>Some <em>test</em> text</body>
+
+ :param path: an XPath expression (as string) or a `Path` instance
+ :return: the stream augmented by transformation marks
+ :rtype: `Transformer`
+ """
+ return self.apply(SelectTransformation(path))
+
+ def invert(self):
+ """Invert selection so that marked events become unmarked, and vice
+ versa.
+
+ Specificaly, all marks are converted to null marks, and all null marks
+ are converted to OUTSIDE marks.
+
+ >>> html = HTML('<body>Some <em>test</em> text</body>')
+ >>> print(html | Transformer('//em').invert().trace())
+ ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
+ ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
+ (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
+ (None, ('TEXT', u'test', (None, 1, 15)))
+ (None, ('END', QName('em'), (None, 1, 19)))
+ ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
+ ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
+ <body>Some <em>test</em> text</body>
+
+ :rtype: `Transformer`
+ """
+ return self.apply(InvertTransformation())
+
+ def end(self):
+ """End current selection, allowing all events to be selected.
+
+ Example:
+
+ >>> html = HTML('<body>Some <em>test</em> text</body>')
+ >>> print(html | Transformer('//em').end().trace())
+ ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
+ ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
+ ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+ ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
+ ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
+ ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
+ ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
+ <body>Some <em>test</em> text</body>
+
+ :return: the stream augmented by transformation marks
+ :rtype: `Transformer`
+ """
+ return self.apply(EndTransformation())
+
+ #{ Deletion operations
+
+ def empty(self):
+ """Empty selected elements of all content.
+
+ Example:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em').empty())
+ <html><head><title>Some Title</title></head><body>Some <em/>
+ text.</body></html>
+
+ :rtype: `Transformer`
+ """
+ return self.apply(EmptyTransformation())
+
+ def remove(self):
+ """Remove selection from the stream.
+
+ Example:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em').remove())
+ <html><head><title>Some Title</title></head><body>Some
+ text.</body></html>
+
+ :rtype: `Transformer`
+ """
+ return self.apply(RemoveTransformation())
+
+ #{ Direct element operations
+
+ def unwrap(self):
+ """Remove outermost enclosing elements from selection.
+
+ Example:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em').unwrap())
+ <html><head><title>Some Title</title></head><body>Some body
+ text.</body></html>
+
+ :rtype: `Transformer`
+ """
+ return self.apply(UnwrapTransformation())
+
+ def wrap(self, element):
+ """Wrap selection in an element.
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em').wrap('strong'))
+ <html><head><title>Some Title</title></head><body>Some
+ <strong><em>body</em></strong> text.</body></html>
+
+ :param element: either a tag name (as string) or an `Element` object
+ :rtype: `Transformer`
+ """
+ return self.apply(WrapTransformation(element))
+
+ #{ Content insertion operations
+
+ def replace(self, content):
+ """Replace selection with content.
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//title/text()').replace('New Title'))
+ <html><head><title>New Title</title></head><body>Some <em>body</em>
+ text.</body></html>
+
+ :param content: Either a callable, an iterable of events, or a string
+ to insert.
+ :rtype: `Transformer`
+ """
+ return self.apply(ReplaceTransformation(content))
+
+ def before(self, content):
+ """Insert content before selection.
+
+ In this example we insert the word 'emphasised' before the <em> opening
+ tag:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em').before('emphasised '))
+ <html><head><title>Some Title</title></head><body>Some emphasised
+ <em>body</em> text.</body></html>
+
+ :param content: Either a callable, an iterable of events, or a string
+ to insert.
+ :rtype: `Transformer`
+ """
+ return self.apply(BeforeTransformation(content))
+
+ def after(self, content):
+ """Insert content after selection.
+
+ Here, we insert some text after the </em> closing tag:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em').after(' rock'))
+ <html><head><title>Some Title</title></head><body>Some <em>body</em>
+ rock text.</body></html>
+
+ :param content: Either a callable, an iterable of events, or a string
+ to insert.
+ :rtype: `Transformer`
+ """
+ return self.apply(AfterTransformation(content))
+
+ def prepend(self, content):
+ """Insert content after the ENTER event of the selection.
+
+ Inserting some new text at the start of the <body>:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//body').prepend('Some new body text. '))
+ <html><head><title>Some Title</title></head><body>Some new body text.
+ Some <em>body</em> text.</body></html>
+
+ :param content: Either a callable, an iterable of events, or a string
+ to insert.
+ :rtype: `Transformer`
+ """
+ return self.apply(PrependTransformation(content))
+
+ def append(self, content):
+ """Insert content before the END event of the selection.
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//body').append(' Some new body text.'))
+ <html><head><title>Some Title</title></head><body>Some <em>body</em>
+ text. Some new body text.</body></html>
+
+ :param content: Either a callable, an iterable of events, or a string
+ to insert.
+ :rtype: `Transformer`
+ """
+ return self.apply(AppendTransformation(content))
+
+ #{ Attribute manipulation
+
+ def attr(self, name, value):
+ """Add, replace or delete an attribute on selected elements.
+
+ If `value` evaluates to `None` the attribute will be deleted from the
+ element:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em class="before">body</em> <em>text</em>.</body>'
+ ... '</html>')
+ >>> print(html | Transformer('body/em').attr('class', None))
+ <html><head><title>Some Title</title></head><body>Some <em>body</em>
+ <em>text</em>.</body></html>
+
+ Otherwise the attribute will be set to `value`:
+
+ >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
+ <html><head><title>Some Title</title></head><body>Some <em
+ class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>
+
+ If `value` is a callable it will be called with the attribute name and
+ the `START` event for the matching element. Its return value will then
+ be used to set the attribute:
+
+ >>> def print_attr(name, event):
+ ... attrs = event[1][1]
+ ... print(attrs)
+ ... return attrs.get(name)
+ >>> print(html | Transformer('body/em').attr('class', print_attr))
+ Attrs([(QName('class'), u'before')])
+ Attrs()
+ <html><head><title>Some Title</title></head><body>Some <em
+ class="before">body</em> <em>text</em>.</body></html>
+
+ :param name: the name of the attribute
+ :param value: the value that should be set for the attribute.
+ :rtype: `Transformer`
+ """
+ return self.apply(AttrTransformation(name, value))
+
+ #{ Buffer operations
+
+ def copy(self, buffer, accumulate=False):
+ """Copy selection into buffer.
+
+ The buffer is replaced by each *contiguous* selection before being passed
+ to the next transformation. If accumulate=True, further selections will
+ be appended to the buffer rather than replacing it.
+
+ >>> from genshi.builder import tag
+ >>> buffer = StreamBuffer()
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('head/title/text()').copy(buffer)
+ ... .end().select('body').prepend(tag.h1(buffer)))
+ <html><head><title>Some Title</title></head><body><h1>Some
+ Title</h1>Some <em>body</em> text.</body></html>
+
+ This example illustrates that only a single contiguous selection will
+ be buffered:
+
+ >>> print(html | Transformer('head/title/text()').copy(buffer)
+ ... .end().select('body/em').copy(buffer).end().select('body')
+ ... .prepend(tag.h1(buffer)))
+ <html><head><title>Some Title</title></head><body><h1>Some
+ Title</h1>Some <em>body</em> text.</body></html>
+ >>> print(buffer)
+ <em>body</em>
+
+ Element attributes can also be copied for later use:
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body><em>Some</em> <em class="before">body</em>'
+ ... '<em>text</em>.</body></html>')
+ >>> buffer = StreamBuffer()
+ >>> def apply_attr(name, entry):
+ ... return list(buffer)[0][1][1].get('class')
+ >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
+ ... .end().buffer().select('body/em[not(@class)]')
+ ... .attr('class', apply_attr))
+ <html><head><title>Some Title</title></head><body><em
+ class="before">Some</em> <em class="before">body</em><em
+ class="before">text</em>.</body></html>
+
+
+ :param buffer: the `StreamBuffer` in which the selection should be
+ stored
+ :rtype: `Transformer`
+ :note: Copy (and cut) copy each individual selected object into the
+ buffer before passing to the next transform. For example, the
+ XPath ``*|text()`` will select all elements and text, each
+ instance of which will be copied to the buffer individually
+ before passing to the next transform. This has implications for
+ how ``StreamBuffer`` objects can be used, so some
+ experimentation may be required.
+
+ """
+ return self.apply(CopyTransformation(buffer, accumulate))
+
+ def cut(self, buffer, accumulate=False):
+ """Copy selection into buffer and remove the selection from the stream.
+
+ >>> from genshi.builder import tag
+ >>> buffer = StreamBuffer()
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('.//em/text()').cut(buffer)
+ ... .end().select('.//em').after(tag.h1(buffer)))
+ <html><head><title>Some Title</title></head><body>Some
+ <em/><h1>body</h1> text.</body></html>
+
+ Specifying accumulate=True appends all selected intervals onto the
+ buffer. Combining this with the .buffer() operation allows us to operate
+ on all copied events rather than per-segment. See the documentation on
+ buffer() for more information.
+
+ :param buffer: the `StreamBuffer` in which the selection should be
+ stored
+ :rtype: `Transformer`
+ :note: this transformation will buffer the entire input stream
+ """
+ return self.apply(CutTransformation(buffer, accumulate))
+
+ def buffer(self):
+ """Buffer the entire stream (can consume a considerable amount of
+ memory).
+
+ Useful in conjunction with copy(accumulate=True) and
+ cut(accumulate=True) to ensure that all marked events in the entire
+ stream are copied to the buffer before further transformations are
+ applied.
+
+ For example, to move all <note> elements inside a <notes> tag at the
+ top of the document:
+
+ >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
+ ... 'text <note>two</note>.</body></doc>')
+ >>> buffer = StreamBuffer()
+ >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
+ ... .end().buffer().select('notes').prepend(buffer))
+ <doc><notes><note>one</note><note>two</note></notes><body>Some text
+ .</body></doc>
+
+ """
+ return self.apply(list)
+
+ #{ Miscellaneous operations
+
+ def filter(self, filter):
+ """Apply a normal stream filter to the selection. The filter is called
+ once for each contiguous block of marked events.
+
+ >>> from genshi.filters.html import HTMLSanitizer
+ >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
+ ... '</script> and some more text</body></html>')
+ >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
+ <html><body>Some text and some more text</body></html>
+
+ :param filter: The stream filter to apply.
+ :rtype: `Transformer`
+ """
+ return self.apply(FilterTransformation(filter))
+
+ def map(self, function, kind):
+ """Applies a function to the ``data`` element of events of ``kind`` in
+ the selection.
+
+ >>> html = HTML('<html><head><title>Some Title</title></head>'
+ ... '<body>Some <em>body</em> text.</body></html>')
+ >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
+ <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
+ text.</body></html>
+
+ :param function: the function to apply
+ :param kind: the kind of event the function should be applied to
+ :rtype: `Transformer`
+ """
+ return self.apply(MapTransformation(function, kind))
+
+ def substitute(self, pattern, replace, count=1):
+ """Replace text matching a regular expression.
+
+ Refer to the documentation for ``re.sub()`` for details.
+
+ >>> html = HTML('<html><body>Some text, some more text and '
+ ... '<b>some bold text</b>\\n'
+ ... '<i>some italicised text</i></body></html>')
+ >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
+ <html><body>Some text, some more text and <b>SOME bold text</b>
+ <i>some italicised text</i></body></html>
+ >>> tags = tag.html(tag.body('Some text, some more text and\\n',
+ ... Markup('<b>some bold text</b>')))
+ >>> print(tags.generate() | Transformer('body').substitute(
+ ... '(?i)some', 'SOME'))
+ <html><body>SOME text, some more text and
+ <b>SOME bold text</b></body></html>
+
+ :param pattern: A regular expression object or string.
+ :param replace: Replacement pattern.
+ :param count: Number of replacements to make in each text fragment.
+ :rtype: `Transformer`
+ """
+ return self.apply(SubstituteTransformation(pattern, replace, count))
+
+ def rename(self, name):
+ """Rename matching elements.
+
+ >>> html = HTML('<html><body>Some text, some more text and '
+ ... '<b>some bold text</b></body></html>')
+ >>> print(html | Transformer('body/b').rename('strong'))
+ <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
+ """
+ return self.apply(RenameTransformation(name))
+
+ def trace(self, prefix='', fileobj=None):
+ """Print events as they pass through the transform.
+
+ >>> html = HTML('<body>Some <em>test</em> text</body>')
+ >>> print(html | Transformer('em').trace())
+ (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
+ (None, ('TEXT', u'Some ', (None, 1, 6)))
+ ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+ ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
+ ('EXIT', ('END', QName('em'), (None, 1, 19)))
+ (None, ('TEXT', u' text', (None, 1, 24)))
+ (None, ('END', QName('body'), (None, 1, 29)))
+ <body>Some <em>test</em> text</body>
+
+ :param prefix: a string to prefix each event with in the output
+ :param fileobj: the writable file-like object to write to; defaults to
+ the standard output stream
+ :rtype: `Transformer`
+ """
+ return self.apply(TraceTransformation(prefix, fileobj=fileobj))
+
+ # Internal methods
+
+ def _mark(self, stream):
+ for event in stream:
+ yield OUTSIDE, event
+
+ def _unmark(self, stream):
+ for mark, event in stream:
+ kind = event[0]
+ if not (kind is None or kind is ATTR or kind is BREAK):
+ yield event
+
+
+class SelectTransformation(object):
+ """Select and mark events that match an XPath expression."""
+
+ def __init__(self, path):
+ """Create selection.
+
+ :param path: an XPath expression (as string) or a `Path` object
+ """
+ if not isinstance(path, Path):
+ path = Path(path)
+ self.path = path
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ namespaces = {}
+ variables = {}
+ test = self.path.test()
+ stream = iter(stream)
+ next = stream.next
+ for mark, event in stream:
+ if mark is None:
+ yield mark, event
+ continue
+ result = test(event, namespaces, variables)
+ # XXX This is effectively genshi.core._ensure() for transform
+ # streams.
+ if result is True:
+ if event[0] is START:
+ yield ENTER, event
+ depth = 1
+ while depth > 0:
+ mark, subevent = next()
+ if subevent[0] is START:
+ depth += 1
+ elif subevent[0] is END:
+ depth -= 1
+ if depth == 0:
+ yield EXIT, subevent
+ else:
+ yield INSIDE, subevent
+ test(subevent, namespaces, variables, updateonly=True)
+ else:
+ yield OUTSIDE, event
+ elif isinstance(result, Attrs):
+ # XXX Selected *attributes* are given a "kind" of ATTR to
+ # indicate they are not really part of the stream.
+ yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
+ yield None, event
+ elif isinstance(result, tuple):
+ yield OUTSIDE, result
+ elif result:
+ # XXX Assume everything else is "text"?
+ yield None, (TEXT, unicode(result), (None, -1, -1))
+ else:
+ yield None, event
+
+
+class InvertTransformation(object):
+ """Invert selection so that marked events become unmarked, and vice versa.
+
+ Specifically, all input marks are converted to null marks, and all input
+ null marks are converted to OUTSIDE marks.
+ """
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ for mark, event in stream:
+ if mark:
+ yield None, event
+ else:
+ yield OUTSIDE, event
+
+
+class EndTransformation(object):
+ """End the current selection."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ for mark, event in stream:
+ yield OUTSIDE, event
+
+
+class EmptyTransformation(object):
+ """Empty selected elements of all content."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ for mark, event in stream:
+ yield mark, event
+ if mark is ENTER:
+ for mark, event in stream:
+ if mark is EXIT:
+ yield mark, event
+ break
+
+
+class RemoveTransformation(object):
+ """Remove selection from the stream."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ for mark, event in stream:
+ if mark is None:
+ yield mark, event
+
+
+class UnwrapTransformation(object):
+ """Remove outermost enclosing elements from selection."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ for mark, event in stream:
+ if mark not in (ENTER, EXIT):
+ yield mark, event
+
+
+class WrapTransformation(object):
+ """Wrap selection in an element."""
+
+ def __init__(self, element):
+ if isinstance(element, Element):
+ self.element = element
+ else:
+ self.element = Element(element)
+
+ def __call__(self, stream):
+ for mark, event in stream:
+ if mark:
+ element = list(self.element.generate())
+ for prefix in element[:-1]:
+ yield None, prefix
+ yield mark, event
+ start = mark
+ stopped = False
+ for mark, event in stream:
+ if start is ENTER and mark is EXIT:
+ yield mark, event
+ stopped = True
+ break
+ if not mark:
+ break
+ yield mark, event
+ else:
+ stopped = True
+ yield None, element[-1]
+ if not stopped:
+ yield mark, event
+ else:
+ yield mark, event
+
+
+class TraceTransformation(object):
+ """Print events as they pass through the transform."""
+
+ def __init__(self, prefix='', fileobj=None):
+ """Trace constructor.
+
+ :param prefix: text to prefix each traced line with.
+ :param fileobj: the writable file-like object to write to
+ """
+ self.prefix = prefix
+ self.fileobj = fileobj or sys.stdout
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ for event in stream:
+ self.fileobj.write('%s%s\n' % (self.prefix, event))
+ yield event
+
+
+class FilterTransformation(object):
+ """Apply a normal stream filter to the selection. The filter is called once
+ for each selection."""
+
+ def __init__(self, filter):
+ """Create the transform.
+
+ :param filter: The stream filter to apply.
+ """
+ self.filter = filter
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ def flush(queue):
+ if queue:
+ for event in self.filter(queue):
+ yield OUTSIDE, event
+ del queue[:]
+
+ queue = []
+ for mark, event in stream:
+ if mark is ENTER:
+ queue.append(event)
+ for mark, event in stream:
+ queue.append(event)
+ if mark is EXIT:
+ break
+ for queue_event in flush(queue):
+ yield queue_event
+ elif mark is OUTSIDE:
+ stopped = False
+ queue.append(event)
+ for mark, event in stream:
+ if mark is not OUTSIDE:
+ break
+ queue.append(event)
+ else:
+ stopped = True
+ for queue_event in flush(queue):
+ yield queue_event
+ if not stopped:
+ yield mark, event
+ else:
+ yield mark, event
+ for queue_event in flush(queue):
+ yield queue_event
+
+
+class MapTransformation(object):
+ """Apply a function to the `data` element of events of ``kind`` in the
+ selection.
+ """
+
+ def __init__(self, function, kind):
+ """Create the transform.
+
+ :param function: the function to apply; the function must take one
+ argument, the `data` element of each selected event
+ :param kind: the stream event ``kind`` to apply the `function` to
+ """
+ self.function = function
+ self.kind = kind
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ for mark, (kind, data, pos) in stream:
+ if mark and self.kind in (None, kind):
+ yield mark, (kind, self.function(data), pos)
+ else:
+ yield mark, (kind, data, pos)
+
+
+class SubstituteTransformation(object):
+ """Replace text matching a regular expression.
+
+ Refer to the documentation for ``re.sub()`` for details.
+ """
+ def __init__(self, pattern, replace, count=0):
+ """Create the transform.
+
+ :param pattern: A regular expression object, or string.
+ :param replace: Replacement pattern.
+ :param count: Number of replacements to make in each text fragment.
+ """
+ if isinstance(pattern, basestring):
+ self.pattern = re.compile(pattern)
+ else:
+ self.pattern = pattern
+ self.count = count
+ self.replace = replace
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ for mark, (kind, data, pos) in stream:
+ if mark is not None and kind is TEXT:
+ new_data = self.pattern.sub(self.replace, data, self.count)
+ if isinstance(data, Markup):
+ data = Markup(new_data)
+ else:
+ data = new_data
+ yield mark, (kind, data, pos)
+
+
+class RenameTransformation(object):
+ """Rename matching elements."""
+ def __init__(self, name):
+ """Create the transform.
+
+ :param name: New element name.
+ """
+ self.name = QName(name)
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ for mark, (kind, data, pos) in stream:
+ if mark is ENTER:
+ data = self.name, data[1]
+ elif mark is EXIT:
+ data = self.name
+ yield mark, (kind, data, pos)
+
+
+class InjectorTransformation(object):
+ """Abstract base class for transformations that inject content into a
+ stream.
+
+ >>> class Top(InjectorTransformation):
+ ... def __call__(self, stream):
+ ... for event in self._inject():
+ ... yield event
+ ... for event in stream:
+ ... yield event
+ >>> html = HTML('<body>Some <em>test</em> text</body>')
+ >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
+ Prefix <body>Some <em>test</em> text</body>
+ """
+ def __init__(self, content):
+ """Create a new injector.
+
+ :param content: An iterable of Genshi stream events, or a string to be
+ injected.
+ """
+ self.content = content
+
+ def _inject(self):
+ content = self.content
+ if hasattr(content, '__call__'):
+ content = content()
+ for event in _ensure(content):
+ yield None, event
+
+
+class ReplaceTransformation(InjectorTransformation):
+ """Replace selection with content."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ stream = PushBackStream(stream)
+ for mark, event in stream:
+ if mark is not None:
+ start = mark
+ for subevent in self._inject():
+ yield subevent
+ for mark, event in stream:
+ if start is ENTER:
+ if mark is EXIT:
+ break
+ elif mark != start:
+ stream.push((mark, event))
+ break
+ else:
+ yield mark, event
+
+
+class BeforeTransformation(InjectorTransformation):
+ """Insert content before selection."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ stream = PushBackStream(stream)
+ for mark, event in stream:
+ if mark is not None:
+ start = mark
+ for subevent in self._inject():
+ yield subevent
+ yield mark, event
+ for mark, event in stream:
+ if mark != start and start is not ENTER:
+ stream.push((mark, event))
+ break
+ yield mark, event
+ if start is ENTER and mark is EXIT:
+ break
+ else:
+ yield mark, event
+
+
+class AfterTransformation(InjectorTransformation):
+ """Insert content after selection."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ stream = PushBackStream(stream)
+ for mark, event in stream:
+ yield mark, event
+ if mark:
+ start = mark
+ for mark, event in stream:
+ if start is not ENTER and mark != start:
+ stream.push((mark, event))
+ break
+ yield mark, event
+ if start is ENTER and mark is EXIT:
+ break
+ for subevent in self._inject():
+ yield subevent
+
+
+class PrependTransformation(InjectorTransformation):
+ """Prepend content to the inside of selected elements."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ for mark, event in stream:
+ yield mark, event
+ if mark is ENTER:
+ for subevent in self._inject():
+ yield subevent
+
+
+class AppendTransformation(InjectorTransformation):
+ """Append content after the content of selected elements."""
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ for mark, event in stream:
+ yield mark, event
+ if mark is ENTER:
+ for mark, event in stream:
+ if mark is EXIT:
+ break
+ yield mark, event
+ for subevent in self._inject():
+ yield subevent
+ yield mark, event
+
+
+class AttrTransformation(object):
+ """Set an attribute on selected elements."""
+
+ def __init__(self, name, value):
+ """Construct transform.
+
+ :param name: name of the attribute that should be set
+ :param value: the value to set
+ """
+ self.name = name
+ self.value = value
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: The marked event stream to filter
+ """
+ callable_value = hasattr(self.value, '__call__')
+ for mark, (kind, data, pos) in stream:
+ if mark is ENTER:
+ if callable_value:
+ value = self.value(self.name, (kind, data, pos))
+ else:
+ value = self.value
+ if value is None:
+ attrs = data[1] - [QName(self.name)]
+ else:
+ attrs = data[1] | [(QName(self.name), value)]
+ data = (data[0], attrs)
+ yield mark, (kind, data, pos)
+
+
+
+class StreamBuffer(Stream):
+ """Stream event buffer used for cut and copy transformations."""
+
+ def __init__(self):
+ """Create the buffer."""
+ Stream.__init__(self, [])
+
+ def append(self, event):
+ """Add an event to the buffer.
+
+ :param event: the markup event to add
+ """
+ self.events.append(event)
+
+ def reset(self):
+ """Empty the buffer of events."""
+ del self.events[:]
+
+
+class CopyTransformation(object):
+ """Copy selected events into a buffer for later insertion."""
+
+ def __init__(self, buffer, accumulate=False):
+ """Create the copy transformation.
+
+ :param buffer: the `StreamBuffer` in which the selection should be
+ stored
+ """
+ if not accumulate:
+ buffer.reset()
+ self.buffer = buffer
+ self.accumulate = accumulate
+
+ def __call__(self, stream):
+ """Apply the transformation to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ stream = PushBackStream(stream)
+
+ for mark, event in stream:
+ if mark:
+ if not self.accumulate:
+ self.buffer.reset()
+ events = [(mark, event)]
+ self.buffer.append(event)
+ start = mark
+ for mark, event in stream:
+ if start is not ENTER and mark != start:
+ stream.push((mark, event))
+ break
+ events.append((mark, event))
+ self.buffer.append(event)
+ if start is ENTER and mark is EXIT:
+ break
+ for i in events:
+ yield i
+ else:
+ yield mark, event
+
+
+class CutTransformation(object):
+ """Cut selected events into a buffer for later insertion and remove the
+ selection.
+ """
+
+ def __init__(self, buffer, accumulate=False):
+ """Create the cut transformation.
+
+ :param buffer: the `StreamBuffer` in which the selection should be
+ stored
+ """
+ self.buffer = buffer
+ self.accumulate = accumulate
+
+
+ def __call__(self, stream):
+ """Apply the transform filter to the marked stream.
+
+ :param stream: the marked event stream to filter
+ """
+ attributes = []
+ stream = PushBackStream(stream)
+ broken = False
+ if not self.accumulate:
+ self.buffer.reset()
+ for mark, event in stream:
+ if mark:
+ # Send a BREAK event if there was no other event sent between
+ if not self.accumulate:
+ if not broken and self.buffer:
+ yield BREAK, (BREAK, None, None)
+ self.buffer.reset()
+ self.buffer.append(event)
+ start = mark
+ if mark is ATTR:
+ attributes.extend([name for name, _ in event[1][1]])
+ for mark, event in stream:
+ if start is mark is ATTR:
+ attributes.extend([name for name, _ in event[1][1]])
+ # Handle non-element contiguous selection
+ if start is not ENTER and mark != start:
+ # Operating on the attributes of a START event
+ if start is ATTR:
+ kind, data, pos = event
+ assert kind is START
+ data = (data[0], data[1] - attributes)
+ attributes = None
+ stream.push((mark, (kind, data, pos)))
+ else:
+ stream.push((mark, event))
+ break
+ self.buffer.append(event)
+ if start is ENTER and mark is EXIT:
+ break
+ broken = False
+ else:
+ broken = True
+ yield mark, event
+ if not broken and self.buffer:
+ yield BREAK, (BREAK, None, None)
diff --git a/genshi/input.py b/genshi/input.py
new file mode 100644
index 0000000..039e5e5
--- /dev/null
+++ b/genshi/input.py
@@ -0,0 +1,443 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support for constructing markup streams from files, strings, or other
+sources.
+"""
+
+from itertools import chain
+import htmlentitydefs as entities
+import HTMLParser as html
+from StringIO import StringIO
+from xml.parsers import expat
+
+from genshi.core import Attrs, QName, Stream, stripentities
+from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \
+ END_NS, START_CDATA, END_CDATA, PI, COMMENT
+
+__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
+__docformat__ = 'restructuredtext en'
+
+
def ET(element):
    """Convert a given ElementTree element to a markup stream.

    The element is walked depth-first: a ``START`` event, the element text,
    the events of every child, an ``END`` event and finally the tail text.
    All events carry the position ``(None, -1, -1)``.

    :param element: an ElementTree element
    :return: a markup stream
    """
    tag_name = QName(element.tag.lstrip('{'))
    attrs = Attrs([(QName(attr.lstrip('{')), value)
                   for attr, value in element.items()])

    yield START, (tag_name, attrs), (None, -1, -1)
    if element.text:
        yield TEXT, element.text, (None, -1, -1)
    # Iterate over the element itself: ``getchildren()`` is deprecated and no
    # longer exists in recent ElementTree versions.
    for child in element:
        for item in ET(child):
            yield item
    yield END, tag_name, (None, -1, -1)
    if element.tail:
        yield TEXT, element.tail, (None, -1, -1)
+
+
class ParseError(Exception):
    """Error raised when the parsed input contains a fatal syntax error."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Exception initializer.

        :param message: the error message from the parser
        :param filename: the path to the file that was parsed
        :param lineno: the number of the line on which the error was encountered
        :param offset: the column number where the error was encountered
        """
        # ``msg`` keeps the bare parser message; the text handed to
        # ``Exception`` additionally names the file when one is known.
        self.msg = message
        described = message
        if filename:
            described = message + ', in ' + filename
        Exception.__init__(self, described)
        self.filename = filename or '<string>'
        self.lineno, self.offset = lineno, offset
+
+
class XMLParser(object):
    """Generator-based XML parser based on roughly equivalent code in
    Kid/ElementTree.

    The parsing is initiated by iterating over the parser object:

    >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
    >>> for kind, data, pos in parser:
    ...     print('%s %s' % (kind, data))
    START (QName('root'), Attrs([(QName('id'), u'2')]))
    START (QName('child'), Attrs())
    TEXT Foo
    END child
    END root
    """

    # An ``<!ENTITY>`` declaration for every name in ``name2codepoint``; the
    # joined text is fed to Expat as the external DTD (see `_build_foreign`).
    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
                   entities.name2codepoint.items()]
    _external_dtd = '\n'.join(_entitydefs)

    def __init__(self, source, filename=None, encoding=None):
        """Initialize the parser for the given XML input.

        :param source: the XML text as a file-like object
        :param filename: the name of the file, if appropriate
        :param encoding: the encoding of the file; if not specified, the
                         encoding is assumed to be ASCII, UTF-8, or UTF-16, or
                         whatever the encoding specified in the XML declaration
                         (if any)
        """
        self.source = source
        self.filename = filename

        # Setup the Expat parser
        parser = expat.ParserCreate(encoding, '}')
        parser.buffer_text = True
        parser.returns_unicode = True
        parser.ordered_attributes = True

        parser.StartElementHandler = self._handle_start
        parser.EndElementHandler = self._handle_end
        parser.CharacterDataHandler = self._handle_data
        parser.StartDoctypeDeclHandler = self._handle_doctype
        parser.StartNamespaceDeclHandler = self._handle_start_ns
        parser.EndNamespaceDeclHandler = self._handle_end_ns
        parser.StartCdataSectionHandler = self._handle_start_cdata
        parser.EndCdataSectionHandler = self._handle_end_cdata
        parser.ProcessingInstructionHandler = self._handle_pi
        parser.XmlDeclHandler = self._handle_xml_decl
        parser.CommentHandler = self._handle_comment

        # Tell Expat that we'll handle non-XML entities ourselves
        # (in _handle_other)
        parser.DefaultHandler = self._handle_other
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.UseForeignDTD()
        parser.ExternalEntityRefHandler = self._build_foreign

        self.expat = parser
        # Events produced by the Expat callbacks, drained by `parse`
        self._queue = []

    def parse(self):
        """Generator that parses the XML source, yielding markup events.

        :return: a markup event stream
        :raises ParseError: if the XML text is not well formed
        """
        # ``sys.exc_info()`` is used instead of the ``except X, e`` binding
        # form so that this module parses on every Python version.
        import sys

        def _generate():
            try:
                bufsize = 4 * 1024 # 4K
                done = False
                while 1:
                    # Feed Expat until its callbacks have queued something
                    while not done and len(self._queue) == 0:
                        data = self.source.read(bufsize)
                        if data == '': # end of data
                            if hasattr(self, 'expat'):
                                self.expat.Parse('', True)
                                del self.expat # get rid of circular references
                            done = True
                        else:
                            if isinstance(data, unicode):
                                data = data.encode('utf-8')
                            self.expat.Parse(data, False)
                    for event in self._queue:
                        yield event
                    self._queue = []
                    if done:
                        break
            except expat.ExpatError:
                e = sys.exc_info()[1]
                msg = str(e)
                raise ParseError(msg, self.filename, e.lineno, e.offset)
        return Stream(_generate()).filter(_coalesce)

    def __iter__(self):
        return iter(self.parse())

    def _build_foreign(self, context, base, sysid, pubid):
        """Supply the generated entity declarations as the external DTD."""
        parser = self.expat.ExternalEntityParserCreate(context)
        parser.ParseFile(StringIO(self._external_dtd))
        return 1

    def _enqueue(self, kind, data=None, pos=None):
        """Append an event to the queue, defaulting to the current position."""
        if pos is None:
            pos = self._getpos()
        if kind is TEXT:
            # Expat reports the *end* of the text event as current position. We
            # try to fix that up here as much as possible. Unfortunately, the
            # offset is only valid for single-line text. For multi-line text,
            # it is apparently not possible to determine at what offset it
            # started
            if '\n' in data:
                # The text started as many lines above the reported line as it
                # contains newlines. Counting ``splitlines()`` entries instead
                # is off by one whenever the text ends with a newline.
                lineno = pos[1] - data.count('\n')
                offset = -1
            else:
                lineno = pos[1]
                offset = pos[2] - len(data)
            pos = (pos[0], lineno, offset)
        self._queue.append((kind, data, pos))

    def _getpos_unknown(self):
        return (self.filename, -1, -1)

    def _getpos(self):
        return (self.filename, self.expat.CurrentLineNumber,
                self.expat.CurrentColumnNumber)

    def _handle_start(self, tag, attrib):
        # With ``ordered_attributes`` set, ``attrib`` is a flat
        # [name, value, name, value, ...] list; zipping one iterator with
        # itself regroups it into (name, value) pairs.
        attrs = Attrs([(QName(name), value) for name, value in
                       zip(*[iter(attrib)] * 2)])
        self._enqueue(START, (QName(tag), attrs))

    def _handle_end(self, tag):
        self._enqueue(END, QName(tag))

    def _handle_data(self, text):
        self._enqueue(TEXT, text)

    def _handle_xml_decl(self, version, encoding, standalone):
        self._enqueue(XML_DECL, (version, encoding, standalone))

    def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
        self._enqueue(DOCTYPE, (name, pubid, sysid))

    def _handle_start_ns(self, prefix, uri):
        self._enqueue(START_NS, (prefix or '', uri))

    def _handle_end_ns(self, prefix):
        self._enqueue(END_NS, prefix or '')

    def _handle_start_cdata(self):
        self._enqueue(START_CDATA)

    def _handle_end_cdata(self):
        self._enqueue(END_CDATA)

    def _handle_pi(self, target, data):
        self._enqueue(PI, (target, data))

    def _handle_comment(self, text):
        self._enqueue(COMMENT, text)

    def _handle_other(self, text):
        """Resolve entity references that Expat passed to the default handler.

        Known names become a TEXT event holding the character; unknown names
        raise an Expat error that `parse` turns into a `ParseError`.
        """
        if text.startswith('&'):
            # deal with undefined entities
            try:
                text = unichr(entities.name2codepoint[text[1:-1]])
                self._enqueue(TEXT, text)
            except KeyError:
                filename, lineno, offset = self._getpos()
                error = expat.error('undefined entity "%s": line %d, column %d'
                                    % (text, lineno, offset))
                error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY
                error.lineno = lineno
                error.offset = offset
                raise error
+
+
def XML(text):
    """Parse the given XML source and return a markup stream.

    The events are collected into a list up front, so unlike the stream
    produced by `XMLParser` the result can be iterated over multiple times:

    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
    >>> print(xml)
    <doc><elem>Foo</elem><elem>Bar</elem></doc>
    >>> print(xml.select('elem'))
    <elem>Foo</elem><elem>Bar</elem>
    >>> print(xml.select('elem/text()'))
    FooBar

    :param text: the XML source
    :return: the parsed XML event stream
    :raises ParseError: if the XML text is not well-formed
    """
    source = StringIO(text)
    events = list(XMLParser(source))
    return Stream(events)
+
+
class HTMLParser(html.HTMLParser, object):
    """Parser for HTML input based on the Python `HTMLParser` module.

    This class provides the same interface for generating stream events as
    `XMLParser`, and attempts to automatically balance tags.

    The parsing is initiated by iterating over the parser object:

    >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
    >>> for kind, data, pos in parser:
    ...     print('%s %s' % (kind, data))
    START (QName('ul'), Attrs([(QName('compact'), u'compact')]))
    START (QName('li'), Attrs())
    TEXT Foo
    END li
    END ul
    """

    # Elements that never have content: an END event is queued right after
    # their START event and explicit end tags for them are ignored.
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])

    def __init__(self, source, filename=None, encoding='utf-8'):
        """Initialize the parser for the given HTML input.

        :param source: the HTML text as a file-like object
        :param filename: the name of the file, if known
        :param encoding: encoding of the file; ignored if the input is unicode
        """
        html.HTMLParser.__init__(self)
        self.source = source
        self.filename = filename
        self.encoding = encoding
        # Events produced by the handler callbacks, drained by `parse`
        self._queue = []
        # Names of the elements that have been opened but not yet closed
        self._open_tags = []

    def parse(self):
        """Generator that parses the HTML source, yielding markup events.

        :return: a markup event stream
        :raises ParseError: if the HTML text is not well formed
        """
        # ``sys.exc_info()`` is used instead of the ``except X, e`` binding
        # form so that this module parses on every Python version.
        import sys

        def _generate():
            try:
                bufsize = 4 * 1024 # 4K
                done = False
                while 1:
                    while not done and len(self._queue) == 0:
                        data = self.source.read(bufsize)
                        if data == '': # end of data
                            self.close()
                            done = True
                        else:
                            self.feed(data)
                    for kind, data, pos in self._queue:
                        yield kind, data, pos
                    self._queue = []
                    if done:
                        # Close whatever is still open, innermost first. The
                        # events reuse ``pos`` of the last queued event, which
                        # is bound whenever a tag has been opened.
                        open_tags = self._open_tags
                        open_tags.reverse()
                        for tag in open_tags:
                            yield END, QName(tag), pos
                        break
            except html.HTMLParseError:
                e = sys.exc_info()[1]
                msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
                raise ParseError(msg, self.filename, e.lineno, e.offset)
        return Stream(_generate()).filter(_coalesce)

    def __iter__(self):
        return iter(self.parse())

    def _enqueue(self, kind, data, pos=None):
        """Append an event to the queue, defaulting to the current position."""
        if pos is None:
            pos = self._getpos()
        self._queue.append((kind, data, pos))

    def _getpos(self):
        lineno, column = self.getpos()
        return (self.filename, lineno, column)

    def handle_starttag(self, tag, attrib):
        fixed_attrib = []
        for name, value in attrib: # Fixup minimized attributes
            if value is None:
                value = unicode(name)
            elif not isinstance(value, unicode):
                value = value.decode(self.encoding, 'replace')
            fixed_attrib.append((QName(name), stripentities(value)))

        self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
        if tag in self._EMPTY_ELEMS:
            self._enqueue(END, QName(tag))
        else:
            self._open_tags.append(tag)

    def handle_endtag(self, tag):
        if tag not in self._EMPTY_ELEMS:
            # Close every open element up to and including the matching one,
            # which balances end tags that were left out in the source.
            while self._open_tags:
                open_tag = self._open_tags.pop()
                self._enqueue(END, QName(open_tag))
                if open_tag.lower() == tag.lower():
                    break

    def handle_data(self, text):
        if not isinstance(text, unicode):
            text = text.decode(self.encoding, 'replace')
        self._enqueue(TEXT, text)

    def handle_charref(self, name):
        if name.lower().startswith('x'):
            text = unichr(int(name[1:], 16))
        else:
            text = unichr(int(name))
        self._enqueue(TEXT, text)

    def handle_entityref(self, name):
        try:
            text = unichr(entities.name2codepoint[name])
        except KeyError:
            # Unknown entity: pass the reference through as literal text
            text = '&%s;' % name
        self._enqueue(TEXT, text)

    def handle_pi(self, data):
        if data.endswith('?'):
            data = data[:-1]
        try:
            target, data = data.split(None, 1)
        except ValueError:
            # Processing instruction without data, such as ``<?php?>``: there
            # is nothing to split off, the whole text is the target.
            target = data
            data = ''
        self._enqueue(PI, (target.strip(), data.strip()))

    def handle_comment(self, text):
        self._enqueue(COMMENT, text)
+
+
def HTML(text, encoding='utf-8'):
    """Parse the given HTML source and return a markup stream.

    The events are collected into a list up front, so unlike the stream
    produced by `HTMLParser` the result can be iterated over multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source
    :param encoding: the encoding handed on to `HTMLParser`
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    parser = HTMLParser(StringIO(text), encoding=encoding)
    return Stream(list(parser))
+
+
def _coalesce(stream):
    """Coalesces adjacent TEXT events into a single event.

    The merged event carries the position of the first TEXT event of the run.
    Events whose kind is false are dropped; they still end a run of text.
    """
    pending = []
    first_pos = None
    for kind, data, pos in stream:
        if kind is TEXT:
            pending.append(data)
            if first_pos is None:
                first_pos = pos
            continue
        # Any other event ends the current run of text
        if pending:
            yield TEXT, ''.join(pending), first_pos
            pending = []
        first_pos = None
        if kind:
            yield kind, data, pos
    # Text at the very end of the stream still has to be emitted
    if pending:
        yield TEXT, ''.join(pending), first_pos
diff --git a/genshi/output.py b/genshi/output.py
new file mode 100644
index 0000000..2ebb38b
--- /dev/null
+++ b/genshi/output.py
@@ -0,0 +1,838 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""This module provides different kinds of serialization methods for XML event
+streams.
+"""
+
+from itertools import chain
+import re
+
+from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
+from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
+ START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
+
+__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer',
+ 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
+__docformat__ = 'restructuredtext en'
+
+
def encode(iterator, method='xml', encoding='utf-8', out=None):
    """Encode serializer output into a string.

    :param iterator: the iterator returned from serializing a stream (basically
                     any iterator that yields unicode objects)
    :param method: the serialization method; determines how characters not
                   representable in the specified encoding are treated
    :param encoding: how the output string should be encoded; if set to `None`,
                     this method returns a `unicode` object
    :param out: a file-like object that the output should be written to
                instead of being returned as one big string; note that if
                this is a file or socket (or similar), the `encoding` must
                not be `None` (that is, the output must be encoded)
    :return: a `str` or `unicode` object (depending on the `encoding`
             parameter), or `None` if the `out` parameter is provided

    :since: version 0.4.1
    :note: Changed in 0.5: added the `out` parameter
    """
    if encoding is None:
        def _encode(string):
            return string
    else:
        # Text output replaces unencodable characters with the codec's
        # replacement marker; markup output uses character references.
        if method == 'text' or isinstance(method, TextSerializer):
            errors = 'replace'
        else:
            errors = 'xmlcharrefreplace'

        def _encode(string):
            return string.encode(encoding, errors)

    if out is not None:
        for chunk in iterator:
            out.write(_encode(chunk))
        return None
    return _encode(''.join(iterator))
+
+
def get_serializer(method='xml', **kwargs):
    """Return a serializer object for the given method.

    :param method: the serialization method; can be either "xml", "xhtml",
                   "html", "text", or a custom serializer class

    Any additional keyword arguments are passed to the serializer, and thus
    depend on the `method` parameter value.

    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
    :since: version 0.4.1
    """
    # Anything that is not a string is taken to be the serializer class itself
    if not isinstance(method, basestring):
        return method(**kwargs)
    serializers = {
        'xml': XMLSerializer,
        'xhtml': XHTMLSerializer,
        'html': HTMLSerializer,
        'text': TextSerializer,
    }
    serializer_class = serializers[method.lower()]
    return serializer_class(**kwargs)
+
+
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a ``(name, pubid, sysid)`` tuple.
    """

    HTML_STRICT = (
        'html', '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    )
    HTML_TRANSITIONAL = (
        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    )
    HTML_FRAMESET = (
        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
        'http://www.w3.org/TR/html4/frameset.dtd'
    )
    HTML = HTML_STRICT

    HTML5 = ('html', None, None)

    XHTML_STRICT = (
        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    )
    XHTML_TRANSITIONAL = (
        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    )
    XHTML_FRAMESET = (
        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
    )
    XHTML = XHTML_STRICT

    XHTML11 = (
        'html', '-//W3C//DTD XHTML 1.1//EN',
        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
    )

    SVG_FULL = (
        'svg', '-//W3C//DTD SVG 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
    )
    SVG_BASIC = (
        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
    )
    SVG_TINY = (
        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
    )
    SVG = SVG_FULL

    @classmethod
    def get(cls, name):
        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
        declaration for the specified name.

        The following names are recognized in this version:
         * "html" or "html-strict" for the HTML 4.01 strict DTD
         * "html-transitional" for the HTML 4.01 transitional DTD
         * "html-frameset" for the HTML 4.01 frameset DTD
         * "html5" for the ``DOCTYPE`` proposed for HTML5
         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
         * "xhtml-frameset" for the XHTML 1.0 frameset DTD
         * "xhtml11" for the XHTML 1.1 DTD
         * "svg" or "svg-full" for the SVG 1.1 DTD
         * "svg-basic" for the SVG Basic 1.1 DTD
         * "svg-tiny" for the SVG Tiny 1.1 DTD

        The lookup is case-insensitive.

        :param name: the name of the ``DOCTYPE``
        :return: the ``(name, pubid, sysid)`` tuple for the requested
                 ``DOCTYPE``, or ``None`` if the name is not recognized
        :since: version 0.4.1
        """
        # Every constant is looked up through ``cls`` so that a subclass
        # overriding one of them is honoured for all names alike.
        return {
            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
            'html-transitional': cls.HTML_TRANSITIONAL,
            'html-frameset': cls.HTML_FRAMESET,
            'html5': cls.HTML5,
            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
            'xhtml-frameset': cls.XHTML_FRAMESET,
            'xhtml11': cls.XHTML11,
            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
            'svg-basic': cls.SVG_BASIC,
            'svg-tiny': cls.SVG_TINY
        }.get(name.lower())
+
+
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print(''.join(XMLSerializer()(elem.generate())))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None, cache=True):
        """Initialize the XML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output, or the name of a DOCTYPE as
                        defined in `DocType.get`
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: optional mapping of namespace URIs to
                                   prefixes, handed on to `NamespaceFlattener`
        :param cache: whether to cache the text output per event, which
                      improves performance for repetitive markup
        :note: Changed in 0.4.2: The `doctype` parameter can now be a string.
        :note: Changed in 0.6: The `cache` parameter was added
        """
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
                                               cache=cache))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        self.cache = cache

    def __call__(self, stream):
        """Serialize the event stream, yielding the output piece by piece.

        :param stream: the event stream to serialize
        """
        have_decl = have_doctype = False
        in_cdata = False

        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output):
                cache[kind, input] = output
                return output
        else:
            def _emit(kind, input, output):
                return output

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:
            if kind is TEXT and (in_cdata or isinstance(data, Markup)):
                # Text that is written out verbatim must stay out of the
                # cache: the cache key is only (kind, data), so such an entry
                # would later be served for ordinary text with the same
                # characters (skipping its escaping), and the escaped form of
                # ordinary text would be served inside CDATA sections.
                # NOTE(review): relies on `escape` returning `Markup` input
                # unchanged, which is what the previous code yielded for it.
                yield data
                continue

            cached = cache_get((kind, data))
            if cached is not None:
                yield cached

            elif kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    buf.append('/>')
                else:
                    buf.append('>')
                yield _emit(kind, data, Markup(''.join(buf)))

            elif kind is END:
                yield _emit(kind, data, Markup('</%s>' % data))

            elif kind is TEXT:
                yield _emit(kind, data, escape(data, quotes=False))

            elif kind is COMMENT:
                yield _emit(kind, data, Markup('<!--%s-->' % data))

            elif kind is XML_DECL and not have_decl:
                version, encoding, standalone = data
                buf = ['<?xml version="%s"' % version]
                if encoding:
                    buf.append(' encoding="%s"' % encoding)
                if standalone != -1:
                    standalone = standalone and 'yes' or 'no'
                    buf.append(' standalone="%s"' % standalone)
                buf.append('?>\n')
                yield Markup(''.join(buf))
                # Only the first XML declaration is written out
                have_decl = True

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(''.join(buf)) % tuple([p for p in data if p])
                # Only the first DOCTYPE is written out
                have_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
class XHTMLSerializer(XMLSerializer):
    """Produces XHTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print(''.join(XHTMLSerializer()(elem.generate())))
    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
    """

    # Only these elements are written in the short ``<tag />`` form
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    # Attributes whose value is always written as the attribute name
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])
    _PRESERVE_SPACE = frozenset([
        QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
        QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
    ])

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None, drop_xml_decl=True, cache=True):
        """Initialize the XHTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output, or the name of a DOCTYPE as
                        defined in `DocType.get`
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: optional mapping of namespace URIs to
                                   prefixes; the XHTML namespace is always
                                   mapped to the empty prefix
        :param drop_xml_decl: whether XML declarations in the stream should be
                              left out of the output
        :param cache: whether to cache the text output per event, which
                      improves performance for repetitive markup
        """
        super(XHTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        # Work on a copy so that the mapping passed in by the caller is not
        # modified behind their back.
        namespace_prefixes = dict(namespace_prefixes or {})
        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
                                               cache=cache))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        self.drop_xml_decl = drop_xml_decl
        self.cache = cache

    def __call__(self, stream):
        """Serialize the event stream, yielding the output piece by piece.

        :param stream: the event stream to serialize
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        drop_xml_decl = self.drop_xml_decl
        have_decl = have_doctype = False
        in_cdata = False

        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output):
                cache[kind, input] = output
                return output
        else:
            def _emit(kind, input, output):
                return output

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:
            if kind is TEXT and (in_cdata or isinstance(data, Markup)):
                # Text that is written out verbatim must stay out of the
                # cache: the cache key is only (kind, data), so such an entry
                # would later be served for ordinary text with the same
                # characters (skipping its escaping), and the escaped form of
                # ordinary text would be served inside CDATA sections.
                # NOTE(review): relies on `escape` returning `Markup` input
                # unchanged, which is what the previous code yielded for it.
                yield data
                continue

            cached = cache_get((kind, data))
            if cached is not None:
                yield cached

            elif kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        value = attr
                    elif attr == 'xml:lang' and 'lang' not in attrib:
                        # Written in addition to the xml:lang attribute itself
                        buf += [' lang="', escape(value), '"']
                    elif attr == 'xml:space':
                        continue
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    if tag in empty_elems:
                        buf.append(' />')
                    else:
                        buf.append('></%s>' % tag)
                else:
                    buf.append('>')
                yield _emit(kind, data, Markup(''.join(buf)))

            elif kind is END:
                yield _emit(kind, data, Markup('</%s>' % data))

            elif kind is TEXT:
                yield _emit(kind, data, escape(data, quotes=False))

            elif kind is COMMENT:
                yield _emit(kind, data, Markup('<!--%s-->' % data))

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(''.join(buf)) % tuple([p for p in data if p])
                # Only the first DOCTYPE is written out
                have_doctype = True

            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
                version, encoding, standalone = data
                buf = ['<?xml version="%s"' % version]
                if encoding:
                    buf.append(' encoding="%s"' % encoding)
                if standalone != -1:
                    standalone = standalone and 'yes' or 'no'
                    buf.append(' standalone="%s"' % standalone)
                buf.append('?>\n')
                yield Markup(''.join(buf))
                # Only the first XML declaration is written out
                have_decl = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print(''.join(HTMLSerializer()(elem.generate())))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written out without escaping
    _NOESCAPE_ELEMS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])

    def __init__(self, doctype=None, strip_whitespace=True, cache=True):
        """Initialize the HTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param cache: whether to cache the text output per event, which
                      improves performance for repetitive markup
        :note: Changed in 0.6: The `cache` parameter was added
        """
        super(HTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                 self._NOESCAPE_ELEMS))
        self.filters.append(NamespaceFlattener(prefixes={
            'http://www.w3.org/1999/xhtml': ''
        }, cache=cache))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        # Honour the argument; this used to be hard-wired to ``True``
        self.cache = cache

    def __call__(self, stream):
        """Serialize the event stream, yielding the output piece by piece.

        :param stream: the event stream to serialize
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        noescape = False

        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output):
                cache[kind, input] = output
                return output
        else:
            def _emit(kind, input, output):
                return output

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, _ in stream:
            if kind is TEXT and (noescape or isinstance(data, Markup)):
                # Text that is written out verbatim must stay out of the
                # cache: the cache key is only (kind, data), so such an entry
                # would later be served for ordinary text with the same
                # characters (skipping its escaping), and the escaped form of
                # ordinary text would be served inside script/style elements.
                # NOTE(review): relies on `escape` returning `Markup` input
                # unchanged, which is what the previous code yielded for it.
                yield data
                continue

            output = cache_get((kind, data))
            if output is not None:
                yield output
                # An EMPTY event already stands for the closed element, so
                # only a real START switches escaping off.
                if kind is START and data[0] in noescape_elems:
                    noescape = True
                elif kind is END:
                    noescape = False

            elif kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        if value:
                            buf += [' ', attr]
                    elif ':' in attr:
                        # Prefixed attributes are left out; xml:lang is
                        # written as plain lang unless that is present too
                        if attr == 'xml:lang' and 'lang' not in attrib:
                            buf += [' lang="', escape(value), '"']
                    elif attr != 'xmlns':
                        buf += [' ', attr, '="', escape(value), '"']
                buf.append('>')
                if kind is EMPTY:
                    if tag not in empty_elems:
                        buf.append('</%s>' % tag)
                yield _emit(kind, data, Markup(''.join(buf)))
                # No END event follows an EMPTY element, so switching
                # escaping off for it would leave the text after e.g.
                # ``<script src="..."></script>`` unescaped until some
                # unrelated END event.
                if kind is START and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                yield _emit(kind, data, Markup('</%s>' % data))
                noescape = False

            elif kind is TEXT:
                yield _emit(kind, data, escape(data, quotes=False))

            elif kind is COMMENT:
                yield _emit(kind, data, Markup('<!--%s-->' % data))

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(''.join(buf)) % tuple([p for p in data if p])
                # Only the first DOCTYPE is written out
                have_doctype = True

            elif kind is PI:
                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
class TextSerializer(object):
    """Produces plain text from an event stream.

    Only the text events make it into the output, and in contrast to the
    other serializers special XML characters are not escaped:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print(elem)
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print(''.join(TextSerializer()(elem.generate())))
    <Hello!>

    Literal markup in text events (instances of the `Markup` class) is passed
    through unchanged by default:

    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
    >>> print(elem.generate().render(TextSerializer, encoding=None))
    <a href="foo">Hello &amp; Bye!</a><br/>

    With ``strip_markup`` set, tags are removed from such text and entities
    are replaced with the equivalent character:

    >>> print(elem.generate().render(TextSerializer, strip_markup=True,
    ...                              encoding=None))
    Hello & Bye!
    """

    def __init__(self, strip_markup=False):
        """Create the serializer.

        :param strip_markup: whether markup (tags and encoded characters) found
                             in the text should be removed
        """
        self.strip_markup = strip_markup

    def __call__(self, stream):
        """Yield the text of every ``TEXT`` event as a `unicode` object.

        :param stream: the event stream to serialize
        """
        strip_markup = self.strip_markup
        for event in stream:
            if event[0] is not TEXT:
                continue
            text = event[1]
            # Exact type check: only plain `Markup` instances are stripped
            if strip_markup and type(text) is Markup:
                text = text.striptags().stripentities()
            yield unicode(text)
+
+
class EmptyTagFilter(object):
    """Combines `START` and `END` events into `EMPTY` events for elements that
    have no contents.
    """

    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        """Filter the stream, holding back each `START` event by one step.

        A `START` directly followed by an `END` is replaced by one `EMPTY`
        event that carries the data and position of the `START`; every other
        event is passed on unchanged.

        :param stream: the event stream to filter
        """
        prev = (None, None, None)
        for ev in stream:
            if prev[0] is START:
                if ev[0] is END:
                    # The END event is swallowed together with its START
                    prev = EMPTY, prev[1], prev[2]
                    yield prev
                    continue
                else:
                    yield prev
            # A START is held back until the event after it is known
            if ev[0] is not START:
                yield ev
            prev = ev
        # A START that ends the stream was held back but never followed by
        # another event; emit it instead of silently dropping it.
        if prev[0] is START:
            yield prev


# Module-level alias for the event kind, as used by the serializers
EMPTY = EmptyTagFilter.EMPTY
+
+
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    :param prefixes: optional mapping of namespace URIs to prefixes
    :param cache: whether the flattened form of `START`/`EMPTY`/`END` events
                  should be memoized (keyed on the event kind and data) for
                  as long as the set of namespaces in scope does not change

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print('%s %r' % (kind, data))
    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None, cache=True):
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)
        self.cache = cache

    def __call__(self, stream):
        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output, pos):
                cache[kind, input] = output
                return kind, output, pos
        else:
            def _emit(kind, input, output, pos):
                # Always hand back a complete event. Returning only `output`
                # here would make the filter yield bare data instead of
                # (kind, data, pos) tuples whenever caching is disabled.
                return kind, output, pos

        # prefix -> stack of URIs, and URI -> stack of prefixes
        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
        namespaces = {XML_NAMESPACE.uri: ['xml']}

        def _push_ns(prefix, uri):
            namespaces.setdefault(uri, []).append(prefix)
            prefixes.setdefault(prefix, []).append(uri)
            # cached output may embed prefixes that are no longer valid
            cache.clear()

        def _pop_ns(prefix):
            uris = prefixes.get(prefix)
            uri = uris.pop()
            if not uris:
                del prefixes[prefix]
            if uri not in uris or uri != uris[-1]:
                uri_prefixes = namespaces[uri]
                uri_prefixes.pop()
                if not uri_prefixes:
                    del namespaces[uri]
            cache.clear()
            return uri

        # xmlns attributes waiting to be attached to the next START/EMPTY
        ns_attrs = []
        _push_ns_attr = ns_attrs.append

        def _make_ns_attr(prefix, uri):
            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri

        def _gen_prefix():
            val = 0
            while 1:
                val += 1
                yield 'ns%d' % val
        _gen_prefix = _gen_prefix().next

        for kind, data, pos in stream:
            output = cache_get((kind, data))
            if output is not None:
                yield kind, output, pos

            elif kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns in namespaces:
                        prefix = namespaces[tagns][-1]
                        if prefix:
                            tagname = '%s:%s' % (prefix, tagname)
                    else:
                        # namespace not declared yet: make it the default
                        _push_ns_attr(('xmlns', tagns))
                        _push_ns('', tagns)

                new_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns not in namespaces:
                            # undeclared attribute namespace: invent a prefix
                            prefix = _gen_prefix()
                            _push_ns(prefix, attrns)
                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
                        else:
                            prefix = namespaces[attrns][-1]
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    new_attrs.append((attrname, value))

                yield _emit(kind, data,
                            (tagname, Attrs(ns_attrs + new_attrs)), pos)
                del ns_attrs[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = namespaces[tagns][-1]
                    if prefix:
                        tagname = '%s:%s' % (prefix, tagname)
                yield _emit(kind, data, tagname, pos)

            elif kind is START_NS:
                prefix, uri = data
                if uri not in namespaces:
                    prefix = prefixes.get(uri, [prefix])[-1]
                    _push_ns_attr(_make_ns_attr(prefix, uri))
                _push_ns(prefix, uri)

            elif kind is END_NS:
                if data in prefixes:
                    uri = _pop_ns(data)
                    # Only look for a pending xmlns attribute when a mapping
                    # was actually popped; otherwise `uri` would be unbound
                    # or left over from an earlier event.
                    if ns_attrs:
                        attr = _make_ns_attr(data, uri)
                        if attr in ns_attrs:
                            ns_attrs.remove(attr)

            else:
                yield kind, data, pos
+
+
class WhitespaceFilter(object):
    """Stream filter that strips superfluous white space from text and
    coalesces adjacent text events into one.
    """

    def __init__(self, preserve=None, noescape=None):
        """Initialize the filter.

        :param preserve: a set or sequence of tag names for which white-space
                         should be preserved
        :param noescape: a set or sequence of tag names for which text content
                         should not be escaped

        The `noescape` set is expected to refer to elements that cannot contain
        further child elements (such as ``<style>`` or ``<script>`` in HTML
        documents).
        """
        if preserve is None:
            preserve = []
        if noescape is None:
            noescape = []
        self.preserve = frozenset(preserve)
        self.noescape = frozenset(noescape)

    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
                 collapse_lines=re.compile('\n{2,}').sub):
        join_markup = Markup('').join
        keep_space_tags = self.preserve
        raw_text_tags = self.noescape
        keep_depth = 0      # > 0 while inside a white-space preserving element
        raw_text = False    # true while text must not be escaped

        pending = []        # text collected since the last non-text event
        # the trailing sentinel event flushes text left at the end of stream
        for kind, data, pos in chain(stream, [(None, None, None)]):

            if kind is TEXT:
                if raw_text:
                    data = Markup(data)
                pending.append(data)
                continue

            if pending:
                if len(pending) > 1:
                    text = join_markup(pending, escape_quotes=False)
                    del pending[:]
                else:
                    text = escape(pending.pop(), quotes=False)
                if not keep_depth:
                    text = trim_trailing_space('', text)
                    text = collapse_lines('\n', text)
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrs = data
                if keep_depth or (tag in keep_space_tags or
                                  attrs.get(space) == 'preserve'):
                    keep_depth += 1
                if not raw_text and tag in raw_text_tags:
                    raw_text = True

            elif kind is END:
                raw_text = False
                if keep_depth:
                    keep_depth -= 1

            elif kind is START_CDATA:
                raw_text = True

            elif kind is END_CDATA:
                raw_text = False

            if kind:
                yield kind, data, pos
+
+
class DocTypeInserter(object):
    """Stream filter that emits a DOCTYPE event at the start of the stream,
    or directly after the XML declaration when the stream begins with one.
    """

    def __init__(self, doctype):
        """Initialize the filter.

        :param doctype: DOCTYPE as a string or DocType object.
        """
        if isinstance(doctype, basestring):
            doctype = DocType.get(doctype)
        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))

    def __call__(self, stream):
        pending = True  # the DOCTYPE event has not been emitted yet
        for kind, data, pos in stream:
            if pending:
                pending = False
                if kind is not XML_DECL:
                    yield self.doctype_event
                else:
                    # keep the XML declaration first in the output
                    yield (kind, data, pos)
                    yield self.doctype_event
                    continue

            yield (kind, data, pos)

        if pending:
            # the stream was empty: the DOCTYPE is the only output
            yield self.doctype_event
diff --git a/genshi/path.py b/genshi/path.py
new file mode 100644
index 0000000..122fbf0
--- /dev/null
+++ b/genshi/path.py
@@ -0,0 +1,1528 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Basic support for evaluating XPath expressions against streams.
+
+>>> from genshi.input import XML
+>>> doc = XML('''<doc>
+... <items count="4">
+... <item status="new">
+... <summary>Foo</summary>
+... </item>
+... <item status="closed">
+... <summary>Bar</summary>
+... </item>
+... <item status="closed" resolution="invalid">
+... <summary>Baz</summary>
+... </item>
+... <item status="closed" resolution="fixed">
+... <summary>Waz</summary>
+... </item>
+... </items>
+... </doc>''')
+>>> print(doc.select('items/item[@status="closed" and '
+... '(@resolution="invalid" or not(@resolution))]/summary/text()'))
+BarBaz
+
+Because the XPath engine operates on markup streams (as opposed to tree
+structures), it only implements a subset of the full XPath 1.0 language.
+"""
+
+from collections import deque
+try:
+ reduce # builtin in Python < 3
+except NameError:
+ from functools import reduce
+from math import ceil, floor
+import operator
+import re
+from itertools import chain
+
+from genshi.core import Stream, Attrs, Namespace, QName
+from genshi.core import START, END, TEXT, START_NS, END_NS, COMMENT, PI, \
+ START_CDATA, END_CDATA
+
+__all__ = ['Path', 'PathSyntaxError']
+__docformat__ = 'restructuredtext en'
+
+
class Axis(object):
    """Namespace for the names of the XPath axes this module understands."""

    ATTRIBUTE = 'attribute'
    CHILD = 'child'
    DESCENDANT = 'descendant'
    DESCENDANT_OR_SELF = 'descendant-or-self'
    SELF = 'self'

    @classmethod
    def forname(cls, name):
        """Look up the axis constant for an axis name as written in a path.

        The name is upper-cased and dashes are turned into underscores to
        obtain the name of the class attribute. `None` is returned when no
        such attribute exists.
        """
        constant_name = name.upper().replace('-', '_')
        return getattr(cls, constant_name, None)


# Module-level shortcuts for the axis constants
ATTRIBUTE = Axis.ATTRIBUTE
CHILD = Axis.CHILD
DESCENDANT = Axis.DESCENDANT
DESCENDANT_OR_SELF = Axis.DESCENDANT_OR_SELF
SELF = Axis.SELF
+
+
class GenericStrategy(object):
    """Path matching strategy that accepts every parsed path.

    It tracks, for each open element, the positions in the path at which the
    element's children have to be tested, together with the counters needed
    to evaluate position-based predicates.
    """

    @classmethod
    def supports(cls, path):
        """Return `True`: this strategy handles any path."""
        return True

    def __init__(self, path):
        """:param path: list of ``(axis, nodetest, predicates)`` steps"""
        self.path = path

    def test(self, ignore_context):
        """Return a stateful function testing stream events against the path.

        :param ignore_context: if true, the first step is matched at any
                               depth instead of relative to the stream root
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning the match or `None`
        """
        p = self.path
        if ignore_context:
            if p[0][0] is ATTRIBUTE:
                steps = [_DOTSLASHSLASH] + p
            else:
                steps = [(DESCENDANT_OR_SELF, p[0][1], p[0][2])] + p[1:]
        elif p[0][0] is CHILD or p[0][0] is ATTRIBUTE \
                or p[0][0] is DESCENDANT:
            steps = [_DOTSLASH] + p
        else:
            steps = p

        # length of real part of path - a trailing attribute step is not
        # matched against nodes, it only extracts the result at the end
        real_len = len(steps)
        if steps[-1][0] == ATTRIBUTE:
            real_len -= 1

        # for node it contains all positions of xpath expression
        # where its child should start checking for matches
        # with list of corresponding context counters
        # there can be many of them, because position that is from
        # descendant-like axis can be achieved from different nodes
        # for example <a><a><b/></a></a> should match both //a//b[1]
        # and //a//b[2]
        # positions always form increasing sequence (invariant)
        stack = [[(0, [[]])]]

        def _test(event, namespaces, variables, updateonly=False):
            # NOTE: `updateonly` is accepted for interface compatibility but
            # is not consulted here; every call runs the full test.
            kind, data, pos = event[:3]
            retval = None

            # Manage the stack that tells us "where we are" in the stream
            if kind is END:
                if stack:
                    stack.pop()
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                # should we make namespaces work?
                return None

            # The comprehension variables must not be called `pos`: in
            # Python 2 they leak into this scope and would overwrite the
            # event position passed to node tests and predicates below.
            pos_queue = deque([(step_idx, counters, [])
                               for step_idx, counters in stack[-1]])
            next_pos = []

            # places where we have to check for match, are these
            # provided by parent
            while pos_queue:
                x, pcou, mcou = pos_queue.popleft()
                axis, nodetest, predicates = steps[x]

                # we need to push descendant-like positions from parent
                # further
                if (axis is DESCENDANT or axis is DESCENDANT_OR_SELF) and pcou:
                    if next_pos and next_pos[-1][0] == x:
                        next_pos[-1][1].extend(pcou)
                    else:
                        next_pos.append((x, pcou))

                # nodetest first
                if not nodetest(kind, data, pos, namespaces, variables):
                    continue

                # counters packs that were already bad
                missed = set()
                counters_len = len(pcou) + len(mcou)

                # number of counters - we have to create one
                # for every context position based predicate
                cnum = 0

                # tells if we have match with position x
                matched = True

                if predicates:
                    for predicate in predicates:
                        pretval = predicate(kind, data, pos,
                                            namespaces,
                                            variables)
                        if type(pretval) is float:  # FIXME <- need to check
                                                    # this for other types that
                                                    # can be coerced to float

                            # each counter pack needs to be checked
                            for i, cou in enumerate(chain(pcou, mcou)):
                                # it was bad before
                                if i in missed:
                                    continue

                                if len(cou) < cnum + 1:
                                    cou.append(0)
                                cou[cnum] += 1

                                # it is bad now
                                if cou[cnum] != int(pretval):
                                    missed.add(i)

                            # none of counters pack was good
                            if len(missed) == counters_len:
                                pretval = False
                            cnum += 1

                        if not pretval:
                            matched = False
                            break

                if not matched:
                    continue

                # counter for next position with current node as context node
                child_counter = []

                if x + 1 == real_len:
                    # we reached end of expression, because x + 1
                    # is equal to the length of expression
                    matched = True
                    axis, nodetest, predicates = steps[-1]
                    if axis is ATTRIBUTE:
                        matched = nodetest(kind, data, pos, namespaces,
                                           variables)
                    if matched:
                        retval = matched
                else:
                    next_axis = steps[x + 1][0]

                    # if next axis allows matching self we have
                    # to add next position to our queue
                    if next_axis is DESCENDANT_OR_SELF or next_axis is SELF:
                        if not pos_queue or pos_queue[0][0] > x + 1:
                            pos_queue.appendleft((x + 1, [], [child_counter]))
                        else:
                            pos_queue[0][2].append(child_counter)

                    # if axis is not self we have to add it to child's list
                    if next_axis is not SELF:
                        next_pos.append((x + 1, [child_counter]))

            if kind is START:
                stack.append(next_pos)

            return retval

        return _test
+
+
class SimplePathStrategy(object):
    """Strategy for path with only local names, attributes and text nodes.

    The path is split into fragments of consecutive ``child::`` steps, and
    each fragment is matched against the chain of ancestors of the current
    node using the KMP string matching algorithm.
    """

    @classmethod
    def supports(cls, path):
        """Return whether `path` is simple enough for this strategy: it must
        not start with an attribute step, must not use predicates, and may
        only use local name, comment and text node tests.
        """
        if path[0][0] is ATTRIBUTE:
            return False
        allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest)
        for _, nodetest, predicates in path:
            if predicates:
                return False
            if not isinstance(nodetest, allowed_tests):
                return False
        return True

    def __init__(self, path):
        """:param path: list of ``(axis, nodetest, predicates)`` steps"""
        # fragments is list of tuples (fragment, pi, attr, self_beginning)
        # fragment is list of nodetests for fragment of path with only
        # child:: axes between
        # pi is KMP partial match table for this fragment
        # attr is attribute nodetest if fragment ends with @ and None otherwise
        # self_beginning is True if axis for first fragment element
        # was self (first fragment) or descendant-or-self (farther fragment)
        #
        # fragments is set to None when the path can never match anything
        self.fragments = []

        self_beginning = False
        fragment = []

        def nodes_equal(node1, node2):
            """Tests if two node tests are equal"""
            if type(node1) is not type(node2):
                return False
            if type(node1) == LocalNameTest:
                return node1.name == node2.name
            return True

        def calculate_pi(f):
            """KMP prefix calculation for table"""
            # the indexes in prefix table are shifted by one
            # in comparison with common implementations
            # pi[i] = NORMAL_PI[i + 1]
            if len(f) == 0:
                return []
            pi = [0]
            s = 0
            for i in range(1, len(f)):
                while s > 0 and not nodes_equal(f[s], f[i]):
                    s = pi[s-1]
                if nodes_equal(f[s], f[i]):
                    s += 1
                pi.append(s)
            return pi

        for axis in path:
            if axis[0] is SELF:
                if len(fragment) != 0:
                    # if element is not first in fragment it has to be
                    # the same as previous one
                    # for example child::a/self::b is always wrong
                    #
                    # `fragment` holds node tests (not path steps), so the
                    # previous test is `fragment[-1]` itself and has to be
                    # compared structurally with `nodes_equal`.
                    if not nodes_equal(axis[1], fragment[-1]):
                        self.fragments = None
                        return
                else:
                    self_beginning = True
                    fragment.append(axis[1])
            elif axis[0] is CHILD:
                fragment.append(axis[1])
            elif axis[0] is ATTRIBUTE:
                pi = calculate_pi(fragment)
                self.fragments.append((fragment, pi, axis[1], self_beginning))
                # attribute has always to be at the end, so we can jump out
                return
            else:
                pi = calculate_pi(fragment)
                self.fragments.append((fragment, pi, None, self_beginning))
                fragment = [axis[1]]
                if axis[0] is DESCENDANT:
                    self_beginning = False
                else:  # DESCENDANT_OR_SELF
                    self_beginning = True
        pi = calculate_pi(fragment)
        self.fragments.append((fragment, pi, None, self_beginning))

    def test(self, ignore_context):
        """Return a stateful function testing stream events against the path.

        :param ignore_context: if true, the path may start matching at any
                               depth instead of at the stream root
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning the match or `None`
        """
        # stack of triples (fid, p, ic)
        # fid is index of current fragment
        # p is position in this fragment
        # ic is if we ignore context in this fragment
        stack = []
        stack_push = stack.append
        stack_pop = stack.pop
        frags = self.fragments
        # `fragments` is None for a path found impossible during init; the
        # returned function then rejects every event.
        if frags is None:
            frags_len = 0
        else:
            frags_len = len(frags)

        def _test(event, namespaces, variables, updateonly=False):
            # expression found impossible during init
            if frags is None:
                return None

            kind, data, pos = event[:3]

            # skip events we don't care about
            if kind is END:
                if stack:
                    stack_pop()
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                return None

            if not stack:
                # root node, nothing on stack, special case
                fid = 0
                # skip empty fragments (there can be actually only one)
                while not frags[fid][0]:
                    fid += 1
                p = 0
                # empty fragment means descendant node at beginning
                ic = ignore_context or (fid > 0)

                # expression can match first node, if first axis is self::,
                # descendant-or-self:: or if ignore_context is True and
                # axis is not descendant::
                if not frags[fid][3] and (not ignore_context or fid > 0):
                    # axis is not self-beginning, we have to skip this node
                    stack_push((fid, p, ic))
                    return None
            else:
                # take position of parent
                fid, p, ic = stack[-1]

            if fid is not None and not ic:
                # fragment not ignoring context - we can't jump back
                frag, pi, attrib, _ = frags[fid]
                frag_len = len(frag)

                if p == frag_len:
                    # that probably means empty first fragment
                    pass
                elif frag[p](kind, data, pos, namespaces, variables):
                    # match, so we can go further
                    p += 1
                else:
                    # not matched, so there will be no match in subtree
                    fid, p = None, None

                if p == frag_len and fid + 1 != frags_len:
                    # we made it to end of fragment, we can go to following
                    fid += 1
                    p = 0
                    ic = True

            if fid is None:
                # there was no match in fragment not ignoring context
                if kind is START:
                    stack_push((fid, p, ic))
                return None

            if ic:
                # we are in fragment ignoring context
                while True:
                    frag, pi, attrib, _ = frags[fid]
                    frag_len = len(frag)

                    # KMP new "character"
                    while p > 0 and (p >= frag_len or not
                            frag[p](kind, data, pos, namespaces, variables)):
                        p = pi[p-1]
                    if frag[p](kind, data, pos, namespaces, variables):
                        p += 1

                    if p == frag_len:
                        # end of fragment reached
                        if fid + 1 == frags_len:
                            # that was last fragment
                            break
                        else:
                            fid += 1
                            p = 0
                            ic = True
                            if not frags[fid][3]:
                                # next fragment not self-beginning
                                break
                    else:
                        break

            if kind is START:
                # we have to put new position on stack, for children

                if not ic and fid + 1 == frags_len and p == frag_len:
                    # it is end of the only, not context ignoring fragment
                    # so there will be no matches in subtree
                    stack_push((None, None, ic))
                else:
                    stack_push((fid, p, ic))

            # have we reached the end of the last fragment?
            if fid + 1 == frags_len and p == frag_len:
                if attrib:  # attribute ended path, return value
                    return attrib(kind, data, pos, namespaces, variables)
                return True

            return None

        return _test
+
+
class SingleStepStrategy(object):
    """Path matching strategy for paths that consist of exactly one step."""

    @classmethod
    def supports(cls, path):
        """Return whether `path` has a single location step."""
        return len(path) == 1

    def __init__(self, path):
        """:param path: list of ``(axis, nodetest, predicates)`` steps"""
        self.path = path

    def test(self, ignore_context):
        """Return a stateful function testing stream events against the path.

        :param ignore_context: if true, the depth of a node in the stream is
                               not taken into account
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning the match or `None`
        """
        steps = self.path
        if steps[0][0] is ATTRIBUTE:
            # an attribute is selected relative to the context node itself
            steps = [_DOTSLASH] + steps

        attr_test = None
        if steps[-1][0] is ATTRIBUTE:
            attr_test = steps[-1][1] or None

        # one running counter per position based predicate
        counters = []
        # current nesting depth, in a list so the closure can update it
        depth = [0]

        def _test(event, namespaces, variables, updateonly=False):
            kind, data, pos = event[:3]

            # Manage the stack that tells us "where we are" in the stream
            if kind is END:
                if not ignore_context:
                    depth[0] -= 1
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                # should we make namespaces work?
                return None

            if not ignore_context:
                first_axis = steps[0][0]
                level = depth[0]
                outside = (first_axis is SELF and level != 0) \
                       or (first_axis is CHILD and level != 1) \
                       or (first_axis is DESCENDANT and level < 1)
                if kind is START:
                    depth[0] += 1
                if outside:
                    return None

            axis, nodetest, predicates = steps[0]
            if not nodetest(kind, data, pos, namespaces, variables):
                return None

            index = 0
            for predicate in predicates or ():
                outcome = predicate(kind, data, pos, namespaces, variables)
                if type(outcome) is float:  # FIXME <- need to check this
                                            # for other types that can be
                                            # coerced to float
                    if len(counters) < index + 1:
                        counters.append(0)
                    counters[index] += 1
                    if counters[index] != int(outcome):
                        outcome = False
                    index += 1
                if not outcome:
                    return None

            if attr_test:
                return attr_test(kind, data, pos, namespaces, variables)

            return True

        return _test
+
+
class Path(object):
    """Implements basic XPath support on streams.

    Instances of this class represent a "compiled" XPath expression, and
    provide methods for testing the path against a stream, as well as
    extracting a substream matching that path.
    """

    # Tried in order; the first strategy that supports a path is used.
    STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy)

    def __init__(self, text, filename=None, lineno=-1):
        """Create the path object from a string.

        :param text: the path expression
        :param filename: the name of the file in which the path expression was
                         found (used in error messages)
        :param lineno: the line on which the expression was found
        :raise NotImplementedError: if none of the `STRATEGIES` supports one
                                    of the parsed paths
        """
        self.source = text
        self.paths = PathParser(text, filename, lineno).parse()
        self.strategies = []
        for path in self.paths:
            for strategy_class in self.STRATEGIES:
                if strategy_class.supports(path):
                    self.strategies.append(strategy_class(path))
                    break
            else:
                # `NotImplemented` is a constant, not an exception class;
                # `NotImplementedError` is the exception to raise here.
                raise NotImplementedError('No strategy found for path')

    def __repr__(self):
        paths = []
        for path in self.paths:
            steps = []
            for axis, nodetest, predicates in path:
                steps.append('%s::%s' % (axis, nodetest))
                for predicate in predicates:
                    steps[-1] += '[%s]' % predicate
            paths.append('/'.join(steps))
        return '<%s "%s">' % (type(self).__name__, '|'.join(paths))

    def select(self, stream, namespaces=None, variables=None):
        """Returns a substream of the given stream that matches the path.

        If there are no matches, this method returns an empty stream.

        >>> from genshi.input import XML
        >>> xml = XML('<root><elem><child>Text</child></elem></root>')

        >>> print(Path('.//child').select(xml))
        <child>Text</child>

        >>> print(Path('.//child/text()').select(xml))
        Text

        :param stream: the stream to select from
        :param namespaces: (optional) a mapping of namespace prefixes to URIs
        :param variables: (optional) a mapping of variable names to values
        :return: the substream matching the path, or an empty stream
        :rtype: `Stream`
        """
        if namespaces is None:
            namespaces = {}
        if variables is None:
            variables = {}
        stream = iter(stream)
        def _generate(stream=stream, ns=namespaces, vs=variables):
            next = stream.next
            test = self.test()
            for event in stream:
                result = test(event, ns, vs)
                if result is True:
                    yield event
                    if event[0] is START:
                        # emit the complete subtree of the matched element,
                        # keeping the test function's state in sync
                        depth = 1
                        while depth > 0:
                            subevent = next()
                            if subevent[0] is START:
                                depth += 1
                            elif subevent[0] is END:
                                depth -= 1
                            yield subevent
                            test(subevent, ns, vs, updateonly=True)
                elif result:
                    yield result
        # NOTE(review): `stream` was rebound to its iterator above, so this
        # lookup only finds a serializer if the iterator itself has one.
        return Stream(_generate(),
                      serializer=getattr(stream, 'serializer', None))

    def test(self, ignore_context=False):
        """Returns a function that can be used to track whether the path matches
        a specific stream event.

        The function returned expects the positional arguments ``event``,
        ``namespaces`` and ``variables``. The first is a stream event, while the
        latter two are a mapping of namespace prefixes to URIs, and a mapping
        of variable names to values, respectively. In addition, the function
        accepts an ``updateonly`` keyword argument that defaults to ``False``.
        If it is set to ``True``, the function only updates its internal state,
        but does not perform any tests or return a result.

        If the path matches the event, the function returns the match (for
        example, a `START` or `TEXT` event.) Otherwise, it returns ``None``.

        >>> from genshi.input import XML
        >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>')
        >>> test = Path('child').test()
        >>> namespaces, variables = {}, {}
        >>> for event in xml:
        ...     if test(event, namespaces, variables):
        ...         print('%s %r' % (event[0], event[1]))
        START (QName('child'), Attrs([(QName('id'), u'2')]))

        :param ignore_context: if `True`, the path is interpreted like a pattern
                               in XSLT, meaning for example that it will match
                               at any depth
        :return: a function that can be used to test individual events in a
                 stream against the path
        :rtype: ``function``
        """
        tests = [s.test(ignore_context) for s in self.strategies]
        if len(tests) == 1:
            return tests[0]

        def _multi(event, namespaces, variables, updateonly=False):
            # Every test must see every event to keep its state current; the
            # first result that is not None wins.
            retval = None
            for test in tests:
                val = test(event, namespaces, variables, updateonly=updateonly)
                if retval is None:
                    retval = val
            return retval
        return _multi
+
+
class PathSyntaxError(Exception):
    """Error raised for an XPath expression that cannot be parsed."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        :param message: description of the problem
        :param filename: name of the file containing the expression; when
                         given, it is appended to the message together with
                         the line number
        :param lineno: line on which the expression was found
        :param offset: stored on the exception as ``offset``
        """
        text = message
        if filename:
            text = '%s (%s, line %d)' % (message, filename, lineno)
        Exception.__init__(self, text)
        self.filename, self.lineno, self.offset = filename, lineno, offset
+
+
class PathParser(object):
    """Tokenizes and parses an XPath expression."""

    _QUOTES = (("'", "'"), ('"', '"'))
    _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
               '=', '!=', '!', '|', ',', '>=', '>', '<=', '<', '$')
    # Groups, in order: double-quoted string, single-quoted string, number
    # with a decimal point, one of `_TOKENS`, and a name (any run of
    # characters that neither are white space nor start a token).
    _tokenize = re.compile(
        r"""("[^"]*")|('[^']*')|((?:\d+)?\.\d+)|(%s)|([^%s\s]+)|\s+""" % (
            '|'.join([re.escape(t) for t in _TOKENS]),
            ''.join([re.escape(t[0]) for t in _TOKENS]))).findall

    def __init__(self, text, filename=None, lineno=-1):
        """Tokenize the expression.

        :param text: the path expression
        :param filename: name of the file containing the expression (used in
                         error messages)
        :param lineno: line on which the expression was found
        """
        self.filename = filename
        self.lineno = lineno
        self.tokens = [t for t in [dqstr or sqstr or number or token or name
                                   for dqstr, sqstr, number, token, name in
                                   self._tokenize(text)] if t]
        self.pos = 0

    # Tokenizer

    @property
    def at_end(self):
        """Whether the current token is the last one."""
        return self.pos == len(self.tokens) - 1

    @property
    def cur_token(self):
        """The token at the current position."""
        return self.tokens[self.pos]

    def next_token(self):
        """Advance to the next token and return it."""
        self.pos += 1
        return self.tokens[self.pos]

    def peek_token(self):
        """Return the token after the current one without advancing, or
        `None` when the current token is the last one.
        """
        if not self.at_end:
            return self.tokens[self.pos + 1]
        return None

    # Recursive descent parser

    def parse(self):
        """Parses the XPath expression and returns a list of location path
        tests.

        For union expressions (such as `*|text()`), this function returns one
        test for each operand in the union. For path expressions that don't
        use the union operator, the function always returns a list of size 1.

        Each path test in turn is a sequence of tests that correspond to the
        location steps, each tuples of the form `(axis, testfunc, predicates)`
        """
        paths = [self._location_path()]
        while self.cur_token == '|':
            self.next_token()
            paths.append(self._location_path())
        if not self.at_end:
            raise PathSyntaxError('Unexpected token %r after end of expression'
                                  % self.cur_token, self.filename, self.lineno)
        return paths

    def _location_path(self):
        steps = []
        while True:
            if self.cur_token.startswith('/'):
                if not steps:
                    if self.cur_token == '//':
                        # hack to make //* match every node - also root
                        self.next_token()
                        axis, nodetest, predicates = self._location_step()
                        steps.append((DESCENDANT_OR_SELF, nodetest,
                                      predicates))
                        if self.at_end or not self.cur_token.startswith('/'):
                            break
                        continue
                    else:
                        raise PathSyntaxError('Absolute location paths not '
                                              'supported', self.filename,
                                              self.lineno)
                elif self.cur_token == '//':
                    steps.append((DESCENDANT_OR_SELF, NodeTest(), []))
                self.next_token()

            axis, nodetest, predicates = self._location_step()
            if not axis:
                axis = CHILD
            steps.append((axis, nodetest, predicates))
            if self.at_end or not self.cur_token.startswith('/'):
                break

        return steps

    def _location_step(self):
        if self.cur_token == '@':
            axis = ATTRIBUTE
            self.next_token()
        elif self.cur_token == '.':
            axis = SELF
        elif self.cur_token == '..':
            raise PathSyntaxError('Unsupported axis "parent"', self.filename,
                                  self.lineno)
        elif self.peek_token() == '::':
            axis = Axis.forname(self.cur_token)
            if axis is None:
                # report the name that was written in the expression; `axis`
                # itself is always None at this point
                raise PathSyntaxError('Unsupported axis "%s"' % self.cur_token,
                                      self.filename, self.lineno)
            self.next_token()
            self.next_token()
        else:
            axis = None
        nodetest = self._node_test(axis or CHILD)
        predicates = []
        while self.cur_token == '[':
            predicates.append(self._predicate())
        return axis, nodetest, predicates

    def _node_test(self, axis=None):
        test = prefix = None
        next_token = self.peek_token()
        if next_token in ('(', '()'):  # Node type test
            test = self._node_type()

        elif next_token == ':':  # Namespace prefix
            prefix = self.cur_token
            self.next_token()
            localname = self.next_token()
            if localname == '*':
                test = QualifiedPrincipalTypeTest(axis, prefix)
            else:
                test = QualifiedNameTest(axis, prefix, localname)

        else:  # Name test
            if self.cur_token == '*':
                test = PrincipalTypeTest(axis)
            elif self.cur_token == '.':
                test = NodeTest()
            else:
                test = LocalNameTest(axis, self.cur_token)

        if not self.at_end:
            self.next_token()
        return test

    def _node_type(self):
        name = self.cur_token
        self.next_token()

        args = []
        if self.cur_token != '()':
            # The processing-instruction() function optionally accepts the
            # name of the PI as argument, which must be a literal string
            self.next_token()  # (
            if self.cur_token != ')':
                string = self.cur_token
                if (string[0], string[-1]) in self._QUOTES:
                    string = string[1:-1]
                args.append(string)

        cls = _nodetest_map.get(name)
        if not cls:
            raise PathSyntaxError('%s() not allowed here' % name, self.filename,
                                  self.lineno)
        return cls(*args)

    def _predicate(self):
        assert self.cur_token == '['
        self.next_token()
        expr = self._or_expr()
        if self.cur_token != ']':
            raise PathSyntaxError('Expected "]" to close predicate, '
                                  'but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
        if not self.at_end:
            self.next_token()
        return expr

    def _or_expr(self):
        expr = self._and_expr()
        while self.cur_token == 'or':
            self.next_token()
            expr = OrOperator(expr, self._and_expr())
        return expr

    def _and_expr(self):
        expr = self._equality_expr()
        while self.cur_token == 'and':
            self.next_token()
            expr = AndOperator(expr, self._equality_expr())
        return expr

    def _equality_expr(self):
        expr = self._relational_expr()
        while self.cur_token in ('=', '!='):
            op = _operator_map[self.cur_token]
            self.next_token()
            expr = op(expr, self._relational_expr())
        return expr

    def _relational_expr(self):
        expr = self._sub_expr()
        # all four relational operators, including '<='
        while self.cur_token in ('>', '>=', '<', '<='):
            op = _operator_map.get(self.cur_token)
            if op is None:
                # keep reporting a syntax error (rather than a KeyError) if
                # the operator table has no entry for this token
                raise PathSyntaxError('Unsupported operator "%s"'
                                      % self.cur_token,
                                      self.filename, self.lineno)
            self.next_token()
            expr = op(expr, self._sub_expr())
        return expr

    def _sub_expr(self):
        token = self.cur_token
        if token != '(':
            return self._primary_expr()
        self.next_token()
        expr = self._or_expr()
        if self.cur_token != ')':
            raise PathSyntaxError('Expected ")" to close sub-expression, '
                                  'but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
        self.next_token()
        return expr

    def _primary_expr(self):
        token = self.cur_token
        if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
            self.next_token()
            return StringLiteral(token[1:-1])
        elif token[0].isdigit() or token[0] == '.':
            self.next_token()
            return NumberLiteral(as_float(token))
        elif token == '$':
            token = self.next_token()
            self.next_token()
            return VariableReference(token)
        elif not self.at_end and self.peek_token().startswith('('):
            return self._function_call()
        else:
            axis = None
            if token == '@':
                axis = ATTRIBUTE
                self.next_token()
            return self._node_test(axis)

    def _function_call(self):
        name = self.cur_token
        if self.next_token() == '()':
            args = []
        else:
            assert self.cur_token == '('
            self.next_token()
            args = [self._or_expr()]
            while self.cur_token == ',':
                self.next_token()
                args.append(self._or_expr())
            if not self.cur_token == ')':
                raise PathSyntaxError('Expected ")" to close function argument '
                                      'list, but found "%s"' % self.cur_token,
                                      self.filename, self.lineno)
        self.next_token()
        cls = _function_map.get(name)
        if not cls:
            raise PathSyntaxError('Unsupported function "%s"' % name,
                                  self.filename, self.lineno)
        return cls(*args)
+
+
+# Type coercion
+
def as_scalar(value):
    """Reduce `value` to a scalar.

    An `Attrs` object, which must hold exactly one attribute, is replaced
    by the value of that attribute; anything else is returned unchanged.
    """
    if not isinstance(value, Attrs):
        return value
    assert len(value) == 1
    return value[0][1]
+
def as_float(value):
    """Coerce `value` to a float after reducing it with `as_scalar`."""
    # FIXME - if value is a bool it will be coerced to 0.0 and consequently
    # compared as a float. This is probably not ideal.
    scalar = as_scalar(value)
    return float(scalar)
+
def as_long(value):
    """Coerce `value` to a long after reducing it with `as_scalar`."""
    scalar = as_scalar(value)
    return long(scalar)
+
def as_string(value):
    """Coerce `value` to a unicode string after reducing it with `as_scalar`.

    The value `False` is turned into the empty string.
    """
    scalar = as_scalar(value)
    if scalar is not False:
        return unicode(scalar)
    return ''
+
def as_bool(value):
    """Coerce `value` to a bool after reducing it with `as_scalar`."""
    scalar = as_scalar(value)
    return bool(scalar)
+
+
+# Node tests
+
class PrincipalTypeTest(object):
    """Node test for ``*``: matches every `START` event.

    On the attribute axis the result is the element's attributes, or `None`
    if there are none; otherwise the result is `True`. Events of any other
    kind yield `None`.
    """
    __slots__ = ['principal_type']

    def __init__(self, principal_type):
        self.principal_type = principal_type

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE:
            return data[1] or None
        return True

    def __repr__(self):
        return '*'
+
class QualifiedPrincipalTypeTest(object):
    """Node test that matches any event with the given principal type in a
    specific namespace (``prefix:*``).

    On the attribute axis the result is an `Attrs` object holding the
    attributes of a `START` event that are in the namespace, `None` if the
    element has attributes but none of them is in the namespace, or `False`
    if it has no attributes at all. On other axes the result tells whether
    the element name is in the namespace. Events other than `START` yield
    `None`.
    """
    __slots__ = ['principal_type', 'prefix']

    def __init__(self, principal_type, prefix):
        self.principal_type = principal_type
        self.prefix = prefix

    def __call__(self, kind, data, pos, namespaces, variables):
        namespace = Namespace(namespaces.get(self.prefix))
        if kind is START:
            if self.principal_type is ATTRIBUTE:
                # An attribute test must never fall back to testing the
                # element name, even when the element has no attributes.
                if not data[1]:
                    return False
                return Attrs([(name, value) for name, value in data[1]
                              if name in namespace]) or None
            return data[0] in namespace

    def __repr__(self):
        return '%s:*' % self.prefix
+
class LocalNameTest(object):
    """Node test that matches any event with the given principal type and
    local name.

    Only `START` events are considered.  If the principal type is
    `ATTRIBUTE` and the element carries an attribute of that name, the
    attribute is returned wrapped in an `Attrs` instance; in every other case
    the element's local name is compared against the name.
    """
    __slots__ = ['principal_type', 'name']

    def __init__(self, principal_type, name):
        self.principal_type = principal_type
        self.name = name

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE and self.name in data[1]:
            value = data[1].get(self.name)
            return Attrs([(self.name, value)])
        return data[0].localname == self.name

    def __repr__(self):
        return self.name
+
class QualifiedNameTest(object):
    """Node test that matches any event with the given principal type and
    qualified name.

    The qualified name is built for every call from the namespace registered
    for the prefix in the `namespaces` mapping and the local name.
    """
    __slots__ = ['principal_type', 'prefix', 'name']

    def __init__(self, principal_type, prefix, name):
        self.principal_type = principal_type
        self.prefix = prefix
        self.name = name

    def __call__(self, kind, data, pos, namespaces, variables):
        qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE and qname in data[1]:
            # The presence check above uses the qualified name, so the value
            # must be fetched with the same key; looking it up by the bare
            # local name would not refer to the attribute that was found.
            return Attrs([(self.name, data[1].get(qname))])
        return data[0] == qname

    def __repr__(self):
        return '%s:%s' % (self.prefix, self.name)
+
class CommentNodeTest(object):
    """Node test that matches any comment events."""
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        is_comment = kind is COMMENT
        return is_comment

    def __repr__(self):
        return 'comment()'
+
class NodeTest(object):
    """Node test that matches any node.

    `START` events yield `True`; every other event is handed back as a
    `(kind, data, pos)` tuple.
    """
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return kind, data, pos
        return True

    def __repr__(self):
        return 'node()'
+
class ProcessingInstructionNodeTest(object):
    """Node test that matches any processing instruction event.

    If a target was given, only processing instructions whose first data item
    equals that target match.
    """
    __slots__ = ['target']

    def __init__(self, target=None):
        self.target = target

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not PI:
            return False
        return not self.target or data[0] == self.target

    def __repr__(self):
        if not self.target:
            return 'processing-instruction(%s)' % ''
        return 'processing-instruction(%s)' % ('"' + self.target + '"')
+
class TextNodeTest(object):
    """Node test that matches any text event."""
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        is_text = kind is TEXT
        return is_text

    def __repr__(self):
        return 'text()'
+
# Maps the name of each node type test to the class implementing it.
_nodetest_map = {
    'comment': CommentNodeTest,
    'node': NodeTest,
    'processing-instruction': ProcessingInstructionNodeTest,
    'text': TextNodeTest,
}
+
+# Functions
+
class Function(object):
    """Common base class of all function nodes in XPath expressions."""
+
class BooleanFunction(Function):
    """The `boolean` function, which converts the result of its argument
    expression to a boolean value.
    """
    __slots__ = ['expr']
    _return_type = bool

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        return as_bool(self.expr(kind, data, pos, namespaces, variables))

    def __repr__(self):
        return 'boolean(%r)' % self.expr
+
class CeilingFunction(Function):
    """The `ceiling` function, which returns the smallest integral number that
    is not less than the given number.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(value)
        return ceil(as_number)

    def __repr__(self):
        return 'ceiling(%r)' % self.number
+
class ConcatFunction(Function):
    """The `concat` function, which joins the string values of any number of
    argument expressions.
    """
    __slots__ = ['exprs']

    def __init__(self, *exprs):
        self.exprs = exprs

    def __call__(self, kind, data, pos, namespaces, variables):
        # Evaluate every argument first, then coerce the results to strings.
        values = [expr(kind, data, pos, namespaces, variables)
                  for expr in self.exprs]
        return ''.join([as_string(value) for value in values])

    def __repr__(self):
        rendered = [repr(expr) for expr in self.exprs]
        return 'concat(%s)' % ', '.join(rendered)
+
class ContainsFunction(Function):
    """The `contains` function, which returns whether the string value of the
    first argument contains the string value of the second.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        haystack = self.string1(*event)
        needle = self.string2(*event)
        return as_string(needle) in as_string(haystack)

    def __repr__(self):
        return 'contains(%r, %r)' % (self.string1, self.string2)
+
class MatchesFunction(Function):
    """The `matches` function, which returns whether a string matches a regular
    expression.

    The first argument yields the string to search and the second the
    pattern.  The optional `flags` argument is a string of flag letters, each
    of which must be a key of `flag_mapping`; matching is always performed
    with `re.U` in addition.  The result of a call is whatever `re.search`
    returns, that is a match object or `None`.
    """
    __slots__ = ['string1', 'string2', 'flags']
    flag_mapping = {'s': re.S, 'm': re.M, 'i': re.I, 'x': re.X}

    def __init__(self, string1, string2, flags=''):
        self.string1 = string1
        self.string2 = string2
        self.flags = self._map_flags(flags)

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        string1 = as_string(self.string1(*event))
        string2 = as_string(self.string2(*event))
        return re.search(string2, string1, self.flags)

    def _map_flags(self, flags):
        """Translate a string of flag letters into a `re` flag bit mask.

        :raises KeyError: if a letter is not a key of `flag_mapping`
        """
        # Arguments built by the expression parser are nodes rather than plain
        # strings; a string literal node keeps its value in `text`.
        # NOTE(review): presumably the flags always arrive as a literal.
        flags = getattr(flags, 'text', flags)
        mask = re.U
        for flag in flags:
            mask |= self.flag_mapping[flag]
        return mask

    def __repr__(self):
        return 'matches(%r, %r)' % (self.string1, self.string2)
+
class FalseFunction(Function):
    """The `false` function, which evaluates to the boolean `false` value for
    every event.
    """
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        return False

    def __repr__(self):
        return 'false()'
+
class FloorFunction(Function):
    """The `floor` function, which returns the largest integral number that is
    not greater than the given number.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(value)
        return floor(as_number)

    def __repr__(self):
        return 'floor(%r)' % self.number
+
class LocalNameFunction(Function):
    """The `local-name` function, which returns the local name of the current
    element.

    Events other than `START` yield `None`.
    """
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        return data[0].localname

    def __repr__(self):
        return 'local-name()'
+
class NameFunction(Function):
    """The `name` function, which returns the qualified name of the current
    element.

    Events other than `START` yield `None`.
    """
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        return data[0]

    def __repr__(self):
        return 'name()'
+
class NamespaceUriFunction(Function):
    """The `namespace-uri` function, which returns the namespace URI of the
    current element.

    Events other than `START` yield `None`.
    """
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        if kind is not START:
            return None
        return data[0].namespace

    def __repr__(self):
        return 'namespace-uri()'
+
class NotFunction(Function):
    """The `not` function, which returns the negation of the boolean value of
    its argument.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        truth = as_bool(self.expr(kind, data, pos, namespaces, variables))
        return not truth

    def __repr__(self):
        return 'not(%s)' % self.expr
+
class NormalizeSpaceFunction(Function):
    """The `normalize-space` function, which removes leading and trailing
    whitespace in the given string, and replaces every sequence of whitespace
    characters inside the string with a single space.
    """
    __slots__ = ['expr']
    # Match runs of any length: a lone tab or newline has to become a space
    # as well, not only runs of two or more whitespace characters.
    _normalize = re.compile(r'\s+').sub

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        string = self.expr(kind, data, pos, namespaces, variables)
        return self._normalize(' ', as_string(string).strip())

    def __repr__(self):
        return 'normalize-space(%s)' % repr(self.expr)
+
class NumberFunction(Function):
    """The `number` function, which converts the result of its argument
    expression to a floating point number.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        return as_float(self.expr(kind, data, pos, namespaces, variables))

    def __repr__(self):
        return 'number(%r)' % self.expr
+
class RoundFunction(Function):
    """The `round` function, which rounds the given number to the nearest
    integral value using the built-in `round()`.
    """
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        value = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(value)
        return round(as_number)

    def __repr__(self):
        return 'round(%r)' % self.number
+
class StartsWithFunction(Function):
    """The `starts-with` function, which returns whether the string value of
    the first argument begins with the string value of the second.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        subject = self.string1(*event)
        prefix = self.string2(*event)
        return as_string(subject).startswith(as_string(prefix))

    def __repr__(self):
        return 'starts-with(%r, %r)' % (self.string1, self.string2)
+
class StringLengthFunction(Function):
    """The `string-length` function, which returns the number of characters in
    the string value of its argument.
    """
    __slots__ = ['expr']

    def __init__(self, expr):
        self.expr = expr

    def __call__(self, kind, data, pos, namespaces, variables):
        text = as_string(self.expr(kind, data, pos, namespaces, variables))
        return len(text)

    def __repr__(self):
        return 'string-length(%r)' % self.expr
+
class SubstringFunction(Function):
    """The `substring` function, which returns the part of a string that
    starts at the given offset, optionally shortened by a length argument.

    The offset is used directly as a zero-based slice index.
    """
    __slots__ = ['string', 'start', 'length']

    def __init__(self, string, start, length=None):
        self.string = string
        self.start = start
        self.length = length

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        string = self.string(*event)
        start = self.start(*event)
        if self.length is not None:
            length = self.length(*event)
        else:
            length = 0
        begin = as_long(start)
        # NOTE(review): the length is subtracted from the end of the string
        # instead of being added to the start offset, and the slice is taken
        # from the un-coerced value; confirm whether this is intended before
        # relying on the three argument form.
        end = len(as_string(string)) - as_long(length)
        return string[begin:end]

    def __repr__(self):
        if self.length is None:
            return 'substring(%r, %r)' % (self.string, self.start)
        return 'substring(%r, %r, %r)' % (self.string, self.start,
                                          self.length)
+
class SubstringAfterFunction(Function):
    """The `substring-after` function, which returns the part of a string that
    follows the first occurrence of the given substring, or the empty string
    if the substring does not occur.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        subject = as_string(self.string1(*event))
        marker = as_string(self.string2(*event))
        offset = subject.find(marker)
        if offset < 0:
            return ''
        return subject[offset + len(marker):]

    def __repr__(self):
        return 'substring-after(%r, %r)' % (self.string1, self.string2)
+
class SubstringBeforeFunction(Function):
    """The `substring-before` function, which returns the part of a string
    that precedes the first occurrence of the given substring, or the empty
    string if the substring does not occur.
    """
    __slots__ = ['string1', 'string2']

    def __init__(self, string1, string2):
        self.string1 = string1
        self.string2 = string2

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        string1 = as_string(self.string1(*event))
        string2 = as_string(self.string2(*event))
        index = string1.find(string2)
        if index >= 0:
            return string1[:index]
        return ''

    def __repr__(self):
        # Report the function's own name, not that of `substring-after`.
        return 'substring-before(%r, %r)' % (self.string1, self.string2)
+
class TranslateFunction(Function):
    """The `translate` function, which replaces characters in a string.

    Each character of the string that occurs in `fromchars` is replaced by
    the character at the same position in `tochars`.  Characters of
    `fromchars` that have no counterpart because `tochars` is shorter are
    removed from the result, and if a character occurs more than once in
    `fromchars` only its first occurrence is taken into account.
    """
    __slots__ = ['string', 'fromchars', 'tochars']

    def __init__(self, string, fromchars, tochars):
        self.string = string
        self.fromchars = fromchars
        self.tochars = tochars

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        string = as_string(self.string(*event))
        fromchars = as_string(self.fromchars(*event))
        tochars = as_string(self.tochars(*event))
        table = {}
        for index, char in enumerate(fromchars):
            code = ord(char)
            if code in table:
                # The first occurrence of a repeated character wins.
                continue
            if index < len(tochars):
                table[code] = ord(tochars[index])
            else:
                # Mapping to None makes `translate` delete the character.
                table[code] = None
        return string.translate(table)

    def __repr__(self):
        return 'translate(%r, %r, %r)' % (self.string, self.fromchars,
                                          self.tochars)
+
class TrueFunction(Function):
    """The `true` function, which evaluates to the boolean `true` value for
    every event.
    """
    __slots__ = []

    def __call__(self, kind, data, pos, namespaces, variables):
        return True

    def __repr__(self):
        return 'true()'
+
# Maps the name of each supported XPath function to the class implementing it.
_function_map = {
    'boolean': BooleanFunction,
    'ceiling': CeilingFunction,
    'concat': ConcatFunction,
    'contains': ContainsFunction,
    'matches': MatchesFunction,
    'false': FalseFunction,
    'floor': FloorFunction,
    'local-name': LocalNameFunction,
    'name': NameFunction,
    'namespace-uri': NamespaceUriFunction,
    'normalize-space': NormalizeSpaceFunction,
    'not': NotFunction,
    'number': NumberFunction,
    'round': RoundFunction,
    'starts-with': StartsWithFunction,
    'string-length': StringLengthFunction,
    'substring': SubstringFunction,
    'substring-after': SubstringAfterFunction,
    'substring-before': SubstringBeforeFunction,
    'translate': TranslateFunction,
    'true': TrueFunction,
}
+
+# Literals & Variables
+
class Literal(object):
    """Abstract base class shared by all literal nodes."""
+
class StringLiteral(Literal):
    """A string literal node, which evaluates to its text for every event."""
    __slots__ = ['text']

    def __init__(self, text):
        self.text = text

    def __call__(self, kind, data, pos, namespaces, variables):
        return self.text

    def __repr__(self):
        return '"%s"' % self.text
+
class NumberLiteral(Literal):
    """A number literal node, which evaluates to its number for every event."""
    __slots__ = ['number']

    def __init__(self, number):
        self.number = number

    def __call__(self, kind, data, pos, namespaces, variables):
        return self.number

    def __repr__(self):
        return str(self.number)
+
class VariableReference(Literal):
    """A variable reference node, which evaluates to the value stored under
    its name in the `variables` mapping (`None` if there is no such entry).
    """
    __slots__ = ['name']

    def __init__(self, name):
        self.name = name

    def __call__(self, kind, data, pos, namespaces, variables):
        return variables.get(self.name)

    def __repr__(self):
        return str(self.name)
+
+# Operators
+
class AndOperator(object):
    """The boolean operator `and`.

    The right operand is only evaluated if the left operand is true.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        if as_bool(self.lval(*event)):
            return as_bool(self.rval(*event))
        return False

    def __repr__(self):
        return '%s and %s' % (self.lval, self.rval)
+
class EqualsOperator(object):
    """The equality operator `=`.

    Both operands are reduced to scalars before they are compared.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        left = as_scalar(self.lval(*event))
        right = as_scalar(self.rval(*event))
        return left == right

    def __repr__(self):
        return '%s=%s' % (self.lval, self.rval)
+
class NotEqualsOperator(object):
    """The inequality operator `!=`.

    Both operands are reduced to scalars before they are compared.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        left = as_scalar(self.lval(*event))
        right = as_scalar(self.rval(*event))
        return left != right

    def __repr__(self):
        return '%s!=%s' % (self.lval, self.rval)
+
class OrOperator(object):
    """The boolean operator `or`.

    The right operand is only evaluated if the left operand is false.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        if not as_bool(self.lval(*event)):
            return as_bool(self.rval(*event))
        return True

    def __repr__(self):
        return '%s or %s' % (self.lval, self.rval)
+
class GreaterThanOperator(object):
    """The relational operator `>` (greater than).

    Both operands are evaluated and then compared as floating point numbers.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        left = self.lval(*event)
        right = self.rval(*event)
        return as_float(left) > as_float(right)

    def __repr__(self):
        return '%s>%s' % (self.lval, self.rval)
+
class GreaterThanOrEqualOperator(object):
    """The relational operator `>=` (greater than or equal).

    Both operands are evaluated and then compared as floating point numbers.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        left = self.lval(*event)
        right = self.rval(*event)
        return as_float(left) >= as_float(right)

    def __repr__(self):
        return '%s>=%s' % (self.lval, self.rval)
+
class LessThanOperator(object):
    """The relational operator `<` (less than).

    Both operands are evaluated and then compared as floating point numbers.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        left = self.lval(*event)
        right = self.rval(*event)
        return as_float(left) < as_float(right)

    def __repr__(self):
        return '%s<%s' % (self.lval, self.rval)
+
class LessThanOrEqualOperator(object):
    """The relational operator `<=` (less than or equal).

    Both operands are evaluated and then compared as floating point numbers.
    """
    __slots__ = ['lval', 'rval']

    def __init__(self, lval, rval):
        self.lval = lval
        self.rval = rval

    def __call__(self, kind, data, pos, namespaces, variables):
        event = (kind, data, pos, namespaces, variables)
        left = self.lval(*event)
        right = self.rval(*event)
        return as_float(left) <= as_float(right)

    def __repr__(self):
        return '%s<=%s' % (self.lval, self.rval)
+
# Maps each comparison operator token to the class implementing it.  Every
# token must appear exactly once: a repeated key silently replaces the earlier
# entry, which previously made `>=` resolve to the "less than or equal" class
# and left `<=` unregistered.
_operator_map = {
    '=': EqualsOperator,
    '!=': NotEqualsOperator,
    '>': GreaterThanOperator,
    '>=': GreaterThanOrEqualOperator,
    '<': LessThanOperator,
    '<=': LessThanOrEqualOperator,
}
+
+
# Prebuilt (axis, node test, predicates) triples.
# NOTE(review): presumably these are the steps substituted for a leading
# `.//` and `./` in a path expression -- confirm against the parser.
_DOTSLASHSLASH = (
    DESCENDANT_OR_SELF,
    PrincipalTypeTest(None),
    (),
)
_DOTSLASH = (
    SELF,
    PrincipalTypeTest(None),
    (),
)
diff --git a/genshi/template/__init__.py b/genshi/template/__init__.py
new file mode 100644
index 0000000..47a9310
--- /dev/null
+++ b/genshi/template/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of the template engine."""
+
+from genshi.template.base import Context, Template, TemplateError, \
+ TemplateRuntimeError, TemplateSyntaxError, \
+ BadDirectiveError
+from genshi.template.loader import TemplateLoader, TemplateNotFound
+from genshi.template.markup import MarkupTemplate
+from genshi.template.text import TextTemplate, OldTextTemplate, NewTextTemplate
+
+__docformat__ = 'restructuredtext en'
diff --git a/genshi/template/_ast24.py b/genshi/template/_ast24.py
new file mode 100644
index 0000000..05d241b
--- /dev/null
+++ b/genshi/template/_ast24.py
@@ -0,0 +1,446 @@
+# Generated automatically, please do not edit
+# Generator can be found in Genshi SVN, scripts/ast-generator.py
+
+__version__ = 43614
+
+class AST(object):
+ _fields = None
+ __doc__ = None
+
+class operator(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class Add(operator):
+ _fields = None
+ __doc__ = None
+
+class boolop(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class And(boolop):
+ _fields = None
+ __doc__ = None
+
+class stmt(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = ['lineno', 'col_offset']
+class Assert(stmt):
+ _fields = ('test', 'msg')
+ __doc__ = None
+
+class Assign(stmt):
+ _fields = ('targets', 'value')
+ __doc__ = None
+
+class expr(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = ['lineno', 'col_offset']
+class Attribute(expr):
+ _fields = ('value', 'attr', 'ctx')
+ __doc__ = None
+
+class AugAssign(stmt):
+ _fields = ('target', 'op', 'value')
+ __doc__ = None
+
+class expr_context(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class AugLoad(expr_context):
+ _fields = None
+ __doc__ = None
+
+class AugStore(expr_context):
+ _fields = None
+ __doc__ = None
+
+class BinOp(expr):
+ _fields = ('left', 'op', 'right')
+ __doc__ = None
+
+class BitAnd(operator):
+ _fields = None
+ __doc__ = None
+
+class BitOr(operator):
+ _fields = None
+ __doc__ = None
+
+class BitXor(operator):
+ _fields = None
+ __doc__ = None
+
+class BoolOp(expr):
+ _fields = ('op', 'values')
+ __doc__ = None
+
+class Break(stmt):
+ _fields = None
+ __doc__ = None
+
+class Call(expr):
+ _fields = ('func', 'args', 'keywords', 'starargs', 'kwargs')
+ __doc__ = None
+
+class ClassDef(stmt):
+ _fields = ('name', 'bases', 'body')
+ __doc__ = None
+
+class Compare(expr):
+ _fields = ('left', 'ops', 'comparators')
+ __doc__ = None
+
+class Continue(stmt):
+ _fields = None
+ __doc__ = None
+
+class Del(expr_context):
+ _fields = None
+ __doc__ = None
+
+class Delete(stmt):
+ _fields = ('targets',)
+ __doc__ = None
+
+class Dict(expr):
+ _fields = ('keys', 'values')
+ __doc__ = None
+
+class Div(operator):
+ _fields = None
+ __doc__ = None
+
+class slice(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class Ellipsis(slice):
+ _fields = None
+ __doc__ = None
+
+class cmpop(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class Eq(cmpop):
+ _fields = None
+ __doc__ = None
+
+class Exec(stmt):
+ _fields = ('body', 'globals', 'locals')
+ __doc__ = None
+
+class Expr(stmt):
+ _fields = ('value',)
+ __doc__ = None
+
+class mod(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class Expression(mod):
+ _fields = ('body',)
+ __doc__ = None
+
+class ExtSlice(slice):
+ _fields = ('dims',)
+ __doc__ = None
+
+class FloorDiv(operator):
+ _fields = None
+ __doc__ = None
+
+class For(stmt):
+ _fields = ('target', 'iter', 'body', 'orelse')
+ __doc__ = None
+
+class FunctionDef(stmt):
+ _fields = ('name', 'args', 'body', 'decorators')
+ __doc__ = None
+
+class GeneratorExp(expr):
+ _fields = ('elt', 'generators')
+ __doc__ = None
+
+class Global(stmt):
+ _fields = ('names',)
+ __doc__ = None
+
+class Gt(cmpop):
+ _fields = None
+ __doc__ = None
+
+class GtE(cmpop):
+ _fields = None
+ __doc__ = None
+
+class If(stmt):
+ _fields = ('test', 'body', 'orelse')
+ __doc__ = None
+
+class IfExp(expr):
+ _fields = ('test', 'body', 'orelse')
+ __doc__ = None
+
+class Import(stmt):
+ _fields = ('names',)
+ __doc__ = None
+
+class ImportFrom(stmt):
+ _fields = ('module', 'names', 'level')
+ __doc__ = None
+
+class In(cmpop):
+ _fields = None
+ __doc__ = None
+
+class Index(slice):
+ _fields = ('value',)
+ __doc__ = None
+
+class Interactive(mod):
+ _fields = ('body',)
+ __doc__ = None
+
+class unaryop(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+class Invert(unaryop):
+ _fields = None
+ __doc__ = None
+
+class Is(cmpop):
+ _fields = None
+ __doc__ = None
+
+class IsNot(cmpop):
+ _fields = None
+ __doc__ = None
+
+class LShift(operator):
+ _fields = None
+ __doc__ = None
+
+class Lambda(expr):
+ _fields = ('args', 'body')
+ __doc__ = None
+
+class List(expr):
+ _fields = ('elts', 'ctx')
+ __doc__ = None
+
+class ListComp(expr):
+ _fields = ('elt', 'generators')
+ __doc__ = None
+
+class Load(expr_context):
+ _fields = None
+ __doc__ = None
+
+class Lt(cmpop):
+ _fields = None
+ __doc__ = None
+
+class LtE(cmpop):
+ _fields = None
+ __doc__ = None
+
+class Mod(operator):
+ _fields = None
+ __doc__ = None
+
+class Module(mod):
+ _fields = ('body',)
+ __doc__ = None
+
+class Mult(operator):
+ _fields = None
+ __doc__ = None
+
+class Name(expr):
+ _fields = ('id', 'ctx')
+ __doc__ = None
+
+class Not(unaryop):
+ _fields = None
+ __doc__ = None
+
+class NotEq(cmpop):
+ _fields = None
+ __doc__ = None
+
+class NotIn(cmpop):
+ _fields = None
+ __doc__ = None
+
+class Num(expr):
+ _fields = ('n',)
+ __doc__ = None
+
+class Or(boolop):
+ _fields = None
+ __doc__ = None
+
+class Param(expr_context):
+ _fields = None
+ __doc__ = None
+
+class Pass(stmt):
+ _fields = None
+ __doc__ = None
+
+class Pow(operator):
+ _fields = None
+ __doc__ = None
+
+class Print(stmt):
+ _fields = ('dest', 'values', 'nl')
+ __doc__ = None
+
+class RShift(operator):
+ _fields = None
+ __doc__ = None
+
+class Raise(stmt):
+ _fields = ('type', 'inst', 'tback')
+ __doc__ = None
+
+class Repr(expr):
+ _fields = ('value',)
+ __doc__ = None
+
+class Return(stmt):
+ _fields = ('value',)
+ __doc__ = None
+
+class Slice(slice):
+ _fields = ('lower', 'upper', 'step')
+ __doc__ = None
+
+class Store(expr_context):
+ _fields = None
+ __doc__ = None
+
+class Str(expr):
+ _fields = ('s',)
+ __doc__ = None
+
+class Sub(operator):
+ _fields = None
+ __doc__ = None
+
+class Subscript(expr):
+ _fields = ('value', 'slice', 'ctx')
+ __doc__ = None
+
+class Suite(mod):
+ _fields = ('body',)
+ __doc__ = None
+
+class TryExcept(stmt):
+ _fields = ('body', 'handlers', 'orelse')
+ __doc__ = None
+
+class TryFinally(stmt):
+ _fields = ('body', 'finalbody')
+ __doc__ = None
+
+class Tuple(expr):
+ _fields = ('elts', 'ctx')
+ __doc__ = None
+
+class UAdd(unaryop):
+ _fields = None
+ __doc__ = None
+
+class USub(unaryop):
+ _fields = None
+ __doc__ = None
+
+class UnaryOp(expr):
+ _fields = ('op', 'operand')
+ __doc__ = None
+
+class While(stmt):
+ _fields = ('test', 'body', 'orelse')
+ __doc__ = None
+
+class With(stmt):
+ _fields = ('context_expr', 'optional_vars', 'body')
+ __doc__ = None
+
+class Yield(expr):
+ _fields = ('value',)
+ __doc__ = None
+
+class alias(AST):
+ _fields = ('name', 'asname')
+ __doc__ = None
+
+class arguments(AST):
+ _fields = ('args', 'vararg', 'kwarg', 'defaults')
+ __doc__ = None
+
+class boolop(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
+class cmpop(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
+class comprehension(AST):
+ _fields = ('target', 'iter', 'ifs')
+ __doc__ = None
+
+class excepthandler(AST):
+ _fields = ('type', 'name', 'body', 'lineno', 'col_offset')
+ __doc__ = None
+
+class expr(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = ['lineno', 'col_offset']
+
+class expr_context(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
+class keyword(AST):
+ _fields = ('arg', 'value')
+ __doc__ = None
+
+class mod(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
+class operator(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
+class slice(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
+class stmt(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = ['lineno', 'col_offset']
+
+class unaryop(AST):
+ _fields = None
+ __doc__ = None
+ _attributes = []
+
diff --git a/genshi/template/ast24.py b/genshi/template/ast24.py
new file mode 100644
index 0000000..af6dce9
--- /dev/null
+++ b/genshi/template/ast24.py
@@ -0,0 +1,505 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2008-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Emulation of the proper abstract syntax tree API for Python 2.4."""
+
+import compiler
+import compiler.ast
+
+from genshi.template import _ast24 as _ast
+
+__all__ = ['_ast', 'parse']
+__docformat__ = 'restructuredtext en'
+
+
+def _new(cls, *args, **kwargs):
+    """Instantiate the node class `cls` and populate it.
+
+    Positional arguments are assigned to the names in ``cls._fields`` in
+    order. Keyword arguments are assigned only when their name appears in
+    the node's ``_fields`` or ``_attributes``; any other keyword is silently
+    dropped.
+
+    :param cls: the node class to instantiate (called without arguments)
+    :return: the populated node instance
+    :raise ValueError: if a field is supplied both positionally and by
+                       keyword
+    """
+    node = cls()
+    positional_names = node._fields
+    if positional_names:
+        for name, value in zip(positional_names, args):
+            if name in kwargs:
+                raise ValueError('Field set both in args and kwargs')
+            setattr(node, name, value)
+
+    # A missing or ``None`` declaration is treated as "accepts nothing".
+    known_fields = getattr(node, '_fields', None) or ()
+    known_attributes = getattr(node, '_attributes', None) or ()
+    for name in kwargs:
+        if name in known_fields or name in known_attributes:
+            setattr(node, name, kwargs[name])
+    return node
+
+
+class ASTUpgrader(object):
+    """Transformer changing structure of Python 2.4 ASTs to
+    Python 2.5 ones.
+
+    Transforms ``compiler.ast`` Abstract Syntax Tree to builtin ``_ast``.
+    It can use fake ``_ast`` classes and this way allow ``_ast`` emulation
+    in Python 2.4.
+
+    Dispatch is by class name: `visit` looks up ``visit_<ClassName>`` for
+    every ``compiler.ast`` node and raises if no such method exists.
+    """
+
+    def __init__(self):
+        # Flags of the most recently converted assignment-target node; used
+        # by the collection visitors to derive a common context.
+        self.out_flags = None
+        # Stack of source line numbers; the top entry is stamped as
+        # ``lineno`` on every node created through `_new`.
+        self.lines = [-1]
+
+    def _new(self, *args, **kwargs):
+        """Create a node via the module-level ``_new``, adding the current
+        line number as the ``lineno`` keyword."""
+        return _new(lineno = self.lines[-1], *args, **kwargs)
+
+    def visit(self, node):
+        """Convert `node` by dispatching to ``visit_<ClassName>``.
+
+        ``None`` is passed through unchanged and tuples are converted
+        element-wise.
+
+        :raise Exception: if there is no visitor for the node's class
+        """
+        if node is None:
+            return None
+        if type(node) is tuple:
+            return tuple([self.visit(n) for n in node])
+        lno = getattr(node, 'lineno', None)
+        if lno is not None:
+            self.lines.append(lno)
+        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
+        if visitor is None:
+            raise Exception('Unhandled node type %r' % type(node))
+
+        retval = visitor(node)
+        if lno is not None:
+            self.lines.pop()
+        return retval
+
+    def visit_Module(self, node):
+        body = self.visit(node.node)
+        if node.doc:
+            # Re-insert the docstring as the first statement of the body
+            body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
+        return self._new(_ast.Module, body)
+
+    def visit_Expression(self, node):
+        return self._new(_ast.Expression, self.visit(node.node))
+
+    def _extract_args(self, node):
+        """Build an ``_ast.arguments`` node from the ``argnames``, ``flags``
+        and ``defaults`` of a function or lambda node.
+
+        :raise NotImplementedError: if a nested (tuple) argument contains
+                                    something other than strings and tuples
+        """
+        tab = node.argnames[:]
+        # The ``**kwargs`` name, if any, is the last entry; ``*args``
+        # precedes it.
+        if node.flags & compiler.ast.CO_VARKEYWORDS:
+            kwarg = tab[-1]
+            tab = tab[:-1]
+        else:
+            kwarg = None
+
+        if node.flags & compiler.ast.CO_VARARGS:
+            vararg = tab[-1]
+            tab = tab[:-1]
+        else:
+            vararg = None
+
+        def _tup(t):
+            if isinstance(t, str):
+                return self._new(_ast.Name, t, _ast.Store())
+            elif isinstance(t, tuple):
+                elts = [_tup(x) for x in t]
+                return self._new(_ast.Tuple, elts, _ast.Store())
+            else:
+                # ``NotImplemented`` is a constant, not an exception class,
+                # so it cannot be raised.
+                raise NotImplementedError('Unsupported argument %r' % (t,))
+
+        args = []
+        for arg in tab:
+            if isinstance(arg, str):
+                args.append(self._new(_ast.Name, arg, _ast.Param()))
+            elif isinstance(arg, tuple):
+                args.append(_tup(arg))
+            else:
+                assert False, node.__class__
+
+        defaults = [self.visit(d) for d in node.defaults]
+        return self._new(_ast.arguments, args, vararg, kwarg, defaults)
+
+
+    def visit_Function(self, node):
+        if getattr(node, 'decorators', ()):
+            decorators = [self.visit(d) for d in node.decorators.nodes]
+        else:
+            decorators = []
+
+        args = self._extract_args(node)
+        body = self.visit(node.code)
+        if node.doc:
+            body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
+        return self._new(_ast.FunctionDef, node.name, args, body, decorators)
+
+    def visit_Class(self, node):
+        #self.name_types.append(_ast.Load)
+        bases = [self.visit(b) for b in node.bases]
+        #self.name_types.pop()
+        body = self.visit(node.code)
+        if node.doc:
+            body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
+        return self._new(_ast.ClassDef, node.name, bases, body)
+
+    def visit_Return(self, node):
+        return self._new(_ast.Return, self.visit(node.value))
+
+    def visit_Assign(self, node):
+        #self.name_types.append(_ast.Store)
+        targets = [self.visit(t) for t in node.nodes]
+        #self.name_types.pop()
+        return self._new(_ast.Assign, targets, self.visit(node.expr))
+
+    # Maps the augmented-assignment operator text to its operator class
+    aug_operators = {
+        '+=': _ast.Add,
+        '/=': _ast.Div,
+        '//=': _ast.FloorDiv,
+        '<<=': _ast.LShift,
+        '%=': _ast.Mod,
+        '*=': _ast.Mult,
+        '**=': _ast.Pow,
+        '>>=': _ast.RShift,
+        '-=': _ast.Sub,
+    }
+
+    def visit_AugAssign(self, node):
+        target = self.visit(node.node)
+
+        # Because it's AugAssign target can't be list nor tuple
+        # so we only have to change context of one node
+        target.ctx = _ast.Store()
+        op = self.aug_operators[node.op]()
+        return self._new(_ast.AugAssign, target, op, self.visit(node.expr))
+
+    def _visit_Print(nl):
+        # Factory: `nl` becomes the ``nl`` field of the created Print node
+        def _visit(self, node):
+            values = [self.visit(v) for v in node.nodes]
+            return self._new(_ast.Print, self.visit(node.dest), values, nl)
+        return _visit
+
+    visit_Print = _visit_Print(False)
+    visit_Printnl = _visit_Print(True)
+    del _visit_Print
+
+    def visit_For(self, node):
+        return self._new(_ast.For, self.visit(node.assign), self.visit(node.list),
+                         self.visit(node.body), self.visit(node.else_))
+
+    def visit_While(self, node):
+        return self._new(_ast.While, self.visit(node.test), self.visit(node.body),
+                         self.visit(node.else_))
+
+    def visit_If(self, node):
+        # ``node.tests`` is a flat list of (test, body) pairs; every pair
+        # after the first is nested into the ``orelse`` of the previous If.
+        def _level(tests, else_):
+            test = self.visit(tests[0][0])
+            body = self.visit(tests[0][1])
+            if len(tests) == 1:
+                orelse = self.visit(else_)
+            else:
+                orelse = [_level(tests[1:], else_)]
+            return self._new(_ast.If, test, body, orelse)
+        return _level(node.tests, node.else_)
+
+    def visit_With(self, node):
+        return self._new(_ast.With, self.visit(node.expr),
+                         self.visit(node.vars), self.visit(node.body))
+
+    def visit_Raise(self, node):
+        return self._new(_ast.Raise, self.visit(node.expr1),
+                         self.visit(node.expr2), self.visit(node.expr3))
+
+    def visit_TryExcept(self, node):
+        handlers = []
+        for type, name, body in node.handlers:
+            handlers.append(self._new(_ast.excepthandler, self.visit(type),
+                                      self.visit(name), self.visit(body)))
+        return self._new(_ast.TryExcept, self.visit(node.body),
+                         handlers, self.visit(node.else_))
+
+    def visit_TryFinally(self, node):
+        return self._new(_ast.TryFinally, self.visit(node.body),
+                         self.visit(node.final))
+
+    def visit_Assert(self, node):
+        return self._new(_ast.Assert, self.visit(node.test), self.visit(node.fail))
+
+    def visit_Import(self, node):
+        names = [self._new(_ast.alias, n[0], n[1]) for n in node.names]
+        return self._new(_ast.Import, names)
+
+    def visit_From(self, node):
+        names = [self._new(_ast.alias, n[0], n[1]) for n in node.names]
+        return self._new(_ast.ImportFrom, node.modname, names, 0)
+
+    def visit_Exec(self, node):
+        # NOTE(review): ``node.locals`` is passed before ``node.globals``;
+        # presumably the compiler.ast attribute names are swapped relative
+        # to the target field order -- confirm before changing.
+        return self._new(_ast.Exec, self.visit(node.expr),
+                         self.visit(node.locals), self.visit(node.globals))
+
+    def visit_Global(self, node):
+        return self._new(_ast.Global, node.names[:])
+
+    def visit_Discard(self, node):
+        return self._new(_ast.Expr, self.visit(node.expr))
+
+    def _map_class(to):
+        # Factory for nodes that convert to a field-less `to` instance
+        def _visit(self, node):
+            return self._new(to)
+        return _visit
+
+    visit_Pass = _map_class(_ast.Pass)
+    visit_Break = _map_class(_ast.Break)
+    visit_Continue = _map_class(_ast.Continue)
+
+    def _visit_BinOperator(opcls):
+        def _visit(self, node):
+            return self._new(_ast.BinOp, self.visit(node.left),
+                             opcls(), self.visit(node.right))
+        return _visit
+    visit_Add = _visit_BinOperator(_ast.Add)
+    visit_Div = _visit_BinOperator(_ast.Div)
+    visit_FloorDiv = _visit_BinOperator(_ast.FloorDiv)
+    visit_LeftShift = _visit_BinOperator(_ast.LShift)
+    visit_Mod = _visit_BinOperator(_ast.Mod)
+    visit_Mul = _visit_BinOperator(_ast.Mult)
+    visit_Power = _visit_BinOperator(_ast.Pow)
+    visit_RightShift = _visit_BinOperator(_ast.RShift)
+    visit_Sub = _visit_BinOperator(_ast.Sub)
+    del _visit_BinOperator
+
+    def _visit_BitOperator(opcls):
+        def _visit(self, node):
+            # The operands arrive as one flat list; fold them into
+            # left-nested BinOp nodes.
+            def _make(nodes):
+                if len(nodes) == 1:
+                    return self.visit(nodes[0])
+                left = _make(nodes[:-1])
+                right = self.visit(nodes[-1])
+                return self._new(_ast.BinOp, left, opcls(), right)
+            return _make(node.nodes)
+        return _visit
+    visit_Bitand = _visit_BitOperator(_ast.BitAnd)
+    visit_Bitor = _visit_BitOperator(_ast.BitOr)
+    visit_Bitxor = _visit_BitOperator(_ast.BitXor)
+    del _visit_BitOperator
+
+    def _visit_UnaryOperator(opcls):
+        def _visit(self, node):
+            return self._new(_ast.UnaryOp, opcls(), self.visit(node.expr))
+        return _visit
+
+    visit_Invert = _visit_UnaryOperator(_ast.Invert)
+    visit_Not = _visit_UnaryOperator(_ast.Not)
+    visit_UnaryAdd = _visit_UnaryOperator(_ast.UAdd)
+    visit_UnarySub = _visit_UnaryOperator(_ast.USub)
+    del _visit_UnaryOperator
+
+    def _visit_BoolOperator(opcls):
+        def _visit(self, node):
+            values = [self.visit(n) for n in node.nodes]
+            return self._new(_ast.BoolOp, opcls(), values)
+        return _visit
+    visit_And = _visit_BoolOperator(_ast.And)
+    visit_Or = _visit_BoolOperator(_ast.Or)
+    del _visit_BoolOperator
+
+    # Maps the comparison operator text to its operator class
+    cmp_operators = {
+        '==': _ast.Eq,
+        '!=': _ast.NotEq,
+        '<': _ast.Lt,
+        '<=': _ast.LtE,
+        '>': _ast.Gt,
+        '>=': _ast.GtE,
+        'is': _ast.Is,
+        'is not': _ast.IsNot,
+        'in': _ast.In,
+        'not in': _ast.NotIn,
+    }
+
+    def visit_Compare(self, node):
+        left = self.visit(node.expr)
+        ops = []
+        comparators = []
+        for optype, expr in node.ops:
+            ops.append(self.cmp_operators[optype]())
+            comparators.append(self.visit(expr))
+        return self._new(_ast.Compare, left, ops, comparators)
+
+    def visit_Lambda(self, node):
+        args = self._extract_args(node)
+        body = self.visit(node.code)
+        return self._new(_ast.Lambda, args, body)
+
+    def visit_IfExp(self, node):
+        return self._new(_ast.IfExp, self.visit(node.test), self.visit(node.then),
+                         self.visit(node.else_))
+
+    def visit_Dict(self, node):
+        keys = [self.visit(x[0]) for x in node.items]
+        values = [self.visit(x[1]) for x in node.items]
+        return self._new(_ast.Dict, keys, values)
+
+    def visit_ListComp(self, node):
+        generators = [self.visit(q) for q in node.quals]
+        return self._new(_ast.ListComp, self.visit(node.expr), generators)
+
+    def visit_GenExprInner(self, node):
+        generators = [self.visit(q) for q in node.quals]
+        return self._new(_ast.GeneratorExp, self.visit(node.expr), generators)
+
+    def visit_GenExpr(self, node):
+        return self.visit(node.code)
+
+    def visit_GenExprFor(self, node):
+        ifs = [self.visit(i) for i in node.ifs]
+        return self._new(_ast.comprehension, self.visit(node.assign),
+                         self.visit(node.iter), ifs)
+
+    def visit_ListCompFor(self, node):
+        ifs = [self.visit(i) for i in node.ifs]
+        return self._new(_ast.comprehension, self.visit(node.assign),
+                         self.visit(node.list), ifs)
+
+    def visit_GenExprIf(self, node):
+        return self.visit(node.test)
+    visit_ListCompIf = visit_GenExprIf
+
+    def visit_Yield(self, node):
+        return self._new(_ast.Yield, self.visit(node.value))
+
+    def visit_CallFunc(self, node):
+        # Keyword arguments are mixed into ``node.args``; separate them
+        args = []
+        keywords = []
+        for arg in node.args:
+            if isinstance(arg, compiler.ast.Keyword):
+                keywords.append(self._new(_ast.keyword, arg.name,
+                                          self.visit(arg.expr)))
+            else:
+                args.append(self.visit(arg))
+        return self._new(_ast.Call, self.visit(node.node), args, keywords,
+                         self.visit(node.star_args), self.visit(node.dstar_args))
+
+    def visit_Backquote(self, node):
+        return self._new(_ast.Repr, self.visit(node.expr))
+
+    def visit_Const(self, node):
+        if node.value is None: # appears in slices
+            return None
+        elif isinstance(node.value, basestring):
+            return self._new(_ast.Str, node.value)
+        else:
+            return self._new(_ast.Num, node.value)
+
+    def visit_Name(self, node):
+        return self._new(_ast.Name, node.name, _ast.Load())
+
+    def visit_Getattr(self, node):
+        return self._new(_ast.Attribute, self.visit(node.expr), node.attrname,
+                         _ast.Load())
+
+    def visit_Tuple(self, node):
+        nodes = [self.visit(n) for n in node.nodes]
+        return self._new(_ast.Tuple, nodes, _ast.Load())
+
+    def visit_List(self, node):
+        nodes = [self.visit(n) for n in node.nodes]
+        return self._new(_ast.List, nodes, _ast.Load())
+
+    def get_ctx(self, flags):
+        """Return a new expression-context instance for the given
+        ``OP_DELETE`` / ``OP_APPLY`` / ``OP_ASSIGN`` flag string."""
+        if flags == 'OP_DELETE':
+            return _ast.Del()
+        elif flags == 'OP_APPLY':
+            return _ast.Load()
+        elif flags == 'OP_ASSIGN':
+            return _ast.Store()
+        else:
+            # FIXME Exception here
+            assert False, repr(flags)
+
+    def visit_AssName(self, node):
+        self.out_flags = node.flags
+        ctx = self.get_ctx(node.flags)
+        return self._new(_ast.Name, node.name, ctx)
+
+    def visit_AssAttr(self, node):
+        self.out_flags = node.flags
+        ctx = self.get_ctx(node.flags)
+        return self._new(_ast.Attribute, self.visit(node.expr),
+                         node.attrname, ctx)
+
+    def _visit_AssCollection(cls):
+        def _visit(self, node):
+            # The collection has no flags of its own here; take them from
+            # the elements (via ``out_flags``), which must all agree.
+            flags = None
+            elts = []
+            for n in node.nodes:
+                elts.append(self.visit(n))
+                if flags is None:
+                    flags = self.out_flags
+                else:
+                    assert flags == self.out_flags
+            self.out_flags = flags
+            ctx = self.get_ctx(flags)
+            return self._new(cls, elts, ctx)
+        return _visit
+
+    visit_AssList = _visit_AssCollection(_ast.List)
+    visit_AssTuple = _visit_AssCollection(_ast.Tuple)
+    del _visit_AssCollection
+
+    def visit_Slice(self, node):
+        lower = self.visit(node.lower)
+        upper = self.visit(node.upper)
+        ctx = self.get_ctx(node.flags)
+        self.out_flags = node.flags
+        return self._new(_ast.Subscript, self.visit(node.expr),
+                         self._new(_ast.Slice, lower, upper, None), ctx)
+
+    def visit_Subscript(self, node):
+        ctx = self.get_ctx(node.flags)
+        subs = [self.visit(s) for s in node.subs]
+
+        # Plain expressions get wrapped in Index; Slice and Ellipsis are
+        # used as they are.
+        advanced = (_ast.Slice, _ast.Ellipsis)
+        slices = []
+        nonindex = False
+        for sub in subs:
+            if isinstance(sub, advanced):
+                nonindex = True
+                slices.append(sub)
+            else:
+                slices.append(self._new(_ast.Index, sub))
+        if len(slices) == 1:
+            slice = slices[0]
+        elif nonindex:
+            slice = self._new(_ast.ExtSlice, slices)
+        else:
+            slice = self._new(_ast.Tuple, slices, _ast.Load())
+
+        self.out_flags = node.flags
+        return self._new(_ast.Subscript, self.visit(node.expr), slice, ctx)
+
+    def visit_Sliceobj(self, node):
+        # Pad with None so that lower, upper and step are always present
+        a = [self.visit(n) for n in node.nodes + [None]*(3 - len(node.nodes))]
+        return self._new(_ast.Slice, a[0], a[1], a[2])
+
+    def visit_Ellipsis(self, node):
+        return self._new(_ast.Ellipsis)
+
+    def visit_Stmt(self, node):
+        def _check_del(n):
+            # del x is just AssName('x', 'OP_DELETE')
+            # we want to transform it to Delete([Name('x', Del())])
+            dcls = (_ast.Name, _ast.List, _ast.Subscript, _ast.Attribute)
+            if isinstance(n, dcls) and isinstance(n.ctx, _ast.Del):
+                return self._new(_ast.Delete, [n])
+            elif isinstance(n, _ast.Tuple) and isinstance(n.ctx, _ast.Del):
+                # unpack last tuple to avoid making del (x, y, z,);
+                # out of del x, y, z; (there's no difference between
+                # this two in compiler.ast)
+                return self._new(_ast.Delete, n.elts)
+            else:
+                return n
+        def _keep(n):
+            # Drop expression statements whose value converted to None
+            if isinstance(n, _ast.Expr) and n.value is None:
+                return False
+            else:
+                return True
+        return [s for s in [_check_del(self.visit(n)) for n in node.nodes]
+                if _keep(s)]
+
+
+
+def parse(source, mode):
+    """Parse `source` with ``compiler.parse`` in the given `mode` and return
+    the tree converted by `ASTUpgrader`."""
+    return ASTUpgrader().visit(compiler.parse(source, mode))
diff --git a/genshi/template/astutil.py b/genshi/template/astutil.py
new file mode 100644
index 0000000..c3ad107
--- /dev/null
+++ b/genshi/template/astutil.py
@@ -0,0 +1,784 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2008-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support classes for generating code from abstract syntax trees."""
+
+# Prefer the builtin ``_ast`` module; when it cannot be imported, fall back
+# to the emulation (and its ``parse`` function) from genshi.template.ast24.
+try:
+ import _ast
+except ImportError:
+ from genshi.template.ast24 import _ast, parse
+else:
+ def parse(source, mode):
+ # Compile with the PyCF_ONLY_AST flag and an empty filename
+ return compile(source, '', mode, _ast.PyCF_ONLY_AST)
+
+
+__docformat__ = 'restructuredtext en'
+
+
+class ASTCodeGenerator(object):
+    """Generates Python source code from an abstract syntax tree.
+
+    Instantiating the class visits `tree` immediately. Afterwards:
+
+    * ``code`` holds the generated source text, and
+    * ``lines_info`` holds one entry per generated line: a list of
+      ``(column, position)`` pairs, where ``position`` is the
+      ``(lineno, col_offset)`` of the node that produced the text starting
+      at ``column``, or ``None``.
+
+    Dispatch is by class name: `visit` looks up ``visit_<ClassName>`` and
+    raises if no such method exists.
+    """
+    def __init__(self, tree):
+        self.lines_info = []
+        self.line_info = None
+        self.code = ''
+        self.line = None
+        self.last = None
+        self.indent = 0
+        self.blame_stack = []
+        self.visit(tree)
+        # ``self.line`` is still None when the tree produced no output at
+        # all (e.g. an empty module), so guard before flushing it.
+        if self.line is not None and self.line.strip():
+            self.code += self.line + '\n'
+            self.lines_info.append(self.line_info)
+        self.line = None
+        self.line_info = None
+
+    def _change_indent(self, delta):
+        self.indent += delta
+
+    def _new_line(self):
+        """Flush the current line, if any, and start a new one at the
+        current indentation level."""
+        if self.line is not None:
+            self.code += self.line + '\n'
+            self.lines_info.append(self.line_info)
+        self.line = ' '*4*self.indent
+        if len(self.blame_stack) == 0:
+            self.line_info = []
+            self.last = None
+        else:
+            self.line_info = [(0, self.blame_stack[-1],)]
+            self.last = self.blame_stack[-1]
+
+    def _write(self, s):
+        """Append `s` to the current line, recording a new position entry
+        whenever the node being blamed changes."""
+        if len(s) == 0:
+            return
+        if len(self.blame_stack) == 0:
+            if self.last is not None:
+                self.last = None
+                self.line_info.append((len(self.line), self.last))
+        else:
+            if self.last != self.blame_stack[-1]:
+                self.last = self.blame_stack[-1]
+                self.line_info.append((len(self.line), self.last))
+        self.line += s
+
+    def visit(self, node):
+        """Generate code for `node` by dispatching to ``visit_<ClassName>``.
+
+        ``None`` is ignored and tuples are visited element-wise. While a
+        node with position information is being visited, its position is on
+        top of ``blame_stack``.
+
+        :raise Exception: if there is no visitor for the node's class
+        """
+        if node is None:
+            return None
+        if type(node) is tuple:
+            return tuple([self.visit(n) for n in node])
+        try:
+            self.blame_stack.append((node.lineno, node.col_offset,))
+            info = True
+        except AttributeError:
+            info = False
+        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
+        if visitor is None:
+            raise Exception('Unhandled node type %r' % type(node))
+        ret = visitor(node)
+        if info:
+            self.blame_stack.pop()
+        return ret
+
+    def visit_Module(self, node):
+        for n in node.body:
+            self.visit(n)
+    visit_Interactive = visit_Module
+    visit_Suite = visit_Module
+
+    def visit_Expression(self, node):
+        self._new_line()
+        return self.visit(node.body)
+
+    # arguments = (expr* args, identifier? vararg,
+    #              identifier? kwarg, expr* defaults)
+    def visit_arguments(self, node):
+        first = True
+        # Defaults belong to the trailing arguments
+        no_default_count = len(node.args) - len(node.defaults)
+        for i, arg in enumerate(node.args):
+            if not first:
+                self._write(', ')
+            else:
+                first = False
+            self.visit(arg)
+            if i >= no_default_count:
+                self._write('=')
+                self.visit(node.defaults[i - no_default_count])
+        if getattr(node, 'vararg', None):
+            if not first:
+                self._write(', ')
+            else:
+                first = False
+            self._write('*' + node.vararg)
+        if getattr(node, 'kwarg', None):
+            if not first:
+                self._write(', ')
+            else:
+                first = False
+            self._write('**' + node.kwarg)
+
+    # FunctionDef(identifier name, arguments args,
+    #             stmt* body, expr* decorator_list)
+    def visit_FunctionDef(self, node):
+        if hasattr(node, 'decorator_list'):
+            decorators = getattr(node, 'decorator_list')
+        else: # different name in earlier Python versions
+            decorators = getattr(node, 'decorators', ())
+        for decorator in decorators:
+            self._new_line()
+            self._write('@')
+            self.visit(decorator)
+        self._new_line()
+        self._write('def ' + node.name + '(')
+        self.visit(node.args)
+        self._write('):')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+    # ClassDef(identifier name, expr* bases, stmt* body)
+    def visit_ClassDef(self, node):
+        self._new_line()
+        self._write('class ' + node.name)
+        if node.bases:
+            self._write('(')
+            self.visit(node.bases[0])
+            for base in node.bases[1:]:
+                self._write(', ')
+                self.visit(base)
+            self._write(')')
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+    # Return(expr? value)
+    def visit_Return(self, node):
+        self._new_line()
+        self._write('return')
+        if getattr(node, 'value', None):
+            self._write(' ')
+            self.visit(node.value)
+
+    # Delete(expr* targets)
+    def visit_Delete(self, node):
+        self._new_line()
+        self._write('del ')
+        self.visit(node.targets[0])
+        for target in node.targets[1:]:
+            self._write(', ')
+            self.visit(target)
+
+    # Assign(expr* targets, expr value)
+    def visit_Assign(self, node):
+        self._new_line()
+        for target in node.targets:
+            self.visit(target)
+            self._write(' = ')
+        self.visit(node.value)
+
+    # AugAssign(expr target, operator op, expr value)
+    def visit_AugAssign(self, node):
+        self._new_line()
+        self.visit(node.target)
+        self._write(' ' + self.binary_operators[node.op.__class__] + '= ')
+        self.visit(node.value)
+
+    # Print(expr? dest, expr* values, bool nl)
+    def visit_Print(self, node):
+        self._new_line()
+        self._write('print')
+        if getattr(node, 'dest', None):
+            self._write(' >> ')
+            self.visit(node.dest)
+            if getattr(node, 'values', None):
+                self._write(', ')
+        else:
+            self._write(' ')
+        if getattr(node, 'values', None):
+            self.visit(node.values[0])
+            for value in node.values[1:]:
+                self._write(', ')
+                self.visit(value)
+        if not node.nl:
+            # A trailing comma suppresses the newline
+            self._write(',')
+
+    # For(expr target, expr iter, stmt* body, stmt* orelse)
+    def visit_For(self, node):
+        self._new_line()
+        self._write('for ')
+        self.visit(node.target)
+        self._write(' in ')
+        self.visit(node.iter)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'orelse', None):
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # While(expr test, stmt* body, stmt* orelse)
+    def visit_While(self, node):
+        self._new_line()
+        self._write('while ')
+        self.visit(node.test)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'orelse', None):
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # If(expr test, stmt* body, stmt* orelse)
+    def visit_If(self, node):
+        self._new_line()
+        self._write('if ')
+        self.visit(node.test)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'orelse', None):
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # With(expr context_expr, expr? optional_vars, stmt* body)
+    def visit_With(self, node):
+        self._new_line()
+        self._write('with ')
+        self.visit(node.context_expr)
+        if getattr(node, 'optional_vars', None):
+            self._write(' as ')
+            self.visit(node.optional_vars)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+
+    # Raise(expr? type, expr? inst, expr? tback)
+    def visit_Raise(self, node):
+        self._new_line()
+        self._write('raise')
+        if not node.type:
+            return
+        self._write(' ')
+        self.visit(node.type)
+        if not node.inst:
+            return
+        self._write(', ')
+        self.visit(node.inst)
+        if not node.tback:
+            return
+        self._write(', ')
+        self.visit(node.tback)
+
+    # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
+    def visit_TryExcept(self, node):
+        self._new_line()
+        self._write('try:')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'handlers', None):
+            for handler in node.handlers:
+                self.visit(handler)
+        # A new line is started even without an ``else`` clause; in that
+        # case it simply stays empty.
+        self._new_line()
+        if getattr(node, 'orelse', None):
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # excepthandler = (expr? type, expr? name, stmt* body)
+    def visit_ExceptHandler(self, node):
+        self._new_line()
+        self._write('except')
+        if getattr(node, 'type', None):
+            self._write(' ')
+            self.visit(node.type)
+        if getattr(node, 'name', None):
+            self._write(', ')
+            self.visit(node.name)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+    visit_excepthandler = visit_ExceptHandler
+
+    # TryFinally(stmt* body, stmt* finalbody)
+    def visit_TryFinally(self, node):
+        self._new_line()
+        self._write('try:')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+        if getattr(node, 'finalbody', None):
+            self._new_line()
+            self._write('finally:')
+            self._change_indent(1)
+            for statement in node.finalbody:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # Assert(expr test, expr? msg)
+    def visit_Assert(self, node):
+        self._new_line()
+        self._write('assert ')
+        self.visit(node.test)
+        if getattr(node, 'msg', None):
+            self._write(', ')
+            self.visit(node.msg)
+
+    def visit_alias(self, node):
+        self._write(node.name)
+        if getattr(node, 'asname', None):
+            self._write(' as ')
+            self._write(node.asname)
+
+    # Import(alias* names)
+    def visit_Import(self, node):
+        self._new_line()
+        self._write('import ')
+        self.visit(node.names[0])
+        for name in node.names[1:]:
+            self._write(', ')
+            self.visit(name)
+
+    # ImportFrom(identifier module, alias* names, int? level)
+    def visit_ImportFrom(self, node):
+        self._new_line()
+        self._write('from ')
+        if node.level:
+            self._write('.' * node.level)
+        # A purely relative import ("from . import x") has no module name
+        if node.module:
+            self._write(node.module)
+        self._write(' import ')
+        self.visit(node.names[0])
+        for name in node.names[1:]:
+            self._write(', ')
+            self.visit(name)
+
+    # Exec(expr body, expr? globals, expr? locals)
+    def visit_Exec(self, node):
+        self._new_line()
+        self._write('exec ')
+        self.visit(node.body)
+        if not node.globals:
+            return
+        self._write(', ')
+        self.visit(node.globals)
+        if not node.locals:
+            return
+        self._write(', ')
+        self.visit(node.locals)
+
+    # Global(identifier* names)
+    def visit_Global(self, node):
+        self._new_line()
+        self._write('global ')
+        # ``names`` holds plain identifier strings, not nodes, so they are
+        # written directly; visit() has no handler for strings.
+        self._write(', '.join(node.names))
+
+    # Expr(expr value)
+    def visit_Expr(self, node):
+        self._new_line()
+        self.visit(node.value)
+
+    # Pass
+    def visit_Pass(self, node):
+        self._new_line()
+        self._write('pass')
+
+    # Break
+    def visit_Break(self, node):
+        self._new_line()
+        self._write('break')
+
+    # Continue
+    def visit_Continue(self, node):
+        self._new_line()
+        self._write('continue')
+
+    ### EXPRESSIONS
+    def with_parens(f):
+        # Decorator: wrap the output of visitor `f` in parentheses so the
+        # generated expression keeps its grouping regardless of context.
+        def _f(self, node):
+            self._write('(')
+            f(self, node)
+            self._write(')')
+        return _f
+
+    bool_operators = {_ast.And: 'and', _ast.Or: 'or'}
+
+    # BoolOp(boolop op, expr* values)
+    @with_parens
+    def visit_BoolOp(self, node):
+        joiner = ' ' + self.bool_operators[node.op.__class__] + ' '
+        self.visit(node.values[0])
+        for value in node.values[1:]:
+            self._write(joiner)
+            self.visit(value)
+
+    binary_operators = {
+        _ast.Add: '+',
+        _ast.Sub: '-',
+        _ast.Mult: '*',
+        _ast.Div: '/',
+        _ast.Mod: '%',
+        _ast.Pow: '**',
+        _ast.LShift: '<<',
+        _ast.RShift: '>>',
+        _ast.BitOr: '|',
+        _ast.BitXor: '^',
+        _ast.BitAnd: '&',
+        _ast.FloorDiv: '//'
+    }
+
+    # BinOp(expr left, operator op, expr right)
+    @with_parens
+    def visit_BinOp(self, node):
+        self.visit(node.left)
+        self._write(' ' + self.binary_operators[node.op.__class__] + ' ')
+        self.visit(node.right)
+
+    unary_operators = {
+        _ast.Invert: '~',
+        _ast.Not: 'not',
+        _ast.UAdd: '+',
+        _ast.USub: '-',
+    }
+
+    # UnaryOp(unaryop op, expr operand)
+    # Parenthesised so that e.g. ``(-a) ** b`` or ``(not a).b`` are not
+    # regenerated with the operator binding to a larger expression.
+    @with_parens
+    def visit_UnaryOp(self, node):
+        self._write(self.unary_operators[node.op.__class__] + ' ')
+        self.visit(node.operand)
+
+    # Lambda(arguments args, expr body)
+    @with_parens
+    def visit_Lambda(self, node):
+        self._write('lambda ')
+        self.visit(node.args)
+        self._write(': ')
+        self.visit(node.body)
+
+    # IfExp(expr test, expr body, expr orelse)
+    @with_parens
+    def visit_IfExp(self, node):
+        self.visit(node.body)
+        self._write(' if ')
+        self.visit(node.test)
+        self._write(' else ')
+        self.visit(node.orelse)
+
+    # Dict(expr* keys, expr* values)
+    def visit_Dict(self, node):
+        self._write('{')
+        for key, value in zip(node.keys, node.values):
+            self.visit(key)
+            self._write(': ')
+            self.visit(value)
+            self._write(', ')
+        self._write('}')
+
+    # ListComp(expr elt, comprehension* generators)
+    def visit_ListComp(self, node):
+        self._write('[')
+        self.visit(node.elt)
+        for generator in node.generators:
+            # comprehension = (expr target, expr iter, expr* ifs)
+            self._write(' for ')
+            self.visit(generator.target)
+            self._write(' in ')
+            self.visit(generator.iter)
+            for ifexpr in generator.ifs:
+                self._write(' if ')
+                self.visit(ifexpr)
+        self._write(']')
+
+    # GeneratorExp(expr elt, comprehension* generators)
+    def visit_GeneratorExp(self, node):
+        self._write('(')
+        self.visit(node.elt)
+        for generator in node.generators:
+            # comprehension = (expr target, expr iter, expr* ifs)
+            self._write(' for ')
+            self.visit(generator.target)
+            self._write(' in ')
+            self.visit(generator.iter)
+            for ifexpr in generator.ifs:
+                self._write(' if ')
+                self.visit(ifexpr)
+        self._write(')')
+
+    # Yield(expr? value)
+    def visit_Yield(self, node):
+        self._write('yield')
+        if getattr(node, 'value', None):
+            self._write(' ')
+            self.visit(node.value)
+
+    comparision_operators = {
+        _ast.Eq: '==',
+        _ast.NotEq: '!=',
+        _ast.Lt: '<',
+        _ast.LtE: '<=',
+        _ast.Gt: '>',
+        _ast.GtE: '>=',
+        _ast.Is: 'is',
+        _ast.IsNot: 'is not',
+        _ast.In: 'in',
+        _ast.NotIn: 'not in',
+    }
+
+    # Compare(expr left, cmpop* ops, expr* comparators)
+    @with_parens
+    def visit_Compare(self, node):
+        self.visit(node.left)
+        for op, comparator in zip(node.ops, node.comparators):
+            self._write(' ' + self.comparision_operators[op.__class__] + ' ')
+            self.visit(comparator)
+
+    # Call(expr func, expr* args, keyword* keywords,
+    #      expr? starargs, expr? kwargs)
+    def visit_Call(self, node):
+        self.visit(node.func)
+        self._write('(')
+        first = True
+        for arg in node.args:
+            if not first:
+                self._write(', ')
+            first = False
+            self.visit(arg)
+
+        for keyword in node.keywords:
+            if not first:
+                self._write(', ')
+            first = False
+            # keyword = (identifier arg, expr value)
+            self._write(keyword.arg)
+            self._write('=')
+            self.visit(keyword.value)
+        if getattr(node, 'starargs', None):
+            if not first:
+                self._write(', ')
+            first = False
+            self._write('*')
+            self.visit(node.starargs)
+
+        if getattr(node, 'kwargs', None):
+            if not first:
+                self._write(', ')
+            first = False
+            self._write('**')
+            self.visit(node.kwargs)
+        self._write(')')
+
+    # Repr(expr value)
+    def visit_Repr(self, node):
+        self._write('`')
+        self.visit(node.value)
+        self._write('`')
+
+    # Num(object n)
+    def visit_Num(self, node):
+        self._write(repr(node.n))
+
+    # Str(string s)
+    def visit_Str(self, node):
+        self._write(repr(node.s))
+
+    # Attribute(expr value, identifier attr, expr_context ctx)
+    def visit_Attribute(self, node):
+        self.visit(node.value)
+        self._write('.')
+        self._write(node.attr)
+
+    # Subscript(expr value, slice slice, expr_context ctx)
+    def visit_Subscript(self, node):
+        self.visit(node.value)
+        self._write('[')
+        def _process_slice(node):
+            if isinstance(node, _ast.Ellipsis):
+                self._write('...')
+            elif isinstance(node, _ast.Slice):
+                if getattr(node, 'lower', None):
+                    self.visit(node.lower)
+                self._write(':')
+                if getattr(node, 'upper', None):
+                    self.visit(node.upper)
+                if getattr(node, 'step', None):
+                    self._write(':')
+                    self.visit(node.step)
+            elif isinstance(node, _ast.Index):
+                self.visit(node.value)
+            elif isinstance(node, _ast.ExtSlice):
+                # The dimensions are slice nodes themselves, for which no
+                # ``visit_*`` methods exist, so recurse instead of visiting.
+                _process_slice(node.dims[0])
+                for dim in node.dims[1:]:
+                    self._write(', ')
+                    _process_slice(dim)
+            else:
+                raise NotImplementedError('Slice type not implemented')
+        _process_slice(node.slice)
+        self._write(']')
+
+    # Name(identifier id, expr_context ctx)
+    def visit_Name(self, node):
+        self._write(node.id)
+
+    # List(expr* elts, expr_context ctx)
+    def visit_List(self, node):
+        self._write('[')
+        for elt in node.elts:
+            self.visit(elt)
+            self._write(', ')
+        self._write(']')
+
+    # Tuple(expr *elts, expr_context ctx)
+    def visit_Tuple(self, node):
+        self._write('(')
+        for elt in node.elts:
+            self.visit(elt)
+            # The trailing comma keeps a one-element tuple a tuple
+            self._write(', ')
+        self._write(')')
+
+
+
+class ASTTransformer(object):
+    """General purpose base class for AST transformations.
+
+    Every visitor method can be overridden to return an AST node that has been
+    altered or replaced in some way.
+
+    By default, the node types listed below are copied via `_clone`; any
+    node type without a ``visit_*`` method is returned unchanged.
+    """
+
+    def visit(self, node):
+        """Transform `node` by dispatching to ``visit_<ClassName>``.
+
+        ``None`` is passed through, tuples are transformed element-wise, and
+        nodes without a matching visitor are returned as they are.
+        """
+        if node is None:
+            return None
+        if type(node) is tuple:
+            return tuple([self.visit(n) for n in node])
+        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
+        if visitor is None:
+            return node
+        return visitor(node)
+
+    def _clone(self, node):
+        """Return a new node of the same class as `node`, with its
+        attributes copied and its fields recursively visited."""
+        clone = node.__class__()
+        for name in getattr(clone, '_attributes', ()):
+            try:
+                # Copy under the attribute's own name (e.g. ``lineno``)
+                setattr(clone, name, getattr(node, name))
+            except AttributeError:
+                pass
+        for name in clone._fields:
+            try:
+                value = getattr(node, name)
+            except AttributeError:
+                pass
+            else:
+                if value is None:
+                    pass
+                elif isinstance(value, list):
+                    value = [self.visit(x) for x in value]
+                elif isinstance(value, tuple):
+                    value = tuple(self.visit(x) for x in value)
+                else:
+                    value = self.visit(value)
+                setattr(clone, name, value)
+        return clone
+
+    visit_Module = _clone
+    visit_Interactive = _clone
+    visit_Expression = _clone
+    visit_Suite = _clone
+
+    visit_FunctionDef = _clone
+    visit_ClassDef = _clone
+    visit_Return = _clone
+    visit_Delete = _clone
+    visit_Assign = _clone
+    visit_AugAssign = _clone
+    visit_Print = _clone
+    visit_For = _clone
+    visit_While = _clone
+    visit_If = _clone
+    visit_With = _clone
+    visit_Raise = _clone
+    visit_TryExcept = _clone
+    visit_TryFinally = _clone
+    visit_Assert = _clone
+    visit_ExceptHandler = _clone
+
+    visit_Import = _clone
+    visit_ImportFrom = _clone
+    visit_Exec = _clone
+    visit_Global = _clone
+    visit_Expr = _clone
+    # Pass, Break, Continue don't need to be copied
+
+    visit_BoolOp = _clone
+    visit_BinOp = _clone
+    visit_UnaryOp = _clone
+    visit_Lambda = _clone
+    visit_IfExp = _clone
+    visit_Dict = _clone
+    visit_ListComp = _clone
+    visit_GeneratorExp = _clone
+    visit_Yield = _clone
+    visit_Compare = _clone
+    visit_Call = _clone
+    visit_Repr = _clone
+    # Num, Str don't need to be copied
+
+    visit_Attribute = _clone
+    visit_Subscript = _clone
+    visit_Name = _clone
+    visit_List = _clone
+    visit_Tuple = _clone
+
+    visit_comprehension = _clone
+    visit_excepthandler = _clone
+    visit_arguments = _clone
+    visit_keyword = _clone
+    visit_alias = _clone
+
+    visit_Slice = _clone
+    visit_ExtSlice = _clone
+    visit_Index = _clone
+
+    del _clone
diff --git a/genshi/template/base.py b/genshi/template/base.py
new file mode 100644
index 0000000..202faae
--- /dev/null
+++ b/genshi/template/base.py
@@ -0,0 +1,634 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Basic templating functionality."""
+
+from collections import deque
+import os
+from StringIO import StringIO
+import sys
+
+from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
+from genshi.input import ParseError
+
+# Names exported by ``from genshi.template.base import *``.
+__all__ = ['Context', 'DirectiveFactory', 'Template', 'TemplateError',
+ 'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError']
+__docformat__ = 'restructuredtext en'
+
+
+class TemplateError(Exception):
+    """Root of the exception hierarchy for template processing errors."""
+
+    def __init__(self, message, filename=None, lineno=-1, offset=-1):
+        """Create the exception.
+
+        The text handed to `Exception` gets the template location appended
+        whenever a real filename or a non-negative line number is known. The
+        bare message always stays available as ``msg``.
+
+        :param message: the error message
+        :param filename: the filename of the template; ``None`` is replaced
+                         by ``'<string>'``
+        :param lineno: the number of the line in the template at which the
+                       error occurred
+        :param offset: the column number at which the error occurred
+        """
+        if filename is None:
+            filename = '<string>'
+        text = message
+        has_location = filename != '<string>' or lineno >= 0
+        if has_location:
+            text = '%s (%s, line %d)' % (message, filename, lineno)
+        Exception.__init__(self, text)
+        self.msg = message #: the error message string
+        self.filename = filename #: the name of the template file
+        self.lineno = lineno #: the number of the line containing the error
+        self.offset = offset #: the offset on the line
+
+
+class TemplateSyntaxError(TemplateError):
+    """Exception raised when an expression in a template causes a Python syntax
+    error, or the template is not well-formed.
+    """
+
+    def __init__(self, message, filename=None, lineno=-1, offset=-1):
+        """Create the exception.
+
+        :param message: the error message, or the `SyntaxError` instance the
+                        message should be taken from
+        :param filename: the filename of the template
+        :param lineno: the number of the line in the template at which the
+                       error occurred
+        :param offset: the column number at which the error occurred
+        """
+        if isinstance(message, SyntaxError) and message.lineno is not None:
+            # Drop the " (line N)" text referring to the line inside the
+            # expression; the base class appends the template location.
+            message = str(message).replace(' (line %d)' % message.lineno, '')
+        # Forward the offset too: it used to be dropped here, which left the
+        # ``offset`` attribute at -1 no matter what the caller passed.
+        TemplateError.__init__(self, message, filename, lineno, offset)
+
+
+class BadDirectiveError(TemplateSyntaxError):
+    """Exception raised when the parser comes across an unknown directive.
+
+    A directive is unknown when an attribute lives in the directive namespace
+    but its local name does not match any registered directive.
+    """
+
+    def __init__(self, name, filename=None, lineno=-1):
+        """Create the exception.
+
+        :param name: the name of the directive
+        :param filename: the filename of the template
+        :param lineno: the number of the line in the template at which the
+                       error occurred
+        """
+        message = 'bad directive "%s"' % name
+        TemplateSyntaxError.__init__(self, message, filename, lineno)
+
+
+class TemplateRuntimeError(TemplateError):
+ """Exception raised when the evaluation of a Python expression in a
+ template causes an error.
+ """
+
+
+class Context(object):
+    """Container for template input data.
+
+    The data is held as a stack of scopes, each one a dictionary, with the
+    innermost scope at index 0 of ``frames``. Lookups walk the stack from the
+    innermost scope outward and stop at the first scope defining the name.
+
+    Directives such as loops push a scope holding data that should only be
+    visible inside the loop, and pop it again when the loop is done.
+
+    >>> ctxt = Context(one='foo', other=1)
+    >>> ctxt.get('one')
+    'foo'
+    >>> ctxt.get('other')
+    1
+    >>> ctxt.push(dict(one='frost'))
+    >>> ctxt.get('one')
+    'frost'
+    >>> ctxt.get('other')
+    1
+    >>> ctxt.pop()
+    {'one': 'frost'}
+    >>> ctxt.get('one')
+    'foo'
+    """
+
+    def __init__(self, **data):
+        """Initialize the template context, using the keyword arguments as the
+        data of the outermost scope.
+        """
+        scopes = deque([data])
+        self.frames = scopes
+        # Instance attributes bound straight to the deque methods; they take
+        # precedence over the documentation-only `push`/`pop` methods below.
+        self.push = scopes.appendleft
+        self.pop = scopes.popleft
+        self._match_templates = []
+        self._choice_stack = []
+
+        # Helper functions for use in expressions
+        def defined(name):
+            """Return whether a variable with the specified name exists in the
+            expression scope."""
+            return name in self
+        def value_of(name, default=None):
+            """If a variable of the specified name is defined, return its value.
+            Otherwise, return the provided default value, or ``None``."""
+            return self.get(name, default)
+        # setdefault: data passed in under the same names wins
+        data.setdefault('defined', defined)
+        data.setdefault('value_of', value_of)
+
+    def __repr__(self):
+        return repr(list(self.frames))
+
+    def __contains__(self, key):
+        """Return whether a variable exists in any of the scopes.
+
+        :param key: the name of the variable
+        """
+        _, owner = self._find(key)
+        return owner is not None
+    has_key = __contains__
+
+    def __delitem__(self, key):
+        """Remove a variable from all scopes.
+
+        :param key: the name of the variable
+        """
+        for scope in self.frames:
+            if key in scope:
+                del scope[key]
+
+    def __getitem__(self, key):
+        """Get a variable's value, starting at the current scope and going
+        upward.
+
+        :param key: the name of the variable
+        :return: the variable value
+        :raises KeyError: if the requested variable wasn't found in any scope
+        """
+        found, owner = self._find(key)
+        if owner is None:
+            raise KeyError(key)
+        return found
+
+    def __len__(self):
+        """Return the number of distinctly named variables in the context.
+
+        :return: the number of variables in the context
+        """
+        return len(self.items())
+
+    def __setitem__(self, key, value):
+        """Set a variable in the current scope.
+
+        :param key: the name of the variable
+        :param value: the variable value
+        """
+        current = self.frames[0]
+        current[key] = value
+
+    def _find(self, key, default=None):
+        """Retrieve a given variable's value and the frame it was found in.
+
+        Intended primarily for internal use by directives.
+
+        :param key: the name of the variable
+        :param default: the default value to return when the variable is not
+                        found
+        :return: a ``(value, frame)`` tuple; ``frame`` is ``None`` when the
+                 variable is not defined in any scope
+        """
+        for scope in self.frames:
+            if key in scope:
+                return scope[key], scope
+        return default, None
+
+    def get(self, key, default=None):
+        """Get a variable's value, starting at the current scope and going
+        upward.
+
+        :param key: the name of the variable
+        :param default: the default value to return when the variable is not
+                        found
+        """
+        for scope in self.frames:
+            if key in scope:
+                return scope[key]
+        return default
+
+    def keys(self):
+        """Return the name of all variables in the context.
+
+        :return: a list of variable names, each name listed once, in the
+                 order of the scope in which it is first seen
+        """
+        names = []
+        for scope in self.frames:
+            for name in scope:
+                if name not in names:
+                    names.append(name)
+        return names
+
+    def items(self):
+        """Return a list of ``(name, value)`` tuples for all variables in the
+        context.
+
+        :return: a list of variables
+        """
+        pairs = []
+        for name in self.keys():
+            pairs.append((name, self.get(name)))
+        return pairs
+
+    def update(self, mapping):
+        """Update the current scope from the mapping provided."""
+        current = self.frames[0]
+        current.update(mapping)
+
+    def push(self, data):
+        """Push a new scope on the stack.
+
+        :param data: the data dictionary to push on the context stack.
+        """
+
+    def pop(self):
+        """Pop the top-most scope from the stack."""
+
+
+def _apply_directives(stream, directives, ctxt, vars):
+    """Run the stream through a chain of directives.
+
+    Only the first directive is called here; it receives the remaining ones
+    and is responsible for applying them in turn.
+
+    :param stream: the stream the directives should be applied to
+    :param directives: the list of directives to apply
+    :param ctxt: the `Context`
+    :param vars: additional variables that should be available when Python
+                 code is executed
+    :return: the stream with the given directives applied; the stream itself
+             when there are no directives
+    """
+    if not directives:
+        return stream
+    first = directives[0]
+    return first(iter(stream), directives[1:], ctxt, **vars)
+
+
+def _eval_expr(expr, ctxt, vars=None):
+    """Evaluate the given `Expression` object.
+
+    When `vars` is non-empty it is pushed as a temporary scope for the
+    duration of the evaluation. That scope is removed again even if the
+    evaluation raises, so a failing expression does not leave an extra frame
+    on the context stack.
+
+    :param expr: the expression to evaluate
+    :param ctxt: the `Context`
+    :param vars: additional variables that should be available to the
+                 expression
+    :return: the result of the evaluation
+    """
+    if not vars:
+        return expr.evaluate(ctxt)
+    ctxt.push(vars)
+    try:
+        return expr.evaluate(ctxt)
+    finally:
+        # Decided once, before evaluating: the pop always mirrors the push,
+        # whatever the expression did to `vars` and whether or not it raised.
+        ctxt.pop()
+
+
+def _exec_suite(suite, ctxt, vars=None):
+    """Execute the given `Suite` object.
+
+    When `vars` is non-empty, two temporary scopes are pushed: `vars` itself
+    and, on top of it, an empty dictionary. After a successful run both are
+    popped and the contents of the top one are merged into the scope that was
+    current before the call. If the suite raises, both temporary scopes are
+    still removed, but nothing is merged.
+
+    :param suite: the code suite to execute
+    :param ctxt: the `Context`
+    :param vars: additional variables that should be available to the
+                 code
+    """
+    if not vars:
+        suite.execute(ctxt)
+        return
+    ctxt.push(vars)
+    ctxt.push({})
+    try:
+        suite.execute(ctxt)
+    finally:
+        # Decided once, before executing: the pops always mirror the pushes,
+        # even if the suite emptied `vars` or raised.
+        top = ctxt.pop()
+        ctxt.pop()
+    ctxt.frames[0].update(top)
+
+
+class DirectiveFactoryMeta(type):
+    """Meta class for directive factories.
+
+    For every class whose body defines ``directives``, two lookup helpers are
+    derived from that list: ``_dir_by_name`` (name to class) and
+    ``_dir_order`` (the classes in declaration order).
+    """
+
+    def __new__(cls, name, bases, d):
+        try:
+            declared = d['directives']
+        except KeyError:
+            # Nothing declared in this class body; nothing to derive
+            pass
+        else:
+            d['_dir_by_name'] = dict(declared)
+            d['_dir_order'] = [entry[1] for entry in declared]
+
+        return type.__new__(cls, name, bases, d)
+
+
+class DirectiveFactory(object):
+    """Base for classes that provide a set of template directives.
+
+    :since: version 0.6
+    """
+    __metaclass__ = DirectiveFactoryMeta
+
+    directives = []
+    """A list of ``(name, cls)`` tuples that define the set of directives
+    provided by this factory.
+    """
+
+    def get_directive(self, name):
+        """Return the directive class for the given name.
+
+        :param name: the directive name as used in the template
+        :return: the directive class, or ``None`` if no directive is
+                 registered under that name
+        :see: `Directive`
+        """
+        registry = self._dir_by_name
+        return registry.get(name)
+
+    def get_directive_index(self, dir_cls):
+        """Return a key for the given directive class that should be used to
+        sort it among other directives on the same `SUB` event.
+
+        The default implementation simply returns the index of the directive in
+        the `directives` list. Classes that are not in the list get the length
+        of the list as their key.
+
+        :param dir_cls: the directive class
+        :return: the sort key
+        """
+        order = self._dir_order
+        try:
+            return order.index(dir_cls)
+        except ValueError:
+            return len(order)
+
+
+class Template(DirectiveFactory):
+    """Abstract template base class.
+
+    Most of the template processing model is implemented here; the syntax of
+    templates is left to subclasses, which must provide `_parse`.
+    """
+
+    EXEC = StreamEventKind('EXEC')
+    """Stream event kind representing a Python code suite to execute."""
+
+    EXPR = StreamEventKind('EXPR')
+    """Stream event kind representing a Python expression."""
+
+    INCLUDE = StreamEventKind('INCLUDE')
+    """Stream event kind representing the inclusion of another template."""
+
+    SUB = StreamEventKind('SUB')
+    """Stream event kind representing a nested stream to which one or more
+    directives should be applied.
+    """
+
+    serializer = None
+    _number_conv = unicode # function used to convert numbers to event data
+
+    def __init__(self, source, filepath=None, filename=None, loader=None,
+                 encoding=None, lookup='strict', allow_exec=True):
+        """Initialize a template from either a string, a file-like object, or
+        an already parsed markup stream.
+
+        :param source: a string, file-like object, or markup stream to read the
+                       template from
+        :param filepath: the absolute path to the template file
+        :param filename: the path to the template file relative to the search
+                         path
+        :param loader: the `TemplateLoader` to use for loading included
+                       templates
+        :param encoding: the encoding of the `source`
+        :param lookup: the variable lookup mechanism; either "strict" (the
+                       default), "lenient", or a custom lookup class
+        :param allow_exec: whether Python code blocks in templates should be
+                           allowed
+        :raises TemplateSyntaxError: if parsing the source fails with a
+                                     `ParseError`
+
+        :note: Changed in 0.5: Added the `allow_exec` argument
+        """
+        self.filepath = filepath or filename
+        self.filename = filename
+        self.loader = loader
+        self.lookup = lookup
+        self.allow_exec = allow_exec
+        self._init_filters()
+        self._init_loader()
+        self._prepared = False
+
+        # Strings are wrapped so that `_parse` always gets a file-like object
+        # or a stream; anything else is passed through untouched.
+        if isinstance(source, basestring):
+            source = StringIO(source)
+        try:
+            self._stream = self._parse(source, encoding)
+        except ParseError, e:
+            raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset)
+
+    def __getstate__(self):
+        # The filters are bound methods; they are left out of the state and
+        # rebuilt in `__setstate__`.
+        return dict(self.__dict__, filters=[])
+
+    def __setstate__(self, state):
+        self.__dict__ = state
+        self._init_filters()
+
+    def __repr__(self):
+        return '<%s "%s">' % (type(self).__name__, self.filename)
+
+    def _init_filters(self):
+        self.filters = [self._flatten, self._include]
+
+    def _init_loader(self):
+        """Create a default `TemplateLoader` if none was given, rooted at a
+        directory derived from the template's own location.
+        """
+        if self.loader is not None:
+            return
+        from genshi.template.loader import TemplateLoader
+        if not self.filename:
+            basedir = '.'
+        elif self.filepath != self.filename:
+            # Cut as many characters off the end of the normalized full path
+            # as the normalized relative filename is long
+            full = os.path.normpath(self.filepath)
+            relative = os.path.normpath(self.filename)
+            basedir = full[:-len(relative)]
+        else:
+            basedir = os.path.dirname(self.filename)
+        self.loader = TemplateLoader([os.path.abspath(basedir)])
+
+    @property
+    def stream(self):
+        """The event stream of the template; `_prepare` is run over it the
+        first time it is accessed and the result is kept as a list.
+        """
+        if not self._prepared:
+            self._stream = list(self._prepare(self._stream))
+            self._prepared = True
+        return self._stream
+
+    def _parse(self, source, encoding):
+        """Parse the template.
+
+        The parsing stage parses the template and constructs a list of
+        directives that will be executed in the render stage. The input is
+        split up into literal output (text that does not depend on the context
+        data) and directives or expressions.
+
+        Must be implemented by subclasses.
+
+        :param source: a file-like object containing the XML source of the
+                       template, or an XML event stream
+        :param encoding: the encoding of the `source`
+        """
+        raise NotImplementedError
+
+    def _prepare(self, stream):
+        """Call the `attach` method of every directive found in the template.
+
+        Static includes are also inlined here when the loader has
+        auto-reloading turned off.
+
+        :param stream: the event stream of the template
+        """
+        from genshi.template.loader import TemplateNotFound
+
+        for kind, data, pos in stream:
+            if kind is SUB:
+                attached = []
+                substream = data[1]
+                # NOTE(review): this loop rebinds ``pos``, so the SUB event
+                # below is emitted with the position of the last directive
+                # rather than that of the original event. Kept as is.
+                for _, cls, value, namespaces, pos in sorted(data[0]):
+                    directive, substream = cls.attach(self, substream, value,
+                                                      namespaces, pos)
+                    if directive:
+                        attached.append(directive)
+                substream = self._prepare(substream)
+                if attached:
+                    yield kind, (attached, list(substream)), pos
+                else:
+                    # No directive left to apply at runtime: splice the
+                    # nested events straight into the outer stream
+                    for event in substream:
+                        yield event
+                continue
+
+            if kind is INCLUDE:
+                href, cls, fallback = data
+                inline = isinstance(href, basestring) and \
+                         not getattr(self.loader, 'auto_reload', True)
+                if inline:
+                    # If the path to the included template is static, and
+                    # auto-reloading is disabled on the template loader,
+                    # the template is inlined into the stream
+                    try:
+                        tmpl = self.loader.load(href, relative_to=pos[0],
+                                                cls=cls or self.__class__)
+                        for event in tmpl.stream:
+                            yield event
+                    except TemplateNotFound:
+                        if fallback is None:
+                            raise
+                        for event in self._prepare(fallback):
+                            yield event
+                    continue
+                elif fallback:
+                    # Otherwise the include is performed at run time
+                    data = href, cls, list(self._prepare(fallback))
+
+            yield kind, data, pos
+
+    def generate(self, *args, **kwargs):
+        """Apply the template to the given context data.
+
+        Any keyword arguments are made available to the template as context
+        data.
+
+        At most one positional argument is accepted. If it is a `Context`
+        instance, the keyword arguments are not added to the context but
+        handed to the filters as additional variables. If it is ``None``, a
+        new context is built from the keyword arguments. This calling style
+        is used for internal processing.
+
+        :return: a markup event stream representing the result of applying
+                 the template to the context data.
+        """
+        ctxt = None
+        vars = {}
+        if args:
+            assert len(args) == 1
+            ctxt = args[0]
+            if ctxt is not None:
+                vars = kwargs
+                assert isinstance(ctxt, Context)
+        if ctxt is None:
+            ctxt = Context(**kwargs)
+
+        stream = self.stream
+        for filter_ in self.filters:
+            stream = filter_(iter(stream), ctxt, **vars)
+        return Stream(stream, self.serializer)
+
+    def _flatten(self, stream, ctxt, **vars):
+        """Internal stream filter that evaluates expressions, executes code
+        suites and applies directives.
+
+        Nested streams (directive output and iterable expression results) are
+        handled without recursion: the current stream is put on a stack, the
+        nested one is consumed, and the outer one is resumed afterwards.
+        """
+        number_conv = self._number_conv
+        suspended = []
+        suspend = suspended.append
+        resume = suspended.pop
+        stream = iter(stream)
+
+        while True:
+            for kind, data, pos in stream:
+
+                if kind is START and data[1]:
+                    # Attributes may still contain expressions in start tags at
+                    # this point, so do some evaluation
+                    tag, attrs = data
+                    evaluated = []
+                    for name, value in attrs:
+                        if type(value) is list: # this is an interpolated string
+                            parts = []
+                            for subkind, subdata, subpos in \
+                                    self._flatten(value, ctxt, **vars):
+                                if subkind is TEXT and subdata is not None:
+                                    parts.append(subdata)
+                            if not parts:
+                                # Nothing to output: drop the attribute
+                                continue
+                            value = ''.join(parts)
+                        evaluated.append((name, value))
+                    yield kind, (tag, Attrs(evaluated)), pos
+
+                elif kind is EXPR:
+                    result = _eval_expr(data, ctxt, vars)
+                    if result is not None:
+                        # First check for a string, otherwise the iterable test
+                        # below succeeds, and the string will be chopped up into
+                        # individual characters
+                        if isinstance(result, basestring):
+                            yield TEXT, result, pos
+                        elif isinstance(result, (int, float, long)):
+                            yield TEXT, number_conv(result), pos
+                        elif hasattr(result, '__iter__'):
+                            # Switch over to the nested stream; the for-else
+                            # below resumes the outer one later
+                            suspend(stream)
+                            stream = _ensure(result)
+                            break
+                        else:
+                            yield TEXT, unicode(result), pos
+
+                elif kind is SUB:
+                    # This event is a list of directives and a list of nested
+                    # events to which those directives should be applied
+                    suspend(stream)
+                    stream = _apply_directives(data[1], data[0], ctxt, vars)
+                    break
+
+                elif kind is EXEC:
+                    _exec_suite(data, ctxt, vars)
+
+                else:
+                    yield kind, data, pos
+
+            else:
+                # The current stream is exhausted: resume the one it was
+                # nested in, or finish if there is none
+                if not suspended:
+                    break
+                stream = resume()
+
+    def _include(self, stream, ctxt, **vars):
+        """Internal stream filter that performs inclusion of external
+        template files.
+        """
+        from genshi.template.loader import TemplateNotFound
+
+        for event in stream:
+            if event[0] is not INCLUDE:
+                yield event
+                continue
+
+            href, cls, fallback = event[1]
+            if not isinstance(href, basestring):
+                # The path contains expressions: evaluate them and join the
+                # resulting text
+                parts = []
+                for subkind, subdata, subpos in self._flatten(href, ctxt,
+                                                              **vars):
+                    if subkind is TEXT:
+                        parts.append(subdata)
+                href = ''.join([part for part in parts if part is not None])
+            try:
+                tmpl = self.loader.load(href, relative_to=event[2][0],
+                                        cls=cls or self.__class__)
+                for included in tmpl.generate(ctxt, **vars):
+                    yield included
+            except TemplateNotFound:
+                if fallback is None:
+                    raise
+                # Run the fallback content through this template's filters
+                for filter_ in self.filters:
+                    fallback = filter_(iter(fallback), ctxt, **vars)
+                for included in fallback:
+                    yield included
+
+
+# Module-level aliases for the event kinds defined on `Template`.
+EXEC = Template.EXEC
+EXPR = Template.EXPR
+INCLUDE = Template.INCLUDE
+SUB = Template.SUB
diff --git a/genshi/template/directives.py b/genshi/template/directives.py
new file mode 100644
index 0000000..e2c9424
--- /dev/null
+++ b/genshi/template/directives.py
@@ -0,0 +1,725 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of the various template directives."""
+
+from genshi.core import QName, Stream
+from genshi.path import Path
+from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \
+ EXPR, _apply_directives, _eval_expr
+from genshi.template.eval import Expression, ExpressionASTTransformer, \
+ _ast, _parse
+
+# Names exported by ``from genshi.template.directives import *``.
+__all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective',
+ 'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective',
+ 'OtherwiseDirective', 'ReplaceDirective', 'StripDirective',
+ 'WhenDirective', 'WithDirective']
+__docformat__ = 'restructuredtext en'
+
+
+class DirectiveMeta(type):
+    """Meta class for template directives.
+
+    Gives every directive class a ``tagname``: the class name in lower case
+    with every occurrence of ``directive`` removed.
+    """
+
+    def __new__(cls, name, bases, d):
+        lowered = name.lower()
+        d['tagname'] = lowered.replace('directive', '')
+        return type.__new__(cls, name, bases, d)
+
+
+class Directive(object):
+    """Abstract base class for template directives.
+
+    A directive is basically a callable that takes three positional arguments:
+    ``stream`` is an iterable over the events that the directive applies to,
+    ``directives`` is a list of other directives on the same stream that need
+    to be applied, and ``ctxt`` is the template data context.
+
+    Directives can be "anonymous" or "registered". Registered directives can be
+    applied by the template author using an XML attribute with the
+    corresponding name in the template. Such directives should be subclasses of
+    this base class that can be instantiated with the value of the directive
+    attribute as parameter.
+
+    Anonymous directives are simply functions conforming to the protocol
+    described above, and can only be applied programmatically (for example by
+    template filters).
+    """
+    __metaclass__ = DirectiveMeta
+    __slots__ = ['expr']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        self.expr = self._parse_expr(value, template, lineno, offset)
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        """Called after the template stream has been completely parsed.
+
+        :param template: the `Template` object
+        :param stream: the event stream associated with the directive
+        :param value: the argument value for the directive; if the directive was
+                      specified as an element, this will be an `Attrs` instance
+                      with all specified attributes, otherwise it will be a
+                      `unicode` object with just the attribute value
+        :param namespaces: a mapping of namespace URIs to prefixes
+        :param pos: a ``(filename, lineno, offset)`` tuple describing the
+                    location where the directive was found in the source
+
+        This class method should return a ``(directive, stream)`` tuple. If
+        ``directive`` is not ``None``, it should be an instance of the `Directive`
+        class, and gets added to the list of directives applied to the substream
+        at runtime. `stream` is an event stream that replaces the original
+        stream associated with the directive.
+        """
+        location = pos[1:]
+        directive = cls(value, template, namespaces, *location)
+        return directive, stream
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        """Apply the directive to the given stream.
+
+        :param stream: the event stream
+        :param directives: a list of the remaining directives that should
+                           process the stream
+        :param ctxt: the context data
+        :param vars: additional variables that should be made available when
+                     Python code is executed
+        """
+        raise NotImplementedError
+
+    def __repr__(self):
+        parsed = getattr(self, 'expr', None)
+        if parsed is None:
+            suffix = ''
+        else:
+            suffix = ' "%s"' % parsed.source
+        return '<%s%s>' % (type(self).__name__, suffix)
+
+    @classmethod
+    def _parse_expr(cls, expr, template, lineno=-1, offset=-1):
+        """Parses the given expression, raising a useful error message when a
+        syntax error is encountered.
+
+        :return: the parsed `Expression`, or ``None`` for an empty expression
+        :raises TemplateSyntaxError: if the expression is not valid Python
+        """
+        try:
+            # NOTE(review): the and/or form also yields None should an
+            # `Expression` instance ever be falsy; kept unchanged for that
+            # reason.
+            parsed = expr and Expression(expr, template.filepath, lineno,
+                                         lookup=template.lookup) or None
+            return parsed
+        except SyntaxError, err:
+            context = ' in expression "%s" of "%s" directive' % (expr,
+                                                                 cls.tagname)
+            err.msg += context
+            raise TemplateSyntaxError(err, template.filepath, lineno,
+                                      offset + (err.offset or 0))
+
+
+def _assignment(ast):
+    """Build an assignment function from the AST of an assignment target.
+
+    The returned function takes a dictionary and a value, and stores the value
+    in the dictionary under the target name. For tuple targets, which may be
+    nested, the value is unpacked by index.
+    """
+    def _names(node):
+        # Name -> its identifier, Tuple -> tuple of the children's results;
+        # any other node type falls through and gives None
+        if isinstance(node, _ast.Tuple):
+            return tuple([_names(child) for child in node.elts])
+        elif isinstance(node, _ast.Name):
+            return node.id
+    targets = _names(ast)
+    def _assign(data, value, names=targets):
+        if type(names) is not tuple:
+            data[names] = value
+            return
+        for idx, name in enumerate(names):
+            _assign(data, value[idx], name)
+    return _assign
+
+
+class AttrsDirective(Directive):
+    """Implementation of the ``py:attrs`` template directive.
+
+    The value of the ``py:attrs`` attribute should be a dictionary or a sequence
+    of ``(name, value)`` tuples. The items in that dictionary or sequence are
+    added as attributes to the element:
+
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:attrs="foo">Bar</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(foo={'class': 'collapse'}))
+    <ul>
+      <li class="collapse">Bar</li>
+    </ul>
+    >>> print(tmpl.generate(foo=[('class', 'collapse')]))
+    <ul>
+      <li class="collapse">Bar</li>
+    </ul>
+
+    If the value evaluates to ``None`` (or any other non-truth value), no
+    attributes are added:
+
+    >>> print(tmpl.generate(foo=None))
+    <ul>
+      <li>Bar</li>
+    </ul>
+    """
+    __slots__ = []
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        def _generate():
+            # The first event carries the attributes that get modified
+            kind, (tag, attrib), pos = stream.next()
+            attrs = _eval_expr(self.expr, ctxt, vars)
+            if attrs:
+                if isinstance(attrs, Stream):
+                    try:
+                        attrs = iter(attrs).next()
+                    except StopIteration:
+                        attrs = []
+                elif not isinstance(attrs, list): # assume it's a dict
+                    attrs = attrs.items()
+                # Items with a None value remove the attribute, all others
+                # set it to the stripped unicode form of the value
+                dropped = []
+                added = []
+                for name, val in attrs:
+                    if val is None:
+                        dropped.append(name)
+                    else:
+                        added.append((QName(name), unicode(val).strip()))
+                attrib -= dropped
+                attrib |= added
+            yield kind, (tag, attrib), pos
+            for event in stream:
+                yield event
+
+        return _apply_directives(_generate(), directives, ctxt, vars)
+
+
+class ContentDirective(Directive):
+    """Implementation of the ``py:content`` template directive.
+
+    This directive replaces the content of the element with the result of
+    evaluating the value of the ``py:content`` attribute:
+
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:content="bar">Hello</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <ul>
+      <li>Bye</li>
+    </ul>
+    """
+    __slots__ = []
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        location = pos[1:]
+        if type(value) is dict:
+            raise TemplateSyntaxError('The content directive can not be used '
+                                      'as an element', template.filepath,
+                                      *location)
+        expr = cls._parse_expr(value, template, *location)
+        # No runtime directive is needed: keep the first and last event of
+        # the stream and put a single EXPR event in between.
+        first, last = stream[0], stream[-1]
+        return None, [first, (EXPR, expr, pos), last]
+
+
+class DefDirective(Directive):
+    """Implementation of the ``py:def`` template directive.
+
+    This directive can be used to create "Named Template Functions", which
+    are template snippets that are not actually output during normal
+    processing, but rather can be expanded from expressions in other places
+    in the template.
+
+    A named template function can be used just like a normal Python function
+    from template expressions:
+
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <p py:def="echo(greeting, name='world')" class="message">
+    ...     ${greeting}, ${name}!
+    ...   </p>
+    ...   ${echo('Hi', name='you')}
+    ... </div>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <div>
+      <p class="message">
+        Hi, you!
+      </p>
+    </div>
+
+    If a function does not require parameters, the parenthesis can be omitted
+    in the definition:
+
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <p py:def="helloworld" class="message">
+    ...     Hello, world!
+    ...   </p>
+    ...   ${helloworld()}
+    ... </div>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <div>
+      <p class="message">
+        Hello, world!
+      </p>
+    </div>
+    """
+    __slots__ = ['name', 'args', 'star_args', 'dstar_args', 'defaults']
+
+    def __init__(self, args, template, namespaces=None, lineno=-1, offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        signature = _parse(args).body
+        self.args = []
+        self.star_args = None
+        self.dstar_args = None
+        self.defaults = {}
+        if not isinstance(signature, _ast.Call):
+            # Definition without parentheses: just the function name
+            self.name = signature.id
+            return
+        self.name = signature.func.id
+        for arg in signature.args:
+            # only names
+            self.args.append(arg.id)
+        for kwd in signature.keywords:
+            self.args.append(kwd.arg)
+            default = Expression(kwd.value, template.filepath,
+                                 lineno, lookup=template.lookup)
+            self.defaults[kwd.arg] = default
+        if getattr(signature, 'starargs', None):
+            self.star_args = signature.starargs.id
+        if getattr(signature, 'kwargs', None):
+            self.dstar_args = signature.kwargs.id
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        # Element form: the signature comes from the "function" attribute
+        if type(value) is dict:
+            value = value.get('function')
+        return super(DefDirective, cls).attach(template, stream, value,
+                                               namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        stream = list(stream)
+
+        def function(*args, **kwargs):
+            scope = {}
+            positional = list(args) # make mutable
+            for name in self.args:
+                if positional:
+                    scope[name] = positional.pop(0)
+                elif name in kwargs:
+                    scope[name] = kwargs.pop(name)
+                else:
+                    # Fall back on the default expression of the parameter
+                    scope[name] = _eval_expr(self.defaults.get(name), ctxt,
+                                             vars)
+            if self.star_args is not None:
+                scope[self.star_args] = positional
+            if self.dstar_args is not None:
+                scope[self.dstar_args] = kwargs
+            ctxt.push(scope)
+            for event in _apply_directives(stream, directives, ctxt, vars):
+                yield event
+            ctxt.pop()
+        function.__name__ = self.name
+
+        # Store the function reference in the bottom context frame so that it
+        # doesn't get popped off before processing the template has finished
+        # FIXME: this makes context data mutable as a side-effect
+        bottom = ctxt.frames[-1]
+        bottom[self.name] = function
+
+        # The definition itself produces no output
+        return []
+
+    def __repr__(self):
+        return '<%s "%s">' % (type(self).__name__, self.name)
+
+
+class ForDirective(Directive):
+    """Implementation of the ``py:for`` template directive for repeating an
+    element based on an iterable in the context data.
+
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:for="item in items">${item}</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(items=[1, 2, 3]))
+    <ul>
+      <li>1</li><li>2</li><li>3</li>
+    </ul>
+    """
+    __slots__ = ['assign', 'filename']
+
+    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+        if ' in ' not in value:
+            raise TemplateSyntaxError('"in" keyword missing in "for" directive',
+                                      template.filepath, lineno, offset)
+        # Left of the first " in " is the loop target, right of it the
+        # expression producing the items
+        target, iterable = value.split(' in ', 1)
+        target_ast = _parse(target, 'exec')
+        iterable = 'iter(%s)' % iterable.strip()
+        self.assign = _assignment(target_ast.body[0].value)
+        self.filename = template.filepath
+        Directive.__init__(self, iterable, template, namespaces, lineno,
+                           offset)
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        # Element form: the loop expression comes from the "each" attribute
+        if type(value) is dict:
+            value = value.get('each')
+        return super(ForDirective, cls).attach(template, stream, value,
+                                               namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        items = _eval_expr(self.expr, ctxt, vars)
+        if items is None:
+            return
+
+        bind = self.assign
+        # One scope dictionary is shared by all iterations
+        scope = {}
+        events = list(stream)
+        for item in items:
+            bind(scope, item)
+            ctxt.push(scope)
+            for event in _apply_directives(events, directives, ctxt, vars):
+                yield event
+            ctxt.pop()
+
+    def __repr__(self):
+        return '<%s>' % type(self).__name__
+
+
+class IfDirective(Directive):
+    """Implementation of the ``py:if`` template directive for conditionally
+    excluding elements from being output.
+
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <b py:if="foo">${bar}</b>
+    ... </div>''')
+    >>> print(tmpl.generate(foo=True, bar='Hello'))
+    <div>
+      <b>Hello</b>
+    </div>
+    """
+    __slots__ = []
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        # Element form: the condition comes from the "test" attribute
+        if type(value) is dict:
+            value = value.get('test')
+        return super(IfDirective, cls).attach(template, stream, value,
+                                              namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        if not _eval_expr(self.expr, ctxt, vars):
+            # Condition not met: the element produces no events
+            return []
+        return _apply_directives(stream, directives, ctxt, vars)
+
+
+class MatchDirective(Directive):
+ """Implementation of the ``py:match`` template directive.
+
+ >>> from genshi.template import MarkupTemplate
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+ ... <span py:match="greeting">
+ ... Hello ${select('@name')}
+ ... </span>
+ ... <greeting name="Dude" />
+ ... </div>''')
+ >>> print(tmpl.generate())
+ <div>
+ <span>
+ Hello Dude
+ </span>
+ </div>
+ """
+ __slots__ = ['path', 'namespaces', 'hints']
+
+ def __init__(self, value, template, hints=None, namespaces=None,
+ lineno=-1, offset=-1):
+ Directive.__init__(self, None, template, namespaces, lineno, offset)
+ self.path = Path(value, template.filepath, lineno)
+ self.namespaces = namespaces or {}
+ self.hints = hints or ()
+
+ @classmethod
+ def attach(cls, template, stream, value, namespaces, pos):
+ """Create the directive from either a path string or a dict of options.
+
+ When `value` is a dict, the path is read from its ``path`` entry and
+ the following entries are translated into hints (the option values
+ are compared case-insensitively):
+
+ * ``buffer`` equal to ``false`` adds ``not_buffered``
+ * ``once`` equal to ``true`` adds ``match_once``
+ * ``recursive`` equal to ``false`` adds ``not_recursive``
+
+ :return: a ``(directive, stream)`` tuple
+ """
+ hints = []
+ if type(value) is dict:
+ if value.get('buffer', '').lower() == 'false':
+ hints.append('not_buffered')
+ if value.get('once', '').lower() == 'true':
+ hints.append('match_once')
+ if value.get('recursive', '').lower() == 'false':
+ hints.append('not_recursive')
+ value = value.get('path')
+ return cls(value, template, frozenset(hints), namespaces, *pos[1:]), \
+ stream
+
+ def __call__(self, stream, directives, ctxt, **vars):
+ """Register the match template on the context and produce no output.
+
+ The buffered stream is appended to ``ctxt._match_templates`` together
+ with the path test, the path, the hints, the namespaces and the
+ remaining directives. An empty list is returned, so nothing is
+ generated for the element that carries the directive.
+ """
+ ctxt._match_templates.append((self.path.test(ignore_context=True),
+ self.path, list(stream), self.hints,
+ self.namespaces, directives))
+ return []
+
+ def __repr__(self):
+ return '<%s "%s">' % (type(self).__name__, self.path.source)
+
+
+class ReplaceDirective(Directive):
+ """Implementation of the ``py:replace`` template directive.
+
+ This directive replaces the element with the result of evaluating the
+ value of the ``py:replace`` attribute:
+
+ >>> from genshi.template import MarkupTemplate
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+ ... <span py:replace="bar">Hello</span>
+ ... </div>''')
+ >>> print(tmpl.generate(bar='Bye'))
+ <div>
+ Bye
+ </div>
+
+ This directive is equivalent to ``py:content`` combined with ``py:strip``,
+ providing a less verbose way to achieve the same effect:
+
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+ ... <span py:content="bar" py:strip="">Hello</span>
+ ... </div>''')
+ >>> print(tmpl.generate(bar='Bye'))
+ <div>
+ Bye
+ </div>
+ """
+ __slots__ = []
+
+ @classmethod
+ def attach(cls, template, stream, value, namespaces, pos):
+ """Replace the element's stream with a single expression event.
+
+ When `value` is a dict, the expression is read from its ``value``
+ entry.
+
+ :return: a tuple of ``None`` (no directive instance is attached) and a
+ stream consisting of one `EXPR` event
+ :raise TemplateSyntaxError: if no expression is given
+ """
+ if type(value) is dict:
+ value = value.get('value')
+ if not value:
+ raise TemplateSyntaxError('missing value for "replace" directive',
+ template.filepath, *pos[1:])
+ expr = cls._parse_expr(value, template, *pos[1:])
+ return None, [(EXPR, expr, pos)]
+
+
+class StripDirective(Directive):
+ """Implementation of the ``py:strip`` template directive.
+
+ When the value of the ``py:strip`` attribute evaluates to ``True``, the
+ element is stripped from the output
+
+ >>> from genshi.template import MarkupTemplate
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+ ... <div py:strip="True"><b>foo</b></div>
+ ... </div>''')
+ >>> print(tmpl.generate())
+ <div>
+ <b>foo</b>
+ </div>
+
+ Leaving the attribute value empty is equivalent to a truth value.
+
+ This directive is particularly interesting for named template functions or
+ match templates that do not generate a top-level element:
+
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+ ... <div py:def="echo(what)" py:strip="">
+ ... <b>${what}</b>
+ ... </div>
+ ... ${echo('foo')}
+ ... </div>''')
+ >>> print(tmpl.generate())
+ <div>
+ <b>foo</b>
+ </div>
+ """
+ __slots__ = []
+
+ def __call__(self, stream, directives, ctxt, **vars):
+ """Drop the first and last event of the stream when stripping applies.
+
+ Stripping applies when the directive has no expression or when the
+ expression evaluates to a true value; otherwise the stream is passed
+ through unchanged. The remaining directives are applied to the
+ result in either case.
+ """
+ def _generate():
+ if not self.expr or _eval_expr(self.expr, ctxt, vars):
+ stream.next() # skip start tag
+ # Stay one event behind, so that the final event of the stream
+ # is never yielded.
+ previous = stream.next()
+ for event in stream:
+ yield previous
+ previous = event
+ else:
+ for event in stream:
+ yield event
+ return _apply_directives(_generate(), directives, ctxt, vars)
+
+
+class ChooseDirective(Directive):
+ """Implementation of the ``py:choose`` directive for conditionally selecting
+ one of several body elements to display.
+
+ If the ``py:choose`` expression is empty the expressions of nested
+ ``py:when`` directives are tested for truth. The first true ``py:when``
+ body is output. If no ``py:when`` directive is matched then the fallback
+ directive ``py:otherwise`` will be used.
+
+ >>> from genshi.template import MarkupTemplate
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
+ ... py:choose="">
+ ... <span py:when="0 == 1">0</span>
+ ... <span py:when="1 == 1">1</span>
+ ... <span py:otherwise="">2</span>
+ ... </div>''')
+ >>> print(tmpl.generate())
+ <div>
+ <span>1</span>
+ </div>
+
+ If the ``py:choose`` directive contains an expression, the nested
+ ``py:when`` directives are tested for equality to the ``py:choose``
+ expression:
+
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
+ ... py:choose="2">
+ ... <span py:when="1">1</span>
+ ... <span py:when="2">2</span>
+ ... </div>''')
+ >>> print(tmpl.generate())
+ <div>
+ <span>2</span>
+ </div>
+
+ Behavior is undefined if a ``py:choose`` block contains content outside a
+ ``py:when`` or ``py:otherwise`` block. Behavior is also undefined if a
+ ``py:otherwise`` occurs before ``py:when`` blocks.
+ """
+ __slots__ = ['matched', 'value']
+
+ @classmethod
+ def attach(cls, template, stream, value, namespaces, pos):
+ if type(value) is dict:
+ value = value.get('test')
+ return super(ChooseDirective, cls).attach(template, stream, value,
+ namespaces, pos)
+
+ def __call__(self, stream, directives, ctxt, **vars):
+ """Process the body with a fresh entry on the choice stack.
+
+ A state list is pushed onto ``ctxt._choice_stack`` for nested ``when``
+ and ``otherwise`` directives to consult, and is popped again once the
+ stream has been exhausted.
+ """
+ # State: [has a branch matched yet?, does "choose" have an expression?,
+ # value of that expression]
+ info = [False, bool(self.expr), None]
+ if self.expr:
+ info[2] = _eval_expr(self.expr, ctxt, vars)
+ ctxt._choice_stack.append(info)
+ for event in _apply_directives(stream, directives, ctxt, vars):
+ yield event
+ ctxt._choice_stack.pop()
+
+
+class WhenDirective(Directive):
+ """Implementation of the ``py:when`` directive for nesting in a parent with
+ the ``py:choose`` directive.
+
+ See the documentation of the `ChooseDirective` for usage.
+ """
+ __slots__ = ['filename']
+
+ def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+ Directive.__init__(self, value, template, namespaces, lineno, offset)
+ self.filename = template.filepath
+
+ @classmethod
+ def attach(cls, template, stream, value, namespaces, pos):
+ if type(value) is dict:
+ value = value.get('test')
+ return super(WhenDirective, cls).attach(template, stream, value,
+ namespaces, pos)
+
+ def __call__(self, stream, directives, ctxt, **vars):
+ """Output the body if this is the first matching ``when`` of the
+ enclosing ``choose``.
+
+ If the ``choose`` has an expression, its value is compared for equality
+ with the value of this directive's expression, or tested for truth
+ when this directive has no expression. Otherwise this directive's own
+ expression is tested for truth.
+
+ :raise TemplateRuntimeError: if used outside of a ``choose`` directive,
+ or if neither directive has a test
+ expression
+ """
+ # `info` is the state list on top of the choice stack:
+ # [matched, choose has an expression, value of that expression]
+ info = ctxt._choice_stack and ctxt._choice_stack[-1]
+ if not info:
+ raise TemplateRuntimeError('"when" directives can only be used '
+ 'inside a "choose" directive',
+ self.filename, *stream.next()[2][1:])
+ # An earlier branch has already matched, so this one is skipped.
+ if info[0]:
+ return []
+ if not self.expr and not info[1]:
+ raise TemplateRuntimeError('either "choose" or "when" directive '
+ 'must have a test expression',
+ self.filename, *stream.next()[2][1:])
+ if info[1]:
+ value = info[2]
+ if self.expr:
+ matched = value == _eval_expr(self.expr, ctxt, vars)
+ else:
+ matched = bool(value)
+ else:
+ matched = bool(_eval_expr(self.expr, ctxt, vars))
+ info[0] = matched
+ if not matched:
+ return []
+
+ return _apply_directives(stream, directives, ctxt, vars)
+
+
+class OtherwiseDirective(Directive):
+ """Implementation of the ``py:otherwise`` directive for nesting in a parent
+ with the ``py:choose`` directive.
+
+ See the documentation of `ChooseDirective` for usage.
+ """
+ __slots__ = ['filename']
+
+ def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+ Directive.__init__(self, None, template, namespaces, lineno, offset)
+ self.filename = template.filepath
+
+ def __call__(self, stream, directives, ctxt, **vars):
+ """Output the body unless a branch of the enclosing ``choose`` has
+ already matched.
+
+ :raise TemplateRuntimeError: if used outside of a ``choose`` directive
+ """
+ info = ctxt._choice_stack and ctxt._choice_stack[-1]
+ if not info:
+ raise TemplateRuntimeError('an "otherwise" directive can only be '
+ 'used inside a "choose" directive',
+ self.filename, *stream.next()[2][1:])
+ if info[0]:
+ return []
+ # Mark the choice as matched so that later branches are skipped.
+ info[0] = True
+
+ return _apply_directives(stream, directives, ctxt, vars)
+
+
+class WithDirective(Directive):
+ """Implementation of the ``py:with`` template directive, which allows
+ shorthand access to variables and expressions.
+
+ >>> from genshi.template import MarkupTemplate
+ >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+ ... <span py:with="y=7; z=x+10">$x $y $z</span>
+ ... </div>''')
+ >>> print(tmpl.generate(x=42))
+ <div>
+ <span>42 7 52</span>
+ </div>
+ """
+ __slots__ = ['vars']
+
+ def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+ """Parse the assignments given as the directive value.
+
+ The value is parsed as statements, each of which must be an
+ assignment. For every assignment a pair of the target assignment
+ functions and an `Expression` for the right-hand side is appended to
+ ``self.vars``.
+
+ :raise TemplateSyntaxError: if the value contains a statement that is
+ not an assignment, or cannot be parsed
+ """
+ Directive.__init__(self, None, template, namespaces, lineno, offset)
+ self.vars = []
+ value = value.strip()
+ try:
+ ast = _parse(value, 'exec')
+ for node in ast.body:
+ if not isinstance(node, _ast.Assign):
+ raise TemplateSyntaxError('only assignment allowed in '
+ 'value of the "with" directive',
+ template.filepath, lineno, offset)
+ self.vars.append(([_assignment(n) for n in node.targets],
+ Expression(node.value, template.filepath,
+ lineno, lookup=template.lookup)))
+ except SyntaxError, err:
+ # Add the offending expression and the directive name to the message
+ # before re-raising as a template error.
+ err.msg += ' in expression "%s" of "%s" directive' % (value,
+ self.tagname)
+ raise TemplateSyntaxError(err, template.filepath, lineno,
+ offset + (err.offset or 0))
+
+ @classmethod
+ def attach(cls, template, stream, value, namespaces, pos):
+ if type(value) is dict:
+ value = value.get('vars')
+ return super(WithDirective, cls).attach(template, stream, value,
+ namespaces, pos)
+
+ def __call__(self, stream, directives, ctxt, **vars):
+ """Evaluate the assignments into a new context frame and process the
+ stream with that frame in place.
+
+ The frame is popped again once the stream has been exhausted.
+ """
+ frame = {}
+ # The frame is pushed before the expressions are evaluated; presumably
+ # this lets later assignments refer to earlier ones -- depends on how
+ # the context resolves names, which is not visible here.
+ ctxt.push(frame)
+ for targets, expr in self.vars:
+ value = _eval_expr(expr, ctxt, vars)
+ for assign in targets:
+ assign(frame, value)
+ for event in _apply_directives(stream, directives, ctxt, vars):
+ yield event
+ ctxt.pop()
+
+ def __repr__(self):
+ return '<%s>' % (type(self).__name__)
diff --git a/genshi/template/eval.py b/genshi/template/eval.py
new file mode 100644
index 0000000..8593aaa
--- /dev/null
+++ b/genshi/template/eval.py
@@ -0,0 +1,629 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support for "safe" evaluation of Python expressions."""
+
+import __builtin__
+
+from textwrap import dedent
+from types import CodeType
+
+from genshi.core import Markup
+from genshi.template.astutil import ASTTransformer, ASTCodeGenerator, \
+ _ast, parse
+from genshi.template.base import TemplateRuntimeError
+from genshi.util import flatten
+
+__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup',
+ 'Undefined', 'UndefinedError']
+__docformat__ = 'restructuredtext en'
+
+
+# Check for a Python 2.4 bug in the eval loop
+#
+# The probe executes a star-import with a custom mapping object as the locals.
+# If that raises a SystemError, the flag is set, and the AST transformer
+# rewrites star-imports into calls to `_star_import_patch` instead.
+has_star_import_bug = False
+try:
+ class _FakeMapping(object):
+ __getitem__ = __setitem__ = lambda *a: None
+ exec 'from sys import *' in {}, _FakeMapping()
+except SystemError:
+ has_star_import_bug = True
+del _FakeMapping
+
+
+def _star_import_patch(mapping, modname):
+ """This function is used as helper if a Python version with a broken
+ star-import opcode is in use.
+
+ It copies the names a star-import would provide into the mapping: the
+ names listed in the module's ``__all__`` if it has one, otherwise all
+ names in the module dictionary that do not start with an underscore.
+
+ :param mapping: the mapping to update with the imported names
+ :param modname: the name of the module to import from
+ """
+ module = __import__(modname, None, None, ['__all__'])
+ if hasattr(module, '__all__'):
+ members = module.__all__
+ else:
+ members = [x for x in module.__dict__ if not x.startswith('_')]
+ mapping.update([(name, getattr(module, name)) for name in members])
+
+
+class Code(object):
+ """Abstract base class for the `Expression` and `Suite` classes."""
+ __slots__ = ['source', 'code', 'ast', '_globals']
+
+ def __init__(self, source, filename=None, lineno=-1, lookup='strict',
+ xform=None):
+ """Create the code object, either from a string, or from an AST node.
+
+ :param source: either a string containing the source code, or an AST
+ node
+ :param filename: the (preferably absolute) name of the file containing
+ the code
+ :param lineno: the number of the line on which the code was found
+ :param lookup: the lookup class that defines how variables are looked
+ up in the context; can be either "strict" (the default),
+ "lenient", or a custom lookup class
+ :param xform: the AST transformer that should be applied to the code;
+ if `None`, the appropriate transformation is chosen
+ depending on the mode
+ """
+ if isinstance(source, basestring):
+ self.source = source
+ node = _parse(source, mode=self.mode)
+ else:
+ assert isinstance(source, _ast.AST), \
+ 'Expected string or AST node, but got %r' % source
+ self.source = '?'
+ if self.mode == 'eval':
+ node = _ast.Expression()
+ node.body = source
+ else:
+ node = _ast.Module()
+ node.body = [source]
+
+ self.ast = node
+ self.code = _compile(node, self.source, mode=self.mode,
+ filename=filename, lineno=lineno, xform=xform)
+ if lookup is None:
+ lookup = LenientLookup
+ elif isinstance(lookup, basestring):
+ lookup = {'lenient': LenientLookup, 'strict': StrictLookup}[lookup]
+ self._globals = lookup.globals
+
+ def __getstate__(self):
+ """Return the state to pickle.
+
+ The compiled code object is flattened into a tuple of its ``co_*``
+ attributes, in the order expected by `__setstate__`.
+ """
+ # `_globals` is the bound ``globals`` classmethod of the lookup class,
+ # so ``im_self`` gives back the lookup class itself.
+ state = {'source': self.source, 'ast': self.ast,
+ 'lookup': self._globals.im_self}
+ c = self.code
+ # The two trailing empty tuples are presumably the free and cell
+ # variable names of the code object -- TODO confirm.
+ state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code,
+ c.co_consts, c.co_names, c.co_varnames, c.co_filename,
+ c.co_name, c.co_firstlineno, c.co_lnotab, (), ())
+ return state
+
+ def __setstate__(self, state):
+ """Restore the object from the state produced by `__getstate__`."""
+ self.source = state['source']
+ self.ast = state['ast']
+ # The code object is rebuilt from the stored attribute tuple; the
+ # leading ``0`` is presumably the argument count -- TODO confirm.
+ self.code = CodeType(0, *state['code'])
+ self._globals = state['lookup'].globals
+
+ def __eq__(self, other):
+ return (type(other) == type(self)) and (self.code == other.code)
+
+ def __hash__(self):
+ return hash(self.code)
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ return '%s(%r)' % (type(self).__name__, self.source)
+
+
+class Expression(Code):
+ """Evaluates Python expressions used in templates.
+
+ >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
+ >>> Expression('test').evaluate(data)
+ 'Foo'
+
+ >>> Expression('items[0]').evaluate(data)
+ 1
+ >>> Expression('items[-1]').evaluate(data)
+ 3
+ >>> Expression('dict["some"]').evaluate(data)
+ 'thing'
+
+ Similar to e.g. Javascript, expressions in templates can use the dot
+ notation for attribute access to access items in mappings:
+
+ >>> Expression('dict.some').evaluate(data)
+ 'thing'
+
+ This also works the other way around: item access can be used to access
+ any object attribute:
+
+ >>> class MyClass(object):
+ ... myattr = 'Bar'
+ >>> data = dict(mine=MyClass(), key='myattr')
+ >>> Expression('mine.myattr').evaluate(data)
+ 'Bar'
+ >>> Expression('mine["myattr"]').evaluate(data)
+ 'Bar'
+ >>> Expression('mine[key]').evaluate(data)
+ 'Bar'
+
+ All of the standard Python operators are available to template expressions.
+ Built-in functions such as ``len()`` are also available in template
+ expressions:
+
+ >>> data = dict(items=[1, 2, 3])
+ >>> Expression('len(items)').evaluate(data)
+ 3
+ """
+ __slots__ = []
+ mode = 'eval'
+
+ def evaluate(self, data):
+ """Evaluate the expression against the given data dictionary.
+
+ :param data: a mapping containing the data to evaluate against
+ :return: the result of the evaluation
+ """
+ __traceback_hide__ = 'before_and_this'
+ _globals = self._globals(data)
+ return eval(self.code, _globals, {'__data__': data})
+
+
+class Suite(Code):
+ """Executes Python statements used in templates.
+
+ >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
+ >>> Suite("foo = dict['some']").execute(data)
+ >>> data['foo']
+ 'thing'
+ """
+ __slots__ = []
+ mode = 'exec'
+
+ def execute(self, data):
+ """Execute the suite in the given data dictionary.
+
+ :param data: a mapping containing the data to execute in
+ """
+ __traceback_hide__ = 'before_and_this'
+ _globals = self._globals(data)
+ exec self.code in _globals, data
+
+
+UNDEFINED = object()
+
+
+class UndefinedError(TemplateRuntimeError):
+ """Exception thrown when a template expression attempts to access a variable
+ not defined in the context.
+
+ :see: `LenientLookup`, `StrictLookup`
+ """
+ def __init__(self, name, owner=UNDEFINED):
+ """Create the exception.
+
+ :param name: the name of the undefined variable or member
+ :param owner: the object the name was looked up on; if given, the
+ message reports a missing member of that object
+ """
+ if owner is not UNDEFINED:
+ message = '%s has no member named "%s"' % (repr(owner), name)
+ else:
+ message = '"%s" not defined' % name
+ TemplateRuntimeError.__init__(self, message)
+
+
+class Undefined(object):
+ """Represents a reference to an undefined variable.
+
+ Unlike the Python runtime, template expressions can refer to an undefined
+ variable without causing a `NameError` to be raised. The result will be an
+ instance of the `Undefined` class, which is treated the same as ``False`` in
+ conditions, but raises an exception on any other operation:
+
+ >>> foo = Undefined('foo')
+ >>> bool(foo)
+ False
+ >>> list(foo)
+ []
+ >>> print(foo)
+ undefined
+
+ However, calling an undefined variable, or trying to access an attribute
+ of that variable, will raise an exception that includes the name used to
+ reference that undefined variable.
+
+ >>> foo('bar')
+ Traceback (most recent call last):
+ ...
+ UndefinedError: "foo" not defined
+
+ >>> foo.bar
+ Traceback (most recent call last):
+ ...
+ UndefinedError: "foo" not defined
+
+ :see: `LenientLookup`
+ """
+ __slots__ = ['_name', '_owner']
+
+ def __init__(self, name, owner=UNDEFINED):
+ """Initialize the object.
+
+ :param name: the name of the reference
+ :param owner: the owning object, if the variable is accessed as a member
+ """
+ self._name = name
+ self._owner = owner
+
+ def __iter__(self):
+ return iter([])
+
+ def __nonzero__(self):
+ return False
+
+ def __repr__(self):
+ return '<%s %r>' % (type(self).__name__, self._name)
+
+ def __str__(self):
+ return 'undefined'
+
+ def _die(self, *args, **kwargs):
+ """Raise an `UndefinedError`."""
+ __traceback_hide__ = True
+ raise UndefinedError(self._name, self._owner)
+ __call__ = __getattr__ = __getitem__ = _die
+
+ # Hack around some behavior introduced in Python 2.6.2
+ # http://genshi.edgewall.org/ticket/324
+ __length_hint__ = None
+
+
+class LookupBase(object):
+ """Abstract base class for variable lookup implementations."""
+
+ @classmethod
+ def globals(cls, data):
+ """Construct the globals dictionary to use as the execution context for
+ the expression or suite.
+ """
+ return {
+ '__data__': data,
+ '_lookup_name': cls.lookup_name,
+ '_lookup_attr': cls.lookup_attr,
+ '_lookup_item': cls.lookup_item,
+ '_star_import_patch': _star_import_patch,
+ 'UndefinedError': UndefinedError,
+ }
+
+ @classmethod
+ def lookup_name(cls, data, name):
+ """Look up a variable by name.
+
+ The name is looked up in `data` first and in `BUILTINS` second. If it
+ is found in neither, the result of `undefined()` is returned.
+ """
+ __traceback_hide__ = True
+ val = data.get(name, UNDEFINED)
+ if val is UNDEFINED:
+ val = BUILTINS.get(name, val)
+ if val is UNDEFINED:
+ val = cls.undefined(name)
+ return val
+
+ @classmethod
+ def lookup_attr(cls, obj, key):
+ """Look up an attribute, falling back to item access.
+
+ If `obj` has no attribute named `key`, ``obj[key]`` is tried instead.
+ If that fails with a `KeyError` or `TypeError`, the result of
+ `undefined()` is returned.
+ """
+ __traceback_hide__ = True
+ try:
+ val = getattr(obj, key)
+ except AttributeError:
+ # If the class does define the attribute, the error is re-raised
+ # rather than masked by the item-access fallback (presumably it
+ # came from inside the attribute itself, e.g. a property).
+ if hasattr(obj.__class__, key):
+ raise
+ else:
+ try:
+ val = obj[key]
+ except (KeyError, TypeError):
+ val = cls.undefined(key, owner=obj)
+ return val
+
+ @classmethod
+ def lookup_item(cls, obj, key):
+ """Look up an item, falling back to attribute access for string keys.
+
+ :param obj: the object to subscript
+ :param key: a sequence of subscript values; a sequence with a single
+ element is unwrapped to that element
+
+ If ``obj[key]`` fails and the key is a string, the attribute of that
+ name is returned if there is one, and the result of `undefined()`
+ otherwise. For any other kind of key the error is re-raised.
+ """
+ __traceback_hide__ = True
+ if len(key) == 1:
+ key = key[0]
+ try:
+ return obj[key]
+ except (AttributeError, KeyError, IndexError, TypeError), e:
+ if isinstance(key, basestring):
+ val = getattr(obj, key, UNDEFINED)
+ if val is UNDEFINED:
+ val = cls.undefined(key, owner=obj)
+ return val
+ raise
+
+ @classmethod
+ def undefined(cls, key, owner=UNDEFINED):
+ """Can be overridden by subclasses to specify behavior when undefined
+ variables are accessed.
+
+ :param key: the name of the variable
+ :param owner: the owning object, if the variable is accessed as a member
+ """
+ raise NotImplementedError
+
+
+class LenientLookup(LookupBase):
+ """Default variable lookup mechanism for expressions.
+
+ When an undefined variable is referenced using this lookup style, the
+ reference evaluates to an instance of the `Undefined` class:
+
+ >>> expr = Expression('nothing', lookup='lenient')
+ >>> undef = expr.evaluate({})
+ >>> undef
+ <Undefined 'nothing'>
+
+ The same will happen when a non-existing attribute or item is accessed on
+ an existing object:
+
+ >>> expr = Expression('something.nil', lookup='lenient')
+ >>> expr.evaluate({'something': dict()})
+ <Undefined 'nil'>
+
+ See the documentation of the `Undefined` class for details on the behavior
+ of such objects.
+
+ :see: `StrictLookup`
+ """
+
+ @classmethod
+ def undefined(cls, key, owner=UNDEFINED):
+ """Return an ``Undefined`` object."""
+ __traceback_hide__ = True
+ return Undefined(key, owner=owner)
+
+
+class StrictLookup(LookupBase):
+ """Strict variable lookup mechanism for expressions.
+
+ Referencing an undefined variable using this lookup style will immediately
+ raise an ``UndefinedError``:
+
+ >>> expr = Expression('nothing', lookup='strict')
+ >>> expr.evaluate({})
+ Traceback (most recent call last):
+ ...
+ UndefinedError: "nothing" not defined
+
+ The same happens when a non-existing attribute or item is accessed on an
+ existing object:
+
+ >>> expr = Expression('something.nil', lookup='strict')
+ >>> expr.evaluate({'something': dict()})
+ Traceback (most recent call last):
+ ...
+ UndefinedError: {} has no member named "nil"
+ """
+
+ @classmethod
+ def undefined(cls, key, owner=UNDEFINED):
+ """Raise an ``UndefinedError`` immediately."""
+ __traceback_hide__ = True
+ raise UndefinedError(key, owner=owner)
+
+
+def _parse(source, mode='eval'):
+    """Normalize a piece of template code and parse it into an AST.
+
+    In ``exec`` mode, tabs are expanded and all lines after the first are
+    dedented. If the first line ends with a colon and the remaining lines
+    ended up unindented, they are re-indented so that they form the body of
+    that first line. Unicode source is encoded as UTF-8 with a leading
+    byte order mark before being handed to the parser.
+
+    :param source: the source code as a string
+    :param mode: the parse mode, either "eval" or "exec"
+    :return: the AST produced by `parse`
+    """
+    source = source.strip()
+    if mode == 'exec':
+        lines = [line.expandtabs() for line in source.splitlines()]
+        if lines:
+            first = lines[0]
+            rest = dedent('\n'.join(lines[1:])).rstrip()
+            # `rest` is empty when the suite is a single line ending in a
+            # colon (e.g. "if x:"). Indexing it would raise an IndexError
+            # here; with the guard the incomplete code reaches the parser
+            # instead, which is where callers expect syntax problems to be
+            # reported.
+            if first.rstrip().endswith(':') and rest \
+                    and not rest[0].isspace():
+                rest = '\n'.join([' %s' % line for line in rest.splitlines()])
+            source = '\n'.join([first, rest])
+    if isinstance(source, unicode):
+        source = '\xef\xbb\xbf' + source.encode('utf-8')
+    return parse(source, mode)
+
+
+def _compile(node, source=None, mode='eval', filename=None, lineno=-1,
+ xform=None):
+ """Transform an AST and compile it into a code object.
+
+ :param node: the AST to compile
+ :param source: the source text, used to build the name of the code object
+ :param mode: the compile mode, either "eval" or "exec"
+ :param filename: the file name to record in the code object; defaults to
+ ``<string>``
+ :param lineno: the first line number to record in the code object; values
+ less than or equal to zero are replaced by 1
+ :param xform: the AST transformer class to apply; if `None`,
+ `ExpressionASTTransformer` is used in "eval" mode and
+ `TemplateASTTransformer` otherwise
+ :return: the compiled code object
+ """
+ if isinstance(filename, unicode):
+ # unicode file names not allowed for code objects
+ filename = filename.encode('utf-8', 'replace')
+ elif not filename:
+ filename = '<string>'
+ if lineno <= 0:
+ lineno = 1
+
+ if xform is None:
+ xform = {
+ 'eval': ExpressionASTTransformer
+ }.get(mode, TemplateASTTransformer)
+ tree = xform().visit(node)
+
+ if mode == 'eval':
+ name = '<Expression %r>' % (source or '?')
+ else:
+ # NOTE(review): unlike the branch above, this one requires `source` to
+ # be a string; the default of ``None`` would fail here.
+ lines = source.splitlines()
+ if not lines:
+ extract = ''
+ else:
+ extract = lines[0]
+ if len(lines) > 1:
+ extract += ' ...'
+ name = '<Suite %r>' % (extract)
+ # The transformed tree is turned back into source text and compiled from
+ # that text.
+ new_source = ASTCodeGenerator(tree).code
+ code = compile(new_source, filename, mode)
+
+ try:
+ # We'd like to just set co_firstlineno, but it's readonly. So we need
+ # to clone the code object while adjusting the line number
+ return CodeType(0, code.co_nlocals, code.co_stacksize,
+ code.co_flags | 0x0040, code.co_code, code.co_consts,
+ code.co_names, code.co_varnames, filename, name,
+ lineno, code.co_lnotab, (), ())
+ except RuntimeError:
+ # Cloning failed, so the code object is returned as compiled.
+ return code
+
+
+def _new(class_, *args, **kwargs):
+    """Create an instance of `class_` and set attributes on it.
+
+    Positional arguments are assigned to the names listed in the instance's
+    ``_fields``, in order. Keyword arguments are assigned to the attributes
+    of the same name.
+
+    :param class_: the class to instantiate; called without arguments
+    :return: the new instance
+    :raise ValueError: if a field is given both positionally and by keyword
+    """
+    ret = class_()
+    for attr, value in zip(ret._fields, args):
+        if attr in kwargs:
+            raise ValueError('Field set both in args and kwargs')
+        setattr(ret, attr, value)
+    # Iterating over the dict itself yields only its keys, which cannot be
+    # unpacked into name/value pairs; the items are needed here.
+    for attr, value in kwargs.items():
+        setattr(ret, attr, value)
+    return ret
+
+
+BUILTINS = __builtin__.__dict__.copy()
+BUILTINS.update({'Markup': Markup, 'Undefined': Undefined})
+CONSTANTS = frozenset(['False', 'True', 'None', 'NotImplemented', 'Ellipsis'])
+
+
+class TemplateASTTransformer(ASTTransformer):
+ """Concrete AST transformer that implements the AST transformations needed
+ for code embedded in templates.
+ """
+
+ def __init__(self):
+ self.locals = [CONSTANTS]
+
+ def _extract_names(self, node):
+ """Collect the names bound by a node into a set.
+
+ Nodes with an ``args`` attribute contribute the names of their
+ arguments and of their ``vararg`` and ``kwarg`` attributes; nodes with
+ a ``names`` attribute contribute the (aliased) names listed there.
+
+ :return: the set of names found
+ """
+ names = set()
+ # Adds the name(s) of a single node, recursing into tuples.
+ def _process(node):
+ if isinstance(node, _ast.Name):
+ names.add(node.id)
+ elif isinstance(node, _ast.alias):
+ names.add(node.asname or node.name)
+ elif isinstance(node, _ast.Tuple):
+ for elt in node.elts:
+ _process(elt)
+ if hasattr(node, 'args'):
+ for arg in node.args:
+ _process(arg)
+ # NOTE(review): whatever value these attributes hold is added as is,
+ # presumably including ``None`` when there is no ``*args``/``**kwargs``.
+ if hasattr(node, 'vararg'):
+ names.add(node.vararg)
+ if hasattr(node, 'kwarg'):
+ names.add(node.kwarg)
+ elif hasattr(node, 'names'):
+ for elt in node.names:
+ _process(elt)
+ return names
+
+ def visit_Str(self, node):
+ """Replace non-ASCII byte string literals by unicode literals.
+
+ Byte strings that cannot be decoded as ASCII are decoded as UTF-8 and
+ returned as a new string node; all other nodes are returned unchanged.
+ """
+ if isinstance(node.s, str):
+ try: # If the string is ASCII, return a `str` object
+ node.s.decode('ascii')
+ except ValueError: # Otherwise return a `unicode` object
+ return _new(_ast.Str, node.s.decode('utf-8'))
+ return node
+
+ def visit_ClassDef(self, node):
+ if len(self.locals) > 1:
+ self.locals[-1].add(node.name)
+ self.locals.append(set())
+ try:
+ return ASTTransformer.visit_ClassDef(self, node)
+ finally:
+ self.locals.pop()
+
+ def visit_Import(self, node):
+ if len(self.locals) > 1:
+ self.locals[-1].update(self._extract_names(node))
+ return ASTTransformer.visit_Import(self, node)
+
+ def visit_ImportFrom(self, node):
+ """Handle ``from ... import ...`` statements.
+
+ A star-import is returned without further processing, except that on
+ interpreters with the star-import bug it is first replaced by a call
+ to `_star_import_patch`. For any other import, the imported names are
+ recorded as locals of the innermost scope, unless that is the
+ top-level scope.
+ """
+ if [a.name for a in node.names] == ['*']:
+ if has_star_import_bug:
+ # This is a Python 2.4 bug. Only if we have a broken Python
+ # version do we need to apply this hack
+ node = _new(_ast.Expr, _new(_ast.Call,
+ _new(_ast.Name, '_star_import_patch'), [
+ _new(_ast.Name, '__data__'),
+ _new(_ast.Str, node.module)
+ ], (), ()))
+ return node
+ if len(self.locals) > 1:
+ self.locals[-1].update(self._extract_names(node))
+ return ASTTransformer.visit_ImportFrom(self, node)
+
+ def visit_FunctionDef(self, node):
+ if len(self.locals) > 1:
+ self.locals[-1].add(node.name)
+
+ self.locals.append(self._extract_names(node.args))
+ try:
+ return ASTTransformer.visit_FunctionDef(self, node)
+ finally:
+ self.locals.pop()
+
+ # GeneratorExp(expr elt, comprehension* generators)
+ def visit_GeneratorExp(self, node):
+ """Transform a generator expression (or list comprehension).
+
+ A new set of local names is opened for each ``for`` clause, so that
+ names bound by the loop targets are available while the clauses and
+ the element expression are visited. The sets are removed again before
+ returning the rebuilt node.
+ """
+ gens = []
+ for generator in node.generators:
+ # comprehension = (expr target, expr iter, expr* ifs)
+ self.locals.append(set())
+ gen = _new(_ast.comprehension, self.visit(generator.target),
+ self.visit(generator.iter),
+ [self.visit(if_) for if_ in generator.ifs])
+ gens.append(gen)
+
+ # use node.__class__ to make it reusable as ListComp
+ ret = _new(node.__class__, self.visit(node.elt), gens)
+ #delete inserted locals
+ del self.locals[-len(node.generators):]
+ return ret
+
+ # ListComp(expr elt, comprehension* generators)
+ visit_ListComp = visit_GeneratorExp
+
+ def visit_Lambda(self, node):
+ self.locals.append(self._extract_names(node.args))
+ try:
+ return ASTTransformer.visit_Lambda(self, node)
+ finally:
+ self.locals.pop()
+
+ def visit_Name(self, node):
+ """Route name references through the context lookup.
+
+ A name that is loaded and is not a known local is replaced by a call
+ to ``_lookup_name(__data__, <name>)``. A name that is stored is
+ recorded as a local of the innermost scope, unless that is the
+ top-level scope. Other nodes are returned unchanged.
+ """
+ # If the name refers to a local inside a lambda, list comprehension, or
+ # generator expression, leave it alone
+ if isinstance(node.ctx, _ast.Load) and \
+ node.id not in flatten(self.locals):
+ # Otherwise, translate the name ref into a context lookup
+ name = _new(_ast.Name, '_lookup_name', _ast.Load())
+ namearg = _new(_ast.Name, '__data__', _ast.Load())
+ strarg = _new(_ast.Str, node.id)
+ node = _new(_ast.Call, name, [namearg, strarg], [])
+ elif isinstance(node.ctx, _ast.Store):
+ if len(self.locals) > 1:
+ self.locals[-1].add(node.id)
+
+ return node
+
+
+class ExpressionASTTransformer(TemplateASTTransformer):
+ """Concrete AST transformer that implements the AST transformations needed
+ for code embedded in templates.
+ """
+
+ def visit_Attribute(self, node):
+ """Replace attribute reads by calls to ``_lookup_attr``.
+
+ ``obj.attr`` in a load context becomes ``_lookup_attr(obj, 'attr')``;
+ attribute access in any other context is left to the base transformer.
+ """
+ if not isinstance(node.ctx, _ast.Load):
+ return ASTTransformer.visit_Attribute(self, node)
+
+ func = _new(_ast.Name, '_lookup_attr', _ast.Load())
+ args = [self.visit(node.value), _new(_ast.Str, node.attr)]
+ return _new(_ast.Call, func, args, [])
+
+ def visit_Subscript(self, node):
+ """Replace item reads by calls to ``_lookup_item``.
+
+ ``obj[key]`` in a load context becomes ``_lookup_item(obj, (key,))``.
+ Subscripts in any other context, and subscripts that are not a plain
+ index (such as slices), are left to the base transformer.
+ """
+ if not isinstance(node.ctx, _ast.Load) or \
+ not isinstance(node.slice, _ast.Index):
+ return ASTTransformer.visit_Subscript(self, node)
+
+ func = _new(_ast.Name, '_lookup_item', _ast.Load())
+ # The index is wrapped in a one-element tuple.
+ args = [
+ self.visit(node.value),
+ _new(_ast.Tuple, (self.visit(node.slice.value),), _ast.Load())
+ ]
+ return _new(_ast.Call, func, args, [])
diff --git a/genshi/template/interpolation.py b/genshi/template/interpolation.py
new file mode 100644
index 0000000..1e1a385
--- /dev/null
+++ b/genshi/template/interpolation.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""String interpolation routines, i.e. the splitting up a given text into some
+parts that are literal strings, and others that are Python expressions.
+"""
+
+from itertools import chain
+import os
+import re
+from tokenize import PseudoToken
+
+from genshi.core import TEXT
+from genshi.template.base import TemplateSyntaxError, EXPR
+from genshi.template.eval import Expression
+
+__all__ = ['interpolate']
+__docformat__ = 'restructuredtext en'
+
+NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+NAMECHARS = NAMESTART + '.0123456789'
+PREFIX = '$'
+
+token_re = re.compile('%s|%s(?s)' % (
+ r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
+ PseudoToken
+))
+
+
+def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
+ """Parse the given string and extract expressions.
+
+ This function is a generator that yields `TEXT` events for literal strings,
+ and `EXPR` events for expressions, depending on the results of parsing the
+ string.
+
+ >>> for kind, data, pos in interpolate("hey ${foo}bar"):
+ ... print('%s %r' % (kind, data))
+ TEXT 'hey '
+ EXPR Expression('foo')
+ TEXT 'bar'
+
+ :param text: the text to parse
+ :param filepath: absolute path to the file in which the text was found
+ (optional)
+ :param lineno: the line number at which the text was found (optional)
+ :param offset: the column number at which the text starts in the source
+ (optional)
+ :param lookup: the variable lookup mechanism; either "strict" (the
+ default), "lenient", or a custom lookup class
+ :return: a generator of `TEXT` and `EXPR` events
+ :raise TemplateSyntaxError: when a syntax error in an expression is
+ encountered
+ """
+ # Current position; updated in place as chunks are consumed.
+ pos = [filepath, lineno, offset]
+
+ textbuf = []
+ textpos = None
+ # The trailing ``(True, '')`` is an empty expression chunk that only serves
+ # to flush text still buffered when the lexer is done.
+ for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
+ if is_expr:
+ if textbuf:
+ yield TEXT, ''.join(textbuf), textpos
+ del textbuf[:]
+ textpos = None
+ if chunk:
+ try:
+ expr = Expression(chunk.strip(), pos[0], pos[1],
+ lookup=lookup)
+ yield EXPR, expr, tuple(pos)
+ except SyntaxError, err:
+ raise TemplateSyntaxError(err, filepath, pos[1],
+ pos[2] + (err.offset or 0))
+ else:
+ textbuf.append(chunk)
+ if textpos is None:
+ textpos = tuple(pos)
+
+ # NOTE(review): for a chunk spanning several lines the column is
+ # advanced by the length of the last line rather than reset to it --
+ # confirm that this is intended.
+ if '\n' in chunk:
+ lines = chunk.splitlines()
+ pos[1] += len(lines) - 1
+ pos[2] += len(lines[-1])
+ else:
+ pos[2] += len(chunk)
+
+
+def lex(text, textpos, filepath):
+ """Split the text into literal and expression chunks.
+
+ This is a generator yielding ``(is_expr, chunk)`` tuples, where `is_expr`
+ tells whether the chunk is the source of an expression or literal text.
+ Expressions are introduced by the ``$`` prefix, either as ``${...}`` or as
+ ``$name``; a doubled prefix is treated as an escape.
+
+ :param text: the text to split
+ :param textpos: the position of the text; the items after the first are
+ passed on as the location of a syntax error
+ :param filepath: the file name to report in a syntax error
+ :raise TemplateSyntaxError: if the text following ``${`` cannot be
+ tokenized
+ """
+ offset = pos = 0
+ end = len(text)
+ escaped = False
+
+ while 1:
+ if escaped:
+ # Resume the search behind the doubled prefix.
+ offset = text.find(PREFIX, offset + 2)
+ escaped = False
+ else:
+ offset = text.find(PREFIX, pos)
+ # Stop if there is no further prefix, or if it is the last character.
+ if offset < 0 or offset == end - 1:
+ break
+ next = text[offset + 1]
+
+ if next == '{':
+ if offset > pos:
+ yield False, text[pos:offset]
+ pos = offset + 2
+ # Consume tokens until the brace that balances the opening one.
+ level = 1
+ while level:
+ match = token_re.match(text, pos)
+ if match is None:
+ raise TemplateSyntaxError('invalid syntax', filepath,
+ *textpos[1:])
+ pos = match.end()
+ tstart, tend = match.regs[3]
+ token = text[tstart:tend]
+ if token == '{':
+ level += 1
+ elif token == '}':
+ level -= 1
+ yield True, text[offset + 2:pos - 1]
+
+ elif next in NAMESTART:
+ if offset > pos:
+ yield False, text[pos:offset]
+ pos = offset
+ pos += 1
+ # The expression extends over all following name characters.
+ while pos < end:
+ char = text[pos]
+ if char not in NAMECHARS:
+ break
+ pos += 1
+ yield True, text[offset + 1:pos].strip()
+
+ elif not escaped and next == PREFIX:
+ if offset > pos:
+ yield False, text[pos:offset]
+ escaped = True
+ pos = offset + 1
+
+ else:
+ # A prefix that does not start an expression is literal text.
+ yield False, text[pos:offset + 1]
+ pos = offset + 1
+
+ if pos < end:
+ yield False, text[pos:]
diff --git a/genshi/template/loader.py b/genshi/template/loader.py
new file mode 100644
index 0000000..0e7cda7
--- /dev/null
+++ b/genshi/template/loader.py
@@ -0,0 +1,344 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Template loading and caching."""
+
+import os
+try:
+ import threading
+except ImportError:
+ import dummy_threading as threading
+
+from genshi.template.base import TemplateError
+from genshi.util import LRUCache
+
+__all__ = ['TemplateLoader', 'TemplateNotFound', 'directory', 'package',
+ 'prefixed']
+__docformat__ = 'restructuredtext en'
+
+
class TemplateNotFound(TemplateError):
    """Error signalling that a requested template file could not be located."""

    def __init__(self, name, search_path):
        """Initialize the exception.

        :param name: the filename of the template
        :param search_path: the search path that was used to look up the
                            template; kept on the ``search_path`` attribute
        """
        message = 'Template "%s" not found' % name
        TemplateError.__init__(self, message)
        self.search_path = search_path
+
+
class TemplateLoader(object):
    """Responsible for loading templates from files on the specified search
    path.
    
    >>> import tempfile
    >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template')
    >>> os.write(fd, '<p>$var</p>')
    11
    >>> os.close(fd)
    
    The template loader accepts a list of directory paths that are then used
    when searching for template files, in the given order:
    
    >>> loader = TemplateLoader([os.path.dirname(path)])
    
    The `load()` method first checks the template cache whether the requested
    template has already been loaded. If not, it attempts to locate the
    template file, and returns the corresponding `Template` object:
    
    >>> from genshi.template import MarkupTemplate
    >>> template = loader.load(os.path.basename(path))
    >>> isinstance(template, MarkupTemplate)
    True
    
    Template instances are cached: requesting a template with the same name
    results in the same instance being returned:
    
    >>> loader.load(os.path.basename(path)) is template
    True
    
    The `auto_reload` option can be used to control whether a template should
    be automatically reloaded when the file it was loaded from has been
    changed. Disable this automatic reloading to improve performance.
    
    >>> os.remove(path)
    """
    def __init__(self, search_path=None, auto_reload=False,
                 default_encoding=None, max_cache_size=25, default_class=None,
                 variable_lookup='strict', allow_exec=True, callback=None):
        """Create the template loader.
        
        :param search_path: a list of absolute path names that should be
                            searched for template files, or a string containing
                            a single absolute path; alternatively, any item on
                            the list may be a ''load function'' that is passed
                            a filename and returns a file-like object and some
                            metadata
        :param auto_reload: whether to check the last modification time of
                            template files, and reload them if they have changed
        :param default_encoding: the default encoding to assume when loading
                                 templates; defaults to UTF-8
        :param max_cache_size: the maximum number of templates to keep in the
                               cache
        :param default_class: the default `Template` subclass to use when
                              instantiating templates
        :param variable_lookup: the variable lookup mechanism; either "strict"
                                (the default), "lenient", or a custom lookup
                                class
        :param allow_exec: whether to allow Python code blocks in templates
        :param callback: (optional) a callback function that is invoked after a
                         template was initialized by this loader; the function
                         is passed the template object as only argument. This
                         callback can be used for example to add any desired
                         filters to the template
        :raises TypeError: if `callback` is given but is not callable
        :see: `LenientLookup`, `StrictLookup`
        
        :note: Changed in 0.5: Added the `allow_exec` argument
        """
        from genshi.template.markup import MarkupTemplate

        self.search_path = search_path
        if self.search_path is None:
            self.search_path = []
        elif not isinstance(self.search_path, (list, tuple)):
            # A single path (or load function) was passed in directly
            self.search_path = [self.search_path]

        self.auto_reload = auto_reload
        """Whether templates should be reloaded when the underlying file is
        changed"""

        self.default_encoding = default_encoding
        self.default_class = default_class or MarkupTemplate
        self.variable_lookup = variable_lookup
        self.allow_exec = allow_exec
        if callback is not None and not hasattr(callback, '__call__'):
            raise TypeError('The "callback" parameter needs to be callable')
        self.callback = callback
        self._cache = LRUCache(max_cache_size)
        self._uptodate = {}
        self._lock = threading.RLock()

    def __getstate__(self):
        """Return a copy of the instance state with the lock replaced by
        ``None``, for pickling.
        """
        state = self.__dict__.copy()
        state['_lock'] = None
        return state

    def __setstate__(self, state):
        """Restore the instance state and create a fresh lock."""
        self.__dict__ = state
        self._lock = threading.RLock()

    def load(self, filename, relative_to=None, cls=None, encoding=None):
        """Load the template with the given name.
        
        If the `filename` parameter is relative, this method searches the
        search path trying to locate a template matching the given name. If the
        file name is an absolute path, the search path is ignored.
        
        If the requested template is not found, a `TemplateNotFound` exception
        is raised. Otherwise, a `Template` object is returned that represents
        the parsed template.
        
        Template instances are cached to avoid having to parse the same
        template file more than once. Thus, subsequent calls of this method
        with the same template file name will return the same `Template`
        object (unless the ``auto_reload`` option is enabled and the file was
        changed since the last parse.)
        
        If the `relative_to` parameter is provided, the `filename` is
        interpreted as being relative to that path.
        
        :param filename: the relative path of the template file to load
        :param relative_to: the filename of the template from which the new
                            template is being loaded, or ``None`` if the
                            template is being loaded directly
        :param cls: the class of the template object to instantiate
        :param encoding: the encoding of the template to load; defaults to the
                         ``default_encoding`` of the loader instance
        :return: the loaded `Template` instance
        :raises TemplateNotFound: if a template with the given name could not
                                  be found
        :raises TemplateError: if the filename is relative and no search path
                               has been configured
        """
        if cls is None:
            cls = self.default_class
        search_path = self.search_path

        # Make the filename relative to the template file it's being loaded
        # from, but only if that file is specified as a relative path, or no
        # search path has been set up
        if relative_to and (not search_path or not os.path.isabs(relative_to)):
            filename = os.path.join(os.path.dirname(relative_to), filename)

        filename = os.path.normpath(filename)
        cachekey = filename

        self._lock.acquire()
        try:
            # First check the cache to avoid reparsing the same file
            try:
                tmpl = self._cache[cachekey]
                if not self.auto_reload:
                    return tmpl
                uptodate = self._uptodate[cachekey]
                if uptodate is not None and uptodate():
                    return tmpl
            except (KeyError, OSError):
                # Not cached, or the freshness check failed: (re)load below
                pass

            isabs = False

            if os.path.isabs(filename):
                # Bypass the search path if the requested filename is absolute
                search_path = [os.path.dirname(filename)]
                isabs = True

            elif relative_to and os.path.isabs(relative_to):
                # Make sure that the directory containing the including
                # template is on the search path
                dirname = os.path.dirname(relative_to)
                if dirname not in search_path:
                    search_path = list(search_path) + [dirname]
                isabs = True

            elif not search_path:
                # Uh oh, don't know where to look for the template
                raise TemplateError('Search path for templates not configured')

            for loadfunc in search_path:
                if isinstance(loadfunc, basestring):
                    loadfunc = directory(loadfunc)
                try:
                    filepath, filename, fileobj, uptodate = loadfunc(filename)
                except IOError:
                    # Not available from this entry, try the next one
                    continue
                else:
                    try:
                        if isabs:
                            # If the filename of either the included or the 
                            # including template is absolute, make sure the
                            # included template gets an absolute path, too,
                            # so that nested includes work properly without a
                            # search path
                            filename = filepath
                        tmpl = self._instantiate(cls, fileobj, filepath,
                                                 filename, encoding=encoding)
                        if self.callback:
                            self.callback(tmpl)
                        self._cache[cachekey] = tmpl
                        self._uptodate[cachekey] = uptodate
                    finally:
                        if hasattr(fileobj, 'close'):
                            fileobj.close()
                    return tmpl

            raise TemplateNotFound(filename, search_path)

        finally:
            self._lock.release()

    def _instantiate(self, cls, fileobj, filepath, filename, encoding=None):
        """Instantiate and return the `Template` object based on the given
        class and parameters.
        
        This function is intended for subclasses to override if they need to
        implement special template instantiation logic. Code that just uses
        the `TemplateLoader` should use the `load` method instead.
        
        :param cls: the class of the template object to instantiate
        :param fileobj: a readable file-like object containing the template
                        source
        :param filepath: the absolute path to the template file
        :param filename: the path to the template file relative to the search
                         path
        :param encoding: the encoding of the template to load; defaults to the
                         ``default_encoding`` of the loader instance
        :return: the loaded `Template` instance
        :rtype: `Template`
        """
        if encoding is None:
            encoding = self.default_encoding
        return cls(fileobj, filepath=filepath, filename=filename, loader=self,
                   encoding=encoding, lookup=self.variable_lookup,
                   allow_exec=self.allow_exec)

    @staticmethod
    def directory(path):
        """Loader factory for loading templates from a local directory.
        
        :param path: the path to the local directory containing the templates
        :return: the loader function to load templates from the given directory
        :rtype: ``function``
        """
        def _load_from_directory(filename):
            filepath = os.path.join(path, filename)
            fileobj = open(filepath, 'U')
            try:
                mtime = os.path.getmtime(filepath)
            except (IOError, OSError):
                # Don't leak the open file handle if the file vanished or
                # became inaccessible between open() and the stat call
                fileobj.close()
                raise
            def _uptodate():
                return mtime == os.path.getmtime(filepath)
            return filepath, filename, fileobj, _uptodate
        return _load_from_directory

    @staticmethod
    def package(name, path):
        """Loader factory for loading templates from egg package data.
        
        :param name: the name of the package containing the resources
        :param path: the path inside the package data
        :return: the loader function to load templates from the given package
        :rtype: ``function``
        """
        from pkg_resources import resource_stream
        def _load_from_package(filename):
            filepath = os.path.join(path, filename)
            # No "uptodate" function is provided for package resources
            return filepath, filename, resource_stream(name, filepath), None
        return _load_from_package

    @staticmethod
    def prefixed(**delegates):
        """Factory for a load function that delegates to other loaders
        depending on the prefix of the requested template path.
        
        The prefix is stripped from the filename when passing on the load
        request to the delegate.
        
        >>> load = prefixed(
        ...     app1 = lambda filename: ('app1', filename, None, None),
        ...     app2 = lambda filename: ('app2', filename, None, None)
        ... )
        >>> print(load('app1/foo.html'))
        ('app1', 'app1/foo.html', None, None)
        >>> print(load('app2/bar.html'))
        ('app2', 'app2/bar.html', None, None)
        
        :param delegates: mapping of path prefixes to loader functions
        :return: the loader function
        :rtype: ``function``
        """
        def _dispatch_by_prefix(filename):
            for prefix, delegate in delegates.items():
                if filename.startswith(prefix):
                    if isinstance(delegate, basestring):
                        delegate = directory(delegate)
                    # The delegate sees the name without the prefix, but the
                    # caller gets back the full, prefixed filename
                    filepath, _, fileobj, uptodate = delegate(
                        filename[len(prefix):].lstrip('/\\')
                    )
                    return filepath, filename, fileobj, uptodate
            raise TemplateNotFound(filename, list(delegates.keys()))
        return _dispatch_by_prefix
+
+
# Expose the loader factories of `TemplateLoader` as module-level functions
directory, package, prefixed = (TemplateLoader.directory,
                                TemplateLoader.package,
                                TemplateLoader.prefixed)
diff --git a/genshi/template/markup.py b/genshi/template/markup.py
new file mode 100644
index 0000000..0e31632
--- /dev/null
+++ b/genshi/template/markup.py
@@ -0,0 +1,397 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Markup templating engine."""
+
+from itertools import chain
+
+from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind
+from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT
+from genshi.input import XMLParser
+from genshi.template.base import BadDirectiveError, Template, \
+ TemplateSyntaxError, _apply_directives, \
+ EXEC, INCLUDE, SUB
+from genshi.template.eval import Suite
+from genshi.template.interpolation import interpolate
+from genshi.template.directives import *
+from genshi.template.text import NewTextTemplate
+
+__all__ = ['MarkupTemplate']
+__docformat__ = 'restructuredtext en'
+
+
+class MarkupTemplate(Template):
+ """Implementation of the template language for XML-based templates.
+
+ >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+ ... <li py:for="item in items">${item}</li>
+ ... </ul>''')
+ >>> print(tmpl.generate(items=[1, 2, 3]))
+ <ul>
+ <li>1</li><li>2</li><li>3</li>
+ </ul>
+ """
+
+ DIRECTIVE_NAMESPACE = 'http://genshi.edgewall.org/'
+ XINCLUDE_NAMESPACE = 'http://www.w3.org/2001/XInclude'
+
+ directives = [('def', DefDirective),
+ ('match', MatchDirective),
+ ('when', WhenDirective),
+ ('otherwise', OtherwiseDirective),
+ ('for', ForDirective),
+ ('if', IfDirective),
+ ('choose', ChooseDirective),
+ ('with', WithDirective),
+ ('replace', ReplaceDirective),
+ ('content', ContentDirective),
+ ('attrs', AttrsDirective),
+ ('strip', StripDirective)]
+ serializer = 'xml'
+ _number_conv = Markup
+
+ def __init__(self, source, filepath=None, filename=None, loader=None,
+ encoding=None, lookup='strict', allow_exec=True):
+ Template.__init__(self, source, filepath=filepath, filename=filename,
+ loader=loader, encoding=encoding, lookup=lookup,
+ allow_exec=allow_exec)
+ self.add_directives(self.DIRECTIVE_NAMESPACE, self)
+
+ def _init_filters(self):
+ Template._init_filters(self)
+ # Make sure the include filter comes after the match filter
+ self.filters.remove(self._include)
+ self.filters += [self._match, self._include]
+
+ def _parse(self, source, encoding):
+ if not isinstance(source, Stream):
+ source = XMLParser(source, filename=self.filename,
+ encoding=encoding)
+ stream = []
+
+ for kind, data, pos in source:
+
+ if kind is TEXT:
+ for kind, data, pos in interpolate(data, self.filepath, pos[1],
+ pos[2], lookup=self.lookup):
+ stream.append((kind, data, pos))
+
+ elif kind is PI and data[0] == 'python':
+ if not self.allow_exec:
+ raise TemplateSyntaxError('Python code blocks not allowed',
+ self.filepath, *pos[1:])
+ try:
+ suite = Suite(data[1], self.filepath, pos[1],
+ lookup=self.lookup)
+ except SyntaxError, err:
+ raise TemplateSyntaxError(err, self.filepath,
+ pos[1] + (err.lineno or 1) - 1,
+ pos[2] + (err.offset or 0))
+ stream.append((EXEC, suite, pos))
+
+ elif kind is COMMENT:
+ if not data.lstrip().startswith('!'):
+ stream.append((kind, data, pos))
+
+ else:
+ stream.append((kind, data, pos))
+
+ return stream
+
+ def _extract_directives(self, stream, namespace, factory):
+ depth = 0
+ dirmap = {} # temporary mapping of directives to elements
+ new_stream = []
+ ns_prefix = {} # namespace prefixes in use
+
+ for kind, data, pos in stream:
+
+ if kind is START:
+ tag, attrs = data
+ directives = []
+ strip = False
+
+ if tag.namespace == namespace:
+ cls = factory.get_directive(tag.localname)
+ if cls is None:
+ raise BadDirectiveError(tag.localname,
+ self.filepath, pos[1])
+ args = dict([(name.localname, value) for name, value
+ in attrs if not name.namespace])
+ directives.append((factory.get_directive_index(cls), cls,
+ args, ns_prefix.copy(), pos))
+ strip = True
+
+ new_attrs = []
+ for name, value in attrs:
+ if name.namespace == namespace:
+ cls = factory.get_directive(name.localname)
+ if cls is None:
+ raise BadDirectiveError(name.localname,
+ self.filepath, pos[1])
+ if type(value) is list and len(value) == 1:
+ value = value[0][1]
+ directives.append((factory.get_directive_index(cls),
+ cls, value, ns_prefix.copy(), pos))
+ else:
+ new_attrs.append((name, value))
+ new_attrs = Attrs(new_attrs)
+
+ if directives:
+ directives.sort()
+ dirmap[(depth, tag)] = (directives, len(new_stream),
+ strip)
+
+ new_stream.append((kind, (tag, new_attrs), pos))
+ depth += 1
+
+ elif kind is END:
+ depth -= 1
+ new_stream.append((kind, data, pos))
+
+ # If there have have directive attributes with the
+ # corresponding start tag, move the events inbetween into
+ # a "subprogram"
+ if (depth, data) in dirmap:
+ directives, offset, strip = dirmap.pop((depth, data))
+ substream = new_stream[offset:]
+ if strip:
+ substream = substream[1:-1]
+ new_stream[offset:] = [
+ (SUB, (directives, substream), pos)
+ ]
+
+ elif kind is SUB:
+ directives, substream = data
+ substream = self._extract_directives(substream, namespace,
+ factory)
+
+ if len(substream) == 1 and substream[0][0] is SUB:
+ added_directives, substream = substream[0][1]
+ directives += added_directives
+
+ new_stream.append((kind, (directives, substream), pos))
+
+ elif kind is START_NS:
+ # Strip out the namespace declaration for template
+ # directives
+ prefix, uri = data
+ ns_prefix[prefix] = uri
+ if uri != namespace:
+ new_stream.append((kind, data, pos))
+
+ elif kind is END_NS:
+ uri = ns_prefix.pop(data, None)
+ if uri and uri != namespace:
+ new_stream.append((kind, data, pos))
+
+ else:
+ new_stream.append((kind, data, pos))
+
+ return new_stream
+
+ def _extract_includes(self, stream):
+ streams = [[]] # stacked lists of events of the "compiled" template
+ prefixes = {}
+ fallbacks = []
+ includes = []
+ xinclude_ns = Namespace(self.XINCLUDE_NAMESPACE)
+
+ for kind, data, pos in stream:
+ stream = streams[-1]
+
+ if kind is START:
+ # Record any directive attributes in start tags
+ tag, attrs = data
+ if tag in xinclude_ns:
+ if tag.localname == 'include':
+ include_href = attrs.get('href')
+ if not include_href:
+ raise TemplateSyntaxError('Include misses required '
+ 'attribute "href"',
+ self.filepath, *pos[1:])
+ includes.append((include_href, attrs.get('parse')))
+ streams.append([])
+ elif tag.localname == 'fallback':
+ streams.append([])
+ fallbacks.append(streams[-1])
+ else:
+ stream.append((kind, (tag, attrs), pos))
+
+ elif kind is END:
+ if fallbacks and data == xinclude_ns['fallback']:
+ assert streams.pop() is fallbacks[-1]
+ elif data == xinclude_ns['include']:
+ fallback = None
+ if len(fallbacks) == len(includes):
+ fallback = fallbacks.pop()
+ streams.pop() # discard anything between the include tags
+ # and the fallback element
+ stream = streams[-1]
+ href, parse = includes.pop()
+ try:
+ cls = {
+ 'xml': MarkupTemplate,
+ 'text': NewTextTemplate
+ }.get(parse) or self.__class__
+ except KeyError:
+ raise TemplateSyntaxError('Invalid value for "parse" '
+ 'attribute of include',
+ self.filepath, *pos[1:])
+ stream.append((INCLUDE, (href, cls, fallback), pos))
+ else:
+ stream.append((kind, data, pos))
+
+ elif kind is START_NS and data[1] == xinclude_ns:
+ # Strip out the XInclude namespace
+ prefixes[data[0]] = data[1]
+
+ elif kind is END_NS and data in prefixes:
+ prefixes.pop(data)
+
+ else:
+ stream.append((kind, data, pos))
+
+ assert len(streams) == 1
+ return streams[0]
+
+ def _interpolate_attrs(self, stream):
+ for kind, data, pos in stream:
+
+ if kind is START:
+ # Record any directive attributes in start tags
+ tag, attrs = data
+ new_attrs = []
+ for name, value in attrs:
+ if value:
+ value = list(interpolate(value, self.filepath, pos[1],
+ pos[2], lookup=self.lookup))
+ if len(value) == 1 and value[0][0] is TEXT:
+ value = value[0][1]
+ new_attrs.append((name, value))
+ data = tag, Attrs(new_attrs)
+
+ yield kind, data, pos
+
+ def _prepare(self, stream):
+ return Template._prepare(self,
+ self._extract_includes(self._interpolate_attrs(stream))
+ )
+
+ def add_directives(self, namespace, factory):
+ """Register a custom `DirectiveFactory` for a given namespace.
+
+ :param namespace: the namespace URI
+ :type namespace: `basestring`
+ :param factory: the directive factory to register
+ :type factory: `DirectiveFactory`
+ :since: version 0.6
+ """
+ assert not self._prepared, 'Too late for adding directives, ' \
+ 'template already prepared'
+ self._stream = self._extract_directives(self._stream, namespace,
+ factory)
+
+ def _match(self, stream, ctxt, start=0, end=None, **vars):
+ """Internal stream filter that applies any defined match templates
+ to the stream.
+ """
+ match_templates = ctxt._match_templates
+
+ tail = []
+ def _strip(stream, append=tail.append):
+ depth = 1
+ next = stream.next
+ while 1:
+ event = next()
+ if event[0] is START:
+ depth += 1
+ elif event[0] is END:
+ depth -= 1
+ if depth > 0:
+ yield event
+ else:
+ append(event)
+ break
+
+ for event in stream:
+
+ # We (currently) only care about start and end events for matching
+ # We might care about namespace events in the future, though
+ if not match_templates or (event[0] is not START and
+ event[0] is not END):
+ yield event
+ continue
+
+ for idx, (test, path, template, hints, namespaces, directives) \
+ in enumerate(match_templates):
+ if idx < start or end is not None and idx >= end:
+ continue
+
+ if test(event, namespaces, ctxt) is True:
+ if 'match_once' in hints:
+ del match_templates[idx]
+ idx -= 1
+
+ # Let the remaining match templates know about the event so
+ # they get a chance to update their internal state
+ for test in [mt[0] for mt in match_templates[idx + 1:]]:
+ test(event, namespaces, ctxt, updateonly=True)
+
+ # Consume and store all events until an end event
+ # corresponding to this start event is encountered
+ pre_end = idx + 1
+ if 'match_once' not in hints and 'not_recursive' in hints:
+ pre_end -= 1
+ inner = _strip(stream)
+ if pre_end > 0:
+ inner = self._match(inner, ctxt, start=start,
+ end=pre_end, **vars)
+ content = self._include(chain([event], inner, tail), ctxt)
+ if 'not_buffered' not in hints:
+ content = list(content)
+ content = Stream(content)
+
+ # Make the select() function available in the body of the
+ # match template
+ selected = [False]
+ def select(path):
+ selected[0] = True
+ return content.select(path, namespaces, ctxt)
+ vars = dict(select=select)
+
+ # Recursively process the output
+ template = _apply_directives(template, directives, ctxt,
+ vars)
+ for event in self._match(self._flatten(template, ctxt,
+ **vars),
+ ctxt, start=idx + 1, **vars):
+ yield event
+
+ # If the match template did not actually call select to
+ # consume the matched stream, the original events need to
+ # be consumed here or they'll get appended to the output
+ if not selected[0]:
+ for event in content:
+ pass
+
+ # Let the remaining match templates know about the last
+ # event in the matched content, so they can update their
+ # internal state accordingly
+ for test in [mt[0] for mt in match_templates[idx + 1:]]:
+ test(tail[0], namespaces, ctxt, updateonly=True)
+
+ break
+
+ else: # no matches
+ yield event
diff --git a/genshi/template/plugin.py b/genshi/template/plugin.py
new file mode 100644
index 0000000..70d56af
--- /dev/null
+++ b/genshi/template/plugin.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# Copyright (C) 2006 Matthew Good
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Basic support for the template engine plugin API used by TurboGears and
+CherryPy/Buffet.
+"""
+
+from genshi.input import ET, HTML, XML
+from genshi.output import DocType
+from genshi.template.base import Template
+from genshi.template.loader import TemplateLoader
+from genshi.template.markup import MarkupTemplate
+from genshi.template.text import TextTemplate, NewTextTemplate
+
+__all__ = ['ConfigurationError', 'AbstractTemplateEnginePlugin',
+ 'MarkupTemplateEnginePlugin', 'TextTemplateEnginePlugin']
+__docformat__ = 'restructuredtext en'
+
+
class ConfigurationError(ValueError):
    """Error raised by the template engine plugins when they are given an
    invalid option value.
    """
+
+
class AbstractTemplateEnginePlugin(object):
    """Implementation of the plugin API.

    Subclasses set `template_class` and `extension`; `_get_render_options`
    additionally reads a ``default_format`` attribute that this base class
    does not define itself.
    """

    template_class = None
    extension = None

    # Spellings accepted for boolean options that are given as strings
    _TRUE_STRINGS = ('1', 'on', 'yes', 'true')
    _FALSE_STRINGS = ('', '0', 'off', 'no', 'false')

    def __init__(self, extra_vars_func=None, options=None):
        """Read the ``genshi.*`` options and set up the template loader.

        :param extra_vars_func: (optional) a function stored as
                                ``get_extra_vars``
        :param options: (optional) a dictionary of plugin options
        :raises ConfigurationError: if an option has an invalid value
        """
        self.get_extra_vars = extra_vars_func
        if options is None:
            options = {}
        self.options = options

        self.default_encoding = options.get('genshi.default_encoding', 'utf-8')
        auto_reload = options.get('genshi.auto_reload', '1')
        if isinstance(auto_reload, basestring):
            auto_reload = auto_reload.lower() in self._TRUE_STRINGS
        # Empty entries are dropped here, so the list can be handed to the
        # loader as is
        search_path = [p for p in
                       options.get('genshi.search_path', '').split(':') if p]
        self.use_package_naming = not search_path
        try:
            max_cache_size = int(options.get('genshi.max_cache_size', 25))
        except (TypeError, ValueError):
            # int() raises TypeError for values such as None
            raise ConfigurationError('Invalid value for max_cache_size: "%s"' %
                                     options.get('genshi.max_cache_size'))

        loader_callback = options.get('genshi.loader_callback', None)
        if loader_callback and not hasattr(loader_callback, '__call__'):
            raise ConfigurationError('loader callback must be a function')

        lookup_errors = options.get('genshi.lookup_errors', 'strict')
        if lookup_errors not in ('lenient', 'strict'):
            raise ConfigurationError('Unknown lookup errors mode "%s"' %
                                     lookup_errors)

        allow_exec = self._parse_allow_exec(
            options.get('genshi.allow_exec', True)
        )

        self.loader = TemplateLoader(search_path,
                                     auto_reload=auto_reload,
                                     max_cache_size=max_cache_size,
                                     default_class=self.template_class,
                                     variable_lookup=lookup_errors,
                                     allow_exec=allow_exec,
                                     callback=loader_callback)

    @classmethod
    def _parse_allow_exec(cls, value):
        """Convert the value of the ``genshi.allow_exec`` option to a boolean.

        Strings are interpreted by their content, so that a configuration
        value such as ``"false"`` really disables Python code blocks instead
        of being treated as true merely for being a non-empty string. Any
        other value is converted using its truth value.

        :param value: the raw option value
        :return: ``True`` or ``False``
        :raises ConfigurationError: if a string value is not recognized
        """
        # Duck-typed string check, so that both byte and unicode strings are
        # handled
        if hasattr(value, 'lower'):
            normalized = value.strip().lower()
            if normalized in cls._TRUE_STRINGS:
                return True
            if normalized in cls._FALSE_STRINGS:
                return False
            raise ConfigurationError('Invalid value for allow_exec "%s"' %
                                     value)
        return bool(value)

    def load_template(self, templatename, template_string=None):
        """Find a template specified in python 'dot' notation, or load one from
        a string.

        :param templatename: the name of the template to load
        :param template_string: (optional) the template source; if given, the
                                template is created from this string and
                                `templatename` is not used
        :return: the template object
        """
        if template_string is not None:
            return self.template_class(template_string)

        if self.use_package_naming:
            # Without a search path, "package.module.name" refers to the
            # resource "name" + extension inside "package.module"
            divider = templatename.rfind('.')
            if divider >= 0:
                from pkg_resources import resource_filename
                package = templatename[:divider]
                basename = templatename[divider + 1:] + self.extension
                templatename = resource_filename(package, basename)

        return self.loader.load(templatename)

    def _get_render_options(self, format=None, fragment=False):
        """Return the keyword arguments to use for rendering.

        :param format: the output method; defaults to ``self.default_format``
        :param fragment: not used by this implementation
        """
        if format is None:
            format = self.default_format
        kwargs = {'method': format}
        if self.default_encoding:
            kwargs['encoding'] = self.default_encoding
        return kwargs

    def render(self, info, format=None, fragment=False, template=None):
        """Render the template to a string using the provided info."""
        kwargs = self._get_render_options(format=format, fragment=fragment)
        return self.transform(info, template).render(**kwargs)

    def transform(self, info, template):
        """Render the output to an event stream.

        :param info: dictionary with the template data
        :param template: a `Template` instance, or the name of the template
                         to load
        """
        if not isinstance(template, Template):
            template = self.load_template(template)
        return template.generate(**info)
+
+
class MarkupTemplateEnginePlugin(AbstractTemplateEnginePlugin):
    """Plugin API implementation for markup templates."""

    template_class = MarkupTemplate
    extension = '.html'

    def __init__(self, extra_vars_func=None, options=None):
        """Set up the plugin and read the ``genshi.default_doctype`` and
        ``genshi.default_format`` options.

        :raises ConfigurationError: if the doctype or the output format is
                                    not known
        """
        AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)

        doctype_name = self.options.get('genshi.default_doctype')
        if not doctype_name:
            self.default_doctype = None
        else:
            doctype = DocType.get(doctype_name)
            if doctype is None:
                raise ConfigurationError('Unknown doctype %r' % doctype_name)
            self.default_doctype = doctype

        output_format = self.options.get('genshi.default_format',
                                         'html').lower()
        if output_format not in ('html', 'xhtml', 'xml', 'text'):
            raise ConfigurationError('Unknown output format %r'
                                     % output_format)
        self.default_format = output_format

    def _get_render_options(self, format=None, fragment=False):
        """Extend the inherited render options with the default doctype,
        unless a fragment is being rendered.
        """
        render_options = super(MarkupTemplateEnginePlugin,
                               self)._get_render_options(format, fragment)
        if not fragment and self.default_doctype:
            render_options['doctype'] = self.default_doctype
        return render_options

    def transform(self, info, template):
        """Render the output to an event stream."""
        # Start with the helper names, then layer the extra variables and
        # finally the template data on top
        namespace = {'ET': ET, 'HTML': HTML, 'XML': XML}
        if self.get_extra_vars:
            namespace.update(self.get_extra_vars())
        namespace.update(info)
        parent = super(MarkupTemplateEnginePlugin, self)
        return parent.transform(namespace, template)
+
+
class TextTemplateEnginePlugin(AbstractTemplateEnginePlugin):
    """Plugin API implementation for text templates."""

    template_class = TextTemplate
    extension = '.txt'
    default_format = 'text'

    def __init__(self, extra_vars_func=None, options=None):
        """Set up the plugin, switching to `NewTextTemplate` if the
        ``genshi.new_text_syntax`` option is enabled.
        """
        if options is None:
            options = {}

        use_new_syntax = options.get('genshi.new_text_syntax')
        if isinstance(use_new_syntax, basestring):
            enabled_values = ('1', 'on', 'yes', 'true')
            use_new_syntax = use_new_syntax.lower() in enabled_values
        if use_new_syntax:
            # Select the template class before the base class sets up the
            # loader with it
            self.template_class = NewTextTemplate

        AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)
diff --git a/genshi/template/text.py b/genshi/template/text.py
new file mode 100644
index 0000000..746226c
--- /dev/null
+++ b/genshi/template/text.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Plain text templating engine.
+
+This module implements two template language syntaxes, at least for a certain
+transitional period. `OldTextTemplate` (aliased to just `TextTemplate`) defines
+a syntax that was inspired by Cheetah/Velocity. `NewTextTemplate` on the other
+hand is inspired by the syntax of the Django template language, which has more
+explicit delimiting of directives, and is more flexible with regards to
+white space and line breaks.
+
+In a future release, `OldTextTemplate` will be phased out in favor of
+`NewTextTemplate`, as the names imply. Therefore the new syntax is strongly
+recommended for new projects, and existing projects may want to migrate to the
+new syntax to remain compatible with future Genshi releases.
+"""
+
+import re
+
+from genshi.core import TEXT
+from genshi.template.base import BadDirectiveError, Template, \
+ TemplateSyntaxError, EXEC, INCLUDE, SUB
+from genshi.template.eval import Suite
+from genshi.template.directives import *
+from genshi.template.directives import Directive
+from genshi.template.interpolation import interpolate
+
+__all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate']
+__docformat__ = 'restructuredtext en'
+
+
+class NewTextTemplate(Template):
+ r"""Implementation of a simple text-based template engine. This class will
+ replace `OldTextTemplate` in a future release.
+
+ It uses a more explicit delimiting style for directives: instead of the old
+ style which required putting directives on separate lines that were prefixed
+ with a ``#`` sign, directives and commenbtsr are enclosed in delimiter pairs
+ (by default ``{% ... %}`` and ``{# ... #}``, respectively).
+
+ Variable substitution uses the same interpolation syntax as for markup
+ languages: simple references are prefixed with a dollar sign, more complex
+ expression enclosed in curly braces.
+
+ >>> tmpl = NewTextTemplate('''Dear $name,
+ ...
+ ... {# This is a comment #}
+ ... We have the following items for you:
+ ... {% for item in items %}
+ ... * ${'Item %d' % item}
+ ... {% end %}
+ ... ''')
+ >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+ Dear Joe,
+ <BLANKLINE>
+ <BLANKLINE>
+ We have the following items for you:
+ <BLANKLINE>
+ * Item 1
+ <BLANKLINE>
+ * Item 2
+ <BLANKLINE>
+ * Item 3
+ <BLANKLINE>
+ <BLANKLINE>
+
+ By default, no spaces or line breaks are removed. If a line break should
+ not be included in the output, prefix it with a backslash:
+
+ >>> tmpl = NewTextTemplate('''Dear $name,
+ ...
+ ... {# This is a comment #}\
+ ... We have the following items for you:
+ ... {% for item in items %}\
+ ... * $item
+ ... {% end %}\
+ ... ''')
+ >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+ Dear Joe,
+ <BLANKLINE>
+ We have the following items for you:
+ * 1
+ * 2
+ * 3
+ <BLANKLINE>
+
+ Backslashes are also used to escape the start delimiter of directives and
+ comments:
+
+ >>> tmpl = NewTextTemplate('''Dear $name,
+ ...
+ ... \{# This is a comment #}
+ ... We have the following items for you:
+ ... {% for item in items %}\
+ ... * $item
+ ... {% end %}\
+ ... ''')
+ >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+ Dear Joe,
+ <BLANKLINE>
+ {# This is a comment #}
+ We have the following items for you:
+ * 1
+ * 2
+ * 3
+ <BLANKLINE>
+
+ :since: version 0.5
+ """
+ directives = [('def', DefDirective),
+ ('when', WhenDirective),
+ ('otherwise', OtherwiseDirective),
+ ('for', ForDirective),
+ ('if', IfDirective),
+ ('choose', ChooseDirective),
+ ('with', WithDirective)]
+ serializer = 'text'
+
+ _DIRECTIVE_RE = r'((?<!\\)%s\s*(\w+)\s*(.*?)\s*%s|(?<!\\)%s.*?%s)'
+ _ESCAPE_RE = r'\\\n|\\(\\)|\\(%s)|\\(%s)'
+
+ def __init__(self, source, filepath=None, filename=None, loader=None,
+ encoding=None, lookup='strict', allow_exec=False,
+ delims=('{%', '%}', '{#', '#}')):
+ self.delimiters = delims
+ Template.__init__(self, source, filepath=filepath, filename=filename,
+ loader=loader, encoding=encoding, lookup=lookup)
+
+ def _get_delims(self):
+ return self._delims
+ def _set_delims(self, delims):
+ if len(delims) != 4:
+ raise ValueError('delimiers tuple must have exactly four elements')
+ self._delims = delims
+ self._directive_re = re.compile(self._DIRECTIVE_RE % tuple(
+ [re.escape(d) for d in delims]
+ ), re.DOTALL)
+ self._escape_re = re.compile(self._ESCAPE_RE % tuple(
+ [re.escape(d) for d in delims[::2]]
+ ))
+ delimiters = property(_get_delims, _set_delims, """\
+ The delimiters for directives and comments. This should be a four item tuple
+ of the form ``(directive_start, directive_end, comment_start,
+ comment_end)``, where each item is a string.
+ """)
+
+ def _parse(self, source, encoding):
+ """Parse the template from text input."""
+ stream = [] # list of events of the "compiled" template
+ dirmap = {} # temporary mapping of directives to elements
+ depth = 0
+
+ source = source.read()
+ if isinstance(source, str):
+ source = source.decode(encoding or 'utf-8', 'replace')
+ offset = 0
+ lineno = 1
+
+ _escape_sub = self._escape_re.sub
+ def _escape_repl(mo):
+ groups = [g for g in mo.groups() if g]
+ if not groups:
+ return ''
+ return groups[0]
+
+ for idx, mo in enumerate(self._directive_re.finditer(source)):
+ start, end = mo.span(1)
+ if start > offset:
+ text = _escape_sub(_escape_repl, source[offset:start])
+ for kind, data, pos in interpolate(text, self.filepath, lineno,
+ lookup=self.lookup):
+ stream.append((kind, data, pos))
+ lineno += len(text.splitlines())
+
+ lineno += len(source[start:end].splitlines())
+ command, value = mo.group(2, 3)
+
+ if command == 'include':
+ pos = (self.filename, lineno, 0)
+ value = list(interpolate(value, self.filepath, lineno, 0,
+ lookup=self.lookup))
+ if len(value) == 1 and value[0][0] is TEXT:
+ value = value[0][1]
+ stream.append((INCLUDE, (value, None, []), pos))
+
+ elif command == 'python':
+ if not self.allow_exec:
+ raise TemplateSyntaxError('Python code blocks not allowed',
+ self.filepath, lineno)
+ try:
+ suite = Suite(value, self.filepath, lineno,
+ lookup=self.lookup)
+ except SyntaxError, err:
+ raise TemplateSyntaxError(err, self.filepath,
+ lineno + (err.lineno or 1) - 1)
+ pos = (self.filename, lineno, 0)
+ stream.append((EXEC, suite, pos))
+
+ elif command == 'end':
+ depth -= 1
+ if depth in dirmap:
+ directive, start_offset = dirmap.pop(depth)
+ substream = stream[start_offset:]
+ stream[start_offset:] = [(SUB, ([directive], substream),
+ (self.filepath, lineno, 0))]
+
+ elif command:
+ cls = self.get_directive(command)
+ if cls is None:
+ raise BadDirectiveError(command)
+ directive = 0, cls, value, None, (self.filepath, lineno, 0)
+ dirmap[depth] = (directive, len(stream))
+ depth += 1
+
+ offset = end
+
+ if offset < len(source):
+ text = _escape_sub(_escape_repl, source[offset:])
+ for kind, data, pos in interpolate(text, self.filepath, lineno,
+ lookup=self.lookup):
+ stream.append((kind, data, pos))
+
+ return stream
+
+
class OldTextTemplate(Template):
    """Legacy implementation of the old syntax text-based templates. This class
    is provided in a transition phase for backwards compatibility. New code
    should use the `NewTextTemplate` class and the improved syntax it provides.

    >>> tmpl = OldTextTemplate('''Dear $name,
    ...
    ... We have the following items for you:
    ... #for item in items
    ... * $item
    ... #end
    ...
    ... All the best,
    ... Foobar''')
    >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
    Dear Joe,
    <BLANKLINE>
    We have the following items for you:
    * 1
    * 2
    * 3
    <BLANKLINE>
    All the best,
    Foobar
    """
    directives = [('def', DefDirective),
                  ('when', WhenDirective),
                  ('otherwise', OtherwiseDirective),
                  ('for', ForDirective),
                  ('if', IfDirective),
                  ('choose', ChooseDirective),
                  ('with', WithDirective)]
    serializer = 'text'

    # Matches a whole directive line: either ``#end`` (with anything after
    # it), or ``#`` followed by a word or a second ``#`` (a comment line).
    # A ``#`` that is directly preceded by a backslash does not match.
    _DIRECTIVE_RE = re.compile(r'(?:^[ \t]*(?<!\\)#(end).*\n?)|'
                               r'(?:^[ \t]*(?<!\\)#((?:\w+|#).*)\n?)',
                               re.MULTILINE)

    def _parse(self, source, encoding):
        """Parse the template from text input.

        :param source: a file-like object providing the template text
        :param encoding: the encoding used to decode byte strings; UTF-8 is
                         assumed when this is not given
        :return: the list of events making up the parsed template
        :raise BadDirectiveError: if an unknown directive is used
        """
        stream = [] # list of events of the "compiled" template
        dirmap = {} # temporary mapping of directives to elements
        depth = 0

        source = source.read()
        if isinstance(source, str):
            source = source.decode(encoding or 'utf-8', 'replace')
        offset = 0
        lineno = 1

        for mo in self._DIRECTIVE_RE.finditer(source):
            start, end = mo.span()
            if start > offset:
                # Plain text between directives. Escaped hash signs are
                # unescaped here in the same way as for the text following
                # the last directive, so that ``\\#`` is rendered
                # consistently regardless of where it appears.
                text = source[offset:start]
                for kind, data, pos in interpolate(text.replace('\\#', '#'),
                                                   self.filepath, lineno,
                                                   lookup=self.lookup):
                    stream.append((kind, data, pos))
                lineno += len(text.splitlines())

            # Strip the leading whitespace and the hash sign itself
            text = source[start:end].lstrip()[1:]
            lineno += len(text.splitlines())
            directive = text.split(None, 1)
            if len(directive) > 1:
                command, value = directive
            else:
                command, value = directive[0], None

            if command == 'end':
                depth -= 1
                if depth in dirmap:
                    # Wrap everything emitted since the directive was opened
                    # into a single SUB event
                    directive, start_offset = dirmap.pop(depth)
                    substream = stream[start_offset:]
                    stream[start_offset:] = [(SUB, ([directive], substream),
                                              (self.filepath, lineno, 0))]
            elif command == 'include':
                pos = (self.filename, lineno, 0)
                stream.append((INCLUDE, (value.strip(), None, []), pos))
            elif command != '#':
                # Anything other than a ``##`` comment line must be a
                # registered directive
                cls = self.get_directive(command)
                if cls is None:
                    raise BadDirectiveError(command)
                directive = 0, cls, value, None, (self.filepath, lineno, 0)
                dirmap[depth] = (directive, len(stream))
                depth += 1

            offset = end

        if offset < len(source):
            # Remaining text after the last directive
            text = source[offset:].replace('\\#', '#')
            for kind, data, pos in interpolate(text, self.filepath, lineno,
                                               lookup=self.lookup):
                stream.append((kind, data, pos))

        return stream


# `TextTemplate` refers to the old syntax implementation.
TextTemplate = OldTextTemplate
diff --git a/genshi/util.py b/genshi/util.py
new file mode 100644
index 0000000..b964a01
--- /dev/null
+++ b/genshi/util.py
@@ -0,0 +1,274 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Various utility classes and functions."""
+
+import htmlentitydefs as entities
+import re
+
+__docformat__ = 'restructuredtext en'
+
+
class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items, and
    discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the least
    recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most recently
    used:

    >>> for key in cache:
    ...     print(key)
    D
    A
    C

    Items can be looked up with a fallback value, and removed explicitly:

    >>> cache.get('B', 'n/a')
    'n/a'
    >>> del cache['A']
    >>> len(cache)
    2

    The items are kept in a private dictionary and a doubly linked list, not
    in the `dict` base class itself. Only the methods defined on this class
    operate on the cached items; other methods inherited from `dict` do not
    see them.

    This code is based on the LRUCache class from ``myghtyutils.util``, written
    by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    class _Item(object):
        # Node of the doubly linked list that records the usage order
        def __init__(self, key, value):
            self.prv = self.nxt = None
            self.key = key
            self.value = value
        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache.

        :param capacity: the maximum number of items to keep
        """
        self._dict = dict()
        self.capacity = capacity
        self.head = None # most recently used item
        self.tail = None # least recently used item

    def __contains__(self, key):
        return key in self._dict

    def __delitem__(self, key):
        """Remove the item with the given key.

        :raise KeyError: if there is no such item
        """
        item = self._dict.pop(key)
        self._unlink_item(item)

    def __iter__(self):
        cur = self.head
        while cur:
            yield cur.key
            cur = cur.nxt

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            self._insert_item(item) # also enforces the capacity
        else:
            item.value = value
            self._update_item(item)
            self._manage_size()

    def __repr__(self):
        return repr(self._dict)

    def get(self, key, default=None):
        """Return the value for `key`, or `default` if there is no such item.

        A successful lookup marks the item as most recently used, exactly
        like ``cache[key]`` does.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def _insert_item(self, item):
        # Link a new item in at the head of the list
        item.prv = None
        item.nxt = self.head
        if self.head is not None:
            self.head.prv = item
        else:
            self.tail = item
        self.head = item
        self._manage_size()

    def _manage_size(self):
        # Drop items from the tail until the capacity is respected again
        while len(self._dict) > self.capacity:
            del self._dict[self.tail.key]
            if self.tail is not self.head:
                self.tail = self.tail.prv
                self.tail.nxt = None
            else:
                self.head = self.tail = None

    def _unlink_item(self, item):
        # Detach the item from the list, fixing up head and tail as needed
        if item.prv is not None:
            item.prv.nxt = item.nxt
        else:
            self.head = item.nxt
        if item.nxt is not None:
            item.nxt.prv = item.prv
        else:
            self.tail = item.prv
        item.prv = item.nxt = None

    def _update_item(self, item):
        # Move an item that is already linked to the head of the list
        if self.head is item:
            return

        prv = item.prv
        prv.nxt = item.nxt
        if item.nxt is not None:
            item.nxt.prv = prv
        else:
            self.tail = prv

        item.prv = None
        item.nxt = self.head
        self.head.prv = self.head = item
+
+
def flatten(items):
    """Flattens a potentially nested sequence into a flat list.

    Nested lists, tuples, sets and frozensets are expanded recursively; any
    other kind of item is copied to the result as is.

    :param items: the sequence to flatten
    :return: a new list containing the non-container items

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    flat = []
    for element in items:
        if not isinstance(element, (frozenset, list, set, tuple)):
            flat.append(element)
            continue
        flat.extend(flatten(element))
    return flat
+
+
def plaintext(text, keeplinebreaks=True):
    """Return the text with all entities and tags removed.

    >>> plaintext('<b>1 &lt; 2</b>')
    u'1 < 2'

    The `keeplinebreaks` parameter can be set to ``False`` to replace any line
    breaks by simple spaces:

    >>> plaintext('''<b>1
    ... &lt;
    ... 2</b>''', keeplinebreaks=False)
    u'1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept intact
    :return: the text with tags and entities removed
    """
    # Tags are removed first, then the entities in what remains are resolved
    without_tags = striptags(text)
    result = stripentities(without_tags)
    if keeplinebreaks:
        return result
    return result.replace('\n', ' ')
+
+
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    >>> stripentities('1 &lt; 2')
    u'1 < 2'
    >>> stripentities('more &hellip;')
    u'more \u2026'
    >>> stripentities('&#8230;')
    u'\u2026'
    >>> stripentities('&#x2026;')
    u'\u2026'

    The ``x`` of hexadecimal references may also be written in upper case:

    >>> stripentities('&#X2026;')
    u'\u2026'

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.

    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
    u'1 &lt; 2 \u2026'

    Unknown named entities are reduced to their bare name (or, if
    `keepxmlentities` is set, kept with the ampersand escaped as ``&amp;``).
    Numeric references that do not denote a valid character are left as they
    are.

    :param text: the text in which to replace the entities
    :param keepxmlentities: whether the core XML entities should be kept
    :return: the text with the entities replaced
    """
    def _replace_entity(match):
        numeric = match.group(1)
        if numeric: # numeric entity
            # The pattern accepts both "x" and "X" as hexadecimal marker, so
            # both must be recognized here
            try:
                if numeric[0] in 'xX':
                    codepoint = int(numeric[1:], 16)
                else:
                    codepoint = int(numeric, 10)
                return unichr(codepoint)
            except (ValueError, OverflowError):
                # Not a valid code point: keep the reference as written
                return match.group(0)
        else: # character entity
            ref = match.group(2)
            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
                return '&%s;' % ref
            try:
                return unichr(entities.name2codepoint[ref])
            except KeyError:
                if keepxmlentities:
                    return '&amp;%s;' % ref
                else:
                    return ref
    return _STRIPENTITIES_RE.sub(_replace_entity, text)
+
+
# DOTALL lets the comment alternative span line breaks, so that a multi-line
# comment containing ">" is removed as a whole instead of being cut off at
# the first ">" by the tag alternative.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)
def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    This includes comments that extend over more than one line:

    >>> striptags('''<!-- a > b
    ... c -->test''')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)
+
+
def stringrepr(string):
    """Return a single-quoted, ASCII-only literal for the given string.

    The string is encoded to ASCII with the ``backslashreplace`` error
    handler, single quotes are escaped with a backslash, and the result is
    wrapped in single quotes. If the encoded form is longer than the input
    (meaning that at least one character had to be replaced by an escape
    sequence), the literal is prefixed with ``u``.

    :param string: the string to represent
    :return: the literal as a string
    """
    encoded = string.encode('ascii', 'backslashreplace')
    escaped = encoded.replace("'", "\\'")
    literal = "'" + escaped + "'"
    prefix = ''
    if len(encoded) > len(string):
        prefix = 'u'
    return prefix + literal
+
+
# Compatibility fallback implementations for older Python versions

try:
    # Re-export the built-ins under this module's namespace when they exist
    any, all = any, all
except NameError:
    def any(S):
        """Return `True` if at least one item of `S` is true."""
        for element in S:
            if element:
                break
        else:
            return False
        return True

    def all(S):
        """Return `True` if no item of `S` is false."""
        for element in S:
            if not element:
                break
        else:
            return True
        return False