23 files changed, 12365 insertions, 0 deletions
diff --git a/genshi/__init__.py b/genshi/__init__.py
new file mode 100644
index 0000000..02f4347
--- /dev/null
+++ b/genshi/__init__.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""This package provides various means for generating and processing web markup
+(XML or HTML).
+
+The design is centered around the concept of streams of markup events (similar
+in concept to SAX parsing events) which can be processed in a uniform manner
+independently of where or how they are produced.
+"""
+
+__docformat__ = 'restructuredtext en'
+__version__ = '0.6'
+
+from genshi.core import *
+from genshi.input import ParseError, XML, HTML
diff --git a/genshi/builder.py b/genshi/builder.py
new file mode 100644
index 0000000..724e364
--- /dev/null
+++ b/genshi/builder.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support for programmatically generating markup streams from Python code using
+a very simple syntax. The main entry point to this module is the `tag` object
+(which is actually an instance of the ``ElementFactory`` class). You should
+rarely (if ever) need to directly import and use any of the other classes in
+this module.
+
+Elements can be created through attribute access on the `tag` object. For
+example:
+
+>>> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.')
+>>> doc
+<Element "p">
+
+This produces an `Element` instance which can be further modified to add child
+nodes and attributes. This is done by "calling" the element: positional
+arguments are added as child nodes (alternatively, the `Element.append` method
+can be used for that purpose), whereas keyword arguments are added as
+attributes:
+
+>>> doc(tag.br)
+<Element "p">
+>>> print(doc)
+<p>Some text and <a href="http://example.org/">a link</a>.<br/></p>
+
+If an attribute name collides with a Python keyword, simply append an underscore
+to the name:
+
+>>> doc(class_='intro')
+<Element "p">
+>>> print(doc)
+<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
+
+As shown above, an `Element` can easily be directly rendered to XML text by
+printing it or using the Python ``str()`` function. This is basically a
+shortcut for converting the `Element` to a stream and serializing that
+stream:
+
+>>> stream = doc.generate()
+>>> stream #doctest: +ELLIPSIS
+<genshi.core.Stream object at ...>
+>>> print(stream)
+<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
+
+
+The `tag` object also allows creating "fragments", which are basically lists
+of nodes (elements or text) that don't have a parent element. This can be useful
+for creating snippets of markup that are attached to a parent element later (for
+example in a template). Fragments are created by calling the `tag` object, which
+returns an object of type `Fragment`:
+
+>>> fragment = tag('Hello, ', tag.em('world'), '!')
+>>> fragment
+<Fragment>
+>>> print(fragment)
+Hello, <em>world</em>!
+"""
+
+from genshi.core import Attrs, Markup, Namespace, QName, Stream, \
+                        START, END, TEXT
+
+__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag']
+__docformat__ = 'restructuredtext en'
+
+
+class Fragment(object):
+    """A flat sequence of markup nodes (elements, streams and text) that is
+    not wrapped in a parent element.
+    """
+    __slots__ = ['children']
+
+    def __init__(self):
+        """Initialize the fragment with an empty list of child nodes."""
+        self.children = []
+
+    def __add__(self, other):
+        return Fragment()(self, other)
+
+    def __call__(self, *args):
+        """Add each positional argument as a child node via `append`.
+
+        :return: the fragment itself, so that calls can be chained
+        """
+        for node in args:
+            self.append(node)
+        return self
+
+    def __iter__(self):
+        return self._generate()
+
+    def __repr__(self):
+        return '<%s>' % (type(self).__name__,)
+
+    def __str__(self):
+        return str(self.generate())
+
+    def __unicode__(self):
+        return unicode(self.generate())
+
+    def __html__(self):
+        return Markup(self.generate())
+
+    def append(self, node):
+        """Add a single node to the end of the fragment.
+
+        :param node: an `Element`, `Stream`, string or number is stored as is;
+                     a plain `Fragment` contributes its children; `None` is
+                     ignored; any other iterable is appended item by item
+        """
+        if isinstance(node, (Stream, Element, basestring, int, float, long)):
+            # Known/primitive types skip the (slower) iterability probe below
+            self.children.append(node)
+        elif isinstance(node, Fragment):
+            self.children.extend(node.children)
+        elif node is not None:
+            try:
+                for item in node:
+                    self.append(item)
+            except TypeError:
+                self.children.append(node)  # e.g. not iterable: keep as is
+
+    def _generate(self):
+        for node in self.children:
+            if isinstance(node, Fragment):
+                events = node._generate()
+            elif isinstance(node, Stream):
+                events = node
+            else:
+                if not isinstance(node, basestring):
+                    node = unicode(node)
+                events = [(TEXT, node, (None, -1, -1))]
+            for event in events:
+                yield event
+
+    def generate(self):
+        """Wrap the events produced for this fragment in a `Stream`.
+
+        :rtype: `Stream`
+        """
+        return Stream(self._generate())
+
+
+def _kwargs_to_attrs(kwargs):
+    """Build `Attrs` from keyword arguments; `None` values are skipped."""
+    pairs, seen = [], set()
+    for key, value in kwargs.items():
+        key = key.rstrip('_').replace('_', '-')  # class_ -> class, a_b -> a-b
+        if value is not None and key not in seen:
+            pairs.append((QName(key), unicode(value)))
+            seen.add(key)
+    return Attrs(pairs)
+
+
+class Element(Fragment):
+    """Builder-style generator for a single XML element and its content.
+
+    An element is created by passing the tag name to the constructor:
+
+    >>> print(Element('strong'))
+    <strong/>
+
+    Keyword arguments given to the constructor become attributes. Their
+    values are converted to strings, and any special XML characters show up
+    escaped in the output:
+
+    >>> print(Element('textarea', rows=10, cols=60))
+    <textarea rows="10" cols="60"/>
+    >>> print(Element('span', title='1 < 2'))
+    <span title="1 &lt; 2"/>
+    >>> print(Element('span', title='"baz"'))
+    <span title="&#34;baz&#34;"/>
+
+    The " character is escaped using a numerical entity.
+    The order in which attributes are rendered is undefined.
+
+    An attribute whose value is `None` is left out of the output
+    altogether:
+
+    >>> print(Element('a', name=None))
+    <a/>
+
+    An attribute name that collides with a Python keyword can be given by
+    appending an underscore to it:
+
+    >>> print(Element('div', class_='warning'))
+    <div class="warning"/>
+
+    Child nodes are added by "calling" the element with them as positional
+    arguments. The same call also accepts keyword arguments for adding
+    attributes, just as the constructor does.
+
+    >>> print(Element('ul')(Element('li'), Element('li')))
+    <ul><li/><li/></ul>
+    >>> print(Element('a')('Label'))
+    <a>Label</a>
+    >>> print(Element('a')('Label', href="target"))
+    <a href="target">Label</a>
+
+    Strings passed as child nodes become text nodes. Special characters in
+    such strings are escaped automatically:
+
+    >>> print(Element('em')('Hello world'))
+    <em>Hello world</em>
+    >>> print(Element('em')(42))
+    <em>42</em>
+    >>> print(Element('em')('1 < 2'))
+    <em>1 &lt; 2</em>
+
+    Text and elements can be mixed freely:
+
+    >>> print(Element('p')('Hello ', Element('b')('world')))
+    <p>Hello <b>world</b></p>
+
+    Quotes are not escaped inside text nodes:
+    >>> print(Element('p')('"Hello"'))
+    <p>"Hello"</p>
+
+    The addition operator combines an element with other elements or
+    strings; the result is a `Fragment` object that contains the
+    operands:
+
+    >>> print(Element('br') + 'some text' + Element('br'))
+    <br/>some text<br/>
+
+    The `Namespace` and/or `QName` classes can be used to generate elements
+    that are bound to a namespace:
+
+    >>> from genshi.core import Namespace
+    >>> xhtml = Namespace('http://www.w3.org/1999/xhtml')
+    >>> print(Element(xhtml.html, lang='en'))
+    <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
+    """
+    __slots__ = ['tag', 'attrib']
+
+    def __init__(self, tag_, **attrib):
+        Fragment.__init__(self)
+        self.tag = QName(tag_)
+        self.attrib = _kwargs_to_attrs(attrib)
+
+    def __call__(self, *args, **kwargs):
+        """Add the positional arguments as child nodes and the keyword
+        arguments as attributes.
+
+        :return: the element itself so that calls can be chained
+        :rtype: `Element`
+        :see: `Fragment.append`
+        """
+        self.attrib = self.attrib | _kwargs_to_attrs(kwargs)
+        return Fragment.__call__(self, *args)
+
+    def __repr__(self):
+        return '<%s "%s">' % (type(self).__name__, self.tag)
+
+    def _generate(self):
+        unknown_pos = (None, -1, -1)
+        yield START, (self.tag, self.attrib), unknown_pos
+        for kind, data, pos in Fragment._generate(self):
+            yield kind, data, pos
+        yield END, self.tag, unknown_pos
+
+    def generate(self):
+        """Wrap the events produced for this element in a `Stream`.
+
+        :rtype: `Stream`
+        """
+        return Stream(self._generate())
+
+
+class ElementFactory(object):
+    """Creates `Element` and `Fragment` objects with a compact syntax.
+
+    Accessing an attribute of the factory object produces a new element
+    named after that attribute:
+
+    >>> factory = ElementFactory()
+    >>> print(factory.foo)
+    <foo/>
+    >>> print(factory.foo(id=2))
+    <foo id="2"/>
+
+    Calling the factory produces a markup fragment, that is, a list of nodes
+    without a parent element:
+
+    >>> print(factory('Hello, ', factory.em('world'), '!'))
+    Hello, <em>world</em>!
+
+    A namespace can be passed to the constructor to bind the factory to it:
+
+    >>> factory = ElementFactory('http://www.w3.org/1999/xhtml')
+    >>> print(factory.html(lang="en"))
+    <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
+
+    Item access on an existing factory selects a different namespace for a
+    specific element:
+
+    >>> factory = ElementFactory()
+    >>> print(factory.html(factory['http://www.w3.org/2000/svg'].g(id=3)))
+    <html><g xmlns="http://www.w3.org/2000/svg" id="3"/></html>
+
+    Usually, the `ElementFactory` class is not used directly. Rather, the
+    `tag` instance should be used to create elements.
+    """
+
+    def __init__(self, namespace=None):
+        """Set up the factory, optionally binding it to a namespace.
+
+        :param namespace: namespace URI or `Namespace` object, `None` for none
+        """
+        if not namespace or isinstance(namespace, Namespace):
+            self.namespace = namespace
+        else:
+            self.namespace = Namespace(namespace)
+
+    def __call__(self, *args):
+        """Build a fragment whose child nodes are the positional arguments.
+
+        :return: the created `Fragment`
+        :rtype: `Fragment`
+        """
+        fragment = Fragment()
+        return fragment(*args)
+
+    def __getitem__(self, namespace):
+        """Create another factory bound to the given namespace.
+
+        :param namespace: the namespace URI or `Namespace` object
+        :return: an `ElementFactory` whose elements are bound to that
+                 namespace
+        :rtype: `ElementFactory`
+        """
+        return ElementFactory(namespace)
+
+    def __getattr__(self, name):
+        """Build an `Element` named after the accessed attribute.
+
+        :param name: the tag name of the element to create
+        :return: the new `Element`, in the factory's namespace if one is set
+        """
+        qname = self.namespace and self.namespace[name]
+        return Element(qname or name)
+
+
+tag = ElementFactory()  # created without a namespace argument
+"""Global `ElementFactory` bound to the default namespace.
+
+:type: `ElementFactory`
+"""
diff --git a/genshi/core.py b/genshi/core.py
new file mode 100644
index 0000000..f7cddff
--- /dev/null
+++ b/genshi/core.py
@@ -0,0 +1,727 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Core classes for markup processing."""
+
+try:
+    reduce # builtin in Python < 3
+except NameError:
+    from functools import reduce
+from itertools import chain
+import operator
+
+from genshi.util import plaintext, stripentities, striptags, stringrepr
+
+__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
+           'QName']
+__docformat__ = 'restructuredtext en'
+
+
+class StreamEventKind(str):
+    """Interned `str` subclass that names a kind of markup stream event."""
+    __slots__ = []
+    _instances = {}  # registry holding the one instance kept per value
+    def __new__(cls, val):
+        candidate = str.__new__(cls, val)
+        return cls._instances.setdefault(val, candidate)
+
+
+class Stream(object):
+    """An iterable sequence of markup events.
+
+    Iterating over a stream yields the events of the underlying iterable.
+
+    Each event is a tuple of the form::
+
+      (kind, data, position)
+
+    ``kind`` names the event type (`START`, `END`, `TEXT`, and so on), the
+    content of ``data`` depends on that kind, and ``position`` is a
+    ``(filename, line, offset)`` tuple locating the original element or text
+    in the input. When that location is not known, ``position`` is
+    ``(None, -1, -1)``.
+
+    A stream can also be serialized to text: `serialize()` returns an
+    iterator over the generated strings, whereas `render()` returns the whole
+    generated text at once. Both take parameters that control how the stream
+    is serialized.
+    """
+    __slots__ = ['events', 'serializer']
+
+    START = StreamEventKind('START') #: a start tag
+    END = StreamEventKind('END') #: an end tag
+    TEXT = StreamEventKind('TEXT') #: literal text
+    XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
+    DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
+    START_NS = StreamEventKind('START_NS') #: start namespace mapping
+    END_NS = StreamEventKind('END_NS') #: end namespace mapping
+    START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
+    END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
+    PI = StreamEventKind('PI') #: processing instruction
+    COMMENT = StreamEventKind('COMMENT') #: comment
+
+    def __init__(self, events, serializer=None):
+        """Create a stream on top of the given markup events.
+
+        :param events: a sequence or other iterable that provides the events
+        :param serializer: the serialization method used for this stream when
+                           none is specified explicitly
+
+        :note: Changed in 0.5: added the `serializer` argument
+        """
+        self.serializer = serializer #: The default serialization method
+        self.events = events #: The underlying iterable producing the events
+
+    def __iter__(self):
+        return iter(self.events)
+
+    def __or__(self, function):
+        """Apply a filter or serializer to the stream with the ``|`` operator,
+        which provides a syntax similar to pipes on Unix shells.
+
+        Assume the following stream produced by the `HTML` function:
+
+        >>> from genshi.input import HTML
+        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
+        >>> print(html)
+        <p onclick="alert('Whoa')">Hello, world!</p>
+
+        The HTML sanitizer filter can be applied to that stream with the pipe
+        notation like this:
+
+        >>> from genshi.filters import HTMLSanitizer
+        >>> sanitizer = HTMLSanitizer()
+        >>> print(html | sanitizer)
+        <p>Hello, world!</p>
+
+        Any function that accepts a stream and produces one (a stream being
+        anything that iterates over events) can serve as a filter:
+
+        >>> def uppercase(stream):
+        ...     for kind, data, pos in stream:
+        ...         if kind is TEXT:
+        ...             data = data.upper()
+        ...         yield kind, data, pos
+        >>> print(html | sanitizer | uppercase)
+        <p>HELLO, WORLD!</p>
+
+        The same notation works for serializers:
+
+        >>> from genshi.output import TextSerializer
+        >>> output = TextSerializer()
+        >>> print(html | sanitizer | uppercase | output)
+        HELLO, WORLD!
+
+        A serializer should normally be the last stage of the "pipeline";
+        placing one somewhere in the middle may produce unexpected results.
+
+        :param function: the callable object to apply as a filter
+        :return: the filtered stream, as a new `Stream`
+        """
+        filtered = _ensure(function(self))
+        return Stream(filtered, serializer=self.serializer)
+
+    def filter(self, *filters):
+        """Apply the given filters to the stream, one after the other.
+
+        Every filter must be a callable that takes the stream as its argument
+        and returns the filtered stream. The call::
+
+            stream.filter(filter1, filter2)
+
+        is equivalent to::
+
+            stream | filter1 | filter2
+
+        :param filters: the callable objects to apply as filters
+        :return: the filtered stream (the stream itself if no filters are
+                 given)
+        :rtype: `Stream`
+        """
+        stream = self
+        for function in filters:
+            stream = stream | function
+        return stream
+
+    def render(self, method=None, encoding='utf-8', out=None, **kwargs):
+        """Serialize the stream and return the result as a single string.
+
+        Additional keyword arguments are handed to the serializer, so which
+        ones are accepted depends on the `method` parameter value.
+
+        :param method: determines how the stream is serialized; can be either
+                       "xml", "xhtml", "html", "text", or a custom serializer
+                       class; if `None`, the default serialization method of
+                       the stream is used
+        :param encoding: how the output string should be encoded; if set to
+                         `None`, this method returns a `unicode` object
+        :param out: a file-like object that the output should be written to
+                    instead of being returned as one big string; note that if
+                    this is a file or socket (or similar), the `encoding` must
+                    not be `None` (that is, the output must be encoded)
+        :return: a `str` or `unicode` object (depending on the `encoding`
+                 parameter), or `None` if the `out` parameter is provided
+        :rtype: `basestring`
+
+        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
+        :note: Changed in 0.5: added the `out` parameter
+        """
+        from genshi.output import encode
+        if method is None:
+            method = self.serializer or 'xml'
+        return encode(self.serialize(method=method, **kwargs), method=method,
+                      encoding=encoding, out=out)
+
+    def select(self, path, namespaces=None, variables=None):
+        """Return a new stream made up of the events that match the given
+        XPath expression.
+
+        >>> from genshi import HTML
+        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
+        >>> print(stream.select('elem'))
+        <elem>foo</elem><elem>bar</elem>
+        >>> print(stream.select('elem/text()'))
+        foobar
+
+        The outermost element of the stream is the *context node* of the
+        XPath test, so only its child elements are tested. The expression
+        "doc" therefore matches nothing in the example above:
+
+        >>> print(stream.select('doc'))
+        <BLANKLINE>
+
+        You can use the "." expression to match the context node itself
+        (although that usually makes little sense):
+
+        >>> print(stream.select('.'))
+        <doc><elem>foo</elem><elem>bar</elem></doc>
+
+        :param path: a string containing the XPath expression
+        :param namespaces: mapping of namespace prefixes used in the path
+        :param variables: mapping of variable names to values
+        :return: the selected substream
+        :rtype: `Stream`
+        :raises PathSyntaxError: if the given path expression is invalid or not
+                                 supported
+        """
+        from genshi.path import Path
+        xpath = Path(path)
+        return xpath.select(self, namespaces, variables)
+
+    def serialize(self, method='xml', **kwargs):
+        """Generate strings corresponding to a specific serialization of the
+        stream.
+
+        In contrast to `render()`, the serialized output is produced
+        incrementally by an iterator rather than returned as a single string.
+
+        Additional keyword arguments are handed to the serializer, so which
+        ones are accepted depends on the `method` parameter value.
+
+        :param method: determines how the stream is serialized; can be either
+                       "xml", "xhtml", "html", "text", or a custom serializer
+                       class; if `None`, the default serialization method of
+                       the stream is used
+        :return: an iterator over the serialization results (`Markup` or
+                 `unicode` objects, depending on the serialization method)
+        :rtype: ``iterator``
+        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
+        """
+        from genshi.output import get_serializer
+        if method is None:
+            method = self.serializer or 'xml'
+        serializer = get_serializer(method, **kwargs)
+        return serializer(_ensure(self))
+
+    def __str__(self):
+        return self.render()
+
+    def __unicode__(self):
+        return self.render(encoding=None)
+
+    def __html__(self):
+        return self
+
+
+START = Stream.START  # module-level aliases for the `Stream` event kinds
+END = Stream.END
+TEXT = Stream.TEXT
+XML_DECL = Stream.XML_DECL
+DOCTYPE = Stream.DOCTYPE
+START_NS = Stream.START_NS
+END_NS = Stream.END_NS
+START_CDATA = Stream.START_CDATA
+END_CDATA = Stream.END_CDATA
+PI = Stream.PI
+COMMENT = Stream.COMMENT
+
+
+def _ensure(stream):
+    """Ensure that every item on the stream is actually a markup event."""
+    stream = iter(stream)
+    # Fetch the first item without `.next()`; an empty stream yields nothing
+    for event in stream:
+        break
+    else:
+        return
+    # Only the first item is examined: unless it is a 3-tuple, convert them all
+    if type(event) is not tuple or len(event) != 3:
+        for event in chain([event], stream):
+            if hasattr(event, 'totuple'):
+                event = event.totuple()
+            else:
+                event = TEXT, unicode(event), (None, -1, -1)
+            yield event
+        return
+    # This looks like a markup event stream: pass it through unchanged
+    yield event
+    for event in stream:
+        yield event
+
+
+class Attrs(tuple):
+    """Immutable sequence type that stores the attributes of an element.
+
+    Ordering of the attributes is preserved, while access by name is also
+    supported.
+
+    >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+    >>> attrs
+    Attrs([('href', '#'), ('title', 'Foo')])
+
+    >>> 'href' in attrs
+    True
+    >>> 'tabindex' in attrs
+    False
+    >>> attrs.get('title')
+    'Foo'
+
+    Instances may not be manipulated directly. Instead, the operators ``|`` and
+    ``-`` can be used to produce new instances that have specific attributes
+    added, replaced or removed.
+
+    To remove an attribute, use the ``-`` operator. The right hand side can be
+    either a string or a set/sequence of strings, identifying the name(s) of
+    the attribute(s) to remove:
+
+    >>> attrs - 'title'
+    Attrs([('href', '#')])
+    >>> attrs - ('title', 'href')
+    Attrs()
+
+    The original instance is not modified, but the operator can of course be
+    used with an assignment:
+
+    >>> attrs
+    Attrs([('href', '#'), ('title', 'Foo')])
+    >>> attrs -= 'title'
+    >>> attrs
+    Attrs([('href', '#')])
+
+    To add a new attribute, use the ``|`` operator, where the right hand value
+    is a sequence of ``(name, value)`` tuples (which includes `Attrs`
+    instances):
+
+    >>> attrs | [('title', 'Bar')]
+    Attrs([('href', '#'), ('title', 'Bar')])
+
+    If the attributes already contain an attribute with a given name, the value
+    of that attribute is replaced:
+
+    >>> attrs | [('href', 'http://example.org/')]
+    Attrs([('href', 'http://example.org/')])
+    """
+    __slots__ = []
+
+    def __contains__(self, name):
+        """Return whether an attribute with the specified name is present.
+
+        :return: `True` if the attribute is present, `False` otherwise
+        :rtype: `bool`
+        """
+        for attr, _ in self:
+            if attr == name:
+                return True
+        return False
+
+    def __getitem__(self, i):
+        """Return an item or slice of the attributes list.
+
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1]
+        ('title', 'Foo')
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
+        items = tuple.__getitem__(self, i)
+        if type(i) is slice:
+            return Attrs(items)
+        return items
+
+    def __getslice__(self, i, j):
+        """Return a slice of the attributes list as an `Attrs` instance.
+
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
+        return Attrs(tuple.__getslice__(self, i, j))
+
+    def __or__(self, attrs):
+        """Return a new instance in which the attributes in `attrs` replace
+        existing attributes of the same name or, if new, are added at the end.
+
+        :param attrs: a sequence of ``(name, value)`` tuples
+        :rtype: `Attrs`
+        """
+        repl = dict([(an, av) for an, av in attrs if an in self])
+        return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] +
+                     [(an, av) for an, av in attrs if an not in self])
+
+    def __repr__(self):
+        if not self:
+            return 'Attrs()'
+        return 'Attrs([%s])' % ', '.join([repr(item) for item in self])
+
+    def __sub__(self, names):
+        """Return a new instance from which all attributes with a name in
+        `names` have been removed.
+
+        :param names: the name(s) of the attributes to remove
+        :return: a new instance without the named attributes
+        :rtype: `Attrs`
+        """
+        if isinstance(names, basestring):
+            names = (names,)
+        return Attrs([(name, val) for name, val in self if name not in names])
+
+    def get(self, name, default=None):
+        """Return the value of the attribute with the specified name, or the
+        value of the `default` parameter if no such attribute is found.
+
+        :param name: the name of the attribute
+        :param default: the value to return when the attribute does not exist
+        :return: the attribute value, or the `default` value if that attribute
+                 does not exist
+        :rtype: `object`
+        """
+        for attr, value in self:
+            if attr == name:
+                return value
+        return default
+
+    def totuple(self):
+        """Return the attributes as a markup event.
+
+        The returned event is a `TEXT` event, the data is the value of all
+        attributes joined together.
+
+        >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
+        ('TEXT', '#Foo', (None, -1, -1))
+
+        :return: a `TEXT` event
+        :rtype: `tuple`
+        """
+        return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
+
+
+class Markup(unicode):
+    """Marks a string as being safe for inclusion in HTML/XML output without
+    needing to be escaped.
+    """
+    __slots__ = []
+
+    def __add__(self, other):
+        return Markup(unicode.__add__(self, escape(other)))
+
+    def __radd__(self, other):
+        return Markup(unicode.__add__(escape(other), self))
+
+    def __mod__(self, args):
+        if isinstance(args, dict):
+            args = dict(zip(args.keys(), map(escape, args.values())))
+        elif isinstance(args, (list, tuple)):
+            args = tuple(map(escape, args))
+        else:
+            args = escape(args)
+        return Markup(unicode.__mod__(self, args))
+
+    def __mul__(self, num):
+        return Markup(unicode.__mul__(self, num))
+    __rmul__ = __mul__
+
+    def __repr__(self):
+        return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))
+
+    def join(self, seq, escape_quotes=True):
+        """Return a `Markup` object which is the concatenation of the strings
+        in the given sequence, where this `Markup` object is the separator
+        between the joined elements.
+        
+        Any element in the sequence that is not a `Markup` instance is
+        automatically escaped.
+        
+        :param seq: the sequence of strings to join
+        :param escape_quotes: whether double quote characters in the elements
+                              should be escaped
+        :return: the joined `Markup` object
+        :rtype: `Markup`
+        :see: `escape`
+        """
+        return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
+                                          for item in seq]))
+
+    @classmethod
+    def escape(cls, text, quotes=True):
+        """Create a Markup instance from a string and escape special characters
+        it may contain (<, >, & and \").
+        
+        >>> escape('"1 < 2"')
+        <Markup u'&#34;1 &lt; 2&#34;'>
+        
+        If the `quotes` parameter is set to `False`, the \" character is left
+        as is. Escaping quotes is generally only required for strings that are
+        to be used in attribute values.
+        
+        >>> escape('"1 < 2"', quotes=False)
+        <Markup u'"1 &lt; 2"'>
+        
+        :param text: the text to escape (non-strings are coerced to `unicode`)
+        :param quotes: if ``True``, double quote characters are escaped in
+                       addition to the other special characters
+        :return: the escaped `Markup` string
+        :rtype: `Markup`
+        """
+        if not text:
+            return cls()
+        if type(text) is cls:
+            return text
+        if hasattr(text, '__html__'):
+            return cls(text.__html__())  # use `cls` like every other branch
+
+        text = unicode(text).replace('&', '&amp;') \
+                            .replace('<', '&lt;') \
+                            .replace('>', '&gt;')
+        if quotes:
+            text = text.replace('"', '&#34;')
+        return cls(text)
+
+    def unescape(self):
+        """Reverse-escapes &, <, >, and \" and returns a `unicode` object.
+        
+        >>> Markup('1 &lt; 2').unescape()
+        u'1 < 2'
+        
+        :return: the unescaped string (always `unicode`, also when empty)
+        :rtype: `unicode`
+        :see: `genshi.core.unescape`
+        """
+        if not self:
+            return u''
+        return unicode(self).replace('&#34;', '"') \
+                            .replace('&gt;', '>') \
+                            .replace('&lt;', '<') \
+                            .replace('&amp;', '&')  # `&amp;` must come last
+
+    def stripentities(self, keepxmlentities=False):
+        """Return a copy of the text with any character or numeric entities
+        replaced by the equivalent Unicode characters.
+        
+        If the `keepxmlentities` parameter is provided and evaluates to `True`,
+        the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
+        ``&quot;``) are not stripped.
+        
+        :return: a `Markup` instance with entities removed
+        :rtype: `Markup`
+        :see: `genshi.util.stripentities`
+        """
+        return Markup(stripentities(self, keepxmlentities=keepxmlentities))
+
+    def striptags(self):
+        """Return a copy of the text with all XML/HTML tags removed.
+        
+        :return: a `Markup` instance with all tags removed
+        :rtype: `Markup`
+        :see: `genshi.util.striptags`
+        """
+        return Markup(striptags(self))  # calls the module-level `striptags`
+
+
+try:
+    from genshi._speedups import Markup
+except ImportError:
+    pass # speedups module not importable: keep the Python `Markup` class
+
+
+escape = Markup.escape  # module-level alias for the `Markup.escape` classmethod
+
+
+def unescape(text):
+    """Turn the entities for &, <, > and \" back into plain characters.
+    
+    >>> unescape(Markup('1 &lt; 2'))
+    u'1 < 2'
+    
+    Only `Markup` instances are processed; any other object is handed back
+    exactly as it was passed in.
+    
+    >>> unescape('1 &lt; 2')
+    '1 &lt; 2'
+    
+    :param text: the text to unescape
+    :return: the unescaped string
+    :rtype: `unicode`
+    """
+    if isinstance(text, Markup):
+        return text.unescape()
+    return text
+
+
+class Namespace(object):
+    """Utility class creating and testing elements with a namespace.
+    
+    Internally, namespace URIs are encoded in the `QName` of any element or
+    attribute, the namespace URI being enclosed in curly braces. This class
+    helps create and test these strings.
+    
+    A `Namespace` object is instantiated with the namespace URI.
+    
+    >>> html = Namespace('http://www.w3.org/1999/xhtml')
+    >>> html
+    Namespace('http://www.w3.org/1999/xhtml')
+    >>> html.uri
+    u'http://www.w3.org/1999/xhtml'
+    
+    The `Namespace` object can then be used to generate `QName` objects with
+    that namespace:
+    
+    >>> html.body
+    QName('http://www.w3.org/1999/xhtml}body')
+    >>> html.body.localname
+    u'body'
+    >>> html.body.namespace
+    u'http://www.w3.org/1999/xhtml'
+    
+    The same works using item access notation, which is useful for element or
+    attribute names that are not valid Python identifiers:
+    
+    >>> html['body']
+    QName('http://www.w3.org/1999/xhtml}body')
+    
+    A `Namespace` object can also be used to test whether a specific `QName`
+    belongs to that namespace using the ``in`` operator:
+    
+    >>> qname = html.body
+    >>> qname in html
+    True
+    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
+    False
+    """
+    def __new__(cls, uri):  # an existing instance of `cls` is returned as is
+        if type(uri) is not cls:
+            return object.__new__(cls)
+        return uri
+
+    def __init__(self, uri):
+        self.uri = unicode(uri)
+
+    def __getnewargs__(self):  # pickle support: the URI is the only state
+        return (self.uri,)
+
+    def __getstate__(self):
+        return self.uri
+
+    def __setstate__(self, uri):
+        self.uri = uri
+
+    def __eq__(self, other):  # compares equal to namespaces and plain URIs
+        if not isinstance(other, Namespace):
+            return self.uri == other
+        return self.uri == other.uri
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        return hash(self.uri)
+
+    def __contains__(self, qname):  # `qname in ns` checks `qname.namespace`
+        return qname.namespace == self.uri
+
+    def __getitem__(self, name):  # `ns[name]` / `ns.name` build a `QName`
+        return QName(self.uri + '}' + name)
+    __getattr__ = __getitem__
+
+    def __unicode__(self):
+        return self.uri
+
+    def __str__(self):
+        return self.uri.encode('utf-8')
+
+    def __repr__(self):
+        return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
+
+
+# The namespace used by attributes such as xml:lang and xml:space
+XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
+
+
+class QName(unicode):
+    """A qualified element or attribute name.
+    
+    The unicode value of instances of this class contains the qualified name of
+    the element or attribute, in the form ``{namespace-uri}local-name``. The
+    namespace URI can be obtained through the additional `namespace` attribute,
+    while the local name can be accessed through the `localname` attribute.
+    
+    >>> qname = QName('foo')
+    >>> qname
+    QName('foo')
+    >>> qname.localname
+    u'foo'
+    >>> qname.namespace
+    
+    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
+    >>> qname
+    QName('http://www.w3.org/1999/xhtml}body')
+    >>> qname.localname
+    u'body'
+    >>> qname.namespace
+    u'http://www.w3.org/1999/xhtml'
+    """
+    __slots__ = ['namespace', 'localname']
+
+    def __new__(cls, qname):
+        """Create the `QName` instance.
+        
+        :param qname: the qualified name as a string of the form
+                      ``{namespace-uri}local-name``, where the leading curly
+                      brace is optional (the stored value is the same either way)
+        """
+        if type(qname) is cls:
+            return qname
+
+        parts = qname.lstrip('{').split('}', 1)  # drop any leading brace
+        if len(parts) > 1:
+            self = unicode.__new__(cls, '{%s}%s' % tuple(parts))  # one '{' only
+            self.namespace, self.localname = map(unicode, parts)
+        else:
+            self = unicode.__new__(cls, parts[0])
+            self.namespace, self.localname = None, unicode(parts[0])
+        return self
+
+    def __getnewargs__(self):  # pickle support: value without the leading '{'
+        return (self.lstrip('{'),)
+
+    def __repr__(self):
+        return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
diff --git a/genshi/filters/__init__.py b/genshi/filters/__init__.py
new file mode 100644
index 0000000..efc2565
--- /dev/null
+++ b/genshi/filters/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of a number of stream filters."""
+
+from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
+from genshi.filters.i18n import Translator
+from genshi.filters.transform import Transformer
+
+__docformat__ = 'restructuredtext en'
diff --git a/genshi/filters/html.py b/genshi/filters/html.py
new file mode 100644
index 0000000..d554a54
--- /dev/null
+++ b/genshi/filters/html.py
@@ -0,0 +1,453 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of a number of stream filters."""
+
+try:
+    any
+except NameError:
+    from genshi.util import any
+import re
+
+from genshi.core import Attrs, QName, stripentities
+from genshi.core import END, START, TEXT, COMMENT
+
+__all__ = ['HTMLFormFiller', 'HTMLSanitizer']
+__docformat__ = 'restructuredtext en'
+
+
+class HTMLFormFiller(object):
+    """A stream filter that can populate HTML forms from a dictionary of values.
+    
+    >>> from genshi.input import HTML
+    >>> html = HTML('''<form>
+    ...   <p><input type="text" name="foo" /></p>
+    ... </form>''')
+    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
+    >>> print(html | filler)
+    <form>
+      <p><input type="text" name="foo" value="bar"/></p>
+    </form>
+    """
+    # TODO: only select the first radio button, and the first select option
+    #       (if not in a multiple-select)
+    # TODO: only apply to elements in the XHTML namespace (or no namespace)?
+
+    def __init__(self, name=None, id=None, data=None, passwords=False):
+        """Create the filter.
+        
+        :param name: The name of the form that should be populated. If this
+                     parameter is given, only forms where the ``name`` attribute
+                     value matches the parameter are processed.
+        :param id: The ID of the form that should be populated. If this
+                   parameter is given, only forms where the ``id`` attribute
+                   value matches the parameter are processed.
+        :param data: The dictionary of form values, where the keys are the names
+                     of the form fields, and the values are the values to fill
+                     in.
+        :param passwords: Whether password input fields should be populated.
+                          This is off by default for security reasons (for
+                          example, a password may end up in the browser cache)
+        :note: Changed in 0.5.2: added the `passwords` option
+        """
+        self.name = name
+        self.id = id
+        if data is None:
+            data = {}
+        self.data = data
+        self.passwords = passwords
+
+    def __call__(self, stream):
+        """Apply the filter to the given stream.
+        
+        :param stream: the markup event stream to filter
+        """
+        in_form = in_select = in_option = in_textarea = False
+        select_value = option_value = textarea_value = None
+        option_start = None
+        option_text = []
+        no_option_value = False
+
+        for kind, data, pos in stream:
+
+            if kind is START:
+                tag, attrs = data
+                tagname = tag.localname
+
+                if tagname == 'form' and (
+                        self.name and attrs.get('name') == self.name or
+                        self.id and attrs.get('id') == self.id or
+                        not (self.id or self.name)):
+                    in_form = True
+
+                elif in_form:
+                    if tagname == 'input':
+                        input_type = attrs.get('type', '').lower()
+                        if input_type in ('checkbox', 'radio'):
+                            name = attrs.get('name')
+                            if name and name in self.data:
+                                value = self.data[name]
+                                declval = attrs.get('value')
+                                checked = False
+                                if isinstance(value, (list, tuple)):
+                                    if declval:
+                                        checked = declval in [unicode(v) for v
+                                                              in value]
+                                    else:
+                                        checked = any(value)
+                                else:
+                                    if declval:
+                                        checked = declval == unicode(value)
+                                    elif input_type == 'checkbox':
+                                        checked = bool(value)
+                                if checked:
+                                    attrs |= [(QName('checked'), 'checked')]
+                                elif 'checked' in attrs:
+                                    attrs -= 'checked'
+                        elif input_type in ('', 'hidden', 'text') \
+                                or input_type == 'password' and self.passwords:
+                            name = attrs.get('name')
+                            if name and name in self.data:
+                                value = self.data[name]
+                                if isinstance(value, (list, tuple)):
+                                    value = value[0]
+                                if value is not None:
+                                    attrs |= [
+                                        (QName('value'), unicode(value))
+                                    ]
+                    elif tagname == 'select':
+                        name = attrs.get('name')
+                        if name in self.data:
+                            select_value = self.data[name]
+                            in_select = True
+                    elif tagname == 'textarea':
+                        name = attrs.get('name')
+                        if name in self.data:
+                            textarea_value = self.data.get(name)
+                            if isinstance(textarea_value, (list, tuple)):
+                                textarea_value = textarea_value[0]
+                            in_textarea = True
+                    elif in_select and tagname == 'option':
+                        option_start = kind, data, pos
+                        option_value = attrs.get('value')
+                        if option_value is None:
+                            no_option_value = True
+                            option_value = ''
+                        in_option = True
+                        continue
+                yield kind, (tag, attrs), pos
+
+            elif in_form and kind is TEXT:
+                if in_select and in_option:
+                    if no_option_value:
+                        option_value += data
+                    option_text.append((kind, data, pos))
+                    continue
+                elif in_textarea:
+                    continue
+                yield kind, data, pos
+
+            elif in_form and kind is END:
+                tagname = data.localname
+                if tagname == 'form':
+                    in_form = False
+                elif tagname == 'select':
+                    in_select = False
+                    select_value = None
+                elif in_select and tagname == 'option':
+                    if isinstance(select_value, (tuple, list)):
+                        selected = option_value in [unicode(v) for v
+                                                    in select_value]
+                    else:
+                        selected = option_value == unicode(select_value)
+                    okind, (tag, attrs), opos = option_start
+                    if selected:
+                        attrs |= [(QName('selected'), 'selected')]
+                    elif 'selected' in attrs:
+                        attrs -= 'selected'
+                    yield okind, (tag, attrs), opos
+                    if option_text:
+                        for event in option_text:
+                            yield event
+                    in_option = False
+                    no_option_value = False
+                    option_start = option_value = None
+                    option_text = []
+                elif in_textarea and tagname == 'textarea':  # filled ones only
+                    if textarea_value:
+                        yield TEXT, unicode(textarea_value), pos
+                    in_textarea = False
+                yield kind, data, pos
+
+            else:
+                yield kind, data, pos
+
+
+class HTMLSanitizer(object):
+    """A filter that removes potentially dangerous HTML tags and attributes
+    from the stream.
+    
+    >>> from genshi import HTML
+    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
+    >>> print(html | HTMLSanitizer())
+    <div/>
+    
+    The default set of safe tags and attributes can be modified when the filter
+    is instantiated. For example, to allow inline ``style`` attributes, the
+    following instantiation would work:
+    
+    >>> html = HTML('<div style="background: #000"></div>')
+    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+    >>> print(html | sanitizer)
+    <div style="background: #000"/>
+    
+    Note that even in this case, the filter *does* attempt to remove dangerous
+    constructs from style attributes:
+
+    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
+    >>> print(html | sanitizer)
+    <div style="color: #000"/>
+    
+    This handles HTML entities, unicode escapes in CSS and Javascript text, as
+    well as a lot of other things. However, the style tag is still excluded by
+    default because it is very hard for such sanitizing to be completely safe,
+    especially considering how much error recovery current web browsers perform.
+    
+    It also does some basic filtering of CSS properties that may be used for
+    typical phishing attacks. For more sophisticated filtering, this class
+    provides a couple of hooks that can be overridden in sub-classes.
+    
+    :warn: Note that this special processing of CSS is currently only applied to
+           style attributes, **not** style elements.
+    """
+
+    SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
+        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
+        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
+        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
+        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
+        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
+        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
+        'ul', 'var'])
+
+    SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
+        'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
+        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
+        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
+        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
+        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
+        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
+        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
+        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
+        'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
+        'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
+
+    SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])
+
+    URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
+        'src'])
+
+    def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
+                 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
+        """Create the sanitizer.
+        
+        The exact set of allowed elements and attributes can be configured.
+        
+        :param safe_tags: a set of tag names that are considered safe
+        :param safe_attrs: a set of attribute names that are considered safe
+        :param safe_schemes: a set of URI schemes that are considered safe
+        :param uri_attrs: a set of names of attributes that contain URIs
+        """
+        self.safe_tags = safe_tags
+        "The set of tag names that are considered safe."
+        self.safe_attrs = safe_attrs
+        "The set of attribute names that are considered safe."
+        self.uri_attrs = uri_attrs
+        "The set of names of attributes that may contain URIs."
+        self.safe_schemes = safe_schemes
+        "The set of URI schemes that are considered safe."
+
+    def __call__(self, stream):
+        """Apply the filter to the given stream.
+        
+        :param stream: the markup event stream to filter
+        """
+        waiting_for = None
+
+        for kind, data, pos in stream:
+            if kind is START:
+                if waiting_for:
+                    continue
+                tag, attrs = data
+                if not self.is_safe_elem(tag, attrs):
+                    waiting_for = tag
+                    continue
+
+                new_attrs = []
+                for attr, value in attrs:
+                    value = stripentities(value)
+                    if attr not in self.safe_attrs:
+                        continue
+                    elif attr in self.uri_attrs:
+                        # Don't allow URI schemes such as "javascript:"
+                        if not self.is_safe_uri(value):
+                            continue
+                    elif attr == 'style':
+                        # Remove dangerous CSS declarations from inline styles
+                        decls = self.sanitize_css(value)
+                        if not decls:
+                            continue
+                        value = '; '.join(decls)
+                    new_attrs.append((attr, value))
+
+                yield kind, (tag, Attrs(new_attrs)), pos
+
+            elif kind is END:
+                tag = data
+                if waiting_for:
+                    if waiting_for == tag:
+                        waiting_for = None
+                else:
+                    yield kind, data, pos
+
+            elif kind is not COMMENT:
+                if not waiting_for:
+                    yield kind, data, pos
+
+    def is_safe_css(self, propname, value):
+        """Determine whether the given css property declaration is to be
+        considered safe for inclusion in the output.
+        
+        :param propname: the CSS property name
+        :param value: the value of the property
+        :return: whether the property value should be considered safe
+        :rtype: bool
+        :since: version 0.6
+        """
+        if propname == 'position':
+            return False
+        if propname.startswith('margin') and '-' in value:
+            # Negative margins can be used for phishing
+            return False
+        return True
+
+    def is_safe_elem(self, tag, attrs):
+        """Determine whether the given element should be considered safe for
+        inclusion in the output.
+        
+        :param tag: the tag name of the element
+        :type tag: QName
+        :param attrs: the element attributes
+        :type attrs: Attrs
+        :return: whether the element should be considered safe
+        :rtype: bool
+        :since: version 0.6
+        """
+        if tag not in self.safe_tags:
+            return False
+        if tag.localname == 'input':
+            input_type = attrs.get('type', '').lower()
+            if input_type == 'password':
+                return False
+        return True
+
+    def is_safe_uri(self, uri):
+        """Determine whether the given URI is to be considered safe for
+        inclusion in the output.
+        
+        The default implementation checks whether the scheme of the URI is in
+        the set of allowed URIs (`safe_schemes`).
+        
+        >>> sanitizer = HTMLSanitizer()
+        >>> sanitizer.is_safe_uri('http://example.org/')
+        True
+        >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
+        False
+        
+        :param uri: the URI to check
+        :return: `True` if the URI can be considered safe, `False` otherwise
+        :rtype: `bool`
+        :since: version 0.4.3
+        """
+        if '#' in uri:
+            uri = uri.split('#', 1)[0] # Strip out the fragment identifier
+        if ':' not in uri:
+            return True # This is a relative URI
+        chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
+        return ''.join(chars).lower() in self.safe_schemes
+
+    def sanitize_css(self, text):
+        """Remove potentially dangerous property declarations from CSS code.
+        
+        In particular, properties using the CSS ``url()`` function with a scheme
+        that is not considered safe are removed:
+        
+        >>> sanitizer = HTMLSanitizer()
+        >>> sanitizer.sanitize_css(u'''
+        ...   background: url(javascript:alert("foo"));
+        ...   color: #000;
+        ... ''')
+        [u'color: #000']
+        
+        Also, the proprietary Internet Explorer function ``expression()`` is
+        always stripped, however it is capitalized:
+        
+        >>> sanitizer.sanitize_css(u'''
+        ...   background: #fff;
+        ...   color: #000;
+        ...   width: e/**/xpression(alert("foo"));
+        ... ''')
+        [u'background: #fff', u'color: #000']
+        
+        :param text: the CSS text; this is expected to be `unicode` and to not
+                     contain any character or numeric references
+        :return: a list of declarations that are considered safe
+        :rtype: `list`
+        :since: version 0.4.3
+        """
+        decls = []
+        text = self._strip_css_comments(self._replace_unicode_escapes(text))
+        for decl in text.split(';'):
+            decl = decl.strip()
+            if not decl:
+                continue
+            try:
+                propname, value = decl.split(':', 1)
+            except ValueError:
+                continue
+            if not self.is_safe_css(propname.strip().lower(), value.strip()):
+                continue
+            is_evil = False
+            if 'expression' in value.lower():  # CSS syntax ignores ASCII case
+                is_evil = True
+            for match in re.finditer(r'(?i)url\s*\(([^)]+)', value):
+                if not self.is_safe_uri(match.group(1)):
+                    is_evil = True
+                    break
+            if not is_evil:
+                decls.append(decl.strip())
+        return decls
+
+    _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
+    _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub
+
+    def _replace_unicode_escapes(self, text):
+        def _repl(match):
+            return unichr(int(match.group(1), 16))
+        return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text))
+
+    _CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.S).sub  # span line breaks too
+
+    def _strip_css_comments(self, text):
+        return self._CSS_COMMENTS('', text)
diff --git a/genshi/filters/i18n.py b/genshi/filters/i18n.py
new file mode 100644
index 0000000..7852875
--- /dev/null
+++ b/genshi/filters/i18n.py
@@ -0,0 +1,1238 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Directives and utilities for internationalization and localization of
+templates.
+
+:since: version 0.4
+:note: Directives support added since version 0.6
+"""
+
+try:
+    any
+except NameError:
+    from genshi.util import any
+from gettext import NullTranslations
+import os
+import re
+from types import FunctionType
+
+from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
+                        XML_NAMESPACE, _ensure, StreamEventKind
+from genshi.template.eval import _ast
+from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
+from genshi.template.directives import Directive, StripDirective
+from genshi.template.markup import MarkupTemplate, EXEC
+
+__all__ = ['Translator', 'extract']
+__docformat__ = 'restructuredtext en'
+
+
+I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
+# Custom stream event kinds defined for this module:
+MSGBUF = StreamEventKind('MSGBUF')  # placeholder for the msgbuf output
+SUB_START = StreamEventKind('SUB_START')
+SUB_END = StreamEventKind('SUB_END')
+# Default value of the `gettext_functions` parameter of the extract methods:
+GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
+                     'ugettext', 'ungettext')
+
+
+class I18NDirective(Directive):
+    """Base class for the i18n directives; adds no processing of its own."""
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        return _apply_directives(stream, directives, ctxt, vars)  # hand-off
+
+
+class ExtractableI18NDirective(I18NDirective):
+    """Base for i18n directives that yield messages through `extract()`."""
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        raise NotImplementedError  # subclasses provide the extraction
+
+
+class CommentDirective(I18NDirective):
+    """Implementation of the ``i18n:comment`` template directive which adds
+    translation comments.
+    
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <p i18n:comment="As in Foo Bar">Foo</p>
+    ... </html>''')
+    >>> translator = Translator()
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream))
+    [(2, None, u'Foo', [u'As in Foo Bar'])]
+    """
+    __slots__ = ['comment']  # the comment text for the translators
+    # None goes to the base class, presumably to skip expression parsing
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.comment = value  # the attribute value, stored unchanged
+
+
+class MsgDirective(ExtractableI18NDirective):
+    r"""Implementation of the ``i18n:msg`` directive which marks inner content
+    as translatable. Consider the following examples:
+    
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:msg="">
+    ...     <p>Foo</p>
+    ...     <p>Bar</p>
+    ...   </div>
+    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
+    ... </html>''')
+    
+    >>> translator = Translator()
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream))
+    [(2, None, u'[1:Foo]\n    [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
+    >>> print(tmpl.generate().render())
+    <html>
+      <div><p>Foo</p>
+        <p>Bar</p></div>
+      <p>Foo <em>bar</em>!</p>
+    </html>
+
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:msg="fname, lname">
+    ...     <p>First Name: ${fname}</p>
+    ...     <p>Last Name: ${lname}</p>
+    ...   </div>
+    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+    [(2, None, u'[1:First Name: %(fname)s]\n    [2:Last Name: %(lname)s]', []),
+    (6, None, u'Foo [1:bar]!', [])]
+
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:msg="fname, lname">
+    ...     <p>First Name: ${fname}</p>
+    ...     <p>Last Name: ${lname}</p>
+    ...   </div>
+    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> print(tmpl.generate(fname='John', lname='Doe').render())
+    <html>
+      <div><p>First Name: John</p>
+        <p>Last Name: Doe</p></div>
+      <p>Foo <em>bar</em>!</p>
+    </html>
+
+    Leading and trailing white-space is stripped off to make it simpler for
+    translators. Stripping it is not that important since it only occurs in
+    the HTML source; the rendered output will remain the same.
+    """
+    __slots__ = ['params', 'lineno']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):  # value: comma-separated parameter names
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.params = [param.strip() for param in value.split(',') if param]
+        self.lineno = lineno  # reported with the message by extract()
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # presumably the <i18n:msg> element form
+            value = value.get('params', '').strip()  # '' if not given
+        return super(MsgDirective, cls).attach(template, stream, value.strip(),
+                                               namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        gettext = ctxt.get('_i18n.gettext')
+        if ctxt.get('_i18n.domain'):  # a domain is active: go through dgettext
+            dgettext = ctxt.get('_i18n.dgettext')
+            assert hasattr(dgettext, '__call__'), \
+                'No domain gettext function passed'
+            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
+        # _generate() buffers the content and re-emits it as translated events
+        def _generate():
+            msgbuf = MessageBuffer(self)
+            previous = stream.next()
+            if previous[0] is START:  # leading start tag: passed on as-is
+                yield previous
+            else:
+                msgbuf.append(*previous)
+            previous = stream.next()  # always keep the newest event back
+            for kind, data, pos in stream:
+                msgbuf.append(*previous)
+                previous = kind, data, pos
+            if previous[0] is not END:  # last event is content, not an end tag
+                msgbuf.append(*previous)
+                previous = None
+            for event in msgbuf.translate(gettext(msgbuf.format())):
+                yield event
+            if previous:  # the end tag held back above
+                yield previous
+
+        return _apply_directives(_generate(), directives, ctxt, vars)
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        msgbuf = MessageBuffer(self)
+        strip = False  # set once a leading start tag has been left out
+        # Yields attribute messages first, then the buffered content message
+        stream = iter(stream)
+        previous = stream.next()
+        if previous[0] is START:
+            for message in translator._extract_attrs(previous,
+                                                     gettext_functions,
+                                                     search_text=search_text):
+                yield message
+            previous = stream.next()
+            strip = True
+        for event in stream:  # `previous` trails one event behind `event`
+            if event[0] is START:
+                for message in translator._extract_attrs(event,
+                                                         gettext_functions,
+                                                         search_text=search_text):
+                    yield message
+            msgbuf.append(*previous)
+            previous = event
+        if not strip:  # otherwise the last event is left out of the message
+            msgbuf.append(*previous)
+        # comment_stack may be None (the default): fall back to no comment
+        yield self.lineno, None, msgbuf.format(), (comment_stack or [])[-1:]
+
+
+class ChooseBranchDirective(I18NDirective):
+    __slots__ = ['params']
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        self.params = ctxt.get('_i18n.choose.params', [])[:]  # private copy
+        msgbuf = MessageBuffer(self)
+        stream = _apply_directives(stream, directives, ctxt, vars)
+        # The first event is passed on if it is a start tag, else buffered
+        previous = stream.next()
+        if previous[0] is START:
+            yield previous
+        else:
+            msgbuf.append(*previous)
+
+        try:
+            previous = stream.next()
+        except StopIteration:
+            # For example <i18n:singular> or <i18n:plural> directives
+            yield MSGBUF, (), -1 # the place holder for msgbuf output
+            ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
+            return
+        # Buffer everything but the last event, which is looked at below
+        for event in stream:
+            msgbuf.append(*previous)
+            previous = event
+        yield MSGBUF, (), -1 # the place holder for msgbuf output
+
+        if previous[0] is END:
+            yield previous # the outer end tag
+        else:
+            msgbuf.append(*previous)
+        ctxt['_i18n.choose.%s' % self.tagname] = msgbuf  # for i18n:choose
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None, msgbuf=None):
+        stream = iter(stream)
+        previous = stream.next()
+        # NOTE: `msgbuf` must be given; the None default cannot be appended to
+        if previous[0] is START:
+            # skip the enclosing element, but still extract its attributes
+            for message in translator._extract_attrs(previous,
+                                                     gettext_functions,
+                                                     search_text=search_text):
+                yield message
+            previous = stream.next()
+
+        for event in stream:  # `previous` trails one event behind `event`
+            if previous[0] is START:
+                for message in translator._extract_attrs(previous,
+                                                         gettext_functions,
+                                                         search_text=search_text):
+                    yield message
+            msgbuf.append(*previous)
+            previous = event
+        # A final END event is left out of the buffer, anything else is kept
+        if previous[0] is not END:
+            msgbuf.append(*previous)
+
+
+class SingularDirective(ChooseBranchDirective):
+    """Implementation of the ``i18n:singular`` directive, the singular branch
+    of an ``i18n:choose`` directive; all behaviour is inherited."""
+
+
+class PluralDirective(ChooseBranchDirective):
+    """Implementation of the ``i18n:plural`` directive, the plural branch
+    of an ``i18n:choose`` directive; all behaviour is inherited."""
+
+
+class ChooseDirective(ExtractableI18NDirective):
+    """Implementation of the ``i18n:choose`` directive which provides plural
+    internationalisation of strings.
+    
+    This directive requires at least one parameter, the one which evaluates to
+    an integer which will allow to choose the plural/singular form. If you also
+    have expressions inside the singular and plural version of the string you
+    also need to pass a name for those parameters. Consider the following
+    examples:
+    
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:choose="num; num">
+    ...     <p i18n:singular="">There is $num coin</p>
+    ...     <p i18n:plural="">There are $num coins</p>
+    ...   </div>
+    ... </html>''')
+    >>> translator = Translator()
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+    [(2, 'ngettext', (u'There is %(num)s coin',
+                      u'There are %(num)s coins'), [])]
+
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:choose="num; num">
+    ...     <p i18n:singular="">There is $num coin</p>
+    ...     <p i18n:plural="">There are $num coins</p>
+    ...   </div>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> print(tmpl.generate(num=1).render())
+    <html>
+      <div>
+        <p>There is 1 coin</p>
+      </div>
+    </html>
+    >>> print(tmpl.generate(num=2).render())
+    <html>
+      <div>
+        <p>There are 2 coins</p>
+      </div>
+    </html>
+
+    When used as an element and not as an attribute:
+
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <i18n:choose numeral="num" params="num">
+    ...     <p i18n:singular="">There is $num coin</p>
+    ...     <p i18n:plural="">There are $num coins</p>
+    ...   </i18n:choose>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+    [(2, 'ngettext', (u'There is %(num)s coin',
+                      u'There are %(num)s coins'), [])]
+    """
+    __slots__ = ['numeral', 'params', 'lineno']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):  # value has the form "numeral; name, name"
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        params = [v.strip() for v in value.split(';')]
+        self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
+        self.params = params and [name.strip() for name in
+                                  params[0].split(',') if name] or []
+        self.lineno = lineno  # reported with the message by extract()
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: attributes arrive as a dict
+            numeral = value.get('numeral', '').strip()
+            assert numeral != '', "at least pass the numeral param"  # by value
+            params = [v.strip() for v in value.get('params', '').split(',')]
+            value = '%s; ' % numeral + ', '.join(params)  # "numeral; a, b"
+        return super(ChooseDirective, cls).attach(template, stream, value,
+                                                  namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        ctxt.push({'_i18n.choose.params': self.params,
+                   '_i18n.choose.singular': None,
+                   '_i18n.choose.plural': None})
+        # Frame for the results of the branches; popped at the very end
+        ngettext = ctxt.get('_i18n.ngettext')
+        assert hasattr(ngettext, '__call__'), 'No ngettext function available'
+        dngettext = ctxt.get('_i18n.dngettext')
+        if not dngettext:
+            dngettext = lambda d, s, p, n: ngettext(s, p, n)  # domain dropped
+
+        new_stream = []  # the outer events plus one MSGBUF placeholder
+        singular_stream = None
+        singular_msgbuf = None
+        plural_stream = None
+        plural_msgbuf = None
+
+        numeral = self.numeral.evaluate(ctxt)
+        is_plural = self._is_plural(numeral, ngettext)
+        # Render the branches up front; the singular one is always rendered
+        for event in stream:
+            if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
+                                       for d in event[1][0]):
+                subdirectives, substream = event[1]
+
+                if isinstance(subdirectives[0], SingularDirective):
+                    singular_stream = list(_apply_directives(substream,
+                                                             subdirectives,
+                                                             ctxt, vars))
+                    new_stream.append((MSGBUF, None, (None, -1, -1)))
+                # MSGBUF above marks where the chosen branch gets emitted
+                elif isinstance(subdirectives[0], PluralDirective):
+                    if is_plural:  # otherwise the plural branch is dropped
+                        plural_stream = list(_apply_directives(substream,
+                                                               subdirectives,
+                                                               ctxt, vars))
+
+            else:
+                new_stream.append(event)
+
+        if ctxt.get('_i18n.domain'):
+            ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
+                                                 s, p, n)
+        # The buffers are expected to have been stored by the branch directives
+        singular_msgbuf = ctxt.get('_i18n.choose.singular')
+        if is_plural:
+            plural_msgbuf = ctxt.get('_i18n.choose.plural')
+            msgbuf, choice = plural_msgbuf, plural_stream
+        else:
+            msgbuf, choice = singular_msgbuf, singular_stream
+            plural_msgbuf = MessageBuffer(self)  # plural was not rendered
+
+        for kind, data, pos in new_stream:
+            if kind is MSGBUF:  # emit the chosen branch at this position
+                for event in choice:
+                    if event[0] is MSGBUF:  # the branch's own placeholder
+                        translation = ngettext(singular_msgbuf.format(),
+                                               plural_msgbuf.format(),
+                                               numeral)
+                        for subevent in msgbuf.translate(translation):
+                            yield subevent
+                    else:
+                        yield event
+            else:
+                yield kind, data, pos
+
+        ctxt.pop()  # drop the frame pushed at the top
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        strip = False  # set once a leading start tag has been left out
+        stream = iter(stream)
+        previous = stream.next()
+
+        if previous[0] is START:
+            # skip the enclosing element, but still extract its attributes
+            for message in translator._extract_attrs(previous,
+                                                     gettext_functions,
+                                                     search_text=search_text):
+                yield message
+            previous = stream.next()
+            strip = True
+
+        singular_msgbuf = MessageBuffer(self)
+        plural_msgbuf = MessageBuffer(self)
+        # Branch content goes to its own buffer, everything else to both
+        for event in stream:  # `previous` trails one event behind `event`
+            if previous[0] is SUB:
+                directives, substream = previous[1]
+                for directive in directives:
+                    if isinstance(directive, SingularDirective):
+                        for message in directive.extract(translator,
+                                substream, gettext_functions, search_text,
+                                comment_stack, msgbuf=singular_msgbuf):
+                            yield message
+                    elif isinstance(directive, PluralDirective):
+                        for message in directive.extract(translator,
+                                substream, gettext_functions, search_text,
+                                comment_stack, msgbuf=plural_msgbuf):
+                            yield message
+                    elif not isinstance(directive, StripDirective):
+                        singular_msgbuf.append(*previous)
+                        plural_msgbuf.append(*previous)
+            else:
+                if previous[0] is START:
+                    for message in translator._extract_attrs(previous,
+                                                             gettext_functions,
+                                                             search_text):
+                        yield message
+                singular_msgbuf.append(*previous)
+                plural_msgbuf.append(*previous)
+            previous = event
+
+        if not strip:  # otherwise the last event is left out of the message
+            singular_msgbuf.append(*previous)
+            plural_msgbuf.append(*previous)
+        # comment_stack may be None (the default): fall back to no comment
+        yield self.lineno, 'ngettext', \
+            (singular_msgbuf.format(), plural_msgbuf.format()), \
+            (comment_stack or [])[-1:]
+
+    def _is_plural(self, numeral, ngettext):
+        # XXX: should we test which form was chosen like this!?!?!?
+        # Probe `ngettext` with two sentinel strings that no catalogue should
+        # contain and check which of them is handed back for `numeral`
+        singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
+        plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
+        return ngettext(singular, plural, numeral) == plural  # True: plural
+
+
+class DomainDirective(I18NDirective):
+    """Implementation of the ``i18n:domain`` directive which allows choosing
+    another i18n domain(catalog) to translate from.
+    
+    >>> from genshi.filters.tests.i18n import DummyTranslations
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <p i18n:msg="">Bar</p>
+    ...   <div i18n:domain="foo">
+    ...     <p i18n:msg="">FooBar</p>
+    ...     <p>Bar</p>
+    ...     <p i18n:domain="bar" i18n:msg="">Bar</p>
+    ...     <p i18n:domain="">Bar</p>
+    ...   </div>
+    ...   <p>Bar</p>
+    ... </html>''')
+
+    >>> translations = DummyTranslations({'Bar': 'Voh'})
+    >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
+    >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
+    >>> translator = Translator(translations)
+    >>> translator.setup(tmpl)
+
+    >>> print(tmpl.generate().render())
+    <html>
+      <p>Voh</p>
+      <div>
+        <p>BarFoo</p>
+        <p>foo_Bar</p>
+        <p>bar_Bar</p>
+        <p>Voh</p>
+      </div>
+      <p>Voh</p>
+    </html>
+    """
+    __slots__ = ['domain']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):  # value: the name of the domain
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.domain = value and value.strip() or '__DEFAULT__'  # '' -> default
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # presumably the <i18n:domain name=".."> form
+            value = value.get('name')  # None when the attribute is missing
+        return super(DomainDirective, cls).attach(template, stream, value,
+                                                  namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        ctxt.push({'_i18n.domain': self.domain})  # active for the content
+        for event in _apply_directives(stream, directives, ctxt, vars):
+            yield event
+        ctxt.pop()  # only reached once the stream has been fully consumed
+
+
+class Translator(DirectiveFactory):
+    """Can extract and translate localizable strings from markup streams and
+    templates.
+    
+    For example, assume the following template:
+    
+    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
+    ...   <head>
+    ...     <title>Example</title>
+    ...   </head>
+    ...   <body>
+    ...     <h1>Example</h1>
+    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+    ...   </body>
+    ... </html>''', filename='example.html')
+    
+    For demonstration, we define a dummy ``gettext``-style function with a
+    hard-coded translation table, and pass that to the `Translator` initializer:
+    
+    >>> def pseudo_gettext(string):
+    ...     return {
+    ...         'Example': 'Beispiel',
+    ...         'Hello, %(name)s': 'Hallo, %(name)s'
+    ...     }[string]
+    >>> translator = Translator(pseudo_gettext)
+    
+    Next, the translator needs to be prepended to any already defined filters
+    on the template:
+    
+    >>> tmpl.filters.insert(0, translator)
+    
+    When generating the template output, our hard-coded translations should be
+    applied as expected:
+    
+    >>> print(tmpl.generate(username='Hans', _=pseudo_gettext))
+    <html>
+      <head>
+        <title>Beispiel</title>
+      </head>
+      <body>
+        <h1>Beispiel</h1>
+        <p>Hallo, Hans</p>
+      </body>
+    </html>
+    
+    Note that elements defining ``xml:lang`` attributes that do not contain
+    variable expressions are ignored by this filter. That can be used to
+    exclude specific parts of a template from being extracted and translated.
+    """
+
+    directives = [
+        ('domain', DomainDirective),
+        ('comment', CommentDirective),
+        ('msg', MsgDirective),
+        ('choose', ChooseDirective),
+        ('singular', SingularDirective),
+        ('plural', PluralDirective)
+    ]
+
+    IGNORE_TAGS = frozenset([
+        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+    ])
+    INCLUDE_ATTRS = frozenset([
+        'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
+    ])
+    NAMESPACE = I18N_NAMESPACE
+
+    def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
+                 include_attrs=INCLUDE_ATTRS, extract_text=True):
+        """Initialize the translator.
+        
+        :param translate: the translation function, for example ``gettext`` or
+                          ``ugettext``.
+        :param ignore_tags: a set of tag names that should not be localized
+        :param include_attrs: a set of attribute names that should be localized
+        :param extract_text: whether the content of text nodes should be
+                             extracted, or only text in explicit ``gettext``
+                             function calls
+        
+        :note: Changed in 0.6: the `translate` parameter can now be either
+               a ``gettext``-style function, or an object compatible with the
+               ``NullTranslations`` or ``GNUTranslations`` interface
+        """
+        self.translate = translate  # the default instance is shared
+        self.ignore_tags = ignore_tags
+        self.include_attrs = include_attrs
+        self.extract_text = extract_text  # False also turns off translation
+
+    def __call__(self, stream, ctxt=None, translate_text=True,
+                 translate_attrs=True):
+        """Translate any localizable strings in the given stream.
+        
+        This function shouldn't be called directly. Instead, an instance of
+        the `Translator` class should be registered as a filter with the
+        `Template` or the `TemplateLoader`, or applied as a regular stream
+        filter. If used as a template filter, it should be inserted in front of
+        all the default filters.
+        
+        :param stream: the markup event stream
+        :param ctxt: the template context; receives the ``_i18n.*`` functions
+        :param translate_text: whether text nodes should be translated (used
+                               internally)
+        :param translate_attrs: whether attribute values should be translated
+                                (used internally)
+        :return: the localized stream
+        """
+        ignore_tags = self.ignore_tags
+        include_attrs = self.include_attrs
+        skip = 0  # nesting depth inside an element that is not localized
+        xml_lang = XML_NAMESPACE['lang']
+        if not self.extract_text:
+            translate_text = False
+            translate_attrs = False
+
+        if type(self.translate) is FunctionType:
+            gettext = self.translate
+            # Bound early: `gettext` is rebound below when a domain is active
+            dgettext = lambda _, y, _gettext=gettext: _gettext(y)  # no domains
+            if ctxt:
+                ctxt['_i18n.gettext'] = gettext
+        else:
+            gettext = self.translate.ugettext
+            ngettext = self.translate.ungettext
+            try:
+                dgettext = self.translate.dugettext
+                dngettext = self.translate.dungettext
+            except AttributeError:  # no domain support: ignore the domain
+                dgettext = lambda _, y, _gettext=gettext: _gettext(y)
+                dngettext = lambda _, s, p, n: ngettext(s, p, n)
+            if ctxt:
+                ctxt['_i18n.gettext'] = gettext
+                ctxt['_i18n.ngettext'] = ngettext
+                ctxt['_i18n.dgettext'] = dgettext
+                ctxt['_i18n.dngettext'] = dngettext
+
+        if ctxt and ctxt.get('_i18n.domain'):
+            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
+
+        for kind, data, pos in stream:
+
+            # skip chunks that should not be localized
+            if skip:
+                if kind is START:
+                    skip += 1
+                elif kind is END:
+                    skip -= 1
+                yield kind, data, pos
+                continue
+
+            # handle different events that can be localized
+            if kind is START:
+                tag, attrs = data
+                if tag in ignore_tags or \
+                        isinstance(attrs.get(xml_lang), basestring):
+                    skip += 1
+                    yield kind, data, pos
+                    continue
+
+                new_attrs = []
+                changed = False
+
+                for name, value in attrs:
+                    newval = value
+                    if isinstance(value, basestring):
+                        if translate_attrs and name in include_attrs:
+                            newval = gettext(value)
+                    else:  # not a plain string: run it through this filter
+                        newval = list(
+                            self(_ensure(value), ctxt, translate_text=False)
+                        )
+                    if newval != value:
+                        value = newval
+                        changed = True
+                    new_attrs.append((name, value))
+                if changed:
+                    attrs = Attrs(new_attrs)
+
+                yield kind, (tag, attrs), pos
+
+            elif translate_text and kind is TEXT:
+                text = data.strip()
+                if text:  # surrounding white-space is kept around the result
+                    data = data.replace(text, unicode(gettext(text)))
+                yield kind, data, pos
+
+            elif kind is SUB:
+                directives, substream = data
+                current_domain = None
+                for idx, directive in enumerate(directives):
+                    # Organize directives to make everything work
+                    # FIXME: There's got to be a better way to do this!
+                    if isinstance(directive, DomainDirective):
+                        # Grab current domain and update context
+                        current_domain = directive.domain
+                        ctxt.push({'_i18n.domain': current_domain})
+                        # Put domain directive as the first one in order to
+                        # update context before any other directives evaluation
+                        directives.insert(0, directives.pop(idx))
+
+                # If this is an i18n directive, no need to translate text
+                # nodes here
+                is_i18n_directive = any(isinstance(d, ExtractableI18NDirective)
+                                        for d in directives)
+                substream = list(self(substream, ctxt,
+                                      translate_text=not is_i18n_directive,
+                                      translate_attrs=translate_attrs))
+                yield kind, (directives, substream), pos
+
+                if current_domain:
+                    ctxt.pop()
+            else:
+                yield kind, data, pos
+
+    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        """Extract localizable strings from the given template stream.
+        
+        For every string found, this function yields a ``(lineno, function,
+        message, comments)`` tuple, where:
+        
+        * ``lineno`` is the number of the line on which the string was found,
+        * ``function`` is the name of the ``gettext`` function used (if the
+          string was extracted from embedded Python code), and
+        *  ``message`` is the string itself (a ``unicode`` object, or a tuple
+           of ``unicode`` objects for functions with multiple string
+           arguments).
+        *  ``comments`` is a list of comments related to the message, extracted
+           from ``i18n:comment`` attributes found in the markup
+        
+        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
+        ...   <head>
+        ...     <title>Example</title>
+        ...   </head>
+        ...   <body>
+        ...     <h1>Example</h1>
+        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
+        ...   </body>
+        ... </html>''', filename='example.html')
+        >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
+        ...    print('%d, %r, %r' % (line, func, msg))
+        3, None, u'Example'
+        6, None, u'Example'
+        7, '_', u'Hello, %(name)s'
+        8, 'ngettext', (u'You have %d item', u'You have %d items', None)
+        
+        :param stream: the event stream to extract strings from; can be a
+                       regular stream or a template stream
+        :param gettext_functions: a sequence of function names that should be
+                                  treated as gettext-style localization
+                                  functions
+        :param search_text: whether the content of text nodes should be
+                            extracted (used internally)
+        
+        :note: Changed in 0.4.1: For a function with multiple string arguments
+               (such as ``ngettext``), a single item with a tuple of strings is
+               yielded, instead of an item for each string argument.
+        :note: Changed in 0.6: The returned tuples now include a fourth
+               element, which is a list of comments for the translator.
+        """
+        if not self.extract_text:
+            search_text = False
+        if comment_stack is None:
+            comment_stack = []
+        skip = 0
+
+        xml_lang = XML_NAMESPACE['lang']
+
+        for kind, data, pos in stream:
+            if skip:
+                if kind is START:
+                    skip += 1
+                if kind is END:
+                    skip -= 1
+
+            if kind is START and not skip:
+                tag, attrs = data
+                if tag in self.ignore_tags or \
+                        isinstance(attrs.get(xml_lang), basestring):
+                    skip += 1
+                    continue
+
+                for message in self._extract_attrs((kind, data, pos),
+                                                   gettext_functions,
+                                                   search_text=search_text):
+                    yield message
+
+            elif not skip and search_text and kind is TEXT:
+                text = data.strip()
+                if text and [ch for ch in text if ch.isalpha()]:
+                    yield pos[1], None, text, comment_stack[-1:]
+
+            elif kind is EXPR or kind is EXEC:
+                for funcname, strings in extract_from_code(data,
+                                                           gettext_functions):
+                    # XXX: Do we need to grab i18n:comment from comment_stack ???
+                    yield pos[1], funcname, strings, []
+
+            elif kind is SUB:
+                directives, substream = data
+                in_comment = False
+
+                for idx, directive in enumerate(directives):
+                    # Do a first loop to see if there's a comment directive
+                    # If there is, update context and pop it from directives
+                    if isinstance(directive, CommentDirective):
+                        in_comment = True
+                        comment_stack.append(directive.comment)
+                        if len(directives) == 1:
+                            # in case we're in the presence of something like:
+                            # <p i18n:comment="foo">Foo</p>
+                            for message in self.extract(
+                                    substream, gettext_functions,
+                                    search_text=search_text and not skip,
+                                    comment_stack=comment_stack):
+                                yield message
+                        directives.pop(idx)
+                    elif not isinstance(directive, I18NDirective):
+                        # Remove all other non i18n directives from the process
+                        directives.pop(idx)
+
+                if not directives and not in_comment:
+                    # Extract content if there's no directives because
+                    # strip was pop'ed and not because comment was pop'ed.
+                    # Extraction in this case has been taken care of.
+                    for message in self.extract(
+                            substream, gettext_functions,
+                            search_text=search_text and not skip):
+                        yield message
+
+                for directive in directives:
+                    if isinstance(directive, ExtractableI18NDirective):
+                        for message in directive.extract(self,
+                                substream, gettext_functions,
+                                search_text=search_text and not skip,
+                                comment_stack=comment_stack):
+                            yield message
+                    else:
+                        for message in self.extract(
+                                substream, gettext_functions,
+                                search_text=search_text and not skip,
+                                comment_stack=comment_stack):
+                            yield message
+
+                if in_comment:
+                    comment_stack.pop()
+
+    def get_directive_index(self, dir_cls):
+        """Return the ordering index of `dir_cls` (negative if registered)."""
+        if dir_cls not in self._dir_order:
+            return len(self._dir_order)
+        return self._dir_order.index(dir_cls) - len(self._dir_order)
+
+    def setup(self, template):
+        """Convenience function to register the `Translator` as the first
+        filter of the given template, along with the related directives.
+        
+        :param template: a `Template` instance
+        """
+        template.filters.insert(0, self)
+        if hasattr(template, 'add_directives'):  # only if it supports them
+            template.add_directives(Translator.NAMESPACE, self)
+
+    def _extract_attrs(self, event, gettext_functions, search_text):
+        for name, value in event[1][1]:  # event is (kind, (tag, attrs), pos)
+            if search_text and isinstance(value, basestring):
+                if name in self.include_attrs:
+                    text = value.strip()
+                    if text:
+                        yield event[2][1], None, text, []  # pos[1]: lineno
+            else:  # non-string value, or text search is switched off
+                for message in self.extract(_ensure(value), gettext_functions,
+                                            search_text=False):
+                    yield message
+
+
+class MessageBuffer(object):
+    """Helper class for managing internationalized mixed content.
+    
+    :since: version 0.5
+    """
+
+    def __init__(self, directive=None):
+        """Initialize the message buffer.
+        
+        :param directive: the directive owning the buffer
+        :type directive: I18NDirective
+        """
+        # The params list is copied so that directives can be evaluated more
+        # than once; `params` is consumed by append(), `orig_params` is not
+        self.orig_params = directive.params[:]
+        self.params = directive.params[:]
+        self.directive = directive
+        self.string = []
+        self.events = {}
+        self.values = {}
+        self.depth = self.order = 1
+        self.stack = [0]
+        self.subdirectives = {}
+
+    def append(self, kind, data, pos):
+        """Append a stream event to the buffer.
+        
+        :param kind: the stream event kind
+        :param data: the event data
+        :param pos: the position of the event in the source
+        """
+        if kind is SUB:
+            # The order needs to be +1 because a new START kind event will
+            # happen and we need to wrap those events into our custom kind(s)
+            order = self.stack[-1] + 1
+            subdirectives, substream = data
+            # Store the directives that should be applied after translation
+            self.subdirectives.setdefault(order, []).extend(subdirectives)
+            self.events.setdefault(order, []).append((SUB_START, None, pos))
+            for skind, sdata, spos in substream:
+                self.append(skind, sdata, spos)
+            self.events.setdefault(order, []).append((SUB_END, None, pos))
+        elif kind is TEXT:
+            if '[' in data or ']' in data:
+                # Quote [ and ] if it ain't us adding it, ie, if the user is
+                # using those chars in his templates, escape them
+                data = data.replace('[', '\[').replace(']', '\]')
+            self.string.append(data)
+            self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
+        elif kind is EXPR:
+            if self.params:
+                param = self.params.pop(0)
+            else:
+                params = ', '.join(['"%s"' % p for p in self.orig_params if p])
+                if params:
+                    params = "(%s)" % params
+                raise IndexError("%d parameters%s given to 'i18n:%s' but "
+                                 "%d or more expressions used in '%s', line %s"
+                                 % (len(self.orig_params), params,
+                                    self.directive.tagname,
+                                    len(self.orig_params) + 1,
+                                    os.path.basename(pos[0] or
+                                                     'In-memory Template'),
+                                    pos[1]))
+            self.string.append('%%(%s)s' % param)
+            self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
+            self.values[param] = (kind, data, pos)
+        else:
+            if kind is START:
+                self.string.append('[%d:' % self.order)
+                self.stack.append(self.order)
+                self.events.setdefault(self.stack[-1],
+                                       []).append((kind, data, pos))
+                self.depth += 1
+                self.order += 1
+            elif kind is END:
+                self.depth -= 1
+                if self.depth:
+                    self.events[self.stack[-1]].append((kind, data, pos))
+                    self.string.append(']')
+                    self.stack.pop()
+
+    def format(self):
+        """Return a message identifier representing the content in the
+        buffer.
+        """
+        return ''.join(self.string).strip()
+
+    def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
+        """Interpolate the given message translation with the events in the
+        buffer and return the translated stream.
+        
+        :param string: the translated message string
+        """
+        substream = None
+
+        def yield_parts(string):
+            for idx, part in enumerate(regex.split(string)):
+                if idx % 2:
+                    yield self.values[part]
+                elif part:
+                    yield (TEXT,
+                           part.replace('\[', '[').replace('\]', ']'),
+                           (None, -1, -1)
+                    )
+
+        parts = parse_msg(string)
+        parts_counter = {}
+        for order, string in parts:
+            parts_counter.setdefault(order, []).append(None)
+
+        while parts:
+            order, string = parts.pop(0)
+            if len(parts_counter[order]) == 1:
+                events = self.events[order]
+            else:
+                events = [self.events[order].pop(0)]
+            parts_counter[order].pop()
+
+            for event in events:
+                if event[0] is SUB_START:
+                    substream = []
+                elif event[0] is SUB_END:
+                    # Yield a substream which might have directives to be
+                    # applied to it (after translation events)
+                    yield SUB, (self.subdirectives[order], substream), event[2]
+                    substream = None
+                elif event[0] is TEXT:
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                elif event[0] is START:
+                    if substream is not None:
+                        substream.append(event)
+                    else:
+                        yield event
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                elif event[0] is END:
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                    if substream is not None:
+                        substream.append(event)
+                    else:
+                        yield event
+                elif event[0] is EXPR:
+                    # These are handled on the strings itself
+                    continue
+                else:
+                    if string:
+                        for part in yield_parts(string):
+                            if substream is not None:
+                                substream.append(part)
+                            else:
+                                yield part
+                        # String handled, reset it
+                        string = None
+                    if substream is not None:
+                        substream.append(event)
+                    else:
+                        yield event
+
+
+def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
+    """Parse a translated message using Genshi mixed content message
+    formatting.
+    
+    >>> parse_msg("See [1:Help].")
+    [(0, 'See '), (1, 'Help'), (0, '.')]
+    
+    >>> parse_msg("See [1:our [2:Help] page] for details.")
+    [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
+    
+    >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
+    [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
+    
+    >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
+    [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
+    
+    :param string: the translated message string
+    :return: a list of ``(order, string)`` tuples
+    :rtype: `list`
+    """
+    parts = []
+    stack = [0]
+    while True:
+        mo = regex.search(string)
+        if not mo:
+            break
+
+        if mo.start() or stack[-1]:
+            parts.append((stack[-1], string[:mo.start()]))
+        string = string[mo.end():]
+
+        orderno = mo.group(1)
+        if orderno is not None:
+            stack.append(int(orderno))
+        else:
+            stack.pop()
+        if not stack:
+            stack.append(0)  # stray unescaped ']': fall back to top level
+
+    if string:
+        parts.append((stack[-1], string))
+
+    return parts
+
+
+def extract_from_code(code, gettext_functions):
+    """Extract strings from Python code by walking its syntax tree.
+    
+    >>> from genshi.template.eval import Expression
+    >>> expr = Expression('_("Hello")')
+    >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
+    [('_', u'Hello')]
+    
+    >>> expr = Expression('ngettext("You have %(num)s item", '
+    ...                            '"You have %(num)s items", num)')
+    >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
+    [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
+    
+    :param code: the `Code` object
+    :type code: `genshi.template.eval.Code`
+    :param gettext_functions: a sequence of function names
+    :since: version 0.5
+    """
+    def _walk(node):
+        if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \
+                and node.func.id in gettext_functions:
+            strings = []
+            def _add(arg):
+                if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode):
+                    strings.append(arg.s)  # u'' literal: already decoded
+                elif isinstance(arg, _ast.Str) and isinstance(arg.s, str):
+                    strings.append(unicode(arg.s, 'utf-8'))
+                elif arg:
+                    strings.append(None)
+            for arg in list(node.args) + [node.starargs, node.kwargs]:
+                _add(arg)
+            strings = tuple(strings)
+            if len(strings) == 1:
+                strings = strings[0]
+            yield node.func.id, strings
+        elif node._fields:
+            children = []
+            for field in node._fields:
+                child = getattr(node, field, None)
+                if isinstance(child, _ast.AST):
+                    children.append(child)
+                elif isinstance(child, list):  # may hold plain strings
+                    children.extend([elem for elem in child
+                                     if isinstance(elem, _ast.AST)])
+            for child in children:
+                for funcname, strings in _walk(child):
+                    yield funcname, strings
+    return _walk(code.ast)
+
+
+def extract(fileobj, keywords, comment_tags, options):
+    """Babel extraction method for Genshi templates.
+    
+    :param fileobj: the file-like object the messages should be extracted from
+    :param keywords: a list of keywords (i.e. function names) that should be
+                     recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results (not referenced by this function)
+    :param options: a dictionary of additional options (see the keys read below)
+    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
+    :rtype: ``iterator``
+    """
+    template_class = options.get('template_class', MarkupTemplate)
+    if isinstance(template_class, basestring):  # "module:ClassName" form
+        module, clsname = template_class.split(':', 1)
+        template_class = getattr(__import__(module, {}, {}, [clsname]), clsname)
+    encoding = options.get('encoding', None)
+
+    extract_text = options.get('extract_text', True)
+    if isinstance(extract_text, basestring):
+        extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')
+
+    ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS)
+    if isinstance(ignore_tags, basestring):  # whitespace-separated names
+        ignore_tags = ignore_tags.split()
+    ignore_tags = [QName(tag) for tag in ignore_tags]
+
+    include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS)
+    if isinstance(include_attrs, basestring):  # whitespace-separated names
+        include_attrs = include_attrs.split()
+    include_attrs = [QName(attr) for attr in include_attrs]
+
+    tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
+                          encoding=encoding)
+    tmpl.loader = None
+
+    translator = Translator(None, ignore_tags, include_attrs, extract_text)
+    if hasattr(tmpl, 'add_directives'):
+        tmpl.add_directives(Translator.NAMESPACE, translator)
+    for message in translator.extract(tmpl.stream, gettext_functions=keywords):
+        yield message
diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py
new file mode 100644
index 0000000..9b75b06
--- /dev/null
+++ b/genshi/filters/transform.py
@@ -0,0 +1,1310 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""A filter for functional-style transformations of markup streams.
+
+The `Transformer` filter provides a variety of transformations that can be
+applied to parts of streams that match given XPath expressions. These
+transformations can be chained to achieve results that would be comparatively
+tedious to achieve by writing stream filters by hand. The approach of chaining
+node selection and transformation has been inspired by the `jQuery`_ Javascript
+library.
+
+ .. _`jQuery`: http://jquery.com/
+
+For example, the following transformation removes the ``<title>`` element from
+the ``<head>`` of the input document:
+
+>>> from genshi.builder import tag
+>>> html = HTML('''<html>
+...  <head><title>Some Title</title></head>
+...  <body>
+...    Some <em>body</em> text.
+...  </body>
+... </html>''')
+>>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
+...                                    .unwrap().wrap(tag.u))
+<html>
+  <head><title>Some Title</title></head>
+  <body>
+    Some <u>BODY</u> text.
+  </body>
+</html>
+
+The ``Transformer`` supports a large number of useful transformations out of the
+box, but custom transformations can be added easily.
+
+:since: version 0.5
+"""
+
+import re
+import sys
+
+from genshi.builder import Element
+from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
+from genshi.path import Path
+
+__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
+           'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
+
+
+class TransformMark(str):
+    """A mark on a transformation stream (one cached instance per value)."""
+    __slots__ = []
+    _instances = {}  # maps each mark value to the instance created for it
+
+    def __new__(cls, val):
+        return cls._instances.setdefault(val, str.__new__(cls, val))
+
+
+ENTER = TransformMark('ENTER')
+"""Stream augmentation mark indicating that a selected element is being
+entered."""
+
+INSIDE = TransformMark('INSIDE')
+"""Stream augmentation mark indicating that processing is currently inside a
+selected element."""
+
+OUTSIDE = TransformMark('OUTSIDE')
+"""Stream augmentation mark indicating that a match occurred outside a selected
+element."""
+
+ATTR = TransformMark('ATTR')
+"""Stream augmentation mark indicating a selected element attribute."""
+
+EXIT = TransformMark('EXIT')
+"""Stream augmentation mark indicating that a selected element is being
+exited."""
+
+BREAK = TransformMark('BREAK')
+"""Stream augmentation mark indicating a break between two otherwise contiguous
+blocks of marked events.
+
+This is used primarily by the cut() transform to provide later transforms with
+an opportunity to operate on the cut buffer.
+"""
+
+
+class PushBackStream(object):
+    """Allows a single event to be pushed back onto the stream and re-consumed.
+    """
+    def __init__(self, stream):
+        self.stream = iter(stream)
+        self.peek = None  # the pushed-back event, or None if there is none
+
+    def push(self, event):
+        assert self.peek is None  # only one event may be pending at a time
+        self.peek = event
+
+    def __iter__(self):
+        while True:
+            if self.peek is not None:
+                peek = self.peek
+                self.peek = None
+                yield peek  # a pushed-back event is delivered first
+            else:
+                try:
+                    event = self.stream.next()
+                    yield event
+                except StopIteration:
+                    if self.peek is None:  # nothing pending: stop iterating
+                        raise
+
+
+class Transformer(object):
+    """Stream filter that can apply a variety of different transformations to
+    a stream.
+
+    This is achieved by selecting the events to be transformed using XPath,
+    then applying the transformations to the events matched by the path
+    expression. Each marked event is in the form (mark, (kind, data, pos)),
+    where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.
+
+    The first three marks match `START` and `END` events, and any events
+    contained `INSIDE` any selected XML/HTML element. A non-element match
+    outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE`
+    mark.
+
+    >>> html = HTML('<html><head><title>Some Title</title></head>'
+    ...             '<body>Some <em>body</em> text.</body></html>')
+
+    Transformations act on selected stream events matching an XPath expression.
+    Here's an example of removing some markup (the title, in this case)
+    selected by an expression:
+
+    >>> print(html | Transformer('head/title').remove())
+    <html><head/><body>Some <em>body</em> text.</body></html>
+
+    Inserted content can be passed in the form of a string, or a markup event
+    stream, which includes streams generated programmatically via the
+    `builder` module:
+
+    >>> from genshi.builder import tag
+    >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
+    <html><head><title>Some Title</title></head><body><h1>Document
+    Title</h1>Some <em>body</em> text.</body></html>
+
+    Each XPath expression determines the set of tags that will be acted upon by
+    subsequent transformations. In this example we select the ``<title>`` text,
+    copy it into a buffer, then select the ``<body>`` element and paste the
+    copied text into the body as ``<h1>`` enclosed text:
+
+    >>> buffer = StreamBuffer()
+    >>> print(html | Transformer('head/title/text()').copy(buffer)
+    ...     .end().select('body').prepend(tag.h1(buffer)))
+    <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
+    <em>body</em> text.</body></html>
+
+    Transformations can also be assigned and reused, although care must be
+    taken when using buffers, to ensure that buffers are cleared between
+    transforms:
+
+    >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
+    >>> print(html | emphasis)
+    <html><head><title>Some Title</title></head><body>Some <em
+    class="emphasis">body</em> text.</body></html>
+    """
+
+    __slots__ = ['transforms']
+
+    def __init__(self, path='.'):
+        """Construct a new transformation filter.
+
+        :param path: an XPath expression (as string) or a `Path` instance
+        """
+        self.transforms = [SelectTransformation(path)]
+
+    def __call__(self, stream, keep_marks=False):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        :param keep_marks: Do not strip transformer selection marks from the
+                           stream. Useful for testing.
+        :return: the transformed stream
+        :rtype: `Stream`
+        """
+        transforms = self._mark(stream)
+        for link in self.transforms:
+            transforms = link(transforms)
+        if not keep_marks:
+            transforms = self._unmark(transforms)
+        return Stream(transforms,
+                      serializer=getattr(stream, 'serializer', None))
+
+    def apply(self, function):
+        """Apply a transformation to the stream.
+
+        Transformations can be chained, similar to stream filters. Any callable
+        accepting a marked stream can be used as a transform.
+
+        As an example, here is a simple `TEXT` event upper-casing transform:
+
+        >>> def upper(stream):
+        ...     for mark, (kind, data, pos) in stream:
+        ...         if mark and kind is TEXT:
+        ...             yield mark, (kind, data.upper(), pos)
+        ...         else:
+        ...             yield mark, (kind, data, pos)
+        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(short_stream | Transformer('.//em/text()').apply(upper))
+        <body>Some <em>TEST</em> text</body>
+        """
+        transformer = Transformer()
+        transformer.transforms = self.transforms[:]
+        if isinstance(function, Transformer):
+            transformer.transforms.extend(function.transforms)
+        else:
+            transformer.transforms.append(function)
+        return transformer
+
+    #{ Selection operations
+
+    def select(self, path):
+        """Mark events matching the given XPath expression, within the current
+        selection.
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer().select('.//em').trace())
+        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        (None, ('TEXT', u'Some ', (None, 1, 6)))
+        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
+        ('EXIT', ('END', QName('em'), (None, 1, 19)))
+        (None, ('TEXT', u' text', (None, 1, 24)))
+        (None, ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :param path: an XPath expression (as string) or a `Path` instance
+        :return: the stream augmented by transformation marks
+        :rtype: `Transformer`
+        """
+        return self.apply(SelectTransformation(path))
+
+    def invert(self):
+        """Invert selection so that marked events become unmarked, and vice
+        versa.
+
+        Specifically, all marks are converted to null marks, and all null marks
+        are converted to OUTSIDE marks.
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer('//em').invert().trace())
+        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
+        (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        (None, ('TEXT', u'test', (None, 1, 15)))
+        (None, ('END', QName('em'), (None, 1, 19)))
+        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
+        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(InvertTransformation())
+
+    def end(self):
+        """End current selection, allowing all events to be selected.
+
+        Example:
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer('//em').end().trace())
+        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
+        ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
+        ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
+        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
+        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :return: the stream augmented by transformation marks
+        :rtype: `Transformer`
+        """
+        return self.apply(EndTransformation())
+
+    #{ Deletion operations
+
+    def empty(self):
+        """Empty selected elements of all content.
+
+        Example:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').empty())
+        <html><head><title>Some Title</title></head><body>Some <em/>
+        text.</body></html>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(EmptyTransformation())
+
+    def remove(self):
+        """Remove selection from the stream.
+
+        Example:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').remove())
+        <html><head><title>Some Title</title></head><body>Some
+        text.</body></html>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(RemoveTransformation())
+
+    #{ Direct element operations
+
+    def unwrap(self):
+        """Remove outermost enclosing elements from selection.
+
+        Example:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').unwrap())
+        <html><head><title>Some Title</title></head><body>Some body
+        text.</body></html>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(UnwrapTransformation())
+
+    def wrap(self, element):
+        """Wrap selection in an element.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').wrap('strong'))
+        <html><head><title>Some Title</title></head><body>Some
+        <strong><em>body</em></strong> text.</body></html>
+
+        :param element: either a tag name (as string) or an `Element` object
+        :rtype: `Transformer`
+        """
+        return self.apply(WrapTransformation(element))
+
+    #{ Content insertion operations
+
+    def replace(self, content):
+        """Substitute the given content for the selection.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//title/text()').replace('New Title'))
+        <html><head><title>New Title</title></head><body>Some <em>body</em>
+        text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        transformation = ReplaceTransformation(content)
+        return self.apply(transformation)
+
+    def before(self, content):
+        """Place content immediately ahead of the selection.
+
+        In this example we insert the word 'emphasised' before the <em> opening
+        tag:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').before('emphasised '))
+        <html><head><title>Some Title</title></head><body>Some emphasised
+        <em>body</em> text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        transformation = BeforeTransformation(content)
+        return self.apply(transformation)
+
+    def after(self, content):
+        """Place content immediately behind the selection.
+
+        Here, we insert some text after the </em> closing tag:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').after(' rock'))
+        <html><head><title>Some Title</title></head><body>Some <em>body</em>
+        rock text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        transformation = AfterTransformation(content)
+        return self.apply(transformation)
+
+    def prepend(self, content):
+        """Insert content directly after the ENTER event of the selection,
+        i.e. as the first content of each selected element.
+
+        Inserting some new text at the start of the <body>:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//body').prepend('Some new body text. '))
+        <html><head><title>Some Title</title></head><body>Some new body text.
+        Some <em>body</em> text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        transformation = PrependTransformation(content)
+        return self.apply(transformation)
+
+    def append(self, content):
+        """Insert content directly before the EXIT event of the selection,
+        i.e. as the last content of each selected element.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//body').append(' Some new body text.'))
+        <html><head><title>Some Title</title></head><body>Some <em>body</em>
+        text. Some new body text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        transformation = AppendTransformation(content)
+        return self.apply(transformation)
+
+    #{ Attribute manipulation
+
+    def attr(self, name, value):
+        """Add, replace or delete an attribute on selected elements.
+
+        If `value` evaluates to `None` the attribute will be deleted from the
+        element:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
+        ...             '</html>')
+        >>> print(html | Transformer('body/em').attr('class', None))
+        <html><head><title>Some Title</title></head><body>Some <em>body</em>
+        <em>text</em>.</body></html>
+
+        Otherwise the attribute will be set to `value`:
+
+        >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
+        <html><head><title>Some Title</title></head><body>Some <em
+        class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>
+
+        If `value` is a callable it will be called with the attribute name and
+        the `START` event for the matching element. Its return value will then
+        be used to set the attribute:
+
+        >>> def print_attr(name, event):
+        ...     attrs = event[1][1]
+        ...     print(attrs)
+        ...     return attrs.get(name)
+        >>> print(html | Transformer('body/em').attr('class', print_attr))
+        Attrs([(QName('class'), u'before')])
+        Attrs()
+        <html><head><title>Some Title</title></head><body>Some <em
+        class="before">body</em> <em>text</em>.</body></html>
+
+        :param name: the name of the attribute
+        :param value: the value that should be set for the attribute; `None`
+                      removes the attribute, and a callable is invoked to
+                      compute the value for each selected element
+        :rtype: `Transformer`
+        """
+        transformation = AttrTransformation(name, value)
+        return self.apply(transformation)
+
+    #{ Buffer operations
+
+    def copy(self, buffer, accumulate=False):
+        """Copy selection into buffer, leaving the stream itself unchanged.
+
+        The buffer is replaced by each *contiguous* selection before being passed
+        to the next transformation. If accumulate=True, further selections will
+        be appended to the buffer rather than replacing it.
+
+        >>> from genshi.builder import tag
+        >>> buffer = StreamBuffer()
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('head/title/text()').copy(buffer)
+        ...     .end().select('body').prepend(tag.h1(buffer)))
+        <html><head><title>Some Title</title></head><body><h1>Some
+        Title</h1>Some <em>body</em> text.</body></html>
+
+        This example illustrates that only a single contiguous selection will
+        be buffered:
+
+        >>> print(html | Transformer('head/title/text()').copy(buffer)
+        ...     .end().select('body/em').copy(buffer).end().select('body')
+        ...     .prepend(tag.h1(buffer)))
+        <html><head><title>Some Title</title></head><body><h1>Some
+        Title</h1>Some <em>body</em> text.</body></html>
+        >>> print(buffer)
+        <em>body</em>
+
+        Element attributes can also be copied for later use:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body><em>Some</em> <em class="before">body</em>'
+        ...             '<em>text</em>.</body></html>')
+        >>> buffer = StreamBuffer()
+        >>> def apply_attr(name, entry):
+        ...     return list(buffer)[0][1][1].get('class')
+        >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
+        ...     .end().buffer().select('body/em[not(@class)]')
+        ...     .attr('class', apply_attr))
+        <html><head><title>Some Title</title></head><body><em
+        class="before">Some</em> <em class="before">body</em><em
+        class="before">text</em>.</body></html>
+
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: if true, append each selection to the buffer
+                           instead of replacing the buffer's contents
+        :rtype: `Transformer`
+        :note: Copy (and cut) copy each individual selected object into the
+               buffer before passing to the next transform. For example, the
+               XPath ``*|text()`` will select all elements and text, each
+               instance of which will be copied to the buffer individually
+               before passing to the next transform. This has implications for
+               how ``StreamBuffer`` objects can be used, so some
+               experimentation may be required.
+
+        """
+        transformation = CopyTransformation(buffer, accumulate)
+        return self.apply(transformation)
+
+    def cut(self, buffer, accumulate=False):
+        """Move the selection out of the stream and into buffer.
+
+        >>> from genshi.builder import tag
+        >>> buffer = StreamBuffer()
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em/text()').cut(buffer)
+        ...     .end().select('.//em').after(tag.h1(buffer)))
+        <html><head><title>Some Title</title></head><body>Some
+        <em/><h1>body</h1> text.</body></html>
+
+        Specifying accumulate=True appends all selected intervals onto the
+        buffer. Combining this with the .buffer() operation allows us to
+        operate on all copied events rather than per-segment. See the
+        documentation on buffer() for more information.
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: if true, append each selection to the buffer
+                           instead of replacing the buffer's contents
+        :rtype: `Transformer`
+        :note: this transformation will buffer the entire input stream
+        """
+        transformation = CutTransformation(buffer, accumulate)
+        return self.apply(transformation)
+
+    def buffer(self):
+        """Read the whole stream into a list before passing it on (this can
+        consume a considerable amount of memory).
+
+        Useful in conjunction with copy(accumulate=True) and
+        cut(accumulate=True) to ensure that all marked events in the entire
+        stream are copied to the buffer before further transformations are
+        applied.
+
+        For example, to move all <note> elements inside a <notes> tag at the
+        top of the document:
+
+        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
+        ...            'text <note>two</note>.</body></doc>')
+        >>> buffer = StreamBuffer()
+        >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
+        ...     .end().buffer().select('notes').prepend(buffer))
+        <doc><notes><note>one</note><note>two</note></notes><body>Some  text
+        .</body></doc>
+
+        :rtype: `Transformer`
+        """
+        # The built-in ``list`` serves as the transformation: calling it on
+        # the marked stream consumes every event up front.
+        materialize = list
+        return self.apply(materialize)
+
+    #{ Miscellaneous operations
+
+    def filter(self, filter):
+        """Run an ordinary stream filter over the selection. The filter is
+        invoked once for each contiguous block of marked events.
+
+        >>> from genshi.filters.html import HTMLSanitizer
+        >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
+        ...             '</script> and some more text</body></html>')
+        >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
+        <html><body>Some text and some more text</body></html>
+
+        :param filter: The stream filter to apply.
+        :rtype: `Transformer`
+        """
+        transformation = FilterTransformation(filter)
+        return self.apply(transformation)
+
+    def map(self, function, kind):
+        """Apply a function to the ``data`` element of every selected event
+        whose kind is ``kind``.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...               '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
+        <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
+        text.</body></html>
+
+        :param function: the function to apply
+        :param kind: the kind of event the function should be applied to
+        :rtype: `Transformer`
+        """
+        transformation = MapTransformation(function, kind)
+        return self.apply(transformation)
+
+    def substitute(self, pattern, replace, count=1):
+        """Replace text matching a regular expression within the selection.
+
+        Refer to the documentation for ``re.sub()`` for details.
+
+        >>> html = HTML('<html><body>Some text, some more text and '
+        ...             '<b>some bold text</b>\\n'
+        ...             '<i>some italicised text</i></body></html>')
+        >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
+        <html><body>Some text, some more text and <b>SOME bold text</b>
+        <i>some italicised text</i></body></html>
+        >>> tags = tag.html(tag.body('Some text, some more text and\\n',
+        ...      Markup('<b>some bold text</b>')))
+        >>> print(tags.generate() | Transformer('body').substitute(
+        ...     '(?i)some', 'SOME'))
+        <html><body>SOME text, some more text and
+        <b>SOME bold text</b></body></html>
+
+        :param pattern: A regular expression object or string.
+        :param replace: Replacement pattern.
+        :param count: Number of replacements to make in each text fragment;
+                      this method defaults to a single replacement.
+        :rtype: `Transformer`
+        """
+        transformation = SubstituteTransformation(pattern, replace, count)
+        return self.apply(transformation)
+
+    def rename(self, name):
+        """Give the selected elements a new tag name.
+
+        >>> html = HTML('<html><body>Some text, some more text and '
+        ...             '<b>some bold text</b></body></html>')
+        >>> print(html | Transformer('body/b').rename('strong'))
+        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
+
+        :param name: the new element name
+        :rtype: `Transformer`
+        """
+        transformation = RenameTransformation(name)
+        return self.apply(transformation)
+
+    def trace(self, prefix='', fileobj=None):
+        """Write each marked event to a file object as it passes through the
+        transform, leaving the stream unchanged.
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer('em').trace())
+        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        (None, ('TEXT', u'Some ', (None, 1, 6)))
+        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
+        ('EXIT', ('END', QName('em'), (None, 1, 19)))
+        (None, ('TEXT', u' text', (None, 1, 24)))
+        (None, ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :param prefix: a string to prefix each event with in the output
+        :param fileobj: the writable file-like object to write to; defaults to
+                        the standard output stream
+        :rtype: `Transformer`
+        """
+        transformation = TraceTransformation(prefix, fileobj=fileobj)
+        return self.apply(transformation)
+
+    # Internal methods
+
+    def _mark(self, stream):
+        """Turn a plain event stream into a marked one by pairing every event
+        with the OUTSIDE mark.
+
+        :param stream: an iterable of plain (unmarked) events
+        """
+        for plain_event in stream:
+            yield OUTSIDE, plain_event
+
+    def _unmark(self, stream):
+        """Turn a marked stream back into a plain one.
+
+        Marks are discarded, and events whose kind is ``None``, ATTR or BREAK
+        are dropped altogether.
+
+        :param stream: an iterable of ``(mark, event)`` pairs
+        """
+        for unused_mark, event in stream:
+            kind = event[0]
+            if kind is None or kind is ATTR or kind is BREAK:
+                continue
+            yield event
+
+
+class SelectTransformation(object):
+    """Select and mark events that match an XPath expression.
+
+    Events that already carry a ``None`` mark are passed through untouched;
+    every other event is run through the path test and re-marked according to
+    the test result.
+    """
+
+    def __init__(self, path):
+        """Create selection.
+
+        :param path: an XPath expression (as string) or a `Path` object
+        """
+        # Strings are compiled once here so that __call__ can reuse the path.
+        if not isinstance(path, Path):
+            path = Path(path)
+        self.path = path
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        namespaces = {}
+        variables = {}
+        test = self.path.test()
+        # A single shared iterator is needed because the element branch below
+        # pulls sub-events from the same stream the outer loop is consuming.
+        stream = iter(stream)
+        next = stream.next
+        for mark, event in stream:
+            if mark is None:
+                yield mark, event
+                continue
+            result = test(event, namespaces, variables)
+            # XXX This is effectively genshi.core._ensure() for transform
+            # streams.
+            if result is True:
+                if event[0] is START:
+                    # A matched element: mark its START as ENTER, everything
+                    # nested inside as INSIDE and the matching END as EXIT.
+                    yield ENTER, event
+                    depth = 1
+                    while depth > 0:
+                        mark, subevent = next()
+                        if subevent[0] is START:
+                            depth += 1
+                        elif subevent[0] is END:
+                            depth -= 1
+                        if depth == 0:
+                            yield EXIT, subevent
+                        else:
+                            yield INSIDE, subevent
+                        # Sub-events are still fed to the test, but only with
+                        # updateonly=True -- presumably to keep the path
+                        # matcher's state in sync; confirm against Path.test.
+                        test(subevent, namespaces, variables, updateonly=True)
+                else:
+                    # A matched non-element event forms a selection by itself.
+                    yield OUTSIDE, event
+            elif isinstance(result, Attrs):
+                # XXX  Selected *attributes* are emitted as a synthetic event
+                # whose mark and kind are both ATTR, to indicate they are not
+                # really part of the stream; the original event follows it
+                # unmarked.
+                yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
+                yield None, event
+            elif isinstance(result, tuple):
+                # The test returned a tuple; it is emitted in place of the
+                # tested event, marked as selected.
+                yield OUTSIDE, result
+            elif result:
+                # XXX Assume everything else is "text"?
+                yield None, (TEXT, unicode(result), (None, -1, -1))
+            else:
+                # No match: the event stays in the stream, unselected.
+                yield None, event
+
+
+class InvertTransformation(object):
+    """Invert selection so that marked events become unmarked, and vice versa.
+
+    Specifically, every event carrying a mark is given the null mark, and
+    every event carrying a null mark is given the OUTSIDE mark.
+    """
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if not mark:
+                yield OUTSIDE, event
+            else:
+                yield None, event
+
+
+class EndTransformation(object):
+    """End the current selection by re-marking every event as OUTSIDE."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for unused_mark, event in stream:
+            yield OUTSIDE, event
+
+
+class EmptyTransformation(object):
+    """Empty selected elements of all content."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        The ENTER and EXIT events of each selected element are kept; every
+        event between them is discarded.
+
+        :param stream: the marked event stream to filter
+        """
+        # The inner loop has to resume where the outer loop stopped, so both
+        # must share one iterator. A re-iterable input (for example the list
+        # that `Transformer.buffer()` applies to the stream) would otherwise
+        # be restarted from its first event.
+        stream = iter(stream)
+        for mark, event in stream:
+            yield mark, event
+            if mark is ENTER:
+                # Swallow the element's content up to its EXIT event.
+                for mark, event in stream:
+                    if mark is EXIT:
+                        yield mark, event
+                        break
+
+
+class RemoveTransformation(object):
+    """Remove selection from the stream."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Only events whose mark is ``None`` survive.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if mark is not None:
+                continue
+            yield mark, event
+
+
+class UnwrapTransformation(object):
+    """Remove outermost enclosing elements from selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Events marked ENTER or EXIT are dropped; all others pass through.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if mark in (ENTER, EXIT):
+                continue
+            yield mark, event
+
+
+class WrapTransformation(object):
+    """Wrap selection in an element."""
+
+    def __init__(self, element):
+        """Create the transform.
+
+        :param element: either a tag name (as string) or an `Element` object
+        """
+        if isinstance(element, Element):
+            self.element = element
+        else:
+            self.element = Element(element)
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        # The inner loop has to resume where the outer loop stopped, so both
+        # must share one iterator. A re-iterable input (for example the list
+        # that `Transformer.buffer()` applies to the stream) would otherwise
+        # be restarted from its first event.
+        stream = iter(stream)
+        for mark, event in stream:
+            if mark:
+                # All generated events but the last open the wrapper; the last
+                # one closes it once the selection has been emitted.
+                element = list(self.element.generate())
+                for prefix in element[:-1]:
+                    yield None, prefix
+                yield mark, event
+                start = mark
+                # `stopped` records whether the look-ahead event still has to
+                # be emitted after the wrapper has been closed.
+                stopped = False
+                for mark, event in stream:
+                    if start is ENTER and mark is EXIT:
+                        yield mark, event
+                        stopped = True
+                        break
+                    if not mark:
+                        # First unmarked event: the selection is over.
+                        break
+                    yield mark, event
+                else:
+                    # The stream ran out; there is no look-ahead event.
+                    stopped = True
+                yield None, element[-1]
+                if not stopped:
+                    yield mark, event
+            else:
+                yield mark, event
+
+
+class TraceTransformation(object):
+    """Print events as they pass through the transform."""
+
+    def __init__(self, prefix='', fileobj=None):
+        """Trace constructor.
+
+        :param prefix: text to prefix each traced line with.
+        :param fileobj: the writable file-like object to write to; when it is
+                        omitted (or otherwise false) ``sys.stdout`` is used
+        """
+        if not fileobj:
+            fileobj = sys.stdout
+        self.prefix = prefix
+        self.fileobj = fileobj
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Each item is written on a line of its own and then passed on
+        unchanged.
+
+        :param stream: the marked event stream to filter
+        """
+        for item in stream:
+            line = '%s%s\n' % (self.prefix, item)
+            self.fileobj.write(line)
+            yield item
+
+
+class FilterTransformation(object):
+    """Apply a normal stream filter to the selection. The filter is called once
+    for each selection."""
+
+    def __init__(self, filter):
+        """Create the transform.
+
+        :param filter: The stream filter to apply.
+        """
+        self.filter = filter
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        def flush(queue):
+            # Run the queued events through the filter, mark the result as
+            # OUTSIDE and empty the queue in place.
+            if queue:
+                for event in self.filter(queue):
+                    yield OUTSIDE, event
+                del queue[:]
+
+        # The inner loops have to resume where the outer loop stopped, so all
+        # of them must share one iterator. A re-iterable input (for example
+        # the list that `Transformer.buffer()` applies to the stream) would
+        # otherwise be restarted from its first event.
+        stream = iter(stream)
+        queue = []
+        for mark, event in stream:
+            if mark is ENTER:
+                # A selected element: queue everything up to its EXIT event.
+                queue.append(event)
+                for mark, event in stream:
+                    queue.append(event)
+                    if mark is EXIT:
+                        break
+                for queue_event in flush(queue):
+                    yield queue_event
+            elif mark is OUTSIDE:
+                # A run of OUTSIDE events: queue it until the mark changes.
+                stopped = False
+                queue.append(event)
+                for mark, event in stream:
+                    if mark is not OUTSIDE:
+                        break
+                    queue.append(event)
+                else:
+                    # The stream ran out; there is no look-ahead event.
+                    stopped = True
+                for queue_event in flush(queue):
+                    yield queue_event
+                if not stopped:
+                    # Emit the event that ended the run, unfiltered.
+                    yield mark, event
+            else:
+                yield mark, event
+        for queue_event in flush(queue):
+            yield queue_event
+
+
+class MapTransformation(object):
+    """Apply a function to the `data` element of events of ``kind`` in the
+    selection.
+    """
+
+    def __init__(self, function, kind):
+        """Create the transform.
+
+        :param function: the function to apply; the function must take one
+                         argument, the `data` element of each selected event
+        :param kind: the stream event ``kind`` to apply the `function` to;
+                     ``None`` applies it to selected events of every kind
+        """
+        self.kind = kind
+        self.function = function
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, (kind, data, pos) in stream:
+            if mark and self.kind in (None, kind):
+                data = self.function(data)
+            yield mark, (kind, data, pos)
+
+
+class SubstituteTransformation(object):
+    """Replace text matching a regular expression.
+
+    Refer to the documentation for ``re.sub()`` for details.
+    """
+    def __init__(self, pattern, replace, count=0):
+        """Create the transform.
+
+        :param pattern: A regular expression object, or string; a string is
+                        compiled with ``re.compile()``.
+        :param replace: Replacement pattern.
+        :param count: Number of replacements to make in each text fragment;
+                      passed straight to the pattern's ``sub()`` method.
+        """
+        if not isinstance(pattern, basestring):
+            self.pattern = pattern
+        else:
+            self.pattern = re.compile(pattern)
+        self.replace = replace
+        self.count = count
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Only TEXT events whose mark is not ``None`` are rewritten; text that
+        was `Markup` before the substitution is wrapped in `Markup` again.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, (kind, data, pos) in stream:
+            if mark is None or kind is not TEXT:
+                yield mark, (kind, data, pos)
+                continue
+            replaced = self.pattern.sub(self.replace, data, self.count)
+            if isinstance(data, Markup):
+                replaced = Markup(replaced)
+            yield mark, (kind, replaced, pos)
+
+
+class RenameTransformation(object):
+    """Rename matching elements."""
+    def __init__(self, name):
+        """Create the transform.
+
+        :param name: New element name; it is converted to a `QName`.
+        """
+        self.name = QName(name)
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        The name is swapped in events marked ENTER (keeping their second data
+        item) and in events marked EXIT; other events are untouched.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, (kind, data, pos) in stream:
+            if mark is EXIT:
+                data = self.name
+            elif mark is ENTER:
+                data = (self.name, data[1])
+            yield mark, (kind, data, pos)
+
+
+class InjectorTransformation(object):
+    """Abstract base class for transformations that inject content into a
+    stream.
+
+    Subclasses implement ``__call__`` and use ``_inject()`` to obtain the
+    content as unmarked events:
+
+    >>> class Top(InjectorTransformation):
+    ...     def __call__(self, stream):
+    ...         for event in self._inject():
+    ...             yield event
+    ...         for event in stream:
+    ...             yield event
+    >>> html = HTML('<body>Some <em>test</em> text</body>')
+    >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
+    Prefix <body>Some <em>test</em> text</body>
+    """
+    def __init__(self, content):
+        """Create a new injector.
+
+        :param content: An iterable of Genshi stream events, or a string to be
+                        injected; a callable is invoked each time the content
+                        is injected.
+        """
+        self.content = content
+
+    def _inject(self):
+        """Yield the content as ``(None, event)`` pairs."""
+        source = self.content
+        if hasattr(source, '__call__'):
+            source = source()
+        for event in _ensure(source):
+            yield None, event
+
+
+class ReplaceTransformation(InjectorTransformation):
+    """Replace selection with content."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        # The look-ahead loop below may read one event too many; wrapping the
+        # stream lets it hand that event back with push().
+        stream = PushBackStream(stream)
+        for mark, event in stream:
+            if mark is not None:
+                start = mark
+                # Emit the replacement first, then discard the selection.
+                for subevent in self._inject():
+                    yield subevent
+                for mark, event in stream:
+                    if start is ENTER:
+                        # An element selection ends with its EXIT event, which
+                        # is discarded as well.
+                        if mark is EXIT:
+                            break
+                    elif mark != start:
+                        # Any other selection ends at the first event with a
+                        # different mark; that event is not part of it.
+                        stream.push((mark, event))
+                        break
+            else:
+                yield mark, event
+
+
+class BeforeTransformation(InjectorTransformation):
+    """Insert content before selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        # The look-ahead loop below may read one event too many; wrapping the
+        # stream lets it hand that event back with push().
+        stream = PushBackStream(stream)
+        for mark, event in stream:
+            if mark is not None:
+                start = mark
+                # Emit the content, then the first event of the selection.
+                for subevent in self._inject():
+                    yield subevent
+                yield mark, event
+                # Pass the rest of the selection through so that the content
+                # is injected once per selection, not once per event.
+                for mark, event in stream:
+                    if mark != start and start is not ENTER:
+                        # A non-element selection ends at the first event
+                        # with a different mark.
+                        stream.push((mark, event))
+                        break
+                    yield mark, event
+                    # An element selection ends after its EXIT event.
+                    if start is ENTER and mark is EXIT:
+                        break
+            else:
+                yield mark, event
+
+
+class AfterTransformation(InjectorTransformation):
+    """Insert content after selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        # The look-ahead loop below may read one event too many; wrapping the
+        # stream lets it hand that event back with push().
+        stream = PushBackStream(stream)
+        for mark, event in stream:
+            yield mark, event
+            if mark:
+                start = mark
+                # Pass the rest of the selection through before injecting.
+                for mark, event in stream:
+                    if start is not ENTER and mark != start:
+                        # A non-element selection ends at the first event
+                        # with a different mark.
+                        stream.push((mark, event))
+                        break
+                    yield mark, event
+                    # An element selection ends after its EXIT event.
+                    if start is ENTER and mark is EXIT:
+                        break
+                for subevent in self._inject():
+                    yield subevent
+
+
+class PrependTransformation(InjectorTransformation):
+    """Prepend content to the inside of selected elements."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        The content is emitted right after every event marked ENTER.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, event in stream:
+            yield mark, event
+            if mark is not ENTER:
+                continue
+            for injected in self._inject():
+                yield injected
+
+
+class AppendTransformation(InjectorTransformation):
+    """Append content after the content of selected elements."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        The content is emitted just before the EXIT event of every selected
+        element.
+
+        :param stream: The marked event stream to filter
+        """
+        # The inner loop has to resume where the outer loop stopped, so both
+        # must share one iterator. A re-iterable input (for example the list
+        # that `Transformer.buffer()` applies to the stream) would otherwise
+        # be restarted from its first event.
+        stream = iter(stream)
+        for mark, event in stream:
+            yield mark, event
+            if mark is ENTER:
+                # Pass the element's content through, holding back its EXIT.
+                for mark, event in stream:
+                    if mark is EXIT:
+                        break
+                    yield mark, event
+                for subevent in self._inject():
+                    yield subevent
+                # Emit the held-back closing event after the new content.
+                yield mark, event
+
+
+class AttrTransformation(object):
+    """Set an attribute on selected elements."""
+
+    def __init__(self, name, value):
+        """Construct transform.
+
+        :param name: name of the attribute that should be set
+        :param value: the value to set; `None` removes the attribute, and a
+                      callable is invoked with the attribute name and the
+                      event to compute the value
+        """
+        self.value = value
+        self.name = name
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Only events marked ENTER are modified.
+
+        :param stream: The marked event stream to filter
+        """
+        is_callable = hasattr(self.value, '__call__')
+        for mark, (kind, data, pos) in stream:
+            if mark is not ENTER:
+                yield mark, (kind, data, pos)
+                continue
+            if is_callable:
+                value = self.value(self.name, (kind, data, pos))
+            else:
+                value = self.value
+            tag, attrs = data[0], data[1]
+            if value is not None:
+                attrs = attrs | [(QName(self.name), value)]
+            else:
+                attrs = attrs - [QName(self.name)]
+            yield mark, (kind, (tag, attrs), pos)
+
+
+
+class StreamBuffer(Stream):
+    """Stream event buffer used for cut and copy transformations."""
+
+    def __init__(self):
+        """Create the buffer, starting out with no events."""
+        Stream.__init__(self, [])
+
+    def append(self, event):
+        """Add an event to the end of the buffer.
+
+        :param event: the markup event to add
+        """
+        self.events.append(event)
+
+    def reset(self):
+        """Empty the buffer of events, keeping the same underlying list."""
+        self.events[:] = []
+
+
+class CopyTransformation(object):
+    """Copy selected events into a buffer for later insertion."""
+
+    def __init__(self, buffer, accumulate=False):
+        """Create the copy transformation.
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: if true, selections are appended to the buffer;
+                           otherwise the buffer is reset here and again
+                           before each selection is stored
+        """
+        if not accumulate:
+            buffer.reset()
+        self.buffer = buffer
+        self.accumulate = accumulate
+
+    def __call__(self, stream):
+        """Apply the transformation to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        # The look-ahead loop below may read one event too many; wrapping the
+        # stream lets it hand that event back with push().
+        stream = PushBackStream(stream)
+
+        for mark, event in stream:
+            if mark:
+                if not self.accumulate:
+                    self.buffer.reset()
+                # The selection is collected in `events` and only emitted
+                # once all of it has been stored in the buffer.
+                events = [(mark, event)]
+                self.buffer.append(event)
+                start = mark
+                for mark, event in stream:
+                    if start is not ENTER and mark != start:
+                        # A non-element selection ends at the first event
+                        # with a different mark.
+                        stream.push((mark, event))
+                        break
+                    events.append((mark, event))
+                    self.buffer.append(event)
+                    # An element selection ends after its EXIT event.
+                    if start is ENTER and mark is EXIT:
+                        break
+                for i in events:
+                    yield i
+            else:
+                yield mark, event
+
+
+class CutTransformation(object):
+    """Cut selected events into a buffer for later insertion and remove the
+    selection.
+    """
+
+    def __init__(self, buffer, accumulate=False):
+        """Create the cut transformation.
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: if true, selections are appended to the buffer;
+                           otherwise the buffer is reset before each
+                           selection is stored
+        """
+        self.buffer = buffer
+        self.accumulate = accumulate
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        # Names of the selected attributes that still have to be stripped
+        # from the START event following the current ATTR selection.
+        attributes = []
+        # The look-ahead loop below may read one event too many; wrapping the
+        # stream lets it hand that event back with push().
+        stream = PushBackStream(stream)
+        broken = False
+        if not self.accumulate:
+            self.buffer.reset()
+        for mark, event in stream:
+            if mark:
+                # Send a BREAK event if there was no other event sent between
+                # this selection and the previous one.
+                if not self.accumulate:
+                    if not broken and self.buffer:
+                        yield BREAK, (BREAK, None, None)
+                    self.buffer.reset()
+                self.buffer.append(event)
+                start = mark
+                if mark is ATTR:
+                    attributes.extend([name for name, _ in event[1][1]])
+                for mark, event in stream:
+                    if start is mark is ATTR:
+                        attributes.extend([name for name, _ in event[1][1]])
+                    # Handle non-element contiguous selection
+                    if start is not ENTER and mark != start:
+                        # Operating on the attributes of a START event
+                        if start is ATTR:
+                            kind, data, pos = event
+                            assert kind is START
+                            data = (data[0], data[1] - attributes)
+                            # Start over with an empty list for the next
+                            # element. Leaving None here would make the
+                            # extend() call for the next ATTR selection fail
+                            # with an AttributeError.
+                            attributes = []
+                            stream.push((mark, (kind, data, pos)))
+                        else:
+                            stream.push((mark, event))
+                        break
+                    self.buffer.append(event)
+                    # An element selection ends after its EXIT event.
+                    if start is ENTER and mark is EXIT:
+                        break
+                broken = False
+            else:
+                broken = True
+                yield mark, event
+        if not broken and self.buffer:
+            yield BREAK, (BREAK, None, None)
diff --git a/genshi/input.py b/genshi/input.py
new file mode 100644
index 0000000..039e5e5
--- /dev/null
+++ b/genshi/input.py
@@ -0,0 +1,443 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support for constructing markup streams from files, strings, or other
+sources.
+"""
+
+from itertools import chain
+import htmlentitydefs as entities
+import HTMLParser as html
+from StringIO import StringIO
+from xml.parsers import expat
+
+from genshi.core import Attrs, QName, Stream, stripentities
+from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \
+                        END_NS, START_CDATA, END_CDATA, PI, COMMENT
+
+__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
+__docformat__ = 'restructuredtext en'
+
+
+def ET(element):
+    """Convert a given ElementTree element to a markup stream.
+    
+    :param element: an ElementTree element
+    :return: a generator of ``(kind, data, pos)`` markup events
+    """
+    # ElementTree spells qualified names "{uri}local"; drop the leading "{"
+    tag_name = QName(element.tag.lstrip('{'))
+    attrs = Attrs([(QName(attr.lstrip('{')), value)
+                   for attr, value in element.items()])
+    yield START, (tag_name, attrs), (None, -1, -1) # no position available
+    if element.text:
+        yield TEXT, element.text, (None, -1, -1)
+    for child in element: # direct iteration; getchildren() is deprecated
+        for item in ET(child):
+            yield item
+    yield END, tag_name, (None, -1, -1)
+    if element.tail:
+        yield TEXT, element.tail, (None, -1, -1)
+
+
+class ParseError(Exception):
+    """Error signalling a fatal syntax problem in the input that is being
+    parsed.
+    """
+
+    def __init__(self, message, filename=None, lineno=-1, offset=-1):
+        """Set up the exception.
+
+        :param message: the error text reported by the parser
+        :param filename: path of the file being parsed, if there is one
+        :param lineno: number of the line where the error was detected
+        :param offset: column number where the error was detected
+        """
+        text = message
+        if filename:
+            text = message + ', in ' + filename
+        Exception.__init__(self, text)
+        self.msg = message
+        self.filename = filename or '<string>'
+        self.lineno, self.offset = lineno, offset
+
+
+class XMLParser(object):
+    """Generator-based XML parser based on roughly equivalent code in
+    Kid/ElementTree.
+    
+    The parsing is initiated by iterating over the parser object:
+    
+    >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
+    >>> for kind, data, pos in parser:
+    ...     print('%s %s' % (kind, data))
+    START (QName('root'), Attrs([(QName('id'), u'2')]))
+    START (QName('child'), Attrs())
+    TEXT Foo
+    END child
+    END root
+    """
+
+    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
+                   entities.name2codepoint.items()]
+    _external_dtd = '\n'.join(_entitydefs) # DTD text used by _build_foreign
+
+    def __init__(self, source, filename=None, encoding=None):
+        """Initialize the parser for the given XML input.
+        
+        :param source: the XML text as a file-like object
+        :param filename: the name of the file, if appropriate
+        :param encoding: the encoding of the file; if not specified, the
+                         encoding is assumed to be ASCII, UTF-8, or UTF-16, or
+                         whatever the encoding specified in the XML declaration
+                         (if any)
+        """
+        self.source = source
+        self.filename = filename
+
+        # Setup the Expat parser
+        parser = expat.ParserCreate(encoding, '}') # '}' = namespace separator
+        parser.buffer_text = True
+        parser.returns_unicode = True
+        parser.ordered_attributes = True # attributes arrive as a flat list
+
+        parser.StartElementHandler = self._handle_start
+        parser.EndElementHandler = self._handle_end
+        parser.CharacterDataHandler = self._handle_data
+        parser.StartDoctypeDeclHandler = self._handle_doctype
+        parser.StartNamespaceDeclHandler = self._handle_start_ns
+        parser.EndNamespaceDeclHandler = self._handle_end_ns
+        parser.StartCdataSectionHandler = self._handle_start_cdata
+        parser.EndCdataSectionHandler = self._handle_end_cdata
+        parser.ProcessingInstructionHandler = self._handle_pi
+        parser.XmlDeclHandler = self._handle_xml_decl
+        parser.CommentHandler = self._handle_comment
+
+        # Tell Expat that we'll handle non-XML entities ourselves
+        # (in _handle_other)
+        parser.DefaultHandler = self._handle_other
+        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
+        parser.UseForeignDTD()
+        parser.ExternalEntityRefHandler = self._build_foreign
+
+        self.expat = parser
+        self._queue = [] # events appended by _enqueue, drained by parse()
+
+    def parse(self):
+        """Generator that parses the XML source, yielding markup events.
+        
+        :return: a markup event stream
+        :raises ParseError: if the XML text is not well formed
+        """
+        def _generate():
+            try:
+                bufsize = 4 * 1024 # 4K
+                done = False
+                while 1:
+                    # Feed Expat until its handlers have queued some events
+                    while not done and len(self._queue) == 0:
+                        data = self.source.read(bufsize)
+                        if data == '': # end of data
+                            if hasattr(self, 'expat'):
+                                self.expat.Parse('', True)
+                                del self.expat # get rid of circular references
+                            done = True
+                        else:
+                            if isinstance(data, unicode):
+                                data = data.encode('utf-8') # feed UTF-8 bytes
+                            self.expat.Parse(data, False)
+                    for event in self._queue:
+                        yield event
+                    self._queue = []
+                    if done:
+                        break
+            except expat.ExpatError, e: # re-raised as ParseError
+                msg = str(e)
+                raise ParseError(msg, self.filename, e.lineno, e.offset)
+        return Stream(_generate()).filter(_coalesce)
+
+    def __iter__(self):
+        return iter(self.parse())
+
+    def _build_foreign(self, context, base, sysid, pubid):
+        parser = self.expat.ExternalEntityParserCreate(context)
+        parser.ParseFile(StringIO(self._external_dtd)) # HTML entity decls
+        return 1 # non-zero lets Expat continue parsing
+
+    def _enqueue(self, kind, data=None, pos=None):
+        if pos is None:
+            pos = self._getpos()
+        if kind is TEXT:
+            # Expat reports the *end* of the text event as current position,
+            # so step back to its start. The offset can only be recovered for
+            # single-line text; for multi-line text it is reported as -1.
+            # NOTE(review): splitlines() ignores a trailing line break, so for
+            # text ending in "\n" lineno looks one too high -- please confirm
+            if '\n' in data:
+                lines = data.splitlines()
+                lineno = pos[1] - len(lines) + 1
+                offset = -1
+            else:
+                lineno = pos[1]
+                offset = pos[2] - len(data)
+            pos = (pos[0], lineno, offset)
+        self._queue.append((kind, data, pos))
+
+    def _getpos_unknown(self): # position tuple without line/column info
+        return (self.filename, -1, -1)
+
+    def _getpos(self): # current (filename, line, column) of the Expat parser
+        return (self.filename, self.expat.CurrentLineNumber,
+                self.expat.CurrentColumnNumber)
+
+    def _handle_start(self, tag, attrib):
+        attrs = Attrs([(QName(name), value) for name, value in
+                       zip(*[iter(attrib)] * 2)]) # pair up name, value items
+        self._enqueue(START, (QName(tag), attrs))
+
+    def _handle_end(self, tag):
+        self._enqueue(END, QName(tag))
+
+    def _handle_data(self, text):
+        self._enqueue(TEXT, text)
+
+    def _handle_xml_decl(self, version, encoding, standalone):
+        self._enqueue(XML_DECL, (version, encoding, standalone))
+
+    def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
+        self._enqueue(DOCTYPE, (name, pubid, sysid)) # pubid before sysid
+
+    def _handle_start_ns(self, prefix, uri):
+        self._enqueue(START_NS, (prefix or '', uri)) # no prefix becomes ''
+
+    def _handle_end_ns(self, prefix):
+        self._enqueue(END_NS, prefix or '')
+
+    def _handle_start_cdata(self):
+        self._enqueue(START_CDATA)
+
+    def _handle_end_cdata(self):
+        self._enqueue(END_CDATA)
+
+    def _handle_pi(self, target, data):
+        self._enqueue(PI, (target, data))
+
+    def _handle_comment(self, text):
+        self._enqueue(COMMENT, text)
+
+    def _handle_other(self, text):
+        if text.startswith('&'):
+            # deal with undefined entities
+            try:
+                text = unichr(entities.name2codepoint[text[1:-1]])
+                self._enqueue(TEXT, text)
+            except KeyError: # not a known HTML entity name either
+                filename, lineno, offset = self._getpos()
+                error = expat.error('undefined entity "%s": line %d, column %d'
+                                    % (text, lineno, offset))
+                error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY
+                error.lineno = lineno
+                error.offset = offset
+                raise error
+
+
+def XML(text):
+    """Parse XML source text and return the resulting markup stream.
+    
+    Unlike an `XMLParser`, the returned stream can be iterated repeatedly:
+    
+    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
+    >>> print(xml)
+    <doc><elem>Foo</elem><elem>Bar</elem></doc>
+    >>> print(xml.select('elem'))
+    <elem>Foo</elem><elem>Bar</elem>
+    >>> print(xml.select('elem/text()'))
+    FooBar
+    
+    :param text: the XML source
+    :return: the parsed XML event stream
+    :raises ParseError: if the XML text is not well-formed
+    """
+    events = list(XMLParser(StringIO(text)))
+    return Stream(events)
+
+
+class HTMLParser(html.HTMLParser, object):
+    """Parser for HTML input based on the Python `HTMLParser` module.
+    
+    This class provides the same interface for generating stream events as
+    `XMLParser`, and attempts to automatically balance tags.
+    
+    The parsing is initiated by iterating over the parser object:
+    
+    >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
+    >>> for kind, data, pos in parser:
+    ...     print('%s %s' % (kind, data))
+    START (QName('ul'), Attrs([(QName('compact'), u'compact')]))
+    START (QName('li'), Attrs())
+    TEXT Foo
+    END li
+    END ul
+    """
+
+    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
+                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
+                              'param'])
+
+    def __init__(self, source, filename=None, encoding='utf-8'):
+        """Initialize the parser for the given HTML input.
+        
+        :param source: the HTML text as a file-like object
+        :param filename: the name of the file, if known
+        :param encoding: encoding of the file; ignored if the input is unicode
+        """
+        html.HTMLParser.__init__(self)
+        self.source = source
+        self.filename = filename
+        self.encoding = encoding
+        self._queue = [] # events appended by _enqueue, drained by parse()
+        self._open_tags = [] # non-empty elements that still await an end tag
+
+    def parse(self):
+        """Generator that parses the HTML source, yielding markup events.
+        
+        :return: a markup event stream
+        :raises ParseError: if the HTML text is not well formed
+        """
+        def _generate():
+            try:
+                bufsize = 4 * 1024 # 4K
+                done = False
+                while 1:
+                    # Feed the parser until its handlers have queued events
+                    while not done and len(self._queue) == 0:
+                        data = self.source.read(bufsize)
+                        if data == '': # end of data
+                            self.close()
+                            done = True
+                        else:
+                            self.feed(data)
+                    for kind, data, pos in self._queue:
+                        yield kind, data, pos
+                    self._queue = []
+                    if done:
+                        # Balance the stream: end whatever is still open
+                        open_tags = self._open_tags
+                        open_tags.reverse() # innermost element first
+                        for tag in open_tags:
+                            yield END, QName(tag), pos # pos of last event
+                        break
+            except html.HTMLParseError, e:
+                msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
+                raise ParseError(msg, self.filename, e.lineno, e.offset)
+        return Stream(_generate()).filter(_coalesce)
+
+    def __iter__(self):
+        return iter(self.parse())
+
+    def _enqueue(self, kind, data, pos=None):
+        if pos is None:
+            pos = self._getpos()
+        self._queue.append((kind, data, pos))
+
+    def _getpos(self): # current (filename, line, column) of the parser
+        lineno, column = self.getpos()
+        return (self.filename, lineno, column)
+
+    def handle_starttag(self, tag, attrib):
+        fixed_attrib = []
+        for name, value in attrib: # Fixup minimized attributes
+            if value is None:
+                value = unicode(name)
+            elif not isinstance(value, unicode):
+                value = value.decode(self.encoding, 'replace')
+            fixed_attrib.append((QName(name), stripentities(value)))
+
+        self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
+        if tag in self._EMPTY_ELEMS: # void element: end it right away
+            self._enqueue(END, QName(tag))
+        else:
+            self._open_tags.append(tag)
+
+    def handle_endtag(self, tag):
+        if tag not in self._EMPTY_ELEMS:
+            # End every open element up to and including the matching one
+            while self._open_tags:
+                open_tag = self._open_tags.pop()
+                self._enqueue(END, QName(open_tag))
+                if open_tag.lower() == tag.lower():
+                    break
+
+    def handle_data(self, text):
+        if not isinstance(text, unicode):
+            text = text.decode(self.encoding, 'replace')
+        self._enqueue(TEXT, text)
+
+    def handle_charref(self, name):
+        if name.lower().startswith('x'): # hexadecimal reference, "&#x..;"
+            text = unichr(int(name[1:], 16))
+        else:
+            text = unichr(int(name))
+        self._enqueue(TEXT, text)
+
+    def handle_entityref(self, name):
+        try:
+            text = unichr(entities.name2codepoint[name])
+        except KeyError: # unknown entity: keep the reference as literal text
+            text = '&%s;' % name
+        self._enqueue(TEXT, text)
+
+    def handle_pi(self, data):
+        if data.endswith('?'):
+            data = data[:-1]
+        target, data = (data.split(None, 1) + ['', ''])[:2] # PI may lack data
+        self._enqueue(PI, (target.strip(), data.strip()))
+
+    def handle_comment(self, text):
+        self._enqueue(COMMENT, text)
+
+
+def HTML(text, encoding='utf-8'):
+    """Parse HTML source text and return the resulting markup stream.
+    
+    Unlike an `HTMLParser`, the returned stream can be iterated repeatedly:
+    
+    >>> html = HTML('<body><h1>Foo</h1></body>')
+    >>> print(html)
+    <body><h1>Foo</h1></body>
+    >>> print(html.select('h1'))
+    <h1>Foo</h1>
+    >>> print(html.select('h1/text()'))
+    Foo
+    
+    :param text: the HTML source
+    :param encoding: the encoding handed to `HTMLParser` for byte strings
+    :return: the parsed XML event stream
+    :raises ParseError: if the HTML is malformed and error recovery fails
+    """
+    parser = HTMLParser(StringIO(text), encoding=encoding)
+    return Stream(list(parser))
+
+
+def _coalesce(stream):
+    """Merge every run of adjacent TEXT events into a single TEXT event."""
+    chunks, first_pos = [], None
+    for kind, data, pos in stream:
+        if kind is TEXT:
+            chunks.append(data)
+            if first_pos is None:
+                first_pos = pos
+            continue
+        if chunks:
+            yield TEXT, ''.join(chunks), first_pos
+            chunks, first_pos = [], None
+        if kind:
+            yield kind, data, pos
+    if chunks: # flush text that ends the stream
+        yield TEXT, ''.join(chunks), first_pos
diff --git a/genshi/output.py b/genshi/output.py
new file mode 100644
index 0000000..2ebb38b
--- /dev/null
+++ b/genshi/output.py
@@ -0,0 +1,838 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""This module provides different kinds of serialization methods for XML event
+streams.
+"""
+
+from itertools import chain
+import re
+
+from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
+from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
+                        START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
+
+__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer',
+           'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
+__docformat__ = 'restructuredtext en'
+
+
+def encode(iterator, method='xml', encoding='utf-8', out=None):
+    """Turn the text chunks produced by a serializer into encoded output.
+    
+    :param iterator: the iterator returned from serializing a stream (any
+                     iterator that yields unicode objects will do)
+    :param method: the serialization method; it decides what happens to
+                   characters the target encoding cannot represent
+    :param encoding: the encoding of the output string; with `None` the
+                     text is passed through without being encoded
+    :param out: a file-like object to write the output to chunk by chunk
+                instead of returning one big string; for a file, socket
+                or similar the `encoding` must not be `None`
+    :return: a `str` or `unicode` object (depending on the `encoding`
+             parameter), or `None` if the `out` parameter is provided
+    
+    :since: version 0.4.1
+    :note: Changed in 0.5: added the `out` parameter
+    """
+    if encoding is None:
+        convert = lambda chunk: chunk
+    else:
+        if method == 'text' or isinstance(method, TextSerializer):
+            errors = 'replace'
+        else:
+            errors = 'xmlcharrefreplace'
+        convert = lambda chunk: chunk.encode(encoding, errors)
+    if out is None:
+        return convert(''.join(list(iterator)))
+    for chunk in iterator:
+        out.write(convert(chunk))
+
+
+def get_serializer(method='xml', **kwargs):
+    """Create a serializer object for the requested serialization method.
+    
+    :param method: the serialization method; can be either "xml", "xhtml",
+                   "html", "text" (in any case), or a custom serializer class
+
+    All other keyword arguments are handed to the serializer unchanged, so
+    which ones are accepted depends on the `method` parameter value.
+    
+    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
+    :since: version 0.4.1
+    """
+    factory = method
+    if isinstance(factory, basestring):
+        factory = {'xml': XMLSerializer, 'xhtml': XHTMLSerializer,
+                   'html': HTMLSerializer,
+                   'text': TextSerializer}[factory.lower()]
+    return factory(**kwargs)
+
+
+class DocType(object):
+    """Defines a number of commonly used DOCTYPE declarations as constants."""
+
+    HTML_STRICT = (
+        'html', '-//W3C//DTD HTML 4.01//EN',
+        'http://www.w3.org/TR/html4/strict.dtd'
+    )
+    HTML_TRANSITIONAL = (
+        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
+        'http://www.w3.org/TR/html4/loose.dtd'
+    )
+    HTML_FRAMESET = (
+        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
+        'http://www.w3.org/TR/html4/frameset.dtd'
+    )
+    HTML = HTML_STRICT
+
+    HTML5 = ('html', None, None)
+
+    XHTML_STRICT = (
+        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
+        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
+    )
+    XHTML_TRANSITIONAL = (
+        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
+        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
+    )
+    XHTML_FRAMESET = (
+        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
+        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
+    )
+    XHTML = XHTML_STRICT
+
+    XHTML11 = (
+        'html', '-//W3C//DTD XHTML 1.1//EN',
+        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
+    )
+
+    SVG_FULL = (
+        'svg', '-//W3C//DTD SVG 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
+    )
+    SVG_BASIC = (
+        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
+    )
+    SVG_TINY = (
+        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
+    )
+    SVG = SVG_FULL
+
+    @classmethod
+    def get(cls, name):
+        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
+        declaration for the specified name (matched case-insensitively).
+        
+        The following names are recognized in this version:
+         * "html" or "html-strict" for the HTML 4.01 strict DTD
+         * "html-transitional" for the HTML 4.01 transitional DTD
+         * "html-frameset" for the HTML 4.01 frameset DTD
+         * "html5" for the ``DOCTYPE`` proposed for HTML5
+         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
+         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
+         * "xhtml-frameset" for the XHTML 1.0 frameset DTD
+         * "xhtml11" for the XHTML 1.1 DTD
+         * "svg" or "svg-full" for the SVG 1.1 DTD
+         * "svg-basic" for the SVG Basic 1.1 DTD
+         * "svg-tiny" for the SVG Tiny 1.1 DTD
+        
+        :param name: the name of the ``DOCTYPE``
+        :return: the ``(name, pubid, sysid)`` tuple for the requested
+                 ``DOCTYPE``, or ``None`` if the name is not recognized
+        :since: version 0.4.1
+        """
+        return { # always looked up on `cls` so subclass overrides are used
+            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
+            'html-transitional': cls.HTML_TRANSITIONAL,
+            'html-frameset': cls.HTML_FRAMESET,
+            'html5': cls.HTML5,
+            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
+            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
+            'xhtml-frameset': cls.XHTML_FRAMESET,
+            'xhtml11': cls.XHTML11,
+            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
+            'svg-basic': cls.SVG_BASIC,
+            'svg-tiny': cls.SVG_TINY
+        }.get(name.lower())
+
+
+class XMLSerializer(object):
+    """Produces XML text from an event stream.
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print(''.join(XMLSerializer()(elem.generate())))
+    <div><a href="foo"/><br/><hr noshade="True"/></div>
+    """
+
+    _PRESERVE_SPACE = frozenset()
+
+    def __init__(self, doctype=None, strip_whitespace=True,
+                 namespace_prefixes=None, cache=True):
+        """Initialize the XML serializer.
+        
+        :param doctype: the DOCTYPE declaration to include at the top of the
+                        generated output, either as ``(name, pubid, sysid)``
+                        tuple or as a name understood by `DocType.get`
+        :param strip_whitespace: whether extraneous whitespace is stripped
+        :param namespace_prefixes: optional mapping of namespace URIs to
+                                   prefixes, passed to `NamespaceFlattener`
+        :param cache: whether to cache the text output per event, which
+                      improves performance for repetitive markup
+        :note: Changed in 0.4.2: The  `doctype` parameter can now be a string.
+        :note: Changed in 0.6: The `cache` parameter was added
+        """
+        self.filters = [EmptyTagFilter()]
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
+                                               cache=cache))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.cache = cache
+
+    def __call__(self, stream):
+        have_decl = have_doctype = False
+        in_cdata = False
+        # Per-call memo of the output produced for each (kind, data) pair
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output):
+                cache[kind, input] = output
+                return output
+        else:
+            def _emit(kind, input, output):
+                return output
+        # Run the stream through the configured filters before serializing
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+            if kind is TEXT and (in_cdata or isinstance(data, Markup)):
+                yield data # verbatim text must not share the escaped cache
+                continue
+            cached = cache_get((kind, data))
+            if cached is not None:
+                yield cached
+
+            elif kind is START or kind is EMPTY:
+                tag, attrib = data
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    buf += [' ', attr, '="', escape(value), '"']
+                buf.append(kind is EMPTY and '/>' or '>')
+                yield _emit(kind, data, Markup(''.join(buf)))
+
+            elif kind is END:
+                yield _emit(kind, data, Markup('</%s>' % data))
+
+            elif kind is TEXT:
+                yield _emit(kind, data, escape(data, quotes=False))
+
+            elif kind is COMMENT:
+                yield _emit(kind, data, Markup('<!--%s-->' % data))
+
+            elif kind is XML_DECL and not have_decl:
+                version, encoding, standalone = data
+                buf = ['<?xml version="%s"' % version]
+                if encoding:
+                    buf.append(' encoding="%s"' % encoding)
+                if standalone != -1:
+                    standalone = standalone and 'yes' or 'no'
+                    buf.append(' standalone="%s"' % standalone)
+                buf.append('?>\n')
+                yield Markup(''.join(buf))
+                have_decl = True # only the first declaration is emitted
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(''.join(buf)) % tuple([p for p in data if p])
+                have_doctype = True # only the first DOCTYPE is emitted
+
+            elif kind is START_CDATA:
+                yield Markup('<![CDATA[')
+                in_cdata = True
+
+            elif kind is END_CDATA:
+                yield Markup(']]>')
+                in_cdata = False
+
+            elif kind is PI:
+                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
+class XHTMLSerializer(XMLSerializer):
+    """Produces XHTML text from an event stream.
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print(''.join(XHTMLSerializer()(elem.generate())))
+    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
+    """
+
+    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
+                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
+                              'param'])
+    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
+                                'defer', 'disabled', 'ismap', 'multiple',
+                                'nohref', 'noresize', 'noshade', 'nowrap'])
+    _PRESERVE_SPACE = frozenset([
+        QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
+        QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
+    ])
+
+    def __init__(self, doctype=None, strip_whitespace=True,
+                 namespace_prefixes=None, drop_xml_decl=True, cache=True):
+        super(XHTMLSerializer, self).__init__(doctype, False)
+        self.filters = [EmptyTagFilter()] # filter chain is rebuilt from here
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+        namespace_prefixes = dict(namespace_prefixes or {}) # copy, not mutate
+        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
+        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
+                                               cache=cache))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.drop_xml_decl = drop_xml_decl
+        self.cache = cache
+
+    def __call__(self, stream):
+        boolean_attrs = self._BOOLEAN_ATTRS
+        empty_elems = self._EMPTY_ELEMS
+        drop_xml_decl = self.drop_xml_decl
+        have_decl = have_doctype = False
+        in_cdata = False
+        # Per-call memo of the output produced for each (kind, data) pair
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output):
+                cache[kind, input] = output
+                return output
+        else:
+            def _emit(kind, input, output):
+                return output
+        # Run the stream through the configured filters before serializing
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+            if kind is TEXT and (in_cdata or isinstance(data, Markup)):
+                yield data # verbatim text must not share the escaped cache
+                continue
+            cached = cache_get((kind, data))
+            if cached is not None:
+                yield cached
+
+            elif kind is START or kind is EMPTY:
+                tag, attrib = data
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    if attr in boolean_attrs:
+                        value = attr # boolean attributes get their own name
+                    elif attr == 'xml:lang' and 'lang' not in attrib:
+                        buf += [' lang="', escape(value), '"']
+                    elif attr == 'xml:space':
+                        continue
+                    buf += [' ', attr, '="', escape(value), '"']
+                if kind is EMPTY:
+                    if tag in empty_elems:
+                        buf.append(' />')
+                    else:
+                        buf.append('></%s>' % tag)
+                else:
+                    buf.append('>')
+                yield _emit(kind, data, Markup(''.join(buf)))
+
+            elif kind is END:
+                yield _emit(kind, data, Markup('</%s>' % data))
+
+            elif kind is TEXT:
+                yield _emit(kind, data, escape(data, quotes=False))
+
+            elif kind is COMMENT:
+                yield _emit(kind, data, Markup('<!--%s-->' % data))
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(''.join(buf)) % tuple([p for p in data if p])
+                have_doctype = True # only the first DOCTYPE is emitted
+
+            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
+                version, encoding, standalone = data
+                buf = ['<?xml version="%s"' % version]
+                if encoding:
+                    buf.append(' encoding="%s"' % encoding)
+                if standalone != -1:
+                    standalone = standalone and 'yes' or 'no'
+                    buf.append(' standalone="%s"' % standalone)
+                buf.append('?>\n')
+                yield Markup(''.join(buf))
+                have_decl = True # only the first declaration is emitted
+
+            elif kind is START_CDATA:
+                yield Markup('<![CDATA[')
+                in_cdata = True
+
+            elif kind is END_CDATA:
+                yield Markup(']]>')
+                in_cdata = False
+
+            elif kind is PI:
+                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
+class HTMLSerializer(XHTMLSerializer):
+    """Produces HTML text from an event stream.
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print(''.join(HTMLSerializer()(elem.generate())))
+    <div><a href="foo"></a><br><hr noshade></div>
+    """
+
+    _NOESCAPE_ELEMS = frozenset([
+        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+    ])
+
+    def __init__(self, doctype=None, strip_whitespace=True, cache=True):
+        """Initialize the HTML serializer.
+        
+        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
+                        DOCTYPE declaration that should be included at the top
+                        of the generated output
+        :param strip_whitespace: whether extraneous whitespace should be
+                                 stripped from the output
+        :param cache: whether to cache the text output per event, which
+                      improves performance for repetitive markup
+        :note: Changed in 0.6: The `cache` parameter was added
+        """
+        super(HTMLSerializer, self).__init__(doctype, False)
+        self.filters = [EmptyTagFilter()]
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
+                                                 self._NOESCAPE_ELEMS))
+        self.filters.append(NamespaceFlattener(prefixes={
+            'http://www.w3.org/1999/xhtml': ''
+        }, cache=cache))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.cache = True
+
+    def __call__(self, stream):
+        boolean_attrs = self._BOOLEAN_ATTRS
+        empty_elems = self._EMPTY_ELEMS
+        noescape_elems = self._NOESCAPE_ELEMS
+        have_doctype = False
+        noescape = False
+
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output):
+                cache[kind, input] = output
+                return output
+        else:
+            def _emit(kind, input, output):
+                return output
+
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, _ in stream:
+            output = cache_get((kind, data))
+            if output is not None:
+                yield output
+                if (kind is START or kind is EMPTY) \
+                        and data[0] in noescape_elems:
+                    noescape = True
+                elif kind is END:
+                    noescape = False
+
+            elif kind is START or kind is EMPTY:
+                tag, attrib = data
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    if attr in boolean_attrs:
+                        if value:
+                            buf += [' ', attr]
+                    elif ':' in attr:
+                        if attr == 'xml:lang' and 'lang' not in attrib:
+                            buf += [' lang="', escape(value), '"']
+                    elif attr != 'xmlns':
+                        buf += [' ', attr, '="', escape(value), '"']
+                buf.append('>')
+                if kind is EMPTY:
+                    if tag not in empty_elems:
+                        buf.append('</%s>' % tag)
+                yield _emit(kind, data, Markup(''.join(buf)))
+                if tag in noescape_elems:
+                    noescape = True
+
+            elif kind is END:
+                yield _emit(kind, data, Markup('</%s>' % data))
+                noescape = False
+
+            elif kind is TEXT:
+                if noescape:
+                    yield _emit(kind, data, data)
+                else:
+                    yield _emit(kind, data, escape(data, quotes=False))
+
+            elif kind is COMMENT:
+                yield _emit(kind, data, Markup('<!--%s-->' % data))
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(''.join(buf)) % tuple([p for p in data if p])
+                have_doctype = True
+
+            elif kind is PI:
+                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
+class TextSerializer(object):
+    """Produces plain text from an event stream.
+    
+    Only text events are included in the output. Unlike the other serializer,
+    special XML characters are not escaped:
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
+    >>> print(elem)
+    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
+    >>> print(''.join(TextSerializer()(elem.generate())))
+    <Hello!>
+
+    If text events contain literal markup (instances of the `Markup` class),
+    that markup is by default passed through unchanged:
+    
+    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
+    >>> print(elem.generate().render(TextSerializer, encoding=None))
+    <a href="foo">Hello &amp; Bye!</a><br/>
+    
+    You can use the ``strip_markup`` to change this behavior, so that tags and
+    entities are stripped from the output (or in the case of entities,
+    replaced with the equivalent character):
+
+    >>> print(elem.generate().render(TextSerializer, strip_markup=True,
+    ...                              encoding=None))
+    Hello & Bye!
+    """
+
+    def __init__(self, strip_markup=False):
+        """Create the serializer.
+        
+        :param strip_markup: whether markup (tags and encoded characters) found
+                             in the text should be removed
+        """
+        self.strip_markup = strip_markup
+
+    def __call__(self, stream):
+        strip_markup = self.strip_markup
+        for event in stream:
+            if event[0] is TEXT:
+                data = event[1]
+                if strip_markup and type(data) is Markup:
+                    data = data.striptags().stripentities()
+                yield unicode(data)
+
+
+class EmptyTagFilter(object):
+    """Combines `START` and `STOP` events into `EMPTY` events for elements that
+    have no contents.
+    """
+
+    EMPTY = StreamEventKind('EMPTY')
+
+    def __call__(self, stream):
+        prev = (None, None, None)
+        for ev in stream:
+            if prev[0] is START:
+                if ev[0] is END:
+                    prev = EMPTY, prev[1], prev[2]
+                    yield prev
+                    continue
+                else:
+                    yield prev
+            if ev[0] is not START:
+                yield ev
+            prev = ev
+
+
+# Module-level alias for the event kind created in `EmptyTagFilter`, so that
+# code in this module can refer to it simply as `EMPTY`.
+EMPTY = EmptyTagFilter.EMPTY
+
+
+class NamespaceFlattener(object):
+    r"""Output stream filter that removes namespace information from the stream,
+    instead adding namespace attributes and prefixes as needed.
+    
+    :param prefixes: optional mapping of namespace URIs to prefixes
+    
+    >>> from genshi.input import XML
+    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
+    ...   <two:item/>
+    ... </doc>''')
+    >>> for kind, data, pos in NamespaceFlattener()(xml):
+    ...     print('%s %r' % (kind, data))
+    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
+    TEXT u'\n  '
+    START (u'two:item', Attrs())
+    END u'two:item'
+    TEXT u'\n'
+    END u'doc'
+    """
+
+    def __init__(self, prefixes=None, cache=True):
+        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
+        if prefixes is not None:
+            self.prefixes.update(prefixes)
+        self.cache = cache
+
+    def __call__(self, stream):
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output, pos):
+                cache[kind, input] = output
+                return kind, output, pos
+        else:
+            def _emit(kind, input, output, pos):
+                return output
+
+        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
+        namespaces = {XML_NAMESPACE.uri: ['xml']}
+        def _push_ns(prefix, uri):
+            namespaces.setdefault(uri, []).append(prefix)
+            prefixes.setdefault(prefix, []).append(uri)
+            cache.clear()
+        def _pop_ns(prefix):
+            uris = prefixes.get(prefix)
+            uri = uris.pop()
+            if not uris:
+                del prefixes[prefix]
+            if uri not in uris or uri != uris[-1]:
+                uri_prefixes = namespaces[uri]
+                uri_prefixes.pop()
+                if not uri_prefixes:
+                    del namespaces[uri]
+            cache.clear()
+            return uri
+
+        ns_attrs = []
+        _push_ns_attr = ns_attrs.append
+        def _make_ns_attr(prefix, uri):
+            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
+
+        def _gen_prefix():
+            val = 0
+            while 1:
+                val += 1
+                yield 'ns%d' % val
+        _gen_prefix = _gen_prefix().next
+
+        for kind, data, pos in stream:
+            output = cache_get((kind, data))
+            if output is not None:
+                yield kind, output, pos
+
+            elif kind is START or kind is EMPTY:
+                tag, attrs = data
+
+                tagname = tag.localname
+                tagns = tag.namespace
+                if tagns:
+                    if tagns in namespaces:
+                        prefix = namespaces[tagns][-1]
+                        if prefix:
+                            tagname = '%s:%s' % (prefix, tagname)
+                    else:
+                        _push_ns_attr(('xmlns', tagns))
+                        _push_ns('', tagns)
+
+                new_attrs = []
+                for attr, value in attrs:
+                    attrname = attr.localname
+                    attrns = attr.namespace
+                    if attrns:
+                        if attrns not in namespaces:
+                            prefix = _gen_prefix()
+                            _push_ns(prefix, attrns)
+                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
+                        else:
+                            prefix = namespaces[attrns][-1]
+                        if prefix:
+                            attrname = '%s:%s' % (prefix, attrname)
+                    new_attrs.append((attrname, value))
+
+                yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
+                del ns_attrs[:]
+
+            elif kind is END:
+                tagname = data.localname
+                tagns = data.namespace
+                if tagns:
+                    prefix = namespaces[tagns][-1]
+                    if prefix:
+                        tagname = '%s:%s' % (prefix, tagname)
+                yield _emit(kind, data, tagname, pos)
+
+            elif kind is START_NS:
+                prefix, uri = data
+                if uri not in namespaces:
+                    prefix = prefixes.get(uri, [prefix])[-1]
+                    _push_ns_attr(_make_ns_attr(prefix, uri))
+                _push_ns(prefix, uri)
+
+            elif kind is END_NS:
+                if data in prefixes:
+                    uri = _pop_ns(data)
+                    if ns_attrs:
+                        attr = _make_ns_attr(data, uri)
+                        if attr in ns_attrs:
+                            ns_attrs.remove(attr)
+
+            else:
+                yield kind, data, pos
+
+
+class WhitespaceFilter(object):
+    """A filter that removes extraneous ignorable white space from the
+    stream.
+    """
+
+    def __init__(self, preserve=None, noescape=None):
+        """Initialize the filter.
+        
+        :param preserve: a set or sequence of tag names for which white-space
+                         should be preserved
+        :param noescape: a set or sequence of tag names for which text content
+                         should not be escaped
+        
+        The `noescape` set is expected to refer to elements that cannot contain
+        further child elements (such as ``<style>`` or ``<script>`` in HTML
+        documents).
+        """
+        if preserve is None:
+            preserve = []
+        self.preserve = frozenset(preserve)
+        if noescape is None:
+            noescape = []
+        self.noescape = frozenset(noescape)
+
+    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
+                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
+                 collapse_lines=re.compile('\n{2,}').sub):
+        mjoin = Markup('').join
+        preserve_elems = self.preserve
+        preserve = 0
+        noescape_elems = self.noescape
+        noescape = False
+
+        textbuf = []
+        push_text = textbuf.append
+        pop_text = textbuf.pop
+        for kind, data, pos in chain(stream, [(None, None, None)]):
+
+            if kind is TEXT:
+                if noescape:
+                    data = Markup(data)
+                push_text(data)
+            else:
+                if textbuf:
+                    if len(textbuf) > 1:
+                        text = mjoin(textbuf, escape_quotes=False)
+                        del textbuf[:]
+                    else:
+                        text = escape(pop_text(), quotes=False)
+                    if not preserve:
+                        text = collapse_lines('\n', trim_trailing_space('', text))
+                    yield TEXT, Markup(text), pos
+
+                if kind is START:
+                    tag, attrs = data
+                    if preserve or (tag in preserve_elems or
+                                    attrs.get(space) == 'preserve'):
+                        preserve += 1
+                    if not noescape and tag in noescape_elems:
+                        noescape = True
+
+                elif kind is END:
+                    noescape = False
+                    if preserve:
+                        preserve -= 1
+
+                elif kind is START_CDATA:
+                    noescape = True
+
+                elif kind is END_CDATA:
+                    noescape = False
+
+                if kind:
+                    yield kind, data, pos
+
+
+class DocTypeInserter(object):
+    """A filter that inserts the DOCTYPE declaration in the correct location,
+    after the XML declaration.
+    """
+    def __init__(self, doctype):
+        """Initialize the filter.
+
+        :param doctype: DOCTYPE as a string or DocType object.
+        """
+        if isinstance(doctype, basestring):
+            doctype = DocType.get(doctype)
+        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))
+
+    def __call__(self, stream):
+        doctype_inserted = False
+        for kind, data, pos in stream:
+            if not doctype_inserted:
+                doctype_inserted = True
+                if kind is XML_DECL:
+                    yield (kind, data, pos)
+                    yield self.doctype_event
+                    continue
+                yield self.doctype_event
+
+            yield (kind, data, pos)
+
+        if not doctype_inserted:
+            yield self.doctype_event
diff --git a/genshi/path.py b/genshi/path.py
new file mode 100644
index 0000000..122fbf0
--- /dev/null
+++ b/genshi/path.py
@@ -0,0 +1,1528 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Basic support for evaluating XPath expressions against streams.
+
+>>> from genshi.input import XML
+>>> doc = XML('''<doc>
+...  <items count="4">
+...       <item status="new">
+...         <summary>Foo</summary>
+...       </item>
+...       <item status="closed">
+...         <summary>Bar</summary>
+...       </item>
+...       <item status="closed" resolution="invalid">
+...         <summary>Baz</summary>
+...       </item>
+...       <item status="closed" resolution="fixed">
+...         <summary>Waz</summary>
+...       </item>
+...   </items>
+... </doc>''')
+>>> print(doc.select('items/item[@status="closed" and '
+...     '(@resolution="invalid" or not(@resolution))]/summary/text()'))
+BarBaz
+
+Because the XPath engine operates on markup streams (as opposed to tree
+structures), it only implements a subset of the full XPath 1.0 language.
+"""
+
+from collections import deque
+try:
+    reduce # builtin in Python < 3
+except NameError:
+    from functools import reduce
+from math import ceil, floor
+import operator
+import re
+from itertools import chain
+
+from genshi.core import Stream, Attrs, Namespace, QName
+from genshi.core import START, END, TEXT, START_NS, END_NS, COMMENT, PI, \
+                        START_CDATA, END_CDATA
+
+__all__ = ['Path', 'PathSyntaxError']
+__docformat__ = 'restructuredtext en'
+
+
+class Axis(object):
+    """Defines constants for the various supported XPath axes."""
+
+    ATTRIBUTE = 'attribute'
+    CHILD = 'child'
+    DESCENDANT = 'descendant'
+    DESCENDANT_OR_SELF = 'descendant-or-self'
+    SELF = 'self'
+
+    @classmethod
+    def forname(cls, name):
+        """Return the axis constant for the given name, or `None` if no such
+        axis was defined.
+        """
+        return getattr(cls, name.upper().replace('-', '_'), None)
+
+
+# Module-level aliases for the axis constants. The strategies below compare
+# axes against these names using identity (`is`).
+ATTRIBUTE = Axis.ATTRIBUTE
+CHILD = Axis.CHILD
+DESCENDANT = Axis.DESCENDANT
+DESCENDANT_OR_SELF = Axis.DESCENDANT_OR_SELF
+SELF = Axis.SELF
+
+
+class GenericStrategy(object):
+    """Path matching strategy that accepts every path (`supports` always
+    returns `True`).
+
+    The path is a list of ``(axis, nodetest, predicates)`` steps. Matching
+    keeps a stack with one entry per open element, recording at which steps
+    the children of that element have to be checked.
+    """
+
+    @classmethod
+    def supports(cls, path):
+        """Tell whether this strategy can handle `path`; always `True`."""
+        return True
+
+    def __init__(self, path):
+        """Store the list of path steps to match against."""
+        self.path = path
+
+    def test(self, ignore_context):
+        """Create and return a function that tests stream events one by one.
+
+        :param ignore_context: if true, the axis of the first step is widened
+                               to ``descendant-or-self`` (or, for a leading
+                               attribute step, `_DOTSLASHSLASH` is prepended)
+        :return: a function ``_test(event, namespaces, variables,
+                 updateonly=False)`` that keeps its state between calls
+        """
+        p = self.path
+        # Normalize the steps so that they start at the context node.
+        # NOTE(review): `_DOTSLASH` and `_DOTSLASHSLASH` are defined elsewhere
+        # in this module; presumably the steps for "." and ".//" -- confirm.
+        if ignore_context:
+            if p[0][0] is ATTRIBUTE:
+                steps = [_DOTSLASHSLASH] + p
+            else:
+                steps = [(DESCENDANT_OR_SELF, p[0][1], p[0][2])] + p[1:]
+        elif p[0][0] is CHILD or p[0][0] is ATTRIBUTE \
+                or p[0][0] is DESCENDANT:
+            steps = [_DOTSLASH] + p
+        else:
+            steps = p
+
+        # for node it contains all positions of xpath expression
+        # where its child should start checking for matches
+        # with list of corresponding context counters
+        # there can be many of them, because position that is from
+        # descendant-like axis can be achieved from different nodes
+        # for example <a><a><b/></a></a> should match both //a//b[1]
+        # and //a//b[2]
+        # positions always form increasing sequence (invariant)
+        stack = [[(0, [[]])]]
+
+        def _test(event, namespaces, variables, updateonly=False):
+            """Test `event` against the path and update the matching state.
+
+            Returns `None` when the event does not match (and for `END`,
+            namespace and CDATA events). For a match it returns `True`, or,
+            when the last step uses the attribute axis, the result of that
+            step's node test. `updateonly` is not used in this function.
+            """
+            kind, data, pos = event[:3]
+            retval = None
+
+            # Manage the stack that tells us "where we are" in the stream
+            if kind is END:
+                if stack:
+                    stack.pop()
+                return None
+            if kind is START_NS or kind is END_NS \
+                    or kind is START_CDATA or kind is END_CDATA:
+                # should we make namespaces work?
+                return None
+
+            # queue of (step index, counters inherited from the parent,
+            # counters added while processing this very node)
+            pos_queue = deque([(pos, cou, []) for pos, cou in stack[-1]])
+            # step positions to hand down to the children of this node
+            next_pos = []
+
+            # length of real part of path - we omit attribute axis
+            # (the expression subtracts 1 if the last step is on the attribute
+            # axis, and 0 otherwise)
+            real_len = len(steps) - ((steps[-1][0] == ATTRIBUTE) or 1 and 0)
+            last_checked = -1
+
+            # places where we have to check for match, are these
+            # provided by parent
+            while pos_queue:
+                x, pcou, mcou = pos_queue.popleft()
+                axis, nodetest, predicates = steps[x]
+
+                # we need to push descendant-like positions from parent
+                # further
+                if (axis is DESCENDANT or axis is DESCENDANT_OR_SELF) and pcou:
+                    if next_pos and next_pos[-1][0] == x:
+                        next_pos[-1][1].extend(pcou)
+                    else:
+                        next_pos.append((x, pcou))
+
+                # nodetest first
+                if not nodetest(kind, data, pos, namespaces, variables):
+                    continue
+
+                # counters packs that were already bad
+                missed = set()
+                counters_len = len(pcou) + len(mcou)
+
+                # number of counters - we have to create one
+                # for every context position based predicate
+                cnum = 0
+
+                # tells if we have match with position x
+                matched = True
+
+                if predicates:
+                    for predicate in predicates:
+                        pretval = predicate(kind, data, pos,
+                                            namespaces,
+                                            variables)
+                        # a float result is treated as a position to compare
+                        # the counters against
+                        if type(pretval) is float: # FIXME <- need to check
+                                                   # this for other types that
+                                                   # can be coerced to float
+
+                            # each counter pack needs to be checked
+                            for i, cou in enumerate(chain(pcou, mcou)):
+                                # it was bad before
+                                if i in missed:
+                                    continue
+
+                                if len(cou) < cnum + 1:
+                                    cou.append(0)
+                                cou[cnum] += 1 
+
+                                # it is bad now
+                                if cou[cnum] != int(pretval):
+                                    missed.add(i)
+
+                            # none of counters pack was good
+                            if len(missed) == counters_len:
+                                pretval = False
+                            cnum += 1
+
+                        if not pretval:
+                             matched = False
+                             break
+
+                if not matched:
+                    continue
+
+                # counter for next position with current node as context node
+                child_counter = []
+
+                if x + 1 == real_len:
+                    # we reached end of expression, because x + 1
+                    # is equal to the length of expression
+                    matched = True
+                    axis, nodetest, predicates = steps[-1]
+                    if axis is ATTRIBUTE:
+                        matched = nodetest(kind, data, pos, namespaces,
+                                           variables)
+                    if matched:
+                        retval = matched
+                else:
+                    next_axis = steps[x + 1][0]
+
+                    # if next axis allows matching self we have
+                    # to add next position to our queue
+                    if next_axis is DESCENDANT_OR_SELF or next_axis is SELF:
+                        if not pos_queue or pos_queue[0][0] > x + 1:
+                            pos_queue.appendleft((x + 1, [], [child_counter]))
+                        else:
+                            pos_queue[0][2].append(child_counter)
+
+                    # if axis is not self we have to add it to child's list
+                    if next_axis is not SELF:
+                        next_pos.append((x + 1, [child_counter]))
+
+            # only elements open a new level; its matching END pops it again
+            if kind is START:
+                stack.append(next_pos)
+
+            return retval
+
+        return _test
+
+
+class SimplePathStrategy(object):
+    """Strategy for path with only local names, attributes and text nodes."""
+
+    @classmethod
+    def supports(cls, path):
+        if path[0][0] is ATTRIBUTE:
+            return False
+        allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest)
+        for _, nodetest, predicates in path:
+            if predicates:
+                return False
+            if not isinstance(nodetest, allowed_tests):
+                return False
+        return True
+
+    def __init__(self, path):
+        # fragments is list of tuples (fragment, pi, attr, self_beginning)
+        # fragment is list of nodetests for fragment of path with only
+        # child:: axes between
+        # pi is KMP partial match table for this fragment
+        # attr is attribute nodetest if fragment ends with @ and None otherwise
+        # self_beginning is True if axis for first fragment element
+        # was self (first fragment) or descendant-or-self (farther fragment)
+        self.fragments = []
+
+        self_beginning = False
+        fragment = []
+
+        def nodes_equal(node1, node2):
+            """Tests if two node tests are equal"""
+            if type(node1) is not type(node2):
+                return False
+            if type(node1) == LocalNameTest:
+                return node1.name == node2.name
+            return True
+
+        def calculate_pi(f):
+            """KMP prefix calculation for table"""
+            # the indexes in prefix table are shifted by one
+            # in comparision with common implementations
+            # pi[i] = NORMAL_PI[i + 1]
+            if len(f) == 0:
+                return []
+            pi = [0]
+            s = 0
+            for i in range(1, len(f)):
+                while s > 0 and not nodes_equal(f[s], f[i]):
+                    s = pi[s-1]
+                if nodes_equal(f[s], f[i]):
+                    s += 1
+                pi.append(s)
+            return pi
+
+        for axis in path:
+            if axis[0] is SELF:
+                if len(fragment) != 0:
+                    # if element is not first in fragment it has to be
+                    # the same as previous one
+                    # for example child::a/self::b is always wrong
+                    if axis[1] != fragment[-1][1]:
+                        self.fragments = None
+                        return
+                else:
+                    self_beginning = True
+                    fragment.append(axis[1])
+            elif axis[0] is CHILD:
+                fragment.append(axis[1])
+            elif axis[0] is ATTRIBUTE:
+                pi = calculate_pi(fragment)
+                self.fragments.append((fragment, pi, axis[1], self_beginning))
+                # attribute has always to be at the end, so we can jump out
+                return
+            else:
+                pi = calculate_pi(fragment)
+                self.fragments.append((fragment, pi, None, self_beginning))
+                fragment = [axis[1]]
+                if axis[0] is DESCENDANT:
+                    self_beginning = False
+                else: # DESCENDANT_OR_SELF
+                    self_beginning = True
+        pi = calculate_pi(fragment)
+        self.fragments.append((fragment, pi, None, self_beginning))
+
+    def test(self, ignore_context):
+        # stack of triples (fid, p, ic)
+        # fid is index of current fragment
+        # p is position in this fragment
+        # ic is if we ignore context in this fragment
+        stack = []
+        stack_push = stack.append
+        stack_pop = stack.pop
+        frags = self.fragments
+        frags_len = len(frags)
+
+        def _test(event, namespaces, variables, updateonly=False):
+            # expression found impossible during init
+            if frags is None:
+                return None
+
+            kind, data, pos = event[:3]
+
+            # skip events we don't care about
+            if kind is END:
+                if stack:
+                    stack_pop()
+                return None
+            if kind is START_NS or kind is END_NS \
+                    or kind is START_CDATA or kind is END_CDATA:
+                return None
+
+            if not stack:
+                # root node, nothing on stack, special case
+                fid = 0
+                # skip empty fragments (there can be actually only one)
+                while not frags[fid][0]:
+                    fid += 1
+                p = 0
+                # empty fragment means descendant node at beginning
+                ic = ignore_context or (fid > 0)
+
+                # expression can match first node, if first axis is self::,
+                # descendant-or-self:: or if ignore_context is True and
+                # axis is not descendant::
+                if not frags[fid][3] and (not ignore_context or fid > 0):
+                    # axis is not self-beggining, we have to skip this node
+                    stack_push((fid, p, ic))
+                    return None
+            else:
+                # take position of parent
+                fid, p, ic = stack[-1]
+
+            if fid is not None and not ic:
+                # fragment not ignoring context - we can't jump back
+                frag, pi, attrib, _ = frags[fid]
+                frag_len = len(frag)
+
+                if p == frag_len:
+                    # that probably means empty first fragment
+                    pass
+                elif frag[p](kind, data, pos, namespaces, variables):
+                    # match, so we can go further
+                    p += 1
+                else:
+                    # not matched, so there will be no match in subtree
+                    fid, p = None, None
+
+                if p == frag_len and fid + 1 != frags_len:
+                    # we made it to end of fragment, we can go to following
+                    fid += 1
+                    p = 0
+                    ic = True
+
+            if fid is None:
+                # there was no match in fragment not ignoring context
+                if kind is START:
+                    stack_push((fid, p, ic))
+                return None
+
+            if ic:
+                # we are in fragment ignoring context
+                while True:
+                    frag, pi, attrib, _ = frags[fid]
+                    frag_len = len(frag)
+
+                    # KMP new "character"
+                    while p > 0 and (p >= frag_len or not \
+                            frag[p](kind, data, pos, namespaces, variables)):
+                        p = pi[p-1]
+                    if frag[p](kind, data, pos, namespaces, variables):
+                        p += 1
+
+                    if p == frag_len:
+                        # end of fragment reached
+                        if fid + 1 == frags_len:
+                            # that was last fragment
+                            break
+                        else:
+                            fid += 1
+                            p = 0
+                            ic = True
+                            if not frags[fid][3]:
+                                # next fragment not self-beginning
+                                break
+                    else:
+                        break
+
+            if kind is START:
+                # we have to put new position on stack, for children
+
+                if not ic and fid + 1 == frags_len and p == frag_len:
+                    # it is end of the only, not context ignoring fragment
+                    # so there will be no matches in subtree
+                    stack_push((None, None, ic))
+                else:
+                    stack_push((fid, p, ic))
+
+            # have we reached the end of the last fragment?
+            if fid + 1 == frags_len and p == frag_len:
+                if attrib: # attribute ended path, return value
+                    return attrib(kind, data, pos, namespaces, variables)
+                return True
+
+            return None
+
+        return _test
+
+
+class SingleStepStrategy(object):
+    """Matching strategy for paths that consist of exactly one step."""
+
+    @classmethod
+    def supports(cls, path):
+        """Return whether `path` is made up of a single location step."""
+        return len(path) == 1
+
+    def __init__(self, path):
+        self.path = path
+
+    def test(self, ignore_context):
+        """Build the function that tests stream events against the path.
+
+        :param ignore_context: if true, no depth bookkeeping is done and the
+                               axis/depth check is skipped
+        :return: a function taking ``event``, ``namespaces``, ``variables``
+                 and an ``updateonly`` keyword (accepted, but not consulted
+                 by this strategy); it returns ``None`` when the event does
+                 not match, ``True`` when it does, or the result of the
+                 attribute test when the path selects an attribute
+        """
+        steps = self.path
+        if steps[0][0] is ATTRIBUTE:
+            steps = [_DOTSLASH] + steps
+        if steps[-1][0] is ATTRIBUTE:
+            select_attr = steps[-1][1] or None
+        else:
+            select_attr = None
+
+        # One running counter per float-valued predicate, in the order the
+        # predicates appear; used for position based predicates.
+        counters = []
+        # Single-element list so that the closure can mutate the value.
+        depth = [0]
+        first_axis, nodetest, predicates = steps[0]
+
+        def _test(event, namespaces, variables, updateonly=False):
+            kind, data, pos = event[:3]
+
+            # Keep track of "where we are" in the stream
+            if kind is END:
+                if not ignore_context:
+                    depth[0] -= 1
+                return None
+            if kind is START_NS or kind is END_NS \
+                    or kind is START_CDATA or kind is END_CDATA:
+                # should we make namespaces work?
+                return None
+
+            if not ignore_context:
+                # The check uses the depth as it was *before* this event.
+                level = depth[0]
+                if kind is START:
+                    depth[0] = level + 1
+                if (first_axis is SELF and level != 0) \
+                        or (first_axis is CHILD and level != 1) \
+                        or (first_axis is DESCENDANT and level < 1):
+                    return None
+
+            if not nodetest(kind, data, pos, namespaces, variables):
+                return None
+
+            slot = 0
+            for predicate in predicates:
+                outcome = predicate(kind, data, pos, namespaces, variables)
+                if type(outcome) is float: # FIXME <- need to check this
+                                           # for other types that can be
+                                           # coerced to float
+                    if len(counters) < slot + 1:
+                        counters.append(0)
+                    counters[slot] += 1
+                    if counters[slot] != int(outcome):
+                        outcome = False
+                    slot += 1
+                if not outcome:
+                    return None
+
+            if select_attr:
+                return select_attr(kind, data, pos, namespaces, variables)
+
+            return True
+
+        return _test
+
+
+class Path(object):
+    """Implements basic XPath support on streams.
+
+    Instances of this class represent a "compiled" XPath expression, and
+    provide methods for testing the path against a stream, as well as
+    extracting a substream matching that path.
+    """
+
+    # Tried in order for every parsed path; the first class whose `supports()`
+    # accepts the path is used.
+    STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy)
+
+    def __init__(self, text, filename=None, lineno=-1):
+        """Create the path object from a string.
+
+        :param text: the path expression
+        :param filename: the name of the file in which the path expression was
+                         found (used in error messages)
+        :param lineno: the line on which the expression was found
+        :raise NotImplementedError: if none of the `STRATEGIES` supports one
+                                    of the parsed paths
+        """
+        self.source = text
+        self.paths = PathParser(text, filename, lineno).parse()
+        self.strategies = []
+        for path in self.paths:
+            for strategy_class in self.STRATEGIES:
+                if strategy_class.supports(path):
+                    self.strategies.append(strategy_class(path))
+                    break
+            else:
+                # `NotImplemented` is a constant, not an exception class;
+                # calling it would fail with a TypeError instead.
+                raise NotImplementedError('No strategy found for path')
+
+    def __repr__(self):
+        paths = []
+        for path in self.paths:
+            steps = []
+            for axis, nodetest, predicates in path:
+                step = '%s::%s' % (axis, nodetest)
+                for predicate in predicates:
+                    step += '[%s]' % predicate
+                steps.append(step)
+            paths.append('/'.join(steps))
+        return '<%s "%s">' % (type(self).__name__, '|'.join(paths))
+
+    def select(self, stream, namespaces=None, variables=None):
+        """Returns a substream of the given stream that matches the path.
+
+        If there are no matches, this method returns an empty stream.
+
+        >>> from genshi.input import XML
+        >>> xml = XML('<root><elem><child>Text</child></elem></root>')
+
+        >>> print(Path('.//child').select(xml))
+        <child>Text</child>
+
+        >>> print(Path('.//child/text()').select(xml))
+        Text
+
+        :param stream: the stream to select from
+        :param namespaces: (optional) a mapping of namespace prefixes to URIs
+        :param variables: (optional) a mapping of variable names to values
+        :return: the substream matching the path, or an empty stream
+        :rtype: `Stream`
+        """
+        if namespaces is None:
+            namespaces = {}
+        if variables is None:
+            variables = {}
+        # Look the serializer up on the object that was passed in: once
+        # `iter()` has been applied, only a plain iterator is left, which may
+        # not carry that attribute.
+        serializer = getattr(stream, 'serializer', None)
+        stream = iter(stream)
+        def _generate(stream=stream, ns=namespaces, vs=variables):
+            advance = stream.next
+            test = self.test()
+            for event in stream:
+                result = test(event, ns, vs)
+                if result is True:
+                    yield event
+                    if event[0] is START:
+                        # Emit the whole subtree of the matched element,
+                        # feeding the events to the test with `updateonly`.
+                        depth = 1
+                        while depth > 0:
+                            subevent = advance()
+                            if subevent[0] is START:
+                                depth += 1
+                            elif subevent[0] is END:
+                                depth -= 1
+                            yield subevent
+                            test(subevent, ns, vs, updateonly=True)
+                elif result:
+                    yield result
+        return Stream(_generate(), serializer=serializer)
+
+    def test(self, ignore_context=False):
+        """Returns a function that can be used to track whether the path matches
+        a specific stream event.
+
+        The function returned expects the positional arguments ``event``,
+        ``namespaces`` and ``variables``. The first is a stream event, while the
+        latter two are a mapping of namespace prefixes to URIs, and a mapping
+        of variable names to values, respectively. In addition, the function
+        accepts an ``updateonly`` keyword argument that defaults to ``False``.
+        If it is set to ``True``, the function only updates its internal state,
+        but does not perform any tests or return a result.
+
+        If the path matches the event, the function returns the match (for
+        example, a `START` or `TEXT` event.) Otherwise, it returns ``None``.
+
+        >>> from genshi.input import XML
+        >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>')
+        >>> test = Path('child').test()
+        >>> namespaces, variables = {}, {}
+        >>> for event in xml:
+        ...     if test(event, namespaces, variables):
+        ...         print('%s %r' % (event[0], event[1]))
+        START (QName('child'), Attrs([(QName('id'), u'2')]))
+
+        :param ignore_context: if `True`, the path is interpreted like a pattern
+                               in XSLT, meaning for example that it will match
+                               at any depth
+        :return: a function that can be used to test individual events in a
+                 stream against the path
+        :rtype: ``function``
+        """
+        tests = [s.test(ignore_context) for s in self.strategies]
+        if len(tests) == 1:
+            return tests[0]
+
+        def _multi(event, namespaces, variables, updateonly=False):
+            # Every test is called for every event so that each one can keep
+            # its state current; the first result that is not None wins.
+            retval = None
+            for test in tests:
+                val = test(event, namespaces, variables, updateonly=updateonly)
+                if retval is None:
+                    retval = val
+            return retval
+        return _multi
+
+
+class PathSyntaxError(Exception):
+    """Error signalling that an XPath expression could not be parsed.
+
+    When a file name is given, the message is extended with the file name and
+    line number. `filename`, `lineno` and `offset` are kept as attributes.
+    """
+
+    def __init__(self, message, filename=None, lineno=-1, offset=-1):
+        self.filename = filename
+        self.lineno = lineno
+        self.offset = offset
+        if filename:
+            location = (message, filename, lineno)
+            message = '%s (%s, line %d)' % location
+        Exception.__init__(self, message)
+
+
+class PathParser(object):
+    """Tokenizes and parses an XPath expression.
+
+    The constructor splits the expression into a flat token list; `parse()`
+    then runs a recursive descent parser over that list. `pos` is the index of
+    the current token.
+    """
+
+    _QUOTES = (("'", "'"), ('"', '"'))
+    # Multi-character tokens are listed before the shorter tokens they start
+    # with, so that the regular expression alternation prefers them.
+    _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
+               '=', '!=', '!', '|', ',', '>=', '>', '<=', '<', '$')
+    _tokenize = re.compile('("[^"]*")|(\'[^\']*\')|((?:\d+)?\.\d+)|(%s)|([^%s\s]+)|\s+' % (
+                           '|'.join([re.escape(t) for t in _TOKENS]),
+                           ''.join([re.escape(t[0]) for t in _TOKENS]))).findall
+
+    def __init__(self, text, filename=None, lineno=-1):
+        """Tokenize the expression.
+
+        :param text: the path expression
+        :param filename: name of the file the expression comes from (passed
+                         on to `PathSyntaxError`)
+        :param lineno: line number of the expression (passed on to
+                       `PathSyntaxError`)
+        """
+        self.filename = filename
+        self.lineno = lineno
+        # Each match yields one group per alternative; whitespace matches
+        # leave all groups empty and are filtered out.
+        self.tokens = [t for t in [dqstr or sqstr or number or token or name
+                                   for dqstr, sqstr, number, token, name in
+                                   self._tokenize(text)] if t]
+        self.pos = 0
+
+    # Tokenizer
+
+    @property
+    def at_end(self):
+        """Whether the current token is the last one (not "past the end")."""
+        return self.pos == len(self.tokens) - 1
+
+    @property
+    def cur_token(self):
+        """The token at the current position."""
+        return self.tokens[self.pos]
+
+    def next_token(self):
+        """Advance by one token and return the new current token."""
+        self.pos += 1
+        return self.tokens[self.pos]
+
+    def peek_token(self):
+        """Return the token after the current one, or `None` at the end."""
+        if not self.at_end:
+            return self.tokens[self.pos + 1]
+        return None
+
+    # Recursive descent parser
+
+    def parse(self):
+        """Parses the XPath expression and returns a list of location path
+        tests.
+
+        For union expressions (such as `*|text()`), this function returns one
+        test for each operand in the union. For path expressions that don't
+        use the union operator, the function always returns a list of size 1.
+
+        Each path test in turn is a sequence of tests that correspond to the
+        location steps, each tuples of the form `(axis, testfunc, predicates)`
+
+        :raise PathSyntaxError: if the expression is empty or malformed
+        """
+        if not self.tokens:
+            # Without this check the first token access fails with a bare
+            # IndexError that carries no file or line information.
+            raise PathSyntaxError('Empty path expression', self.filename,
+                                  self.lineno)
+        paths = [self._location_path()]
+        while self.cur_token == '|':
+            self.next_token()
+            paths.append(self._location_path())
+        if not self.at_end:
+            raise PathSyntaxError('Unexpected token %r after end of expression'
+                                  % self.cur_token, self.filename, self.lineno)
+        return paths
+
+    def _location_path(self):
+        """Parse one location path into a list of step tuples."""
+        steps = []
+        while True:
+            if self.cur_token.startswith('/'):
+                if not steps:
+                    if self.cur_token == '//':
+                        # hack to make //* match every node - also root
+                        self.next_token()
+                        axis, nodetest, predicates = self._location_step()
+                        steps.append((DESCENDANT_OR_SELF, nodetest,
+                                      predicates))
+                        if self.at_end or not self.cur_token.startswith('/'):
+                            break
+                        continue
+                    else:
+                        raise PathSyntaxError('Absolute location paths not '
+                                              'supported', self.filename,
+                                              self.lineno)
+                elif self.cur_token == '//':
+                    steps.append((DESCENDANT_OR_SELF, NodeTest(), []))
+                self.next_token()
+
+            axis, nodetest, predicates = self._location_step()
+            if not axis:
+                axis = CHILD
+            steps.append((axis, nodetest, predicates))
+            if self.at_end or not self.cur_token.startswith('/'):
+                break
+
+        return steps
+
+    def _location_step(self):
+        """Parse one step and return `(axis, nodetest, predicates)`.
+
+        `axis` is `None` when the step does not name an axis.
+        """
+        if self.cur_token == '@':
+            axis = ATTRIBUTE
+            self.next_token()
+        elif self.cur_token == '.':
+            axis = SELF
+        elif self.cur_token == '..':
+            raise PathSyntaxError('Unsupported axis "parent"', self.filename,
+                                  self.lineno)
+        elif self.peek_token() == '::':
+            axis_name = self.cur_token
+            axis = Axis.forname(axis_name)
+            if axis is None:
+                # Report the name that was written; `axis` itself is None.
+                raise PathSyntaxError('Unsupported axis "%s"' % axis_name,
+                                      self.filename, self.lineno)
+            self.next_token()
+            self.next_token()
+        else:
+            axis = None
+        nodetest = self._node_test(axis or CHILD)
+        predicates = []
+        while self.cur_token == '[':
+            predicates.append(self._predicate())
+        return axis, nodetest, predicates
+
+    def _node_test(self, axis=None):
+        """Parse a node type test, a qualified name test or a name test."""
+        test = prefix = None
+        next_token = self.peek_token()
+        if next_token in ('(', '()'): # Node type test
+            test = self._node_type()
+
+        elif next_token == ':': # Namespace prefix
+            prefix = self.cur_token
+            self.next_token()
+            localname = self.next_token()
+            if localname == '*':
+                test = QualifiedPrincipalTypeTest(axis, prefix)
+            else:
+                test = QualifiedNameTest(axis, prefix, localname)
+
+        else: # Name test
+            if self.cur_token == '*':
+                test = PrincipalTypeTest(axis)
+            elif self.cur_token == '.':
+                test = NodeTest()
+            else:
+                test = LocalNameTest(axis, self.cur_token)
+
+        if not self.at_end:
+            self.next_token()
+        return test
+
+    def _node_type(self):
+        """Parse a node type test such as `text()` and instantiate it."""
+        name = self.cur_token
+        self.next_token()
+
+        args = []
+        if self.cur_token != '()':
+            # The processing-instruction() function optionally accepts the
+            # name of the PI as argument, which must be a literal string
+            self.next_token() # (
+            if self.cur_token != ')':
+                string = self.cur_token
+                if (string[0], string[-1]) in self._QUOTES:
+                    string = string[1:-1]
+                args.append(string)
+
+        cls = _nodetest_map.get(name)
+        if not cls:
+            raise PathSyntaxError('%s() not allowed here' % name, self.filename,
+                                  self.lineno)
+        return cls(*args)
+
+    def _predicate(self):
+        """Parse a `[...]` predicate and return its expression."""
+        assert self.cur_token == '['
+        self.next_token()
+        expr = self._or_expr()
+        if self.cur_token != ']':
+            raise PathSyntaxError('Expected "]" to close predicate, '
+                                  'but found "%s"' % self.cur_token,
+                                  self.filename, self.lineno)
+        if not self.at_end:
+            self.next_token()
+        return expr
+
+    def _or_expr(self):
+        expr = self._and_expr()
+        while self.cur_token == 'or':
+            self.next_token()
+            expr = OrOperator(expr, self._and_expr())
+        return expr
+
+    def _and_expr(self):
+        expr = self._equality_expr()
+        while self.cur_token == 'and':
+            self.next_token()
+            expr = AndOperator(expr, self._equality_expr())
+        return expr
+
+    def _equality_expr(self):
+        expr = self._relational_expr()
+        while self.cur_token in ('=', '!='):
+            op = _operator_map[self.cur_token]
+            self.next_token()
+            expr = op(expr, self._relational_expr())
+        return expr
+
+    def _relational_expr(self):
+        """Parse a chain of `>`, `>=`, `<` and `<=` comparisons."""
+        expr = self._sub_expr()
+        while self.cur_token in ('>', '>=', '<', '<='):
+            op = _operator_map.get(self.cur_token)
+            if op is None:
+                # Keep a missing table entry a syntax error rather than
+                # letting a KeyError escape.
+                raise PathSyntaxError('Unsupported operator "%s"'
+                                      % self.cur_token, self.filename,
+                                      self.lineno)
+            self.next_token()
+            expr = op(expr, self._sub_expr())
+        return expr
+
+    def _sub_expr(self):
+        """Parse a parenthesized expression or fall through to a primary."""
+        token = self.cur_token
+        if token != '(':
+            return self._primary_expr()
+        self.next_token()
+        expr = self._or_expr()
+        if self.cur_token != ')':
+            raise PathSyntaxError('Expected ")" to close sub-expression, '
+                                  'but found "%s"' % self.cur_token,
+                                  self.filename, self.lineno)
+        self.next_token()
+        return expr
+
+    def _primary_expr(self):
+        """Parse a literal, variable reference, function call or node test."""
+        token = self.cur_token
+        if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
+            self.next_token()
+            return StringLiteral(token[1:-1])
+        elif token[0].isdigit() or token[0] == '.':
+            self.next_token()
+            return NumberLiteral(as_float(token))
+        elif token == '$':
+            token = self.next_token()
+            self.next_token()
+            return VariableReference(token)
+        elif not self.at_end and self.peek_token().startswith('('):
+            return self._function_call()
+        else:
+            axis = None
+            if token == '@':
+                axis = ATTRIBUTE
+                self.next_token()
+            return self._node_test(axis)
+
+    def _function_call(self):
+        """Parse a function call and instantiate the function node.
+
+        Every argument is parsed as an expression, so the function classes
+        receive expression nodes rather than plain values.
+        """
+        name = self.cur_token
+        if self.next_token() == '()':
+            args = []
+        else:
+            assert self.cur_token == '('
+            self.next_token()
+            args = [self._or_expr()]
+            while self.cur_token == ',':
+                self.next_token()
+                args.append(self._or_expr())
+            if not self.cur_token == ')':
+                raise PathSyntaxError('Expected ")" to close function argument '
+                                      'list, but found "%s"' % self.cur_token,
+                                      self.filename, self.lineno)
+        self.next_token()
+        cls = _function_map.get(name)
+        if not cls:
+            raise PathSyntaxError('Unsupported function "%s"' % name,
+                                  self.filename, self.lineno)
+        return cls(*args)
+
+
+# Type coercion
+
+def as_scalar(value):
+    """Reduce `value` to a scalar.
+
+    An `Attrs` object must hold exactly one attribute (asserted); the value of
+    that attribute is returned. Anything else is returned unchanged.
+    """
+    if not isinstance(value, Attrs):
+        return value
+    assert len(value) == 1
+    return value[0][1]
+
+def as_float(value):
+    """Return the scalar form of `value` converted with `float()`."""
+    # FIXME - if value is a bool it will be coerced to 0.0 and consequently
+    # compared as a float. This is probably not ideal.
+    scalar = as_scalar(value)
+    return float(scalar)
+
+def as_long(value):
+    """Return the scalar form of `value` converted with `long()`."""
+    scalar = as_scalar(value)
+    return long(scalar)
+
+def as_string(value):
+    """Return the scalar form of `value` as a unicode string.
+
+    The boolean `False` becomes the empty string.
+    """
+    scalar = as_scalar(value)
+    if scalar is not False:
+        return unicode(scalar)
+    return ''
+
+def as_bool(value):
+    """Return the truth value of the scalar form of `value`."""
+    scalar = as_scalar(value)
+    return bool(scalar)
+
+
+# Node tests
+
+class PrincipalTypeTest(object):
+    """Node test that matches any event with the given principal type."""
+    __slots__ = ['principal_type']
+
+    def __init__(self, principal_type):
+        self.principal_type = principal_type
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Test a stream event.
+
+        Only `START` events can match. For the attribute axis the element's
+        attributes are returned (or `None` if that value is falsy), otherwise
+        `True`.
+        """
+        if kind is not START:
+            return None
+        if self.principal_type is not ATTRIBUTE:
+            return True
+        return data[1] or None
+
+    def __repr__(self):
+        return '*'
+
+class QualifiedPrincipalTypeTest(object):
+    """Node test that matches any event with the given principal type in a
+    specific namespace."""
+    __slots__ = ['principal_type', 'prefix']
+
+    def __init__(self, principal_type, prefix):
+        self.principal_type = principal_type
+        self.prefix = prefix
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Test a stream event against the namespace bound to the prefix.
+
+        For the attribute axis, the attributes whose names are in the
+        namespace are returned (`None` if there are none); otherwise the
+        result tells whether the element name is in the namespace.
+        """
+        namespace = Namespace(namespaces.get(self.prefix))
+        if kind is not START:
+            return None
+        if self.principal_type is ATTRIBUTE and data[1]:
+            matching = [(name, value) for name, value in data[1]
+                        if name in namespace]
+            return Attrs(matching) or None
+        return data[0] in namespace
+
+    def __repr__(self):
+        return '%s:*' % self.prefix
+
+class LocalNameTest(object):
+    """Node test that matches any event with the given principal type and
+    local name.
+    """
+    __slots__ = ['principal_type', 'name']
+
+    def __init__(self, principal_type, name):
+        self.principal_type = principal_type
+        self.name = name
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Test a stream event.
+
+        On the attribute axis, an element carrying the named attribute yields
+        an `Attrs` object holding just that attribute. In every other case
+        for a `START` event the local name of the element is compared.
+        """
+        if kind is not START:
+            return None
+        if self.principal_type is ATTRIBUTE and self.name in data[1]:
+            attrs = data[1]
+            return Attrs([(self.name, attrs.get(self.name))])
+        return data[0].localname == self.name
+
+    def __repr__(self):
+        return self.name
+
+class QualifiedNameTest(object):
+    """Node test that matches any event with the given principal type and
+    qualified name.
+    """
+    __slots__ = ['principal_type', 'prefix', 'name']
+
+    def __init__(self, principal_type, prefix, name):
+        self.principal_type = principal_type
+        self.prefix = prefix
+        self.name = name
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Test a stream event against the qualified name.
+
+        The qualified name is built from the URI that `namespaces` maps the
+        prefix to and the local name. On the attribute axis, an element
+        carrying that attribute yields an `Attrs` object holding just that
+        attribute. In every other case for a `START` event the element name
+        is compared.
+        """
+        qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
+        if kind is START:
+            if self.principal_type is ATTRIBUTE and qname in data[1]:
+                # Fetch the value under the same qualified name that was
+                # used for the membership test, not the bare local name.
+                return Attrs([(qname, data[1].get(qname))])
+            else:
+                return data[0] == qname
+
+    def __repr__(self):
+        return '%s:%s' % (self.prefix, self.name)
+
+class CommentNodeTest(object):
+    """Node test accepting comment events only."""
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return whether the event kind is `COMMENT`."""
+        is_comment = kind is COMMENT
+        return is_comment
+
+    def __repr__(self):
+        return 'comment()'
+
+class NodeTest(object):
+    """Node test that matches any node."""
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return `True` for `START` events and the `(kind, data, pos)` tuple
+        for every other event."""
+        if kind is not START:
+            return kind, data, pos
+        return True
+
+    def __repr__(self):
+        return 'node()'
+
+class ProcessingInstructionNodeTest(object):
+    """Node test that matches any processing instruction event."""
+    __slots__ = ['target']
+
+    def __init__(self, target=None):
+        self.target = target
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return whether the event is a processing instruction and, if a
+        target was given, whether it has that target."""
+        if kind is not PI:
+            return False
+        if not self.target:
+            return True
+        return data[0] == self.target
+
+    def __repr__(self):
+        if self.target:
+            arg = '"' + self.target + '"'
+        else:
+            arg = ''
+        return 'processing-instruction(%s)' % arg
+
+class TextNodeTest(object):
+    """Node test accepting text events only."""
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return whether the event kind is `TEXT`."""
+        is_text = kind is TEXT
+        return is_text
+
+    def __repr__(self):
+        return 'text()'
+
+# Maps the name written in a node type test (such as `text()`) to the class
+# implementing it; looked up by `PathParser._node_type`.
+_nodetest_map = {'comment': CommentNodeTest, 'node': NodeTest,
+                 'processing-instruction': ProcessingInstructionNodeTest,
+                 'text': TextNodeTest}
+
+# Functions
+
+class Function(object):
+    """Base class for function nodes in XPath expressions.
+
+    The class itself defines no attributes or methods.
+    """
+
+class BooleanFunction(Function):
+    """The `boolean` function, which converts its argument to a boolean
+    value.
+    """
+    __slots__ = ['expr']
+    _return_type = bool
+
+    def __init__(self, expr):
+        self.expr = expr
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument for the event and coerce it with `as_bool`."""
+        return as_bool(self.expr(kind, data, pos, namespaces, variables))
+
+    def __repr__(self):
+        return 'boolean(%r)' % self.expr
+
+class CeilingFunction(Function):
+    """The `ceiling` function, which rounds the given number up to the
+    nearest integer number.
+    """
+    __slots__ = ['number']
+
+    def __init__(self, number):
+        self.number = number
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument, coerce it to a float and apply `ceil`."""
+        value = as_float(self.number(kind, data, pos, namespaces, variables))
+        return ceil(value)
+
+    def __repr__(self):
+        return 'ceiling(%r)' % self.number
+
+class ConcatFunction(Function):
+    """The `concat` function, which joins the strings it gets as arguments;
+    it accepts any number of them.
+    """
+    __slots__ = ['exprs']
+
+    def __init__(self, *exprs):
+        self.exprs = exprs
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate all arguments first, then coerce each result to a string
+        and concatenate them."""
+        values = [expr(kind, data, pos, namespaces, variables)
+                  for expr in self.exprs]
+        return ''.join([as_string(value) for value in values])
+
+    def __repr__(self):
+        return 'concat(%s)' % ', '.join(map(repr, self.exprs))
+
+class ContainsFunction(Function):
+    """The `contains` function, which tells whether the first string contains
+    the second one.
+    """
+    __slots__ = ['string1', 'string2']
+
+    def __init__(self, string1, string2):
+        self.string1 = string1
+        self.string2 = string2
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate both arguments and test for containment on their string
+        values."""
+        args = (kind, data, pos, namespaces, variables)
+        haystack = self.string1(*args)
+        needle = self.string2(*args)
+        return as_string(needle) in as_string(haystack)
+
+    def __repr__(self):
+        return 'contains(%r, %r)' % (self.string1, self.string2)
+
+class MatchesFunction(Function):
+    """The `matches` function, which returns whether a string matches a regular
+    expression.
+
+    The optional third argument selects regular expression flags, one letter
+    per flag (see `flag_mapping`). It may be a plain string, or an expression
+    node as produced by the parser, in which case it is evaluated for every
+    event. An unknown flag letter raises `KeyError`.
+    """
+    __slots__ = ['string1', 'string2', 'flags', 'flags_expr']
+    flag_mapping = {'s': re.S, 'm': re.M, 'i': re.I, 'x': re.X}
+
+    def __init__(self, string1, string2, flags=''):
+        self.string1 = string1
+        self.string2 = string2
+        if callable(flags):
+            # Expression node: its value is only known once there is an
+            # event to evaluate it against, so start from the base flags.
+            self.flags_expr = flags
+            self.flags = self._map_flags('')
+        else:
+            self.flags_expr = None
+            self.flags = self._map_flags(flags)
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Search the first string for the pattern given by the second.
+
+        :return: the result of `re.search`, i.e. a match object or `None`
+        """
+        string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
+        string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
+        flags = self.flags
+        if self.flags_expr is not None:
+            flags = self._map_flags(as_string(
+                self.flags_expr(kind, data, pos, namespaces, variables)
+            ))
+        return re.search(string2, string1, flags)
+
+    def _map_flags(self, flags):
+        """Combine the `re` flags for the given letters; `re.U` is always
+        included."""
+        return reduce(operator.or_,
+                      [self.flag_mapping[flag] for flag in flags], re.U)
+
+    def __repr__(self):
+        return 'matches(%r, %r)' % (self.string1, self.string2)
+
+class FalseFunction(Function):
+    """The `false` function; its result is always the boolean `False`."""
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return `False`, whatever the event."""
+        result = False
+        return result
+
+    def __repr__(self):
+        return 'false()'
+
+class FloorFunction(Function):
+    """The `floor` function, which rounds the given number down to the
+    nearest integer number.
+    """
+    __slots__ = ['number']
+
+    def __init__(self, number):
+        self.number = number
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument, coerce it to a float and apply `floor`."""
+        value = as_float(self.number(kind, data, pos, namespaces, variables))
+        return floor(value)
+
+    def __repr__(self):
+        return 'floor(%r)' % self.number
+
+class LocalNameFunction(Function):
+    """The `local-name` function, which returns the local name of the current
+    element.
+    """
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return the `localname` of the element name for `START` events,
+        `None` for all other events."""
+        if kind is not START:
+            return None
+        return data[0].localname
+
+    def __repr__(self):
+        return 'local-name()'
+
+class NameFunction(Function):
+    """The `name` function, which returns the qualified name of the current
+    element.
+    """
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return the element name for `START` events, `None` for all other
+        events."""
+        if kind is not START:
+            return None
+        return data[0]
+
+    def __repr__(self):
+        return 'name()'
+
+class NamespaceUriFunction(Function):
+    """The `namespace-uri` function, which returns the namespace URI of the
+    current element.
+    """
+    __slots__ = []
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Return the `namespace` of the element name for `START` events,
+        `None` for all other events."""
+        if kind is not START:
+            return None
+        return data[0].namespace
+
+    def __repr__(self):
+        return 'namespace-uri()'
+
+class NotFunction(Function):
+    """The `not` function, which negates the boolean value of its argument."""
+    __slots__ = ['expr']
+
+    def __init__(self, expr):
+        self.expr = expr
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument, coerce it with `as_bool` and negate it."""
+        truth = as_bool(self.expr(kind, data, pos, namespaces, variables))
+        return not truth
+
+    def __repr__(self):
+        return 'not(%s)' % self.expr
+
+class NormalizeSpaceFunction(Function):
+    """The `normalize-space` function, which removes leading and trailing
+    whitespace in the given string, and replaces every run of whitespace
+    characters inside the string with a single space.
+    """
+    __slots__ = ['expr']
+    # Match runs of one or more characters: a lone tab or newline has to
+    # become a space as well, not only runs of two or more characters.
+    _normalize = re.compile(r'\s+').sub
+
+    def __init__(self, expr):
+        self.expr = expr
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument and return its normalized string value."""
+        string = self.expr(kind, data, pos, namespaces, variables)
+        return self._normalize(' ', as_string(string).strip())
+
+    def __repr__(self):
+        return 'normalize-space(%s)' % repr(self.expr)
+
+class NumberFunction(Function):
+    """The `number` function that converts its argument to a number."""
+    __slots__ = ['expr']
+
+    def __init__(self, expr):
+        self.expr = expr
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument and coerce the result with `as_float`."""
+        return as_float(self.expr(kind, data, pos, namespaces, variables))
+
+    def __repr__(self):
+        return 'number(%r)' % self.expr
+
+class RoundFunction(Function):
+    """The `round` function, which returns the nearest integer number for the
+    given number.
+    """
+    __slots__ = ['number']
+
+    def __init__(self, number):
+        self.number = number
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate the argument, coerce it to a float and apply `round`."""
+        value = as_float(self.number(kind, data, pos, namespaces, variables))
+        return round(value)
+
+    def __repr__(self):
+        return 'round(%r)' % self.number
+
+class StartsWithFunction(Function):
+    """The `starts-with` function that tells whether the first string starts
+    with the second one.
+    """
+    __slots__ = ['string1', 'string2']
+
+    def __init__(self, string1, string2):
+        self.string1 = string1
+        self.string2 = string2
+
+    def __call__(self, kind, data, pos, namespaces, variables):
+        """Evaluate both arguments and compare their string values."""
+        args = (kind, data, pos, namespaces, variables)
+        first = self.string1(*args)
+        second = self.string2(*args)
+        text = as_string(first)
+        prefix = as_string(second)
+        return text.startswith(prefix)
+
+    def __repr__(self):
+        return 'starts-with(%r, %r)' % (self.string1, self.string2)
+
+class StringLengthFunction(Function):
+    """The `string-length` function that returns the length of the given
+    string.
+    """
+    __slots__ = ['expr']
+    def __init__(self, expr):
+        self.expr = expr
+    def __call__(self, kind, data, pos, namespaces, variables):
+        string = self.expr(kind, data, pos, namespaces, variables)
+        return len(as_string(string))
+    def __repr__(self):
+        return 'string-length(%r)' % self.expr
+
+class SubstringFunction(Function):
+    """The `substring` function that returns the part of a string that starts
+    at the given offset, and optionally limited to the given length.
+    """
+    __slots__ = ['string', 'start', 'length']
+    def __init__(self, string, start, length=None):
+        self.string = string
+        self.start = start
+        self.length = length
+    def __call__(self, kind, data, pos, namespaces, variables):
+        string = self.string(kind, data, pos, namespaces, variables)
+        start = self.start(kind, data, pos, namespaces, variables)
+        length = 0
+        if self.length is not None:
+            length = self.length(kind, data, pos, namespaces, variables)
+        return string[as_long(start):len(as_string(string)) - as_long(length)]
+    def __repr__(self):
+        if self.length is not None:
+            return 'substring(%r, %r, %r)' % (self.string, self.start,
+                                              self.length)
+        else:
+            return 'substring(%r, %r)' % (self.string, self.start)
+
+class SubstringAfterFunction(Function):
+    """The `substring-after` function that returns the part of a string that
+    is found after the given substring.
+    """
+    __slots__ = ['string1', 'string2']
+    def __init__(self, string1, string2):
+        self.string1 = string1
+        self.string2 = string2
+    def __call__(self, kind, data, pos, namespaces, variables):
+        string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
+        string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
+        index = string1.find(string2)
+        if index >= 0:
+            return string1[index + len(string2):]
+        return ''
+    def __repr__(self):
+        return 'substring-after(%r, %r)' % (self.string1, self.string2)
+
+class SubstringBeforeFunction(Function):
+    """The `substring-before` function that returns the part of a string that
+    precedes the first occurrence of the given substring ('' if not found).
+    """
+    __slots__ = ['string1', 'string2']
+    def __init__(self, string1, string2):
+        self.string1 = string1
+        self.string2 = string2
+    def __call__(self, kind, data, pos, namespaces, variables):
+        string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
+        string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
+        index = string1.find(string2)
+        if index >= 0:
+            return string1[:index]
+        return ''
+    def __repr__(self):
+        return 'substring-before(%r, %r)' % (self.string1, self.string2)
+
+class TranslateFunction(Function):
+    """The `translate` function that translates a set of characters in a
+    string to target set of characters.
+    """
+    __slots__ = ['string', 'fromchars', 'tochars']
+    def __init__(self, string, fromchars, tochars):
+        self.string = string
+        self.fromchars = fromchars
+        self.tochars = tochars
+    def __call__(self, kind, data, pos, namespaces, variables):
+        string = as_string(self.string(kind, data, pos, namespaces, variables))
+        fromchars = as_string(self.fromchars(kind, data, pos, namespaces, variables))
+        tochars = as_string(self.tochars(kind, data, pos, namespaces, variables))
+        table = dict(zip([ord(c) for c in fromchars],
+                         [ord(c) for c in tochars]))
+        return string.translate(table)
+    def __repr__(self):
+        return 'translate(%r, %r, %r)' % (self.string, self.fromchars,
+                                          self.tochars)
+
+class TrueFunction(Function):
+    """The `true` function, which always returns the boolean `true` value."""
+    __slots__ = []
+    def __call__(self, kind, data, pos, namespaces, variables):
+        return True
+    def __repr__(self):
+        return 'true()'
+
+_function_map = {'boolean': BooleanFunction, 'ceiling': CeilingFunction,
+                 'concat': ConcatFunction, 'contains': ContainsFunction,
+                 'matches': MatchesFunction, 'false': FalseFunction, 'floor':
+                 FloorFunction, 'local-name': LocalNameFunction, 'name':
+                 NameFunction, 'namespace-uri': NamespaceUriFunction,
+                 'normalize-space': NormalizeSpaceFunction, 'not': NotFunction,
+                 'number': NumberFunction, 'round': RoundFunction,
+                 'starts-with': StartsWithFunction, 'string-length':
+                 StringLengthFunction, 'substring': SubstringFunction,
+                 'substring-after': SubstringAfterFunction, 'substring-before':
+                 SubstringBeforeFunction, 'translate': TranslateFunction,
+                 'true': TrueFunction}
+
+# Literals & Variables
+
+class Literal(object):
+    """Abstract base class for literal nodes."""
+
+class StringLiteral(Literal):
+    """A string literal node."""
+    __slots__ = ['text']
+    def __init__(self, text):
+        self.text = text
+    def __call__(self, kind, data, pos, namespaces, variables):
+        return self.text
+    def __repr__(self):
+        return '"%s"' % self.text
+
+class NumberLiteral(Literal):
+    """A number literal node."""
+    __slots__ = ['number']
+    def __init__(self, number):
+        self.number = number
+    def __call__(self, kind, data, pos, namespaces, variables):
+        return self.number
+    def __repr__(self):
+        return str(self.number)
+
+class VariableReference(Literal):
+    """A variable reference node."""
+    __slots__ = ['name']
+    def __init__(self, name):
+        self.name = name
+    def __call__(self, kind, data, pos, namespaces, variables):
+        return variables.get(self.name)
+    def __repr__(self):
+        return str(self.name)
+
+# Operators
+
+class AndOperator(object):
+    """The boolean operator `and`."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = as_bool(self.lval(kind, data, pos, namespaces, variables))
+        if not lval:
+            return False
+        rval = self.rval(kind, data, pos, namespaces, variables)
+        return as_bool(rval)
+    def __repr__(self):
+        return '%s and %s' % (self.lval, self.rval)
+
+class EqualsOperator(object):
+    """The equality operator `=`."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = as_scalar(self.lval(kind, data, pos, namespaces, variables))
+        rval = as_scalar(self.rval(kind, data, pos, namespaces, variables))
+        return lval == rval
+    def __repr__(self):
+        return '%s=%s' % (self.lval, self.rval)
+
+class NotEqualsOperator(object):
+    """The inequality operator `!=`."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = as_scalar(self.lval(kind, data, pos, namespaces, variables))
+        rval = as_scalar(self.rval(kind, data, pos, namespaces, variables))
+        return lval != rval
+    def __repr__(self):
+        return '%s!=%s' % (self.lval, self.rval)
+
+class OrOperator(object):
+    """The boolean operator `or`."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = as_bool(self.lval(kind, data, pos, namespaces, variables))
+        if lval:
+            return True
+        rval = self.rval(kind, data, pos, namespaces, variables)
+        return as_bool(rval)
+    def __repr__(self):
+        return '%s or %s' % (self.lval, self.rval)
+
+class GreaterThanOperator(object):
+    """The relational operator `>` (greater than)."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = self.lval(kind, data, pos, namespaces, variables)
+        rval = self.rval(kind, data, pos, namespaces, variables)
+        return as_float(lval) > as_float(rval)
+    def __repr__(self):
+        return '%s>%s' % (self.lval, self.rval)
+
+class GreaterThanOrEqualOperator(object):
+    """The relational operator `>=` (greater than or equal)."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = self.lval(kind, data, pos, namespaces, variables)
+        rval = self.rval(kind, data, pos, namespaces, variables)
+        return as_float(lval) >= as_float(rval)
+    def __repr__(self):
+        return '%s>=%s' % (self.lval, self.rval)
+
+class LessThanOperator(object):
+    """The relational operator `<` (less than)."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = self.lval(kind, data, pos, namespaces, variables)
+        rval = self.rval(kind, data, pos, namespaces, variables)
+        return as_float(lval) < as_float(rval)
+    def __repr__(self):
+        return '%s<%s' % (self.lval, self.rval)
+
+class LessThanOrEqualOperator(object):
+    """The relational operator `<=` (less than or equal)."""
+    __slots__ = ['lval', 'rval']
+    def __init__(self, lval, rval):
+        self.lval = lval
+        self.rval = rval
+    def __call__(self, kind, data, pos, namespaces, variables):
+        lval = self.lval(kind, data, pos, namespaces, variables)
+        rval = self.rval(kind, data, pos, namespaces, variables)
+        return as_float(lval) <= as_float(rval)
+    def __repr__(self):
+        return '%s<=%s' % (self.lval, self.rval)
+
+_operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator,
+                 '>': GreaterThanOperator, '>=': GreaterThanOrEqualOperator,
+                 '<': LessThanOperator, '<=': LessThanOrEqualOperator}
+
+
+_DOTSLASHSLASH = (DESCENDANT_OR_SELF, PrincipalTypeTest(None), ())
+_DOTSLASH = (SELF, PrincipalTypeTest(None), ())
diff --git a/genshi/template/__init__.py b/genshi/template/__init__.py
new file mode 100644
index 0000000..47a9310
--- /dev/null
+++ b/genshi/template/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of the template engine."""
+
+from genshi.template.base import Context, Template, TemplateError, \
+                                 TemplateRuntimeError, TemplateSyntaxError, \
+                                 BadDirectiveError
+from genshi.template.loader import TemplateLoader, TemplateNotFound
+from genshi.template.markup import MarkupTemplate
+from genshi.template.text import TextTemplate, OldTextTemplate, NewTextTemplate
+
+__docformat__ = 'restructuredtext en'
diff --git a/genshi/template/_ast24.py b/genshi/template/_ast24.py
new file mode 100644
index 0000000..05d241b
--- /dev/null
+++ b/genshi/template/_ast24.py
@@ -0,0 +1,446 @@
+# Generated automatically, please do not edit
+# Generator can be found in Genshi SVN, scripts/ast-generator.py
+
+__version__ = 43614
+
+class AST(object):
+	_fields = None
+	__doc__ = None
+
+class operator(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class Add(operator):
+	_fields = None
+	__doc__ = None
+
+class boolop(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class And(boolop):
+	_fields = None
+	__doc__ = None
+
+class stmt(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = ['lineno', 'col_offset']
+class Assert(stmt):
+	_fields = ('test', 'msg')
+	__doc__ = None
+
+class Assign(stmt):
+	_fields = ('targets', 'value')
+	__doc__ = None
+
+class expr(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = ['lineno', 'col_offset']
+class Attribute(expr):
+	_fields = ('value', 'attr', 'ctx')
+	__doc__ = None
+
+class AugAssign(stmt):
+	_fields = ('target', 'op', 'value')
+	__doc__ = None
+
+class expr_context(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class AugLoad(expr_context):
+	_fields = None
+	__doc__ = None
+
+class AugStore(expr_context):
+	_fields = None
+	__doc__ = None
+
+class BinOp(expr):
+	_fields = ('left', 'op', 'right')
+	__doc__ = None
+
+class BitAnd(operator):
+	_fields = None
+	__doc__ = None
+
+class BitOr(operator):
+	_fields = None
+	__doc__ = None
+
+class BitXor(operator):
+	_fields = None
+	__doc__ = None
+
+class BoolOp(expr):
+	_fields = ('op', 'values')
+	__doc__ = None
+
+class Break(stmt):
+	_fields = None
+	__doc__ = None
+
+class Call(expr):
+	_fields = ('func', 'args', 'keywords', 'starargs', 'kwargs')
+	__doc__ = None
+
+class ClassDef(stmt):
+	_fields = ('name', 'bases', 'body')
+	__doc__ = None
+
+class Compare(expr):
+	_fields = ('left', 'ops', 'comparators')
+	__doc__ = None
+
+class Continue(stmt):
+	_fields = None
+	__doc__ = None
+
+class Del(expr_context):
+	_fields = None
+	__doc__ = None
+
+class Delete(stmt):
+	_fields = ('targets',)
+	__doc__ = None
+
+class Dict(expr):
+	_fields = ('keys', 'values')
+	__doc__ = None
+
+class Div(operator):
+	_fields = None
+	__doc__ = None
+
+class slice(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class Ellipsis(slice):
+	_fields = None
+	__doc__ = None
+
+class cmpop(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class Eq(cmpop):
+	_fields = None
+	__doc__ = None
+
+class Exec(stmt):
+	_fields = ('body', 'globals', 'locals')
+	__doc__ = None
+
+class Expr(stmt):
+	_fields = ('value',)
+	__doc__ = None
+
+class mod(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class Expression(mod):
+	_fields = ('body',)
+	__doc__ = None
+
+class ExtSlice(slice):
+	_fields = ('dims',)
+	__doc__ = None
+
+class FloorDiv(operator):
+	_fields = None
+	__doc__ = None
+
+class For(stmt):
+	_fields = ('target', 'iter', 'body', 'orelse')
+	__doc__ = None
+
+class FunctionDef(stmt):
+	_fields = ('name', 'args', 'body', 'decorators')
+	__doc__ = None
+
+class GeneratorExp(expr):
+	_fields = ('elt', 'generators')
+	__doc__ = None
+
+class Global(stmt):
+	_fields = ('names',)
+	__doc__ = None
+
+class Gt(cmpop):
+	_fields = None
+	__doc__ = None
+
+class GtE(cmpop):
+	_fields = None
+	__doc__ = None
+
+class If(stmt):
+	_fields = ('test', 'body', 'orelse')
+	__doc__ = None
+
+class IfExp(expr):
+	_fields = ('test', 'body', 'orelse')
+	__doc__ = None
+
+class Import(stmt):
+	_fields = ('names',)
+	__doc__ = None
+
+class ImportFrom(stmt):
+	_fields = ('module', 'names', 'level')
+	__doc__ = None
+
+class In(cmpop):
+	_fields = None
+	__doc__ = None
+
+class Index(slice):
+	_fields = ('value',)
+	__doc__ = None
+
+class Interactive(mod):
+	_fields = ('body',)
+	__doc__ = None
+
+class unaryop(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+class Invert(unaryop):
+	_fields = None
+	__doc__ = None
+
+class Is(cmpop):
+	_fields = None
+	__doc__ = None
+
+class IsNot(cmpop):
+	_fields = None
+	__doc__ = None
+
+class LShift(operator):
+	_fields = None
+	__doc__ = None
+
+class Lambda(expr):
+	_fields = ('args', 'body')
+	__doc__ = None
+
+class List(expr):
+	_fields = ('elts', 'ctx')
+	__doc__ = None
+
+class ListComp(expr):
+	_fields = ('elt', 'generators')
+	__doc__ = None
+
+class Load(expr_context):
+	_fields = None
+	__doc__ = None
+
+class Lt(cmpop):
+	_fields = None
+	__doc__ = None
+
+class LtE(cmpop):
+	_fields = None
+	__doc__ = None
+
+class Mod(operator):
+	_fields = None
+	__doc__ = None
+
+class Module(mod):
+	_fields = ('body',)
+	__doc__ = None
+
+class Mult(operator):
+	_fields = None
+	__doc__ = None
+
+class Name(expr):
+	_fields = ('id', 'ctx')
+	__doc__ = None
+
+class Not(unaryop):
+	_fields = None
+	__doc__ = None
+
+class NotEq(cmpop):
+	_fields = None
+	__doc__ = None
+
+class NotIn(cmpop):
+	_fields = None
+	__doc__ = None
+
+class Num(expr):
+	_fields = ('n',)
+	__doc__ = None
+
+class Or(boolop):
+	_fields = None
+	__doc__ = None
+
+class Param(expr_context):
+	_fields = None
+	__doc__ = None
+
+class Pass(stmt):
+	_fields = None
+	__doc__ = None
+
+class Pow(operator):
+	_fields = None
+	__doc__ = None
+
+class Print(stmt):
+	_fields = ('dest', 'values', 'nl')
+	__doc__ = None
+
+class RShift(operator):
+	_fields = None
+	__doc__ = None
+
+class Raise(stmt):
+	_fields = ('type', 'inst', 'tback')
+	__doc__ = None
+
+class Repr(expr):
+	_fields = ('value',)
+	__doc__ = None
+
+class Return(stmt):
+	_fields = ('value',)
+	__doc__ = None
+
+class Slice(slice):
+	_fields = ('lower', 'upper', 'step')
+	__doc__ = None
+
+class Store(expr_context):
+	_fields = None
+	__doc__ = None
+
+class Str(expr):
+	_fields = ('s',)
+	__doc__ = None
+
+class Sub(operator):
+	_fields = None
+	__doc__ = None
+
+class Subscript(expr):
+	_fields = ('value', 'slice', 'ctx')
+	__doc__ = None
+
+class Suite(mod):
+	_fields = ('body',)
+	__doc__ = None
+
+class TryExcept(stmt):
+	_fields = ('body', 'handlers', 'orelse')
+	__doc__ = None
+
+class TryFinally(stmt):
+	_fields = ('body', 'finalbody')
+	__doc__ = None
+
+class Tuple(expr):
+	_fields = ('elts', 'ctx')
+	__doc__ = None
+
+class UAdd(unaryop):
+	_fields = None
+	__doc__ = None
+
+class USub(unaryop):
+	_fields = None
+	__doc__ = None
+
+class UnaryOp(expr):
+	_fields = ('op', 'operand')
+	__doc__ = None
+
+class While(stmt):
+	_fields = ('test', 'body', 'orelse')
+	__doc__ = None
+
+class With(stmt):
+	_fields = ('context_expr', 'optional_vars', 'body')
+	__doc__ = None
+
+class Yield(expr):
+	_fields = ('value',)
+	__doc__ = None
+
+class alias(AST):
+	_fields = ('name', 'asname')
+	__doc__ = None
+
+class arguments(AST):
+	_fields = ('args', 'vararg', 'kwarg', 'defaults')
+	__doc__ = None
+
+class boolop(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
+class cmpop(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
+class comprehension(AST):
+	_fields = ('target', 'iter', 'ifs')
+	__doc__ = None
+
+class excepthandler(AST):
+	_fields = ('type', 'name', 'body', 'lineno', 'col_offset')
+	__doc__ = None
+
+class expr(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = ['lineno', 'col_offset']
+
+class expr_context(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
+class keyword(AST):
+	_fields = ('arg', 'value')
+	__doc__ = None
+
+class mod(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
+class operator(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
+class slice(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
+class stmt(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = ['lineno', 'col_offset']
+
+class unaryop(AST):
+	_fields = None
+	__doc__ = None
+	_attributes = []
+
diff --git a/genshi/template/ast24.py b/genshi/template/ast24.py
new file mode 100644
index 0000000..af6dce9
--- /dev/null
+++ b/genshi/template/ast24.py
@@ -0,0 +1,505 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2008-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Emulation of the proper abstract syntax tree API for Python 2.4."""
+
+import compiler
+import compiler.ast
+
+from genshi.template import _ast24 as _ast
+
+__all__ = ['_ast', 'parse']
+__docformat__ = 'restructuredtext en'
+
+
+def _new(cls, *args, **kwargs):
+    ret = cls()
+    if ret._fields:
+        for attr, value in zip(ret._fields, args):
+            if attr in kwargs:
+                raise ValueError('Field set both in args and kwargs')
+            setattr(ret, attr, value)
+    for attr in kwargs:
+        if (getattr(ret, '_fields', None) and attr in ret._fields) \
+                or (getattr(ret, '_attributes', None) and 
+                        attr in ret._attributes):
+            setattr(ret, attr, kwargs[attr])
+    return ret
+
+
+class ASTUpgrader(object):
+    """Transformer changing structure of Python 2.4 ASTs to
+    Python 2.5 ones.
+
+    Transforms ``compiler.ast`` Abstract Syntax Tree to builtin ``_ast``.
+    It can use fake ``_ast`` classes and this way allow ``_ast`` emulation
+    in Python 2.4.
+    """
+
+    def __init__(self):
+        self.out_flags = None
+        self.lines = [-1]
+
+    def _new(self, *args, **kwargs):
+        return _new(lineno = self.lines[-1], *args, **kwargs)
+
+    def visit(self, node):
+        if node is None:
+            return None
+        if type(node) is tuple:
+            return tuple([self.visit(n) for n in node])
+        lno = getattr(node, 'lineno', None)
+        if lno is not None:
+            self.lines.append(lno)
+        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
+        if visitor is None:
+            raise Exception('Unhandled node type %r' % type(node))
+
+        retval = visitor(node)
+        if lno is not None:
+            self.lines.pop()
+        return retval
+
+    def visit_Module(self, node):
+        body = self.visit(node.node)
+        if node.doc:
+            body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
+        return self._new(_ast.Module, body)
+
+    def visit_Expression(self, node):
+        return self._new(_ast.Expression, self.visit(node.node))
+
+    def _extract_args(self, node):
+        tab = node.argnames[:]  # copy, so the original node is not modified
+        if node.flags & compiler.ast.CO_VARKEYWORDS:
+            kwarg = tab[-1]  # taken from the end first, then the vararg below
+            tab = tab[:-1]
+        else:
+            kwarg = None
+
+        if node.flags & compiler.ast.CO_VARARGS:
+            vararg = tab[-1]
+            tab = tab[:-1]
+        else:
+            vararg = None
+
+        def _tup(t):
+            if isinstance(t, str):
+                return self._new(_ast.Name, t, _ast.Store())
+            elif isinstance(t, tuple):
+                elts = [_tup(x) for x in t]
+                return self._new(_ast.Tuple, elts, _ast.Store())
+            else:
+                raise NotImplementedError  # NotImplemented is not an exception
+
+        args = []
+        for arg in tab:
+            if isinstance(arg, str):
+                args.append(self._new(_ast.Name, arg, _ast.Param()))
+            elif isinstance(arg, tuple):
+                args.append(_tup(arg))
+            else:
+                assert False, node.__class__
+
+        defaults = [self.visit(d) for d in node.defaults]
+        return self._new(_ast.arguments, args, vararg, kwarg, defaults)
+
+
+    def visit_Function(self, node):
+        if getattr(node, 'decorators', ()):
+            decorators = [self.visit(d) for d in node.decorators.nodes]
+        else:
+            decorators = []
+
+        args = self._extract_args(node)
+        body = self.visit(node.code)
+        if node.doc:
+            body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
+        return self._new(_ast.FunctionDef, node.name, args, body, decorators)
+
+    def visit_Class(self, node):
+        #self.name_types.append(_ast.Load)
+        bases = [self.visit(b) for b in node.bases]
+        #self.name_types.pop()
+        body = self.visit(node.code)
+        if node.doc:
+            body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
+        return self._new(_ast.ClassDef, node.name, bases, body)
+
+    def visit_Return(self, node):
+        return self._new(_ast.Return, self.visit(node.value))
+
+    def visit_Assign(self, node):
+        #self.name_types.append(_ast.Store)
+        targets = [self.visit(t) for t in node.nodes]
+        #self.name_types.pop()
+        return self._new(_ast.Assign, targets, self.visit(node.expr))
+
+    aug_operators = {
+        '+=': _ast.Add,
+        '/=': _ast.Div,
+        '//=': _ast.FloorDiv,
+        '<<=': _ast.LShift,
+        '%=': _ast.Mod,
+        '*=': _ast.Mult,
+        '**=': _ast.Pow,
+        '>>=': _ast.RShift,
+        '-=': _ast.Sub,
+    }
+
+    def visit_AugAssign(self, node):
+        target = self.visit(node.node)
+
+        # Because it's AugAssign target can't be list nor tuple
+        # so we only have to change context of one node
+        target.ctx = _ast.Store()
+        op = self.aug_operators[node.op]()
+        return self._new(_ast.AugAssign, target, op, self.visit(node.expr))
+
+    def _visit_Print(nl):
+        def _visit(self, node):
+            values = [self.visit(v) for v in node.nodes]
+            return self._new(_ast.Print, self.visit(node.dest), values, nl)
+        return _visit
+
+    visit_Print = _visit_Print(False)
+    visit_Printnl = _visit_Print(True)
+    del _visit_Print
+
+    def visit_For(self, node):
+        return self._new(_ast.For, self.visit(node.assign), self.visit(node.list),
+                        self.visit(node.body), self.visit(node.else_))
+
+    def visit_While(self, node):
+        return self._new(_ast.While, self.visit(node.test), self.visit(node.body),
+                        self.visit(node.else_))
+
+    def visit_If(self, node):
+        def _level(tests, else_):
+            test = self.visit(tests[0][0])
+            body = self.visit(tests[0][1])
+            if len(tests) == 1:
+                orelse = self.visit(else_)
+            else:
+                orelse = [_level(tests[1:], else_)]
+            return self._new(_ast.If, test, body, orelse)
+        return _level(node.tests, node.else_)
+
+    def visit_With(self, node):
+        return self._new(_ast.With, self.visit(node.expr),
+                            self.visit(node.vars), self.visit(node.body))
+
+    def visit_Raise(self, node):
+        return self._new(_ast.Raise, self.visit(node.expr1),
+                        self.visit(node.expr2), self.visit(node.expr3))
+
+    def visit_TryExcept(self, node):
+        handlers = []
+        for type, name, body in node.handlers:
+            handlers.append(self._new(_ast.excepthandler, self.visit(type), 
+                            self.visit(name), self.visit(body)))
+        return self._new(_ast.TryExcept, self.visit(node.body),
+                        handlers, self.visit(node.else_))
+
+    def visit_TryFinally(self, node):
+        return self._new(_ast.TryFinally, self.visit(node.body),
+                        self.visit(node.final))
+
+    def visit_Assert(self, node):
+        return self._new(_ast.Assert, self.visit(node.test), self.visit(node.fail))
+
+    def visit_Import(self, node):
+        names = [self._new(_ast.alias, n[0], n[1]) for n in node.names]
+        return self._new(_ast.Import, names)
+
+    def visit_From(self, node):
+        names = [self._new(_ast.alias, n[0], n[1]) for n in node.names]
+        return self._new(_ast.ImportFrom, node.modname, names, 0)
+
+    def visit_Exec(self, node):
+        return self._new(_ast.Exec, self.visit(node.expr),
+                        self.visit(node.locals), self.visit(node.globals))
+
+    def visit_Global(self, node):
+        return self._new(_ast.Global, node.names[:])
+
+    def visit_Discard(self, node):
+        return self._new(_ast.Expr, self.visit(node.expr))
+
+    def _map_class(to):
+        def _visit(self, node):
+            return self._new(to)
+        return _visit
+
+    visit_Pass = _map_class(_ast.Pass)
+    visit_Break = _map_class(_ast.Break)
+    visit_Continue = _map_class(_ast.Continue)
+
+    def _visit_BinOperator(opcls):
+        def _visit(self, node):
+            return self._new(_ast.BinOp, self.visit(node.left), 
+                            opcls(), self.visit(node.right)) 
+        return _visit
+    visit_Add = _visit_BinOperator(_ast.Add)
+    visit_Div = _visit_BinOperator(_ast.Div)
+    visit_FloorDiv = _visit_BinOperator(_ast.FloorDiv)
+    visit_LeftShift = _visit_BinOperator(_ast.LShift)
+    visit_Mod = _visit_BinOperator(_ast.Mod)
+    visit_Mul = _visit_BinOperator(_ast.Mult)
+    visit_Power = _visit_BinOperator(_ast.Pow)
+    visit_RightShift = _visit_BinOperator(_ast.RShift)
+    visit_Sub = _visit_BinOperator(_ast.Sub)
+    del _visit_BinOperator
+
+    def _visit_BitOperator(opcls):
+        def _visit(self, node):
+            def _make(nodes):
+                if len(nodes) == 1:
+                    return self.visit(nodes[0])
+                left = _make(nodes[:-1])
+                right = self.visit(nodes[-1])
+                return self._new(_ast.BinOp, left, opcls(), right)
+            return _make(node.nodes)
+        return _visit
+    visit_Bitand = _visit_BitOperator(_ast.BitAnd)
+    visit_Bitor = _visit_BitOperator(_ast.BitOr)
+    visit_Bitxor = _visit_BitOperator(_ast.BitXor)
+    del _visit_BitOperator
+
+    def _visit_UnaryOperator(opcls):
+        def _visit(self, node):
+            return self._new(_ast.UnaryOp, opcls(), self.visit(node.expr))
+        return _visit
+
+    visit_Invert = _visit_UnaryOperator(_ast.Invert)
+    visit_Not = _visit_UnaryOperator(_ast.Not)
+    visit_UnaryAdd = _visit_UnaryOperator(_ast.UAdd)
+    visit_UnarySub = _visit_UnaryOperator(_ast.USub)
+    del _visit_UnaryOperator
+
+    def _visit_BoolOperator(opcls):
+        def _visit(self, node):
+            values = [self.visit(n) for n in node.nodes]
+            return self._new(_ast.BoolOp, opcls(), values)
+        return _visit
+    visit_And = _visit_BoolOperator(_ast.And)
+    visit_Or = _visit_BoolOperator(_ast.Or)
+    del _visit_BoolOperator
+
+    cmp_operators = {
+        '==': _ast.Eq,
+        '!=': _ast.NotEq,
+        '<': _ast.Lt,
+        '<=': _ast.LtE,
+        '>': _ast.Gt,
+        '>=': _ast.GtE,
+        'is': _ast.Is,
+        'is not': _ast.IsNot,
+        'in': _ast.In,
+        'not in': _ast.NotIn,
+    }
+
+    def visit_Compare(self, node):
+        left = self.visit(node.expr)
+        ops = []
+        comparators = []
+        for optype, expr in node.ops:
+            ops.append(self.cmp_operators[optype]())
+            comparators.append(self.visit(expr))
+        return self._new(_ast.Compare, left, ops, comparators)
+
+    def visit_Lambda(self, node):
+        args = self._extract_args(node)
+        body = self.visit(node.code)
+        return self._new(_ast.Lambda, args, body)
+
+    def visit_IfExp(self, node):
+        return self._new(_ast.IfExp, self.visit(node.test), self.visit(node.then),
+                        self.visit(node.else_))
+
+    def visit_Dict(self, node):
+        keys = [self.visit(x[0]) for x in node.items]
+        values = [self.visit(x[1]) for x in node.items]
+        return self._new(_ast.Dict, keys, values)
+
+    def visit_ListComp(self, node):
+        generators = [self.visit(q) for q in node.quals]
+        return self._new(_ast.ListComp, self.visit(node.expr), generators)
+
+    def visit_GenExprInner(self, node):  # each qual becomes one generator
+        generators = [self.visit(q) for q in node.quals]
+        return self._new(_ast.GeneratorExp, self.visit(node.expr), generators)
+
+    def visit_GenExpr(self, node):  # no node of its own; result of node.code
+        return self.visit(node.code)
+
+    def visit_GenExprFor(self, node):  # -> comprehension(assign, iter, ifs)
+        ifs = [self.visit(i) for i in node.ifs]
+        return self._new(_ast.comprehension, self.visit(node.assign),
+                        self.visit(node.iter), ifs)
+
+    def visit_ListCompFor(self, node):  # like GenExprFor, reads node.list
+        ifs = [self.visit(i) for i in node.ifs]
+        return self._new(_ast.comprehension, self.visit(node.assign),
+                        self.visit(node.list), ifs)
+
+    def visit_GenExprIf(self, node):  # an `if` clause is just its test
+        return self.visit(node.test)
+    visit_ListCompIf = visit_GenExprIf  # list comprehensions share this
+
+    def visit_Yield(self, node):  # -> _ast.Yield wrapping the visited value
+        return self._new(_ast.Yield, self.visit(node.value))
+
+    def visit_CallFunc(self, node):
+        args = []
+        keywords = []
+        for arg in node.args:  # positional and keyword arguments are mixed
+            if isinstance(arg, compiler.ast.Keyword):
+                keywords.append(self._new(_ast.keyword, arg.name, 
+                                        self.visit(arg.expr)))
+            else:
+                args.append(self.visit(arg))
+        return self._new(_ast.Call, self.visit(node.node), args, keywords,
+                    self.visit(node.star_args), self.visit(node.dstar_args))
+
+    def visit_Backquote(self, node):  # -> _ast.Repr of the visited operand
+        return self._new(_ast.Repr, self.visit(node.expr))
+
+    def visit_Const(self, node):  # -> None, _ast.Str or _ast.Num
+        if node.value is None: # appears in slices
+            return None
+        elif isinstance(node.value, basestring):
+            return self._new(_ast.Str, node.value)
+        else:  # every other constant is emitted as a Num
+            return self._new(_ast.Num, node.value)
+
+    def visit_Name(self, node):  # always built with a Load context
+        return self._new(_ast.Name, node.name, _ast.Load())
+
+    def visit_Getattr(self, node):  # always built with a Load context
+        return self._new(_ast.Attribute, self.visit(node.expr), node.attrname,
+                         _ast.Load())
+
+    def visit_Tuple(self, node):  # always built with a Load context
+        nodes = [self.visit(n) for n in node.nodes]
+        return self._new(_ast.Tuple, nodes, _ast.Load())
+
+    def visit_List(self, node):  # always built with a Load context
+        nodes = [self.visit(n) for n in node.nodes]
+        return self._new(_ast.List, nodes, _ast.Load())
+
+    def get_ctx(self, flags):  # map an 'OP_*' flag string to a context node
+        if flags == 'OP_DELETE':
+            return _ast.Del()
+        elif flags == 'OP_APPLY':
+            return _ast.Load()
+        elif flags == 'OP_ASSIGN':
+            return _ast.Store()
+        else:
+            # raise explicitly: an `assert` statement is dropped by python -O
+            raise AssertionError(repr(flags))
+
+    def visit_AssName(self, node):
+        self.out_flags = node.flags  # read back by the AssList/AssTuple visitor
+        ctx = self.get_ctx(node.flags)
+        return self._new(_ast.Name, node.name, ctx)
+
+    def visit_AssAttr(self, node):
+        self.out_flags = node.flags  # read back by the AssList/AssTuple visitor
+        ctx = self.get_ctx(node.flags)
+        return self._new(_ast.Attribute, self.visit(node.expr), 
+                         node.attrname, ctx)
+
+    def _visit_AssCollection(cls):  # factory for visit_AssList/visit_AssTuple
+        def _visit(self, node):
+            flags = None
+            elts = []
+            for n in node.nodes:
+                elts.append(self.visit(n))
+                if flags is None:
+                    flags = self.out_flags  # flags left by the child visitor
+                else:
+                    assert flags == self.out_flags  # all elements must agree
+            self.out_flags = flags
+            ctx = self.get_ctx(flags)
+            return self._new(cls, elts, ctx)
+        return _visit
+
+    visit_AssList = _visit_AssCollection(_ast.List)
+    visit_AssTuple = _visit_AssCollection(_ast.Tuple)
+    del _visit_AssCollection  # remove the factory from the class namespace
+
+    def visit_Slice(self, node):  # -> Subscript holding a step-less Slice
+        lower = self.visit(node.lower)
+        upper = self.visit(node.upper)
+        ctx = self.get_ctx(node.flags)
+        self.out_flags = node.flags
+        return self._new(_ast.Subscript, self.visit(node.expr),
+                    self._new(_ast.Slice, lower, upper, None), ctx)
+
+    def visit_Subscript(self, node):  # -> Subscript(value, slice, ctx)
+        ctx = self.get_ctx(node.flags)
+        subs = [self.visit(s) for s in node.subs]
+        advanced = (_ast.Slice, _ast.Ellipsis)
+        slices = []
+        nonindex = False
+        for sub in subs:
+            if isinstance(sub, advanced):
+                nonindex = True
+                slices.append(sub)
+            else:
+                slices.append(self._new(_ast.Index, sub))
+        if len(slices) == 1:
+            slice = slices[0]
+        elif nonindex:
+            slice = self._new(_ast.ExtSlice, slices)
+        else:
+            # a[i, j]: one Index around a Tuple, not a Tuple of Index nodes
+            slice = self._new(_ast.Index,
+                              self._new(_ast.Tuple, subs, _ast.Load()))
+        self.out_flags = node.flags
+        return self._new(_ast.Subscript, self.visit(node.expr), slice, ctx)
+
+    def visit_Sliceobj(self, node):  # pad missing parts with None up to 3
+        a = [self.visit(n) for n in node.nodes + [None]*(3 - len(node.nodes))]
+        return self._new(_ast.Slice, a[0], a[1], a[2])
+
+    def visit_Ellipsis(self, node):  # carries no fields
+        return self._new(_ast.Ellipsis)
+
+    def visit_Stmt(self, node):  # returns a plain list of statement nodes
+        def _check_del(n):
+            # del x is just AssName('x', 'OP_DELETE')
+            # we want to transform it to Delete([Name('x', Del())])
+            dcls = (_ast.Name, _ast.List, _ast.Subscript, _ast.Attribute)
+            if isinstance(n, dcls) and isinstance(n.ctx, _ast.Del):
+                return self._new(_ast.Delete, [n])
+            elif isinstance(n, _ast.Tuple) and isinstance(n.ctx, _ast.Del):
+                # unpack last tuple to avoid making del (x, y, z,);
+                # out of del x, y, z; (there's no difference between
+                # this two in compiler.ast)
+                return self._new(_ast.Delete, n.elts)
+            else:
+                return n
+        def _keep(n):  # drop expression statements whose value is None
+            if isinstance(n, _ast.Expr) and n.value is None:
+                return False
+            else:
+                return True
+        return [s for s in [_check_del(self.visit(n)) for n in node.nodes]
+                if _keep(s)]
+
+
+def parse(source, mode):  # parse with `compiler`, then run ASTUpgrader
+    node = compiler.parse(source, mode)
+    return ASTUpgrader().visit(node)
diff --git a/genshi/template/astutil.py b/genshi/template/astutil.py
new file mode 100644
index 0000000..c3ad107
--- /dev/null
+++ b/genshi/template/astutil.py
@@ -0,0 +1,784 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2008-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support classes for generating code from abstract syntax trees."""
+
+try:
+    import _ast
+except ImportError:
+    from genshi.template.ast24 import _ast, parse  # fallback implementation
+else:
+    def parse(source, mode):  # have compile() return an AST
+        return compile(source, '', mode, _ast.PyCF_ONLY_AST)
+
+
+__docformat__ = 'restructuredtext en'
+
+
+class ASTCodeGenerator(object):
+    """Generates Python source code from an abstract syntax tree.
+
+    The generated source ends up in ``code``; ``lines_info`` records, for each
+    generated line, the (lineno, col_offset) of the nodes that produced it.
+    """
+    def __init__(self, tree):  # generate source for `tree` immediately
+        self.lines_info = []
+        self.line_info = None
+        self.code = ''
+        self.line = None
+        self.last = None
+        self.indent = 0
+        self.blame_stack = []
+        self.visit(tree)
+        if self.line is not None and self.line.strip():  # None: no output
+            self.code += self.line + '\n'
+            self.lines_info.append(self.line_info)
+        self.line = None
+        self.line_info = None
+
+    def _change_indent(self, delta):  # indent is a level; 4 spaces per level
+        self.indent += delta
+
+    def _new_line(self):  # flush the pending line, then open a fresh one
+        if self.line is not None:
+            self.code = self.code + self.line + '\n'
+            self.lines_info.append(self.line_info)
+        self.line = ' ' * (4 * self.indent)
+        if self.blame_stack:
+            self.last = self.blame_stack[-1]
+            self.line_info = [(0, self.last)]
+        else:
+            self.last = None
+            self.line_info = []
+
+    def _write(self, s):
+        if len(s) == 0:
+            return
+        # origin: position of the innermost node, or None outside any node
+        origin = None
+        if self.blame_stack:
+            origin = self.blame_stack[-1]
+        if self.last != origin:
+            # remember the column at which the origin changes
+            self.last = origin
+            self.line_info.append((len(self.line), origin))
+        self.line = self.line + s
+
+    def visit(self, node):  # dispatch to visit_<NodeClassName>
+        if node is None:
+            return None
+        if type(node) is tuple:
+            return tuple([self.visit(n) for n in node])
+        try:  # nodes without position attributes are simply not tracked
+            self.blame_stack.append((node.lineno, node.col_offset,))
+            info = True
+        except AttributeError:
+            info = False
+        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
+        if visitor is None:
+            raise Exception('Unhandled node type %r' % type(node))
+        ret = visitor(node)
+        if info:  # pop only what this call pushed
+            self.blame_stack.pop()
+        return ret
+
+    def visit_Module(self, node):  # emit each top-level statement in order
+        for n in node.body:
+            self.visit(n)
+    visit_Interactive = visit_Module  # same handling: iterate node.body
+    visit_Suite = visit_Module
+
+    def visit_Expression(self, node):  # a lone expression gets its own line
+        self._new_line()
+        return self.visit(node.body)
+
+    # arguments = (expr* args, identifier? vararg,
+    #              identifier? kwarg, expr* defaults)
+    def visit_arguments(self, node):
+        first = True
+        no_default_count = len(node.args) - len(node.defaults)
+        for i, arg in enumerate(node.args):
+            if not first:
+                self._write(', ')
+            else:
+                first = False
+            self.visit(arg)
+            if i >= no_default_count:  # defaults align with the last args
+                self._write('=')
+                self.visit(node.defaults[i - no_default_count])
+        if getattr(node, 'vararg', None):  # written as a plain name string
+            if not first:
+                self._write(', ')
+            else:
+                first = False
+            self._write('*' + node.vararg)
+        if getattr(node, 'kwarg', None):  # written as a plain name string
+            if not first:
+                self._write(', ')
+            else:
+                first = False
+            self._write('**' + node.kwarg)
+
+    # FunctionDef(identifier name, arguments args,
+    #                           stmt* body, expr* decorator_list)
+    def visit_FunctionDef(self, node):
+        # one '@decorator' line each, written before the 'def' line
+        if hasattr(node, 'decorator_list'):
+            decorators = getattr(node, 'decorator_list')
+        else: # different name in earlier Python versions
+            decorators = getattr(node, 'decorators', ())
+        for decorator in decorators:
+            self._new_line()
+            self._write('@')
+            self.visit(decorator)
+        self._new_line()
+        self._write('def ' + node.name + '(')
+        self.visit(node.args)
+        self._write('):')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+    # ClassDef(identifier name, expr* bases, stmt* body)
+    def visit_ClassDef(self, node):  # NOTE(review): decorators not emitted
+        self._new_line()
+        self._write('class ' + node.name)
+        if node.bases:  # the parenthesised base list is omitted when empty
+            self._write('(')
+            self.visit(node.bases[0])
+            for base in node.bases[1:]:
+                self._write(', ')
+                self.visit(base)
+            self._write(')')
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+    # Return(expr? value)
+    def visit_Return(self, node):  # bare 'return' when value is absent
+        self._new_line()
+        self._write('return')
+        if getattr(node, 'value', None):
+            self._write(' ')
+            self.visit(node.value)
+
+    # Delete(expr* targets)
+    def visit_Delete(self, node):  # indexes targets[0]: needs one target
+        self._new_line()
+        self._write('del ')
+        self.visit(node.targets[0])
+        for target in node.targets[1:]:
+            self._write(', ')
+            self.visit(target)
+
+    # Assign(expr* targets, expr value)
+    def visit_Assign(self, node):  # chained targets: 'a = b = value'
+        self._new_line()
+        for target in node.targets:
+            self.visit(target)
+            self._write(' = ')
+        self.visit(node.value)
+
+    # AugAssign(expr target, operator op, expr value)
+    def visit_AugAssign(self, node):  # binary operator symbol followed by '='
+        self._new_line()
+        self.visit(node.target)
+        self._write(' ' + self.binary_operators[node.op.__class__] + '= ')
+        self.visit(node.value)
+
+    # Print(expr? dest, expr* values, bool nl)
+    def visit_Print(self, node):  # Python 2 print statement
+        self._new_line()
+        self._write('print')
+        if getattr(node, 'dest', None):  # 'print >> dest, values'
+            self._write(' >> ')
+            self.visit(node.dest)
+            if getattr(node, 'values', None):
+                self._write(', ')
+        else:
+            self._write(' ')
+        if getattr(node, 'values', None):
+            self.visit(node.values[0])
+            for value in node.values[1:]:
+                self._write(', ')
+                self.visit(value)
+        if not node.nl:  # trailing comma when nl is false
+            self._write(',')
+
+    # For(expr target, expr iter, stmt* body, stmt* orelse)
+    def visit_For(self, node):
+        self._new_line()
+        self._write('for ')
+        self.visit(node.target)
+        self._write(' in ')
+        self.visit(node.iter)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'orelse', None):  # 'else:' only when non-empty
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # While(expr test, stmt* body, stmt* orelse)
+    def visit_While(self, node):
+        self._new_line()
+        self._write('while ')
+        self.visit(node.test)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'orelse', None):  # 'else:' only when non-empty
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # If(expr test, stmt* body, stmt* orelse)
+    def visit_If(self, node):
+        self._new_line()
+        self._write('if ')
+        self.visit(node.test)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'orelse', None):  # always 'else:', never 'elif'
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # With(expr context_expr, expr? optional_vars, stmt* body)
+    def visit_With(self, node):
+        self._new_line()
+        self._write('with ')
+        self.visit(node.context_expr)
+        if getattr(node, 'optional_vars', None):  # the 'as target' part
+            self._write(' as ')
+            self.visit(node.optional_vars)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+
+    # Raise(expr? type, expr? inst, expr? tback)
+    def visit_Raise(self, node):  # 'raise type, inst, tback' form
+        self._new_line()
+        self._write('raise')
+        if not node.type:  # bare 'raise'
+            return
+        self._write(' ')
+        self.visit(node.type)
+        if not node.inst:
+            return
+        self._write(', ')
+        self.visit(node.inst)
+        if not node.tback:
+            return
+        self._write(', ')
+        self.visit(node.tback)
+
+    # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
+    def visit_TryExcept(self, node):
+        self._new_line()
+        self._write('try:')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+        if getattr(node, 'handlers', None):
+            for handler in node.handlers:
+                self.visit(handler)
+        if getattr(node, 'orelse', None):  # new line only if 'else:' follows
+            self._new_line()
+            self._write('else:')
+            self._change_indent(1)
+            for statement in node.orelse:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # excepthandler = (expr? type, expr? name, stmt* body)
+    def visit_ExceptHandler(self, node):  # 'except type, name:' form
+        self._new_line()
+        self._write('except')
+        if getattr(node, 'type', None):
+            self._write(' ')
+            self.visit(node.type)
+        if getattr(node, 'name', None):  # name is visited as a node
+            self._write(', ')
+            self.visit(node.name)
+        self._write(':')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+    visit_excepthandler = visit_ExceptHandler  # alias: lower-case class name
+
+    # TryFinally(stmt* body, stmt* finalbody)
+    def visit_TryFinally(self, node):
+        self._new_line()
+        self._write('try:')
+        self._change_indent(1)
+        for statement in node.body:
+            self.visit(statement)
+        self._change_indent(-1)
+
+        if getattr(node, 'finalbody', None):  # skipped when empty or absent
+            self._new_line()
+            self._write('finally:')
+            self._change_indent(1)
+            for statement in node.finalbody:
+                self.visit(statement)
+            self._change_indent(-1)
+
+    # Assert(expr test, expr? msg)
+    def visit_Assert(self, node):  # 'assert test' or 'assert test, msg'
+        self._new_line()
+        self._write('assert ')
+        self.visit(node.test)
+        if getattr(node, 'msg', None):
+            self._write(', ')
+            self.visit(node.msg)
+
+    def visit_alias(self, node):  # 'name' or 'name as asname'
+        self._write(node.name)
+        if getattr(node, 'asname', None):
+            self._write(' as ')
+            self._write(node.asname)
+
+    # Import(alias* names)
+    def visit_Import(self, node):  # names are alias nodes -> visit_alias
+        self._new_line()
+        self._write('import ')
+        self.visit(node.names[0])
+        for name in node.names[1:]:
+            self._write(', ')
+            self.visit(name)
+
+    # ImportFrom(identifier module, alias* names, int? level)
+    def visit_ImportFrom(self, node):
+        self._new_line()
+        self._write('from ')
+        if node.level:  # one leading dot per relative-import level
+            self._write('.' * node.level)
+        self._write(node.module or '')  # module is None in 'from . import x'
+        self._write(' import ')
+        self.visit(node.names[0])
+        for name in node.names[1:]:
+            self._write(', ')
+            self.visit(name)
+
+    # Exec(expr body, expr? globals, expr? locals)
+    def visit_Exec(self, node):  # 'exec body [in globals[, locals]]'
+        self._new_line()
+        self._write('exec ')
+        self.visit(node.body)
+        if not node.globals:
+            return
+        self._write(' in ')  # the namespace clause starts with 'in', not ','
+        self.visit(node.globals)
+        if not node.locals:
+            return
+        self._write(', ')
+        self.visit(node.locals)
+
+    # Global(identifier* names)
+    def visit_Global(self, node):  # names are strings: write, don't visit
+        self._new_line()
+        self._write('global ')
+        self._write(node.names[0])
+        for name in node.names[1:]:
+            self._write(', ')
+            self._write(name)
+
+    # Expr(expr value)
+    def visit_Expr(self, node):  # expression statement on its own line
+        self._new_line()
+        self.visit(node.value)
+
+    # Pass
+    def visit_Pass(self, node):  # node carries nothing to emit but 'pass'
+        self._new_line()
+        self._write('pass')
+
+    # Break
+    def visit_Break(self, node):  # node carries nothing to emit but 'break'
+        self._new_line()
+        self._write('break')
+
+    # Continue
+    def visit_Continue(self, node):  # emits the bare keyword
+        self._new_line()
+        self._write('continue')
+
+    ### EXPRESSIONS
+    def with_parens(f):  # decorator: wrap a visitor's output in '(' ... ')'
+        def _f(self, node):
+            self._write('(')
+            f(self, node)  # the wrapped visitor's return value is discarded
+            self._write(')')
+        return _f
+
+    bool_operators = {_ast.And: 'and', _ast.Or: 'or'}
+
+    # BoolOp(boolop op, expr* values)
+    @with_parens
+    def visit_BoolOp(self, node):  # values joined by ' and ' / ' or '
+        joiner = ' ' + self.bool_operators[node.op.__class__] + ' '
+        self.visit(node.values[0])
+        for value in node.values[1:]:
+            self._write(joiner)
+            self.visit(value)
+
+    binary_operators = {  # also used by visit_AugAssign
+        _ast.Add: '+',
+        _ast.Sub: '-',
+        _ast.Mult: '*',
+        _ast.Div: '/',
+        _ast.Mod: '%',
+        _ast.Pow: '**',
+        _ast.LShift: '<<',
+        _ast.RShift: '>>',
+        _ast.BitOr: '|',
+        _ast.BitXor: '^',
+        _ast.BitAnd: '&',
+        _ast.FloorDiv: '//'
+    }
+
+    # BinOp(expr left, operator op, expr right)
+    @with_parens
+    def visit_BinOp(self, node):  # always parenthesised: '(left op right)'
+        self.visit(node.left)
+        self._write(' ' + self.binary_operators[node.op.__class__] + ' ')
+        self.visit(node.right)
+
+    unary_operators = {
+        _ast.Invert: '~',
+        _ast.Not: 'not',
+        _ast.UAdd: '+',
+        _ast.USub: '-',
+    }
+
+    @with_parens  # UnaryOp(unaryop op, expr operand)
+    def visit_UnaryOp(self, node):  # parens keep '(-x) ** y' from re-binding
+        self._write(self.unary_operators[node.op.__class__] + ' ')
+        self.visit(node.operand)
+
+    # Lambda(arguments args, expr body)
+    @with_parens
+    def visit_Lambda(self, node):  # '(lambda args: body)'
+        self._write('lambda ')
+        self.visit(node.args)
+        self._write(': ')
+        self.visit(node.body)
+
+    # IfExp(expr test, expr body, expr orelse)
+    @with_parens
+    def visit_IfExp(self, node):  # '(body if test else orelse)'
+        self.visit(node.body)
+        self._write(' if ')
+        self.visit(node.test)
+        self._write(' else ')
+        self.visit(node.orelse)
+
+    # Dict(expr* keys, expr* values)
+    def visit_Dict(self, node):  # every pair is followed by ', '
+        self._write('{')
+        for key, value in zip(node.keys, node.values):
+            self.visit(key)
+            self._write(': ')
+            self.visit(value)
+            self._write(', ')
+        self._write('}')
+
+    # ListComp(expr elt, comprehension* generators)
+    def visit_ListComp(self, node):  # '[elt for t in it if cond ...]'
+        self._write('[')
+        self.visit(node.elt)
+        for generator in node.generators:
+            # comprehension = (expr target, expr iter, expr* ifs)
+            self._write(' for ')
+            self.visit(generator.target)
+            self._write(' in ')
+            self.visit(generator.iter)
+            for ifexpr in generator.ifs:
+                self._write(' if ')
+                self.visit(ifexpr)
+        self._write(']')
+
+    # GeneratorExp(expr elt, comprehension* generators)
+    def visit_GeneratorExp(self, node):  # '(elt for t in it if cond ...)'
+        self._write('(')
+        self.visit(node.elt)
+        for generator in node.generators:
+            # comprehension = (expr target, expr iter, expr* ifs)
+            self._write(' for ')
+            self.visit(generator.target)
+            self._write(' in ')
+            self.visit(generator.iter)
+            for ifexpr in generator.ifs:
+                self._write(' if ')
+                self.visit(ifexpr)
+        self._write(')')
+
+    @with_parens  # Yield(expr? value)
+    def visit_Yield(self, node):  # a nested yield must be parenthesised
+        self._write('yield')
+        if getattr(node, 'value', None):
+            self._write(' ')
+            self.visit(node.value)
+
+    comparision_operators = {  # (sic) spelling kept: this is a public name
+        _ast.Eq: '==',
+        _ast.NotEq: '!=',
+        _ast.Lt: '<',
+        _ast.LtE: '<=',
+        _ast.Gt: '>',
+        _ast.GtE: '>=',
+        _ast.Is: 'is',
+        _ast.IsNot: 'is not',
+        _ast.In: 'in',
+        _ast.NotIn: 'not in',
+    }
+
+    # Compare(expr left, cmpop* ops, expr* comparators)
+    @with_parens
+    def visit_Compare(self, node):  # chained: '(a < b <= c)'
+        self.visit(node.left)
+        for op, comparator in zip(node.ops, node.comparators):
+            self._write(' ' + self.comparision_operators[op.__class__] + ' ')
+            self.visit(comparator)
+
+    # Call(expr func, expr* args, keyword* keywords,
+    #                         expr? starargs, expr? kwargs)
+    def visit_Call(self, node):  # order: args, keywords, *starargs, **kwargs
+        self.visit(node.func)
+        self._write('(')
+        first = True  # stays true until the first argument is written
+        for arg in node.args:
+            if not first:
+                self._write(', ')
+            first = False
+            self.visit(arg)
+
+        for keyword in node.keywords:
+            if not first:
+                self._write(', ')
+            first = False
+            # keyword = (identifier arg, expr value)
+            self._write(keyword.arg)
+            self._write('=')
+            self.visit(keyword.value)
+        if getattr(node, 'starargs', None):
+            if not first:
+                self._write(', ')
+            first = False
+            self._write('*')
+            self.visit(node.starargs)
+
+        if getattr(node, 'kwargs', None):
+            if not first:
+                self._write(', ')
+            first = False
+            self._write('**')
+            self.visit(node.kwargs)
+        self._write(')')
+
+    # Repr(expr value)
+    def visit_Repr(self, node):  # Python 2 backquote syntax: `value`
+        self._write('`')
+        self.visit(node.value)
+        self._write('`')
+
+    def visit_Num(self, node):  # Num(object n)
+        text = repr(node.n)  # negative literals need parens: (-2) ** 2
+        self._write(text.startswith('-') and '(%s)' % text or text)
+
+    # Str(string s)
+    def visit_Str(self, node):  # repr() supplies quoting and escaping
+        self._write(repr(node.s))
+
+    # Attribute(expr value, identifier attr, expr_context ctx)
+    def visit_Attribute(self, node):  # 'value.attr'; ctx is not needed
+        self.visit(node.value)
+        self._write('.')
+        self._write(node.attr)
+
+    # Subscript(expr value, slice slice, expr_context ctx)
+    def visit_Subscript(self, node):
+        self.visit(node.value)
+        self._write('[')
+        def _process_slice(node):  # slice nodes are handled here, recursively
+            if isinstance(node, _ast.Ellipsis):
+                self._write('...')
+            elif isinstance(node, _ast.Slice):
+                if getattr(node, 'lower', None):
+                    self.visit(node.lower)
+                self._write(':')
+                if getattr(node, 'upper', None):
+                    self.visit(node.upper)
+                if getattr(node, 'step', None):
+                    self._write(':')
+                    self.visit(node.step)
+            elif isinstance(node, _ast.Index):
+                self.visit(node.value)
+            elif isinstance(node, _ast.ExtSlice):
+                _process_slice(node.dims[0])  # dims are slice nodes: recurse
+                for dim in node.dims[1:]:
+                    self._write(', ')
+                    _process_slice(dim)
+            else:
+                raise NotImplementedError('Slice type not implemented')
+        _process_slice(node.slice)
+        self._write(']')
+
+    # Name(identifier id, expr_context ctx)
+    def visit_Name(self, node):  # only the identifier is written
+        self._write(node.id)
+
+    # List(expr* elts, expr_context ctx)
+    def visit_List(self, node):  # every element is followed by ', '
+        self._write('[')
+        for elt in node.elts:
+            self.visit(elt)
+            self._write(', ')
+        self._write(']')
+
+    # Tuple(expr *elts, expr_context ctx)
+    def visit_Tuple(self, node):  # trailing ', ' keeps 1-tuples tuples
+        self._write('(')
+        for elt in node.elts:
+            self.visit(elt)
+            self._write(', ')
+        self._write(')')
+
+
+class ASTTransformer(object):
+    """General purpose base class for AST transformations.
+
+    Every visitor method can be overridden to return an AST node that has been
+    altered or replaced in some way.
+    """
+
+    def visit(self, node):  # dispatch to visit_<NodeClassName>
+        if node is None:
+            return None
+        if type(node) is tuple:
+            return tuple([self.visit(n) for n in node])
+        visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
+        if visitor is None:  # no visitor: the node is returned uncopied
+            return node
+        return visitor(node)
+
+    def _clone(self, node):  # new node of the same class, children visited
+        clone = node.__class__()
+        for name in getattr(clone, '_attributes', ()):
+            try:  # copy each attribute under its own name
+                setattr(clone, name, getattr(node, name))
+            except AttributeError:
+                pass
+        for name in clone._fields:
+            try:
+                value = getattr(node, name)
+            except AttributeError:  # field not set on the source node
+                pass
+            else:
+                if value is None:
+                    pass
+                elif isinstance(value, list):
+                    value = [self.visit(x) for x in value]
+                elif isinstance(value, tuple):
+                    value = tuple(self.visit(x) for x in value)
+                else:
+                    value = self.visit(value)
+                setattr(clone, name, value)
+        return clone
+
+    visit_Module = _clone
+    visit_Interactive = _clone
+    visit_Expression = _clone
+    visit_Suite = _clone
+
+    visit_FunctionDef = _clone
+    visit_ClassDef = _clone
+    visit_Return = _clone
+    visit_Delete = _clone
+    visit_Assign = _clone
+    visit_AugAssign = _clone
+    visit_Print = _clone
+    visit_For = _clone
+    visit_While = _clone
+    visit_If = _clone
+    visit_With = _clone
+    visit_Raise = _clone
+    visit_TryExcept = _clone
+    visit_TryFinally = _clone
+    visit_Assert = _clone
+    visit_ExceptHandler = _clone
+
+    visit_Import = _clone
+    visit_ImportFrom = _clone
+    visit_Exec = _clone
+    visit_Global = _clone
+    visit_Expr = _clone
+    # Pass, Break, Continue don't need to be copied
+
+    visit_BoolOp = _clone
+    visit_BinOp = _clone
+    visit_UnaryOp = _clone
+    visit_Lambda = _clone
+    visit_IfExp = _clone
+    visit_Dict = _clone
+    visit_ListComp = _clone
+    visit_GeneratorExp = _clone
+    visit_Yield = _clone
+    visit_Compare = _clone
+    visit_Call = _clone
+    visit_Repr = _clone
+    # Num, Str don't need to be copied
+
+    visit_Attribute = _clone
+    visit_Subscript = _clone
+    visit_Name = _clone
+    visit_List = _clone
+    visit_Tuple = _clone
+
+    visit_comprehension = _clone
+    visit_excepthandler = _clone
+    visit_arguments = _clone
+    visit_keyword = _clone
+    visit_alias = _clone
+
+    visit_Slice = _clone
+    visit_ExtSlice = _clone
+    visit_Index = _clone
+
+    del _clone  # the visit_* aliases above keep the function alive
diff --git a/genshi/template/base.py b/genshi/template/base.py
new file mode 100644
index 0000000..202faae
--- /dev/null
+++ b/genshi/template/base.py
@@ -0,0 +1,634 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Basic templating functionality."""
+
+from collections import deque
+import os
+from StringIO import StringIO
+import sys
+
+from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
+from genshi.input import ParseError
+
+__all__ = ['Context', 'DirectiveFactory', 'Template', 'TemplateError',
+           'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError']
+__docformat__ = 'restructuredtext en'
+
+
+class TemplateError(Exception):
+    """Root of the exception hierarchy for template processing failures."""
+
+    def __init__(self, message, filename=None, lineno=-1, offset=-1):
+        """Record the message together with its location in the template.
+
+        :param message: the error message
+        :param filename: the template filename (``'<string>'`` if ``None``)
+        :param lineno: the template line on which the error occurred
+        :param offset: the column number at which the error occurred
+        """
+        if filename is None:
+            filename = '<string>'
+        text = message
+        if filename != '<string>' or lineno >= 0:
+            text = '%s (%s, line %d)' % (message, filename, lineno)
+        Exception.__init__(self, text)
+        self.msg = message #: the error message string
+        self.filename = filename #: the name of the template file
+        self.lineno = lineno #: the number of the line containing the error
+        self.offset = offset #: the offset on the line
+
+
+class TemplateSyntaxError(TemplateError):
+    """Exception raised when an expression in a template causes a Python syntax
+    error, or the template is not well-formed.
+    """
+
+    def __init__(self, message, filename=None, lineno=-1, offset=-1):
+        """Create the exception.
+
+        :param message: the error message, or the `SyntaxError` to report
+        :param filename: the filename of the template
+        :param lineno: the template line at which the error occurred
+        :param offset: the column number at which the error occurred
+        """
+        if isinstance(message, SyntaxError) and message.lineno is not None:
+            message = str(message).replace(' (line %d)' % message.lineno, '')
+        # Hand `offset` on too, otherwise it is always recorded as -1
+        TemplateError.__init__(self, message, filename, lineno, offset)
+
+
+class BadDirectiveError(TemplateSyntaxError):
+    """Exception raised when the parser comes across a directive it does not
+    know about.
+
+    That is the case for any attribute in the directives namespace whose
+    local name does not correspond to a registered directive.
+    """
+
+    def __init__(self, name, filename=None, lineno=-1):
+        """Create the exception.
+
+        :param name: the name of the unknown directive
+        :param filename: the filename of the template
+        :param lineno: the number of the template line at which the directive
+                       was found
+        """
+        message = 'bad directive "%s"' % name
+        TemplateSyntaxError.__init__(self, message, filename, lineno)
+
+
+class TemplateRuntimeError(TemplateError):
+    """Exception raised when the evaluation of a Python expression in a
+    template causes an error.
+    """
+
+
+class Context(object):
+    """Container for template input data.
+
+    The data lives in a stack of scopes, each of which is a dictionary.
+
+    Directives such as loops push a scope holding the data that should only
+    be visible inside their body onto the stack, and pop that scope off again
+    once they are done.
+
+    >>> ctxt = Context(one='foo', other=1)
+    >>> ctxt.get('one')
+    'foo'
+    >>> ctxt.get('other')
+    1
+    >>> ctxt.push(dict(one='frost'))
+    >>> ctxt.get('one')
+    'frost'
+    >>> ctxt.get('other')
+    1
+    >>> ctxt.pop()
+    {'one': 'frost'}
+    >>> ctxt.get('one')
+    'foo'
+    """
+
+    def __init__(self, **data):
+        """Create the context, using the given keyword arguments as the data
+        of the outermost scope.
+        """
+        # Helper functions for use in expressions
+        def defined(name):
+            """Tell whether a variable with the specified name exists in the
+            expression scope."""
+            return name in self
+        def value_of(name, default=None):
+            """Return the value of the named variable if it is defined, and
+            the provided default value (initially ``None``) otherwise."""
+            return self.get(name, default)
+        data.setdefault('defined', defined)
+        data.setdefault('value_of', value_of)
+
+        self.frames = scopes = deque([data])
+        self.pop = scopes.popleft
+        self.push = scopes.appendleft
+        self._match_templates = []
+        self._choice_stack = []
+
+    def __repr__(self):
+        return repr([scope for scope in self.frames])
+
+    def __contains__(self, key):
+        """Tell whether any of the scopes defines the variable.
+
+        :param key: the name of the variable
+        """
+        _, scope = self._find(key)
+        return scope is not None
+    has_key = __contains__
+
+    def __delitem__(self, key):
+        """Remove a variable from every scope that defines it.
+
+        :param key: the name of the variable
+        """
+        for scope in self.frames:
+            if key not in scope:
+                continue
+            del scope[key]
+
+    def __getitem__(self, key):
+        """Look a variable up, starting at the current scope and going upward.
+
+        :param key: the name of the variable
+        :return: the variable value
+        :raises KeyError: if the requested variable wasn't found in any scope
+        """
+        found = self._find(key)
+        if found[1] is None:
+            raise KeyError(key)
+        return found[0]
+
+    def __len__(self):
+        """Return the number of distinctly named variables in the context.
+
+        :return: the number of variables in the context
+        """
+        return len(self.items())
+
+    def __setitem__(self, key, value):
+        """Set a variable in the current scope.
+
+        :param key: the name of the variable
+        :param value: the variable value
+        """
+        current = self.frames[0]
+        current[key] = value
+
+    def _find(self, key, default=None):
+        """Retrieve a given variable's value and the frame it was found in.
+
+        Intended primarily for internal use by directives.
+
+        :param key: the name of the variable
+        :param default: the value to return when the variable is not found
+        """
+        for scope in self.frames:
+            if key in scope:
+                return scope[key], scope
+        return default, None
+
+    def get(self, key, default=None):
+        """Look a variable up, starting at the current scope and going upward.
+
+        :param key: the name of the variable
+        :param default: the value to return when the variable is not found
+        """
+        for scope in self.frames:
+            if key in scope:
+                return scope[key]
+        return default
+
+    def keys(self):
+        """Return the name of all variables in the context.
+
+        :return: a list of variable names
+        """
+        names = []
+        for scope in self.frames:
+            fresh = [name for name in scope if name not in names]
+            names.extend(fresh)
+        return names
+
+    def items(self):
+        """Return ``(name, value)`` tuples for all variables in the context.
+
+        :return: a list of variables
+        """
+        lookup = self.get
+        return [(name, lookup(name)) for name in self.keys()]
+
+    def update(self, mapping):
+        """Update the context from the mapping provided."""
+        self.frames[0].update(mapping)
+
+    def push(self, data):
+        """Push a new scope on the stack.
+
+        :param data: the data dictionary to push on the context stack.
+        """
+
+    def pop(self):
+        """Pop the top-most scope from the stack."""
+
+
+def _apply_directives(stream, directives, ctxt, vars):
+    """Apply the first directive to the stream, handing it the remaining ones.
+
+    :param stream: the stream the directives should be applied to
+    :param directives: the list of directives to apply
+    :param ctxt: the `Context`
+    :param vars: extra variables to expose when Python code is executed
+    :return: the stream with the given directives applied
+    """
+    if not directives:
+        return stream
+    head, rest = directives[0], directives[1:]
+    return head(iter(stream), rest, ctxt, **vars)
+
+
+def _eval_expr(expr, ctxt, vars=None):
+    """Evaluate the given `Expression`, restoring `ctxt` even on errors.
+
+    :param expr: the expression to evaluate
+    :param ctxt: the `Context`
+    :param vars: extra variables to make available to the expression
+    :return: the result of the evaluation
+    """
+    if not vars:
+        return expr.evaluate(ctxt)
+    ctxt.push(vars)
+    try:
+        return expr.evaluate(ctxt)
+    finally:
+        ctxt.pop()
+
+
+def _exec_suite(suite, ctxt, vars=None):
+    """Execute the given `Suite`, rebalancing `ctxt` even on errors.
+
+    :param suite: the code suite to execute
+    :param ctxt: the `Context`
+    :param vars: extra variables to make available to the code"""
+    if vars:
+        ctxt.push(vars)
+        ctxt.push({}) # scratch scope, merged into the enclosing one below
+    try:
+        suite.execute(ctxt)
+    finally:
+        if vars:
+            top = ctxt.pop()
+            ctxt.pop()
+            ctxt.frames[0].update(top)
+
+
+class DirectiveFactoryMeta(type):
+    """Meta class that indexes the ``directives`` of directive factories."""
+
+    def __new__(cls, name, bases, d):
+        declared = d.get('directives')
+        if 'directives' in d:
+            d.update(_dir_by_name=dict(declared),
+                     _dir_order=[entry[1] for entry in declared])
+        return type.__new__(cls, name, bases, d)
+
+
+class DirectiveFactory(object):
+    """Base class for objects that supply a set of template directives.
+
+    :since: version 0.6
+    """
+    __metaclass__ = DirectiveFactoryMeta
+
+    directives = []
+    """A list of ``(name, cls)`` tuples, one for each directive provided by
+    this factory."""
+
+    def get_directive(self, name):
+        """Look up the directive class registered under the given name.
+
+        :param name: the directive name as used in the template
+        :return: the directive class, or ``None`` if the name is unknown
+        :see: `Directive`
+        """
+        return self._dir_by_name.get(name)
+
+    def get_directive_index(self, dir_cls):
+        """Return the key used to sort the given directive class among the
+        other directives on the same `SUB` event.
+
+        By default this is the index of the class in the `directives` list, or
+        the length of that list when the class is not part of it.
+
+        :param dir_cls: the directive class
+        :return: the sort key
+        """
+        order = self._dir_order
+        if dir_cls not in order:
+            return len(order)
+        return order.index(dir_cls)
+
+
+class Template(DirectiveFactory):
+    """Abstract template base class.
+    
+    This class implements most of the template processing model, but does not
+    specify the syntax of templates.
+    """
+
+    EXEC = StreamEventKind('EXEC')
+    """Stream event kind representing a Python code suite to execute."""
+
+    EXPR = StreamEventKind('EXPR')
+    """Stream event kind representing a Python expression."""
+
+    INCLUDE = StreamEventKind('INCLUDE')
+    """Stream event kind representing the inclusion of another template."""
+
+    SUB = StreamEventKind('SUB')
+    """Stream event kind representing a nested stream to which one or more
+    directives should be applied.
+    """
+
+    serializer = None
+    _number_conv = unicode # function used to convert numbers to event data
+
+    def __init__(self, source, filepath=None, filename=None, loader=None,
+                 encoding=None, lookup='strict', allow_exec=True):
+        """Create a template from a string, a file-like object, or a markup
+        stream that has already been parsed.
+
+        :param source: a string, file-like object, or markup stream to read the
+                       template from
+        :param filepath: the absolute path to the template file; falls back to
+                         `filename` if not given
+        :param filename: the path to the template file relative to the search
+                         path
+        :param loader: the `TemplateLoader` to use for loading included
+                       templates
+        :param encoding: the encoding of the `source`
+        :param lookup: the variable lookup mechanism; either "strict" (the
+                       default), "lenient", or a custom lookup class
+        :param allow_exec: whether Python code blocks in templates should be
+                           allowed
+        :raises TemplateSyntaxError: if the source can not be parsed
+
+        :note: Changed in 0.5: Added the `allow_exec` argument
+        """
+        self.filename = filename
+        self.filepath = filepath or filename
+        self.loader = loader
+        self.lookup = lookup
+        self.allow_exec = allow_exec
+        self._init_filters()
+        self._init_loader()
+        self._prepared = False
+
+        if isinstance(source, basestring):
+            source = StringIO(source)
+        try:
+            self._stream = self._parse(source, encoding)
+        except ParseError, e:
+            raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset)
+
+    def __getstate__(self):
+        # The filters are left out of the pickled state; `__setstate__`
+        # recreates them through `_init_filters`
+        return dict(self.__dict__, filters=[])
+
+    def __setstate__(self, state):
+        self.__dict__ = state # adopt the unpickled attributes as they are
+        self._init_filters() # `__getstate__` stored an empty filter list
+
+    def __repr__(self):
+        return '<%s "%s">' % (self.__class__.__name__, self.filename)
+
+    def _init_filters(self):
+        self.filters = [self._flatten, self._include] # run in list order
+
+    def _init_loader(self):
+        if self.loader is not None:
+            return
+        from genshi.template.loader import TemplateLoader
+        basedir = '.'
+        if self.filename:
+            if self.filepath == self.filename:
+                basedir = os.path.dirname(self.filename)
+            else:
+                # Cut the relative file name off the end of the full path
+                tail = len(os.path.normpath(self.filename))
+                basedir = os.path.normpath(self.filepath)[:-tail]
+        self.loader = TemplateLoader([os.path.abspath(basedir)])
+
+    @property
+    def stream(self):
+        if self._prepared:
+            return self._stream
+        self._stream, self._prepared = list(self._prepare(self._stream)), True
+        return self._stream
+
+    def _parse(self, source, encoding):
+        """Parse the template source.
+
+        The input is split up into literal output (text that does not depend
+        on the context data) and directives or expressions. This base class
+        does not specify a template syntax, so it only raises an error here.
+
+        :param source: a file-like object containing the source of the
+                       template, or an already parsed event stream
+        :param encoding: the encoding of the `source`
+        :raises NotImplementedError: unless overridden by a subclass
+        """
+        raise NotImplementedError
+
+    def _prepare(self, stream):
+        """Call the `attach` method of every directive found in the template,
+        and inline included templates where that can be done ahead of time.
+
+        :param stream: the event stream of the template
+        """
+        from genshi.template.loader import TemplateNotFound
+
+        for kind, data, pos in stream:
+            if kind is SUB:
+                directives = []
+                substream = data[1] # the events the directives apply to
+                for _, cls, value, namespaces, pos in sorted(data[0]):
+                    directive, substream = cls.attach(self, substream, value,
+                                                      namespaces, pos)
+                    if directive: # attach() may return no directive at all
+                        directives.append(directive)
+                substream = self._prepare(substream) # handle nested events
+                if directives: # NOTE: `pos` was rebound by the loop above
+                    yield kind, (directives, list(substream)), pos
+                else: # nothing to apply at run time, so unwrap the events
+                    for event in substream:
+                        yield event
+            else:
+                if kind is INCLUDE:
+                    href, cls, fallback = data
+                    if isinstance(href, basestring) and \
+                            not getattr(self.loader, 'auto_reload', True):
+                        # If the path to the included template is static, and
+                        # auto-reloading is disabled on the template loader,
+                        # the template is inlined into the stream
+                        try:
+                            tmpl = self.loader.load(href, relative_to=pos[0],
+                                                    cls=cls or self.__class__)
+                            for event in tmpl.stream:
+                                yield event
+                        except TemplateNotFound:
+                            if fallback is None:
+                                raise
+                            for event in self._prepare(fallback):
+                                yield event
+                        continue # the INCLUDE event itself is not passed on
+                    elif fallback:
+                        # Otherwise the include is performed at run time
+                        data = href, cls, list(self._prepare(fallback))
+                yield kind, data, pos # pass the (possibly updated) event on
+
+    def generate(self, *args, **kwargs):
+        """Apply the template to the given context data.
+
+        Any keyword arguments are made available to the template as context
+        data.
+
+        Only one positional argument is accepted: if it is provided and not
+        ``None``, it must be an instance of the `Context` class, and the
+        keyword arguments are passed on to the filters as additional variables
+        instead. This calling style is used for internal processing.
+
+        :return: a markup event stream representing the result of applying
+                 the template to the context data.
+        """
+        ctxt = None
+        if args:
+            assert len(args) == 1
+            ctxt = args[0]
+        # Without an explicit context, the keyword arguments are the data
+        if ctxt is None:
+            ctxt, vars = Context(**kwargs), {}
+        else:
+            assert isinstance(ctxt, Context)
+            vars = kwargs
+
+        events = self.stream
+        for filter_ in self.filters:
+            events = filter_(iter(events), ctxt, **vars)
+        return Stream(events, self.serializer)
+
+    def _flatten(self, stream, ctxt, **vars):
+        """Internal stream filter that evaluates expressions, executes code
+        suites and applies directives, yielding the resulting events."""
+        number_conv = self._number_conv
+        stack = [] # streams suspended while a nested one is processed
+        push = stack.append
+        pop = stack.pop
+        stream = iter(stream)
+        while 1:
+            for kind, data, pos in stream:
+                if kind is START and data[1]:
+                    # Attributes may still contain expressions in start tags at
+                    # this point, so do some evaluation
+                    tag, attrs = data
+                    new_attrs = []
+                    for name, value in attrs:
+                        if type(value) is list: # this is an interpolated string
+                            values = [event[1]
+                                for event in self._flatten(value, ctxt, **vars)
+                                if event[0] is TEXT and event[1] is not None
+                            ]
+                            if not values: # no text at all, omit the attribute
+                                continue
+                            value = ''.join(values)
+                        new_attrs.append((name, value))
+                    yield kind, (tag, Attrs(new_attrs)), pos
+
+                elif kind is EXPR:
+                    result = _eval_expr(data, ctxt, vars)
+                    if result is not None:
+                        # First check for a string, otherwise the iterable test
+                        # below succeeds, and the string will be chopped up into
+                        # individual characters
+                        if isinstance(result, basestring):
+                            yield TEXT, result, pos
+                        elif isinstance(result, (int, float, long)):
+                            yield TEXT, number_conv(result), pos
+                        elif hasattr(result, '__iter__'):
+                            push(stream)
+                            stream = _ensure(result)
+                            break # continue with the events of the result
+                        else:
+                            yield TEXT, unicode(result), pos
+
+                elif kind is SUB:
+                    # This event is a list of directives and a list of nested
+                    # events to which those directives should be applied
+                    push(stream)
+                    stream = _apply_directives(data[1], data[0], ctxt, vars)
+                    break # continue with the events of the substream
+
+                elif kind is EXEC:
+                    _exec_suite(data, ctxt, vars)
+
+                else:
+                    yield kind, data, pos
+
+            else: # the current stream is exhausted
+                if not stack:
+                    break
+                stream = pop() # resume the stream suspended last
+
+    def _include(self, stream, ctxt, **vars):
+        """Internal stream filter that replaces ``INCLUDE`` events with the
+        output of the template they refer to.
+        """
+        from genshi.template.loader import TemplateNotFound
+
+        for event in stream:
+            if event[0] is not INCLUDE:
+                yield event
+                continue
+            href, cls, fallback = event[1]
+            if not isinstance(href, basestring):
+                # The target is an interpolated string, so evaluate it first
+                href = ''.join([
+                    text for kind, text, _ in self._flatten(href, ctxt, **vars)
+                    if kind is TEXT and text is not None
+                ])
+            try:
+                tmpl = self.loader.load(href, relative_to=event[2][0],
+                                        cls=cls or self.__class__)
+                for included in tmpl.generate(ctxt, **vars):
+                    yield included
+            except TemplateNotFound:
+                if fallback is None:
+                    raise
+                # Render the fallback content through the same filters instead
+                for filter_ in self.filters:
+                    fallback = filter_(iter(fallback), ctxt, **vars)
+                for substitute in fallback:
+                    yield substitute
+
+
+EXEC = Template.EXEC # module-level shortcuts for the event kinds
+EXPR = Template.EXPR
+INCLUDE = Template.INCLUDE
+SUB = Template.SUB
diff --git a/genshi/template/directives.py b/genshi/template/directives.py
new file mode 100644
index 0000000..e2c9424
--- /dev/null
+++ b/genshi/template/directives.py
@@ -0,0 +1,725 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of the various template directives."""
+
+from genshi.core import QName, Stream
+from genshi.path import Path
+from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \
+                                 EXPR, _apply_directives, _eval_expr
+from genshi.template.eval import Expression, ExpressionASTTransformer, \
+                                 _ast, _parse
+
+__all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective',
+           'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective',
+           'OtherwiseDirective', 'ReplaceDirective', 'StripDirective',
+           'WhenDirective', 'WithDirective']
+__docformat__ = 'restructuredtext en'
+
+
+class DirectiveMeta(type):
+    """Meta class that derives the ``tagname`` of a directive class."""
+
+    def __new__(cls, name, bases, d):
+        d.update(tagname=name.lower().replace('directive', ''))
+        return type.__new__(cls, name, bases, d)
+
+
+class Directive(object):
+    """Abstract base class for template directives.
+
+    A directive is basically a callable that takes three positional arguments:
+    ``stream`` is an iterable over the events that the directive applies to,
+    ``directives`` is a list of other directives on the same stream that need
+    to be applied, and ``ctxt`` is the template data context.
+
+    Directives can be "anonymous" or "registered". Registered directives can be
+    applied by the template author using an XML attribute with the
+    corresponding name in the template. Such directives should be subclasses of
+    this base class that can be instantiated with the value of the directive
+    attribute as parameter.
+
+    Anonymous directives are simply functions conforming to the protocol
+    described above, and can only be applied programmatically (for example by
+    template filters).
+    """
+    __metaclass__ = DirectiveMeta
+    __slots__ = ['expr']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        self.expr = self._parse_expr(value, template, lineno, offset)
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        """Called after the template stream has been completely parsed.
+
+        :param template: the `Template` object
+        :param stream: the event stream associated with the directive
+        :param value: the argument value for the directive; if the directive was
+                      specified as an element, this will be an `Attrs` instance
+                      with all specified attributes, otherwise it will be a
+                      `unicode` object with just the attribute value
+        :param namespaces: a mapping of namespace URIs to prefixes
+        :param pos: a ``(filename, lineno, offset)`` tuple describing the
+                    location where the directive was found in the source
+
+        This class method should return a ``(directive, stream)`` tuple. If
+        ``directive`` is not ``None``, it should be a `Directive` instance, and
+        gets added to the list of directives applied to the substream at
+        runtime. `stream` is the event stream that replaces the original one.
+        """
+        return cls(value, template, namespaces, *pos[1:]), stream
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        """Apply the directive to the given stream.
+
+        :param stream: the event stream
+        :param directives: the remaining directives that should process it
+        :param ctxt: the context data
+        :param vars: extra variables to expose when Python code is executed
+        """
+        raise NotImplementedError
+
+    def __repr__(self):
+        expr = getattr(self, 'expr', None)
+        suffix = ''
+        if expr is not None:
+            suffix = ' "%s"' % expr.source
+        return '<%s%s>' % (type(self).__name__, suffix)
+
+    @classmethod
+    def _parse_expr(cls, expr, template, lineno=-1, offset=-1):
+        """Parse the given expression, turning syntax errors into a
+        `TemplateSyntaxError` that names the expression and the directive.
+        """
+        if not expr:
+            return None
+        try:
+            return Expression(expr, template.filepath, lineno,
+                              lookup=template.lookup) or None
+        except SyntaxError, err:
+            err.msg += ' in expression "%s" of "%s" directive' % (expr,
+                                                                  cls.tagname)
+            raise TemplateSyntaxError(err, template.filepath, lineno,
+                                      offset + (err.offset or 0))
+
+
+def _assignment(ast):
+    """Build a function that assigns a value to the name, or the (possibly
+    nested) tuple of names, described by the given AST node in a dictionary.
+    """
+    def _names(node):
+        if isinstance(node, _ast.Name):
+            return node.id
+        if isinstance(node, _ast.Tuple):
+            return tuple([_names(child) for child in node.elts])
+    def _assign(data, value, names=_names(ast)):
+        if type(names) is not tuple:
+            data[names] = value
+            return
+        for idx, name in enumerate(names):
+            _assign(data, value[idx], name)
+    return _assign
+
+
+class AttrsDirective(Directive):
+    """Implementation of the ``py:attrs`` template directive.
+    
+    The value of the ``py:attrs`` attribute should be a dictionary or an
+    iterable of ``(name, value)`` tuples that get added to the element:
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:attrs="foo">Bar</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(foo={'class': 'collapse'}))
+    <ul>
+      <li class="collapse">Bar</li>
+    </ul>
+    >>> print(tmpl.generate(foo=[('class', 'collapse')]))
+    <ul>
+      <li class="collapse">Bar</li>
+    </ul>
+    
+    If the value is ``None`` (or otherwise false), no attributes are added:
+    
+    >>> print(tmpl.generate(foo=None))
+    <ul>
+      <li>Bar</li>
+    </ul>
+    """
+    __slots__ = []
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        def _generate():
+            kind, (tag, attrib), pos  = stream.next()
+            attrs = _eval_expr(self.expr, ctxt, vars)
+            if attrs:
+                if isinstance(attrs, Stream):
+                    try:
+                        attrs = iter(attrs).next()
+                    except StopIteration:
+                        attrs = []
+                elif hasattr(attrs, 'items'): # a dict or other mapping
+                    attrs = attrs.items()
+                else: # any iterable of pairs; it is traversed twice below
+                    attrs = list(attrs)
+                attrib -= [name for name, val in attrs if val is None]
+                attrib |= [(QName(name), unicode(val).strip()) for name, val
+                           in attrs if val is not None]
+            yield kind, (tag, attrib), pos
+            for event in stream:
+                yield event
+
+        return _apply_directives(_generate(), directives, ctxt, vars)
+
+
+class ContentDirective(Directive):
+    """Implementation of the ``py:content`` template directive.
+    
+    The content of the element carrying the ``py:content`` attribute is
+    replaced by the result of evaluating the value of that attribute:
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:content="bar">Hello</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <ul>
+      <li>Bye</li>
+    </ul>
+    """
+    __slots__ = []
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        where = pos[1:]
+        if type(value) is dict:
+            message = 'The content directive can not be used as an element'
+            raise TemplateSyntaxError(message, template.filepath, *where)
+        expr = cls._parse_expr(value, template, *where)
+        return None, [stream[0], (EXPR, expr, pos), stream[-1]]
+
+
+class DefDirective(Directive):
+    """Implementation of the ``py:def`` template directive.
+    
+    This directive can be used to create "Named Template Functions", which
+    are template snippets that are not actually output during normal
+    processing, but rather can be expanded from expressions in other places
+    in the template.
+    
+    A named template function can be used just like a normal Python function
+    from template expressions:
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <p py:def="echo(greeting, name='world')" class="message">
+    ...     ${greeting}, ${name}!
+    ...   </p>
+    ...   ${echo('Hi', name='you')}
+    ... </div>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <div>
+      <p class="message">
+        Hi, you!
+      </p>
+    </div>
+    
+    If a function does not require parameters, the parentheses can be omitted
+    in the definition:
+    
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <p py:def="helloworld" class="message">
+    ...     Hello, world!
+    ...   </p>
+    ...   ${helloworld()}
+    ... </div>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <div>
+      <p class="message">
+        Hello, world!
+      </p>
+    </div>
+    """
+    __slots__ = ['name', 'args', 'star_args', 'dstar_args', 'defaults']
+
+    def __init__(self, args, template, namespaces=None, lineno=-1, offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        ast = _parse(args).body  # e.g. "echo(greeting, name='world')"
+        self.args = []  # parameter names, positional first, then keyword
+        self.star_args = None
+        self.dstar_args = None
+        self.defaults = {}  # keyword parameter name -> default Expression
+        if isinstance(ast, _ast.Call):
+            self.name = ast.func.id
+            for arg in ast.args:
+                # positional parameters must be plain names
+                self.args.append(arg.id)
+            for kwd in ast.keywords:
+                self.args.append(kwd.arg)
+                exp = Expression(kwd.value, template.filepath,
+                                 lineno, lookup=template.lookup)
+                self.defaults[kwd.arg] = exp
+            if getattr(ast, 'starargs', None):
+                self.star_args = ast.starargs.id
+            if getattr(ast, 'kwargs', None):
+                self.dstar_args = ast.kwargs.id
+        else:
+            self.name = ast.id  # bare name: function without parameters
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: signature is in "function"
+            value = value.get('function')
+        return super(DefDirective, cls).attach(template, stream, value,
+                                               namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        stream = list(stream)  # buffered: replayed on every function call
+
+        def function(*args, **kwargs):
+            scope = {}
+            args = list(args) # make mutable
+            for name in self.args:
+                if args:
+                    scope[name] = args.pop(0)
+                else:
+                    if name in kwargs:
+                        val = kwargs.pop(name)
+                    else:
+                        val = _eval_expr(self.defaults.get(name), ctxt, vars)
+                    scope[name] = val
+            if self.star_args is not None:
+                scope[self.star_args] = args  # leftover positional arguments
+            if self.dstar_args is not None:
+                scope[self.dstar_args] = kwargs  # leftover keyword arguments
+            ctxt.push(scope)
+            for event in _apply_directives(stream, directives, ctxt, vars):
+                yield event
+            ctxt.pop()
+        function.__name__ = self.name
+
+        # Store the function reference in the bottom context frame so that it
+        # doesn't get popped off before processing the template has finished
+        # FIXME: this makes context data mutable as a side-effect
+        ctxt.frames[-1][self.name] = function
+
+        return []  # the definition itself produces no output
+
+    def __repr__(self):
+        return '<%s "%s">' % (type(self).__name__, self.name)
+
+
+class ForDirective(Directive):
+    """Implementation of the ``py:for`` template directive for repeating an
+    element based on an iterable in the context data.
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:for="item in items">${item}</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(items=[1, 2, 3]))
+    <ul>
+      <li>1</li><li>2</li><li>3</li>
+    </ul>
+    """
+    __slots__ = ['assign', 'filename']
+
+    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+        if ' in ' not in value:
+            raise TemplateSyntaxError('"in" keyword missing in "for" directive',
+                                      template.filepath, lineno, offset)
+        assign, value = value.split(' in ', 1)  # "<targets> in <iterable>"
+        ast = _parse(assign, 'exec')
+        value = 'iter(%s)' % value.strip()  # expression now yields an iterator
+        self.assign = _assignment(ast.body[0].value)
+        self.filename = template.filepath
+        Directive.__init__(self, value, template, namespaces, lineno, offset)
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: the loop spec is in "each"
+            value = value.get('each')
+        return super(ForDirective, cls).attach(template, stream, value,
+                                               namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        iterable = _eval_expr(self.expr, ctxt, vars)
+        if iterable is None:
+            return
+
+        assign = self.assign
+        scope = {}  # a single dict, re-filled and re-pushed on every iteration
+        stream = list(stream)  # buffered so it can be replayed for each item
+        for item in iterable:
+            assign(scope, item)
+            ctxt.push(scope)
+            for event in _apply_directives(stream, directives, ctxt, vars):
+                yield event
+            ctxt.pop()
+
+    def __repr__(self):
+        return '<%s>' % type(self).__name__
+
+
+class IfDirective(Directive):
+    """Implementation of the ``py:if`` template directive for conditionally
+    excluding elements from being output.
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <b py:if="foo">${bar}</b>
+    ... </div>''')
+    >>> print(tmpl.generate(foo=True, bar='Hello'))
+    <div>
+      <b>Hello</b>
+    </div>
+    """
+    __slots__ = []
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: the condition is in "test"
+            value = value.get('test')
+        return super(IfDirective, cls).attach(template, stream, value,
+                                              namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        value = _eval_expr(self.expr, ctxt, vars)
+        if value:
+            return _apply_directives(stream, directives, ctxt, vars)
+        return []  # condition is false: the element produces no events
+
+
+class MatchDirective(Directive):
+    """Implementation of the ``py:match`` template directive.
+
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <span py:match="greeting">
+    ...     Hello ${select('@name')}
+    ...   </span>
+    ...   <greeting name="Dude" />
+    ... </div>''')
+    >>> print(tmpl.generate())
+    <div>
+      <span>
+        Hello Dude
+      </span>
+    </div>
+    """
+    __slots__ = ['path', 'namespaces', 'hints']
+
+    def __init__(self, value, template, hints=None, namespaces=None,
+                 lineno=-1, offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.path = Path(value, template.filepath, lineno)
+        self.namespaces = namespaces or {}
+        self.hints = hints or ()
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        hints = []  # filled only in element form, from optional attributes
+        if type(value) is dict:
+            if value.get('buffer', '').lower() == 'false':
+                hints.append('not_buffered')
+            if value.get('once', '').lower() == 'true':
+                hints.append('match_once')
+            if value.get('recursive', '').lower() == 'false':
+                hints.append('not_recursive')
+            value = value.get('path')  # the match path itself
+        return cls(value, template, frozenset(hints), namespaces, *pos[1:]), \
+               stream
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        ctxt._match_templates.append((self.path.test(ignore_context=True),
+                                      self.path, list(stream), self.hints,
+                                      self.namespaces, directives))
+        return []  # only registers the template on ctxt; emits no events
+
+    def __repr__(self):
+        return '<%s "%s">' % (type(self).__name__, self.path.source)
+
+
+class ReplaceDirective(Directive):
+    """Implementation of the ``py:replace`` template directive.
+    
+    This directive replaces the element with the result of evaluating the
+    value of the ``py:replace`` attribute:
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <span py:replace="bar">Hello</span>
+    ... </div>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <div>
+      Bye
+    </div>
+    
+    This directive is equivalent to ``py:content`` combined with ``py:strip``,
+    providing a less verbose way to achieve the same effect:
+    
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <span py:content="bar" py:strip="">Hello</span>
+    ... </div>''')
+    >>> print(tmpl.generate(bar='Bye'))
+    <div>
+      Bye
+    </div>
+    """
+    __slots__ = []
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: the expression is in "value"
+            value = value.get('value')
+        if not value:
+            raise TemplateSyntaxError('missing value for "replace" directive',
+                                      template.filepath, *pos[1:])
+        expr = cls._parse_expr(value, template, *pos[1:])
+        return None, [(EXPR, expr, pos)]  # whole element -> one EXPR event
+
+
+class StripDirective(Directive):
+    """Implementation of the ``py:strip`` template directive.
+    
+    When the value of the ``py:strip`` attribute evaluates to ``True``, the
+    element is stripped from the output:
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <div py:strip="True"><b>foo</b></div>
+    ... </div>''')
+    >>> print(tmpl.generate())
+    <div>
+      <b>foo</b>
+    </div>
+    
+    Leaving the attribute value empty is equivalent to a truth value.
+    
+    This directive is particularly interesting for named template functions or
+    match templates that do not generate a top-level element:
+    
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <div py:def="echo(what)" py:strip="">
+    ...     <b>${what}</b>
+    ...   </div>
+    ...   ${echo('foo')}
+    ... </div>''')
+    >>> print(tmpl.generate())
+    <div>
+        <b>foo</b>
+    </div>
+    """
+    __slots__ = []
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        def _generate():
+            if not self.expr or _eval_expr(self.expr, ctxt, vars):
+                stream.next() # skip start tag
+                previous = stream.next()  # stay one event behind the stream
+                for event in stream:
+                    yield previous
+                    previous = event  # final event (the end tag) is dropped
+            else:
+                for event in stream:
+                    yield event
+        return _apply_directives(_generate(), directives, ctxt, vars)
+
+
+class ChooseDirective(Directive):
+    """Implementation of the ``py:choose`` directive for conditionally selecting
+    one of several body elements to display.
+    
+    If the ``py:choose`` expression is empty the expressions of nested
+    ``py:when`` directives are tested for truth.  The first true ``py:when``
+    body is output. If no ``py:when`` directive is matched then the fallback
+    directive ``py:otherwise`` will be used.
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
+    ...   py:choose="">
+    ...   <span py:when="0 == 1">0</span>
+    ...   <span py:when="1 == 1">1</span>
+    ...   <span py:otherwise="">2</span>
+    ... </div>''')
+    >>> print(tmpl.generate())
+    <div>
+      <span>1</span>
+    </div>
+    
+    If the ``py:choose`` directive contains an expression, the nested
+    ``py:when`` directives are tested for equality to the ``py:choose``
+    expression:
+    
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
+    ...   py:choose="2">
+    ...   <span py:when="1">1</span>
+    ...   <span py:when="2">2</span>
+    ... </div>''')
+    >>> print(tmpl.generate())
+    <div>
+      <span>2</span>
+    </div>
+    
+    Behavior is undefined if a ``py:choose`` block contains content outside a
+    ``py:when`` or ``py:otherwise`` block.  Behavior is also undefined if a
+    ``py:otherwise`` occurs before ``py:when`` blocks.
+    """
+    __slots__ = ['matched', 'value']  # NOTE(review): never assigned below
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: the expression is in "test"
+            value = value.get('test')
+        return super(ChooseDirective, cls).attach(template, stream, value,
+                                                  namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        info = [False, bool(self.expr), None]  # [matched, has_expr, value]
+        if self.expr:
+            info[2] = _eval_expr(self.expr, ctxt, vars)
+        ctxt._choice_stack.append(info)  # read by nested when/otherwise
+        for event in _apply_directives(stream, directives, ctxt, vars):
+            yield event
+        ctxt._choice_stack.pop()
+
+
+class WhenDirective(Directive):
+    """Implementation of the ``py:when`` directive for nesting in a parent with
+    the ``py:choose`` directive.
+    
+    See the documentation of the `ChooseDirective` for usage.
+    """
+    __slots__ = ['filename']
+
+    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+        Directive.__init__(self, value, template, namespaces, lineno, offset)
+        self.filename = template.filepath  # kept for runtime error messages
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: the expression is in "test"
+            value = value.get('test')
+        return super(WhenDirective, cls).attach(template, stream, value,
+                                                namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        info = ctxt._choice_stack and ctxt._choice_stack[-1]
+        if not info:  # empty stack: there is no enclosing "choose"
+            raise TemplateRuntimeError('"when" directives can only be used '
+                                       'inside a "choose" directive',
+                                       self.filename, *stream.next()[2][1:])
+        if info[0]:  # an earlier branch of this "choose" already matched
+            return []
+        if not self.expr and not info[1]:
+            raise TemplateRuntimeError('either "choose" or "when" directive '
+                                       'must have a test expression',
+                                       self.filename, *stream.next()[2][1:])
+        if info[1]:  # "choose" has an expression: compare against its value
+            value = info[2]
+            if self.expr:
+                matched = value == _eval_expr(self.expr, ctxt, vars)
+            else:
+                matched = bool(value)
+        else:  # no "choose" expression: test this directive's own expression
+            matched = bool(_eval_expr(self.expr, ctxt, vars))
+        info[0] = matched
+        if not matched:
+            return []
+
+        return _apply_directives(stream, directives, ctxt, vars)
+
+
+class OtherwiseDirective(Directive):
+    """Implementation of the ``py:otherwise`` directive for nesting in a parent
+    with the ``py:choose`` directive.
+    
+    See the documentation of `ChooseDirective` for usage.
+    """
+    __slots__ = ['filename']
+
+    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.filename = template.filepath  # kept for runtime error messages
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        info = ctxt._choice_stack and ctxt._choice_stack[-1]
+        if not info:  # empty stack: there is no enclosing "choose"
+            raise TemplateRuntimeError('an "otherwise" directive can only be '
+                                       'used inside a "choose" directive',
+                                       self.filename, *stream.next()[2][1:])
+        if info[0]:  # a branch of this "choose" already matched
+            return []
+        info[0] = True  # mark the choice as taken
+
+        return _apply_directives(stream, directives, ctxt, vars)
+
+
+class WithDirective(Directive):
+    """Implementation of the ``py:with`` template directive, which allows
+    shorthand access to variables and expressions.
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
+    ...   <span py:with="y=7; z=x+10">$x $y $z</span>
+    ... </div>''')
+    >>> print(tmpl.generate(x=42))
+    <div>
+      <span>42 7 52</span>
+    </div>
+    """
+    __slots__ = ['vars']
+
+    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.vars = []  # list of ([assignment targets], Expression) pairs
+        value = value.strip()
+        try:
+            ast = _parse(value, 'exec')
+            for node in ast.body:
+                if not isinstance(node, _ast.Assign):
+                    raise TemplateSyntaxError('only assignment allowed in '
+                                              'value of the "with" directive',
+                                              template.filepath, lineno, offset)
+                self.vars.append(([_assignment(n) for n in node.targets],
+                                  Expression(node.value, template.filepath,
+                                             lineno, lookup=template.lookup)))
+        except SyntaxError, err:
+            err.msg += ' in expression "%s" of "%s" directive' % (value,
+                                                                  self.tagname)
+            raise TemplateSyntaxError(err, template.filepath, lineno,
+                                      offset + (err.offset or 0))
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:  # element form: assignments are in "vars"
+            value = value.get('vars')
+        return super(WithDirective, cls).attach(template, stream, value,
+                                                namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        frame = {}
+        ctxt.push(frame)  # on ctxt while the expressions below are evaluated
+        for targets, expr in self.vars:
+            value = _eval_expr(expr, ctxt, vars)
+            for assign in targets:
+                assign(frame, value)
+        for event in _apply_directives(stream, directives, ctxt, vars):
+            yield event
+        ctxt.pop()
+
+    def __repr__(self):
+        return '<%s>' % (type(self).__name__)
diff --git a/genshi/template/eval.py b/genshi/template/eval.py
new file mode 100644
index 0000000..8593aaa
--- /dev/null
+++ b/genshi/template/eval.py
@@ -0,0 +1,629 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Support for "safe" evaluation of Python expressions."""
+
+import __builtin__
+
+from textwrap import dedent
+from types import CodeType
+
+from genshi.core import Markup
+from genshi.template.astutil import ASTTransformer, ASTCodeGenerator, \
+                                    _ast, parse
+from genshi.template.base import TemplateRuntimeError
+from genshi.util import flatten
+
+__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup',
+           'Undefined', 'UndefinedError']
+__docformat__ = 'restructuredtext en'
+
+
+# Probe for a Python 2.4 eval-loop bug: star-import into non-dict locals.
+has_star_import_bug = False
+try:
+    class _FakeMapping(object):
+        __getitem__ = __setitem__ = lambda *a: None
+    exec 'from sys import *' in {}, _FakeMapping()
+except SystemError:  # the probe failed: remember that the bug is present
+    has_star_import_bug = True
+del _FakeMapping  # probe helper only; keep it out of the module namespace
+
+
+def _star_import_patch(mapping, modname):
+    """This function is used as helper if a Python version with a broken
+    star-import opcode is in use.
+    """
+    module = __import__(modname, None, None, ['__all__'])  # fromlist: leaf
+    if hasattr(module, '__all__'):
+        members = module.__all__
+    else:  # no __all__: take every name without a leading underscore
+        members = [x for x in module.__dict__ if not x.startswith('_')]
+    mapping.update([(name, getattr(module, name)) for name in members])
+
+
+class Code(object):
+    """Abstract base class for the `Expression` and `Suite` classes."""
+    __slots__ = ['source', 'code', 'ast', '_globals']
+
+    def __init__(self, source, filename=None, lineno=-1, lookup='strict',
+                 xform=None):
+        """Create the code object, either from a string, or from an AST node.
+        
+        :param source: either a string containing the source code, or an AST
+                       node
+        :param filename: the (preferably absolute) name of the file containing
+                         the code
+        :param lineno: the number of the line on which the code was found
+        :param lookup: the lookup class that defines how variables are looked
+                       up in the context; can be either "strict" (the default),
+                       "lenient", or a custom lookup class
+        :param xform: the AST transformer that should be applied to the code;
+                      if `None`, the appropriate transformation is chosen
+                      depending on the mode
+        """
+        if isinstance(source, basestring):
+            self.source = source
+            node = _parse(source, mode=self.mode)  # mode is set by subclasses
+        else:
+            assert isinstance(source, _ast.AST), \
+                'Expected string or AST node, but got %r' % source
+            self.source = '?'  # built from an AST node: no source text to keep
+            if self.mode == 'eval':
+                node = _ast.Expression()
+                node.body = source
+            else:
+                node = _ast.Module()
+                node.body = [source]
+
+        self.ast = node
+        self.code = _compile(node, self.source, mode=self.mode,
+                             filename=filename, lineno=lineno, xform=xform)
+        if lookup is None:  # explicit None selects the lenient lookup
+            lookup = LenientLookup
+        elif isinstance(lookup, basestring):
+            lookup = {'lenient': LenientLookup, 'strict': StrictLookup}[lookup]
+        self._globals = lookup.globals  # bound classmethod of the lookup class
+
+    def __getstate__(self):
+        state = {'source': self.source, 'ast': self.ast,
+                 'lookup': self._globals.im_self}  # i.e. the lookup class
+        c = self.code
+        state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code,
+                         c.co_consts, c.co_names, c.co_varnames, c.co_filename,
+                         c.co_name, c.co_firstlineno, c.co_lnotab, (), ())
+        return state
+
+    def __setstate__(self, state):
+        self.source = state['source']
+        self.ast = state['ast']
+        self.code = CodeType(0, *state['code'])  # leading 0 is the argcount
+        self._globals = state['lookup'].globals
+
+    def __eq__(self, other):
+        return (type(other) == type(self)) and (self.code == other.code)
+
+    def __hash__(self):
+        return hash(self.code)  # consistent with __eq__, which compares code
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __repr__(self):
+        return '%s(%r)' % (type(self).__name__, self.source)
+
+
+class Expression(Code):
+    """Evaluates Python expressions used in templates.
+
+    >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
+    >>> Expression('test').evaluate(data)
+    'Foo'
+
+    >>> Expression('items[0]').evaluate(data)
+    1
+    >>> Expression('items[-1]').evaluate(data)
+    3
+    >>> Expression('dict["some"]').evaluate(data)
+    'thing'
+    
+    Similar to e.g. JavaScript, expressions in templates can use the dot
+    notation for attribute access to access items in mappings:
+    
+    >>> Expression('dict.some').evaluate(data)
+    'thing'
+    
+    This also works the other way around: item access can be used to access
+    any object attribute:
+    
+    >>> class MyClass(object):
+    ...     myattr = 'Bar'
+    >>> data = dict(mine=MyClass(), key='myattr')
+    >>> Expression('mine.myattr').evaluate(data)
+    'Bar'
+    >>> Expression('mine["myattr"]').evaluate(data)
+    'Bar'
+    >>> Expression('mine[key]').evaluate(data)
+    'Bar'
+    
+    All of the standard Python operators are available to template expressions.
+    Built-in functions such as ``len()`` are also available in template
+    expressions:
+    
+    >>> data = dict(items=[1, 2, 3])
+    >>> Expression('len(items)').evaluate(data)
+    3
+    """
+    __slots__ = []
+    mode = 'eval'  # read by Code.__init__ when parsing and compiling
+
+    def evaluate(self, data):
+        """Evaluate the expression against the given data dictionary.
+        
+        :param data: a mapping containing the data to evaluate against
+        :return: the result of the evaluation
+        """
+        __traceback_hide__ = 'before_and_this'
+        _globals = self._globals(data)
+        return eval(self.code, _globals, {'__data__': data})  # fresh locals
+
+
+class Suite(Code):
+    """Executes Python statements used in templates.
+
+    >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
+    >>> Suite("foo = dict['some']").execute(data)
+    >>> data['foo']
+    'thing'
+    """
+    __slots__ = []
+    mode = 'exec'  # read by Code.__init__ when parsing and compiling
+
+    def execute(self, data):
+        """Execute the suite in the given data dictionary.
+        
+        :param data: a mapping containing the data to execute in
+        """
+        __traceback_hide__ = 'before_and_this'
+        _globals = self._globals(data)
+        exec self.code in _globals, data  # data is the locals mapping
+
+
+UNDEFINED = object()  # unique "no value" sentinel, compared with ``is``
+
+
+class UndefinedError(TemplateRuntimeError):
+    """Exception thrown when a template expression attempts to access a variable
+    not defined in the context.
+    
+    :see: `LenientLookup`, `StrictLookup`
+    """
+    def __init__(self, name, owner=UNDEFINED):
+        if owner is not UNDEFINED:  # owner given: a member lookup failed
+            message = '%s has no member named "%s"' % (repr(owner), name)
+        else:  # no owner: a plain name was not found
+            message = '"%s" not defined' % name
+        TemplateRuntimeError.__init__(self, message)
+
+
+class Undefined(object):
+    """Represents a reference to an undefined variable.
+    
+    Unlike the Python runtime, template expressions can refer to an undefined
+    variable without causing a `NameError` to be raised. The result will be an
+    instance of the `Undefined` class, which is treated the same as ``False`` in
+    conditions, but raises an exception on any other operation:
+    
+    >>> foo = Undefined('foo')
+    >>> bool(foo)
+    False
+    >>> list(foo)
+    []
+    >>> print(foo)
+    undefined
+    
+    However, calling an undefined variable, or trying to access an attribute
+    of that variable, will raise an exception that includes the name used to
+    reference that undefined variable.
+    
+    >>> foo('bar')
+    Traceback (most recent call last):
+        ...
+    UndefinedError: "foo" not defined
+
+    >>> foo.bar
+    Traceback (most recent call last):
+        ...
+    UndefinedError: "foo" not defined
+    
+    :see: `LenientLookup`
+    """
+    __slots__ = ['_name', '_owner']
+
+    def __init__(self, name, owner=UNDEFINED):
+        """Initialize the object.
+        
+        :param name: the name of the reference
+        :param owner: the owning object, if the variable is accessed as a member
+        """
+        self._name = name
+        self._owner = owner
+
+    def __iter__(self):
+        return iter([])  # iterating an undefined value yields nothing
+
+    def __nonzero__(self):
+        return False
+
+    def __repr__(self):
+        return '<%s %r>' % (type(self).__name__, self._name)
+
+    def __str__(self):
+        return 'undefined'
+
+    def _die(self, *args, **kwargs):
+        """Raise an `UndefinedError`."""
+        __traceback_hide__ = True
+        raise UndefinedError(self._name, self._owner)
+    __call__ = __getattr__ = __getitem__ = _die  # call/attr/item all raise
+
+    # Hack around some behavior introduced in Python 2.6.2
+    # http://genshi.edgewall.org/ticket/324
+    __length_hint__ = None
+
+
+class LookupBase(object):
+    """Abstract base class for variable lookup implementations."""
+
+    @classmethod
+    def globals(cls, data):
+        """Construct the globals dictionary to use as the execution context for
+        the expression or suite.
+        """
+        return {
+            '__data__': data,
+            '_lookup_name': cls.lookup_name,
+            '_lookup_attr': cls.lookup_attr,
+            '_lookup_item': cls.lookup_item,
+            '_star_import_patch': _star_import_patch,
+            'UndefinedError': UndefinedError,
+        }
+
+    @classmethod
+    def lookup_name(cls, data, name):
+        __traceback_hide__ = True
+        val = data.get(name, UNDEFINED)  # template data wins over builtins
+        if val is UNDEFINED:
+            val = BUILTINS.get(name, val)
+            if val is UNDEFINED:
+                val = cls.undefined(name)
+        return val
+
+    @classmethod
+    def lookup_attr(cls, obj, key):
+        __traceback_hide__ = True
+        try:
+            val = getattr(obj, key)
+        except AttributeError:
+            if hasattr(obj.__class__, key):
+                raise  # the class defines it, so the error came from within
+            else:
+                try:
+                    val = obj[key]  # fall back to item access (e.g. mappings)
+                except (KeyError, TypeError):
+                    val = cls.undefined(key, owner=obj)
+        return val
+
+    @classmethod
+    def lookup_item(cls, obj, key):
+        __traceback_hide__ = True
+        if len(key) == 1:  # key is a sequence; unwrap a single subscript
+            key = key[0]
+        try:
+            return obj[key]
+        except (AttributeError, KeyError, IndexError, TypeError):
+            if isinstance(key, basestring):  # fall back to attribute access
+                val = getattr(obj, key, UNDEFINED)
+                if val is UNDEFINED:
+                    val = cls.undefined(key, owner=obj)
+                return val
+            raise  # non-string key: nothing to fall back to
+
+    @classmethod
+    def undefined(cls, key, owner=UNDEFINED):
+        """Can be overridden by subclasses to specify behavior when undefined
+        variables are accessed.
+        
+        :param key: the name of the variable
+        :param owner: the owning object, if the variable is accessed as a member
+        """
+        raise NotImplementedError
+
+
+class LenientLookup(LookupBase):
+    """Default variable lookup mechanism for expressions.
+    
+    When an undefined variable is referenced using this lookup style, the
+    reference evaluates to an instance of the `Undefined` class:
+    
+    >>> expr = Expression('nothing', lookup='lenient')
+    >>> undef = expr.evaluate({})
+    >>> undef
+    <Undefined 'nothing'>
+    
+    The same will happen when a non-existent attribute or item is accessed on
+    an existing object:
+    
+    >>> expr = Expression('something.nil', lookup='lenient')
+    >>> expr.evaluate({'something': dict()})
+    <Undefined 'nil'>
+    
+    See the documentation of the `Undefined` class for details on the behavior
+    of such objects.
+    
+    :see: `StrictLookup`
+    """
+
+    @classmethod
+    def undefined(cls, key, owner=UNDEFINED):
+        """Return an ``Undefined`` object."""
+        __traceback_hide__ = True
+        return Undefined(key, owner=owner)  # a placeholder, not an error
+
+
+class StrictLookup(LookupBase):
+    """Strict variable lookup mechanism for expressions.
+    
+    Referencing an undefined variable using this lookup style will immediately
+    raise an ``UndefinedError``:
+    
+    >>> expr = Expression('nothing', lookup='strict')
+    >>> expr.evaluate({})
+    Traceback (most recent call last):
+        ...
+    UndefinedError: "nothing" not defined
+    
+    The same happens when a non-existing attribute or item is accessed on an
+    existing object:
+    
+    >>> expr = Expression('something.nil', lookup='strict')
+    >>> expr.evaluate({'something': dict()})
+    Traceback (most recent call last):
+        ...
+    UndefinedError: {} has no member named "nil"
+    """
+
+    @classmethod
+    def undefined(cls, key, owner=UNDEFINED):
+        """Raise an ``UndefinedError`` for `key` immediately; never returns."""
+        __traceback_hide__ = True
+        raise UndefinedError(key, owner=owner)
+
+
+def _parse(source, mode='eval'):
+    source = source.strip()
+    if mode == 'exec':
+        lines = [line.expandtabs() for line in source.splitlines()]
+        if lines:
+            first = lines[0]
+            rest = dedent('\n'.join(lines[1:])).rstrip()
+            if first.rstrip().endswith(':') and not rest[0].isspace():
+                rest = '\n'.join(['    %s' % line for line in rest.splitlines()])
+            source = '\n'.join([first, rest])
+    if isinstance(source, unicode):
+        source = '\xef\xbb\xbf' + source.encode('utf-8')
+    return parse(source, mode)
+
+
+def _compile(node, source=None, mode='eval', filename=None, lineno=-1,
+             xform=None):
+    if isinstance(filename, unicode):
+        # unicode file names not allowed for code objects
+        filename = filename.encode('utf-8', 'replace')
+    elif not filename:
+        filename = '<string>'
+    if lineno <= 0:
+        lineno = 1
+
+    if xform is None:
+        xform = {
+            'eval': ExpressionASTTransformer
+        }.get(mode, TemplateASTTransformer)
+    tree = xform().visit(node)
+
+    if mode == 'eval':
+        name = '<Expression %r>' % (source or '?')
+    else:
+        lines = source.splitlines()
+        if not lines:
+            extract = ''
+        else:
+            extract = lines[0]
+        if len(lines) > 1:
+            extract += ' ...'
+        name = '<Suite %r>' % (extract)
+    new_source = ASTCodeGenerator(tree).code
+    code = compile(new_source, filename, mode)
+
+    try:
+        # We'd like to just set co_firstlineno, but it's readonly. So we need
+        # to clone the code object while adjusting the line number
+        return CodeType(0, code.co_nlocals, code.co_stacksize,
+                        code.co_flags | 0x0040, code.co_code, code.co_consts,
+                        code.co_names, code.co_varnames, filename, name,
+                        lineno, code.co_lnotab, (), ())
+    except RuntimeError:
+        return code
+
+
+def _new(class_, *args, **kwargs):
+    ret = class_()
+    for attr, value in zip(ret._fields, args):
+        if attr in kwargs:
+            raise ValueError('Field set both in args and kwargs')
+        setattr(ret, attr, value)
+    for attr, value in kwargs:
+        setattr(ret, attr, value)
+    return ret
+
+
+BUILTINS = __builtin__.__dict__.copy()  # copy, so the update below stays local
+BUILTINS.update({'Markup': Markup, 'Undefined': Undefined})
+CONSTANTS = frozenset(['False', 'True', 'None', 'NotImplemented', 'Ellipsis'])
+
+
+class TemplateASTTransformer(ASTTransformer):
+    """Concrete AST transformer that implements the AST transformations needed
+    for code embedded in templates.
+    """
+
+    def __init__(self):
+        self.locals = [CONSTANTS]
+
+    def _extract_names(self, node):
+        names = set()
+        def _process(node):
+            if isinstance(node, _ast.Name):
+                names.add(node.id)
+            elif isinstance(node, _ast.alias):
+                names.add(node.asname or node.name)
+            elif isinstance(node, _ast.Tuple):
+                for elt in node.elts:
+                    _process(elt)
+        if hasattr(node, 'args'):
+            for arg in node.args:
+                _process(arg)
+            if hasattr(node, 'vararg'):
+                names.add(node.vararg)
+            if hasattr(node, 'kwarg'):
+                names.add(node.kwarg)
+        elif hasattr(node, 'names'):
+            for elt in node.names:
+                _process(elt)
+        return names
+
+    def visit_Str(self, node):
+        if isinstance(node.s, str):
+            try: # If the string is ASCII, return a `str` object
+                node.s.decode('ascii')
+            except ValueError: # Otherwise return a `unicode` object
+                return _new(_ast.Str, node.s.decode('utf-8'))
+        return node
+
+    def visit_ClassDef(self, node):
+        if len(self.locals) > 1:
+            self.locals[-1].add(node.name)
+        self.locals.append(set())
+        try:
+            return ASTTransformer.visit_ClassDef(self, node)
+        finally:
+            self.locals.pop()
+
+    def visit_Import(self, node):
+        if len(self.locals) > 1:
+            self.locals[-1].update(self._extract_names(node))
+        return ASTTransformer.visit_Import(self, node)
+
+    def visit_ImportFrom(self, node):
+        if [a.name for a in node.names] == ['*']:
+            if has_star_import_bug:
+                # This is a Python 2.4 bug. Only if we have a broken Python
+                # version do we need to apply this hack
+                node = _new(_ast.Expr, _new(_ast.Call,
+                    _new(_ast.Name, '_star_import_patch'), [
+                        _new(_ast.Name, '__data__'),
+                        _new(_ast.Str, node.module)
+                    ], (), ()))
+            return node
+        if len(self.locals) > 1:
+            self.locals[-1].update(self._extract_names(node))
+        return ASTTransformer.visit_ImportFrom(self, node)
+
+    def visit_FunctionDef(self, node):
+        if len(self.locals) > 1:
+            self.locals[-1].add(node.name)
+
+        self.locals.append(self._extract_names(node.args))
+        try:
+            return ASTTransformer.visit_FunctionDef(self, node)
+        finally:
+            self.locals.pop()
+
+    # GeneratorExp(expr elt, comprehension* generators)
+    def visit_GeneratorExp(self, node):
+        gens = []
+        for generator in node.generators:
+            # comprehension = (expr target, expr iter, expr* ifs)
+            self.locals.append(set())
+            gen = _new(_ast.comprehension, self.visit(generator.target),
+                       self.visit(generator.iter),
+                       [self.visit(if_) for if_ in generator.ifs])
+            gens.append(gen)
+
+        # use node.__class__ to make it reusable as ListComp
+        ret = _new(node.__class__, self.visit(node.elt), gens)
+        #delete inserted locals
+        del self.locals[-len(node.generators):]
+        return ret
+
+    # ListComp(expr elt, comprehension* generators)
+    visit_ListComp = visit_GeneratorExp
+
+    def visit_Lambda(self, node):
+        self.locals.append(self._extract_names(node.args))
+        try:
+            return ASTTransformer.visit_Lambda(self, node)
+        finally:
+            self.locals.pop()
+
+    def visit_Name(self, node):
+        # If the name refers to a local inside a lambda, list comprehension, or
+        # generator expression, leave it alone
+        if isinstance(node.ctx, _ast.Load) and \
+                node.id not in flatten(self.locals):
+            # Otherwise, translate the name ref into a context lookup
+            name = _new(_ast.Name, '_lookup_name', _ast.Load())
+            namearg = _new(_ast.Name, '__data__', _ast.Load())
+            strarg = _new(_ast.Str, node.id)
+            node = _new(_ast.Call, name, [namearg, strarg], [])
+        elif isinstance(node.ctx, _ast.Store):
+            if len(self.locals) > 1:
+                self.locals[-1].add(node.id)
+
+        return node
+
+
+class ExpressionASTTransformer(TemplateASTTransformer):
+    """Concrete AST transformer that implements the AST transformations needed
+    for code embedded in templates.
+    """
+
+    def visit_Attribute(self, node):
+        if not isinstance(node.ctx, _ast.Load):
+            return ASTTransformer.visit_Attribute(self, node)
+
+        func = _new(_ast.Name, '_lookup_attr', _ast.Load())
+        args = [self.visit(node.value), _new(_ast.Str, node.attr)]
+        return _new(_ast.Call, func, args, [])
+
+    def visit_Subscript(self, node):
+        if not isinstance(node.ctx, _ast.Load) or \
+                not isinstance(node.slice, _ast.Index):
+            return ASTTransformer.visit_Subscript(self, node)
+
+        func = _new(_ast.Name, '_lookup_item', _ast.Load())
+        args = [
+            self.visit(node.value),
+            _new(_ast.Tuple, (self.visit(node.slice.value),), _ast.Load())
+        ]
+        return _new(_ast.Call, func, args, [])
diff --git a/genshi/template/interpolation.py b/genshi/template/interpolation.py
new file mode 100644
index 0000000..1e1a385
--- /dev/null
+++ b/genshi/template/interpolation.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""String interpolation routines, i.e. splitting up a given text into some
+parts that are literal strings, and others that are Python expressions.
+"""
+
+from itertools import chain
+import os
+import re
+from tokenize import PseudoToken
+
+from genshi.core import TEXT
+from genshi.template.base import TemplateSyntaxError, EXPR
+from genshi.template.eval import Expression
+
+__all__ = ['interpolate']
+__docformat__ = 'restructuredtext en'
+
+NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+NAMECHARS = NAMESTART + '.0123456789'
+PREFIX = '$'
+
+token_re = re.compile('%s|%s(?s)' % (
+    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
+    PseudoToken
+))
+
+
+def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
+    """Parse the given string and extract expressions.
+    
+    This function is a generator that yields `TEXT` events for literal strings,
+    and `EXPR` events for expressions, depending on the results of parsing the
+    string.
+    
+    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
+    ...     print('%s %r' % (kind, data))
+    TEXT 'hey '
+    EXPR Expression('foo')
+    TEXT 'bar'
+    
+    :param text: the text to parse
+    :param filepath: absolute path to the file in which the text was found
+                     (optional)
+    :param lineno: the line number at which the text was found (optional)
+    :param offset: the column number at which the text starts in the source
+                   (optional)
+    :param lookup: the variable lookup mechanism; either "lenient" (the
+                   default), "strict", or a custom lookup class
+    :return: a list of `TEXT` and `EXPR` events
+    :raise TemplateSyntaxError: when a syntax error in an expression is
+                                encountered
+    """
+    pos = [filepath, lineno, offset]
+
+    textbuf = []
+    textpos = None
+    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
+        if is_expr:
+            if textbuf:
+                yield TEXT, ''.join(textbuf), textpos
+                del textbuf[:]
+                textpos = None
+            if chunk:
+                try:
+                    expr = Expression(chunk.strip(), pos[0], pos[1],
+                                      lookup=lookup)
+                    yield EXPR, expr, tuple(pos)
+                except SyntaxError, err:
+                    raise TemplateSyntaxError(err, filepath, pos[1],
+                                              pos[2] + (err.offset or 0))
+        else:
+            textbuf.append(chunk)
+            if textpos is None:
+                textpos = tuple(pos)
+
+        if '\n' in chunk:
+            lines = chunk.splitlines()
+            pos[1] += len(lines) - 1
+            pos[2] += len(lines[-1])
+        else:
+            pos[2] += len(chunk)
+
+
+def lex(text, textpos, filepath):
+    offset = pos = 0
+    end = len(text)
+    escaped = False
+
+    while 1:
+        if escaped:
+            offset = text.find(PREFIX, offset + 2)
+            escaped = False
+        else:
+            offset = text.find(PREFIX, pos)
+        if offset < 0 or offset == end - 1:
+            break
+        next = text[offset + 1]
+
+        if next == '{':
+            if offset > pos:
+                yield False, text[pos:offset]
+            pos = offset + 2
+            level = 1
+            while level:
+                match = token_re.match(text, pos)
+                if match is None:
+                    raise TemplateSyntaxError('invalid syntax',  filepath,
+                                              *textpos[1:])
+                pos = match.end()
+                tstart, tend = match.regs[3]
+                token = text[tstart:tend]
+                if token == '{':
+                    level += 1
+                elif token == '}':
+                    level -= 1
+            yield True, text[offset + 2:pos - 1]
+
+        elif next in NAMESTART:
+            if offset > pos:
+                yield False, text[pos:offset]
+                pos = offset
+            pos += 1
+            while pos < end:
+                char = text[pos]
+                if char not in NAMECHARS:
+                    break
+                pos += 1
+            yield True, text[offset + 1:pos].strip()
+
+        elif not escaped and next == PREFIX:
+            if offset > pos:
+                yield False, text[pos:offset]
+            escaped = True
+            pos = offset + 1
+
+        else:
+            yield False, text[pos:offset + 1]
+            pos = offset + 1
+
+    if pos < end:
+        yield False, text[pos:]
diff --git a/genshi/template/loader.py b/genshi/template/loader.py
new file mode 100644
index 0000000..0e7cda7
--- /dev/null
+++ b/genshi/template/loader.py
@@ -0,0 +1,344 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Template loading and caching."""
+
+import os
+try:
+    import threading
+except ImportError:
+    import dummy_threading as threading
+
+from genshi.template.base import TemplateError
+from genshi.util import LRUCache
+
+__all__ = ['TemplateLoader', 'TemplateNotFound', 'directory', 'package',
+           'prefixed']
+__docformat__ = 'restructuredtext en'
+
+
+class TemplateNotFound(TemplateError):
+    """Exception raised when a specific template file could not be found."""
+
+    def __init__(self, name, search_path):
+        """Create the exception.
+        
+        :param name: the filename of the template
+        :param search_path: the search path used to lookup the template
+        """
+        TemplateError.__init__(self, 'Template "%s" not found' % name)
+        self.search_path = search_path
+
+
+class TemplateLoader(object):
+    """Responsible for loading templates from files on the specified search
+    path.
+    
+    >>> import tempfile
+    >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template')
+    >>> os.write(fd, '<p>$var</p>')
+    11
+    >>> os.close(fd)
+    
+    The template loader accepts a list of directory paths that are then used
+    when searching for template files, in the given order:
+    
+    >>> loader = TemplateLoader([os.path.dirname(path)])
+    
+    The `load()` method first checks the template cache whether the requested
+    template has already been loaded. If not, it attempts to locate the
+    template file, and returns the corresponding `Template` object:
+    
+    >>> from genshi.template import MarkupTemplate
+    >>> template = loader.load(os.path.basename(path))
+    >>> isinstance(template, MarkupTemplate)
+    True
+    
+    Template instances are cached: requesting a template with the same name
+    results in the same instance being returned:
+    
+    >>> loader.load(os.path.basename(path)) is template
+    True
+    
+    The `auto_reload` option can be used to control whether a template should
+    be automatically reloaded when the file it was loaded from has been
+    changed. Disable this automatic reloading to improve performance.
+    
+    >>> os.remove(path)
+    """
+    def __init__(self, search_path=None, auto_reload=False,
+                 default_encoding=None, max_cache_size=25, default_class=None,
+                 variable_lookup='strict', allow_exec=True, callback=None):
+        """Create the template loader.
+        
+        :param search_path: a list of absolute path names that should be
+                            searched for template files, or a string containing
+                            a single absolute path; alternatively, any item on
+                            the list may be a ''load function'' that is passed
+                            a filename and returns a file-like object and some
+                            metadata
+        :param auto_reload: whether to check the last modification time of
+                            template files, and reload them if they have changed
+        :param default_encoding: the default encoding to assume when loading
+                                 templates; defaults to UTF-8
+        :param max_cache_size: the maximum number of templates to keep in the
+                               cache
+        :param default_class: the default `Template` subclass to use when
+                              instantiating templates
+        :param variable_lookup: the variable lookup mechanism; either "strict"
+                                (the default), "lenient", or a custom lookup
+                                class
+        :param allow_exec: whether to allow Python code blocks in templates
+        :param callback: (optional) a callback function that is invoked after a
+                         template was initialized by this loader; the function
+                         is passed the template object as only argument. This
+                         callback can be used for example to add any desired
+                         filters to the template
+        :see: `LenientLookup`, `StrictLookup`
+        
+        :note: Changed in 0.5: Added the `allow_exec` argument
+        """
+        from genshi.template.markup import MarkupTemplate
+
+        self.search_path = search_path
+        if self.search_path is None:
+            self.search_path = []
+        elif not isinstance(self.search_path, (list, tuple)):
+            self.search_path = [self.search_path]
+
+        self.auto_reload = auto_reload
+        """Whether templates should be reloaded when the underlying file is
+        changed"""
+
+        self.default_encoding = default_encoding
+        self.default_class = default_class or MarkupTemplate
+        self.variable_lookup = variable_lookup
+        self.allow_exec = allow_exec
+        if callback is not None and not hasattr(callback, '__call__'):
+            raise TypeError('The "callback" parameter needs to be callable')
+        self.callback = callback
+        self._cache = LRUCache(max_cache_size)
+        self._uptodate = {}  # cache key -> freshness callback, or None
+        self._lock = threading.RLock()
+
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        state['_lock'] = None  # replaced by a fresh lock in __setstate__
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__ = state
+        self._lock = threading.RLock()
+
+    def load(self, filename, relative_to=None, cls=None, encoding=None):
+        """Load the template with the given name.
+        
+        If the `filename` parameter is relative, this method searches the
+        search path trying to locate a template matching the given name. If the
+        file name is an absolute path, the search path is ignored.
+        
+        If the requested template is not found, a `TemplateNotFound` exception
+        is raised. Otherwise, a `Template` object is returned that represents
+        the parsed template.
+        
+        Template instances are cached to avoid having to parse the same
+        template file more than once. Thus, subsequent calls of this method
+        with the same template file name will return the same `Template`
+        object (unless the ``auto_reload`` option is enabled and the file was
+        changed since the last parse.)
+        
+        If the `relative_to` parameter is provided, the `filename` is
+        interpreted as being relative to that path.
+        
+        :param filename: the relative path of the template file to load
+        :param relative_to: the filename of the template from which the new
+                            template is being loaded, or ``None`` if the
+                            template is being loaded directly
+        :param cls: the class of the template object to instantiate
+        :param encoding: the encoding of the template to load; defaults to the
+                         ``default_encoding`` of the loader instance
+        :return: the loaded `Template` instance
+        :raises TemplateNotFound: if a template with the given name could not
+                                  be found
+        """
+        if cls is None:
+            cls = self.default_class
+        search_path = self.search_path
+
+        # Make the filename relative to the template file it's being loaded
+        # from, but only if that file is specified as a relative path, or no
+        # search path has been set up
+        if relative_to and (not search_path or not os.path.isabs(relative_to)):
+            filename = os.path.join(os.path.dirname(relative_to), filename)
+
+        filename = os.path.normpath(filename)
+        cachekey = filename
+
+        self._lock.acquire()
+        try:
+            # First check the cache to avoid reparsing the same file
+            try:
+                tmpl = self._cache[cachekey]
+                if not self.auto_reload:
+                    return tmpl
+                uptodate = self._uptodate[cachekey]
+                if uptodate is not None and uptodate():
+                    return tmpl
+            except (KeyError, OSError):
+                pass
+
+            isabs = False
+
+            if os.path.isabs(filename):
+                # Bypass the search path if the requested filename is absolute
+                search_path = [os.path.dirname(filename)]
+                isabs = True
+
+            elif relative_to and os.path.isabs(relative_to):
+                # Make sure that the directory containing the including
+                # template is on the search path
+                dirname = os.path.dirname(relative_to)
+                if dirname not in search_path:
+                    search_path = list(search_path) + [dirname]
+                isabs = True
+
+            elif not search_path:
+                # Uh oh, don't know where to look for the template
+                raise TemplateError('Search path for templates not configured')
+
+            for loadfunc in search_path:
+                if isinstance(loadfunc, basestring):
+                    loadfunc = directory(loadfunc)
+                try:
+                    filepath, filename, fileobj, uptodate = loadfunc(filename)
+                except IOError:
+                    continue  # this entry cannot supply the file; try the next
+                else:
+                    try:
+                        if isabs:
+                            # If the filename of either the included or the 
+                            # including template is absolute, make sure the
+                            # included template gets an absolute path, too,
+                            # so that nested includes work properly without a
+                            # search path
+                            filename = filepath
+                        tmpl = self._instantiate(cls, fileobj, filepath,
+                                                 filename, encoding=encoding)
+                        if self.callback:
+                            self.callback(tmpl)
+                        self._cache[cachekey] = tmpl
+                        self._uptodate[cachekey] = uptodate
+                    finally:
+                        if hasattr(fileobj, 'close'):
+                            fileobj.close()
+                    return tmpl
+
+            raise TemplateNotFound(filename, search_path)
+
+        finally:
+            self._lock.release()
+
+    def _instantiate(self, cls, fileobj, filepath, filename, encoding=None):
+        """Instantiate and return the `Template` object based on the given
+        class and parameters.
+        
+        This function is intended for subclasses to override if they need to
+        implement special template instantiation logic. Code that just uses
+        the `TemplateLoader` should use the `load` method instead.
+        
+        :param cls: the class of the template object to instantiate
+        :param fileobj: a readable file-like object containing the template
+                        source
+        :param filepath: the absolute path to the template file
+        :param filename: the path to the template file relative to the search
+                         path
+        :param encoding: the encoding of the template to load; defaults to the
+                         ``default_encoding`` of the loader instance
+        :return: the loaded `Template` instance
+        :rtype: `Template`
+        """
+        if encoding is None:
+            encoding = self.default_encoding
+        return cls(fileobj, filepath=filepath, filename=filename, loader=self,
+                   encoding=encoding, lookup=self.variable_lookup,
+                   allow_exec=self.allow_exec)
+
+    @staticmethod
+    def directory(path):
+        """Loader factory for loading templates from a local directory.
+        
+        :param path: the path to the local directory containing the templates
+        :return: the loader function to load templates from the given directory
+        :rtype: ``function``
+        """
+        def _load_from_directory(filename):
+            filepath = os.path.join(path, filename)
+            fileobj = open(filepath, 'U')
+            mtime = os.path.getmtime(filepath)  # snapshot taken at load time
+            def _uptodate():
+                return mtime == os.path.getmtime(filepath)
+            return filepath, filename, fileobj, _uptodate
+        return _load_from_directory
+
+    @staticmethod
+    def package(name, path):
+        """Loader factory for loading templates from egg package data.
+        
+        :param name: the name of the package containing the resources
+        :param path: the path inside the package data
+        :return: the loader function to load templates from the given package
+        :rtype: ``function``
+        """
+        from pkg_resources import resource_stream
+        def _load_from_package(filename):
+            filepath = os.path.join(path, filename)
+            return filepath, filename, resource_stream(name, filepath), None
+        return _load_from_package
+
+    @staticmethod
+    def prefixed(**delegates):
+        """Factory for a load function that delegates to other loaders
+        depending on the prefix of the requested template path.
+        
+        The prefix is stripped from the filename when passing on the load
+        request to the delegate.
+        
+        >>> load = prefixed(
+        ...     app1 = lambda filename: ('app1', filename, None, None),
+        ...     app2 = lambda filename: ('app2', filename, None, None)
+        ... )
+        >>> print(load('app1/foo.html'))
+        ('app1', 'app1/foo.html', None, None)
+        >>> print(load('app2/bar.html'))
+        ('app2', 'app2/bar.html', None, None)
+        
+        :param delegates: mapping of path prefixes to loader functions
+        :return: the loader function
+        :rtype: ``function``
+        """
+        def _dispatch_by_prefix(filename):
+            for prefix, delegate in delegates.items():
+                if filename.startswith(prefix):
+                    if isinstance(delegate, basestring):
+                        delegate = directory(delegate)
+                    filepath, _, fileobj, uptodate = delegate(
+                        filename[len(prefix):].lstrip('/\\')
+                    )
+                    return filepath, filename, fileobj, uptodate
+            raise TemplateNotFound(filename, list(delegates.keys()))
+        return _dispatch_by_prefix
+
+
+directory = TemplateLoader.directory  # module-level aliases for the factories
+package = TemplateLoader.package
+prefixed = TemplateLoader.prefixed
diff --git a/genshi/template/markup.py b/genshi/template/markup.py
new file mode 100644
index 0000000..0e31632
--- /dev/null
+++ b/genshi/template/markup.py
@@ -0,0 +1,397 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Markup templating engine."""
+
+from itertools import chain
+
+from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind
+from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT
+from genshi.input import XMLParser
+from genshi.template.base import BadDirectiveError, Template, \
+                                 TemplateSyntaxError, _apply_directives, \
+                                 EXEC, INCLUDE, SUB
+from genshi.template.eval import Suite
+from genshi.template.interpolation import interpolate
+from genshi.template.directives import *
+from genshi.template.text import NewTextTemplate
+
+__all__ = ['MarkupTemplate']
+__docformat__ = 'restructuredtext en'
+
+
+class MarkupTemplate(Template):
+    """Implementation of the template language for XML-based templates.
+    
+    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
+    ...   <li py:for="item in items">${item}</li>
+    ... </ul>''')
+    >>> print(tmpl.generate(items=[1, 2, 3]))
+    <ul>
+      <li>1</li><li>2</li><li>3</li>
+    </ul>
+    """
+
+    # Namespace URI of the built-in template directives; ``__init__``
+    # registers the template itself as the directive factory for it.
+    DIRECTIVE_NAMESPACE = 'http://genshi.edgewall.org/'
+    # Namespace URI whose ``include`` and ``fallback`` elements are handled
+    # by ``_extract_includes``.
+    XINCLUDE_NAMESPACE = 'http://www.w3.org/2001/XInclude'
+
+    # (name, class) pairs of the directives available in markup templates.
+    # NOTE(review): the position in this list presumably determines the
+    # value returned by ``get_directive_index`` and thereby the order in
+    # which several directives on one element are applied -- confirm against
+    # the directive factory implementation.
+    directives = [('def', DefDirective),
+                  ('match', MatchDirective),
+                  ('when', WhenDirective),
+                  ('otherwise', OtherwiseDirective),
+                  ('for', ForDirective),
+                  ('if', IfDirective),
+                  ('choose', ChooseDirective),
+                  ('with', WithDirective),
+                  ('replace', ReplaceDirective),
+                  ('content', ContentDirective),
+                  ('attrs', AttrsDirective),
+                  ('strip', StripDirective)]
+    # presumably the default output method used when rendering -- confirm
+    serializer = 'xml'
+    # NOTE(review): not used inside this class; presumably consumed by the
+    # base class when converting numbers for output -- confirm
+    _number_conv = Markup
+
+    def __init__(self, source, filepath=None, filename=None, loader=None,
+                 encoding=None, lookup='strict', allow_exec=True):
+        """Initialize the template and register the built-in directives.
+        
+        All arguments are handed to `Template.__init__` unchanged. Afterwards
+        the directives of `DIRECTIVE_NAMESPACE` are extracted from the parsed
+        stream, using the template itself as the directive factory.
+        """
+        base_options = dict(filepath=filepath, filename=filename,
+                            loader=loader, encoding=encoding, lookup=lookup,
+                            allow_exec=allow_exec)
+        Template.__init__(self, source, **base_options)
+        self.add_directives(self.DIRECTIVE_NAMESPACE, self)
+
+    def _init_filters(self):
+        """Set up the filter chain, adding the match filter.
+        
+        The filters installed by the base class are kept, except that the
+        include filter is moved to the end so that it runs after the match
+        filter.
+        """
+        Template._init_filters(self)
+        include_filter = self._include
+        self.filters.remove(include_filter)
+        # Re-append the include filter behind the match filter
+        self.filters += [self._match, include_filter]
+
+    def _parse(self, source, encoding):
+        """Convert the template source into a list of template events.
+        
+        Anything that is not already a `Stream` is parsed with `XMLParser`.
+        Text is split into literal and expression events, ``<?python ?>``
+        processing instructions are compiled into `EXEC` events, and comments
+        starting with ``!`` are dropped. All other events are kept as they
+        are.
+        
+        :param source: the template source, or a `Stream` of markup events
+        :param encoding: the encoding handed to the XML parser
+        :return: the list of events
+        :raises TemplateSyntaxError: if a Python code block is found although
+                                     code execution is disabled, or if such
+                                     a block contains a syntax error
+        """
+        if not isinstance(source, Stream):
+            source = XMLParser(source, filename=self.filename,
+                               encoding=encoding)
+        events = []
+        emit = events.append
+
+        for kind, data, pos in source:
+
+            if kind is TEXT:
+                # Split the text into literal and expression events
+                for item in interpolate(data, self.filepath, pos[1], pos[2],
+                                        lookup=self.lookup):
+                    item_kind, item_data, item_pos = item
+                    emit((item_kind, item_data, item_pos))
+                continue
+
+            if kind is PI and data[0] == 'python':
+                if not self.allow_exec:
+                    raise TemplateSyntaxError('Python code blocks not allowed',
+                                              self.filepath, *pos[1:])
+                try:
+                    code = Suite(data[1], self.filepath, pos[1],
+                                 lookup=self.lookup)
+                except SyntaxError, err:
+                    # Translate the error location, which is relative to the
+                    # code block, into a position in the template
+                    error_line = pos[1] + (err.lineno or 1) - 1
+                    error_column = pos[2] + (err.offset or 0)
+                    raise TemplateSyntaxError(err, self.filepath, error_line,
+                                              error_column)
+                emit((EXEC, code, pos))
+                continue
+
+            if kind is COMMENT and data.lstrip().startswith('!'):
+                # Comments starting with "!" never reach the output
+                continue
+
+            emit((kind, data, pos))
+
+        return events
+
+    def _extract_directives(self, stream, namespace, factory):
+        """Move the directives of the given namespace out of the stream.
+        
+        Elements and attributes in `namespace` are looked up in `factory` and
+        removed from the event stream. The events of every element carrying
+        at least one directive are replaced by a single
+        ``(SUB, (directives, substream), pos)`` event. Namespace declarations
+        for `namespace` itself are dropped from the output.
+        
+        :param stream: the list of events to process
+        :param namespace: the namespace URI of the directives
+        :param factory: object providing ``get_directive(name)`` and
+                        ``get_directive_index(cls)``
+        :return: the new list of events
+        :raises BadDirectiveError: if `factory` knows no directive for an
+                                   element or attribute name in `namespace`
+        """
+        depth = 0
+        dirmap = {} # temporary mapping of directives to elements
+        new_stream = []
+        ns_prefix = {} # namespace prefixes in use
+
+        for kind, data, pos in stream:
+
+            if kind is START:
+                tag, attrs = data
+                directives = []
+                strip = False
+
+                if tag.namespace == namespace:
+                    # The element itself is a directive; its attributes
+                    # without a namespace become the directive arguments
+                    cls = factory.get_directive(tag.localname)
+                    if cls is None:
+                        raise BadDirectiveError(tag.localname,
+                                                self.filepath, pos[1])
+                    args = dict([(name.localname, value) for name, value
+                                 in attrs if not name.namespace])
+                    directives.append((factory.get_directive_index(cls), cls,
+                                       args, ns_prefix.copy(), pos))
+                    # The directive element's own tags are removed later on
+                    strip = True
+
+                new_attrs = []
+                for name, value in attrs:
+                    if name.namespace == namespace:
+                        cls = factory.get_directive(name.localname)
+                        if cls is None:
+                            raise BadDirectiveError(name.localname,
+                                                    self.filepath, pos[1])
+                        # A list holding a single event is reduced to the
+                        # data of that event
+                        if type(value) is list and len(value) == 1:
+                            value = value[0][1]
+                        directives.append((factory.get_directive_index(cls),
+                                           cls, value, ns_prefix.copy(), pos))
+                    else:
+                        new_attrs.append((name, value))
+                new_attrs = Attrs(new_attrs)
+
+                if directives:
+                    # Tuples sort by their first item, the directive index
+                    directives.sort()
+                    # Remember where the element starts in the new stream
+                    dirmap[(depth, tag)] = (directives, len(new_stream),
+                                            strip)
+
+                new_stream.append((kind, (tag, new_attrs), pos))
+                depth += 1
+
+            elif kind is END:
+                depth -= 1
+                new_stream.append((kind, data, pos))
+
+                # If there have been directive attributes with the
+                # corresponding start tag, move the events in between into
+                # a "subprogram"
+                if (depth, data) in dirmap:
+                    directives, offset, strip = dirmap.pop((depth, data))
+                    substream = new_stream[offset:]
+                    if strip:
+                        # Drop the start and end tag of a directive element
+                        substream = substream[1:-1]
+                    new_stream[offset:] = [
+                        (SUB, (directives, substream), pos)
+                    ]
+
+            elif kind is SUB:
+                # Process the events of an already extracted subprogram
+                directives, substream = data
+                substream = self._extract_directives(substream, namespace,
+                                                     factory)
+
+                # If the result is a single subprogram again, merge its
+                # directives into the existing ones instead of nesting
+                if len(substream) == 1 and substream[0][0] is SUB:
+                    added_directives, substream = substream[0][1]
+                    directives += added_directives
+
+                new_stream.append((kind, (directives, substream), pos))
+
+            elif kind is START_NS:
+                # Strip out the namespace declaration for template
+                # directives
+                prefix, uri = data
+                ns_prefix[prefix] = uri
+                if uri != namespace:
+                    new_stream.append((kind, data, pos))
+
+            elif kind is END_NS:
+                # Only emit the event if the matching declaration was kept
+                uri = ns_prefix.pop(data, None)
+                if uri and uri != namespace:
+                    new_stream.append((kind, data, pos))
+
+            else:
+                new_stream.append((kind, data, pos))
+
+        return new_stream
+
+    def _extract_includes(self, stream):
+        """Replace XInclude elements in the stream with `INCLUDE` events.
+        
+        Every ``include`` element in `XINCLUDE_NAMESPACE` becomes one
+        ``(INCLUDE, (href, cls, fallback), pos)`` event, where `cls` is the
+        template class selected by the ``parse`` attribute (``xml`` or
+        ``text``; the class of this template if the attribute is missing or
+        empty) and `fallback` is the list of events collected from a nested
+        ``fallback`` element, or `None`. Other content between the include
+        tags is discarded, and the declarations of the XInclude namespace
+        are removed from the stream.
+        
+        :param stream: the events to process
+        :return: the new list of events
+        :raises TemplateSyntaxError: if an include has no ``href`` attribute
+                                     or an unsupported ``parse`` value
+        """
+        streams = [[]] # stacked lists of events of the "compiled" template
+        prefixes = {}
+        fallbacks = []
+        includes = []
+        xinclude_ns = Namespace(self.XINCLUDE_NAMESPACE)
+        parse_classes = {'xml': MarkupTemplate, 'text': NewTextTemplate}
+
+        for kind, data, pos in stream:
+            current = streams[-1]
+
+            if kind is START:
+                tag, attrs = data
+                if tag in xinclude_ns:
+                    if tag.localname == 'include':
+                        include_href = attrs.get('href')
+                        if not include_href:
+                            raise TemplateSyntaxError('Include misses required '
+                                                      'attribute "href"',
+                                                      self.filepath, *pos[1:])
+                        includes.append((include_href, attrs.get('parse')))
+                        # Collect whatever is nested in the include element
+                        # in a buffer of its own
+                        streams.append([])
+                    elif tag.localname == 'fallback':
+                        streams.append([])
+                        fallbacks.append(streams[-1])
+                else:
+                    current.append((kind, (tag, attrs), pos))
+
+            elif kind is END:
+                if fallbacks and data == xinclude_ns['fallback']:
+                    # Pop outside of the assert statement: assertions are
+                    # removed when Python runs with -O, and the buffer must
+                    # be removed from the stack regardless
+                    closed = streams.pop()
+                    assert closed is fallbacks[-1]
+                elif data == xinclude_ns['include']:
+                    fallback = None
+                    if len(fallbacks) == len(includes):
+                        fallback = fallbacks.pop()
+                    streams.pop() # discard anything between the include tags
+                                  # and the fallback element
+                    current = streams[-1]
+                    href, parse = includes.pop()
+                    if not parse:
+                        cls = self.__class__
+                    else:
+                        try:
+                            cls = parse_classes[parse]
+                        except (KeyError, TypeError):
+                            # KeyError: unknown parse mode; TypeError: the
+                            # value is not a plain string and not hashable
+                            raise TemplateSyntaxError('Invalid value for '
+                                                      '"parse" attribute of '
+                                                      'include',
+                                                      self.filepath, *pos[1:])
+                    current.append((INCLUDE, (href, cls, fallback), pos))
+                else:
+                    current.append((kind, data, pos))
+
+            elif kind is START_NS and data[1] == xinclude_ns:
+                # Strip out the XInclude namespace
+                prefixes[data[0]] = data[1]
+
+            elif kind is END_NS and data in prefixes:
+                prefixes.pop(data)
+
+            else:
+                current.append((kind, data, pos))
+
+        assert len(streams) == 1
+        return streams[0]
+
+    def _interpolate_attrs(self, stream):
+        """Expand expressions in the attribute values of start tags.
+        
+        Every non-empty attribute value is run through `interpolate`. If the
+        result is a single `TEXT` event, the attribute keeps a plain string;
+        otherwise its value becomes the list of resulting events. Events
+        other than `START` are passed through untouched.
+        
+        :param stream: the events to process
+        :return: a generator yielding the processed events
+        """
+        for kind, data, pos in stream:
+            if kind is not START:
+                yield kind, data, pos
+                continue
+
+            tag, attrs = data
+            expanded = []
+            for name, value in attrs:
+                if value:
+                    parts = list(interpolate(value, self.filepath, pos[1],
+                                             pos[2], lookup=self.lookup))
+                    if len(parts) == 1 and parts[0][0] is TEXT:
+                        # Nothing but literal text: keep a plain string
+                        value = parts[0][1]
+                    else:
+                        value = parts
+                expanded.append((name, value))
+
+            yield kind, (tag, Attrs(expanded)), pos
+
+    def _prepare(self, stream):
+        """Prepare the stream: interpolate attribute values, then extract
+        the includes, then let the base class do its own preparation.
+        """
+        interpolated = self._interpolate_attrs(stream)
+        without_includes = self._extract_includes(interpolated)
+        return Template._prepare(self, without_includes)
+
+    def add_directives(self, namespace, factory):
+        """Register a custom `DirectiveFactory` for a given namespace.
+        
+        The directives of that namespace are extracted from the template
+        stream right away, so this must happen before the template has been
+        prepared.
+        
+        :param namespace: the namespace URI
+        :type namespace: `basestring`
+        :param factory: the directive factory to register
+        :type factory: `DirectiveFactory`
+        :since: version 0.6
+        """
+        message = 'Too late for adding directives, template already prepared'
+        assert not self._prepared, message
+        extracted = self._extract_directives(self._stream, namespace, factory)
+        self._stream = extracted
+
+    def _match(self, stream, ctxt, start=0, end=None, **vars):
+        """Internal stream filter that applies any defined match templates
+        to the stream.
+        
+        :param stream: the event stream to filter
+        :param ctxt: the template context; the match templates are taken
+                     from ``ctxt._match_templates``
+        :param start: index of the first match template to consider
+        :param end: index following the last match template to consider, or
+                    `None` to consider all remaining ones
+        :param vars: additional variables handed on to nested processing
+        :return: a generator yielding the resulting events
+        """
+        match_templates = ctxt._match_templates
+
+        def _strip(stream, append):
+            # Yield the events nested in an element whose START event has
+            # already been consumed (hence the initial depth of 1). The
+            # closing END event is not yielded but handed to `append`.
+            depth = 1
+            next = stream.next
+            while 1:
+                event = next()
+                if event[0] is START:
+                    depth += 1
+                elif event[0] is END:
+                    depth -= 1
+                if depth > 0:
+                    yield event
+                else:
+                    append(event)
+                    break
+
+        for event in stream:
+
+            # We (currently) only care about start and end events for matching
+            # We might care about namespace events in the future, though
+            if not match_templates or (event[0] is not START and
+                                       event[0] is not END):
+                yield event
+                continue
+
+            for idx, (test, path, template, hints, namespaces, directives) \
+                    in enumerate(match_templates):
+                if idx < start or end is not None and idx >= end:
+                    continue
+
+                if test(event, namespaces, ctxt) is True:
+                    if 'match_once' in hints:
+                        del match_templates[idx]
+                        idx -= 1
+
+                    # Let the remaining match templates know about the event so
+                    # they get a chance to update their internal state
+                    for test in [mt[0] for mt in match_templates[idx + 1:]]:
+                        test(event, namespaces, ctxt, updateonly=True)
+
+                    # Consume and store all events until an end event
+                    # corresponding to this start event is encountered
+                    pre_end = idx + 1
+                    if 'match_once' not in hints and 'not_recursive' in hints:
+                        pre_end -= 1
+                    # Every match gets a list of its own for the closing END
+                    # event. With one list shared by all matches, the END
+                    # events of earlier matches would be replayed at the end
+                    # of the content, and tail[0] below would always be the
+                    # END event of the first element matched.
+                    tail = []
+                    inner = _strip(stream, tail.append)
+                    if pre_end > 0:
+                        inner = self._match(inner, ctxt, start=start,
+                                            end=pre_end, **vars)
+                    content = self._include(chain([event], inner, tail), ctxt)
+                    if 'not_buffered' not in hints:
+                        content = list(content)
+                    content = Stream(content)
+
+                    # Make the select() function available in the body of the
+                    # match template
+                    selected = [False]
+                    def select(path):
+                        selected[0] = True
+                        return content.select(path, namespaces, ctxt)
+                    vars = dict(select=select)
+
+                    # Recursively process the output
+                    template = _apply_directives(template, directives, ctxt,
+                                                 vars)
+                    for event in self._match(self._flatten(template, ctxt,
+                                                           **vars),
+                                             ctxt, start=idx + 1, **vars):
+                        yield event
+
+                    # If the match template did not actually call select to
+                    # consume the matched stream, the original events need to
+                    # be consumed here or they'll get appended to the output
+                    if not selected[0]:
+                        for event in content:
+                            pass
+
+                    # Let the remaining match templates know about the last
+                    # event in the matched content, so they can update their
+                    # internal state accordingly
+                    for test in [mt[0] for mt in match_templates[idx + 1:]]:
+                        test(tail[0], namespaces, ctxt, updateonly=True)
+
+                    break
+
+            else: # no matches
+                yield event
diff --git a/genshi/template/plugin.py b/genshi/template/plugin.py
new file mode 100644
index 0000000..70d56af
--- /dev/null
+++ b/genshi/template/plugin.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# Copyright (C) 2006 Matthew Good
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Basic support for the template engine plugin API used by TurboGears and
+CherryPy/Buffet.
+"""
+
+from genshi.input import ET, HTML, XML
+from genshi.output import DocType
+from genshi.template.base import Template
+from genshi.template.loader import TemplateLoader
+from genshi.template.markup import MarkupTemplate
+from genshi.template.text import TextTemplate, NewTextTemplate
+
+__all__ = ['ConfigurationError', 'AbstractTemplateEnginePlugin',
+           'MarkupTemplateEnginePlugin', 'TextTemplateEnginePlugin']
+__docformat__ = 'restructuredtext en'
+
+
+class ConfigurationError(ValueError):
+    """Exception raised when invalid plugin options are encountered.
+    
+    The constructors of the plugin classes in this module raise it for
+    option values they cannot interpret, such as an unknown doctype or
+    output format.
+    """
+
+
+class AbstractTemplateEnginePlugin(object):
+    """Implementation of the plugin API.
+    
+    Subclasses set `template_class` and `extension`, and provide the
+    `default_format` attribute that `_get_render_options` falls back to.
+    
+    The constructor reads the following options:
+    
+    ``genshi.default_encoding``
+      output encoding used for rendering (default ``utf-8``)
+    ``genshi.auto_reload``
+      whether the loader should reload changed templates (default on)
+    ``genshi.search_path``
+      colon-separated list of template directories; if empty, template
+      names are resolved as dotted package names
+    ``genshi.max_cache_size``
+      maximum number of templates the loader caches (default 25)
+    ``genshi.loader_callback``
+      callable passed to the loader as its callback
+    ``genshi.lookup_errors``
+      variable lookup mode, ``strict`` (default) or ``lenient``
+    ``genshi.allow_exec``
+      whether templates may contain Python code blocks (default on)
+    """
+
+    template_class = None
+    extension = None
+
+    def __init__(self, extra_vars_func=None, options=None):
+        """Read the plugin options and set up the template loader.
+        
+        :param extra_vars_func: callable returning a dictionary of extra
+                                template variables, or `None`
+        :param options: dictionary of plugin options, or `None`
+        :raises ConfigurationError: if an option has an invalid value
+        """
+        self.get_extra_vars = extra_vars_func
+        if options is None:
+            options = {}
+        self.options = options
+
+        self.default_encoding = options.get('genshi.default_encoding', 'utf-8')
+        auto_reload = options.get('genshi.auto_reload', '1')
+        if isinstance(auto_reload, basestring):
+            auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true')
+        search_path = [p for p in
+                       options.get('genshi.search_path', '').split(':') if p]
+        self.use_package_naming = not search_path
+        try:
+            max_cache_size = int(options.get('genshi.max_cache_size', 25))
+        except (TypeError, ValueError):
+            # int() raises TypeError for values such as None
+            raise ConfigurationError('Invalid value for max_cache_size: "%s"' %
+                                     options.get('genshi.max_cache_size'))
+
+        loader_callback = options.get('genshi.loader_callback', None)
+        if loader_callback and not hasattr(loader_callback, '__call__'):
+            raise ConfigurationError('loader callback must be a function')
+
+        lookup_errors = options.get('genshi.lookup_errors', 'strict')
+        if lookup_errors not in ('lenient', 'strict'):
+            raise ConfigurationError('Unknown lookup errors mode "%s"' %
+                                     lookup_errors)
+
+        allow_exec = options.get('genshi.allow_exec', True)
+        if isinstance(allow_exec, basestring):
+            # Any non-empty string is true for bool(), including "false" and
+            # "0", so values coming from a configuration file need to be
+            # parsed. This switch is security relevant, so values that are
+            # not recognized are rejected rather than guessed at.
+            flag = allow_exec.lower()
+            if flag in ('1', 'on', 'yes', 'true'):
+                allow_exec = True
+            elif flag in ('', '0', 'off', 'no', 'false'):
+                allow_exec = False
+            else:
+                raise ConfigurationError('Invalid value for allow_exec "%s"' %
+                                         allow_exec)
+        else:
+            allow_exec = bool(allow_exec)
+
+        self.loader = TemplateLoader(search_path,
+                                     auto_reload=auto_reload,
+                                     max_cache_size=max_cache_size,
+                                     default_class=self.template_class,
+                                     variable_lookup=lookup_errors,
+                                     allow_exec=allow_exec,
+                                     callback=loader_callback)
+
+    def load_template(self, templatename, template_string=None):
+        """Find a template specified in python 'dot' notation, or load one from
+        a string.
+        
+        :param templatename: the name of the template; if no search path is
+                             configured, a name containing a dot is split
+                             at the last dot into a package name and a base
+                             name, and `extension` is appended to the latter
+        :param template_string: the template source; if given, the template
+                                is created from it and `templatename` is
+                                ignored
+        :return: the template object
+        """
+        if template_string is not None:
+            return self.template_class(template_string)
+
+        if self.use_package_naming:
+            divider = templatename.rfind('.')
+            if divider >= 0:
+                from pkg_resources import resource_filename
+                package = templatename[:divider]
+                basename = templatename[divider + 1:] + self.extension
+                templatename = resource_filename(package, basename)
+
+        return self.loader.load(templatename)
+
+    def _get_render_options(self, format=None, fragment=False):
+        """Return the keyword arguments for rendering a stream.
+        
+        :param format: the output method; defaults to `default_format`
+        :param fragment: not used here, subclasses may take it into account
+        :return: a dictionary with the ``method`` and, if a default encoding
+                 is set, the ``encoding``
+        """
+        if format is None:
+            format = self.default_format
+        kwargs = {'method': format}
+        if self.default_encoding:
+            kwargs['encoding'] = self.default_encoding
+        return kwargs
+
+    def render(self, info, format=None, fragment=False, template=None):
+        """Render the template to a string using the provided info."""
+        kwargs = self._get_render_options(format=format, fragment=fragment)
+        return self.transform(info, template).render(**kwargs)
+
+    def transform(self, info, template):
+        """Render the output to an event stream.
+        
+        :param info: dictionary of the template variables
+        :param template: a `Template` object, or the name of the template
+                         to load
+        """
+        if not isinstance(template, Template):
+            template = self.load_template(template)
+        return template.generate(**info)
+
+
+class MarkupTemplateEnginePlugin(AbstractTemplateEnginePlugin):
+    """Implementation of the plugin API for markup templates.
+    
+    In addition to the options of the base class, the constructor reads
+    ``genshi.default_doctype`` and ``genshi.default_format``.
+    """
+
+    template_class = MarkupTemplate
+    extension = '.html'
+
+    def __init__(self, extra_vars_func=None, options=None):
+        """Set up the plugin and validate the doctype and format options.
+        
+        :raises ConfigurationError: if the configured doctype or output
+                                    format is not known
+        """
+        AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)
+
+        doctype_name = self.options.get('genshi.default_doctype')
+        doctype = None
+        if doctype_name:
+            doctype = DocType.get(doctype_name)
+            if doctype is None:
+                raise ConfigurationError('Unknown doctype %r' % doctype_name)
+        self.default_doctype = doctype
+
+        output_format = self.options.get('genshi.default_format', 'html')
+        output_format = output_format.lower()
+        if output_format not in ('html', 'xhtml', 'xml', 'text'):
+            raise ConfigurationError('Unknown output format %r' %
+                                     output_format)
+        self.default_format = output_format
+
+    def _get_render_options(self, format=None, fragment=False):
+        """Return the render options of the base class, plus the default
+        doctype if one is configured and no fragment is being rendered.
+        """
+        render_options = super(MarkupTemplateEnginePlugin,
+                               self)._get_render_options(format, fragment)
+        if not self.default_doctype or fragment:
+            return render_options
+        render_options['doctype'] = self.default_doctype
+        return render_options
+
+    def transform(self, info, template):
+        """Render the output to an event stream.
+        
+        The template data consists of ``ET``, ``HTML`` and ``XML``, then the
+        extra variables (if a function providing them was given), then
+        `info`; later entries override earlier ones.
+        """
+        namespace = dict(ET=ET, HTML=HTML, XML=XML)
+        if self.get_extra_vars:
+            extra_vars = self.get_extra_vars()
+            namespace.update(extra_vars)
+        namespace.update(info)
+        base = super(MarkupTemplateEnginePlugin, self)
+        return base.transform(namespace, template)
+
+
+class TextTemplateEnginePlugin(AbstractTemplateEnginePlugin):
+    """Implementation of the plugin API for text templates.
+    
+    Templates use the `TextTemplate` class unless the option
+    ``genshi.new_text_syntax`` is enabled, which selects `NewTextTemplate`.
+    """
+
+    template_class = TextTemplate
+    extension = '.txt'
+    default_format = 'text'
+
+    def __init__(self, extra_vars_func=None, options=None):
+        """Select the template class, then initialize the base class."""
+        if options is None:
+            options = {}
+
+        use_new_syntax = options.get('genshi.new_text_syntax')
+        if isinstance(use_new_syntax, basestring):
+            enabled_values = ('1', 'on', 'yes', 'true')
+            use_new_syntax = use_new_syntax.lower() in enabled_values
+        if use_new_syntax:
+            # Must be set before the base class creates the loader, which
+            # receives the template class as its default class
+            self.template_class = NewTextTemplate
+
+        AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)
diff --git a/genshi/template/text.py b/genshi/template/text.py
new file mode 100644
index 0000000..746226c
--- /dev/null
+++ b/genshi/template/text.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Plain text templating engine.
+
+This module implements two template language syntaxes, at least for a certain
+transitional period. `OldTextTemplate` (aliased to just `TextTemplate`) defines
+a syntax that was inspired by Cheetah/Velocity. `NewTextTemplate` on the other
+hand is inspired by the syntax of the Django template language, which has more
+explicit delimiting of directives, and is more flexible with regards to
+white space and line breaks.
+
+In a future release, `OldTextTemplate` will be phased out in favor of
+`NewTextTemplate`, as the names imply. Therefore the new syntax is strongly
+recommended for new projects, and existing projects may want to migrate to the
+new syntax to remain compatible with future Genshi releases.
+"""
+
+import re
+
+from genshi.core import TEXT
+from genshi.template.base import BadDirectiveError, Template, \
+                                 TemplateSyntaxError, EXEC, INCLUDE, SUB
+from genshi.template.eval import Suite
+from genshi.template.directives import *
+from genshi.template.directives import Directive
+from genshi.template.interpolation import interpolate
+
+__all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate']
+__docformat__ = 'restructuredtext en'
+
+
+class NewTextTemplate(Template):
+    r"""Implementation of a simple text-based template engine. This class will
+    replace `OldTextTemplate` in a future release.
+    
+    It uses a more explicit delimiting style for directives: instead of the old
+    style which required putting directives on separate lines that were prefixed
+    with a ``#`` sign, directives and comments are enclosed in delimiter pairs
+    (by default ``{% ... %}`` and ``{# ... #}``, respectively).
+    
+    Variable substitution uses the same interpolation syntax as for markup
+    languages: simple references are prefixed with a dollar sign, more complex
+    expression enclosed in curly braces.
+    
+    >>> tmpl = NewTextTemplate('''Dear $name,
+    ... 
+    ... {# This is a comment #}
+    ... We have the following items for you:
+    ... {% for item in items %}
+    ...  * ${'Item %d' % item}
+    ... {% end %}
+    ... ''')
+    >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+    Dear Joe,
+    <BLANKLINE>
+    <BLANKLINE>
+    We have the following items for you:
+    <BLANKLINE>
+     * Item 1
+    <BLANKLINE>
+     * Item 2
+    <BLANKLINE>
+     * Item 3
+    <BLANKLINE>
+    <BLANKLINE>
+    
+    By default, no spaces or line breaks are removed. If a line break should
+    not be included in the output, prefix it with a backslash:
+    
+    >>> tmpl = NewTextTemplate('''Dear $name,
+    ... 
+    ... {# This is a comment #}\
+    ... We have the following items for you:
+    ... {% for item in items %}\
+    ...  * $item
+    ... {% end %}\
+    ... ''')
+    >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+    Dear Joe,
+    <BLANKLINE>
+    We have the following items for you:
+     * 1
+     * 2
+     * 3
+    <BLANKLINE>
+    
+    Backslashes are also used to escape the start delimiter of directives and
+    comments:
+
+    >>> tmpl = NewTextTemplate('''Dear $name,
+    ... 
+    ... \{# This is a comment #}
+    ... We have the following items for you:
+    ... {% for item in items %}\
+    ...  * $item
+    ... {% end %}\
+    ... ''')
+    >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+    Dear Joe,
+    <BLANKLINE>
+    {# This is a comment #}
+    We have the following items for you:
+     * 1
+     * 2
+     * 3
+    <BLANKLINE>
+    
+    :since: version 0.5
+    """
+    # (name, class) pairs of the directives available in text templates
+    directives = [('def', DefDirective),
+                  ('when', WhenDirective),
+                  ('otherwise', OtherwiseDirective),
+                  ('for', ForDirective),
+                  ('if', IfDirective),
+                  ('choose', ChooseDirective),
+                  ('with', WithDirective)]
+    # presumably the default output method used when rendering -- confirm
+    serializer = 'text'
+
+    # Pattern templates; ``_set_delims`` fills in the escaped delimiters and
+    # compiles them.
+    #
+    # _DIRECTIVE_RE takes the directive start and end and the comment start
+    # and end delimiter, in that order. It matches either a directive, with
+    # the command word in group 2 and the rest in group 3, or a comment, in
+    # which case groups 2 and 3 are empty. A start delimiter preceded by a
+    # backslash does not match.
+    _DIRECTIVE_RE = r'((?<!\\)%s\s*(\w+)\s*(.*?)\s*%s|(?<!\\)%s.*?%s)'
+    # _ESCAPE_RE takes the two start delimiters. It matches a backslash
+    # followed by a newline, by another backslash, or by one of the start
+    # delimiters; the escaped character or delimiter is captured in a group,
+    # the newline is not.
+    _ESCAPE_RE = r'\\\n|\\(\\)|\\(%s)|\\(%s)'
+
+    def __init__(self, source, filepath=None, filename=None, loader=None,
+                 encoding=None, lookup='strict', allow_exec=True,
+                 delims=('{%', '%}', '{#', '#}')):
+        """Initialize the template.
+        
+        :param allow_exec: whether ``{% python %}`` blocks are allowed. The
+                           value is passed on to `Template.__init__`, so
+                           that an explicit ``allow_exec=False`` (for
+                           example from a loader set up that way) is
+                           honored.
+        :param delims: the delimiters for directives and comments, see the
+                       `delimiters` property
+        
+        The remaining parameters are handed to `Template.__init__` unchanged.
+        """
+        # NOTE(review): `allow_exec` used to be declared with a default of
+        # False but was never passed on, so the default of the base class
+        # applied (presumably enabled, as for MarkupTemplate -- confirm). The
+        # declared default is now True to keep templates working that are
+        # constructed directly and contain code blocks.
+        #
+        # The delimiters are assigned before the base class is initialized;
+        # presumably that is where the source gets parsed, which needs the
+        # compiled patterns -- confirm against Template.__init__.
+        self.delimiters = delims
+        Template.__init__(self, source, filepath=filepath, filename=filename,
+                          loader=loader, encoding=encoding, lookup=lookup,
+                          allow_exec=allow_exec)
+
+    def _get_delims(self):
+        """Return the delimiters currently in effect."""
+        return self._delims
+    def _set_delims(self, delims):
+        """Store the delimiters and compile the patterns that depend on them.
+        
+        :raises ValueError: if `delims` does not have exactly four items
+        """
+        if len(delims) != 4:
+            raise ValueError('delimiers tuple must have exactly four elements')
+        self._delims = delims
+        escaped = tuple([re.escape(delim) for delim in delims])
+        self._directive_re = re.compile(self._DIRECTIVE_RE % escaped,
+                                        re.DOTALL)
+        # Only the two start delimiters (items 0 and 2) can be escaped
+        self._escape_re = re.compile(self._ESCAPE_RE % escaped[::2])
+    delimiters = property(_get_delims, _set_delims, """\
+    The delimiters for directives and comments. This should be a four item tuple
+    of the form ``(directive_start, directive_end, comment_start,
+    comment_end)``, where each item is a string.
+    """)
+
+    def _parse(self, source, encoding):
+        """Parse the template from text input.
+        
+        The source is scanned for directives and comments. The text between
+        them is unescaped and split into literal and expression events.
+        Comments are dropped. The directives are handled as follows:
+        
+        * ``include`` produces an `INCLUDE` event
+        * ``python`` produces an `EXEC` event, provided code execution is
+          allowed
+        * ``end`` closes the most recently opened directive: the events
+          since that directive are replaced by a single `SUB` event
+        * anything else is looked up with ``get_directive`` and opens a
+          new directive
+        
+        :param source: file-like object providing the template source
+        :param encoding: the encoding used to decode byte strings; UTF-8 if
+                         not given
+        :return: the list of events
+        :raises TemplateSyntaxError: if a Python code block is found although
+                                     code execution is disabled, or if such
+                                     a block contains a syntax error
+        :raises BadDirectiveError: if a directive name is not known
+        """
+        stream = [] # list of events of the "compiled" template
+        dirmap = {} # temporary mapping of directives to elements
+        depth = 0
+
+        source = source.read()
+        if isinstance(source, str):
+            source = source.decode(encoding or 'utf-8', 'replace')
+        offset = 0
+        lineno = 1
+
+        _escape_sub = self._escape_re.sub
+        def _escape_repl(mo):
+            # Replace an escape sequence by the escaped character or
+            # delimiter; an escaped newline has no group and is removed
+            groups = [g for g in mo.groups() if g]
+            if not groups:
+                return ''
+            return groups[0]
+
+        for mo in self._directive_re.finditer(source):
+            start, end = mo.span(1)
+            if start > offset:
+                # Text between the previous and the current directive
+                text = _escape_sub(_escape_repl, source[offset:start])
+                for kind, data, pos in interpolate(text, self.filepath, lineno,
+                                                   lookup=self.lookup):
+                    stream.append((kind, data, pos))
+                lineno += len(text.splitlines())
+
+            lineno += len(source[start:end].splitlines())
+            # Both groups are None if a comment was matched, so that none of
+            # the branches below applies and the comment is dropped
+            command, value = mo.group(2, 3)
+
+            if command == 'include':
+                # NOTE(review): this position (like the one of the EXEC event
+                # below) uses self.filename, while the directive positions use
+                # self.filepath -- presumably on purpose, confirm with the
+                # code consuming INCLUDE events before changing it
+                pos = (self.filename, lineno, 0)
+                value = list(interpolate(value, self.filepath, lineno, 0,
+                                         lookup=self.lookup))
+                if len(value) == 1 and value[0][0] is TEXT:
+                    value = value[0][1]
+                stream.append((INCLUDE, (value, None, []), pos))
+
+            elif command == 'python':
+                if not self.allow_exec:
+                    raise TemplateSyntaxError('Python code blocks not allowed',
+                                              self.filepath, lineno)
+                try:
+                    suite = Suite(value, self.filepath, lineno,
+                                  lookup=self.lookup)
+                except SyntaxError, err:
+                    raise TemplateSyntaxError(err, self.filepath,
+                                              lineno + (err.lineno or 1) - 1)
+                pos = (self.filename, lineno, 0)
+                stream.append((EXEC, suite, pos))
+
+            elif command == 'end':
+                depth -= 1
+                if depth in dirmap:
+                    # Wrap everything since the start of the directive into
+                    # a "subprogram"
+                    directive, start_offset = dirmap.pop(depth)
+                    substream = stream[start_offset:]
+                    stream[start_offset:] = [(SUB, ([directive], substream),
+                                              (self.filepath, lineno, 0))]
+
+            elif command:
+                cls = self.get_directive(command)
+                if cls is None:
+                    # Report the location, as the markup parser does
+                    raise BadDirectiveError(command, self.filepath, lineno)
+                directive = 0, cls, value, None, (self.filepath, lineno, 0)
+                dirmap[depth] = (directive, len(stream))
+                depth += 1
+
+            offset = end
+
+        if offset < len(source):
+            # Text after the last directive
+            text = _escape_sub(_escape_repl, source[offset:])
+            for kind, data, pos in interpolate(text, self.filepath, lineno,
+                                               lookup=self.lookup):
+                stream.append((kind, data, pos))
+
+        return stream
+
+
+class OldTextTemplate(Template):
+    """Legacy implementation of the old syntax text-based templates. This class
+    is provided in a transition phase for backwards compatibility. New code
+    should use the `NewTextTemplate` class and the improved syntax it provides.
+    
+    >>> tmpl = OldTextTemplate('''Dear $name,
+    ... 
+    ... We have the following items for you:
+    ... #for item in items
+    ...  * $item
+    ... #end
+    ... 
+    ... All the best,
+    ... Foobar''')
+    >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
+    Dear Joe,
+    <BLANKLINE>
+    We have the following items for you:
+     * 1
+     * 2
+     * 3
+    <BLANKLINE>
+    All the best,
+    Foobar
+    """
+    directives = [('def', DefDirective),
+                  ('when', WhenDirective),
+                  ('otherwise', OtherwiseDirective),
+                  ('for', ForDirective),
+                  ('if', IfDirective),
+                  ('choose', ChooseDirective),
+                  ('with', WithDirective)]
+    serializer = 'text'
+    # directive lines: optional indentation, then a ``#`` not preceded by ``\``
+    _DIRECTIVE_RE = re.compile(r'(?:^[ \t]*(?<!\\)#(end).*\n?)|'
+                               r'(?:^[ \t]*(?<!\\)#((?:\w+|#).*)\n?)',
+                               re.MULTILINE)
+
+    def _parse(self, source, encoding):
+        """Parse the template from text input into a list of stream events."""
+        stream = [] # list of events of the "compiled" template
+        dirmap = {} # nesting depth -> (directive tuple, start index in stream)
+        depth = 0
+        # `source` must provide read(); Python 2 byte strings get decoded
+        source = source.read()
+        if isinstance(source, str):
+            source = source.decode(encoding or 'utf-8', 'replace')
+        offset = 0 # index in `source` just past the last handled directive
+        lineno = 1
+
+        for mo in self._DIRECTIVE_RE.finditer(source):
+            start, end = mo.span()
+            if start > offset:
+                text = source[offset:start].replace('\\#', '#') # unescape \#
+                for kind, data, pos in interpolate(text, self.filepath, lineno,
+                                                   lookup=self.lookup):
+                    stream.append((kind, data, pos))
+                lineno += len(text.splitlines())
+
+            # drop the indentation and the leading ``#`` of the directive line
+            text = source[start:end].lstrip()[1:]
+            lineno += len(text.splitlines())
+            # "name args" -> command, value; value is None if there are no args
+            command, value = (text.split(None, 1) + [None])[:2]
+
+            if command == 'end':
+                depth -= 1
+                if depth in dirmap:
+                    directive, start_offset = dirmap.pop(depth)
+                    substream = stream[start_offset:]
+                    stream[start_offset:] = [(SUB, ([directive], substream),
+                                              (self.filepath, lineno, 0))]
+            elif command == 'include':
+                if value is None: # formerly an AttributeError on None.strip()
+                    raise TemplateSyntaxError('Missing path for include',
+                                              self.filepath, lineno)
+                pos = (self.filename, lineno, 0)
+                stream.append((INCLUDE, (value.strip(), None, []), pos))
+            elif command != '#': # ``## ...`` comment lines emit nothing
+                cls = self.get_directive(command)
+                if cls is None:
+                    raise BadDirectiveError(command)
+                directive = 0, cls, value, None, (self.filepath, lineno, 0)
+                dirmap[depth] = (directive, len(stream))
+                depth += 1
+            offset = end
+
+        if offset < len(source):
+            text = source[offset:].replace('\\#', '#')
+            for kind, data, pos in interpolate(text, self.filepath, lineno,
+                                               lookup=self.lookup):
+                stream.append((kind, data, pos))
+
+        return stream
+
+
+TextTemplate = OldTextTemplate # the plain name is bound to the old syntax
diff --git a/genshi/util.py b/genshi/util.py
new file mode 100644
index 0000000..b964a01
--- /dev/null
+++ b/genshi/util.py
@@ -0,0 +1,274 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Various utility classes and functions."""
+
+import htmlentitydefs as entities
+import re
+
+__docformat__ = 'restructuredtext en'
+
+
+class LRUCache(dict):
+    """A dictionary-like object that stores only a certain number of items, and
+    discards its least recently used item when full.
+    
+    >>> cache = LRUCache(3)
+    >>> cache['A'] = 0
+    >>> cache['B'] = 1
+    >>> cache['C'] = 2
+    >>> len(cache)
+    3
+    >>> cache['A']
+    0
+    
+    Adding new items to the cache does not increase its size. Instead, the least
+    recently used item is dropped:
+    
+    >>> cache['D'] = 3
+    >>> len(cache)
+    3
+    >>> 'B' in cache
+    False
+    
+    Iterating over the cache returns the keys, starting with the most recently
+    used:
+    
+    >>> for key in cache:
+    ...     print(key)
+    D
+    A
+    C
+
+    Use item access, ``in``, ``len()``, iteration or ``get()``; the other methods
+    inherited from `dict` do not see the cached entries.
+
+    This code is based on the LRUCache class from ``myghtyutils.util``, written
+    by Mike Bayer and released under the MIT license. See:
+
+      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
+    """
+
+    class _Item(object):
+        def __init__(self, key, value): # a node of the recency linked list
+            self.prv = self.nxt = None
+            self.key = key
+            self.value = value
+        def __repr__(self):
+            return repr(self.value)
+
+    def __init__(self, capacity):
+        self._dict = dict() # key -> _Item; the actual storage
+        self.capacity = capacity # maximum number of items to keep
+        self.head = self.tail = None # most / least recently used item
+
+    def __contains__(self, key):
+        return key in self._dict
+
+    def __iter__(self):
+        cur = self.head
+        while cur:
+            yield cur.key
+            cur = cur.nxt
+
+    def __len__(self):
+        return len(self._dict)
+
+    def __getitem__(self, key): # a successful lookup counts as a use
+        item = self._dict[key]
+        self._update_item(item)
+        return item.value
+
+    def get(self, key, default=None): # dict.get() would bypass `_dict`
+        if key in self._dict:
+            return self[key]
+        return default
+
+    def __setitem__(self, key, value):
+        item = self._dict.get(key)
+        if item is None:
+            item = self._dict[key] = self._Item(key, value)
+            self._insert_item(item)
+        else:
+            item.value = value
+            self._update_item(item)
+            self._manage_size()
+
+    def __repr__(self):
+        return repr(self._dict)
+
+    def _insert_item(self, item): # link a new item in as the head
+        item.prv, item.nxt = None, self.head
+        if self.head is not None:
+            self.head.prv = item
+        else:
+            self.tail = item
+        self.head = item
+        self._manage_size()
+
+    def _manage_size(self): # evict from the tail while over capacity
+        while len(self._dict) > self.capacity:
+            del self._dict[self.tail.key]
+            if self.tail is not self.head:
+                self.tail = self.tail.prv
+                self.tail.nxt = None
+            else:
+                self.head = self.tail = None
+
+    def _update_item(self, item): # move an already linked item to the head
+        if self.head is item:
+            return
+        prv = item.prv
+        prv.nxt = item.nxt
+        if item.nxt is not None:
+            item.nxt.prv = prv
+        else:
+            self.tail = prv
+        item.prv, item.nxt = None, self.head
+        self.head.prv = self.head = item
+
+
+def flatten(items):
+    """Recursively expand nested lists, tuples and sets into one flat list.
+    
+    :param items: the (possibly nested) sequence to flatten
+    
+    >>> flatten((1, 2))
+    [1, 2]
+    >>> flatten([1, (2, 3), 4])
+    [1, 2, 3, 4]
+    >>> flatten([1, (2, [3, 4]), 5])
+    [1, 2, 3, 4, 5]
+    """
+    flat = []
+    for element in items:
+        if not isinstance(element, (frozenset, list, set, tuple)):
+            flat.append(element) # leaf value: keep as is
+            continue
+        flat.extend(flatten(element))
+    return flat
+
+
+def plaintext(text, keeplinebreaks=True):
+    """Strip the markup from `text`: tags are removed, entities are replaced.
+    
+    >>> plaintext('<b>1 &lt; 2</b>')
+    u'1 < 2'
+    
+    Line breaks are preserved by default; pass ``keeplinebreaks=False`` to have
+    every newline replaced by a single space:
+    
+    >>> plaintext('''<b>1
+    ... &lt;
+    ... 2</b>''', keeplinebreaks=False)
+    u'1 < 2'
+    
+    :param text: the markup to reduce to plain text
+    :param keeplinebreaks: if false, newlines are turned into spaces
+    :return: the text without tags, and with entities replaced
+    """
+    plain = stripentities(striptags(text))
+    if keeplinebreaks:
+        return plain
+    return plain.replace('\n', ' ')
+
+
+_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
+def stripentities(text, keepxmlentities=False):
+    """Return a copy of the given text with any character or numeric entities
+    replaced by the equivalent Unicode characters.
+    
+    >>> stripentities('1 &lt; 2')
+    u'1 < 2'
+    >>> stripentities('more &hellip;')
+    u'more \u2026'
+    >>> stripentities('&#8230;')
+    u'\u2026'
+    >>> stripentities('&#x2026; &#X2026;')
+    u'\u2026 \u2026'
+    
+    If the `keepxmlentities` parameter is provided and is a truth value, the
+    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.
+    
+    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
+    u'1 &lt; 2 \u2026'
+    
+    :param text: the text in which entities should be replaced
+    :param keepxmlentities: whether to leave the core XML entities untouched
+    :return: the text with the entities replaced
+    """
+    def _replace_entity(match):
+        ref = match.group(1)
+        if ref: # numeric entity; the regex allows both ``x`` and ``X``
+            if ref[0] in 'xX':
+                return unichr(int(ref[1:], 16))
+            return unichr(int(ref, 10))
+        ref = match.group(2) # character entity
+        if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
+            return '&%s;' % ref
+        try:
+            return unichr(entities.name2codepoint[ref])
+        except KeyError:
+            if keepxmlentities: # unknown name: keep it, escaping the ``&``
+                return '&amp;%s;' % ref
+            return ref
+    return _STRIPENTITIES_RE.sub(_replace_entity, text)
+
+
+_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)
+def striptags(text):
+    """Return a copy of the text with any XML/HTML tags removed.
+    
+    >>> striptags('<span>Foo</span> bar')
+    'Foo bar'
+    >>> striptags('<span class="bar">Foo</span>')
+    'Foo'
+    >>> striptags('Foo<br />')
+    'Foo'
+    
+    HTML/XML comments are stripped, too, even if they span several lines:
+    
+    >>> striptags('<!-- <blub>hehe</blah> -->test')
+    'test'
+    
+    :param text: the string to remove tags from
+    :return: the text with tags removed
+    """
+    return _STRIPTAGS_RE.sub('', text)
+
+
+def stringrepr(string):
+    """Return `string` as a quoted ASCII literal, ``u``-prefixed if escaped."""
+    encoded = string.encode('ascii', 'backslashreplace')
+    if len(encoded) > len(string): # escapes were inserted while encoding
+        return "u'" + encoded.replace("'", "\\'") + "'"
+    return "'" + encoded.replace("'", "\\'") + "'"
+
+
+# Compatibility fallbacks for Python versions without ``all``/``any`` builtins
+
+try:
+    any = any
+    all = all
+except NameError:
+    def any(S): # true as soon as one item of `S` is true
+        for item in S:
+            if item:
+                return True
+        return False
+
+    def all(S): # false as soon as one item of `S` is false
+        for item in S:
+            if not item:
+                return False
+        return True