4 files changed, 3021 insertions, 0 deletions
diff --git a/genshi/filters/__init__.py b/genshi/filters/__init__.py
new file mode 100644
index 0000000..efc2565
--- /dev/null
+++ b/genshi/filters/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of a number of stream filters."""
+
+from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
+from genshi.filters.i18n import Translator
+from genshi.filters.transform import Transformer
+
+__docformat__ = 'restructuredtext en'
diff --git a/genshi/filters/html.py b/genshi/filters/html.py
new file mode 100644
index 0000000..d554a54
--- /dev/null
+++ b/genshi/filters/html.py
@@ -0,0 +1,453 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Implementation of a number of stream filters."""
+
+try:
+    any
+except NameError:
+    from genshi.util import any
+import re
+
+from genshi.core import Attrs, QName, stripentities
+from genshi.core import END, START, TEXT, COMMENT
+
+__all__ = ['HTMLFormFiller', 'HTMLSanitizer']
+__docformat__ = 'restructuredtext en'
+
+
+class HTMLFormFiller(object):
+    """A stream filter that can populate HTML forms from a dictionary of values.
+    
+    >>> from genshi.input import HTML
+    >>> html = HTML('''<form>
+    ...   <p><input type="text" name="foo" /></p>
+    ... </form>''')
+    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
+    >>> print(html | filler)
+    <form>
+      <p><input type="text" name="foo" value="bar"/></p>
+    </form>
+    """
+    # TODO: only select the first radio button, and the first select option
+    #       (if not in a multiple-select)
+    # TODO: only apply to elements in the XHTML namespace (or no namespace)?
+
+    def __init__(self, name=None, id=None, data=None, passwords=False):
+        """Set up a form filler.
+
+        :param name: Restricts filling to forms whose ``name`` attribute equals
+                     this value. When neither `name` nor `id` is given, every
+                     form in the stream is processed.
+        :param id: Restricts filling to forms whose ``id`` attribute equals
+                   this value. A form matching either `name` or `id` is
+                   processed.
+        :param data: A mapping from form field names to the values that should
+                     be filled in; a list or tuple may be used for a field that
+                     takes several values. Defaults to an empty dictionary.
+        :param passwords: Whether ``<input type="password">`` fields should be
+                          populated. Off by default for security reasons (for
+                          example, a password may end up in the browser cache)
+        :note: Changed in 0.5.2: added the `passwords` option
+        """
+        self.name, self.id = name, id
+        self.passwords = passwords
+        if data is None:
+            # Give each filler its own dict rather than sharing a default
+            data = {}
+        self.data = data
+
+    def __call__(self, stream):
+        """Yield the events of `stream` with matching form fields filled in.
+        
+        :param stream: the markup event stream to filter
+        """
+        in_form = in_select = in_option = in_textarea = False
+        select_value = option_value = textarea_value = None
+        option_start = None
+        option_text = []
+        no_option_value = False
+
+        for kind, data, pos in stream:
+
+            if kind is START:
+                tag, attrs = data
+                tagname = tag.localname
+
+                if tagname == 'form' and (
+                        self.name and attrs.get('name') == self.name or
+                        self.id and attrs.get('id') == self.id or
+                        not (self.id or self.name)):
+                    in_form = True
+
+                elif in_form:
+                    if tagname == 'input':
+                        type = attrs.get('type', '').lower()
+                        if type in ('checkbox', 'radio'):
+                            name = attrs.get('name')
+                            if name and name in self.data:
+                                value = self.data[name]
+                                declval = attrs.get('value')
+                                checked = False
+                                if isinstance(value, (list, tuple)):
+                                    if declval:
+                                        checked = declval in [unicode(v) for v
+                                                              in value]
+                                    else:
+                                        checked = any(value)
+                                else:
+                                    if declval:
+                                        checked = declval == unicode(value)
+                                    elif type == 'checkbox':
+                                        checked = bool(value)
+                                if checked:
+                                    attrs |= [(QName('checked'), 'checked')]
+                                elif 'checked' in attrs:
+                                    attrs -= 'checked'
+                        elif type in ('', 'hidden', 'text') \
+                                or type == 'password' and self.passwords:
+                            name = attrs.get('name')
+                            if name and name in self.data:
+                                value = self.data[name]
+                                if isinstance(value, (list, tuple)):
+                                    value = value[0]
+                                if value is not None:
+                                    attrs |= [
+                                        (QName('value'), unicode(value))
+                                    ]
+                    elif tagname == 'select':
+                        name = attrs.get('name')
+                        if name in self.data:
+                            select_value = self.data[name]
+                            in_select = True
+                    elif tagname == 'textarea':
+                        name = attrs.get('name')
+                        if name in self.data:
+                            textarea_value = self.data.get(name)
+                            if isinstance(textarea_value, (list, tuple)):
+                                textarea_value = textarea_value[0]
+                            in_textarea = True
+                    elif in_select and tagname == 'option':
+                        option_start = kind, data, pos
+                        option_value = attrs.get('value')
+                        if option_value is None:
+                            no_option_value = True
+                            option_value = ''
+                        in_option = True
+                        continue
+                yield kind, (tag, attrs), pos
+            # Option text is buffered, text of a filled textarea is dropped
+            elif in_form and kind is TEXT:
+                if in_select and in_option:
+                    if no_option_value:
+                        option_value += data
+                    option_text.append((kind, data, pos))
+                    continue
+                elif in_textarea:
+                    continue
+                yield kind, data, pos
+            # On END, emit what was held back before the end tag itself
+            elif in_form and kind is END:
+                tagname = data.localname
+                if tagname == 'form':
+                    in_form = False
+                elif tagname == 'select':
+                    in_select = False
+                    select_value = None
+                elif in_select and tagname == 'option':
+                    if isinstance(select_value, (tuple, list)):
+                        selected = option_value in [unicode(v) for v
+                                                    in select_value]
+                    else:
+                        selected = option_value == unicode(select_value)
+                    okind, (tag, attrs), opos = option_start
+                    if selected:
+                        attrs |= [(QName('selected'), 'selected')]
+                    elif 'selected' in attrs:
+                        attrs -= 'selected'
+                    yield okind, (tag, attrs), opos
+                    for event in option_text:
+                        yield event
+                    in_option = False
+                    no_option_value = False
+                    option_start = option_value = None
+                    option_text = []
+                elif in_textarea and tagname == 'textarea':
+                    # Unfilled textareas must not get a stale earlier value
+                    if textarea_value:
+                        yield TEXT, unicode(textarea_value), pos
+                    in_textarea = False
+                yield kind, data, pos
+
+            else:
+                yield kind, data, pos
+
+
+class HTMLSanitizer(object):
+    """A filter that removes potentially dangerous HTML tags and attributes
+    from the stream.
+    
+    >>> from genshi import HTML
+    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
+    >>> print(html | HTMLSanitizer())
+    <div/>
+    
+    The default set of safe tags and attributes can be modified when the filter
+    is instantiated. For example, to allow inline ``style`` attributes, the
+    following instantiation would work:
+    
+    >>> html = HTML('<div style="background: #000"></div>')
+    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+    >>> print(html | sanitizer)
+    <div style="background: #000"/>
+    
+    Note that even in this case, the filter *does* attempt to remove dangerous
+    constructs from style attributes:
+
+    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
+    >>> print(html | sanitizer)
+    <div style="color: #000"/>
+    
+    This handles HTML entities, unicode escapes in CSS and Javascript text, as
+    well as a lot of other things. However, the style tag is still excluded by
+    default because it is very hard for such sanitizing to be completely safe,
+    especially considering how much error recovery current web browsers perform.
+    
+    It also does some basic filtering of CSS properties that may be used for
+    typical phishing attacks. For more sophisticated filtering, this class
+    provides a couple of hooks that can be overridden in sub-classes.
+    
+    :warn: Note that this special processing of CSS is currently only applied to
+           style attributes, **not** style elements.
+    """
+
+    SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
+        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
+        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
+        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
+        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
+        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
+        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
+        'ul', 'var'])
+
+    SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
+        'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
+        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
+        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
+        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
+        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
+        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
+        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
+        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
+        'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
+        'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
+
+    SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])
+
+    URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
+        'src'])
+
+    def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
+                 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
+        """Initialize the sanitizer with its whitelists.
+
+        Each argument defaults to the matching upper-case class attribute.
+
+        :param safe_tags: a set of tag names that are considered safe
+        :param safe_attrs: a set of attribute names that are considered safe
+        :param safe_schemes: a set of URI schemes that are considered safe
+        :param uri_attrs: a set of names of attributes that contain URIs
+        """
+        self.safe_schemes = safe_schemes
+        "The set of URI schemes that are considered safe."
+        self.uri_attrs = uri_attrs
+        "The set of names of attributes that may contain URIs."
+        self.safe_attrs = safe_attrs
+        "The set of attribute names that are considered safe."
+        self.safe_tags = safe_tags
+        "The set of tag names that are considered safe."
+
+    def __call__(self, stream):
+        """Yield only the events of `stream` that are considered safe.
+        
+        :param stream: the markup event stream to filter
+        """
+        waiting_for = None
+        # Tag of the unsafe element currently being skipped, if any
+        for kind, data, pos in stream:
+            if kind is START:
+                if waiting_for:
+                    continue
+                tag, attrs = data
+                if not self.is_safe_elem(tag, attrs):
+                    waiting_for = tag
+                    continue
+                # Rebuild the attribute list from the whitelisted ones only
+                new_attrs = []
+                for attr, value in attrs:
+                    value = stripentities(value)
+                    if attr not in self.safe_attrs:
+                        continue
+                    elif attr in self.uri_attrs:
+                        # Don't allow URI schemes such as "javascript:"
+                        if not self.is_safe_uri(value):
+                            continue
+                    elif attr == 'style':
+                        # Remove dangerous CSS declarations from inline styles
+                        decls = self.sanitize_css(value)
+                        if not decls:
+                            continue
+                        value = '; '.join(decls)
+                    new_attrs.append((attr, value))
+
+                yield kind, (tag, Attrs(new_attrs)), pos
+            # The first END equal to `waiting_for` ends the skipping
+            elif kind is END:
+                tag = data
+                if waiting_for:
+                    if waiting_for == tag:
+                        waiting_for = None
+                else:
+                    yield kind, data, pos
+            # COMMENT events are never passed on
+            elif kind is not COMMENT:
+                if not waiting_for:
+                    yield kind, data, pos
+
+    def is_safe_css(self, propname, value):
+        """Decide whether a single CSS property declaration may be kept.
+        
+        Called by `sanitize_css` for every declaration.
+        :param propname: the CSS property name
+        :param value: the value of the property
+        :return: whether the property value should be considered safe
+        :rtype: bool
+        :since: version 0.6
+        """
+        # Negative margins can be used for phishing
+        suspicious_margin = propname.startswith('margin') and '-' in value
+        # ``position`` is refused outright, whatever its value
+        banned_property = propname == 'position'
+        return not (banned_property or suspicious_margin)
+
+
+    def is_safe_elem(self, tag, attrs):
+        """Tell whether an element may be kept; password inputs never are,
+        and neither is any tag missing from `safe_tags`.
+        
+        :param tag: the tag name of the element
+        :type tag: QName
+        :param attrs: the element attributes
+        :type attrs: Attrs
+        :return: whether the element should be considered safe
+        :rtype: bool
+        :since: version 0.6
+        """
+        if tag not in self.safe_tags:
+            return False
+        if tag.localname != 'input':
+            return True
+        # ``input`` itself is allowed, but not in its password flavour
+        declared_type = attrs.get('type', '')
+        return declared_type.lower() != 'password'
+
+    def is_safe_uri(self, uri):
+        """Decide whether a URI may be kept in the output.
+        
+        The URI is accepted when it is relative, or when its scheme is listed
+        in `safe_schemes`. The fragment identifier is ignored, and so are
+        non-alphanumeric characters within the scheme.
+        
+        >>> sanitizer = HTMLSanitizer()
+        >>> sanitizer.is_safe_uri('http://example.org/')
+        True
+        >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
+        False
+        
+        :param uri: the URI to check
+        :return: `True` if the URI can be considered safe, `False` otherwise
+        :rtype: `bool`
+        :since: version 0.4.3
+        """
+        base = uri.split('#', 1)[0] # drop the fragment identifier, if any
+        if ':' not in base:
+            return True # no scheme separator, so this is a relative URI
+        prefix = base.split(':', 1)[0]
+        scheme = ''.join(ch for ch in prefix if ch.isalnum())
+        return scheme.lower() in self.safe_schemes
+
+    def sanitize_css(self, text):
+        """Remove potentially dangerous property declarations from CSS code.
+        
+        In particular, properties using the CSS ``url()`` function with a scheme
+        that is not considered safe are removed (the match ignores case):
+        
+        >>> sanitizer = HTMLSanitizer()
+        >>> sanitizer.sanitize_css(u'''
+        ...   background: url(javascript:alert("foo"));
+        ...   color: #000;
+        ... ''')
+        [u'color: #000']
+        
+        Also, the proprietary Internet Explorer function ``expression()`` is
+        always stripped, regardless of letter case:
+        
+        >>> sanitizer.sanitize_css(u'''
+        ...   background: #fff;
+        ...   color: #000;
+        ...   width: e/**/xpression(alert("foo"));
+        ... ''')
+        [u'background: #fff', u'color: #000']
+        
+        :param text: the CSS text; this is expected to be `unicode` and to not
+                     contain any character or numeric references
+        :return: a list of declarations that are considered safe
+        :rtype: `list`
+        :since: version 0.4.3
+        """
+        decls = []
+        text = self._strip_css_comments(self._replace_unicode_escapes(text))
+        for decl in text.split(';'):
+            decl = decl.strip()
+            if not decl:
+                continue
+            try:
+                propname, value = decl.split(':', 1)
+            except ValueError:
+                continue
+            if not self.is_safe_css(propname.strip().lower(), value.strip()):
+                continue
+            # CSS function names are case-insensitive, so ``EXPRESSION(`` and
+            # ``URL(`` must be caught just like their lower-case spellings.
+            is_evil = 'expression' in value.lower()
+            for match in re.finditer(r'url\s*\(([^)]+)', value, re.IGNORECASE):
+                if not self.is_safe_uri(match.group(1)):
+                    is_evil = True
+                    break
+            if not is_evil:
+                decls.append(decl.strip())
+        return decls
+
+    _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
+    _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub
+    # NOTE(review): escapes beyond unichr()'s range raise ValueError below
+    def _replace_unicode_escapes(self, text):
+        def _char(escape): # hexadecimal CSS escape -> the character it names
+            return unichr(int(escape.group(1), 16))
+        return self._UNICODE_ESCAPE(_char, self._NORMALIZE_NEWLINES('\n', text))
+
+    _CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.DOTALL).sub # may span lines
+
+    def _strip_css_comments(self, text):
+        return self._CSS_COMMENTS('', text) # drop every /* ... */ comment
diff --git a/genshi/filters/i18n.py b/genshi/filters/i18n.py
new file mode 100644
index 0000000..7852875
--- /dev/null
+++ b/genshi/filters/i18n.py
@@ -0,0 +1,1238 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2010 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Directives and utilities for internationalization and localization of
+templates.
+
+:since: version 0.4
+:note: Directives support added since version 0.6
+"""
+
+try:
+    any
+except NameError:
+    from genshi.util import any
+from gettext import NullTranslations
+import os
+import re
+from types import FunctionType
+
+from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
+                        XML_NAMESPACE, _ensure, StreamEventKind
+from genshi.template.eval import _ast
+from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
+from genshi.template.directives import Directive, StripDirective
+from genshi.template.markup import MarkupTemplate, EXEC
+
+__all__ = ['Translator', 'extract']
+__docformat__ = 'restructuredtext en'
+
+
+I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
+
+MSGBUF = StreamEventKind('MSGBUF')
+SUB_START = StreamEventKind('SUB_START')
+SUB_END = StreamEventKind('SUB_END')
+
+GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
+                     'ugettext', 'ungettext')
+
+
+class I18NDirective(Directive):
+    """Base class for the i18n directives; delegates to `_apply_directives`."""
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        return _apply_directives(stream, directives, ctxt, vars)
+
+
+class ExtractableI18NDirective(I18NDirective):
+    """Interface for i18n directives that messages can be extracted from."""
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        raise NotImplementedError # to be overridden, as MsgDirective does
+
+
+class CommentDirective(I18NDirective):
+    """Implementation of the ``i18n:comment`` template directive, whose value
+    is reported as a translator comment alongside the extracted message.
+    
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <p i18n:comment="As in Foo Bar">Foo</p>
+    ... </html>''')
+    >>> translator = Translator()
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream))
+    [(2, None, u'Foo', [u'As in Foo Bar'])]
+    """
+    __slots__ = ['comment']
+    # None is passed to Directive as the value; the text is kept in `comment`
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.comment = value
+
+
+class MsgDirective(ExtractableI18NDirective):
+    r"""Implementation of the ``i18n:msg`` directive which marks inner content
+    as translatable. Consider the following examples:
+    
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:msg="">
+    ...     <p>Foo</p>
+    ...     <p>Bar</p>
+    ...   </div>
+    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
+    ... </html>''')
+    
+    >>> translator = Translator()
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream))
+    [(2, None, u'[1:Foo]\n    [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
+    >>> print(tmpl.generate().render())
+    <html>
+      <div><p>Foo</p>
+        <p>Bar</p></div>
+      <p>Foo <em>bar</em>!</p>
+    </html>
+
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:msg="fname, lname">
+    ...     <p>First Name: ${fname}</p>
+    ...     <p>Last Name: ${lname}</p>
+    ...   </div>
+    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+    [(2, None, u'[1:First Name: %(fname)s]\n    [2:Last Name: %(lname)s]', []),
+    (6, None, u'Foo [1:bar]!', [])]
+
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:msg="fname, lname">
+    ...     <p>First Name: ${fname}</p>
+    ...     <p>Last Name: ${lname}</p>
+    ...   </div>
+    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> print(tmpl.generate(fname='John', lname='Doe').render())
+    <html>
+      <div><p>First Name: John</p>
+        <p>Last Name: Doe</p></div>
+      <p>Foo <em>bar</em>!</p>
+    </html>
+
+    Leading and trailing white-space is stripped off to make it simpler for
+    translators. Stripping it does no harm: it only occurs in the HTML
+    source, so the rendered output will remain the same.
+    """
+    __slots__ = ['params', 'lineno']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        names = [n.strip() for n in value.split(',')] # strip before filtering
+        self.params, self.lineno = [n for n in names if n], lineno
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict: # presumably the element form; TODO confirm
+            value = value.get('params', '').strip()
+        return super(MsgDirective, cls).attach(template, stream, value.strip(),
+                                               namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        gettext = ctxt.get('_i18n.gettext')
+        if ctxt.get('_i18n.domain'): # a domain is set: go through dgettext
+            dgettext = ctxt.get('_i18n.dgettext')
+            assert hasattr(dgettext, '__call__'), \
+                'No domain gettext function passed'
+            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
+        # Buffers the content and yields the translated events in its place
+        def _generate():
+            msgbuf = MessageBuffer(self)
+            previous = stream.next()
+            if previous[0] is START:
+                yield previous # a leading START is passed through as is
+            else:
+                msgbuf.append(*previous)
+            previous = stream.next()
+            for kind, data, pos in stream:
+                msgbuf.append(*previous)
+                previous = kind, data, pos
+            if previous[0] is not END: # a final END is yielded after the text
+                msgbuf.append(*previous)
+                previous = None
+            for event in msgbuf.translate(gettext(msgbuf.format())):
+                yield event
+            if previous:
+                yield previous
+
+        return _apply_directives(_generate(), directives, ctxt, vars)
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        msgbuf = MessageBuffer(self)
+        strip = False
+        # Yields attribute messages first, then the message of the whole body
+        stream = iter(stream)
+        previous = stream.next()
+        if previous[0] is START:
+            for message in translator._extract_attrs(previous,
+                                                     gettext_functions,
+                                                     search_text=search_text):
+                yield message
+            previous = stream.next()
+            strip = True
+        for event in stream:
+            if previous[0] is START: # not `event`: covers the first child
+                for message in translator._extract_attrs(previous,
+                                                         gettext_functions,
+                                                         search_text=search_text):
+                    yield message
+            msgbuf.append(*previous)
+            previous = event
+        if not strip:
+            msgbuf.append(*previous)
+        # `comment_stack` may be left at its default of None
+        yield self.lineno, None, msgbuf.format(), (comment_stack or [])[-1:]
+
+
+class ChooseBranchDirective(I18NDirective):
+    __slots__ = ['params']
+    # Shared base class of SingularDirective and PluralDirective
+    def __call__(self, stream, directives, ctxt, **vars):
+        self.params = ctxt.get('_i18n.choose.params', [])[:]
+        msgbuf = MessageBuffer(self)
+        stream = _apply_directives(stream, directives, ctxt, vars)
+
+        previous = stream.next()
+        if previous[0] is START:
+            yield previous
+        else:
+            msgbuf.append(*previous)
+
+        try:
+            previous = stream.next()
+        except StopIteration:
+            # For example <i18n:singular> or <i18n:plural> directives
+            yield MSGBUF, (), -1 # the place holder for msgbuf output
+            ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
+            return
+        # Buffer every event but the last, which may be the outer end tag
+        for event in stream:
+            msgbuf.append(*previous)
+            previous = event
+        yield MSGBUF, (), -1 # the place holder for msgbuf output
+
+        if previous[0] is END:
+            yield previous # the outer end tag
+        else:
+            msgbuf.append(*previous)
+        ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None, msgbuf=None):
+        stream = iter(stream)
+        previous = stream.next()
+        # `msgbuf` must be supplied: the None default fails on append()
+        if previous[0] is START:
+            # skip the enclosing element
+            for message in translator._extract_attrs(previous,
+                                                     gettext_functions,
+                                                     search_text=search_text):
+                yield message
+            previous = stream.next()
+
+        for event in stream:
+            if previous[0] is START:
+                for message in translator._extract_attrs(previous,
+                                                         gettext_functions,
+                                                         search_text=search_text):
+                    yield message
+            msgbuf.append(*previous)
+            previous = event
+
+        if previous[0] is not END:
+            msgbuf.append(*previous)
+
+
+class SingularDirective(ChooseBranchDirective):
+    """Implementation of the ``i18n:singular`` directive, which marks the
+    singular form of the message inside an ``i18n:choose`` directive."""
+
+
+class PluralDirective(ChooseBranchDirective):
+    """Implementation of the ``i18n:plural`` directive, which marks the
+    plural form of the message inside an ``i18n:choose`` directive."""
+
+
+class ChooseDirective(ExtractableI18NDirective):
+    """Implementation of the ``i18n:choose`` directive which provides plural
+    internationalisation of strings.
+
+    This directive requires at least one parameter, the one which evaluates to
+    an integer which will allow to choose the plural/singular form. If you also
+    have expressions inside the singular and plural version of the string you
+    also need to pass a name for those parameters. Consider the following
+    examples:
+
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:choose="num; num">
+    ...     <p i18n:singular="">There is $num coin</p>
+    ...     <p i18n:plural="">There are $num coins</p>
+    ...   </div>
+    ... </html>''')
+    >>> translator = Translator()
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+    [(2, 'ngettext', (u'There is %(num)s coin',
+                      u'There are %(num)s coins'), [])]
+
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <div i18n:choose="num; num">
+    ...     <p i18n:singular="">There is $num coin</p>
+    ...     <p i18n:plural="">There are $num coins</p>
+    ...   </div>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> print(tmpl.generate(num=1).render())
+    <html>
+      <div>
+        <p>There is 1 coin</p>
+      </div>
+    </html>
+    >>> print(tmpl.generate(num=2).render())
+    <html>
+      <div>
+        <p>There are 2 coins</p>
+      </div>
+    </html>
+
+    When used as an element and not as an attribute:
+
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <i18n:choose numeral="num" params="num">
+    ...     <p i18n:singular="">There is $num coin</p>
+    ...     <p i18n:plural="">There are $num coins</p>
+    ...   </i18n:choose>
+    ... </html>''')
+    >>> translator.setup(tmpl)
+    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+    [(2, 'ngettext', (u'There is %(num)s coin',
+                      u'There are %(num)s coins'), [])]
+    """
+    __slots__ = ['numeral', 'params', 'lineno']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        """Parse a directive value of the form ``numeral[; name, name...]``.
+
+        :param value: the raw directive value; the part before the first
+                      ``;`` is parsed as the numeral expression, the part
+                      after it as a comma separated list of parameter names
+        :param template: forwarded to `Directive.__init__` and to the
+                         expression parser
+        :param namespaces: forwarded to `Directive.__init__`
+        :param lineno: the line the directive was found on; also reported as
+                       the line number of the extracted message
+        :param offset: forwarded to `Directive.__init__` and to the
+                       expression parser
+        """
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        params = [v.strip() for v in value.split(';')]
+        self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
+        # Only the first segment after the numeral carries parameter names;
+        # any further ``;`` separated segments are ignored.
+        self.params = []
+        if params:
+            self.params = [name.strip() for name in params[0].split(',')
+                           if name]
+        self.lineno = lineno
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        """Normalize the element form of the directive before attaching it.
+
+        When ``value`` is a dict (``<i18n:choose numeral="..." params="...">``)
+        it is rewritten into the ``numeral; params`` string form understood by
+        `__init__`; any other value is passed on unchanged.
+
+        :raise AssertionError: if the dict carries no (or an empty)
+                               ``numeral`` entry
+        """
+        if isinstance(value, dict):
+            numeral = value.get('numeral', '').strip()
+            # Compare by value, not identity: a ``unicode`` attribute value is
+            # never the same object as the ``''`` literal, so an identity test
+            # would let an empty numeral slip through.
+            assert numeral != '', "at least pass the numeral param"
+            params = [v.strip() for v in value.get('params', '').split(',')]
+            value = '%s; ' % numeral + ', '.join(params)
+        return super(ChooseDirective, cls).attach(template, stream, value,
+                                                  namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        """Render the branch selected by the numeral and translate it.
+
+        A context frame holding ``_i18n.choose.params``,
+        ``_i18n.choose.singular`` and ``_i18n.choose.plural`` is pushed for
+        the duration of the call; the message buffers are read back from the
+        last two keys after the branches were applied (presumably stored
+        there by the branch directives -- that code is not part of this
+        class).
+
+        :param stream: the event stream wrapped by the directive
+        :param directives: remaining directives (not used by this method)
+        :param ctxt: the template context; must provide ``_i18n.ngettext``
+        :raise AssertionError: if no callable ``_i18n.ngettext`` is available
+        """
+        ctxt.push({'_i18n.choose.params': self.params,
+                   '_i18n.choose.singular': None,
+                   '_i18n.choose.plural': None})
+
+        ngettext = ctxt.get('_i18n.ngettext')
+        assert hasattr(ngettext, '__call__'), 'No ngettext function available'
+        dngettext = ctxt.get('_i18n.dngettext')
+        if not dngettext:
+            # Bind the plain ``ngettext`` as a default *now*: the local name
+            # is rebound to a domain-aware wrapper below, and a late-bound
+            # closure would then call that wrapper and recurse forever.
+            dngettext = lambda d, s, p, n, ngettext=ngettext: \
+                ngettext(s, p, n)
+
+        new_stream = []
+        singular_stream = None
+        plural_stream = None
+
+        numeral = self.numeral.evaluate(ctxt)
+        is_plural = self._is_plural(numeral, ngettext)
+
+        for event in stream:
+            if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
+                                       for d in event[1][0]):
+                subdirectives, substream = event[1]
+
+                if isinstance(subdirectives[0], SingularDirective):
+                    # The singular branch is always applied (its message is
+                    # the ngettext lookup key); a placeholder marks where the
+                    # chosen branch has to be emitted.
+                    singular_stream = list(_apply_directives(substream,
+                                                             subdirectives,
+                                                             ctxt, vars))
+                    new_stream.append((MSGBUF, None, (None, -1, -1)))
+
+                elif isinstance(subdirectives[0], PluralDirective):
+                    if is_plural:
+                        plural_stream = list(_apply_directives(substream,
+                                                               subdirectives,
+                                                               ctxt, vars))
+
+            else:
+                new_stream.append(event)
+
+        if ctxt.get('_i18n.domain'):
+            ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
+                                                 s, p, n)
+
+        singular_msgbuf = ctxt.get('_i18n.choose.singular')
+        if is_plural:
+            plural_msgbuf = ctxt.get('_i18n.choose.plural')
+            msgbuf, choice = plural_msgbuf, plural_stream
+        else:
+            msgbuf, choice = singular_msgbuf, singular_stream
+            # The plural branch was not applied; an empty buffer stands in so
+            # that ``format()`` can still be called for the lookup.
+            plural_msgbuf = MessageBuffer(self)
+
+        for kind, data, pos in new_stream:
+            if kind is MSGBUF:
+                for event in choice:
+                    if event[0] is MSGBUF:
+                        translation = ngettext(singular_msgbuf.format(),
+                                               plural_msgbuf.format(),
+                                               numeral)
+                        for subevent in msgbuf.translate(translation):
+                            yield subevent
+                    else:
+                        yield event
+            else:
+                yield kind, data, pos
+
+        ctxt.pop()
+
+    def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        """Extract the singular/plural message pair from the wrapped stream.
+
+        Messages found in attributes and in the branch directives are yielded
+        as they are encountered; the final item is a ``(lineno, 'ngettext',
+        (singular, plural), comments)`` tuple for the directive itself.
+
+        :param translator: the `Translator` driving the extraction
+        :param stream: the event stream wrapped by the directive
+        :param gettext_functions: names of gettext-style functions
+        :param search_text: whether text in attributes should be extracted
+        :param comment_stack: list of translator comments collected so far;
+                              only the last one is attached to the message
+        """
+        if comment_stack is None:
+            # The default would otherwise blow up on the slice at the end.
+            comment_stack = []
+        strip = False
+        stream = iter(stream)
+        previous = stream.next()
+
+        if previous[0] is START:
+            # skip the enclosing element
+            for message in translator._extract_attrs(previous,
+                                                     gettext_functions,
+                                                     search_text=search_text):
+                yield message
+            previous = stream.next()
+            strip = True
+
+        singular_msgbuf = MessageBuffer(self)
+        plural_msgbuf = MessageBuffer(self)
+
+        # The loop always handles the event *before* the current one, so the
+        # very last event of the stream is left over in ``previous``.
+        for event in stream:
+            if previous[0] is SUB:
+                directives, substream = previous[1]
+                for directive in directives:
+                    if isinstance(directive, SingularDirective):
+                        for message in directive.extract(translator,
+                                substream, gettext_functions, search_text,
+                                comment_stack, msgbuf=singular_msgbuf):
+                            yield message
+                    elif isinstance(directive, PluralDirective):
+                        for message in directive.extract(translator,
+                                substream, gettext_functions, search_text,
+                                comment_stack, msgbuf=plural_msgbuf):
+                            yield message
+                    elif not isinstance(directive, StripDirective):
+                        singular_msgbuf.append(*previous)
+                        plural_msgbuf.append(*previous)
+            else:
+                if previous[0] is START:
+                    for message in translator._extract_attrs(previous,
+                                                             gettext_functions,
+                                                             search_text):
+                        yield message
+                singular_msgbuf.append(*previous)
+                plural_msgbuf.append(*previous)
+            previous = event
+
+        if not strip:
+            # No enclosing element was skipped, so the trailing event belongs
+            # to the message content.
+            singular_msgbuf.append(*previous)
+            plural_msgbuf.append(*previous)
+
+        yield self.lineno, 'ngettext', \
+            (singular_msgbuf.format(), plural_msgbuf.format()), \
+            comment_stack[-1:]
+
+    def _is_plural(self, numeral, ngettext):
+        """Return whether ``ngettext`` picks the plural form for ``numeral``.
+
+        The function is probed with two dummy strings and the result is
+        compared against the plural one.
+        """
+        # XXX: should we test which form was chosen like this!?!?!?
+        # There should be no match in any catalogue for these singular and
+        # plural test strings
+        singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
+        plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
+        return ngettext(singular, plural, numeral) == plural
+
+
+class DomainDirective(I18NDirective):
+    """Implementation of the ``i18n:domain`` directive which allows choosing
+    another i18n domain (catalog) to translate from.
+
+    >>> from genshi.filters.tests.i18n import DummyTranslations
+    >>> tmpl = MarkupTemplate('''\
+        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    ...   <p i18n:msg="">Bar</p>
+    ...   <div i18n:domain="foo">
+    ...     <p i18n:msg="">FooBar</p>
+    ...     <p>Bar</p>
+    ...     <p i18n:domain="bar" i18n:msg="">Bar</p>
+    ...     <p i18n:domain="">Bar</p>
+    ...   </div>
+    ...   <p>Bar</p>
+    ... </html>''')
+
+    >>> translations = DummyTranslations({'Bar': 'Voh'})
+    >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
+    >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
+    >>> translator = Translator(translations)
+    >>> translator.setup(tmpl)
+
+    >>> print(tmpl.generate().render())
+    <html>
+      <p>Voh</p>
+      <div>
+        <p>BarFoo</p>
+        <p>foo_Bar</p>
+        <p>bar_Bar</p>
+        <p>Voh</p>
+      </div>
+      <p>Voh</p>
+    </html>
+    """
+    __slots__ = ['domain']
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        """Store the domain name given as the directive value.
+
+        :param value: the domain name; a missing, empty or whitespace-only
+                      value selects the ``'__DEFAULT__'`` marker instead
+        """
+        Directive.__init__(self, None, template, namespaces, lineno, offset)
+        self.domain = value and value.strip() or '__DEFAULT__'
+
+    @classmethod
+    def attach(cls, template, stream, value, namespaces, pos):
+        """Normalize the element form of the directive before attaching it.
+
+        When ``value`` is a dict (``<i18n:domain name="...">``) the domain is
+        taken from its ``name`` entry; any other value is passed on unchanged.
+        """
+        if isinstance(value, dict):
+            value = value.get('name')
+        return super(DomainDirective, cls).attach(template, stream, value,
+                                                  namespaces, pos)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        """Apply the remaining directives with ``_i18n.domain`` set.
+
+        The domain is pushed as a new context frame which is popped again
+        once the wrapped stream has been consumed completely.
+        """
+        ctxt.push({'_i18n.domain': self.domain})
+        for event in _apply_directives(stream, directives, ctxt, vars):
+            yield event
+        ctxt.pop()
+
+
+class Translator(DirectiveFactory):
+    """Can extract and translate localizable strings from markup streams and
+    templates.
+
+    For example, assume the following template:
+
+    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
+    ...   <head>
+    ...     <title>Example</title>
+    ...   </head>
+    ...   <body>
+    ...     <h1>Example</h1>
+    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+    ...   </body>
+    ... </html>''', filename='example.html')
+
+    For demonstration, we define a dummy ``gettext``-style function with a
+    hard-coded translation table, and pass that to the `Translator` initializer:
+
+    >>> def pseudo_gettext(string):
+    ...     return {
+    ...         'Example': 'Beispiel',
+    ...         'Hello, %(name)s': 'Hallo, %(name)s'
+    ...     }[string]
+    >>> translator = Translator(pseudo_gettext)
+
+    Next, the translator needs to be prepended to any already defined filters
+    on the template:
+
+    >>> tmpl.filters.insert(0, translator)
+
+    When generating the template output, our hard-coded translations should be
+    applied as expected:
+
+    >>> print(tmpl.generate(username='Hans', _=pseudo_gettext))
+    <html>
+      <head>
+        <title>Beispiel</title>
+      </head>
+      <body>
+        <h1>Beispiel</h1>
+        <p>Hallo, Hans</p>
+      </body>
+    </html>
+
+    Note that elements defining ``xml:lang`` attributes that do not contain
+    variable expressions are ignored by this filter. That can be used to
+    exclude specific parts of a template from being extracted and translated.
+    """
+
+    directives = [
+        ('domain', DomainDirective),
+        ('comment', CommentDirective),
+        ('msg', MsgDirective),
+        ('choose', ChooseDirective),
+        ('singular', SingularDirective),
+        ('plural', PluralDirective)
+    ]
+
+    IGNORE_TAGS = frozenset([
+        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+    ])
+    INCLUDE_ATTRS = frozenset([
+        'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
+    ])
+    NAMESPACE = I18N_NAMESPACE
+
+    def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
+                 include_attrs=INCLUDE_ATTRS, extract_text=True):
+        """Initialize the translator.
+
+        :param translate: the translation function, for example ``gettext`` or
+                          ``ugettext``.
+        :param ignore_tags: a set of tag names that should not be localized
+        :param include_attrs: a set of attribute names that should be
+                              localized
+        :param extract_text: whether the content of text nodes should be
+                             extracted, or only text in explicit ``gettext``
+                             function calls
+
+        :note: Changed in 0.6: the `translate` parameter can now be either
+               a ``gettext``-style function, or an object compatible with the
+               ``NullTranslations`` or ``GNUTranslations`` interface
+        """
+        self.translate = translate
+        self.ignore_tags = ignore_tags
+        self.include_attrs = include_attrs
+        self.extract_text = extract_text
+
+    def __call__(self, stream, ctxt=None, translate_text=True,
+                 translate_attrs=True):
+        """Translate any localizable strings in the given stream.
+
+        This function shouldn't be called directly. Instead, an instance of
+        the `Translator` class should be registered as a filter with the
+        `Template` or the `TemplateLoader`, or applied as a regular stream
+        filter. If used as a template filter, it should be inserted in front of
+        all the default filters.
+
+        :param stream: the markup event stream
+        :param ctxt: the template context; when given, the gettext functions
+                     are stored in it under the ``_i18n.*`` keys and the
+                     active ``_i18n.domain`` is read from it
+        :param translate_text: whether text nodes should be translated (used
+                               internally)
+        :param translate_attrs: whether attribute values should be translated
+                                (used internally)
+        :return: the localized stream
+        """
+        ignore_tags = self.ignore_tags
+        include_attrs = self.include_attrs
+        skip = 0
+        xml_lang = XML_NAMESPACE['lang']
+        if not self.extract_text:
+            translate_text = False
+            translate_attrs = False
+
+        if type(self.translate) is FunctionType:
+            gettext = self.translate
+            # A bare function has no notion of domains: fall back to it for
+            # every domain instead of leaving ``dgettext`` undefined for the
+            # domain lookup below.
+            dgettext = lambda _, y, gettext=gettext: gettext(y)
+            if ctxt:
+                ctxt['_i18n.gettext'] = gettext
+        else:
+            gettext = self.translate.ugettext
+            ngettext = self.translate.ungettext
+            try:
+                dgettext = self.translate.dugettext
+                dngettext = self.translate.dungettext
+            except AttributeError:
+                # Bind the plain functions as defaults *now*: ``gettext`` is
+                # rebound below when a domain is active, and a late-bound
+                # closure would then call the rebound wrapper and recurse
+                # forever.
+                dgettext = lambda _, y, gettext=gettext: gettext(y)
+                dngettext = lambda _, s, p, n, ngettext=ngettext: \
+                    ngettext(s, p, n)
+            if ctxt:
+                ctxt['_i18n.gettext'] = gettext
+                ctxt['_i18n.ngettext'] = ngettext
+                ctxt['_i18n.dgettext'] = dgettext
+                ctxt['_i18n.dngettext'] = dngettext
+
+        if ctxt and ctxt.get('_i18n.domain'):
+            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
+
+        for kind, data, pos in stream:
+
+            # skip chunks that should not be localized
+            if skip:
+                if kind is START:
+                    skip += 1
+                elif kind is END:
+                    skip -= 1
+                yield kind, data, pos
+                continue
+
+            # handle different events that can be localized
+            if kind is START:
+                tag, attrs = data
+                if tag in ignore_tags or \
+                        isinstance(attrs.get(xml_lang), basestring):
+                    skip += 1
+                    yield kind, data, pos
+                    continue
+
+                new_attrs = []
+                changed = False
+
+                for name, value in attrs:
+                    newval = value
+                    if isinstance(value, basestring):
+                        if translate_attrs and name in include_attrs:
+                            newval = gettext(value)
+                    else:
+                        # Non-string attribute values are event streams of
+                        # their own; run them through the filter with text
+                        # translation switched off.
+                        newval = list(
+                            self(_ensure(value), ctxt, translate_text=False)
+                        )
+                    if newval != value:
+                        value = newval
+                        changed = True
+                    new_attrs.append((name, value))
+                if changed:
+                    attrs = Attrs(new_attrs)
+
+                yield kind, (tag, attrs), pos
+
+            elif translate_text and kind is TEXT:
+                # Translate the stripped text only, keeping the surrounding
+                # whitespace of the original node.
+                text = data.strip()
+                if text:
+                    data = data.replace(text, unicode(gettext(text)))
+                yield kind, data, pos
+
+            elif kind is SUB:
+                directives, substream = data
+                current_domain = None
+                for idx, directive in enumerate(directives):
+                    # Organize directives to make everything work
+                    # FIXME: There's got to be a better way to do this!
+                    if isinstance(directive, DomainDirective):
+                        # Grab current domain and update context
+                        current_domain = directive.domain
+                        ctxt.push({'_i18n.domain': current_domain})
+                        # Put domain directive as the first one in order to
+                        # update context before any other directives evaluation
+                        directives.insert(0, directives.pop(idx))
+
+                # If this is an i18n directive, no need to translate text
+                # nodes here
+                is_i18n_directive = any([
+                    isinstance(d, ExtractableI18NDirective)
+                    for d in directives
+                ])
+                substream = list(self(substream, ctxt,
+                                      translate_text=not is_i18n_directive,
+                                      translate_attrs=translate_attrs))
+                yield kind, (directives, substream), pos
+
+                if current_domain:
+                    ctxt.pop()
+            else:
+                yield kind, data, pos
+
+    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
+                search_text=True, comment_stack=None):
+        """Extract localizable strings from the given template stream.
+
+        For every string found, this function yields a ``(lineno, function,
+        message, comments)`` tuple, where:
+
+        * ``lineno`` is the number of the line on which the string was found,
+        * ``function`` is the name of the ``gettext`` function used (if the
+          string was extracted from embedded Python code), and
+        *  ``message`` is the string itself (a ``unicode`` object, or a tuple
+           of ``unicode`` objects for functions with multiple string
+           arguments).
+        *  ``comments`` is a list of comments related to the message, extracted
+           from ``i18n:comment`` attributes found in the markup
+
+        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
+        ...   <head>
+        ...     <title>Example</title>
+        ...   </head>
+        ...   <body>
+        ...     <h1>Example</h1>
+        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
+        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
+        ...   </body>
+        ... </html>''', filename='example.html')
+        >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
+        ...    print('%d, %r, %r' % (line, func, msg))
+        3, None, u'Example'
+        6, None, u'Example'
+        7, '_', u'Hello, %(name)s'
+        8, 'ngettext', (u'You have %d item', u'You have %d items', None)
+
+        :param stream: the event stream to extract strings from; can be a
+                       regular stream or a template stream
+        :param gettext_functions: a sequence of function names that should be
+                                  treated as gettext-style localization
+                                  functions
+        :param search_text: whether the content of text nodes should be
+                            extracted (used internally)
+        :param comment_stack: list of translator comments currently in
+                              effect (used internally); a fresh list is
+                              created when omitted
+
+        :note: Changed in 0.4.1: For a function with multiple string arguments
+               (such as ``ngettext``), a single item with a tuple of strings is
+               yielded, instead an item for each string argument.
+        :note: Changed in 0.6: The returned tuples now include a fourth
+               element, which is a list of comments for the translator.
+        """
+        if not self.extract_text:
+            search_text = False
+        if comment_stack is None:
+            comment_stack = []
+        skip = 0
+
+        xml_lang = XML_NAMESPACE['lang']
+
+        for kind, data, pos in stream:
+            if skip:
+                if kind is START:
+                    skip += 1
+                if kind is END:
+                    skip -= 1
+
+            if kind is START and not skip:
+                tag, attrs = data
+                if tag in self.ignore_tags or \
+                        isinstance(attrs.get(xml_lang), basestring):
+                    skip += 1
+                    continue
+
+                for message in self._extract_attrs((kind, data, pos),
+                                                   gettext_functions,
+                                                   search_text=search_text):
+                    yield message
+
+            elif not skip and search_text and kind is TEXT:
+                text = data.strip()
+                # Only text containing at least one alphabetic character is
+                # considered a message.
+                if text and [ch for ch in text if ch.isalpha()]:
+                    yield pos[1], None, text, comment_stack[-1:]
+
+            elif kind is EXPR or kind is EXEC:
+                for funcname, strings in extract_from_code(data,
+                                                           gettext_functions):
+                    # XXX: Do we need to grab i18n:comment from comment_stack ???
+                    yield pos[1], funcname, strings, []
+
+            elif kind is SUB:
+                directives, substream = data
+                in_comment = False
+
+                # NOTE(review): entries are popped from ``directives`` while it
+                # is being enumerated, so the entry following a popped one is
+                # not visited, and the list stored in the stream event itself
+                # is modified. Behaviour is left untouched here; confirm
+                # before relying on either effect.
+                for idx, directive in enumerate(directives):
+                    # Do a first loop to see if there's a comment directive
+                    # If there is update context and pop it from directives
+                    if isinstance(directive, CommentDirective):
+                        in_comment = True
+                        comment_stack.append(directive.comment)
+                        if len(directives) == 1:
+                            # in case we're in the presence of something like:
+                            # <p i18n:comment="foo">Foo</p>
+                            for message in self.extract(
+                                    substream, gettext_functions,
+                                    search_text=search_text and not skip,
+                                    comment_stack=comment_stack):
+                                yield message
+                        directives.pop(idx)
+                    elif not isinstance(directive, I18NDirective):
+                        # Remove all other non i18n directives from the process
+                        directives.pop(idx)
+
+                if not directives and not in_comment:
+                    # Extract content if there's no directives because
+                    # strip was pop'ed and not because comment was pop'ed.
+                    # Extraction in this case has been taken care of.
+                    for message in self.extract(
+                            substream, gettext_functions,
+                            search_text=search_text and not skip):
+                        yield message
+
+                for directive in directives:
+                    if isinstance(directive, ExtractableI18NDirective):
+                        for message in directive.extract(self,
+                                substream, gettext_functions,
+                                search_text=search_text and not skip,
+                                comment_stack=comment_stack):
+                            yield message
+                    else:
+                        for message in self.extract(
+                                substream, gettext_functions,
+                                search_text=search_text and not skip,
+                                comment_stack=comment_stack):
+                            yield message
+
+                if in_comment:
+                    comment_stack.pop()
+
+    def get_directive_index(self, dir_cls):
+        """Return the sort index of the given directive class.
+
+        Classes listed in ``self._dir_order`` (not defined in this class;
+        presumably provided by `DirectiveFactory`) get a negative index
+        derived from their position; any other class gets the length of that
+        list.
+        """
+        total = len(self._dir_order)
+        if dir_cls in self._dir_order:
+            return self._dir_order.index(dir_cls) - total
+        return total
+
+    def setup(self, template):
+        """Convenience function to register the `Translator` filter and the
+        related directives with the given template.
+
+        :param template: a `Template` instance
+        """
+        template.filters.insert(0, self)
+        if hasattr(template, 'add_directives'):
+            template.add_directives(Translator.NAMESPACE, self)
+
+    def _extract_attrs(self, event, gettext_functions, search_text):
+        """Yield the messages found in the attributes of a ``START`` event.
+
+        Plain string values are reported (stripped) when text search is on
+        and the attribute name is in ``include_attrs``; every other value is
+        handed to `extract` with text search switched off.
+        """
+        for name, value in event[1][1]:
+            if search_text and isinstance(value, basestring):
+                if name in self.include_attrs:
+                    text = value.strip()
+                    if text:
+                        yield event[2][1], None, text, []
+            else:
+                for message in self.extract(_ensure(value), gettext_functions,
+                                            search_text=False):
+                    yield message
+
+
+class MessageBuffer(object):
+    """Helper class for managing internationalized mixed content.
+
+    Events appended to the buffer are turned into a message identifier in
+    which nested elements appear as ``[N:...]`` groups and expressions as
+    ``%(name)s`` placeholders; `translate` maps a translated message back
+    onto the buffered events.
+
+    :since: version 0.5
+    """
+
+    def __init__(self, directive=None):
+        """Initialize the message buffer.
+
+        :param directive: the directive owning the buffer; its ``params``
+                          list names the expressions of the message. Without
+                          a directive the buffer starts with no parameters.
+        :type directive: I18NDirective
+        """
+        params = []
+        if directive is not None:
+            params = directive.params
+        # params list needs to be copied so that directives can be evaluated
+        # more than once. ``orig_params`` gets a copy of its own: ``params``
+        # is consumed while events are appended, and the error message in
+        # `append` needs the untouched list.
+        self.orig_params = list(params)
+        self.params = list(params)
+        self.directive = directive
+        self.string = []
+        self.events = {}
+        self.values = {}
+        self.depth = 1
+        self.order = 1
+        self.stack = [0]
+        self.subdirectives = {}
+
+    def append(self, kind, data, pos):
+        """Append a stream event to the buffer.
+
+        :param kind: the stream event kind
+        :param data: the event data
+        :param pos: the position of the event in the source
+        :raise IndexError: if the message contains more expressions than
+                           parameter names were given to the directive
+        """
+        if kind is SUB:
+            # The order needs to be +1 because a new START kind event will
+            # happen and we need to wrap those events into our custom kind(s)
+            order = self.stack[-1] + 1
+            subdirectives, substream = data
+            # Store the directives that should be applied after translation
+            self.subdirectives.setdefault(order, []).extend(subdirectives)
+            self.events.setdefault(order, []).append((SUB_START, None, pos))
+            for skind, sdata, spos in substream:
+                self.append(skind, sdata, spos)
+            self.events.setdefault(order, []).append((SUB_END, None, pos))
+        elif kind is TEXT:
+            if '[' in data or ']' in data:
+                # Quote [ and ] if it ain't us adding it, ie, if the user is
+                # using those chars in his templates, escape them
+                data = data.replace('[', '\\[').replace(']', '\\]')
+            self.string.append(data)
+            self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
+        elif kind is EXPR:
+            if self.params:
+                param = self.params.pop(0)
+            else:
+                params = ', '.join(['"%s"' % p for p in self.orig_params if p])
+                if params:
+                    params = "(%s)" % params
+                raise IndexError("%d parameters%s given to 'i18n:%s' but "
+                                 "%d or more expressions used in '%s', line %s"
+                                 % (len(self.orig_params), params,
+                                    getattr(self.directive, 'tagname', None),
+                                    len(self.orig_params) + 1,
+                                    os.path.basename(pos[0] or
+                                                     'In-memory Template'),
+                                    pos[1]))
+            self.string.append('%%(%s)s' % param)
+            self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
+            self.values[param] = (kind, data, pos)
+        elif kind is START:
+            # Open a numbered ``[N:`` group for the nested element
+            self.string.append('[%d:' % self.order)
+            self.stack.append(self.order)
+            self.events.setdefault(self.stack[-1],
+                                   []).append((kind, data, pos))
+            self.depth += 1
+            self.order += 1
+        elif kind is END:
+            self.depth -= 1
+            # Depth starts at 1, so the END that brings it down to zero does
+            # not close a group opened by this buffer and is dropped.
+            if self.depth:
+                self.events[self.stack[-1]].append((kind, data, pos))
+                self.string.append(']')
+                self.stack.pop()
+
+    def format(self):
+        """Return a message identifier representing the content in the
+        buffer.
+        """
+        return ''.join(self.string).strip()
+
+    def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
+        """Interpolate the given message translation with the events in the
+        buffer and return the translated stream.
+
+        :param string: the translated message string
+        :param regex: pattern matching the ``%(name)s`` placeholders
+        """
+        substream = None
+
+        def yield_parts(string):
+            # ``regex.split`` alternates literal text (even indexes) with the
+            # captured parameter names (odd indexes).
+            for idx, part in enumerate(regex.split(string)):
+                if idx % 2:
+                    yield self.values[part]
+                elif part:
+                    yield (TEXT,
+                           part.replace('\\[', '[').replace('\\]', ']'),
+                           (None, -1, -1))
+
+        parts = parse_msg(string)
+        # Number of message parts still to be handled for every group
+        remaining = {}
+        for order, _text in parts:
+            remaining[order] = remaining.get(order, 0) + 1
+
+        for order, text in parts:
+            if remaining[order] == 1:
+                # Last part of the group: all events left belong to it
+                events = self.events[order]
+            else:
+                events = [self.events[order].pop(0)]
+            remaining[order] -= 1
+
+            for event in events:
+                kind = event[0]
+                if kind is SUB_START:
+                    substream = []
+                    continue
+                if kind is SUB_END:
+                    # Yield a substream which might have directives to be
+                    # applied to it (after translation events)
+                    yield SUB, (self.subdirectives[order], substream), event[2]
+                    substream = None
+                    continue
+                if kind is EXPR:
+                    # These are handled on the strings itself
+                    continue
+
+                # Collect what this event contributes: a START comes before
+                # the pending text, a TEXT is replaced by it, and anything
+                # else follows it.
+                output = []
+                if kind is START:
+                    output.append(event)
+                if text:
+                    output.extend(yield_parts(text))
+                    # String handled, reset it
+                    text = None
+                if kind is not START and kind is not TEXT:
+                    output.append(event)
+
+                for item in output:
+                    if substream is not None:
+                        substream.append(item)
+                    else:
+                        yield item
+
+
+def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
+    """Parse a translated message using Genshi mixed content message
+    formatting.
+
+    >>> parse_msg("See [1:Help].")
+    [(0, 'See '), (1, 'Help'), (0, '.')]
+
+    >>> parse_msg("See [1:our [2:Help] page] for details.")
+    [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
+
+    >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
+    [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
+
+    >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
+    [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
+
+    :param string: the translated message string
+    :return: a list of ``(order, string)`` tuples
+    :rtype: `list`
+    """
+    parts = []
+    stack = [0]
+    while True:
+        mo = regex.search(string)
+        if not mo:
+            break
+
+        if mo.start() or stack[-1]:
+            parts.append((stack[-1], string[:mo.start()]))
+        string = string[mo.end():]
+
+        orderno = mo.group(1)
+        if orderno is not None:
+            stack.append(int(orderno))
+        elif len(stack) > 1:
+            stack.pop()
+        # else: an unmatched "]" at the top level is dropped; order 0 stays
+        # on the stack so that the rest of the message can still be parsed
+
+    if string:
+        parts.append((stack[-1], string))
+
+    return parts
+
+
+def extract_from_code(code, gettext_functions):
+    """Extract gettext strings from the AST of a `Code` object.
+
+    >>> from genshi.template.eval import Expression
+    >>> expr = Expression('_("Hello")')
+    >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
+    [('_', u'Hello')]
+
+    >>> expr = Expression('ngettext("You have %(num)s item", '
+    ...                            '"You have %(num)s items", num)')
+    >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
+    [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
+
+    :param code: the `Code` object
+    :type code: `genshi.template.eval.Code`
+    :param gettext_functions: a sequence of function names
+    :since: version 0.5
+    """
+    def _walk(node):
+        if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \
+                and node.func.id in gettext_functions:
+            strings = []
+            def _add(arg):
+                # unicode() with an encoding fails on text already in unicode
+                if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode):
+                    strings.append(arg.s)
+                elif isinstance(arg, _ast.Str) and isinstance(arg.s, str):
+                    strings.append(unicode(arg.s, 'utf-8'))
+                elif arg:
+                    strings.append(None)
+            for arg in node.args + [node.starargs, node.kwargs]:
+                _add(arg)
+            if len(strings) == 1:
+                yield node.func.id, strings[0]
+            else:
+                yield node.func.id, tuple(strings)
+        elif node._fields:
+            children = []
+            for field in node._fields:
+                child = getattr(node, field, None)
+                if isinstance(child, list):
+                    children.extend(child)
+                elif isinstance(child, _ast.AST):
+                    children.append(child)
+            for child in children:
+                for funcname, strings in _walk(child):
+                    yield funcname, strings
+    return _walk(code.ast)
+
+
+def extract(fileobj, keywords, comment_tags, options):
+    """Babel extraction method for Genshi templates.
+
+    :param fileobj: the file-like object the messages should be extracted from
+    :param keywords: a list of keywords (i.e. function names) that should be
+                     recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results (not used by this function)
+    :param options: a dictionary of additional options
+    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
+    :rtype: ``iterator``
+    """
+    def _qnames(key, default):
+        names = options.get(key, default)
+        if isinstance(names, basestring):
+            names = names.split()
+        return [QName(name) for name in names]
+
+    template_class = options.get('template_class', MarkupTemplate)
+    if isinstance(template_class, basestring):
+        modname, clsname = template_class.split(':', 1)
+        module = __import__(modname, {}, {}, [clsname])
+        template_class = getattr(module, clsname)
+    encoding = options.get('encoding', None)
+
+    extract_text = options.get('extract_text', True)
+    if isinstance(extract_text, basestring):
+        extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')
+
+    ignore_tags = _qnames('ignore_tags', Translator.IGNORE_TAGS)
+    include_attrs = _qnames('include_attrs', Translator.INCLUDE_ATTRS)
+
+    filename = getattr(fileobj, 'name', None)
+    tmpl = template_class(fileobj, filename=filename, encoding=encoding)
+    tmpl.loader = None
+
+    translator = Translator(None, ignore_tags, include_attrs, extract_text)
+    if hasattr(tmpl, 'add_directives'):
+        tmpl.add_directives(Translator.NAMESPACE, translator)
+    for message in translator.extract(tmpl.stream, gettext_functions=keywords):
+        yield message
diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py
new file mode 100644
index 0000000..9b75b06
--- /dev/null
+++ b/genshi/filters/transform.py
@@ -0,0 +1,1310 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""A filter for functional-style transformations of markup streams.
+
+The `Transformer` filter provides a variety of transformations that can be
+applied to parts of streams that match given XPath expressions. These
+transformations can be chained to achieve results that would be comparatively
+tedious to achieve by writing stream filters by hand. The approach of chaining
+node selection and transformation has been inspired by the `jQuery`_ JavaScript
+library.
+
+ .. _`jQuery`: http://jquery.com/
+
+For example, the following transformation upper-cases the text of the ``<em>``
+element in the ``<body>`` and replaces the ``<em>`` tags with ``<u>``:
+
+>>> from genshi.builder import tag
+>>> html = HTML('''<html>
+...  <head><title>Some Title</title></head>
+...  <body>
+...    Some <em>body</em> text.
+...  </body>
+... </html>''')
+>>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
+...                                    .unwrap().wrap(tag.u))
+<html>
+  <head><title>Some Title</title></head>
+  <body>
+    Some <u>BODY</u> text.
+  </body>
+</html>
+
+The ``Transformer`` supports a large number of useful transformations out of
+the box, but custom transformations can be added easily.
+
+:since: version 0.5
+"""
+
+import re
+import sys
+
+from genshi.builder import Element
+from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
+from genshi.path import Path
+
+__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
+           'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
+
+
+class TransformMark(str):
+    """A mark on a transformation stream; equal values share one instance."""
+    __slots__ = []
+    _instances = {}
+
+    def __new__(cls, val):
+        return cls._instances.setdefault(val, str.__new__(cls, val))
+
+
+ENTER = TransformMark('ENTER')
+"""Stream augmentation mark indicating that a selected element is being
+entered."""
+
+INSIDE = TransformMark('INSIDE')
+"""Stream augmentation mark indicating that processing is currently inside a
+selected element."""
+
+OUTSIDE = TransformMark('OUTSIDE')
+"""Stream augmentation mark indicating that a match occurred outside a selected
+element."""
+
+ATTR = TransformMark('ATTR')
+"""Stream augmentation mark indicating a selected element attribute."""
+
+EXIT = TransformMark('EXIT')
+"""Stream augmentation mark indicating that a selected element is being
+exited."""
+
+BREAK = TransformMark('BREAK')
+"""Stream augmentation mark indicating a break between two otherwise contiguous
+blocks of marked events.
+
+This is used primarily by the cut() transform to provide later transforms with
+an opportunity to operate on the cut buffer.
+"""
+
+
+class PushBackStream(object):
+    """Iterable wrapper that lets its consumer hand back a single event, which
+    is then produced again before the wrapped stream is read any further.
+    """
+    def __init__(self, stream):
+        self.stream = iter(stream)
+        self.peek = None
+
+    def push(self, event):
+        # Only one event can be pending at a time
+        assert self.peek is None
+        self.peek = event
+
+    def __iter__(self):
+        while True:
+            pending, self.peek = self.peek, None
+            if pending is not None:
+                yield pending
+                continue
+            try:
+                yield self.stream.next()
+            except StopIteration:
+                if self.peek is None:
+                    raise
+
+
+class Transformer(object):
+    """Stream filter that can apply a variety of different transformations to
+    a stream.
+
+    This is achieved by selecting the events to be transformed using XPath,
+    then applying the transformations to the events matched by the path
+    expression. Each marked event is in the form (mark, (kind, data, pos)),
+    where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.
+
+    The first three marks match `START` and `END` events, and any events
+    contained `INSIDE` any selected XML/HTML element. A non-element match
+    outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE`
+    mark.
+
+    >>> html = HTML('<html><head><title>Some Title</title></head>'
+    ...             '<body>Some <em>body</em> text.</body></html>')
+
+    Transformations act on selected stream events matching an XPath expression.
+    Here's an example of removing some markup (the title, in this case)
+    selected by an expression:
+
+    >>> print(html | Transformer('head/title').remove())
+    <html><head/><body>Some <em>body</em> text.</body></html>
+
+    Inserted content can be passed in the form of a string, or a markup event
+    stream, which includes streams generated programmatically via the
+    `builder` module:
+
+    >>> from genshi.builder import tag
+    >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
+    <html><head><title>Some Title</title></head><body><h1>Document
+    Title</h1>Some <em>body</em> text.</body></html>
+
+    Each XPath expression determines the set of tags that will be acted upon by
+    subsequent transformations. In this example we select the ``<title>`` text,
+    copy it into a buffer, then select the ``<body>`` element and paste the
+    copied text into the body as ``<h1>`` enclosed text:
+
+    >>> buffer = StreamBuffer()
+    >>> print(html | Transformer('head/title/text()').copy(buffer)
+    ...     .end().select('body').prepend(tag.h1(buffer)))
+    <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
+    <em>body</em> text.</body></html>
+
+    Transformations can also be assigned and reused, although care must be
+    taken when using buffers, to ensure that buffers are cleared between
+    transforms:
+
+    >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
+    >>> print(html | emphasis)
+    <html><head><title>Some Title</title></head><body>Some <em
+    class="emphasis">body</em> text.</body></html>
+    """
+
+    __slots__ = ['transforms']
+
+    def __init__(self, path='.'):
+        """Construct a new filter whose first transformation selects `path`.
+
+        :param path: an XPath expression (as string) or a `Path` instance
+        """
+        self.transforms = [SelectTransformation(path)]
+
+    def __call__(self, stream, keep_marks=False):
+        """Run the stream through every transformation of this filter.
+
+        :param stream: the event stream to filter (it is marked by this method)
+        :param keep_marks: Do not strip transformer selection marks from the
+                           stream. Useful for testing.
+        :return: the transformed stream
+        :rtype: `Stream`
+        """
+        events = self._mark(stream)
+        for transformation in self.transforms:
+            events = transformation(events)
+        if not keep_marks:
+            events = self._unmark(events)
+        serializer = getattr(stream, 'serializer', None)
+        return Stream(events, serializer=serializer)
+
+    def apply(self, function):
+        """Return a new `Transformer` with `function` added to the chain.
+
+        Any callable accepting a marked stream can be used as a transform;
+        when another `Transformer` is given, all of its transformations are
+        added instead. Here is a simple `TEXT` event upper-casing transform:
+
+        >>> def upper(stream):
+        ...     for mark, (kind, data, pos) in stream:
+        ...         if mark and kind is TEXT:
+        ...             yield mark, (kind, data.upper(), pos)
+        ...         else:
+        ...             yield mark, (kind, data, pos)
+        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(short_stream | Transformer('.//em/text()').apply(upper))
+        <body>Some <em>TEST</em> text</body>
+        """
+        chain = list(self.transforms)
+        if isinstance(function, Transformer):
+            chain += function.transforms
+        else:
+            chain.append(function)
+        combined = Transformer()
+        combined.transforms = chain
+        return combined
+
+    #{ Selection operations
+
+    def select(self, path):
+        """Mark events matching the given XPath expression, within the current
+        selection.
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer().select('.//em').trace())
+        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        (None, ('TEXT', u'Some ', (None, 1, 6)))
+        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
+        ('EXIT', ('END', QName('em'), (None, 1, 19)))
+        (None, ('TEXT', u' text', (None, 1, 24)))
+        (None, ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :param path: an XPath expression (as string) or a `Path` instance
+        :return: a new transformer that also applies this selection
+        :rtype: `Transformer`
+        """
+        return self.apply(SelectTransformation(path))
+
+    def invert(self):
+        """Invert selection so that marked events become unmarked, and vice
+        versa.
+
+        Specifically, all marks are converted to null marks, and all null
+        marks are converted to OUTSIDE marks.
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer('//em').invert().trace())
+        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
+        (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        (None, ('TEXT', u'test', (None, 1, 15)))
+        (None, ('END', QName('em'), (None, 1, 19)))
+        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
+        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(InvertTransformation())
+
+    def end(self):
+        """End current selection, allowing all events to be selected.
+
+        In this example every event ends up with an OUTSIDE mark:
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer('//em').end().trace())
+        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
+        ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
+        ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
+        ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
+        ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :return: a new transformer that also ends the current selection
+        :rtype: `Transformer`
+        """
+        return self.apply(EndTransformation())
+
+    #{ Deletion operations
+
+    def empty(self):
+        """Remove all content of the selected elements, keeping their tags.
+
+        Example:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').empty())
+        <html><head><title>Some Title</title></head><body>Some <em/>
+        text.</body></html>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(EmptyTransformation())
+
+    def remove(self):
+        """Remove the selection, tags and content alike, from the stream.
+
+        Example:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').remove())
+        <html><head><title>Some Title</title></head><body>Some
+        text.</body></html>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(RemoveTransformation())
+
+    #{ Direct element operations
+
+    def unwrap(self):
+        """Remove outermost enclosing elements from selection, keeping content.
+
+        Example:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').unwrap())
+        <html><head><title>Some Title</title></head><body>Some body
+        text.</body></html>
+
+        :rtype: `Transformer`
+        """
+        return self.apply(UnwrapTransformation())
+
+    def wrap(self, element):
+        """Wrap the selection in the given element.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').wrap('strong'))
+        <html><head><title>Some Title</title></head><body>Some
+        <strong><em>body</em></strong> text.</body></html>
+
+        :param element: either a tag name (as string) or an `Element` object
+        :rtype: `Transformer`
+        """
+        return self.apply(WrapTransformation(element))
+
+    #{ Content insertion operations
+
+    def replace(self, content):
+        """Replace the selection with `content`.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//title/text()').replace('New Title'))
+        <html><head><title>New Title</title></head><body>Some <em>body</em>
+        text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        return self.apply(ReplaceTransformation(content))
+
+    def before(self, content):
+        """Insert `content` immediately before the selection.
+
+        In this example we insert the word 'emphasised' before the <em> opening
+        tag:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').before('emphasised '))
+        <html><head><title>Some Title</title></head><body>Some emphasised
+        <em>body</em> text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        return self.apply(BeforeTransformation(content))
+
+    def after(self, content):
+        """Insert `content` immediately after the selection.
+
+        Here, we insert some text after the </em> closing tag:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em').after(' rock'))
+        <html><head><title>Some Title</title></head><body>Some <em>body</em>
+        rock text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        return self.apply(AfterTransformation(content))
+
+    def prepend(self, content):
+        """Insert `content` right after the ENTER event of the selection.
+
+        Inserting some new text at the start of the <body>:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//body').prepend('Some new body text. '))
+        <html><head><title>Some Title</title></head><body>Some new body text.
+        Some <em>body</em> text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        return self.apply(PrependTransformation(content))
+
+    def append(self, content):
+        """Insert `content` just before the closing END event of the selection.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//body').append(' Some new body text.'))
+        <html><head><title>Some Title</title></head><body>Some <em>body</em>
+        text. Some new body text.</body></html>
+
+        :param content: Either a callable, an iterable of events, or a string
+                        to insert.
+        :rtype: `Transformer`
+        """
+        return self.apply(AppendTransformation(content))
+
+    #{ Attribute manipulation
+
+    def attr(self, name, value):
+        """Add, replace or delete an attribute on selected elements.
+
+        If `value` evaluates to `None` the attribute will be deleted from the
+        element:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
+        ...             '</html>')
+        >>> print(html | Transformer('body/em').attr('class', None))
+        <html><head><title>Some Title</title></head><body>Some <em>body</em>
+        <em>text</em>.</body></html>
+
+        Otherwise the attribute will be set to `value`:
+
+        >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
+        <html><head><title>Some Title</title></head><body>Some <em
+        class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>
+
+        If `value` is a callable it will be called with the attribute name and
+        the `START` event for the matching element. Its return value will then
+        be used to set the attribute:
+
+        >>> def print_attr(name, event):
+        ...     attrs = event[1][1]
+        ...     print(attrs)
+        ...     return attrs.get(name)
+        >>> print(html | Transformer('body/em').attr('class', print_attr))
+        Attrs([(QName('class'), u'before')])
+        Attrs()
+        <html><head><title>Some Title</title></head><body>Some <em
+        class="before">body</em> <em>text</em>.</body></html>
+
+        :param name: the name of the attribute
+        :param value: the new value, `None` to delete, or a callable (see above)
+        :rtype: `Transformer`
+        """
+        return self.apply(AttrTransformation(name, value))
+
+    #{ Buffer operations
+
+    def copy(self, buffer, accumulate=False):
+        """Copy selection into buffer.
+
+        The buffer is replaced by each *contiguous* selection before being passed
+        to the next transformation. If accumulate=True, further selections will
+        be appended to the buffer rather than replacing it.
+
+        >>> from genshi.builder import tag
+        >>> buffer = StreamBuffer()
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('head/title/text()').copy(buffer)
+        ...     .end().select('body').prepend(tag.h1(buffer)))
+        <html><head><title>Some Title</title></head><body><h1>Some
+        Title</h1>Some <em>body</em> text.</body></html>
+
+        This example illustrates that only a single contiguous selection will
+        be buffered:
+
+        >>> print(html | Transformer('head/title/text()').copy(buffer)
+        ...     .end().select('body/em').copy(buffer).end().select('body')
+        ...     .prepend(tag.h1(buffer)))
+        <html><head><title>Some Title</title></head><body><h1>Some
+        Title</h1>Some <em>body</em> text.</body></html>
+        >>> print(buffer)
+        <em>body</em>
+
+        Element attributes can also be copied for later use:
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body><em>Some</em> <em class="before">body</em>'
+        ...             '<em>text</em>.</body></html>')
+        >>> buffer = StreamBuffer()
+        >>> def apply_attr(name, entry):
+        ...     return list(buffer)[0][1][1].get('class')
+        >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
+        ...     .end().buffer().select('body/em[not(@class)]')
+        ...     .attr('class', apply_attr))
+        <html><head><title>Some Title</title></head><body><em
+        class="before">Some</em> <em class="before">body</em><em
+        class="before">text</em>.</body></html>
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: if true, append each selection to the buffer
+                           instead of replacing its content
+        :rtype: `Transformer`
+        :note: Copy (and cut) copy each individual selected object into the
+               buffer before passing to the next transform. For example, the
+               XPath ``*|text()`` will select all elements and text, each
+               instance of which will be copied to the buffer individually
+               before passing to the next transform. This has implications for
+               how ``StreamBuffer`` objects can be used, so some
+               experimentation may be required.
+        """
+        return self.apply(CopyTransformation(buffer, accumulate))
+
+    def cut(self, buffer, accumulate=False):
+        """Copy selection into buffer and remove the selection from the stream.
+
+        >>> from genshi.builder import tag
+        >>> buffer = StreamBuffer()
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...             '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('.//em/text()').cut(buffer)
+        ...     .end().select('.//em').after(tag.h1(buffer)))
+        <html><head><title>Some Title</title></head><body>Some
+        <em/><h1>body</h1> text.</body></html>
+
+        Specifying accumulate=True appends all selected intervals onto the
+        buffer. Combining this with the .buffer() operation allows us to
+        operate on all copied events rather than per-segment. See the
+        documentation on buffer() for more information.
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :rtype: `Transformer`
+        :note: this transformation will buffer the entire input stream
+        """
+        return self.apply(CutTransformation(buffer, accumulate))
+
+    def buffer(self):
+        """Buffer the entire stream into a list before passing it on (can
+        consume a considerable amount of memory).
+
+        Useful in conjunction with copy(accumulate=True) and
+        cut(accumulate=True) to ensure that all marked events in the entire
+        stream are copied to the buffer before further transformations are
+        applied.
+
+        For example, to move all <note> elements inside a <notes> tag at the
+        top of the document:
+
+        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
+        ...            'text <note>two</note>.</body></doc>')
+        >>> buffer = StreamBuffer()
+        >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
+        ...     .end().buffer().select('notes').prepend(buffer))
+        <doc><notes><note>one</note><note>two</note></notes><body>Some  text
+        .</body></doc>
+
+        """
+        return self.apply(list)
+
+    #{ Miscellaneous operations
+
+    def filter(self, filter):
+        """Apply a normal stream filter to the selection. The filter is called
+        once for each contiguous block of marked events.
+
+        >>> from genshi.filters.html import HTMLSanitizer
+        >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
+        ...             '</script> and some more text</body></html>')
+        >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
+        <html><body>Some text and some more text</body></html>
+
+        :param filter: the stream filter to apply to each block of marked events
+        :rtype: `Transformer`
+        """
+        return self.apply(FilterTransformation(filter))
+
+    def map(self, function, kind):
+        """Apply a function to the ``data`` element of events of ``kind`` in
+        the selection.
+
+        >>> html = HTML('<html><head><title>Some Title</title></head>'
+        ...               '<body>Some <em>body</em> text.</body></html>')
+        >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
+        <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
+        text.</body></html>
+
+        :param function: the function to apply
+        :param kind: the kind of event the function should be applied to
+        :rtype: `Transformer`
+        """
+        return self.apply(MapTransformation(function, kind))
+
+    def substitute(self, pattern, replace, count=1):
+        """Run a regular expression substitution over selected text.
+
+        The arguments follow ``re.sub()``; see its documentation for the
+        details of ``replace`` and ``count``.
+
+        >>> html = HTML('<html><body>Some text, some more text and '
+        ...             '<b>some bold text</b>\\n'
+        ...             '<i>some italicised text</i></body></html>')
+        >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
+        <html><body>Some text, some more text and <b>SOME bold text</b>
+        <i>some italicised text</i></body></html>
+        >>> tags = tag.html(tag.body('Some text, some more text and\\n',
+        ...      Markup('<b>some bold text</b>')))
+        >>> print(tags.generate() | Transformer('body').substitute(
+        ...     '(?i)some', 'SOME'))
+        <html><body>SOME text, some more text and
+        <b>SOME bold text</b></body></html>
+
+        :param pattern: a compiled regular expression or a pattern string
+        :param replace: the replacement pattern
+        :param count: how many replacements to make per text fragment; this
+                      method defaults to 1
+        :rtype: `Transformer`
+        """
+        transformation = SubstituteTransformation(pattern, replace, count)
+        return self.apply(transformation)
+
+    def rename(self, name):
+        """Give the selected elements a new tag name.
+
+        >>> html = HTML('<html><body>Some text, some more text and '
+        ...             '<b>some bold text</b></body></html>')
+        >>> print(html | Transformer('body/b').rename('strong'))
+        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
+
+        :param name: the new element name
+        :rtype: `Transformer`
+        """
+        transformation = RenameTransformation(name)
+        return self.apply(transformation)
+
+    def trace(self, prefix='', fileobj=None):
+        """Write every marked event to a file object as it passes by.
+
+        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> print(html | Transformer('em').trace())
+        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
+        (None, ('TEXT', u'Some ', (None, 1, 6)))
+        ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
+        ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
+        ('EXIT', ('END', QName('em'), (None, 1, 19)))
+        (None, ('TEXT', u' text', (None, 1, 24)))
+        (None, ('END', QName('body'), (None, 1, 29)))
+        <body>Some <em>test</em> text</body>
+
+        :param prefix: string put in front of every traced line
+        :param fileobj: writable file-like object receiving the trace; the
+                        standard output stream is used when omitted
+        :rtype: `Transformer`
+        """
+        transformation = TraceTransformation(prefix, fileobj=fileobj)
+        return self.apply(transformation)
+
+    # Internal methods
+
+    def _mark(self, stream):
+        """Pair every event of a plain stream with the ``OUTSIDE`` mark.
+
+        :param stream: iterable of unmarked events
+        """
+        for unmarked_event in stream:
+            yield OUTSIDE, unmarked_event
+
+    def _unmark(self, stream):
+        """Strip the marks from a marked stream.
+
+        Events whose kind is ``None``, ``ATTR`` or ``BREAK`` are dropped; all
+        other events are yielded without their mark.
+
+        :param stream: iterable of ``(mark, event)`` pairs
+        """
+        for unused_mark, event in stream:
+            kind = event[0]
+            if kind is None or kind is ATTR or kind is BREAK:
+                continue
+            yield event
+
+
+class SelectTransformation(object):
+    """Select and mark events that match an XPath expression."""
+
+    def __init__(self, path):
+        """Create selection.
+
+        :param path: an XPath expression (as string) or a `Path` object
+        """
+        # Anything that is not already a `Path` is handed to `Path()`.
+        if not isinstance(path, Path):
+            path = Path(path)
+        self.path = path
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Events whose incoming mark is ``None`` are passed through untouched;
+        every other event is run through the path test and re-marked
+        according to the test result.
+
+        :param stream: the marked event stream to filter
+        """
+        namespaces = {}
+        variables = {}
+        test = self.path.test()
+        stream = iter(stream)
+        # Bound ``next`` method of the iterator (Python 2 iterator protocol);
+        # used below to pull the events nested inside a matched element from
+        # the same iterator the outer loop is consuming.
+        next = stream.next
+        for mark, event in stream:
+            if mark is None:
+                yield mark, event
+                continue
+            result = test(event, namespaces, variables)
+            # XXX This is effectively genshi.core._ensure() for transform
+            # streams.
+            if result is True:
+                if event[0] is START:
+                    yield ENTER, event
+                    # Consume the whole element here: nested events are
+                    # marked INSIDE and the END that brings the depth back to
+                    # zero is marked EXIT.
+                    depth = 1
+                    while depth > 0:
+                        mark, subevent = next()
+                        if subevent[0] is START:
+                            depth += 1
+                        elif subevent[0] is END:
+                            depth -= 1
+                        if depth == 0:
+                            yield EXIT, subevent
+                        else:
+                            yield INSIDE, subevent
+                        # NOTE(review): presumably feeds the consumed event
+                        # to the path test so its state stays in step without
+                        # producing a match -- confirm against `Path.test`.
+                        test(subevent, namespaces, variables, updateonly=True)
+                else:
+                    # A matching non-START event is a selection of its own.
+                    yield OUTSIDE, event
+            elif isinstance(result, Attrs):
+                # XXX  Selected *attributes* are emitted as a synthetic event
+                # whose mark and kind are both ATTR, directly ahead of the
+                # originating event, which itself goes out unmarked.
+                yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
+                yield None, event
+            elif isinstance(result, tuple):
+                # The test produced an event tuple; emit it in place of the
+                # tested event.
+                yield OUTSIDE, result
+            elif result:
+                # XXX Assume everything else is "text"?
+                yield None, (TEXT, unicode(result), (None, -1, -1))
+            else:
+                # No match: the event leaves the selection.
+                yield None, event
+
+
+class InvertTransformation(object):
+    """Swap the marked and the unmarked parts of the stream.
+
+    Specifically, every event that arrives with a mark leaves with a null
+    mark, and every event that arrives without one leaves marked OUTSIDE.
+    """
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if not mark:
+                yield OUTSIDE, event
+            else:
+                yield None, event
+
+
+class EndTransformation(object):
+    """Finish the current selection by re-marking every event OUTSIDE."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for previous_mark, event in stream:
+            # The incoming mark is discarded on purpose.
+            yield OUTSIDE, event
+
+
+class EmptyTransformation(object):
+    """Strip selected elements of everything between their tags."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            yield mark, event
+            if mark is not ENTER:
+                continue
+            # Swallow the element's content; only the closing EXIT event is
+            # let through again.
+            for inner_mark, inner_event in stream:
+                if inner_mark is EXIT:
+                    yield inner_mark, inner_event
+                    break
+
+
+class RemoveTransformation(object):
+    """Drop every marked event from the stream."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if mark is not None:
+                # Part of the selection: leave it out.
+                continue
+            yield None, event
+
+
+class UnwrapTransformation(object):
+    """Remove the outermost enclosing elements from the selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if mark in (ENTER, EXIT):
+                # Opening and closing events of a selected element go away,
+                # everything in between stays.
+                continue
+            yield mark, event
+
+
+class WrapTransformation(object):
+    """Wrap selection in an element."""
+
+    def __init__(self, element):
+        """Create the transform.
+
+        :param element: an `Element` instance, or a value that is passed to
+                        `Element()` to build the wrapper
+        """
+        if isinstance(element, Element):
+            self.element = element
+        else:
+            self.element = Element(element)
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for mark, event in stream:
+            if mark:
+                # Generate the wrapper afresh for each selection. All of its
+                # events but the last are emitted ahead of the selection; the
+                # last one is held back until the selection is over.
+                element = list(self.element.generate())
+                for prefix in element[:-1]:
+                    yield None, prefix
+                yield mark, event
+                start = mark
+                # ``stopped`` stays False only when the inner loop read an
+                # unmarked event that still has to be emitted afterwards.
+                stopped = False
+                for mark, event in stream:
+                    if start is ENTER and mark is EXIT:
+                        yield mark, event
+                        stopped = True
+                        break
+                    if not mark:
+                        break
+                    yield mark, event
+                else:
+                    # The stream ran out while still inside the selection.
+                    stopped = True
+                yield None, element[-1]
+                if not stopped:
+                    # Emit the unmarked event that ended the selection.
+                    yield mark, event
+            else:
+                yield mark, event
+
+
+class TraceTransformation(object):
+    """Write every event to a file object as it passes through."""
+
+    def __init__(self, prefix='', fileobj=None):
+        """Trace constructor.
+
+        :param prefix: text put in front of each traced line
+        :param fileobj: the writable file-like object to write to; a false
+                        value selects ``sys.stdout``
+        """
+        if not fileobj:
+            fileobj = sys.stdout
+        self.prefix = prefix
+        self.fileobj = fileobj
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        for marked_event in stream:
+            line = '%s%s\n' % (self.prefix, marked_event)
+            self.fileobj.write(line)
+            yield marked_event
+
+
+class FilterTransformation(object):
+    """Apply a normal stream filter to the selection. The filter is called once
+    for each selection."""
+
+    def __init__(self, filter):
+        """Create the transform.
+
+        :param filter: The stream filter to apply.
+        """
+        self.filter = filter
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Selected events are collected in a queue; once a selection is
+        complete the queue is run through the filter and the filter's output
+        is emitted marked OUTSIDE.
+
+        :param stream: The marked event stream to filter
+        """
+        def flush(queue):
+            # Run the queued (unmarked) events through the filter, emit the
+            # results marked OUTSIDE, then empty the queue in place.
+            if queue:
+                for event in self.filter(queue):
+                    yield OUTSIDE, event
+                del queue[:]
+
+        queue = []
+        for mark, event in stream:
+            if mark is ENTER:
+                # Element selection: queue everything up to and including
+                # the EXIT event.
+                queue.append(event)
+                for mark, event in stream:
+                    queue.append(event)
+                    if mark is EXIT:
+                        break
+                for queue_event in flush(queue):
+                    yield queue_event
+            elif mark is OUTSIDE:
+                # Contiguous run of OUTSIDE events: queue until an event with
+                # a different mark shows up or the stream ends.
+                stopped = False
+                queue.append(event)
+                for mark, event in stream:
+                    if mark is not OUTSIDE:
+                        break
+                    queue.append(event)
+                else:
+                    stopped = True
+                for queue_event in flush(queue):
+                    yield queue_event
+                if not stopped:
+                    # NOTE(review): the event that ended the run is emitted
+                    # as-is, whatever its mark; an ENTER directly following
+                    # an OUTSIDE run therefore appears to bypass the filter
+                    # -- confirm whether that is intended.
+                    yield mark, event
+            else:
+                yield mark, event
+        # Flush whatever is still queued once the stream is exhausted.
+        for queue_event in flush(queue):
+            yield queue_event
+
+
+class MapTransformation(object):
+    """Pass the `data` element of selected events of a given ``kind`` through
+    a function.
+    """
+
+    def __init__(self, function, kind):
+        """Create the transform.
+
+        :param function: callable taking one argument, the `data` element of
+                         a selected event, and returning its replacement
+        :param kind: the stream event ``kind`` the `function` is applied to;
+                     ``None`` applies it to marked events of every kind
+        """
+        self.kind = kind
+        self.function = function
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, (kind, data, pos) in stream:
+            if mark and self.kind in (None, kind):
+                data = self.function(data)
+            yield mark, (kind, data, pos)
+
+
+class SubstituteTransformation(object):
+    """Run a regular expression substitution over selected text events.
+
+    The arguments follow ``re.sub()``; refer to its documentation for details.
+    """
+    def __init__(self, pattern, replace, count=0):
+        """Create the transform.
+
+        :param pattern: a regular expression object, or a string that is
+                        compiled with ``re.compile()``
+        :param replace: Replacement pattern.
+        :param count: Number of replacements to make in each text fragment;
+                      passed to the pattern's ``sub()`` unchanged.
+        """
+        if not isinstance(pattern, basestring):
+            compiled = pattern
+        else:
+            compiled = re.compile(pattern)
+        self.pattern = compiled
+        self.replace = replace
+        self.count = count
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, (kind, data, pos) in stream:
+            if mark is None or kind is not TEXT:
+                yield mark, (kind, data, pos)
+                continue
+            substituted = self.pattern.sub(self.replace, data, self.count)
+            if isinstance(data, Markup):
+                # Keep markup text flagged as markup after the substitution.
+                substituted = Markup(substituted)
+            yield mark, (kind, substituted, pos)
+
+
+class RenameTransformation(object):
+    """Give matching elements a new tag name."""
+    def __init__(self, name):
+        """Create the transform.
+
+        :param name: the new element name; stored as a `QName`
+        """
+        self.name = QName(name)
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, (kind, data, pos) in stream:
+            if mark is ENTER:
+                # Opening event: swap the tag, keep the second data item.
+                old_data = data
+                data = (self.name, old_data[1])
+            elif mark is EXIT:
+                # Closing event: its data is the tag name alone.
+                data = self.name
+            yield mark, (kind, data, pos)
+
+
+class InjectorTransformation(object):
+    """Abstract base class for transformations that add content to a stream.
+
+    Subclasses implement ``__call__`` and use ``_inject()`` to obtain the
+    content as unmarked events.
+
+    >>> class Top(InjectorTransformation):
+    ...     def __call__(self, stream):
+    ...         for event in self._inject():
+    ...             yield event
+    ...         for event in stream:
+    ...             yield event
+    >>> html = HTML('<body>Some <em>test</em> text</body>')
+    >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
+    Prefix <body>Some <em>test</em> text</body>
+    """
+    def __init__(self, content):
+        """Create a new injector.
+
+        :param content: An iterable of Genshi stream events, or a string to be
+                        injected. An object with a ``__call__`` attribute is
+                        called each time the content is injected and its
+                        result is used instead.
+        """
+        self.content = content
+
+    def _inject(self):
+        """Yield the content as ``(None, event)`` pairs."""
+        source = self.content
+        if hasattr(source, '__call__'):
+            source = source()
+        for injected in _ensure(source):
+            yield None, injected
+
+
+class ReplaceTransformation(InjectorTransformation):
+    """Substitute the injected content for the selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        stream = PushBackStream(stream)
+        for mark, event in stream:
+            if mark is None:
+                yield mark, event
+                continue
+            first_mark = mark
+            for injected in self._inject():
+                yield injected
+            # Discard the remainder of the selection.
+            if first_mark is ENTER:
+                # Element selection: drop everything through the EXIT event.
+                for mark, event in stream:
+                    if mark is EXIT:
+                        break
+            else:
+                # Other selections end at the first differently marked event,
+                # which is pushed back for the outer loop.
+                for mark, event in stream:
+                    if mark != first_mark:
+                        stream.push((mark, event))
+                        break
+
+
+class BeforeTransformation(InjectorTransformation):
+    """Put the injected content in front of the selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        stream = PushBackStream(stream)
+        for mark, event in stream:
+            if mark is None:
+                yield mark, event
+                continue
+            first_mark = mark
+            for injected in self._inject():
+                yield injected
+            yield mark, event
+            # Pass the rest of the selection through so the content is only
+            # injected once per selection.
+            if first_mark is ENTER:
+                for mark, event in stream:
+                    yield mark, event
+                    if mark is EXIT:
+                        break
+            else:
+                for mark, event in stream:
+                    if mark != first_mark:
+                        # Not part of this selection; let the outer loop
+                        # look at it again.
+                        stream.push((mark, event))
+                        break
+                    yield mark, event
+
+
+class AfterTransformation(InjectorTransformation):
+    """Put the injected content behind the selection."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        stream = PushBackStream(stream)
+        for mark, event in stream:
+            yield mark, event
+            if not mark:
+                continue
+            first_mark = mark
+            # Pass the rest of the selection through before injecting.
+            if first_mark is ENTER:
+                for mark, event in stream:
+                    yield mark, event
+                    if mark is EXIT:
+                        break
+            else:
+                for mark, event in stream:
+                    if mark != first_mark:
+                        # Not part of this selection; let the outer loop
+                        # look at it again.
+                        stream.push((mark, event))
+                        break
+                    yield mark, event
+            for injected in self._inject():
+                yield injected
+
+
+class PrependTransformation(InjectorTransformation):
+    """Insert content right after the opening event of selected elements."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, event in stream:
+            yield mark, event
+            if mark is not ENTER:
+                continue
+            for injected in self._inject():
+                yield injected
+
+
+class AppendTransformation(InjectorTransformation):
+    """Insert content right before the closing event of selected elements."""
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        for mark, event in stream:
+            yield mark, event
+            if mark is not ENTER:
+                continue
+            # Pass the element's content through and stop at its EXIT event,
+            # which is held back until the content has been injected.
+            for mark, event in stream:
+                if mark is not EXIT:
+                    yield mark, event
+                else:
+                    break
+            for injected in self._inject():
+                yield injected
+            yield mark, event
+
+
+class AttrTransformation(object):
+    """Set or remove an attribute on selected elements."""
+
+    def __init__(self, name, value):
+        """Construct transform.
+
+        :param name: name of the attribute that should be set
+        :param value: the value to set. An object with a ``__call__``
+                      attribute is called with the attribute name and the
+                      opening event and its result is used; a resulting
+                      value of ``None`` removes the attribute.
+        """
+        self.name = name
+        self.value = value
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        :param stream: The marked event stream to filter
+        """
+        value_is_callable = hasattr(self.value, '__call__')
+        for mark, (kind, data, pos) in stream:
+            if mark is not ENTER:
+                yield mark, (kind, data, pos)
+                continue
+            if value_is_callable:
+                new_value = self.value(self.name, (kind, data, pos))
+            else:
+                new_value = self.value
+            if new_value is None:
+                new_attrs = data[1] - [QName(self.name)]
+            else:
+                new_attrs = data[1] | [(QName(self.name), new_value)]
+            yield mark, (kind, (data[0], new_attrs), pos)
+
+
+
+class StreamBuffer(Stream):
+    """Buffer of stream events filled by the cut and copy transformations."""
+
+    def __init__(self):
+        """Create the buffer, starting out with an empty event list."""
+        Stream.__init__(self, [])
+
+    def append(self, event):
+        """Store one more event at the end of the buffer.
+
+        :param event: the markup event to add
+        """
+        buffered = self.events
+        buffered.append(event)
+
+    def reset(self):
+        """Discard all buffered events, keeping the same list object."""
+        self.events[:] = []
+
+
+class CopyTransformation(object):
+    """Store a copy of the selected events in a buffer for later insertion."""
+
+    def __init__(self, buffer, accumulate=False):
+        """Create the copy transformation.
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: when false, the buffer is reset here and again
+                           before every selection is copied; when true, it
+                           is never reset by this transformation
+        """
+        if not accumulate:
+            buffer.reset()
+        self.buffer = buffer
+        self.accumulate = accumulate
+
+    def __call__(self, stream):
+        """Apply the transformation to the marked stream.
+
+        :param stream: the marked event stream to filter
+        """
+        stream = PushBackStream(stream)
+
+        for mark, event in stream:
+            if not mark:
+                yield mark, event
+                continue
+            if not self.accumulate:
+                self.buffer.reset()
+            first_mark = mark
+            # The selection is held back until it has been copied completely.
+            pending = [(mark, event)]
+            self.buffer.append(event)
+            if first_mark is ENTER:
+                for mark, event in stream:
+                    pending.append((mark, event))
+                    self.buffer.append(event)
+                    if mark is EXIT:
+                        break
+            else:
+                for mark, event in stream:
+                    if mark != first_mark:
+                        # Not part of this selection; let the outer loop
+                        # look at it again.
+                        stream.push((mark, event))
+                        break
+                    pending.append((mark, event))
+                    self.buffer.append(event)
+            for held in pending:
+                yield held
+
+
+class CutTransformation(object):
+    """Cut selected events into a buffer for later insertion and remove the
+    selection.
+    """
+
+    def __init__(self, buffer, accumulate=False):
+        """Create the cut transformation.
+
+        :param buffer: the `StreamBuffer` in which the selection should be
+                       stored
+        :param accumulate: when false, the buffer is reset before every
+                           selection is stored; when true, it is never reset
+                           by this transformation
+        """
+        self.buffer = buffer
+        self.accumulate = accumulate
+
+    def __call__(self, stream):
+        """Apply the transform filter to the marked stream.
+
+        Marked events are moved into the buffer instead of being passed on.
+        For attribute selections (mark ``ATTR``) the selected attribute names
+        are additionally stripped from the START event that follows.
+
+        :param stream: the marked event stream to filter
+        """
+        # Names of the attributes selected since the last START event.
+        attributes = []
+        stream = PushBackStream(stream)
+        # True once an unmarked event has been passed on since the previous
+        # selection.
+        broken = False
+        if not self.accumulate:
+            self.buffer.reset()
+        for mark, event in stream:
+            if mark:
+                # Send a BREAK event if no other event was sent between this
+                # selection and the previous one.
+                if not self.accumulate:
+                    if not broken and self.buffer:
+                        yield BREAK, (BREAK, None, None)
+                    self.buffer.reset()
+                self.buffer.append(event)
+                start = mark
+                if mark is ATTR:
+                    attributes.extend([name for name, _ in event[1][1]])
+                for mark, event in stream:
+                    if start is mark is ATTR:
+                        attributes.extend([name for name, _ in event[1][1]])
+                    # Handle non-element contiguous selection
+                    if start is not ENTER and mark != start:
+                        # Operating on the attributes of a START event
+                        if start is ATTR:
+                            kind, data, pos = event
+                            assert kind is START
+                            data = (data[0], data[1] - attributes)
+                            # Start over with an empty list (not None) so
+                            # that a later attribute selection in the same
+                            # stream can collect its names again.
+                            attributes = []
+                            stream.push((mark, (kind, data, pos)))
+                        else:
+                            stream.push((mark, event))
+                        break
+                    self.buffer.append(event)
+                    if start is ENTER and mark is EXIT:
+                        break
+                broken = False
+            else:
+                broken = True
+                yield mark, event
+        if not broken and self.buffer:
+            yield BREAK, (BREAK, None, None)