From 570a268e7562303690ef6b599ea244945a3100ce Mon Sep 17 00:00:00 2001
From: Sebastian Silva <sebastian@somosazucar.org>
Date: Sat, 09 Jul 2011 00:17:44 +0000
Subject: Still importing WebSDK.

Need to read up on GIT.
---
(limited to 'genshi/output.py')

diff --git a/genshi/output.py b/genshi/output.py
new file mode 100644
index 0000000..2ebb38b
--- /dev/null
+++ b/genshi/output.py
@@ -0,0 +1,838 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""This module provides different kinds of serialization methods for XML event
+streams.
+"""
+
+from itertools import chain
+import re
+
+from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
+from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
+                        START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
+
+__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer',
+           'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
+__docformat__ = 'restructuredtext en'
+
+
+def encode(iterator, method='xml', encoding='utf-8', out=None):
+    """Encode serializer output into a string.
+    
+    :param iterator: the iterator returned from serializing a stream (basically
+                     any iterator that yields unicode objects)
+    :param method: the serialization method; determines how characters not
+                   representable in the specified encoding are treated
+    :param encoding: how the output string should be encoded; if set to `None`,
+                     this method returns a `unicode` object
+    :param out: a file-like object that the output should be written to
+                instead of being returned as one big string; note that if
+                this is a file or socket (or similar), the `encoding` must
+                not be `None` (that is, the output must be encoded)
+    :return: a `str` or `unicode` object (depending on the `encoding`
+             parameter), or `None` if the `out` parameter is provided
+    
+    :since: version 0.4.1
+    :note: Changed in 0.5: added the `out` parameter
+    """
+    if encoding is not None:
+        errors = 'replace'
+        if method != 'text' and not isinstance(method, TextSerializer):
+            errors = 'xmlcharrefreplace'
+        _encode = lambda string: string.encode(encoding, errors)
+    else:
+        _encode = lambda string: string
+    if out is None:
+        return _encode(''.join(list(iterator)))
+    for chunk in iterator:
+        out.write(_encode(chunk))
+
+
+def get_serializer(method='xml', **kwargs):
+    """Return a serializer object for the given method.
+    
+    :param method: the serialization method; can be either "xml", "xhtml",
+                   "html", "text", or a custom serializer class
+
+    Any additional keyword arguments are passed to the serializer, and thus
+    depend on the `method` parameter value.
+    
+    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
+    :since: version 0.4.1
+    """
+    if isinstance(method, basestring):
+        method = {'xml':   XMLSerializer,
+                  'xhtml': XHTMLSerializer,
+                  'html':  HTMLSerializer,
+                  'text':  TextSerializer}[method.lower()]
+    return method(**kwargs)
+
+
+class DocType(object):
+    """Defines a number of commonly used DOCTYPE declarations as constants."""
+
+    HTML_STRICT = (
+        'html', '-//W3C//DTD HTML 4.01//EN',
+        'http://www.w3.org/TR/html4/strict.dtd'
+    )
+    HTML_TRANSITIONAL = (
+        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
+        'http://www.w3.org/TR/html4/loose.dtd'
+    )
+    HTML_FRAMESET = (
+        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
+        'http://www.w3.org/TR/html4/frameset.dtd'
+    )
+    HTML = HTML_STRICT
+
+    HTML5 = ('html', None, None)
+
+    XHTML_STRICT = (
+        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
+        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
+    )
+    XHTML_TRANSITIONAL = (
+        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
+        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
+    )
+    XHTML_FRAMESET = (
+        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
+        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
+    )
+    XHTML = XHTML_STRICT
+
+    XHTML11 = (
+        'html', '-//W3C//DTD XHTML 1.1//EN',
+        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
+    )
+
+    SVG_FULL = (
+        'svg', '-//W3C//DTD SVG 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
+    )
+    SVG_BASIC = (
+        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
+    )
+    SVG_TINY = (
+        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
+        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
+    )
+    SVG = SVG_FULL
+
+    @classmethod
+    def get(cls, name):
+        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
+        declaration for the specified name.
+        
+        The following names are recognized in this version:
+         * "html" or "html-strict" for the HTML 4.01 strict DTD
+         * "html-transitional" for the HTML 4.01 transitional DTD
+         * "html-frameset" for the HTML 4.01 frameset DTD
+         * "html5" for the ``DOCTYPE`` proposed for HTML5
+         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
+         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
+         * "xhtml-frameset" for the XHTML 1.0 frameset DTD
+         * "xhtml11" for the XHTML 1.1 DTD
+         * "svg" or "svg-full" for the SVG 1.1 DTD
+         * "svg-basic" for the SVG Basic 1.1 DTD
+         * "svg-tiny" for the SVG Tiny 1.1 DTD
+        
+        :param name: the name of the ``DOCTYPE``
+        :return: the ``(name, pubid, sysid)`` tuple for the requested
+                 ``DOCTYPE``, or ``None`` if the name is not recognized
+        :since: version 0.4.1
+        """
+        return {
+            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
+            'html-transitional': DocType.HTML_TRANSITIONAL,
+            'html-frameset': DocType.HTML_FRAMESET,
+            'html5': cls.HTML5,
+            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
+            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
+            'xhtml-frameset': cls.XHTML_FRAMESET,
+            'xhtml11': cls.XHTML11,
+            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
+            'svg-basic': cls.SVG_BASIC,
+            'svg-tiny': cls.SVG_TINY
+        }.get(name.lower())
+
+
+class XMLSerializer(object):
+    """Produces XML text from an event stream.
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print(''.join(XMLSerializer()(elem.generate())))
+    <div><a href="foo"/><br/><hr noshade="True"/></div>
+    """
+
+    _PRESERVE_SPACE = frozenset()
+
+    def __init__(self, doctype=None, strip_whitespace=True,
+                 namespace_prefixes=None, cache=True):
+        """Initialize the XML serializer.
+        
+        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
+                        DOCTYPE declaration that should be included at the top
+                        of the generated output, or the name of a DOCTYPE as
+                        defined in `DocType.get`
+        :param strip_whitespace: whether extraneous whitespace should be
+                                 stripped from the output
+        :param cache: whether to cache the text output per event, which
+                      improves performance for repetitive markup
+        :note: Changed in 0.4.2: The  `doctype` parameter can now be a string.
+        :note: Changed in 0.6: The `cache` parameter was added
+        """
+        self.filters = [EmptyTagFilter()]
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
+                                               cache=cache))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.cache = cache
+
+    def __call__(self, stream):
+        have_decl = have_doctype = False
+        in_cdata = False
+
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output):
+                cache[kind, input] = output
+                return output
+        else:
+            def _emit(kind, input, output):
+                return output
+
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+            cached = cache_get((kind, data))
+            if cached is not None:
+                yield cached
+
+            elif kind is START or kind is EMPTY:
+                tag, attrib = data
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    buf += [' ', attr, '="', escape(value), '"']
+                buf.append(kind is EMPTY and '/>' or '>')
+                yield _emit(kind, data, Markup(''.join(buf)))
+
+            elif kind is END:
+                yield _emit(kind, data, Markup('</%s>' % data))
+
+            elif kind is TEXT:
+                if in_cdata:
+                    yield _emit(kind, data, data)
+                else:
+                    yield _emit(kind, data, escape(data, quotes=False))
+
+            elif kind is COMMENT:
+                yield _emit(kind, data, Markup('<!--%s-->' % data))
+
+            elif kind is XML_DECL and not have_decl:
+                version, encoding, standalone = data
+                buf = ['<?xml version="%s"' % version]
+                if encoding:
+                    buf.append(' encoding="%s"' % encoding)
+                if standalone != -1:
+                    standalone = standalone and 'yes' or 'no'
+                    buf.append(' standalone="%s"' % standalone)
+                buf.append('?>\n')
+                yield Markup(''.join(buf))
+                have_decl = True
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(''.join(buf)) % tuple([p for p in data if p])
+                have_doctype = True
+
+            elif kind is START_CDATA:
+                yield Markup('<![CDATA[')
+                in_cdata = True
+
+            elif kind is END_CDATA:
+                yield Markup(']]>')
+                in_cdata = False
+
+            elif kind is PI:
+                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
+class XHTMLSerializer(XMLSerializer):
+    """Produces XHTML text from an event stream.
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print(''.join(XHTMLSerializer()(elem.generate())))
+    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
+    """
+
+    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
+                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
+                              'param'])
+    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
+                                'defer', 'disabled', 'ismap', 'multiple',
+                                'nohref', 'noresize', 'noshade', 'nowrap'])
+    _PRESERVE_SPACE = frozenset([
+        QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
+        QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
+    ])
+
+    def __init__(self, doctype=None, strip_whitespace=True,
+                 namespace_prefixes=None, drop_xml_decl=True, cache=True):
+        super(XHTMLSerializer, self).__init__(doctype, False)
+        self.filters = [EmptyTagFilter()]
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+        namespace_prefixes = namespace_prefixes or {}
+        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
+        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
+                                               cache=cache))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.drop_xml_decl = drop_xml_decl
+        self.cache = cache
+
+    def __call__(self, stream):
+        boolean_attrs = self._BOOLEAN_ATTRS
+        empty_elems = self._EMPTY_ELEMS
+        drop_xml_decl = self.drop_xml_decl
+        have_decl = have_doctype = False
+        in_cdata = False
+
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output):
+                cache[kind, input] = output
+                return output
+        else:
+            def _emit(kind, input, output):
+                return output
+
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+            cached = cache_get((kind, data))
+            if cached is not None:
+                yield cached
+
+            elif kind is START or kind is EMPTY:
+                tag, attrib = data
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    if attr in boolean_attrs:
+                        value = attr
+                    elif attr == 'xml:lang' and 'lang' not in attrib:
+                        buf += [' lang="', escape(value), '"']
+                    elif attr == 'xml:space':
+                        continue
+                    buf += [' ', attr, '="', escape(value), '"']
+                if kind is EMPTY:
+                    if tag in empty_elems:
+                        buf.append(' />')
+                    else:
+                        buf.append('></%s>' % tag)
+                else:
+                    buf.append('>')
+                yield _emit(kind, data, Markup(''.join(buf)))
+
+            elif kind is END:
+                yield _emit(kind, data, Markup('</%s>' % data))
+
+            elif kind is TEXT:
+                if in_cdata:
+                    yield _emit(kind, data, data)
+                else:
+                    yield _emit(kind, data, escape(data, quotes=False))
+
+            elif kind is COMMENT:
+                yield _emit(kind, data, Markup('<!--%s-->' % data))
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(''.join(buf)) % tuple([p for p in data if p])
+                have_doctype = True
+
+            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
+                version, encoding, standalone = data
+                buf = ['<?xml version="%s"' % version]
+                if encoding:
+                    buf.append(' encoding="%s"' % encoding)
+                if standalone != -1:
+                    standalone = standalone and 'yes' or 'no'
+                    buf.append(' standalone="%s"' % standalone)
+                buf.append('?>\n')
+                yield Markup(''.join(buf))
+                have_decl = True
+
+            elif kind is START_CDATA:
+                yield Markup('<![CDATA[')
+                in_cdata = True
+
+            elif kind is END_CDATA:
+                yield Markup(']]>')
+                in_cdata = False
+
+            elif kind is PI:
+                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
+class HTMLSerializer(XHTMLSerializer):
+    """Produces HTML text from an event stream.
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
+    >>> print(''.join(HTMLSerializer()(elem.generate())))
+    <div><a href="foo"></a><br><hr noshade></div>
+    """
+
+    _NOESCAPE_ELEMS = frozenset([
+        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+    ])
+
+    def __init__(self, doctype=None, strip_whitespace=True, cache=True):
+        """Initialize the HTML serializer.
+        
+        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
+                        DOCTYPE declaration that should be included at the top
+                        of the generated output
+        :param strip_whitespace: whether extraneous whitespace should be
+                                 stripped from the output
+        :param cache: whether to cache the text output per event, which
+                      improves performance for repetitive markup
+        :note: Changed in 0.6: The `cache` parameter was added
+        """
+        super(HTMLSerializer, self).__init__(doctype, False)
+        self.filters = [EmptyTagFilter()]
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
+                                                 self._NOESCAPE_ELEMS))
+        self.filters.append(NamespaceFlattener(prefixes={
+            'http://www.w3.org/1999/xhtml': ''
+        }, cache=cache))
+        if doctype:
+            self.filters.append(DocTypeInserter(doctype))
+        self.cache = True
+
+    def __call__(self, stream):
+        boolean_attrs = self._BOOLEAN_ATTRS
+        empty_elems = self._EMPTY_ELEMS
+        noescape_elems = self._NOESCAPE_ELEMS
+        have_doctype = False
+        noescape = False
+
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output):
+                cache[kind, input] = output
+                return output
+        else:
+            def _emit(kind, input, output):
+                return output
+
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, _ in stream:
+            output = cache_get((kind, data))
+            if output is not None:
+                yield output
+                if (kind is START or kind is EMPTY) \
+                        and data[0] in noescape_elems:
+                    noescape = True
+                elif kind is END:
+                    noescape = False
+
+            elif kind is START or kind is EMPTY:
+                tag, attrib = data
+                buf = ['<', tag]
+                for attr, value in attrib:
+                    if attr in boolean_attrs:
+                        if value:
+                            buf += [' ', attr]
+                    elif ':' in attr:
+                        if attr == 'xml:lang' and 'lang' not in attrib:
+                            buf += [' lang="', escape(value), '"']
+                    elif attr != 'xmlns':
+                        buf += [' ', attr, '="', escape(value), '"']
+                buf.append('>')
+                if kind is EMPTY:
+                    if tag not in empty_elems:
+                        buf.append('</%s>' % tag)
+                yield _emit(kind, data, Markup(''.join(buf)))
+                if tag in noescape_elems:
+                    noescape = True
+
+            elif kind is END:
+                yield _emit(kind, data, Markup('</%s>' % data))
+                noescape = False
+
+            elif kind is TEXT:
+                if noescape:
+                    yield _emit(kind, data, data)
+                else:
+                    yield _emit(kind, data, escape(data, quotes=False))
+
+            elif kind is COMMENT:
+                yield _emit(kind, data, Markup('<!--%s-->' % data))
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf.append(' PUBLIC "%s"')
+                elif sysid:
+                    buf.append(' SYSTEM')
+                if sysid:
+                    buf.append(' "%s"')
+                buf.append('>\n')
+                yield Markup(''.join(buf)) % tuple([p for p in data if p])
+                have_doctype = True
+
+            elif kind is PI:
+                yield _emit(kind, data, Markup('<?%s %s?>' % data))
+
+
+class TextSerializer(object):
+    """Produces plain text from an event stream.
+    
+    Only text events are included in the output. Unlike the other serializer,
+    special XML characters are not escaped:
+    
+    >>> from genshi.builder import tag
+    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
+    >>> print(elem)
+    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
+    >>> print(''.join(TextSerializer()(elem.generate())))
+    <Hello!>
+
+    If text events contain literal markup (instances of the `Markup` class),
+    that markup is by default passed through unchanged:
+    
+    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
+    >>> print(elem.generate().render(TextSerializer, encoding=None))
+    <a href="foo">Hello &amp; Bye!</a><br/>
+    
+    You can use the ``strip_markup`` to change this behavior, so that tags and
+    entities are stripped from the output (or in the case of entities,
+    replaced with the equivalent character):
+
+    >>> print(elem.generate().render(TextSerializer, strip_markup=True,
+    ...                              encoding=None))
+    Hello & Bye!
+    """
+
+    def __init__(self, strip_markup=False):
+        """Create the serializer.
+        
+        :param strip_markup: whether markup (tags and encoded characters) found
+                             in the text should be removed
+        """
+        self.strip_markup = strip_markup
+
+    def __call__(self, stream):
+        strip_markup = self.strip_markup
+        for event in stream:
+            if event[0] is TEXT:
+                data = event[1]
+                if strip_markup and type(data) is Markup:
+                    data = data.striptags().stripentities()
+                yield unicode(data)
+
+
+class EmptyTagFilter(object):
+    """Combines `START` and `STOP` events into `EMPTY` events for elements that
+    have no contents.
+    """
+
+    EMPTY = StreamEventKind('EMPTY')
+
+    def __call__(self, stream):
+        prev = (None, None, None)
+        for ev in stream:
+            if prev[0] is START:
+                if ev[0] is END:
+                    prev = EMPTY, prev[1], prev[2]
+                    yield prev
+                    continue
+                else:
+                    yield prev
+            if ev[0] is not START:
+                yield ev
+            prev = ev
+
+
+EMPTY = EmptyTagFilter.EMPTY
+
+
+class NamespaceFlattener(object):
+    r"""Output stream filter that removes namespace information from the stream,
+    instead adding namespace attributes and prefixes as needed.
+    
+    :param prefixes: optional mapping of namespace URIs to prefixes
+    
+    >>> from genshi.input import XML
+    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
+    ...   <two:item/>
+    ... </doc>''')
+    >>> for kind, data, pos in NamespaceFlattener()(xml):
+    ...     print('%s %r' % (kind, data))
+    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
+    TEXT u'\n  '
+    START (u'two:item', Attrs())
+    END u'two:item'
+    TEXT u'\n'
+    END u'doc'
+    """
+
+    def __init__(self, prefixes=None, cache=True):
+        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
+        if prefixes is not None:
+            self.prefixes.update(prefixes)
+        self.cache = cache
+
+    def __call__(self, stream):
+        cache = {}
+        cache_get = cache.get
+        if self.cache:
+            def _emit(kind, input, output, pos):
+                cache[kind, input] = output
+                return kind, output, pos
+        else:
+            def _emit(kind, input, output, pos):
+                return output
+
+        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
+        namespaces = {XML_NAMESPACE.uri: ['xml']}
+        def _push_ns(prefix, uri):
+            namespaces.setdefault(uri, []).append(prefix)
+            prefixes.setdefault(prefix, []).append(uri)
+            cache.clear()
+        def _pop_ns(prefix):
+            uris = prefixes.get(prefix)
+            uri = uris.pop()
+            if not uris:
+                del prefixes[prefix]
+            if uri not in uris or uri != uris[-1]:
+                uri_prefixes = namespaces[uri]
+                uri_prefixes.pop()
+                if not uri_prefixes:
+                    del namespaces[uri]
+            cache.clear()
+            return uri
+
+        ns_attrs = []
+        _push_ns_attr = ns_attrs.append
+        def _make_ns_attr(prefix, uri):
+            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
+
+        def _gen_prefix():
+            val = 0
+            while 1:
+                val += 1
+                yield 'ns%d' % val
+        _gen_prefix = _gen_prefix().next
+
+        for kind, data, pos in stream:
+            output = cache_get((kind, data))
+            if output is not None:
+                yield kind, output, pos
+
+            elif kind is START or kind is EMPTY:
+                tag, attrs = data
+
+                tagname = tag.localname
+                tagns = tag.namespace
+                if tagns:
+                    if tagns in namespaces:
+                        prefix = namespaces[tagns][-1]
+                        if prefix:
+                            tagname = '%s:%s' % (prefix, tagname)
+                    else:
+                        _push_ns_attr(('xmlns', tagns))
+                        _push_ns('', tagns)
+
+                new_attrs = []
+                for attr, value in attrs:
+                    attrname = attr.localname
+                    attrns = attr.namespace
+                    if attrns:
+                        if attrns not in namespaces:
+                            prefix = _gen_prefix()
+                            _push_ns(prefix, attrns)
+                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
+                        else:
+                            prefix = namespaces[attrns][-1]
+                        if prefix:
+                            attrname = '%s:%s' % (prefix, attrname)
+                    new_attrs.append((attrname, value))
+
+                yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos)
+                del ns_attrs[:]
+
+            elif kind is END:
+                tagname = data.localname
+                tagns = data.namespace
+                if tagns:
+                    prefix = namespaces[tagns][-1]
+                    if prefix:
+                        tagname = '%s:%s' % (prefix, tagname)
+                yield _emit(kind, data, tagname, pos)
+
+            elif kind is START_NS:
+                prefix, uri = data
+                if uri not in namespaces:
+                    prefix = prefixes.get(uri, [prefix])[-1]
+                    _push_ns_attr(_make_ns_attr(prefix, uri))
+                _push_ns(prefix, uri)
+
+            elif kind is END_NS:
+                if data in prefixes:
+                    uri = _pop_ns(data)
+                    if ns_attrs:
+                        attr = _make_ns_attr(data, uri)
+                        if attr in ns_attrs:
+                            ns_attrs.remove(attr)
+
+            else:
+                yield kind, data, pos
+
+
+class WhitespaceFilter(object):
+    """A filter that removes extraneous ignorable white space from the
+    stream.
+    """
+
+    def __init__(self, preserve=None, noescape=None):
+        """Initialize the filter.
+        
+        :param preserve: a set or sequence of tag names for which white-space
+                         should be preserved
+        :param noescape: a set or sequence of tag names for which text content
+                         should not be escaped
+        
+        The `noescape` set is expected to refer to elements that cannot contain
+        further child elements (such as ``<style>`` or ``<script>`` in HTML
+        documents).
+        """
+        if preserve is None:
+            preserve = []
+        self.preserve = frozenset(preserve)
+        if noescape is None:
+            noescape = []
+        self.noescape = frozenset(noescape)
+
+    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
+                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
+                 collapse_lines=re.compile('\n{2,}').sub):
+        mjoin = Markup('').join
+        preserve_elems = self.preserve
+        preserve = 0
+        noescape_elems = self.noescape
+        noescape = False
+
+        textbuf = []
+        push_text = textbuf.append
+        pop_text = textbuf.pop
+        for kind, data, pos in chain(stream, [(None, None, None)]):
+
+            if kind is TEXT:
+                if noescape:
+                    data = Markup(data)
+                push_text(data)
+            else:
+                if textbuf:
+                    if len(textbuf) > 1:
+                        text = mjoin(textbuf, escape_quotes=False)
+                        del textbuf[:]
+                    else:
+                        text = escape(pop_text(), quotes=False)
+                    if not preserve:
+                        text = collapse_lines('\n', trim_trailing_space('', text))
+                    yield TEXT, Markup(text), pos
+
+                if kind is START:
+                    tag, attrs = data
+                    if preserve or (tag in preserve_elems or
+                                    attrs.get(space) == 'preserve'):
+                        preserve += 1
+                    if not noescape and tag in noescape_elems:
+                        noescape = True
+
+                elif kind is END:
+                    noescape = False
+                    if preserve:
+                        preserve -= 1
+
+                elif kind is START_CDATA:
+                    noescape = True
+
+                elif kind is END_CDATA:
+                    noescape = False
+
+                if kind:
+                    yield kind, data, pos
+
+
+class DocTypeInserter(object):
+    """A filter that inserts the DOCTYPE declaration in the correct location,
+    after the XML declaration.
+    """
+    def __init__(self, doctype):
+        """Initialize the filter.
+
+        :param doctype: DOCTYPE as a string or DocType object.
+        """
+        if isinstance(doctype, basestring):
+            doctype = DocType.get(doctype)
+        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))
+
+    def __call__(self, stream):
+        doctype_inserted = False
+        for kind, data, pos in stream:
+            if not doctype_inserted:
+                doctype_inserted = True
+                if kind is XML_DECL:
+                    yield (kind, data, pos)
+                    yield self.doctype_event
+                    continue
+                yield self.doctype_event
+
+            yield (kind, data, pos)
+
+        if not doctype_inserted:
+            yield self.doctype_event
--
cgit v0.9.1