Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/genshi
diff options
context:
space:
mode:
authorSebastian Silva <sebastian@sugarlabs.org>2011-09-28 00:19:33 (GMT)
committer Sebastian Silva <sebastian@sugarlabs.org>2011-09-28 06:54:34 (GMT)
commit5861585e94a32b3032ac473804bf90c6e1363940 (patch)
treefb3a5bab0d75bf8eb780e749737fea87369754db /genshi
parentbe7aa93d7ba3682d5189e1a7d72169c0b02a1ec1 (diff)
Migrated to Flask, added JQuery sugar theme, fixed race condition
Diffstat (limited to 'genshi')
-rw-r--r--genshi/__init__.py26
-rw-r--r--genshi/builder.py359
-rw-r--r--genshi/core.py727
-rw-r--r--genshi/filters/__init__.py20
-rw-r--r--genshi/filters/html.py453
-rw-r--r--genshi/filters/i18n.py1238
-rw-r--r--genshi/filters/transform.py1310
-rw-r--r--genshi/input.py443
-rw-r--r--genshi/output.py838
-rw-r--r--genshi/path.py1528
-rw-r--r--genshi/template/__init__.py23
-rw-r--r--genshi/template/_ast24.py446
-rw-r--r--genshi/template/ast24.py505
-rw-r--r--genshi/template/astutil.py784
-rw-r--r--genshi/template/base.py634
-rw-r--r--genshi/template/directives.py725
-rw-r--r--genshi/template/eval.py629
-rw-r--r--genshi/template/interpolation.py153
-rw-r--r--genshi/template/loader.py344
-rw-r--r--genshi/template/markup.py397
-rw-r--r--genshi/template/plugin.py176
-rw-r--r--genshi/template/text.py333
-rw-r--r--genshi/util.py274
23 files changed, 0 insertions, 12365 deletions
diff --git a/genshi/__init__.py b/genshi/__init__.py
deleted file mode 100644
index 02f4347..0000000
--- a/genshi/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""This package provides various means for generating and processing web markup
-(XML or HTML).
-
-The design is centered around the concept of streams of markup events (similar
-in concept to SAX parsing events) which can be processed in a uniform manner
-independently of where or how they are produced.
-"""
-
-__docformat__ = 'restructuredtext en'
-__version__ = '0.6'
-
-from genshi.core import *
-from genshi.input import ParseError, XML, HTML
diff --git a/genshi/builder.py b/genshi/builder.py
deleted file mode 100644
index 724e364..0000000
--- a/genshi/builder.py
+++ /dev/null
@@ -1,359 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Support for programmatically generating markup streams from Python code using
-a very simple syntax. The main entry point to this module is the `tag` object
-(which is actually an instance of the ``ElementFactory`` class). You should
-rarely (if ever) need to directly import and use any of the other classes in
-this module.
-
-Elements can be created using the `tag` object using attribute access. For
-example:
-
->>> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.')
->>> doc
-<Element "p">
-
-This produces an `Element` instance which can be further modified to add child
-nodes and attributes. This is done by "calling" the element: positional
-arguments are added as child nodes (alternatively, the `Element.append` method
-can be used for that purpose), whereas keywords arguments are added as
-attributes:
-
->>> doc(tag.br)
-<Element "p">
->>> print(doc)
-<p>Some text and <a href="http://example.org/">a link</a>.<br/></p>
-
-If an attribute name collides with a Python keyword, simply append an underscore
-to the name:
-
->>> doc(class_='intro')
-<Element "p">
->>> print(doc)
-<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
-
-As shown above, an `Element` can easily be directly rendered to XML text by
-printing it or using the Python ``str()`` function. This is basically a
-shortcut for converting the `Element` to a stream and serializing that
-stream:
-
->>> stream = doc.generate()
->>> stream #doctest: +ELLIPSIS
-<genshi.core.Stream object at ...>
->>> print(stream)
-<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
-
-
-The `tag` object also allows creating "fragments", which are basically lists
-of nodes (elements or text) that don't have a parent element. This can be useful
-for creating snippets of markup that are attached to a parent element later (for
-example in a template). Fragments are created by calling the `tag` object, which
-returns an object of type `Fragment`:
-
->>> fragment = tag('Hello, ', tag.em('world'), '!')
->>> fragment
-<Fragment>
->>> print(fragment)
-Hello, <em>world</em>!
-"""
-
-from genshi.core import Attrs, Markup, Namespace, QName, Stream, \
- START, END, TEXT
-
-__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag']
-__docformat__ = 'restructuredtext en'
-
-
-class Fragment(object):
- """Represents a markup fragment, which is basically just a list of element
- or text nodes.
- """
- __slots__ = ['children']
-
- def __init__(self):
- """Create a new fragment."""
- self.children = []
-
- def __add__(self, other):
- return Fragment()(self, other)
-
- def __call__(self, *args):
- """Append any positional arguments as child nodes.
-
- :see: `append`
- """
- for arg in args:
- self.append(arg)
- return self
-
- def __iter__(self):
- return self._generate()
-
- def __repr__(self):
- return '<%s>' % type(self).__name__
-
- def __str__(self):
- return str(self.generate())
-
- def __unicode__(self):
- return unicode(self.generate())
-
- def __html__(self):
- return Markup(self.generate())
-
- def append(self, node):
- """Append an element or string as child node.
-
- :param node: the node to append; can be an `Element`, `Fragment`, or a
- `Stream`, or a Python string or number
- """
- if isinstance(node, (Stream, Element, basestring, int, float, long)):
- # For objects of a known/primitive type, we avoid the check for
- # whether it is iterable for better performance
- self.children.append(node)
- elif isinstance(node, Fragment):
- self.children.extend(node.children)
- elif node is not None:
- try:
- for child in node:
- self.append(child)
- except TypeError:
- self.children.append(node)
-
- def _generate(self):
- for child in self.children:
- if isinstance(child, Fragment):
- for event in child._generate():
- yield event
- elif isinstance(child, Stream):
- for event in child:
- yield event
- else:
- if not isinstance(child, basestring):
- child = unicode(child)
- yield TEXT, child, (None, -1, -1)
-
- def generate(self):
- """Return a markup event stream for the fragment.
-
- :rtype: `Stream`
- """
- return Stream(self._generate())
-
-
-def _kwargs_to_attrs(kwargs):
- attrs = []
- names = set()
- for name, value in kwargs.items():
- name = name.rstrip('_').replace('_', '-')
- if value is not None and name not in names:
- attrs.append((QName(name), unicode(value)))
- names.add(name)
- return Attrs(attrs)
-
-
-class Element(Fragment):
- """Simple XML output generator based on the builder pattern.
-
- Construct XML elements by passing the tag name to the constructor:
-
- >>> print(Element('strong'))
- <strong/>
-
- Attributes can be specified using keyword arguments. The values of the
- arguments will be converted to strings and any special XML characters
- escaped:
-
- >>> print(Element('textarea', rows=10, cols=60))
- <textarea rows="10" cols="60"/>
- >>> print(Element('span', title='1 < 2'))
- <span title="1 &lt; 2"/>
- >>> print(Element('span', title='"baz"'))
- <span title="&#34;baz&#34;"/>
-
- The " character is escaped using a numerical entity.
- The order in which attributes are rendered is undefined.
-
- If an attribute value evaluates to `None`, that attribute is not included
- in the output:
-
- >>> print(Element('a', name=None))
- <a/>
-
- Attribute names that conflict with Python keywords can be specified by
- appending an underscore:
-
- >>> print(Element('div', class_='warning'))
- <div class="warning"/>
-
- Nested elements can be added to an element using item access notation.
- The call notation can also be used for this and for adding attributes
- using keyword arguments, as one would do in the constructor.
-
- >>> print(Element('ul')(Element('li'), Element('li')))
- <ul><li/><li/></ul>
- >>> print(Element('a')('Label'))
- <a>Label</a>
- >>> print(Element('a')('Label', href="target"))
- <a href="target">Label</a>
-
- Text nodes can be nested in an element by adding strings instead of
- elements. Any special characters in the strings are escaped automatically:
-
- >>> print(Element('em')('Hello world'))
- <em>Hello world</em>
- >>> print(Element('em')(42))
- <em>42</em>
- >>> print(Element('em')('1 < 2'))
- <em>1 &lt; 2</em>
-
- This technique also allows mixed content:
-
- >>> print(Element('p')('Hello ', Element('b')('world')))
- <p>Hello <b>world</b></p>
-
- Quotes are not escaped inside text nodes:
- >>> print(Element('p')('"Hello"'))
- <p>"Hello"</p>
-
- Elements can also be combined with other elements or strings using the
- addition operator, which results in a `Fragment` object that contains the
- operands:
-
- >>> print(Element('br') + 'some text' + Element('br'))
- <br/>some text<br/>
-
- Elements with a namespace can be generated using the `Namespace` and/or
- `QName` classes:
-
- >>> from genshi.core import Namespace
- >>> xhtml = Namespace('http://www.w3.org/1999/xhtml')
- >>> print(Element(xhtml.html, lang='en'))
- <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
- """
- __slots__ = ['tag', 'attrib']
-
- def __init__(self, tag_, **attrib):
- Fragment.__init__(self)
- self.tag = QName(tag_)
- self.attrib = _kwargs_to_attrs(attrib)
-
- def __call__(self, *args, **kwargs):
- """Append any positional arguments as child nodes, and keyword arguments
- as attributes.
-
- :return: the element itself so that calls can be chained
- :rtype: `Element`
- :see: `Fragment.append`
- """
- self.attrib |= _kwargs_to_attrs(kwargs)
- Fragment.__call__(self, *args)
- return self
-
- def __repr__(self):
- return '<%s "%s">' % (type(self).__name__, self.tag)
-
- def _generate(self):
- yield START, (self.tag, self.attrib), (None, -1, -1)
- for kind, data, pos in Fragment._generate(self):
- yield kind, data, pos
- yield END, self.tag, (None, -1, -1)
-
- def generate(self):
- """Return a markup event stream for the fragment.
-
- :rtype: `Stream`
- """
- return Stream(self._generate())
-
-
-class ElementFactory(object):
- """Factory for `Element` objects.
-
- A new element is created simply by accessing a correspondingly named
- attribute of the factory object:
-
- >>> factory = ElementFactory()
- >>> print(factory.foo)
- <foo/>
- >>> print(factory.foo(id=2))
- <foo id="2"/>
-
- Markup fragments (lists of nodes without a parent element) can be created
- by calling the factory:
-
- >>> print(factory('Hello, ', factory.em('world'), '!'))
- Hello, <em>world</em>!
-
- A factory can also be bound to a specific namespace:
-
- >>> factory = ElementFactory('http://www.w3.org/1999/xhtml')
- >>> print(factory.html(lang="en"))
- <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
-
- The namespace for a specific element can be altered on an existing factory
- by specifying the new namespace using item access:
-
- >>> factory = ElementFactory()
- >>> print(factory.html(factory['http://www.w3.org/2000/svg'].g(id=3)))
- <html><g xmlns="http://www.w3.org/2000/svg" id="3"/></html>
-
- Usually, the `ElementFactory` class is not be used directly. Rather, the
- `tag` instance should be used to create elements.
- """
-
- def __init__(self, namespace=None):
- """Create the factory, optionally bound to the given namespace.
-
- :param namespace: the namespace URI for any created elements, or `None`
- for no namespace
- """
- if namespace and not isinstance(namespace, Namespace):
- namespace = Namespace(namespace)
- self.namespace = namespace
-
- def __call__(self, *args):
- """Create a fragment that has the given positional arguments as child
- nodes.
-
- :return: the created `Fragment`
- :rtype: `Fragment`
- """
- return Fragment()(*args)
-
- def __getitem__(self, namespace):
- """Return a new factory that is bound to the specified namespace.
-
- :param namespace: the namespace URI or `Namespace` object
- :return: an `ElementFactory` that produces elements bound to the given
- namespace
- :rtype: `ElementFactory`
- """
- return ElementFactory(namespace)
-
- def __getattr__(self, name):
- """Create an `Element` with the given name.
-
- :param name: the tag name of the element to create
- :return: an `Element` with the specified name
- :rtype: `Element`
- """
- return Element(self.namespace and self.namespace[name] or name)
-
-
-tag = ElementFactory()
-"""Global `ElementFactory` bound to the default namespace.
-
-:type: `ElementFactory`
-"""
diff --git a/genshi/core.py b/genshi/core.py
deleted file mode 100644
index f7cddff..0000000
--- a/genshi/core.py
+++ /dev/null
@@ -1,727 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Core classes for markup processing."""
-
-try:
- reduce # builtin in Python < 3
-except NameError:
- from functools import reduce
-from itertools import chain
-import operator
-
-from genshi.util import plaintext, stripentities, striptags, stringrepr
-
-__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
- 'QName']
-__docformat__ = 'restructuredtext en'
-
-
-class StreamEventKind(str):
- """A kind of event on a markup stream."""
- __slots__ = []
- _instances = {}
-
- def __new__(cls, val):
- return cls._instances.setdefault(val, str.__new__(cls, val))
-
-
-class Stream(object):
- """Represents a stream of markup events.
-
- This class is basically an iterator over the events.
-
- Stream events are tuples of the form::
-
- (kind, data, position)
-
- where ``kind`` is the event kind (such as `START`, `END`, `TEXT`, etc),
- ``data`` depends on the kind of event, and ``position`` is a
- ``(filename, line, offset)`` tuple that contains the location of the
- original element or text in the input. If the original location is unknown,
- ``position`` is ``(None, -1, -1)``.
-
- Also provided are ways to serialize the stream to text. The `serialize()`
- method will return an iterator over generated strings, while `render()`
- returns the complete generated text at once. Both accept various parameters
- that impact the way the stream is serialized.
- """
- __slots__ = ['events', 'serializer']
-
- START = StreamEventKind('START') #: a start tag
- END = StreamEventKind('END') #: an end tag
- TEXT = StreamEventKind('TEXT') #: literal text
- XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
- DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
- START_NS = StreamEventKind('START_NS') #: start namespace mapping
- END_NS = StreamEventKind('END_NS') #: end namespace mapping
- START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
- END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
- PI = StreamEventKind('PI') #: processing instruction
- COMMENT = StreamEventKind('COMMENT') #: comment
-
- def __init__(self, events, serializer=None):
- """Initialize the stream with a sequence of markup events.
-
- :param events: a sequence or iterable providing the events
- :param serializer: the default serialization method to use for this
- stream
-
- :note: Changed in 0.5: added the `serializer` argument
- """
- self.events = events #: The underlying iterable producing the events
- self.serializer = serializer #: The default serializion method
-
- def __iter__(self):
- return iter(self.events)
-
- def __or__(self, function):
- """Override the "bitwise or" operator to apply filters or serializers
- to the stream, providing a syntax similar to pipes on Unix shells.
-
- Assume the following stream produced by the `HTML` function:
-
- >>> from genshi.input import HTML
- >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
- >>> print(html)
- <p onclick="alert('Whoa')">Hello, world!</p>
-
- A filter such as the HTML sanitizer can be applied to that stream using
- the pipe notation as follows:
-
- >>> from genshi.filters import HTMLSanitizer
- >>> sanitizer = HTMLSanitizer()
- >>> print(html | sanitizer)
- <p>Hello, world!</p>
-
- Filters can be any function that accepts and produces a stream (where
- a stream is anything that iterates over events):
-
- >>> def uppercase(stream):
- ... for kind, data, pos in stream:
- ... if kind is TEXT:
- ... data = data.upper()
- ... yield kind, data, pos
- >>> print(html | sanitizer | uppercase)
- <p>HELLO, WORLD!</p>
-
- Serializers can also be used with this notation:
-
- >>> from genshi.output import TextSerializer
- >>> output = TextSerializer()
- >>> print(html | sanitizer | uppercase | output)
- HELLO, WORLD!
-
- Commonly, serializers should be used at the end of the "pipeline";
- using them somewhere in the middle may produce unexpected results.
-
- :param function: the callable object that should be applied as a filter
- :return: the filtered stream
- :rtype: `Stream`
- """
- return Stream(_ensure(function(self)), serializer=self.serializer)
-
- def filter(self, *filters):
- """Apply filters to the stream.
-
- This method returns a new stream with the given filters applied. The
- filters must be callables that accept the stream object as parameter,
- and return the filtered stream.
-
- The call::
-
- stream.filter(filter1, filter2)
-
- is equivalent to::
-
- stream | filter1 | filter2
-
- :param filters: one or more callable objects that should be applied as
- filters
- :return: the filtered stream
- :rtype: `Stream`
- """
- return reduce(operator.or_, (self,) + filters)
-
- def render(self, method=None, encoding='utf-8', out=None, **kwargs):
- """Return a string representation of the stream.
-
- Any additional keyword arguments are passed to the serializer, and thus
- depend on the `method` parameter value.
-
- :param method: determines how the stream is serialized; can be either
- "xml", "xhtml", "html", "text", or a custom serializer
- class; if `None`, the default serialization method of
- the stream is used
- :param encoding: how the output string should be encoded; if set to
- `None`, this method returns a `unicode` object
- :param out: a file-like object that the output should be written to
- instead of being returned as one big string; note that if
- this is a file or socket (or similar), the `encoding` must
- not be `None` (that is, the output must be encoded)
- :return: a `str` or `unicode` object (depending on the `encoding`
- parameter), or `None` if the `out` parameter is provided
- :rtype: `basestring`
-
- :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
- :note: Changed in 0.5: added the `out` parameter
- """
- from genshi.output import encode
- if method is None:
- method = self.serializer or 'xml'
- generator = self.serialize(method=method, **kwargs)
- return encode(generator, method=method, encoding=encoding, out=out)
-
- def select(self, path, namespaces=None, variables=None):
- """Return a new stream that contains the events matching the given
- XPath expression.
-
- >>> from genshi import HTML
- >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
- >>> print(stream.select('elem'))
- <elem>foo</elem><elem>bar</elem>
- >>> print(stream.select('elem/text()'))
- foobar
-
- Note that the outermost element of the stream becomes the *context
- node* for the XPath test. That means that the expression "doc" would
- not match anything in the example above, because it only tests against
- child elements of the outermost element:
-
- >>> print(stream.select('doc'))
- <BLANKLINE>
-
- You can use the "." expression to match the context node itself
- (although that usually makes little sense):
-
- >>> print(stream.select('.'))
- <doc><elem>foo</elem><elem>bar</elem></doc>
-
- :param path: a string containing the XPath expression
- :param namespaces: mapping of namespace prefixes used in the path
- :param variables: mapping of variable names to values
- :return: the selected substream
- :rtype: `Stream`
- :raises PathSyntaxError: if the given path expression is invalid or not
- supported
- """
- from genshi.path import Path
- return Path(path).select(self, namespaces, variables)
-
- def serialize(self, method='xml', **kwargs):
- """Generate strings corresponding to a specific serialization of the
- stream.
-
- Unlike the `render()` method, this method is a generator that returns
- the serialized output incrementally, as opposed to returning a single
- string.
-
- Any additional keyword arguments are passed to the serializer, and thus
- depend on the `method` parameter value.
-
- :param method: determines how the stream is serialized; can be either
- "xml", "xhtml", "html", "text", or a custom serializer
- class; if `None`, the default serialization method of
- the stream is used
- :return: an iterator over the serialization results (`Markup` or
- `unicode` objects, depending on the serialization method)
- :rtype: ``iterator``
- :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
- """
- from genshi.output import get_serializer
- if method is None:
- method = self.serializer or 'xml'
- return get_serializer(method, **kwargs)(_ensure(self))
-
- def __str__(self):
- return self.render()
-
- def __unicode__(self):
- return self.render(encoding=None)
-
- def __html__(self):
- return self
-
-
-START = Stream.START
-END = Stream.END
-TEXT = Stream.TEXT
-XML_DECL = Stream.XML_DECL
-DOCTYPE = Stream.DOCTYPE
-START_NS = Stream.START_NS
-END_NS = Stream.END_NS
-START_CDATA = Stream.START_CDATA
-END_CDATA = Stream.END_CDATA
-PI = Stream.PI
-COMMENT = Stream.COMMENT
-
-
-def _ensure(stream):
- """Ensure that every item on the stream is actually a markup event."""
- stream = iter(stream)
- event = stream.next()
-
- # Check whether the iterable is a real markup event stream by examining the
- # first item it yields; if it's not we'll need to do some conversion
- if type(event) is not tuple or len(event) != 3:
- for event in chain([event], stream):
- if hasattr(event, 'totuple'):
- event = event.totuple()
- else:
- event = TEXT, unicode(event), (None, -1, -1)
- yield event
- return
-
- # This looks like a markup event stream, so we'll just pass it through
- # unchanged
- yield event
- for event in stream:
- yield event
-
-
-class Attrs(tuple):
- """Immutable sequence type that stores the attributes of an element.
-
- Ordering of the attributes is preserved, while access by name is also
- supported.
-
- >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
- >>> attrs
- Attrs([('href', '#'), ('title', 'Foo')])
-
- >>> 'href' in attrs
- True
- >>> 'tabindex' in attrs
- False
- >>> attrs.get('title')
- 'Foo'
-
- Instances may not be manipulated directly. Instead, the operators ``|`` and
- ``-`` can be used to produce new instances that have specific attributes
- added, replaced or removed.
-
- To remove an attribute, use the ``-`` operator. The right hand side can be
- either a string or a set/sequence of strings, identifying the name(s) of
- the attribute(s) to remove:
-
- >>> attrs - 'title'
- Attrs([('href', '#')])
- >>> attrs - ('title', 'href')
- Attrs()
-
- The original instance is not modified, but the operator can of course be
- used with an assignment:
-
- >>> attrs
- Attrs([('href', '#'), ('title', 'Foo')])
- >>> attrs -= 'title'
- >>> attrs
- Attrs([('href', '#')])
-
- To add a new attribute, use the ``|`` operator, where the right hand value
- is a sequence of ``(name, value)`` tuples (which includes `Attrs`
- instances):
-
- >>> attrs | [('title', 'Bar')]
- Attrs([('href', '#'), ('title', 'Bar')])
-
- If the attributes already contain an attribute with a given name, the value
- of that attribute is replaced:
-
- >>> attrs | [('href', 'http://example.org/')]
- Attrs([('href', 'http://example.org/')])
- """
- __slots__ = []
-
- def __contains__(self, name):
- """Return whether the list includes an attribute with the specified
- name.
-
- :return: `True` if the list includes the attribute
- :rtype: `bool`
- """
- for attr, _ in self:
- if attr == name:
- return True
-
- def __getitem__(self, i):
- """Return an item or slice of the attributes list.
-
- >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
- >>> attrs[1]
- ('title', 'Foo')
- >>> attrs[1:]
- Attrs([('title', 'Foo')])
- """
- items = tuple.__getitem__(self, i)
- if type(i) is slice:
- return Attrs(items)
- return items
-
- def __getslice__(self, i, j):
- """Return a slice of the attributes list.
-
- >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
- >>> attrs[1:]
- Attrs([('title', 'Foo')])
- """
- return Attrs(tuple.__getslice__(self, i, j))
-
- def __or__(self, attrs):
- """Return a new instance that contains the attributes in `attrs` in
- addition to any already existing attributes.
-
- :return: a new instance with the merged attributes
- :rtype: `Attrs`
- """
- repl = dict([(an, av) for an, av in attrs if an in self])
- return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] +
- [(an, av) for an, av in attrs if an not in self])
-
- def __repr__(self):
- if not self:
- return 'Attrs()'
- return 'Attrs([%s])' % ', '.join([repr(item) for item in self])
-
- def __sub__(self, names):
- """Return a new instance with all attributes with a name in `names` are
- removed.
-
- :param names: the names of the attributes to remove
- :return: a new instance with the attribute removed
- :rtype: `Attrs`
- """
- if isinstance(names, basestring):
- names = (names,)
- return Attrs([(name, val) for name, val in self if name not in names])
-
- def get(self, name, default=None):
- """Return the value of the attribute with the specified name, or the
- value of the `default` parameter if no such attribute is found.
-
- :param name: the name of the attribute
- :param default: the value to return when the attribute does not exist
- :return: the attribute value, or the `default` value if that attribute
- does not exist
- :rtype: `object`
- """
- for attr, value in self:
- if attr == name:
- return value
- return default
-
- def totuple(self):
- """Return the attributes as a markup event.
-
- The returned event is a `TEXT` event, the data is the value of all
- attributes joined together.
-
- >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
- ('TEXT', '#Foo', (None, -1, -1))
-
- :return: a `TEXT` event
- :rtype: `tuple`
- """
- return TEXT, ''.join([x[1] for x in self]), (None, -1, -1)
-
-
-class Markup(unicode):
- """Marks a string as being safe for inclusion in HTML/XML output without
- needing to be escaped.
- """
- __slots__ = []
-
- def __add__(self, other):
- return Markup(unicode.__add__(self, escape(other)))
-
- def __radd__(self, other):
- return Markup(unicode.__add__(escape(other), self))
-
- def __mod__(self, args):
- if isinstance(args, dict):
- args = dict(zip(args.keys(), map(escape, args.values())))
- elif isinstance(args, (list, tuple)):
- args = tuple(map(escape, args))
- else:
- args = escape(args)
- return Markup(unicode.__mod__(self, args))
-
- def __mul__(self, num):
- return Markup(unicode.__mul__(self, num))
- __rmul__ = __mul__
-
- def __repr__(self):
- return "<%s %s>" % (type(self).__name__, unicode.__repr__(self))
-
- def join(self, seq, escape_quotes=True):
- """Return a `Markup` object which is the concatenation of the strings
- in the given sequence, where this `Markup` object is the separator
- between the joined elements.
-
- Any element in the sequence that is not a `Markup` instance is
- automatically escaped.
-
- :param seq: the sequence of strings to join
- :param escape_quotes: whether double quote characters in the elements
- should be escaped
- :return: the joined `Markup` object
- :rtype: `Markup`
- :see: `escape`
- """
- return Markup(unicode.join(self, [escape(item, quotes=escape_quotes)
- for item in seq]))
-
- @classmethod
- def escape(cls, text, quotes=True):
- """Create a Markup instance from a string and escape special characters
- it may contain (<, >, & and \").
-
- >>> escape('"1 < 2"')
- <Markup u'&#34;1 &lt; 2&#34;'>
-
- If the `quotes` parameter is set to `False`, the \" character is left
- as is. Escaping quotes is generally only required for strings that are
- to be used in attribute values.
-
- >>> escape('"1 < 2"', quotes=False)
- <Markup u'"1 &lt; 2"'>
-
- :param text: the text to escape
- :param quotes: if ``True``, double quote characters are escaped in
- addition to the other special characters
- :return: the escaped `Markup` string
- :rtype: `Markup`
- """
- if not text:
- return cls()
- if type(text) is cls:
- return text
- if hasattr(text, '__html__'):
- return Markup(text.__html__())
-
- text = text.replace('&', '&amp;') \
- .replace('<', '&lt;') \
- .replace('>', '&gt;')
- if quotes:
- text = text.replace('"', '&#34;')
- return cls(text)
-
- def unescape(self):
- """Reverse-escapes &, <, >, and \" and returns a `unicode` object.
-
- >>> Markup('1 &lt; 2').unescape()
- u'1 < 2'
-
- :return: the unescaped string
- :rtype: `unicode`
- :see: `genshi.core.unescape`
- """
- if not self:
- return ''
- return unicode(self).replace('&#34;', '"') \
- .replace('&gt;', '>') \
- .replace('&lt;', '<') \
- .replace('&amp;', '&')
-
- def stripentities(self, keepxmlentities=False):
- """Return a copy of the text with any character or numeric entities
- replaced by the equivalent UTF-8 characters.
-
- If the `keepxmlentities` parameter is provided and evaluates to `True`,
- the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
- ``&quot;``) are not stripped.
-
- :return: a `Markup` instance with entities removed
- :rtype: `Markup`
- :see: `genshi.util.stripentities`
- """
- return Markup(stripentities(self, keepxmlentities=keepxmlentities))
-
- def striptags(self):
- """Return a copy of the text with all XML/HTML tags removed.
-
- :return: a `Markup` instance with all tags removed
- :rtype: `Markup`
- :see: `genshi.util.striptags`
- """
- return Markup(striptags(self))
-
-
-try:
- from genshi._speedups import Markup
-except ImportError:
- pass # just use the Python implementation
-
-
-escape = Markup.escape
-
-
-def unescape(text):
- """Reverse-escapes &, <, >, and \" and returns a `unicode` object.
-
- >>> unescape(Markup('1 &lt; 2'))
- u'1 < 2'
-
- If the provided `text` object is not a `Markup` instance, it is returned
- unchanged.
-
- >>> unescape('1 &lt; 2')
- '1 &lt; 2'
-
- :param text: the text to unescape
- :return: the unescsaped string
- :rtype: `unicode`
- """
- if not isinstance(text, Markup):
- return text
- return text.unescape()
-
-
-class Namespace(object):
- """Utility class creating and testing elements with a namespace.
-
- Internally, namespace URIs are encoded in the `QName` of any element or
- attribute, the namespace URI being enclosed in curly braces. This class
- helps create and test these strings.
-
- A `Namespace` object is instantiated with the namespace URI.
-
- >>> html = Namespace('http://www.w3.org/1999/xhtml')
- >>> html
- Namespace('http://www.w3.org/1999/xhtml')
- >>> html.uri
- u'http://www.w3.org/1999/xhtml'
-
- The `Namespace` object can than be used to generate `QName` objects with
- that namespace:
-
- >>> html.body
- QName('http://www.w3.org/1999/xhtml}body')
- >>> html.body.localname
- u'body'
- >>> html.body.namespace
- u'http://www.w3.org/1999/xhtml'
-
- The same works using item access notation, which is useful for element or
- attribute names that are not valid Python identifiers:
-
- >>> html['body']
- QName('http://www.w3.org/1999/xhtml}body')
-
- A `Namespace` object can also be used to test whether a specific `QName`
- belongs to that namespace using the ``in`` operator:
-
- >>> qname = html.body
- >>> qname in html
- True
- >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
- False
- """
- def __new__(cls, uri):
- if type(uri) is cls:
- return uri
- return object.__new__(cls)
-
- def __getnewargs__(self):
- return (self.uri,)
-
- def __getstate__(self):
- return self.uri
-
- def __setstate__(self, uri):
- self.uri = uri
-
- def __init__(self, uri):
- self.uri = unicode(uri)
-
- def __contains__(self, qname):
- return qname.namespace == self.uri
-
- def __ne__(self, other):
- return not self == other
-
- def __eq__(self, other):
- if isinstance(other, Namespace):
- return self.uri == other.uri
- return self.uri == other
-
- def __getitem__(self, name):
- return QName(self.uri + '}' + name)
- __getattr__ = __getitem__
-
- def __hash__(self):
- return hash(self.uri)
-
- def __repr__(self):
- return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
-
- def __str__(self):
- return self.uri.encode('utf-8')
-
- def __unicode__(self):
- return self.uri
-
-
-# The namespace used by attributes such as xml:lang and xml:space
-XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
-
-
-class QName(unicode):
- """A qualified element or attribute name.
-
- The unicode value of instances of this class contains the qualified name of
- the element or attribute, in the form ``{namespace-uri}local-name``. The
- namespace URI can be obtained through the additional `namespace` attribute,
- while the local name can be accessed through the `localname` attribute.
-
- >>> qname = QName('foo')
- >>> qname
- QName('foo')
- >>> qname.localname
- u'foo'
- >>> qname.namespace
-
- >>> qname = QName('http://www.w3.org/1999/xhtml}body')
- >>> qname
- QName('http://www.w3.org/1999/xhtml}body')
- >>> qname.localname
- u'body'
- >>> qname.namespace
- u'http://www.w3.org/1999/xhtml'
- """
- __slots__ = ['namespace', 'localname']
-
- def __new__(cls, qname):
- """Create the `QName` instance.
-
- :param qname: the qualified name as a string of the form
- ``{namespace-uri}local-name``, where the leading curly
- brace is optional
- """
- if type(qname) is cls:
- return qname
-
- parts = qname.lstrip('{').split('}', 1)
- if len(parts) > 1:
- self = unicode.__new__(cls, '{%s' % qname)
- self.namespace, self.localname = map(unicode, parts)
- else:
- self = unicode.__new__(cls, qname)
- self.namespace, self.localname = None, unicode(qname)
- return self
-
- def __getnewargs__(self):
- return (self.lstrip('{'),)
-
- def __repr__(self):
- return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
diff --git a/genshi/filters/__init__.py b/genshi/filters/__init__.py
deleted file mode 100644
index efc2565..0000000
--- a/genshi/filters/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2007-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Implementation of a number of stream filters."""
-
-from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
-from genshi.filters.i18n import Translator
-from genshi.filters.transform import Transformer
-
-__docformat__ = 'restructuredtext en'
diff --git a/genshi/filters/html.py b/genshi/filters/html.py
deleted file mode 100644
index d554a54..0000000
--- a/genshi/filters/html.py
+++ /dev/null
@@ -1,453 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Implementation of a number of stream filters."""
-
-try:
- any
-except NameError:
- from genshi.util import any
-import re
-
-from genshi.core import Attrs, QName, stripentities
-from genshi.core import END, START, TEXT, COMMENT
-
-__all__ = ['HTMLFormFiller', 'HTMLSanitizer']
-__docformat__ = 'restructuredtext en'
-
-
-class HTMLFormFiller(object):
- """A stream filter that can populate HTML forms from a dictionary of values.
-
- >>> from genshi.input import HTML
- >>> html = HTML('''<form>
- ... <p><input type="text" name="foo" /></p>
- ... </form>''')
- >>> filler = HTMLFormFiller(data={'foo': 'bar'})
- >>> print(html | filler)
- <form>
- <p><input type="text" name="foo" value="bar"/></p>
- </form>
- """
- # TODO: only select the first radio button, and the first select option
- # (if not in a multiple-select)
- # TODO: only apply to elements in the XHTML namespace (or no namespace)?
-
- def __init__(self, name=None, id=None, data=None, passwords=False):
- """Create the filter.
-
- :param name: The name of the form that should be populated. If this
- parameter is given, only forms where the ``name`` attribute
- value matches the parameter are processed.
- :param id: The ID of the form that should be populated. If this
- parameter is given, only forms where the ``id`` attribute
- value matches the parameter are processed.
- :param data: The dictionary of form values, where the keys are the names
- of the form fields, and the values are the values to fill
- in.
- :param passwords: Whether password input fields should be populated.
- This is off by default for security reasons (for
- example, a password may end up in the browser cache)
- :note: Changed in 0.5.2: added the `passwords` option
- """
- self.name = name
- self.id = id
- if data is None:
- data = {}
- self.data = data
- self.passwords = passwords
-
- def __call__(self, stream):
- """Apply the filter to the given stream.
-
- :param stream: the markup event stream to filter
- """
- in_form = in_select = in_option = in_textarea = False
- select_value = option_value = textarea_value = None
- option_start = None
- option_text = []
- no_option_value = False
-
- for kind, data, pos in stream:
-
- if kind is START:
- tag, attrs = data
- tagname = tag.localname
-
- if tagname == 'form' and (
- self.name and attrs.get('name') == self.name or
- self.id and attrs.get('id') == self.id or
- not (self.id or self.name)):
- in_form = True
-
- elif in_form:
- if tagname == 'input':
- type = attrs.get('type', '').lower()
- if type in ('checkbox', 'radio'):
- name = attrs.get('name')
- if name and name in self.data:
- value = self.data[name]
- declval = attrs.get('value')
- checked = False
- if isinstance(value, (list, tuple)):
- if declval:
- checked = declval in [unicode(v) for v
- in value]
- else:
- checked = any(value)
- else:
- if declval:
- checked = declval == unicode(value)
- elif type == 'checkbox':
- checked = bool(value)
- if checked:
- attrs |= [(QName('checked'), 'checked')]
- elif 'checked' in attrs:
- attrs -= 'checked'
- elif type in ('', 'hidden', 'text') \
- or type == 'password' and self.passwords:
- name = attrs.get('name')
- if name and name in self.data:
- value = self.data[name]
- if isinstance(value, (list, tuple)):
- value = value[0]
- if value is not None:
- attrs |= [
- (QName('value'), unicode(value))
- ]
- elif tagname == 'select':
- name = attrs.get('name')
- if name in self.data:
- select_value = self.data[name]
- in_select = True
- elif tagname == 'textarea':
- name = attrs.get('name')
- if name in self.data:
- textarea_value = self.data.get(name)
- if isinstance(textarea_value, (list, tuple)):
- textarea_value = textarea_value[0]
- in_textarea = True
- elif in_select and tagname == 'option':
- option_start = kind, data, pos
- option_value = attrs.get('value')
- if option_value is None:
- no_option_value = True
- option_value = ''
- in_option = True
- continue
- yield kind, (tag, attrs), pos
-
- elif in_form and kind is TEXT:
- if in_select and in_option:
- if no_option_value:
- option_value += data
- option_text.append((kind, data, pos))
- continue
- elif in_textarea:
- continue
- yield kind, data, pos
-
- elif in_form and kind is END:
- tagname = data.localname
- if tagname == 'form':
- in_form = False
- elif tagname == 'select':
- in_select = False
- select_value = None
- elif in_select and tagname == 'option':
- if isinstance(select_value, (tuple, list)):
- selected = option_value in [unicode(v) for v
- in select_value]
- else:
- selected = option_value == unicode(select_value)
- okind, (tag, attrs), opos = option_start
- if selected:
- attrs |= [(QName('selected'), 'selected')]
- elif 'selected' in attrs:
- attrs -= 'selected'
- yield okind, (tag, attrs), opos
- if option_text:
- for event in option_text:
- yield event
- in_option = False
- no_option_value = False
- option_start = option_value = None
- option_text = []
- elif tagname == 'textarea':
- if textarea_value:
- yield TEXT, unicode(textarea_value), pos
- in_textarea = False
- yield kind, data, pos
-
- else:
- yield kind, data, pos
-
-
-class HTMLSanitizer(object):
- """A filter that removes potentially dangerous HTML tags and attributes
- from the stream.
-
- >>> from genshi import HTML
- >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
- >>> print(html | HTMLSanitizer())
- <div/>
-
- The default set of safe tags and attributes can be modified when the filter
- is instantiated. For example, to allow inline ``style`` attributes, the
- following instantation would work:
-
- >>> html = HTML('<div style="background: #000"></div>')
- >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
- >>> print(html | sanitizer)
- <div style="background: #000"/>
-
- Note that even in this case, the filter *does* attempt to remove dangerous
- constructs from style attributes:
-
- >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
- >>> print(html | sanitizer)
- <div style="color: #000"/>
-
- This handles HTML entities, unicode escapes in CSS and Javascript text, as
- well as a lot of other things. However, the style tag is still excluded by
- default because it is very hard for such sanitizing to be completely safe,
- especially considering how much error recovery current web browsers perform.
-
- It also does some basic filtering of CSS properties that may be used for
- typical phishing attacks. For more sophisticated filtering, this class
- provides a couple of hooks that can be overridden in sub-classes.
-
- :warn: Note that this special processing of CSS is currently only applied to
- style attributes, **not** style elements.
- """
-
- SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
- 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
- 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
- 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
- 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
- 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
- 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
- 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
- 'ul', 'var'])
-
- SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
- 'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
- 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
- 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
- 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
- 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
- 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
- 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
- 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
- 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
- 'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
-
- SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])
-
- URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
- 'src'])
-
- def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
- safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
- """Create the sanitizer.
-
- The exact set of allowed elements and attributes can be configured.
-
- :param safe_tags: a set of tag names that are considered safe
- :param safe_attrs: a set of attribute names that are considered safe
- :param safe_schemes: a set of URI schemes that are considered safe
- :param uri_attrs: a set of names of attributes that contain URIs
- """
- self.safe_tags = safe_tags
- "The set of tag names that are considered safe."
- self.safe_attrs = safe_attrs
- "The set of attribute names that are considered safe."
- self.uri_attrs = uri_attrs
- "The set of names of attributes that may contain URIs."
- self.safe_schemes = safe_schemes
- "The set of URI schemes that are considered safe."
-
- def __call__(self, stream):
- """Apply the filter to the given stream.
-
- :param stream: the markup event stream to filter
- """
- waiting_for = None
-
- for kind, data, pos in stream:
- if kind is START:
- if waiting_for:
- continue
- tag, attrs = data
- if not self.is_safe_elem(tag, attrs):
- waiting_for = tag
- continue
-
- new_attrs = []
- for attr, value in attrs:
- value = stripentities(value)
- if attr not in self.safe_attrs:
- continue
- elif attr in self.uri_attrs:
- # Don't allow URI schemes such as "javascript:"
- if not self.is_safe_uri(value):
- continue
- elif attr == 'style':
- # Remove dangerous CSS declarations from inline styles
- decls = self.sanitize_css(value)
- if not decls:
- continue
- value = '; '.join(decls)
- new_attrs.append((attr, value))
-
- yield kind, (tag, Attrs(new_attrs)), pos
-
- elif kind is END:
- tag = data
- if waiting_for:
- if waiting_for == tag:
- waiting_for = None
- else:
- yield kind, data, pos
-
- elif kind is not COMMENT:
- if not waiting_for:
- yield kind, data, pos
-
- def is_safe_css(self, propname, value):
- """Determine whether the given css property declaration is to be
- considered safe for inclusion in the output.
-
- :param propname: the CSS property name
- :param value: the value of the property
- :return: whether the property value should be considered safe
- :rtype: bool
- :since: version 0.6
- """
- if propname == 'position':
- return False
- if propname.startswith('margin') and '-' in value:
- # Negative margins can be used for phishing
- return False
- return True
-
- def is_safe_elem(self, tag, attrs):
- """Determine whether the given element should be considered safe for
- inclusion in the output.
-
- :param tag: the tag name of the element
- :type tag: QName
- :param attrs: the element attributes
- :type attrs: Attrs
- :return: whether the element should be considered safe
- :rtype: bool
- :since: version 0.6
- """
- if tag not in self.safe_tags:
- return False
- if tag.localname == 'input':
- input_type = attrs.get('type', '').lower()
- if input_type == 'password':
- return False
- return True
-
- def is_safe_uri(self, uri):
- """Determine whether the given URI is to be considered safe for
- inclusion in the output.
-
- The default implementation checks whether the scheme of the URI is in
- the set of allowed URIs (`safe_schemes`).
-
- >>> sanitizer = HTMLSanitizer()
- >>> sanitizer.is_safe_uri('http://example.org/')
- True
- >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
- False
-
- :param uri: the URI to check
- :return: `True` if the URI can be considered safe, `False` otherwise
- :rtype: `bool`
- :since: version 0.4.3
- """
- if '#' in uri:
- uri = uri.split('#', 1)[0] # Strip out the fragment identifier
- if ':' not in uri:
- return True # This is a relative URI
- chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
- return ''.join(chars).lower() in self.safe_schemes
-
- def sanitize_css(self, text):
- """Remove potentially dangerous property declarations from CSS code.
-
- In particular, properties using the CSS ``url()`` function with a scheme
- that is not considered safe are removed:
-
- >>> sanitizer = HTMLSanitizer()
- >>> sanitizer.sanitize_css(u'''
- ... background: url(javascript:alert("foo"));
- ... color: #000;
- ... ''')
- [u'color: #000']
-
- Also, the proprietary Internet Explorer function ``expression()`` is
- always stripped:
-
- >>> sanitizer.sanitize_css(u'''
- ... background: #fff;
- ... color: #000;
- ... width: e/**/xpression(alert("foo"));
- ... ''')
- [u'background: #fff', u'color: #000']
-
- :param text: the CSS text; this is expected to be `unicode` and to not
- contain any character or numeric references
- :return: a list of declarations that are considered safe
- :rtype: `list`
- :since: version 0.4.3
- """
- decls = []
- text = self._strip_css_comments(self._replace_unicode_escapes(text))
- for decl in text.split(';'):
- decl = decl.strip()
- if not decl:
- continue
- try:
- propname, value = decl.split(':', 1)
- except ValueError:
- continue
- if not self.is_safe_css(propname.strip().lower(), value.strip()):
- continue
- is_evil = False
- if 'expression' in value:
- is_evil = True
- for match in re.finditer(r'url\s*\(([^)]+)', value):
- if not self.is_safe_uri(match.group(1)):
- is_evil = True
- break
- if not is_evil:
- decls.append(decl.strip())
- return decls
-
- _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
- _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub
-
- def _replace_unicode_escapes(self, text):
- def _repl(match):
- return unichr(int(match.group(1), 16))
- return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text))
-
- _CSS_COMMENTS = re.compile(r'/\*.*?\*/').sub
-
- def _strip_css_comments(self, text):
- return self._CSS_COMMENTS('', text)
diff --git a/genshi/filters/i18n.py b/genshi/filters/i18n.py
deleted file mode 100644
index 7852875..0000000
--- a/genshi/filters/i18n.py
+++ /dev/null
@@ -1,1238 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2007-2010 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Directives and utilities for internationalization and localization of
-templates.
-
-:since: version 0.4
-:note: Directives support added since version 0.6
-"""
-
-try:
- any
-except NameError:
- from genshi.util import any
-from gettext import NullTranslations
-import os
-import re
-from types import FunctionType
-
-from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
- XML_NAMESPACE, _ensure, StreamEventKind
-from genshi.template.eval import _ast
-from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
-from genshi.template.directives import Directive, StripDirective
-from genshi.template.markup import MarkupTemplate, EXEC
-
-__all__ = ['Translator', 'extract']
-__docformat__ = 'restructuredtext en'
-
-
-I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
-
-MSGBUF = StreamEventKind('MSGBUF')
-SUB_START = StreamEventKind('SUB_START')
-SUB_END = StreamEventKind('SUB_END')
-
-GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
- 'ugettext', 'ungettext')
-
-
-class I18NDirective(Directive):
- """Simple interface for i18n directives to support messages extraction."""
-
- def __call__(self, stream, directives, ctxt, **vars):
- return _apply_directives(stream, directives, ctxt, vars)
-
-
-class ExtractableI18NDirective(I18NDirective):
- """Simple interface for directives to support messages extraction."""
-
- def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
- search_text=True, comment_stack=None):
- raise NotImplementedError
-
-
-class CommentDirective(I18NDirective):
- """Implementation of the ``i18n:comment`` template directive which adds
- translation comments.
-
- >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <p i18n:comment="As in Foo Bar">Foo</p>
- ... </html>''')
- >>> translator = Translator()
- >>> translator.setup(tmpl)
- >>> list(translator.extract(tmpl.stream))
- [(2, None, u'Foo', [u'As in Foo Bar'])]
- """
- __slots__ = ['comment']
-
- def __init__(self, value, template=None, namespaces=None, lineno=-1,
- offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- self.comment = value
-
-
-class MsgDirective(ExtractableI18NDirective):
- r"""Implementation of the ``i18n:msg`` directive which marks inner content
- as translatable. Consider the following examples:
-
- >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <div i18n:msg="">
- ... <p>Foo</p>
- ... <p>Bar</p>
- ... </div>
- ... <p i18n:msg="">Foo <em>bar</em>!</p>
- ... </html>''')
-
- >>> translator = Translator()
- >>> translator.setup(tmpl)
- >>> list(translator.extract(tmpl.stream))
- [(2, None, u'[1:Foo]\n [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
- >>> print(tmpl.generate().render())
- <html>
- <div><p>Foo</p>
- <p>Bar</p></div>
- <p>Foo <em>bar</em>!</p>
- </html>
-
- >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <div i18n:msg="fname, lname">
- ... <p>First Name: ${fname}</p>
- ... <p>Last Name: ${lname}</p>
- ... </div>
- ... <p i18n:msg="">Foo <em>bar</em>!</p>
- ... </html>''')
- >>> translator.setup(tmpl)
- >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
- [(2, None, u'[1:First Name: %(fname)s]\n [2:Last Name: %(lname)s]', []),
- (6, None, u'Foo [1:bar]!', [])]
-
- >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <div i18n:msg="fname, lname">
- ... <p>First Name: ${fname}</p>
- ... <p>Last Name: ${lname}</p>
- ... </div>
- ... <p i18n:msg="">Foo <em>bar</em>!</p>
- ... </html>''')
- >>> translator.setup(tmpl)
- >>> print(tmpl.generate(fname='John', lname='Doe').render())
- <html>
- <div><p>First Name: John</p>
- <p>Last Name: Doe</p></div>
- <p>Foo <em>bar</em>!</p>
- </html>
-
- Starting and ending white-space is stripped of to make it simpler for
- translators. Stripping it is not that important since it's on the html
- source, the rendered output will remain the same.
- """
- __slots__ = ['params', 'lineno']
-
- def __init__(self, value, template=None, namespaces=None, lineno=-1,
- offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- self.params = [param.strip() for param in value.split(',') if param]
- self.lineno = lineno
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('params', '').strip()
- return super(MsgDirective, cls).attach(template, stream, value.strip(),
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- gettext = ctxt.get('_i18n.gettext')
- if ctxt.get('_i18n.domain'):
- dgettext = ctxt.get('_i18n.dgettext')
- assert hasattr(dgettext, '__call__'), \
- 'No domain gettext function passed'
- gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
-
- def _generate():
- msgbuf = MessageBuffer(self)
- previous = stream.next()
- if previous[0] is START:
- yield previous
- else:
- msgbuf.append(*previous)
- previous = stream.next()
- for kind, data, pos in stream:
- msgbuf.append(*previous)
- previous = kind, data, pos
- if previous[0] is not END:
- msgbuf.append(*previous)
- previous = None
- for event in msgbuf.translate(gettext(msgbuf.format())):
- yield event
- if previous:
- yield previous
-
- return _apply_directives(_generate(), directives, ctxt, vars)
-
- def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
- search_text=True, comment_stack=None):
- msgbuf = MessageBuffer(self)
- strip = False
-
- stream = iter(stream)
- previous = stream.next()
- if previous[0] is START:
- for message in translator._extract_attrs(previous,
- gettext_functions,
- search_text=search_text):
- yield message
- previous = stream.next()
- strip = True
- for event in stream:
- if event[0] is START:
- for message in translator._extract_attrs(event,
- gettext_functions,
- search_text=search_text):
- yield message
- msgbuf.append(*previous)
- previous = event
- if not strip:
- msgbuf.append(*previous)
-
- yield self.lineno, None, msgbuf.format(), comment_stack[-1:]
-
-
-class ChooseBranchDirective(I18NDirective):
- __slots__ = ['params']
-
- def __call__(self, stream, directives, ctxt, **vars):
- self.params = ctxt.get('_i18n.choose.params', [])[:]
- msgbuf = MessageBuffer(self)
- stream = _apply_directives(stream, directives, ctxt, vars)
-
- previous = stream.next()
- if previous[0] is START:
- yield previous
- else:
- msgbuf.append(*previous)
-
- try:
- previous = stream.next()
- except StopIteration:
- # For example <i18n:singular> or <i18n:plural> directives
- yield MSGBUF, (), -1 # the place holder for msgbuf output
- ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
- return
-
- for event in stream:
- msgbuf.append(*previous)
- previous = event
- yield MSGBUF, (), -1 # the place holder for msgbuf output
-
- if previous[0] is END:
- yield previous # the outer end tag
- else:
- msgbuf.append(*previous)
- ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
-
- def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
- search_text=True, comment_stack=None, msgbuf=None):
- stream = iter(stream)
- previous = stream.next()
-
- if previous[0] is START:
- # skip the enclosing element
- for message in translator._extract_attrs(previous,
- gettext_functions,
- search_text=search_text):
- yield message
- previous = stream.next()
-
- for event in stream:
- if previous[0] is START:
- for message in translator._extract_attrs(previous,
- gettext_functions,
- search_text=search_text):
- yield message
- msgbuf.append(*previous)
- previous = event
-
- if previous[0] is not END:
- msgbuf.append(*previous)
-
-
-class SingularDirective(ChooseBranchDirective):
- """Implementation of the ``i18n:singular`` directive to be used with the
- ``i18n:choose`` directive."""
-
-
-class PluralDirective(ChooseBranchDirective):
- """Implementation of the ``i18n:plural`` directive to be used with the
- ``i18n:choose`` directive."""
-
-
-class ChooseDirective(ExtractableI18NDirective):
- """Implementation of the ``i18n:choose`` directive which provides plural
- internationalisation of strings.
-
- This directive requires at least one parameter, the one which evaluates to
- an integer which will allow to choose the plural/singular form. If you also
- have expressions inside the singular and plural version of the string you
- also need to pass a name for those parameters. Consider the following
- examples:
-
- >>> tmpl = MarkupTemplate('''\
- <html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <div i18n:choose="num; num">
- ... <p i18n:singular="">There is $num coin</p>
- ... <p i18n:plural="">There are $num coins</p>
- ... </div>
- ... </html>''')
- >>> translator = Translator()
- >>> translator.setup(tmpl)
- >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
- [(2, 'ngettext', (u'There is %(num)s coin',
- u'There are %(num)s coins'), [])]
-
- >>> tmpl = MarkupTemplate('''\
- <html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <div i18n:choose="num; num">
- ... <p i18n:singular="">There is $num coin</p>
- ... <p i18n:plural="">There are $num coins</p>
- ... </div>
- ... </html>''')
- >>> translator.setup(tmpl)
- >>> print(tmpl.generate(num=1).render())
- <html>
- <div>
- <p>There is 1 coin</p>
- </div>
- </html>
- >>> print(tmpl.generate(num=2).render())
- <html>
- <div>
- <p>There are 2 coins</p>
- </div>
- </html>
-
- When used as a element and not as an attribute:
-
- >>> tmpl = MarkupTemplate('''\
- <html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <i18n:choose numeral="num" params="num">
- ... <p i18n:singular="">There is $num coin</p>
- ... <p i18n:plural="">There are $num coins</p>
- ... </i18n:choose>
- ... </html>''')
- >>> translator.setup(tmpl)
- >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
- [(2, 'ngettext', (u'There is %(num)s coin',
- u'There are %(num)s coins'), [])]
- """
- __slots__ = ['numeral', 'params', 'lineno']
-
- def __init__(self, value, template=None, namespaces=None, lineno=-1,
- offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- params = [v.strip() for v in value.split(';')]
- self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
- self.params = params and [name.strip() for name in
- params[0].split(',') if name] or []
- self.lineno = lineno
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- numeral = value.get('numeral', '').strip()
- assert numeral is not '', "at least pass the numeral param"
- params = [v.strip() for v in value.get('params', '').split(',')]
- value = '%s; ' % numeral + ', '.join(params)
- return super(ChooseDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- ctxt.push({'_i18n.choose.params': self.params,
- '_i18n.choose.singular': None,
- '_i18n.choose.plural': None})
-
- ngettext = ctxt.get('_i18n.ngettext')
- assert hasattr(ngettext, '__call__'), 'No ngettext function available'
- dngettext = ctxt.get('_i18n.dngettext')
- if not dngettext:
- dngettext = lambda d, s, p, n: ngettext(s, p, n)
-
- new_stream = []
- singular_stream = None
- singular_msgbuf = None
- plural_stream = None
- plural_msgbuf = None
-
- numeral = self.numeral.evaluate(ctxt)
- is_plural = self._is_plural(numeral, ngettext)
-
- for event in stream:
- if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
- for d in event[1][0]):
- subdirectives, substream = event[1]
-
- if isinstance(subdirectives[0], SingularDirective):
- singular_stream = list(_apply_directives(substream,
- subdirectives,
- ctxt, vars))
- new_stream.append((MSGBUF, None, (None, -1, -1)))
-
- elif isinstance(subdirectives[0], PluralDirective):
- if is_plural:
- plural_stream = list(_apply_directives(substream,
- subdirectives,
- ctxt, vars))
-
- else:
- new_stream.append(event)
-
- if ctxt.get('_i18n.domain'):
- ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
- s, p, n)
-
- singular_msgbuf = ctxt.get('_i18n.choose.singular')
- if is_plural:
- plural_msgbuf = ctxt.get('_i18n.choose.plural')
- msgbuf, choice = plural_msgbuf, plural_stream
- else:
- msgbuf, choice = singular_msgbuf, singular_stream
- plural_msgbuf = MessageBuffer(self)
-
- for kind, data, pos in new_stream:
- if kind is MSGBUF:
- for event in choice:
- if event[0] is MSGBUF:
- translation = ngettext(singular_msgbuf.format(),
- plural_msgbuf.format(),
- numeral)
- for subevent in msgbuf.translate(translation):
- yield subevent
- else:
- yield event
- else:
- yield kind, data, pos
-
- ctxt.pop()
-
- def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
- search_text=True, comment_stack=None):
- strip = False
- stream = iter(stream)
- previous = stream.next()
-
- if previous[0] is START:
- # skip the enclosing element
- for message in translator._extract_attrs(previous,
- gettext_functions,
- search_text=search_text):
- yield message
- previous = stream.next()
- strip = True
-
- singular_msgbuf = MessageBuffer(self)
- plural_msgbuf = MessageBuffer(self)
-
- for event in stream:
- if previous[0] is SUB:
- directives, substream = previous[1]
- for directive in directives:
- if isinstance(directive, SingularDirective):
- for message in directive.extract(translator,
- substream, gettext_functions, search_text,
- comment_stack, msgbuf=singular_msgbuf):
- yield message
- elif isinstance(directive, PluralDirective):
- for message in directive.extract(translator,
- substream, gettext_functions, search_text,
- comment_stack, msgbuf=plural_msgbuf):
- yield message
- elif not isinstance(directive, StripDirective):
- singular_msgbuf.append(*previous)
- plural_msgbuf.append(*previous)
- else:
- if previous[0] is START:
- for message in translator._extract_attrs(previous,
- gettext_functions,
- search_text):
- yield message
- singular_msgbuf.append(*previous)
- plural_msgbuf.append(*previous)
- previous = event
-
- if not strip:
- singular_msgbuf.append(*previous)
- plural_msgbuf.append(*previous)
-
- yield self.lineno, 'ngettext', \
- (singular_msgbuf.format(), plural_msgbuf.format()), \
- comment_stack[-1:]
-
- def _is_plural(self, numeral, ngettext):
- # XXX: should we test which form was chosen like this!?!?!?
- # There should be no match in any catalogue for these singular and
- # plural test strings
- singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
- plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
- return ngettext(singular, plural, numeral) == plural
-
-
-class DomainDirective(I18NDirective):
- """Implementation of the ``i18n:domain`` directive which allows choosing
- another i18n domain(catalog) to translate from.
-
- >>> from genshi.filters.tests.i18n import DummyTranslations
- >>> tmpl = MarkupTemplate('''\
- <html xmlns:i18n="http://genshi.edgewall.org/i18n">
- ... <p i18n:msg="">Bar</p>
- ... <div i18n:domain="foo">
- ... <p i18n:msg="">FooBar</p>
- ... <p>Bar</p>
- ... <p i18n:domain="bar" i18n:msg="">Bar</p>
- ... <p i18n:domain="">Bar</p>
- ... </div>
- ... <p>Bar</p>
- ... </html>''')
-
- >>> translations = DummyTranslations({'Bar': 'Voh'})
- >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
- >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
- >>> translator = Translator(translations)
- >>> translator.setup(tmpl)
-
- >>> print(tmpl.generate().render())
- <html>
- <p>Voh</p>
- <div>
- <p>BarFoo</p>
- <p>foo_Bar</p>
- <p>bar_Bar</p>
- <p>Voh</p>
- </div>
- <p>Voh</p>
- </html>
- """
- __slots__ = ['domain']
-
- def __init__(self, value, template=None, namespaces=None, lineno=-1,
- offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- self.domain = value and value.strip() or '__DEFAULT__'
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('name')
- return super(DomainDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- ctxt.push({'_i18n.domain': self.domain})
- for event in _apply_directives(stream, directives, ctxt, vars):
- yield event
- ctxt.pop()
-
-
-class Translator(DirectiveFactory):
- """Can extract and translate localizable strings from markup streams and
- templates.
-
- For example, assume the following template:
-
- >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
- ... <head>
- ... <title>Example</title>
- ... </head>
- ... <body>
- ... <h1>Example</h1>
- ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
- ... </body>
- ... </html>''', filename='example.html')
-
- For demonstration, we define a dummy ``gettext``-style function with a
- hard-coded translation table, and pass that to the `Translator` initializer:
-
- >>> def pseudo_gettext(string):
- ... return {
- ... 'Example': 'Beispiel',
- ... 'Hello, %(name)s': 'Hallo, %(name)s'
- ... }[string]
- >>> translator = Translator(pseudo_gettext)
-
- Next, the translator needs to be prepended to any already defined filters
- on the template:
-
- >>> tmpl.filters.insert(0, translator)
-
- When generating the template output, our hard-coded translations should be
- applied as expected:
-
- >>> print(tmpl.generate(username='Hans', _=pseudo_gettext))
- <html>
- <head>
- <title>Beispiel</title>
- </head>
- <body>
- <h1>Beispiel</h1>
- <p>Hallo, Hans</p>
- </body>
- </html>
-
- Note that elements defining ``xml:lang`` attributes that do not contain
- variable expressions are ignored by this filter. That can be used to
- exclude specific parts of a template from being extracted and translated.
- """
-
- directives = [
- ('domain', DomainDirective),
- ('comment', CommentDirective),
- ('msg', MsgDirective),
- ('choose', ChooseDirective),
- ('singular', SingularDirective),
- ('plural', PluralDirective)
- ]
-
- IGNORE_TAGS = frozenset([
- QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
- QName('style'), QName('http://www.w3.org/1999/xhtml}style')
- ])
- INCLUDE_ATTRS = frozenset([
- 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
- ])
- NAMESPACE = I18N_NAMESPACE
-
- def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
- include_attrs=INCLUDE_ATTRS, extract_text=True):
- """Initialize the translator.
-
- :param translate: the translation function, for example ``gettext`` or
- ``ugettext``.
- :param ignore_tags: a set of tag names that should not be localized
- :param include_attrs: a set of attribute names that should be localized
- :param extract_text: whether the content of text nodes should be
- extracted, or only text in explicit ``gettext``
- function calls
-
- :note: Changed in 0.6: the `translate` parameter can now be either
- a ``gettext``-style function, or an object compatible with the
- ``NullTranslations`` or ``GNUTranslations`` interface
- """
- self.translate = translate
- self.ignore_tags = ignore_tags
- self.include_attrs = include_attrs
- self.extract_text = extract_text
-
- def __call__(self, stream, ctxt=None, translate_text=True,
- translate_attrs=True):
- """Translate any localizable strings in the given stream.
-
- This function shouldn't be called directly. Instead, an instance of
- the `Translator` class should be registered as a filter with the
- `Template` or the `TemplateLoader`, or applied as a regular stream
- filter. If used as a template filter, it should be inserted in front of
- all the default filters.
-
- :param stream: the markup event stream
- :param ctxt: the template context; the gettext functions are stored in it under ``_i18n.*`` keys
- :param translate_text: whether text nodes should be translated (used
- internally)
- :param translate_attrs: whether attribute values should be translated
- (used internally)
- :return: the localized stream
- """
- ignore_tags = self.ignore_tags
- include_attrs = self.include_attrs
- skip = 0
- xml_lang = XML_NAMESPACE['lang']
- if not self.extract_text:
- translate_text = False
- translate_attrs = False
-
- if type(self.translate) is FunctionType:
- gettext = self.translate
- if ctxt:
- ctxt['_i18n.gettext'] = gettext
- else:
- gettext = self.translate.ugettext
- ngettext = self.translate.ungettext
- try:
- dgettext = self.translate.dugettext
- dngettext = self.translate.dungettext
- except AttributeError:
- dgettext = lambda _, y: gettext(y)
- dngettext = lambda _, s, p, n: ngettext(s, p, n)
- if ctxt:
- ctxt['_i18n.gettext'] = gettext
- ctxt['_i18n.ngettext'] = ngettext
- ctxt['_i18n.dgettext'] = dgettext
- ctxt['_i18n.dngettext'] = dngettext
-
- if ctxt and ctxt.get('_i18n.domain'):
- gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
-
- for kind, data, pos in stream:
-
- # skip chunks that should not be localized
- if skip:
- if kind is START:
- skip += 1
- elif kind is END:
- skip -= 1
- yield kind, data, pos
- continue
-
- # handle different events that can be localized
- if kind is START:
- tag, attrs = data
- if tag in self.ignore_tags or \
- isinstance(attrs.get(xml_lang), basestring):
- skip += 1
- yield kind, data, pos
- continue
-
- new_attrs = []
- changed = False
-
- for name, value in attrs:
- newval = value
- if isinstance(value, basestring):
- if translate_attrs and name in include_attrs:
- newval = gettext(value)
- else:
- newval = list(
- self(_ensure(value), ctxt, translate_text=False)
- )
- if newval != value:
- value = newval
- changed = True
- new_attrs.append((name, value))
- if changed:
- attrs = Attrs(new_attrs)
-
- yield kind, (tag, attrs), pos
-
- elif translate_text and kind is TEXT:
- text = data.strip()
- if text:
- data = data.replace(text, unicode(gettext(text)))
- yield kind, data, pos
-
- elif kind is SUB:
- directives, substream = data
- current_domain = None
- for idx, directive in enumerate(directives):
- # Organize directives to make everything work
- # FIXME: There's got to be a better way to do this!
- if isinstance(directive, DomainDirective):
- # Grab current domain and update context
- current_domain = directive.domain
- ctxt.push({'_i18n.domain': current_domain})
- # Put domain directive as the first one in order to
- # update context before any other directives evaluation
- directives.insert(0, directives.pop(idx))
-
- # If this is an i18n directive, no need to translate text
- # nodes here
- is_i18n_directive = any([
- isinstance(d, ExtractableI18NDirective)
- for d in directives
- ])
- substream = list(self(substream, ctxt,
- translate_text=not is_i18n_directive,
- translate_attrs=translate_attrs))
- yield kind, (directives, substream), pos
-
- if current_domain:
- ctxt.pop()
- else:
- yield kind, data, pos
-
- def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
- search_text=True, comment_stack=None):
- """Extract localizable strings from the given template stream.
-
- For every string found, this function yields a ``(lineno, function,
- message, comments)`` tuple, where:
-
- * ``lineno`` is the number of the line on which the string was found,
- * ``function`` is the name of the ``gettext`` function used (if the
- string was extracted from embedded Python code), and
- * ``message`` is the string itself (a ``unicode`` object, or a tuple
- of ``unicode`` objects for functions with multiple string
- arguments).
- * ``comments`` is a list of comments related to the message, extracted
- from ``i18n:comment`` attributes found in the markup
-
- >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
- ... <head>
- ... <title>Example</title>
- ... </head>
- ... <body>
- ... <h1>Example</h1>
- ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
- ... <p>${ngettext("You have %d item", "You have %d items", num)}</p>
- ... </body>
- ... </html>''', filename='example.html')
- >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
- ... print('%d, %r, %r' % (line, func, msg))
- 3, None, u'Example'
- 6, None, u'Example'
- 7, '_', u'Hello, %(name)s'
- 8, 'ngettext', (u'You have %d item', u'You have %d items', None)
-
- :param stream: the event stream to extract strings from; can be a
- regular stream or a template stream
- :param gettext_functions: a sequence of function names that should be
- treated as gettext-style localization
- functions
- :param search_text: whether the content of text nodes should be
- extracted (used internally)
-
- :note: Changed in 0.4.1: For a function with multiple string arguments
- (such as ``ngettext``), a single item with a tuple of strings is
- yielded, instead of an item for each string argument.
- :note: Changed in 0.6: The returned tuples now include a fourth
- element, which is a list of comments for the translator.
- """
- if not self.extract_text:
- search_text = False
- if comment_stack is None:
- comment_stack = []
- skip = 0
-
- xml_lang = XML_NAMESPACE['lang']
-
- for kind, data, pos in stream:
- if skip:
- if kind is START:
- skip += 1
- if kind is END:
- skip -= 1
-
- if kind is START and not skip:
- tag, attrs = data
- if tag in self.ignore_tags or \
- isinstance(attrs.get(xml_lang), basestring):
- skip += 1
- continue
-
- for message in self._extract_attrs((kind, data, pos),
- gettext_functions,
- search_text=search_text):
- yield message
-
- elif not skip and search_text and kind is TEXT:
- text = data.strip()
- if text and [ch for ch in text if ch.isalpha()]:
- yield pos[1], None, text, comment_stack[-1:]
-
- elif kind is EXPR or kind is EXEC:
- for funcname, strings in extract_from_code(data,
- gettext_functions):
- # XXX: Do we need to grab i18n:comment from comment_stack ???
- yield pos[1], funcname, strings, []
-
- elif kind is SUB:
- directives, substream = data
- in_comment = False
-
- for idx, directive in enumerate(directives):
- # Do a first loop to see if there's a comment directive
- # If there is, update context and pop it from directives
- if isinstance(directive, CommentDirective):
- in_comment = True
- comment_stack.append(directive.comment)
- if len(directives) == 1:
- # in case we're in the presence of something like:
- # <p i18n:comment="foo">Foo</p>
- for message in self.extract(
- substream, gettext_functions,
- search_text=search_text and not skip,
- comment_stack=comment_stack):
- yield message
- directives.pop(idx)
- elif not isinstance(directive, I18NDirective):
- # Remove all other non i18n directives from the process
- directives.pop(idx)
-
- if not directives and not in_comment:
- # Extract content if there are no directives because
- # strip was pop'ed and not because comment was pop'ed.
- # Extraction in this case has been taken care of.
- for message in self.extract(
- substream, gettext_functions,
- search_text=search_text and not skip):
- yield message
-
- for directive in directives:
- if isinstance(directive, ExtractableI18NDirective):
- for message in directive.extract(self,
- substream, gettext_functions,
- search_text=search_text and not skip,
- comment_stack=comment_stack):
- yield message
- else:
- for message in self.extract(
- substream, gettext_functions,
- search_text=search_text and not skip,
- comment_stack=comment_stack):
- yield message
-
- if in_comment:
- comment_stack.pop()
-
- def get_directive_index(self, dir_cls):
- total = len(self._dir_order)
- if dir_cls in self._dir_order:
- return self._dir_order.index(dir_cls) - total
- return total
-
- def setup(self, template):
- """Convenience function to register the `Translator` filter and the
- related directives with the given template.
-
- :param template: a `Template` instance
- """
- template.filters.insert(0, self)
- if hasattr(template, 'add_directives'):
- template.add_directives(Translator.NAMESPACE, self)
-
- def _extract_attrs(self, event, gettext_functions, search_text):
- for name, value in event[1][1]:
- if search_text and isinstance(value, basestring):
- if name in self.include_attrs:
- text = value.strip()
- if text:
- yield event[2][1], None, text, []
- else:
- for message in self.extract(_ensure(value), gettext_functions,
- search_text=False):
- yield message
-
-
-class MessageBuffer(object):
- """Helper class for managing internationalized mixed content.
-
- :since: version 0.5
- """
-
- def __init__(self, directive=None):
- """Initialize the message buffer.
-
- :param directive: the directive owning the buffer
- :type directive: I18NDirective
- """
- # params list needs to be copied so that directives can be evaluated
- # more than once
- self.orig_params = self.params = directive.params[:]
- self.directive = directive
- self.string = []
- self.events = {}
- self.values = {}
- self.depth = 1
- self.order = 1
- self.stack = [0]
- self.subdirectives = {}
-
- def append(self, kind, data, pos):
- """Append a stream event to the buffer.
-
- :param kind: the stream event kind
- :param data: the event data
- :param pos: the position of the event in the source
- """
- if kind is SUB:
- # The order needs to be +1 because a new START kind event will
- # happen and we need to wrap those events into our custom kind(s)
- order = self.stack[-1] + 1
- subdirectives, substream = data
- # Store the directives that should be applied after translation
- self.subdirectives.setdefault(order, []).extend(subdirectives)
- self.events.setdefault(order, []).append((SUB_START, None, pos))
- for skind, sdata, spos in substream:
- self.append(skind, sdata, spos)
- self.events.setdefault(order, []).append((SUB_END, None, pos))
- elif kind is TEXT:
- if '[' in data or ']' in data:
- # Quote [ and ] if it ain't us adding it, ie, if the user is
- # using those chars in his templates, escape them
- data = data.replace('[', '\[').replace(']', '\]')
- self.string.append(data)
- self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
- elif kind is EXPR:
- if self.params:
- param = self.params.pop(0)
- else:
- params = ', '.join(['"%s"' % p for p in self.orig_params if p])
- if params:
- params = "(%s)" % params
- raise IndexError("%d parameters%s given to 'i18n:%s' but "
- "%d or more expressions used in '%s', line %s"
- % (len(self.orig_params), params,
- self.directive.tagname,
- len(self.orig_params) + 1,
- os.path.basename(pos[0] or
- 'In-memory Template'),
- pos[1]))
- self.string.append('%%(%s)s' % param)
- self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
- self.values[param] = (kind, data, pos)
- else:
- if kind is START:
- self.string.append('[%d:' % self.order)
- self.stack.append(self.order)
- self.events.setdefault(self.stack[-1],
- []).append((kind, data, pos))
- self.depth += 1
- self.order += 1
- elif kind is END:
- self.depth -= 1
- if self.depth:
- self.events[self.stack[-1]].append((kind, data, pos))
- self.string.append(']')
- self.stack.pop()
-
- def format(self):
- """Return a message identifier representing the content in the
- buffer.
- """
- return ''.join(self.string).strip()
-
- def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
- """Interpolate the given message translation with the events in the
- buffer and return the translated stream.
-
- :param string: the translated message string
- """
- substream = None
-
- def yield_parts(string):
- for idx, part in enumerate(regex.split(string)):
- if idx % 2:
- yield self.values[part]
- elif part:
- yield (TEXT,
- part.replace('\[', '[').replace('\]', ']'),
- (None, -1, -1)
- )
-
- parts = parse_msg(string)
- parts_counter = {}
- for order, string in parts:
- parts_counter.setdefault(order, []).append(None)
-
- while parts:
- order, string = parts.pop(0)
- if len(parts_counter[order]) == 1:
- events = self.events[order]
- else:
- events = [self.events[order].pop(0)]
- parts_counter[order].pop()
-
- for event in events:
- if event[0] is SUB_START:
- substream = []
- elif event[0] is SUB_END:
- # Yield a substream which might have directives to be
- # applied to it (after translation events)
- yield SUB, (self.subdirectives[order], substream), event[2]
- substream = None
- elif event[0] is TEXT:
- if string:
- for part in yield_parts(string):
- if substream is not None:
- substream.append(part)
- else:
- yield part
- # String handled, reset it
- string = None
- elif event[0] is START:
- if substream is not None:
- substream.append(event)
- else:
- yield event
- if string:
- for part in yield_parts(string):
- if substream is not None:
- substream.append(part)
- else:
- yield part
- # String handled, reset it
- string = None
- elif event[0] is END:
- if string:
- for part in yield_parts(string):
- if substream is not None:
- substream.append(part)
- else:
- yield part
- # String handled, reset it
- string = None
- if substream is not None:
- substream.append(event)
- else:
- yield event
- elif event[0] is EXPR:
- # These are handled on the string itself
- continue
- else:
- if string:
- for part in yield_parts(string):
- if substream is not None:
- substream.append(part)
- else:
- yield part
- # String handled, reset it
- string = None
- if substream is not None:
- substream.append(event)
- else:
- yield event
-
-
-def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
- """Parse a translated message using Genshi mixed content message
- formatting.
-
- >>> parse_msg("See [1:Help].")
- [(0, 'See '), (1, 'Help'), (0, '.')]
-
- >>> parse_msg("See [1:our [2:Help] page] for details.")
- [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
-
- >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
- [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
-
- >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
- [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
-
- :param string: the translated message string
- :return: a list of ``(order, string)`` tuples
- :rtype: `list`
- """
- parts = []
- stack = [0]
- while True:
- mo = regex.search(string)
- if not mo:
- break
-
- if mo.start() or stack[-1]:
- parts.append((stack[-1], string[:mo.start()]))
- string = string[mo.end():]
-
- orderno = mo.group(1)
- if orderno is not None:
- stack.append(int(orderno))
- else:
- stack.pop()
- if not stack:
- break
-
- if string:
- parts.append((stack[-1], string))
-
- return parts
-
-
-def extract_from_code(code, gettext_functions):
- """Extract strings from the AST of a `Code` object.
-
- >>> from genshi.template.eval import Expression
- >>> expr = Expression('_("Hello")')
- >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
- [('_', u'Hello')]
-
- >>> expr = Expression('ngettext("You have %(num)s item", '
- ... '"You have %(num)s items", num)')
- >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
- [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
-
- :param code: the `Code` object
- :type code: `genshi.template.eval.Code`
- :param gettext_functions: a sequence of function names
- :since: version 0.5
- """
- def _walk(node):
- if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \
- and node.func.id in gettext_functions:
- strings = []
- def _add(arg):
- if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring):
- strings.append(unicode(arg.s, 'utf-8'))
- elif arg:
- strings.append(None)
- [_add(arg) for arg in node.args]
- _add(node.starargs)
- _add(node.kwargs)
- if len(strings) == 1:
- strings = strings[0]
- else:
- strings = tuple(strings)
- yield node.func.id, strings
- elif node._fields:
- children = []
- for field in node._fields:
- child = getattr(node, field, None)
- if isinstance(child, list):
- for elem in child:
- children.append(elem)
- elif isinstance(child, _ast.AST):
- children.append(child)
- for child in children:
- for funcname, strings in _walk(child):
- yield funcname, strings
- return _walk(code.ast)
-
-
-def extract(fileobj, keywords, comment_tags, options):
- """Babel extraction method for Genshi templates.
-
- :param fileobj: the file-like object the messages should be extracted from
- :param keywords: a list of keywords (i.e. function names) that should be
- recognized as translation functions
- :param comment_tags: a list of translator tags to search for and include
- in the results
- :param options: a dictionary of additional options (optional)
- :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
- :rtype: ``iterator``
- """
- template_class = options.get('template_class', MarkupTemplate)
- if isinstance(template_class, basestring):
- module, clsname = template_class.split(':', 1)
- template_class = getattr(__import__(module, {}, {}, [clsname]), clsname)
- encoding = options.get('encoding', None)
-
- extract_text = options.get('extract_text', True)
- if isinstance(extract_text, basestring):
- extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')
-
- ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS)
- if isinstance(ignore_tags, basestring):
- ignore_tags = ignore_tags.split()
- ignore_tags = [QName(tag) for tag in ignore_tags]
-
- include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS)
- if isinstance(include_attrs, basestring):
- include_attrs = include_attrs.split()
- include_attrs = [QName(attr) for attr in include_attrs]
-
- tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
- encoding=encoding)
- tmpl.loader = None
-
- translator = Translator(None, ignore_tags, include_attrs, extract_text)
- if hasattr(tmpl, 'add_directives'):
- tmpl.add_directives(Translator.NAMESPACE, translator)
- for message in translator.extract(tmpl.stream, gettext_functions=keywords):
- yield message
diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py
deleted file mode 100644
index 9b75b06..0000000
--- a/genshi/filters/transform.py
+++ /dev/null
@@ -1,1310 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2007-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""A filter for functional-style transformations of markup streams.
-
-The `Transformer` filter provides a variety of transformations that can be
-applied to parts of streams that match given XPath expressions. These
-transformations can be chained to achieve results that would be comparatively
-tedious to achieve by writing stream filters by hand. The approach of chaining
-node selection and transformation has been inspired by the `jQuery`_ Javascript
-library.
-
- .. _`jQuery`: http://jquery.com/
-
-For example, the following transformation upper-cases the text of the ``<em>``
-element in the ``<body>`` and replaces the ``<em>`` tag with a ``<u>`` tag:
-
->>> from genshi.builder import tag
->>> html = HTML('''<html>
-... <head><title>Some Title</title></head>
-... <body>
-... Some <em>body</em> text.
-... </body>
-... </html>''')
->>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
-... .unwrap().wrap(tag.u))
-<html>
- <head><title>Some Title</title></head>
- <body>
- Some <u>BODY</u> text.
- </body>
-</html>
-
-The ``Transformer`` supports a large number of useful transformations out of the
-box, but custom transformations can be added easily.
-
-:since: version 0.5
-"""
-
-import re
-import sys
-
-from genshi.builder import Element
-from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
-from genshi.path import Path
-
-__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
- 'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
-
-
-class TransformMark(str):
- """A mark on a transformation stream."""
- __slots__ = []
- _instances = {}
-
- def __new__(cls, val):
- return cls._instances.setdefault(val, str.__new__(cls, val))
-
-
-ENTER = TransformMark('ENTER')
-"""Stream augmentation mark indicating that a selected element is being
-entered."""
-
-INSIDE = TransformMark('INSIDE')
-"""Stream augmentation mark indicating that processing is currently inside a
-selected element."""
-
-OUTSIDE = TransformMark('OUTSIDE')
-"""Stream augmentation mark indicating that a match occurred outside a selected
-element."""
-
-ATTR = TransformMark('ATTR')
-"""Stream augmentation mark indicating a selected element attribute."""
-
-EXIT = TransformMark('EXIT')
-"""Stream augmentation mark indicating that a selected element is being
-exited."""
-
-BREAK = TransformMark('BREAK')
-"""Stream augmentation mark indicating a break between two otherwise contiguous
-blocks of marked events.
-
-This is used primarily by the cut() transform to provide later transforms with
-an opportunity to operate on the cut buffer.
-"""
-
-
-class PushBackStream(object):
- """Allows a single event to be pushed back onto the stream and re-consumed.
- """
- def __init__(self, stream):
- self.stream = iter(stream)
- self.peek = None
-
- def push(self, event):
- assert self.peek is None
- self.peek = event
-
- def __iter__(self):
- while True:
- if self.peek is not None:
- peek = self.peek
- self.peek = None
- yield peek
- else:
- try:
- event = self.stream.next()
- yield event
- except StopIteration:
- if self.peek is None:
- raise
-
-
-class Transformer(object):
- """Stream filter that can apply a variety of different transformations to
- a stream.
-
- This is achieved by selecting the events to be transformed using XPath,
- then applying the transformations to the events matched by the path
- expression. Each marked event is in the form (mark, (kind, data, pos)),
- where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`.
-
- The first three marks match `START` and `END` events, and any events
- contained `INSIDE` any selected XML/HTML element. A non-element match
- outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE`
- mark.
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
-
- Transformations act on selected stream events matching an XPath expression.
- Here's an example of removing some markup (the title, in this case)
- selected by an expression:
-
- >>> print(html | Transformer('head/title').remove())
- <html><head/><body>Some <em>body</em> text.</body></html>
-
- Inserted content can be passed in the form of a string, or a markup event
- stream, which includes streams generated programmatically via the
- `builder` module:
-
- >>> from genshi.builder import tag
- >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
- <html><head><title>Some Title</title></head><body><h1>Document
- Title</h1>Some <em>body</em> text.</body></html>
-
- Each XPath expression determines the set of tags that will be acted upon by
- subsequent transformations. In this example we select the ``<title>`` text,
- copy it into a buffer, then select the ``<body>`` element and paste the
- copied text into the body as ``<h1>`` enclosed text:
-
- >>> buffer = StreamBuffer()
- >>> print(html | Transformer('head/title/text()').copy(buffer)
- ... .end().select('body').prepend(tag.h1(buffer)))
- <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
- <em>body</em> text.</body></html>
-
- Transformations can also be assigned and reused, although care must be
- taken when using buffers, to ensure that buffers are cleared between
- transforms:
-
- >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
- >>> print(html | emphasis)
- <html><head><title>Some Title</title></head><body>Some <em
- class="emphasis">body</em> text.</body></html>
- """
-
- __slots__ = ['transforms']
-
- def __init__(self, path='.'):
- """Construct a new transformation filter.
-
- :param path: an XPath expression (as string) or a `Path` instance
- """
- self.transforms = [SelectTransformation(path)]
-
- def __call__(self, stream, keep_marks=False):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- :param keep_marks: Do not strip transformer selection marks from the
- stream. Useful for testing.
- :return: the transformed stream
- :rtype: `Stream`
- """
- transforms = self._mark(stream)
- for link in self.transforms:
- transforms = link(transforms)
- if not keep_marks:
- transforms = self._unmark(transforms)
- return Stream(transforms,
- serializer=getattr(stream, 'serializer', None))
-
- def apply(self, function):
- """Apply a transformation to the stream.
-
- Transformations can be chained, similar to stream filters. Any callable
- accepting a marked stream can be used as a transform.
-
- As an example, here is a simple `TEXT` event upper-casing transform:
-
- >>> def upper(stream):
- ... for mark, (kind, data, pos) in stream:
- ... if mark and kind is TEXT:
- ... yield mark, (kind, data.upper(), pos)
- ... else:
- ... yield mark, (kind, data, pos)
- >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
- >>> print(short_stream | Transformer('.//em/text()').apply(upper))
- <body>Some <em>TEST</em> text</body>
- """
- transformer = Transformer()
- transformer.transforms = self.transforms[:]
- if isinstance(function, Transformer):
- transformer.transforms.extend(function.transforms)
- else:
- transformer.transforms.append(function)
- return transformer
-
- #{ Selection operations
-
- def select(self, path):
- """Mark events matching the given XPath expression, within the current
- selection.
-
- >>> html = HTML('<body>Some <em>test</em> text</body>')
- >>> print(html | Transformer().select('.//em').trace())
- (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
- (None, ('TEXT', u'Some ', (None, 1, 6)))
- ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
- ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
- ('EXIT', ('END', QName('em'), (None, 1, 19)))
- (None, ('TEXT', u' text', (None, 1, 24)))
- (None, ('END', QName('body'), (None, 1, 29)))
- <body>Some <em>test</em> text</body>
-
- :param path: an XPath expression (as string) or a `Path` instance
- :return: the stream augmented by transformation marks
- :rtype: `Transformer`
- """
- return self.apply(SelectTransformation(path))
-
- def invert(self):
- """Invert selection so that marked events become unmarked, and vice
- versa.
-
- Specifically, all marks are converted to null marks, and all null marks
- are converted to OUTSIDE marks.
-
- >>> html = HTML('<body>Some <em>test</em> text</body>')
- >>> print(html | Transformer('//em').invert().trace())
- ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
- ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
- (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
- (None, ('TEXT', u'test', (None, 1, 15)))
- (None, ('END', QName('em'), (None, 1, 19)))
- ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
- ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
- <body>Some <em>test</em> text</body>
-
- :rtype: `Transformer`
- """
- return self.apply(InvertTransformation())
-
- def end(self):
- """End current selection, allowing all events to be selected.
-
- Example:
-
- >>> html = HTML('<body>Some <em>test</em> text</body>')
- >>> print(html | Transformer('//em').end().trace())
- ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
- ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
- ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
- ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
- ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
- ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
- ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
- <body>Some <em>test</em> text</body>
-
- :return: the stream augmented by transformation marks
- :rtype: `Transformer`
- """
- return self.apply(EndTransformation())
-
- #{ Deletion operations
-
- def empty(self):
- """Empty selected elements of all content.
-
- Example:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em').empty())
- <html><head><title>Some Title</title></head><body>Some <em/>
- text.</body></html>
-
- :rtype: `Transformer`
- """
- return self.apply(EmptyTransformation())
-
- def remove(self):
- """Remove selection from the stream.
-
- Example:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em').remove())
- <html><head><title>Some Title</title></head><body>Some
- text.</body></html>
-
- :rtype: `Transformer`
- """
- return self.apply(RemoveTransformation())
-
- #{ Direct element operations
-
- def unwrap(self):
- """Remove outermost enclosing elements from selection.
-
- Example:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em').unwrap())
- <html><head><title>Some Title</title></head><body>Some body
- text.</body></html>
-
- :rtype: `Transformer`
- """
- return self.apply(UnwrapTransformation())
-
- def wrap(self, element):
- """Wrap selection in an element.
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em').wrap('strong'))
- <html><head><title>Some Title</title></head><body>Some
- <strong><em>body</em></strong> text.</body></html>
-
- :param element: either a tag name (as string) or an `Element` object
- :rtype: `Transformer`
- """
- return self.apply(WrapTransformation(element))
-
- #{ Content insertion operations
-
- def replace(self, content):
- """Replace selection with content.
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//title/text()').replace('New Title'))
- <html><head><title>New Title</title></head><body>Some <em>body</em>
- text.</body></html>
-
- :param content: Either a callable, an iterable of events, or a string
- to insert.
- :rtype: `Transformer`
- """
- return self.apply(ReplaceTransformation(content))
-
- def before(self, content):
- """Insert content before selection.
-
- In this example we insert the word 'emphasised' before the <em> opening
- tag:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em').before('emphasised '))
- <html><head><title>Some Title</title></head><body>Some emphasised
- <em>body</em> text.</body></html>
-
- :param content: Either a callable, an iterable of events, or a string
- to insert.
- :rtype: `Transformer`
- """
- return self.apply(BeforeTransformation(content))
-
- def after(self, content):
- """Insert content after selection.
-
- Here, we insert some text after the </em> closing tag:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em').after(' rock'))
- <html><head><title>Some Title</title></head><body>Some <em>body</em>
- rock text.</body></html>
-
- :param content: Either a callable, an iterable of events, or a string
- to insert.
- :rtype: `Transformer`
- """
- return self.apply(AfterTransformation(content))
-
- def prepend(self, content):
- """Insert content after the ENTER event of the selection.
-
- Inserting some new text at the start of the <body>:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//body').prepend('Some new body text. '))
- <html><head><title>Some Title</title></head><body>Some new body text.
- Some <em>body</em> text.</body></html>
-
- :param content: Either a callable, an iterable of events, or a string
- to insert.
- :rtype: `Transformer`
- """
- return self.apply(PrependTransformation(content))
-
- def append(self, content):
- """Insert content before the END event of the selection.
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//body').append(' Some new body text.'))
- <html><head><title>Some Title</title></head><body>Some <em>body</em>
- text. Some new body text.</body></html>
-
- :param content: Either a callable, an iterable of events, or a string
- to insert.
- :rtype: `Transformer`
- """
- return self.apply(AppendTransformation(content))
-
- #{ Attribute manipulation
-
- def attr(self, name, value):
- """Add, replace or delete an attribute on selected elements.
-
- If `value` evaulates to `None` the attribute will be deleted from the
- element:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em class="before">body</em> <em>text</em>.</body>'
- ... '</html>')
- >>> print(html | Transformer('body/em').attr('class', None))
- <html><head><title>Some Title</title></head><body>Some <em>body</em>
- <em>text</em>.</body></html>
-
- Otherwise the attribute will be set to `value`:
-
- >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
- <html><head><title>Some Title</title></head><body>Some <em
- class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>
-
- If `value` is a callable it will be called with the attribute name and
- the `START` event for the matching element. Its return value will then
- be used to set the attribute:
-
- >>> def print_attr(name, event):
- ... attrs = event[1][1]
- ... print(attrs)
- ... return attrs.get(name)
- >>> print(html | Transformer('body/em').attr('class', print_attr))
- Attrs([(QName('class'), u'before')])
- Attrs()
- <html><head><title>Some Title</title></head><body>Some <em
- class="before">body</em> <em>text</em>.</body></html>
-
- :param name: the name of the attribute
- :param value: the value that should be set for the attribute.
- :rtype: `Transformer`
- """
- return self.apply(AttrTransformation(name, value))
-
- #{ Buffer operations
-
- def copy(self, buffer, accumulate=False):
- """Copy selection into buffer.
-
- The buffer is replaced by each *contiguous* selection before being passed
- to the next transformation. If accumulate=True, further selections will
- be appended to the buffer rather than replacing it.
-
- >>> from genshi.builder import tag
- >>> buffer = StreamBuffer()
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('head/title/text()').copy(buffer)
- ... .end().select('body').prepend(tag.h1(buffer)))
- <html><head><title>Some Title</title></head><body><h1>Some
- Title</h1>Some <em>body</em> text.</body></html>
-
- This example illustrates that only a single contiguous selection will
- be buffered:
-
- >>> print(html | Transformer('head/title/text()').copy(buffer)
- ... .end().select('body/em').copy(buffer).end().select('body')
- ... .prepend(tag.h1(buffer)))
- <html><head><title>Some Title</title></head><body><h1>Some
- Title</h1>Some <em>body</em> text.</body></html>
- >>> print(buffer)
- <em>body</em>
-
- Element attributes can also be copied for later use:
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body><em>Some</em> <em class="before">body</em>'
- ... '<em>text</em>.</body></html>')
- >>> buffer = StreamBuffer()
- >>> def apply_attr(name, entry):
- ... return list(buffer)[0][1][1].get('class')
- >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
- ... .end().buffer().select('body/em[not(@class)]')
- ... .attr('class', apply_attr))
- <html><head><title>Some Title</title></head><body><em
- class="before">Some</em> <em class="before">body</em><em
- class="before">text</em>.</body></html>
-
-
- :param buffer: the `StreamBuffer` in which the selection should be
- stored
- :rtype: `Transformer`
- :note: Copy (and cut) copy each individual selected object into the
- buffer before passing to the next transform. For example, the
- XPath ``*|text()`` will select all elements and text, each
- instance of which will be copied to the buffer individually
- before passing to the next transform. This has implications for
- how ``StreamBuffer`` objects can be used, so some
- experimentation may be required.
-
- """
- return self.apply(CopyTransformation(buffer, accumulate))
-
- def cut(self, buffer, accumulate=False):
- """Copy selection into buffer and remove the selection from the stream.
-
- >>> from genshi.builder import tag
- >>> buffer = StreamBuffer()
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('.//em/text()').cut(buffer)
- ... .end().select('.//em').after(tag.h1(buffer)))
- <html><head><title>Some Title</title></head><body>Some
- <em/><h1>body</h1> text.</body></html>
-
- Specifying accumulate=True, appends all selected intervals onto the
- buffer. Combining this with the .buffer() operation allows us operate
- on all copied events rather than per-segment. See the documentation on
- buffer() for more information.
-
- :param buffer: the `StreamBuffer` in which the selection should be
- stored
- :rtype: `Transformer`
- :note: this transformation will buffer the entire input stream
- """
- return self.apply(CutTransformation(buffer, accumulate))
-
- def buffer(self):
- """Buffer the entire stream (can consume a considerable amount of
- memory).
-
- Useful in conjunction with copy(accumulate=True) and
- cut(accumulate=True) to ensure that all marked events in the entire
- stream are copied to the buffer before further transformations are
- applied.
-
- For example, to move all <note> elements inside a <notes> tag at the
- top of the document:
-
- >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
- ... 'text <note>two</note>.</body></doc>')
- >>> buffer = StreamBuffer()
- >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
- ... .end().buffer().select('notes').prepend(buffer))
- <doc><notes><note>one</note><note>two</note></notes><body>Some text
- .</body></doc>
-
- """
- return self.apply(list)
-
- #{ Miscellaneous operations
-
- def filter(self, filter):
- """Apply a normal stream filter to the selection. The filter is called
- once for each contiguous block of marked events.
-
- >>> from genshi.filters.html import HTMLSanitizer
- >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
- ... '</script> and some more text</body></html>')
- >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
- <html><body>Some text and some more text</body></html>
-
- :param filter: The stream filter to apply.
- :rtype: `Transformer`
- """
- return self.apply(FilterTransformation(filter))
-
- def map(self, function, kind):
- """Applies a function to the ``data`` element of events of ``kind`` in
- the selection.
-
- >>> html = HTML('<html><head><title>Some Title</title></head>'
- ... '<body>Some <em>body</em> text.</body></html>')
- >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
- <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
- text.</body></html>
-
- :param function: the function to apply
- :param kind: the kind of event the function should be applied to
- :rtype: `Transformer`
- """
- return self.apply(MapTransformation(function, kind))
-
- def substitute(self, pattern, replace, count=1):
- """Replace text matching a regular expression.
-
- Refer to the documentation for ``re.sub()`` for details.
-
- >>> html = HTML('<html><body>Some text, some more text and '
- ... '<b>some bold text</b>\\n'
- ... '<i>some italicised text</i></body></html>')
- >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
- <html><body>Some text, some more text and <b>SOME bold text</b>
- <i>some italicised text</i></body></html>
- >>> tags = tag.html(tag.body('Some text, some more text and\\n',
- ... Markup('<b>some bold text</b>')))
- >>> print(tags.generate() | Transformer('body').substitute(
- ... '(?i)some', 'SOME'))
- <html><body>SOME text, some more text and
- <b>SOME bold text</b></body></html>
-
- :param pattern: A regular expression object or string.
- :param replace: Replacement pattern.
- :param count: Number of replacements to make in each text fragment.
- :rtype: `Transformer`
- """
- return self.apply(SubstituteTransformation(pattern, replace, count))
-
- def rename(self, name):
- """Rename matching elements.
-
- >>> html = HTML('<html><body>Some text, some more text and '
- ... '<b>some bold text</b></body></html>')
- >>> print(html | Transformer('body/b').rename('strong'))
- <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
- """
- return self.apply(RenameTransformation(name))
-
- def trace(self, prefix='', fileobj=None):
- """Print events as they pass through the transform.
-
- >>> html = HTML('<body>Some <em>test</em> text</body>')
- >>> print(html | Transformer('em').trace())
- (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
- (None, ('TEXT', u'Some ', (None, 1, 6)))
- ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
- ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
- ('EXIT', ('END', QName('em'), (None, 1, 19)))
- (None, ('TEXT', u' text', (None, 1, 24)))
- (None, ('END', QName('body'), (None, 1, 29)))
- <body>Some <em>test</em> text</body>
-
- :param prefix: a string to prefix each event with in the output
- :param fileobj: the writable file-like object to write to; defaults to
- the standard output stream
- :rtype: `Transformer`
- """
- return self.apply(TraceTransformation(prefix, fileobj=fileobj))
-
- # Internal methods
-
- def _mark(self, stream):
- for event in stream:
- yield OUTSIDE, event
-
- def _unmark(self, stream):
- for mark, event in stream:
- kind = event[0]
- if not (kind is None or kind is ATTR or kind is BREAK):
- yield event
-
-
-class SelectTransformation(object):
- """Select and mark events that match an XPath expression."""
-
- def __init__(self, path):
- """Create selection.
-
- :param path: an XPath expression (as string) or a `Path` object
- """
- if not isinstance(path, Path):
- path = Path(path)
- self.path = path
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- namespaces = {}
- variables = {}
- test = self.path.test()
- stream = iter(stream)
- next = stream.next
- for mark, event in stream:
- if mark is None:
- yield mark, event
- continue
- result = test(event, namespaces, variables)
- # XXX This is effectively genshi.core._ensure() for transform
- # streams.
- if result is True:
- if event[0] is START:
- yield ENTER, event
- depth = 1
- while depth > 0:
- mark, subevent = next()
- if subevent[0] is START:
- depth += 1
- elif subevent[0] is END:
- depth -= 1
- if depth == 0:
- yield EXIT, subevent
- else:
- yield INSIDE, subevent
- test(subevent, namespaces, variables, updateonly=True)
- else:
- yield OUTSIDE, event
- elif isinstance(result, Attrs):
- # XXX Selected *attributes* are given a "kind" of None to
- # indicate they are not really part of the stream.
- yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
- yield None, event
- elif isinstance(result, tuple):
- yield OUTSIDE, result
- elif result:
- # XXX Assume everything else is "text"?
- yield None, (TEXT, unicode(result), (None, -1, -1))
- else:
- yield None, event
-
-
-class InvertTransformation(object):
- """Invert selection so that marked events become unmarked, and vice versa.
-
- Specificaly, all input marks are converted to null marks, and all input
- null marks are converted to OUTSIDE marks.
- """
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- for mark, event in stream:
- if mark:
- yield None, event
- else:
- yield OUTSIDE, event
-
-
-class EndTransformation(object):
- """End the current selection."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- for mark, event in stream:
- yield OUTSIDE, event
-
-
-class EmptyTransformation(object):
- """Empty selected elements of all content."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- for mark, event in stream:
- yield mark, event
- if mark is ENTER:
- for mark, event in stream:
- if mark is EXIT:
- yield mark, event
- break
-
-
-class RemoveTransformation(object):
- """Remove selection from the stream."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- for mark, event in stream:
- if mark is None:
- yield mark, event
-
-
-class UnwrapTransformation(object):
- """Remove outtermost enclosing elements from selection."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- for mark, event in stream:
- if mark not in (ENTER, EXIT):
- yield mark, event
-
-
-class WrapTransformation(object):
- """Wrap selection in an element."""
-
- def __init__(self, element):
- if isinstance(element, Element):
- self.element = element
- else:
- self.element = Element(element)
-
- def __call__(self, stream):
- for mark, event in stream:
- if mark:
- element = list(self.element.generate())
- for prefix in element[:-1]:
- yield None, prefix
- yield mark, event
- start = mark
- stopped = False
- for mark, event in stream:
- if start is ENTER and mark is EXIT:
- yield mark, event
- stopped = True
- break
- if not mark:
- break
- yield mark, event
- else:
- stopped = True
- yield None, element[-1]
- if not stopped:
- yield mark, event
- else:
- yield mark, event
-
-
-class TraceTransformation(object):
- """Print events as they pass through the transform."""
-
- def __init__(self, prefix='', fileobj=None):
- """Trace constructor.
-
- :param prefix: text to prefix each traced line with.
- :param fileobj: the writable file-like object to write to
- """
- self.prefix = prefix
- self.fileobj = fileobj or sys.stdout
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- for event in stream:
- self.fileobj.write('%s%s\n' % (self.prefix, event))
- yield event
-
-
-class FilterTransformation(object):
- """Apply a normal stream filter to the selection. The filter is called once
- for each selection."""
-
- def __init__(self, filter):
- """Create the transform.
-
- :param filter: The stream filter to apply.
- """
- self.filter = filter
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- def flush(queue):
- if queue:
- for event in self.filter(queue):
- yield OUTSIDE, event
- del queue[:]
-
- queue = []
- for mark, event in stream:
- if mark is ENTER:
- queue.append(event)
- for mark, event in stream:
- queue.append(event)
- if mark is EXIT:
- break
- for queue_event in flush(queue):
- yield queue_event
- elif mark is OUTSIDE:
- stopped = False
- queue.append(event)
- for mark, event in stream:
- if mark is not OUTSIDE:
- break
- queue.append(event)
- else:
- stopped = True
- for queue_event in flush(queue):
- yield queue_event
- if not stopped:
- yield mark, event
- else:
- yield mark, event
- for queue_event in flush(queue):
- yield queue_event
-
-
-class MapTransformation(object):
- """Apply a function to the `data` element of events of ``kind`` in the
- selection.
- """
-
- def __init__(self, function, kind):
- """Create the transform.
-
- :param function: the function to apply; the function must take one
- argument, the `data` element of each selected event
- :param kind: the stream event ``kind`` to apply the `function` to
- """
- self.function = function
- self.kind = kind
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- for mark, (kind, data, pos) in stream:
- if mark and self.kind in (None, kind):
- yield mark, (kind, self.function(data), pos)
- else:
- yield mark, (kind, data, pos)
-
-
-class SubstituteTransformation(object):
- """Replace text matching a regular expression.
-
- Refer to the documentation for ``re.sub()`` for details.
- """
- def __init__(self, pattern, replace, count=0):
- """Create the transform.
-
- :param pattern: A regular expression object, or string.
- :param replace: Replacement pattern.
- :param count: Number of replacements to make in each text fragment.
- """
- if isinstance(pattern, basestring):
- self.pattern = re.compile(pattern)
- else:
- self.pattern = pattern
- self.count = count
- self.replace = replace
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- for mark, (kind, data, pos) in stream:
- if mark is not None and kind is TEXT:
- new_data = self.pattern.sub(self.replace, data, self.count)
- if isinstance(data, Markup):
- data = Markup(new_data)
- else:
- data = new_data
- yield mark, (kind, data, pos)
-
-
-class RenameTransformation(object):
- """Rename matching elements."""
- def __init__(self, name):
- """Create the transform.
-
- :param name: New element name.
- """
- self.name = QName(name)
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- for mark, (kind, data, pos) in stream:
- if mark is ENTER:
- data = self.name, data[1]
- elif mark is EXIT:
- data = self.name
- yield mark, (kind, data, pos)
-
-
-class InjectorTransformation(object):
- """Abstract base class for transformations that inject content into a
- stream.
-
- >>> class Top(InjectorTransformation):
- ... def __call__(self, stream):
- ... for event in self._inject():
- ... yield event
- ... for event in stream:
- ... yield event
- >>> html = HTML('<body>Some <em>test</em> text</body>')
- >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
- Prefix <body>Some <em>test</em> text</body>
- """
- def __init__(self, content):
- """Create a new injector.
-
- :param content: An iterable of Genshi stream events, or a string to be
- injected.
- """
- self.content = content
-
- def _inject(self):
- content = self.content
- if hasattr(content, '__call__'):
- content = content()
- for event in _ensure(content):
- yield None, event
-
-
-class ReplaceTransformation(InjectorTransformation):
- """Replace selection with content."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- stream = PushBackStream(stream)
- for mark, event in stream:
- if mark is not None:
- start = mark
- for subevent in self._inject():
- yield subevent
- for mark, event in stream:
- if start is ENTER:
- if mark is EXIT:
- break
- elif mark != start:
- stream.push((mark, event))
- break
- else:
- yield mark, event
-
-
-class BeforeTransformation(InjectorTransformation):
- """Insert content before selection."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- stream = PushBackStream(stream)
- for mark, event in stream:
- if mark is not None:
- start = mark
- for subevent in self._inject():
- yield subevent
- yield mark, event
- for mark, event in stream:
- if mark != start and start is not ENTER:
- stream.push((mark, event))
- break
- yield mark, event
- if start is ENTER and mark is EXIT:
- break
- else:
- yield mark, event
-
-
-class AfterTransformation(InjectorTransformation):
- """Insert content after selection."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- stream = PushBackStream(stream)
- for mark, event in stream:
- yield mark, event
- if mark:
- start = mark
- for mark, event in stream:
- if start is not ENTER and mark != start:
- stream.push((mark, event))
- break
- yield mark, event
- if start is ENTER and mark is EXIT:
- break
- for subevent in self._inject():
- yield subevent
-
-
-class PrependTransformation(InjectorTransformation):
- """Prepend content to the inside of selected elements."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- for mark, event in stream:
- yield mark, event
- if mark is ENTER:
- for subevent in self._inject():
- yield subevent
-
-
-class AppendTransformation(InjectorTransformation):
- """Append content after the content of selected elements."""
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- for mark, event in stream:
- yield mark, event
- if mark is ENTER:
- for mark, event in stream:
- if mark is EXIT:
- break
- yield mark, event
- for subevent in self._inject():
- yield subevent
- yield mark, event
-
-
-class AttrTransformation(object):
- """Set an attribute on selected elements."""
-
- def __init__(self, name, value):
- """Construct transform.
-
- :param name: name of the attribute that should be set
- :param value: the value to set
- """
- self.name = name
- self.value = value
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: The marked event stream to filter
- """
- callable_value = hasattr(self.value, '__call__')
- for mark, (kind, data, pos) in stream:
- if mark is ENTER:
- if callable_value:
- value = self.value(self.name, (kind, data, pos))
- else:
- value = self.value
- if value is None:
- attrs = data[1] - [QName(self.name)]
- else:
- attrs = data[1] | [(QName(self.name), value)]
- data = (data[0], attrs)
- yield mark, (kind, data, pos)
-
-
-
-class StreamBuffer(Stream):
- """Stream event buffer used for cut and copy transformations."""
-
- def __init__(self):
- """Create the buffer."""
- Stream.__init__(self, [])
-
- def append(self, event):
- """Add an event to the buffer.
-
- :param event: the markup event to add
- """
- self.events.append(event)
-
- def reset(self):
- """Empty the buffer of events."""
- del self.events[:]
-
-
-class CopyTransformation(object):
- """Copy selected events into a buffer for later insertion."""
-
- def __init__(self, buffer, accumulate=False):
- """Create the copy transformation.
-
- :param buffer: the `StreamBuffer` in which the selection should be
- stored
- """
- if not accumulate:
- buffer.reset()
- self.buffer = buffer
- self.accumulate = accumulate
-
- def __call__(self, stream):
- """Apply the transformation to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- stream = PushBackStream(stream)
-
- for mark, event in stream:
- if mark:
- if not self.accumulate:
- self.buffer.reset()
- events = [(mark, event)]
- self.buffer.append(event)
- start = mark
- for mark, event in stream:
- if start is not ENTER and mark != start:
- stream.push((mark, event))
- break
- events.append((mark, event))
- self.buffer.append(event)
- if start is ENTER and mark is EXIT:
- break
- for i in events:
- yield i
- else:
- yield mark, event
-
-
-class CutTransformation(object):
- """Cut selected events into a buffer for later insertion and remove the
- selection.
- """
-
- def __init__(self, buffer, accumulate=False):
- """Create the cut transformation.
-
- :param buffer: the `StreamBuffer` in which the selection should be
- stored
- """
- self.buffer = buffer
- self.accumulate = accumulate
-
-
- def __call__(self, stream):
- """Apply the transform filter to the marked stream.
-
- :param stream: the marked event stream to filter
- """
- attributes = []
- stream = PushBackStream(stream)
- broken = False
- if not self.accumulate:
- self.buffer.reset()
- for mark, event in stream:
- if mark:
- # Send a BREAK event if there was no other event sent between
- if not self.accumulate:
- if not broken and self.buffer:
- yield BREAK, (BREAK, None, None)
- self.buffer.reset()
- self.buffer.append(event)
- start = mark
- if mark is ATTR:
- attributes.extend([name for name, _ in event[1][1]])
- for mark, event in stream:
- if start is mark is ATTR:
- attributes.extend([name for name, _ in event[1][1]])
- # Handle non-element contiguous selection
- if start is not ENTER and mark != start:
- # Operating on the attributes of a START event
- if start is ATTR:
- kind, data, pos = event
- assert kind is START
- data = (data[0], data[1] - attributes)
- attributes = None
- stream.push((mark, (kind, data, pos)))
- else:
- stream.push((mark, event))
- break
- self.buffer.append(event)
- if start is ENTER and mark is EXIT:
- break
- broken = False
- else:
- broken = True
- yield mark, event
- if not broken and self.buffer:
- yield BREAK, (BREAK, None, None)
diff --git a/genshi/input.py b/genshi/input.py
deleted file mode 100644
index 039e5e5..0000000
--- a/genshi/input.py
+++ /dev/null
@@ -1,443 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Support for constructing markup streams from files, strings, or other
-sources.
-"""
-
-from itertools import chain
-import htmlentitydefs as entities
-import HTMLParser as html
-from StringIO import StringIO
-from xml.parsers import expat
-
-from genshi.core import Attrs, QName, Stream, stripentities
-from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \
- END_NS, START_CDATA, END_CDATA, PI, COMMENT
-
-__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
-__docformat__ = 'restructuredtext en'
-
-
-def ET(element):
- """Convert a given ElementTree element to a markup stream.
-
- :param element: an ElementTree element
- :return: a markup stream
- """
- tag_name = QName(element.tag.lstrip('{'))
- attrs = Attrs([(QName(attr.lstrip('{')), value)
- for attr, value in element.items()])
-
- yield START, (tag_name, attrs), (None, -1, -1)
- if element.text:
- yield TEXT, element.text, (None, -1, -1)
- for child in element.getchildren():
- for item in ET(child):
- yield item
- yield END, tag_name, (None, -1, -1)
- if element.tail:
- yield TEXT, element.tail, (None, -1, -1)
-
-
-class ParseError(Exception):
- """Exception raised when fatal syntax errors are found in the input being
- parsed.
- """
-
- def __init__(self, message, filename=None, lineno=-1, offset=-1):
- """Exception initializer.
-
- :param message: the error message from the parser
- :param filename: the path to the file that was parsed
- :param lineno: the number of the line on which the error was encountered
- :param offset: the column number where the error was encountered
- """
- self.msg = message
- if filename:
- message += ', in ' + filename
- Exception.__init__(self, message)
- self.filename = filename or '<string>'
- self.lineno = lineno
- self.offset = offset
-
-
-class XMLParser(object):
- """Generator-based XML parser based on roughly equivalent code in
- Kid/ElementTree.
-
- The parsing is initiated by iterating over the parser object:
-
- >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
- >>> for kind, data, pos in parser:
- ... print('%s %s' % (kind, data))
- START (QName('root'), Attrs([(QName('id'), u'2')]))
- START (QName('child'), Attrs())
- TEXT Foo
- END child
- END root
- """
-
- _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
- entities.name2codepoint.items()]
- _external_dtd = '\n'.join(_entitydefs)
-
- def __init__(self, source, filename=None, encoding=None):
- """Initialize the parser for the given XML input.
-
- :param source: the XML text as a file-like object
- :param filename: the name of the file, if appropriate
- :param encoding: the encoding of the file; if not specified, the
- encoding is assumed to be ASCII, UTF-8, or UTF-16, or
- whatever the encoding specified in the XML declaration
- (if any)
- """
- self.source = source
- self.filename = filename
-
- # Setup the Expat parser
- parser = expat.ParserCreate(encoding, '}')
- parser.buffer_text = True
- parser.returns_unicode = True
- parser.ordered_attributes = True
-
- parser.StartElementHandler = self._handle_start
- parser.EndElementHandler = self._handle_end
- parser.CharacterDataHandler = self._handle_data
- parser.StartDoctypeDeclHandler = self._handle_doctype
- parser.StartNamespaceDeclHandler = self._handle_start_ns
- parser.EndNamespaceDeclHandler = self._handle_end_ns
- parser.StartCdataSectionHandler = self._handle_start_cdata
- parser.EndCdataSectionHandler = self._handle_end_cdata
- parser.ProcessingInstructionHandler = self._handle_pi
- parser.XmlDeclHandler = self._handle_xml_decl
- parser.CommentHandler = self._handle_comment
-
- # Tell Expat that we'll handle non-XML entities ourselves
- # (in _handle_other)
- parser.DefaultHandler = self._handle_other
- parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
- parser.UseForeignDTD()
- parser.ExternalEntityRefHandler = self._build_foreign
-
- self.expat = parser
- self._queue = []
-
- def parse(self):
- """Generator that parses the XML source, yielding markup events.
-
- :return: a markup event stream
- :raises ParseError: if the XML text is not well formed
- """
- def _generate():
- try:
- bufsize = 4 * 1024 # 4K
- done = False
- while 1:
- while not done and len(self._queue) == 0:
- data = self.source.read(bufsize)
- if data == '': # end of data
- if hasattr(self, 'expat'):
- self.expat.Parse('', True)
- del self.expat # get rid of circular references
- done = True
- else:
- if isinstance(data, unicode):
- data = data.encode('utf-8')
- self.expat.Parse(data, False)
- for event in self._queue:
- yield event
- self._queue = []
- if done:
- break
- except expat.ExpatError, e:
- msg = str(e)
- raise ParseError(msg, self.filename, e.lineno, e.offset)
- return Stream(_generate()).filter(_coalesce)
-
- def __iter__(self):
- return iter(self.parse())
-
- def _build_foreign(self, context, base, sysid, pubid):
- parser = self.expat.ExternalEntityParserCreate(context)
- parser.ParseFile(StringIO(self._external_dtd))
- return 1
-
- def _enqueue(self, kind, data=None, pos=None):
- if pos is None:
- pos = self._getpos()
- if kind is TEXT:
- # Expat reports the *end* of the text event as current position. We
- # try to fix that up here as much as possible. Unfortunately, the
- # offset is only valid for single-line text. For multi-line text,
- # it is apparently not possible to determine at what offset it
- # started
- if '\n' in data:
- lines = data.splitlines()
- lineno = pos[1] - len(lines) + 1
- offset = -1
- else:
- lineno = pos[1]
- offset = pos[2] - len(data)
- pos = (pos[0], lineno, offset)
- self._queue.append((kind, data, pos))
-
- def _getpos_unknown(self):
- return (self.filename, -1, -1)
-
- def _getpos(self):
- return (self.filename, self.expat.CurrentLineNumber,
- self.expat.CurrentColumnNumber)
-
- def _handle_start(self, tag, attrib):
- attrs = Attrs([(QName(name), value) for name, value in
- zip(*[iter(attrib)] * 2)])
- self._enqueue(START, (QName(tag), attrs))
-
- def _handle_end(self, tag):
- self._enqueue(END, QName(tag))
-
- def _handle_data(self, text):
- self._enqueue(TEXT, text)
-
- def _handle_xml_decl(self, version, encoding, standalone):
- self._enqueue(XML_DECL, (version, encoding, standalone))
-
- def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
- self._enqueue(DOCTYPE, (name, pubid, sysid))
-
- def _handle_start_ns(self, prefix, uri):
- self._enqueue(START_NS, (prefix or '', uri))
-
- def _handle_end_ns(self, prefix):
- self._enqueue(END_NS, prefix or '')
-
- def _handle_start_cdata(self):
- self._enqueue(START_CDATA)
-
- def _handle_end_cdata(self):
- self._enqueue(END_CDATA)
-
- def _handle_pi(self, target, data):
- self._enqueue(PI, (target, data))
-
- def _handle_comment(self, text):
- self._enqueue(COMMENT, text)
-
- def _handle_other(self, text):
- if text.startswith('&'):
- # deal with undefined entities
- try:
- text = unichr(entities.name2codepoint[text[1:-1]])
- self._enqueue(TEXT, text)
- except KeyError:
- filename, lineno, offset = self._getpos()
- error = expat.error('undefined entity "%s": line %d, column %d'
- % (text, lineno, offset))
- error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY
- error.lineno = lineno
- error.offset = offset
- raise error
-
-
-def XML(text):
- """Parse the given XML source and return a markup stream.
-
- Unlike with `XMLParser`, the returned stream is reusable, meaning it can be
- iterated over multiple times:
-
- >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
- >>> print(xml)
- <doc><elem>Foo</elem><elem>Bar</elem></doc>
- >>> print(xml.select('elem'))
- <elem>Foo</elem><elem>Bar</elem>
- >>> print(xml.select('elem/text()'))
- FooBar
-
- :param text: the XML source
- :return: the parsed XML event stream
- :raises ParseError: if the XML text is not well-formed
- """
- return Stream(list(XMLParser(StringIO(text))))
-
-
-class HTMLParser(html.HTMLParser, object):
- """Parser for HTML input based on the Python `HTMLParser` module.
-
- This class provides the same interface for generating stream events as
- `XMLParser`, and attempts to automatically balance tags.
-
- The parsing is initiated by iterating over the parser object:
-
- >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
- >>> for kind, data, pos in parser:
- ... print('%s %s' % (kind, data))
- START (QName('ul'), Attrs([(QName('compact'), u'compact')]))
- START (QName('li'), Attrs())
- TEXT Foo
- END li
- END ul
- """
-
- _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
- 'hr', 'img', 'input', 'isindex', 'link', 'meta',
- 'param'])
-
- def __init__(self, source, filename=None, encoding='utf-8'):
- """Initialize the parser for the given HTML input.
-
- :param source: the HTML text as a file-like object
- :param filename: the name of the file, if known
- :param filename: encoding of the file; ignored if the input is unicode
- """
- html.HTMLParser.__init__(self)
- self.source = source
- self.filename = filename
- self.encoding = encoding
- self._queue = []
- self._open_tags = []
-
- def parse(self):
- """Generator that parses the HTML source, yielding markup events.
-
- :return: a markup event stream
- :raises ParseError: if the HTML text is not well formed
- """
- def _generate():
- try:
- bufsize = 4 * 1024 # 4K
- done = False
- while 1:
- while not done and len(self._queue) == 0:
- data = self.source.read(bufsize)
- if data == '': # end of data
- self.close()
- done = True
- else:
- self.feed(data)
- for kind, data, pos in self._queue:
- yield kind, data, pos
- self._queue = []
- if done:
- open_tags = self._open_tags
- open_tags.reverse()
- for tag in open_tags:
- yield END, QName(tag), pos
- break
- except html.HTMLParseError, e:
- msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
- raise ParseError(msg, self.filename, e.lineno, e.offset)
- return Stream(_generate()).filter(_coalesce)
-
- def __iter__(self):
- return iter(self.parse())
-
- def _enqueue(self, kind, data, pos=None):
- if pos is None:
- pos = self._getpos()
- self._queue.append((kind, data, pos))
-
- def _getpos(self):
- lineno, column = self.getpos()
- return (self.filename, lineno, column)
-
- def handle_starttag(self, tag, attrib):
- fixed_attrib = []
- for name, value in attrib: # Fixup minimized attributes
- if value is None:
- value = unicode(name)
- elif not isinstance(value, unicode):
- value = value.decode(self.encoding, 'replace')
- fixed_attrib.append((QName(name), stripentities(value)))
-
- self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
- if tag in self._EMPTY_ELEMS:
- self._enqueue(END, QName(tag))
- else:
- self._open_tags.append(tag)
-
- def handle_endtag(self, tag):
- if tag not in self._EMPTY_ELEMS:
- while self._open_tags:
- open_tag = self._open_tags.pop()
- self._enqueue(END, QName(open_tag))
- if open_tag.lower() == tag.lower():
- break
-
- def handle_data(self, text):
- if not isinstance(text, unicode):
- text = text.decode(self.encoding, 'replace')
- self._enqueue(TEXT, text)
-
- def handle_charref(self, name):
- if name.lower().startswith('x'):
- text = unichr(int(name[1:], 16))
- else:
- text = unichr(int(name))
- self._enqueue(TEXT, text)
-
- def handle_entityref(self, name):
- try:
- text = unichr(entities.name2codepoint[name])
- except KeyError:
- text = '&%s;' % name
- self._enqueue(TEXT, text)
-
- def handle_pi(self, data):
- target, data = data.split(None, 1)
- if data.endswith('?'):
- data = data[:-1]
- self._enqueue(PI, (target.strip(), data.strip()))
-
- def handle_comment(self, text):
- self._enqueue(COMMENT, text)
-
-
-def HTML(text, encoding='utf-8'):
- """Parse the given HTML source and return a markup stream.
-
- Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be
- iterated over multiple times:
-
- >>> html = HTML('<body><h1>Foo</h1></body>')
- >>> print(html)
- <body><h1>Foo</h1></body>
- >>> print(html.select('h1'))
- <h1>Foo</h1>
- >>> print(html.select('h1/text()'))
- Foo
-
- :param text: the HTML source
- :return: the parsed XML event stream
- :raises ParseError: if the HTML text is not well-formed, and error recovery
- fails
- """
- return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
-
-
-def _coalesce(stream):
- """Coalesces adjacent TEXT events into a single event."""
- textbuf = []
- textpos = None
- for kind, data, pos in chain(stream, [(None, None, None)]):
- if kind is TEXT:
- textbuf.append(data)
- if textpos is None:
- textpos = pos
- else:
- if textbuf:
- yield TEXT, ''.join(textbuf), textpos
- del textbuf[:]
- textpos = None
- if kind:
- yield kind, data, pos
diff --git a/genshi/output.py b/genshi/output.py
deleted file mode 100644
index 2ebb38b..0000000
--- a/genshi/output.py
+++ /dev/null
@@ -1,838 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""This module provides different kinds of serialization methods for XML event
-streams.
-"""
-
-from itertools import chain
-import re
-
-from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
-from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
- START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
-
-__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer',
- 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
-__docformat__ = 'restructuredtext en'
-
-
-def encode(iterator, method='xml', encoding='utf-8', out=None):
- """Encode serializer output into a string.
-
- :param iterator: the iterator returned from serializing a stream (basically
- any iterator that yields unicode objects)
- :param method: the serialization method; determines how characters not
- representable in the specified encoding are treated
- :param encoding: how the output string should be encoded; if set to `None`,
- this method returns a `unicode` object
- :param out: a file-like object that the output should be written to
- instead of being returned as one big string; note that if
- this is a file or socket (or similar), the `encoding` must
- not be `None` (that is, the output must be encoded)
- :return: a `str` or `unicode` object (depending on the `encoding`
- parameter), or `None` if the `out` parameter is provided
-
- :since: version 0.4.1
- :note: Changed in 0.5: added the `out` parameter
- """
- if encoding is not None:
- errors = 'replace'
- if method != 'text' and not isinstance(method, TextSerializer):
- errors = 'xmlcharrefreplace'
- _encode = lambda string: string.encode(encoding, errors)
- else:
- _encode = lambda string: string
- if out is None:
- return _encode(''.join(list(iterator)))
- for chunk in iterator:
- out.write(_encode(chunk))
-
-
-def get_serializer(method='xml', **kwargs):
- """Return a serializer object for the given method.
-
- :param method: the serialization method; can be either "xml", "xhtml",
- "html", "text", or a custom serializer class
-
- Any additional keyword arguments are passed to the serializer, and thus
- depend on the `method` parameter value.
-
- :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
- :since: version 0.4.1
- """
- if isinstance(method, basestring):
- method = {'xml': XMLSerializer,
- 'xhtml': XHTMLSerializer,
- 'html': HTMLSerializer,
- 'text': TextSerializer}[method.lower()]
- return method(**kwargs)
-
-
-class DocType(object):
- """Defines a number of commonly used DOCTYPE declarations as constants."""
-
- HTML_STRICT = (
- 'html', '-//W3C//DTD HTML 4.01//EN',
- 'http://www.w3.org/TR/html4/strict.dtd'
- )
- HTML_TRANSITIONAL = (
- 'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
- 'http://www.w3.org/TR/html4/loose.dtd'
- )
- HTML_FRAMESET = (
- 'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
- 'http://www.w3.org/TR/html4/frameset.dtd'
- )
- HTML = HTML_STRICT
-
- HTML5 = ('html', None, None)
-
- XHTML_STRICT = (
- 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
- 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
- )
- XHTML_TRANSITIONAL = (
- 'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
- 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
- )
- XHTML_FRAMESET = (
- 'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
- 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
- )
- XHTML = XHTML_STRICT
-
- XHTML11 = (
- 'html', '-//W3C//DTD XHTML 1.1//EN',
- 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
- )
-
- SVG_FULL = (
- 'svg', '-//W3C//DTD SVG 1.1//EN',
- 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
- )
- SVG_BASIC = (
- 'svg', '-//W3C//DTD SVG Basic 1.1//EN',
- 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
- )
- SVG_TINY = (
- 'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
- 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
- )
- SVG = SVG_FULL
-
- @classmethod
- def get(cls, name):
- """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
- declaration for the specified name.
-
- The following names are recognized in this version:
- * "html" or "html-strict" for the HTML 4.01 strict DTD
- * "html-transitional" for the HTML 4.01 transitional DTD
- * "html-frameset" for the HTML 4.01 frameset DTD
- * "html5" for the ``DOCTYPE`` proposed for HTML5
- * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
- * "xhtml-transitional" for the XHTML 1.0 transitional DTD
- * "xhtml-frameset" for the XHTML 1.0 frameset DTD
- * "xhtml11" for the XHTML 1.1 DTD
- * "svg" or "svg-full" for the SVG 1.1 DTD
- * "svg-basic" for the SVG Basic 1.1 DTD
- * "svg-tiny" for the SVG Tiny 1.1 DTD
-
- :param name: the name of the ``DOCTYPE``
- :return: the ``(name, pubid, sysid)`` tuple for the requested
- ``DOCTYPE``, or ``None`` if the name is not recognized
- :since: version 0.4.1
- """
- return {
- 'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
- 'html-transitional': DocType.HTML_TRANSITIONAL,
- 'html-frameset': DocType.HTML_FRAMESET,
- 'html5': cls.HTML5,
- 'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
- 'xhtml-transitional': cls.XHTML_TRANSITIONAL,
- 'xhtml-frameset': cls.XHTML_FRAMESET,
- 'xhtml11': cls.XHTML11,
- 'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
- 'svg-basic': cls.SVG_BASIC,
- 'svg-tiny': cls.SVG_TINY
- }.get(name.lower())
-
-
-class XMLSerializer(object):
- """Produces XML text from an event stream.
-
- >>> from genshi.builder import tag
- >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
- >>> print(''.join(XMLSerializer()(elem.generate())))
- <div><a href="foo"/><br/><hr noshade="True"/></div>
- """
-
- _PRESERVE_SPACE = frozenset()
-
- def __init__(self, doctype=None, strip_whitespace=True,
- namespace_prefixes=None, cache=True):
- """Initialize the XML serializer.
-
- :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
- DOCTYPE declaration that should be included at the top
- of the generated output, or the name of a DOCTYPE as
- defined in `DocType.get`
- :param strip_whitespace: whether extraneous whitespace should be
- stripped from the output
- :param cache: whether to cache the text output per event, which
- improves performance for repetitive markup
- :note: Changed in 0.4.2: The `doctype` parameter can now be a string.
- :note: Changed in 0.6: The `cache` parameter was added
- """
- self.filters = [EmptyTagFilter()]
- if strip_whitespace:
- self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
- self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
- cache=cache))
- if doctype:
- self.filters.append(DocTypeInserter(doctype))
- self.cache = cache
-
- def __call__(self, stream):
- have_decl = have_doctype = False
- in_cdata = False
-
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output):
- cache[kind, input] = output
- return output
- else:
- def _emit(kind, input, output):
- return output
-
- for filter_ in self.filters:
- stream = filter_(stream)
- for kind, data, pos in stream:
- cached = cache_get((kind, data))
- if cached is not None:
- yield cached
-
- elif kind is START or kind is EMPTY:
- tag, attrib = data
- buf = ['<', tag]
- for attr, value in attrib:
- buf += [' ', attr, '="', escape(value), '"']
- buf.append(kind is EMPTY and '/>' or '>')
- yield _emit(kind, data, Markup(''.join(buf)))
-
- elif kind is END:
- yield _emit(kind, data, Markup('</%s>' % data))
-
- elif kind is TEXT:
- if in_cdata:
- yield _emit(kind, data, data)
- else:
- yield _emit(kind, data, escape(data, quotes=False))
-
- elif kind is COMMENT:
- yield _emit(kind, data, Markup('<!--%s-->' % data))
-
- elif kind is XML_DECL and not have_decl:
- version, encoding, standalone = data
- buf = ['<?xml version="%s"' % version]
- if encoding:
- buf.append(' encoding="%s"' % encoding)
- if standalone != -1:
- standalone = standalone and 'yes' or 'no'
- buf.append(' standalone="%s"' % standalone)
- buf.append('?>\n')
- yield Markup(''.join(buf))
- have_decl = True
-
- elif kind is DOCTYPE and not have_doctype:
- name, pubid, sysid = data
- buf = ['<!DOCTYPE %s']
- if pubid:
- buf.append(' PUBLIC "%s"')
- elif sysid:
- buf.append(' SYSTEM')
- if sysid:
- buf.append(' "%s"')
- buf.append('>\n')
- yield Markup(''.join(buf)) % tuple([p for p in data if p])
- have_doctype = True
-
- elif kind is START_CDATA:
- yield Markup('<![CDATA[')
- in_cdata = True
-
- elif kind is END_CDATA:
- yield Markup(']]>')
- in_cdata = False
-
- elif kind is PI:
- yield _emit(kind, data, Markup('<?%s %s?>' % data))
-
-
-class XHTMLSerializer(XMLSerializer):
- """Produces XHTML text from an event stream.
-
- >>> from genshi.builder import tag
- >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
- >>> print(''.join(XHTMLSerializer()(elem.generate())))
- <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
- """
-
- _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
- 'hr', 'img', 'input', 'isindex', 'link', 'meta',
- 'param'])
- _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
- 'defer', 'disabled', 'ismap', 'multiple',
- 'nohref', 'noresize', 'noshade', 'nowrap'])
- _PRESERVE_SPACE = frozenset([
- QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
- QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
- ])
-
- def __init__(self, doctype=None, strip_whitespace=True,
- namespace_prefixes=None, drop_xml_decl=True, cache=True):
- super(XHTMLSerializer, self).__init__(doctype, False)
- self.filters = [EmptyTagFilter()]
- if strip_whitespace:
- self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
- namespace_prefixes = namespace_prefixes or {}
- namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
- self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes,
- cache=cache))
- if doctype:
- self.filters.append(DocTypeInserter(doctype))
- self.drop_xml_decl = drop_xml_decl
- self.cache = cache
-
- def __call__(self, stream):
- boolean_attrs = self._BOOLEAN_ATTRS
- empty_elems = self._EMPTY_ELEMS
- drop_xml_decl = self.drop_xml_decl
- have_decl = have_doctype = False
- in_cdata = False
-
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output):
- cache[kind, input] = output
- return output
- else:
- def _emit(kind, input, output):
- return output
-
- for filter_ in self.filters:
- stream = filter_(stream)
- for kind, data, pos in stream:
- cached = cache_get((kind, data))
- if cached is not None:
- yield cached
-
- elif kind is START or kind is EMPTY:
- tag, attrib = data
- buf = ['<', tag]
- for attr, value in attrib:
- if attr in boolean_attrs:
- value = attr
- elif attr == 'xml:lang' and 'lang' not in attrib:
- buf += [' lang="', escape(value), '"']
- elif attr == 'xml:space':
- continue
- buf += [' ', attr, '="', escape(value), '"']
- if kind is EMPTY:
- if tag in empty_elems:
- buf.append(' />')
- else:
- buf.append('></%s>' % tag)
- else:
- buf.append('>')
- yield _emit(kind, data, Markup(''.join(buf)))
-
- elif kind is END:
- yield _emit(kind, data, Markup('</%s>' % data))
-
- elif kind is TEXT:
- if in_cdata:
- yield _emit(kind, data, data)
- else:
- yield _emit(kind, data, escape(data, quotes=False))
-
- elif kind is COMMENT:
- yield _emit(kind, data, Markup('<!--%s-->' % data))
-
- elif kind is DOCTYPE and not have_doctype:
- name, pubid, sysid = data
- buf = ['<!DOCTYPE %s']
- if pubid:
- buf.append(' PUBLIC "%s"')
- elif sysid:
- buf.append(' SYSTEM')
- if sysid:
- buf.append(' "%s"')
- buf.append('>\n')
- yield Markup(''.join(buf)) % tuple([p for p in data if p])
- have_doctype = True
-
- elif kind is XML_DECL and not have_decl and not drop_xml_decl:
- version, encoding, standalone = data
- buf = ['<?xml version="%s"' % version]
- if encoding:
- buf.append(' encoding="%s"' % encoding)
- if standalone != -1:
- standalone = standalone and 'yes' or 'no'
- buf.append(' standalone="%s"' % standalone)
- buf.append('?>\n')
- yield Markup(''.join(buf))
- have_decl = True
-
- elif kind is START_CDATA:
- yield Markup('<![CDATA[')
- in_cdata = True
-
- elif kind is END_CDATA:
- yield Markup(']]>')
- in_cdata = False
-
- elif kind is PI:
- yield _emit(kind, data, Markup('<?%s %s?>' % data))
-
-
-class HTMLSerializer(XHTMLSerializer):
- """Produces HTML text from an event stream.
-
- >>> from genshi.builder import tag
- >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
- >>> print(''.join(HTMLSerializer()(elem.generate())))
- <div><a href="foo"></a><br><hr noshade></div>
- """
-
- _NOESCAPE_ELEMS = frozenset([
- QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
- QName('style'), QName('http://www.w3.org/1999/xhtml}style')
- ])
-
- def __init__(self, doctype=None, strip_whitespace=True, cache=True):
- """Initialize the HTML serializer.
-
- :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
- DOCTYPE declaration that should be included at the top
- of the generated output
- :param strip_whitespace: whether extraneous whitespace should be
- stripped from the output
- :param cache: whether to cache the text output per event, which
- improves performance for repetitive markup
- :note: Changed in 0.6: The `cache` parameter was added
- """
- super(HTMLSerializer, self).__init__(doctype, False)
- self.filters = [EmptyTagFilter()]
- if strip_whitespace:
- self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
- self._NOESCAPE_ELEMS))
- self.filters.append(NamespaceFlattener(prefixes={
- 'http://www.w3.org/1999/xhtml': ''
- }, cache=cache))
- if doctype:
- self.filters.append(DocTypeInserter(doctype))
- self.cache = True
-
- def __call__(self, stream):
- boolean_attrs = self._BOOLEAN_ATTRS
- empty_elems = self._EMPTY_ELEMS
- noescape_elems = self._NOESCAPE_ELEMS
- have_doctype = False
- noescape = False
-
- cache = {}
- cache_get = cache.get
- if self.cache:
- def _emit(kind, input, output):
- cache[kind, input] = output
- return output
- else:
- def _emit(kind, input, output):
- return output
-
- for filter_ in self.filters:
- stream = filter_(stream)
- for kind, data, _ in stream:
- output = cache_get((kind, data))
- if output is not None:
- yield output
- if (kind is START or kind is EMPTY) \
- and data[0] in noescape_elems:
- noescape = True
- elif kind is END:
- noescape = False
-
- elif kind is START or kind is EMPTY:
- tag, attrib = data
- buf = ['<', tag]
- for attr, value in attrib:
- if attr in boolean_attrs:
- if value:
- buf += [' ', attr]
- elif ':' in attr:
- if attr == 'xml:lang' and 'lang' not in attrib:
- buf += [' lang="', escape(value), '"']
- elif attr != 'xmlns':
- buf += [' ', attr, '="', escape(value), '"']
- buf.append('>')
- if kind is EMPTY:
- if tag not in empty_elems:
- buf.append('</%s>' % tag)
- yield _emit(kind, data, Markup(''.join(buf)))
- if tag in noescape_elems:
- noescape = True
-
- elif kind is END:
- yield _emit(kind, data, Markup('</%s>' % data))
- noescape = False
-
- elif kind is TEXT:
- if noescape:
- yield _emit(kind, data, data)
- else:
- yield _emit(kind, data, escape(data, quotes=False))
-
- elif kind is COMMENT:
- yield _emit(kind, data, Markup('<!--%s-->' % data))
-
- elif kind is DOCTYPE and not have_doctype:
- name, pubid, sysid = data
- buf = ['<!DOCTYPE %s']
- if pubid:
- buf.append(' PUBLIC "%s"')
- elif sysid:
- buf.append(' SYSTEM')
- if sysid:
- buf.append(' "%s"')
- buf.append('>\n')
- yield Markup(''.join(buf)) % tuple([p for p in data if p])
- have_doctype = True
-
- elif kind is PI:
- yield _emit(kind, data, Markup('<?%s %s?>' % data))
-
-
-class TextSerializer(object):
- """Produces plain text from an event stream.
-
- Only text events are included in the output. Unlike the other serializer,
- special XML characters are not escaped:
-
- >>> from genshi.builder import tag
- >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
- >>> print(elem)
- <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
- >>> print(''.join(TextSerializer()(elem.generate())))
- <Hello!>
-
- If text events contain literal markup (instances of the `Markup` class),
- that markup is by default passed through unchanged:
-
- >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
- >>> print(elem.generate().render(TextSerializer, encoding=None))
- <a href="foo">Hello &amp; Bye!</a><br/>
-
- You can use the ``strip_markup`` to change this behavior, so that tags and
- entities are stripped from the output (or in the case of entities,
- replaced with the equivalent character):
-
- >>> print(elem.generate().render(TextSerializer, strip_markup=True,
- ... encoding=None))
- Hello & Bye!
- """
-
- def __init__(self, strip_markup=False):
- """Create the serializer.
-
- :param strip_markup: whether markup (tags and encoded characters) found
- in the text should be removed
- """
- self.strip_markup = strip_markup
-
- def __call__(self, stream):
- strip_markup = self.strip_markup
- for event in stream:
- if event[0] is TEXT:
- data = event[1]
- if strip_markup and type(data) is Markup:
- data = data.striptags().stripentities()
- yield unicode(data)
-
-
-class EmptyTagFilter(object):
- """Combines `START` and `STOP` events into `EMPTY` events for elements that
- have no contents.
- """
-
- EMPTY = StreamEventKind('EMPTY')
-
- def __call__(self, stream):
- prev = (None, None, None)
- for ev in stream:
- if prev[0] is START:
- if ev[0] is END:
- prev = EMPTY, prev[1], prev[2]
- yield prev
- continue
- else:
- yield prev
- if ev[0] is not START:
- yield ev
- prev = ev
-
-
-EMPTY = EmptyTagFilter.EMPTY
-
-
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    :param prefixes: optional mapping of namespace URIs to prefixes
    :param cache: whether the flattened form of `START`, `EMPTY` and `END`
                  events should be remembered and reused for identical events

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print('%s %r' % (kind, data))
    START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None, cache=True):
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)
        self.cache = cache

    def __call__(self, stream):
        """Yield the events of `stream` with qualified names replaced by
        prefixed plain names and ``xmlns`` attributes added where needed.

        `START_NS` and `END_NS` events are consumed; every other event kind
        that is not `START`, `EMPTY` or `END` is passed through unchanged.
        """
        cache = {}
        cache_get = cache.get
        if self.cache:
            def _emit(kind, input, output, pos):
                cache[kind, input] = output
                return kind, output, pos
        else:
            def _emit(kind, input, output, pos):
                # Must produce a complete event, exactly like the caching
                # variant; returning only `output` would break consumers
                # that unpack (kind, data, pos).
                return kind, output, pos

        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
        namespaces = {XML_NAMESPACE.uri: ['xml']}

        def _push_ns(prefix, uri):
            namespaces.setdefault(uri, []).append(prefix)
            prefixes.setdefault(prefix, []).append(uri)
            # prefix bindings changed, cached output may no longer be valid
            cache.clear()

        def _pop_ns(prefix):
            uris = prefixes.get(prefix)
            uri = uris.pop()
            if not uris:
                del prefixes[prefix]
            if uri not in uris or uri != uris[-1]:
                uri_prefixes = namespaces[uri]
                uri_prefixes.pop()
                if not uri_prefixes:
                    del namespaces[uri]
            cache.clear()
            return uri

        # xmlns attributes waiting to be attached to the next START/EMPTY
        ns_attrs = []
        _push_ns_attr = ns_attrs.append

        def _make_ns_attr(prefix, uri):
            return 'xmlns%s' % (prefix and ':%s' % prefix or ''), uri

        # Generates 'ns1', 'ns2', ... for attribute namespaces that have no
        # prefix in scope. A plain counter is used instead of a generator's
        # `.next` method, which only exists on Python 2.
        prefix_counter = [0]

        def _gen_prefix():
            prefix_counter[0] += 1
            return 'ns%d' % prefix_counter[0]

        for kind, data, pos in stream:
            output = cache_get((kind, data))
            if output is not None:
                yield kind, output, pos

            elif kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns in namespaces:
                        prefix = namespaces[tagns][-1]
                        if prefix:
                            tagname = '%s:%s' % (prefix, tagname)
                    else:
                        # unknown namespace: declare it as the default one
                        _push_ns_attr(('xmlns', tagns))
                        _push_ns('', tagns)

                new_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns not in namespaces:
                            prefix = _gen_prefix()
                            _push_ns(prefix, attrns)
                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
                        else:
                            prefix = namespaces[attrns][-1]
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    new_attrs.append((attrname, value))

                yield _emit(kind, data,
                            (tagname, Attrs(ns_attrs + new_attrs)), pos)
                del ns_attrs[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = namespaces[tagns][-1]
                    if prefix:
                        tagname = '%s:%s' % (prefix, tagname)
                yield _emit(kind, data, tagname, pos)

            elif kind is START_NS:
                prefix, uri = data
                if uri not in namespaces:
                    # NOTE(review): `prefixes` is keyed by prefix here, so a
                    # lookup by `uri` looks suspicious - confirm intent.
                    prefix = prefixes.get(uri, [prefix])[-1]
                    _push_ns_attr(_make_ns_attr(prefix, uri))
                _push_ns(prefix, uri)

            elif kind is END_NS:
                if data in prefixes:
                    uri = _pop_ns(data)
                    if ns_attrs:
                        # declaration was never attached to an element
                        attr = _make_ns_attr(data, uri)
                        if attr in ns_attrs:
                            ns_attrs.remove(attr)

            else:
                yield kind, data, pos
-
-
class WhitespaceFilter(object):
    """Stream filter that strips superfluous ignorable whitespace and merges
    runs of adjacent text events.
    """

    def __init__(self, preserve=None, noescape=None):
        """Initialize the filter.

        :param preserve: a set or sequence of tag names for which white-space
                         should be preserved
        :param noescape: a set or sequence of tag names for which text content
                         should not be escaped

        The `noescape` set is expected to refer to elements that cannot contain
        further child elements (such as ``<style>`` or ``<script>`` in HTML
        documents).
        """
        if preserve is None:
            self.preserve = frozenset()
        else:
            self.preserve = frozenset(preserve)
        if noescape is None:
            self.noescape = frozenset()
        else:
            self.noescape = frozenset(noescape)

    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
                 collapse_lines=re.compile('\n{2,}').sub):
        """Yield the events of `stream` with consecutive `TEXT` events merged
        into one, and (outside of preserved elements) trailing spaces removed
        and runs of blank lines collapsed.
        """
        join_markup = Markup('').join
        keep_ws_tags = self.preserve
        raw_text_tags = self.noescape
        preserve_depth = 0
        raw_text = False

        pending = []
        # the trailing sentinel flushes text buffered at the end of the stream
        sentinel = [(None, None, None)]
        for kind, data, pos in chain(stream, sentinel):
            if kind is TEXT:
                if raw_text:
                    data = Markup(data)
                pending.append(data)
                continue

            if pending:
                if len(pending) > 1:
                    text = join_markup(pending, escape_quotes=False)
                    del pending[:]
                else:
                    text = escape(pending.pop(), quotes=False)
                if not preserve_depth:
                    text = collapse_lines('\n', trim_trailing_space('', text))
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrs = data
                if (preserve_depth or tag in keep_ws_tags
                        or attrs.get(space) == 'preserve'):
                    preserve_depth += 1
                if not raw_text and tag in raw_text_tags:
                    raw_text = True
            elif kind is END:
                raw_text = False
                if preserve_depth:
                    preserve_depth -= 1
            elif kind is START_CDATA:
                raw_text = True
            elif kind is END_CDATA:
                raw_text = False

            if kind:
                yield kind, data, pos
-
-
class DocTypeInserter(object):
    """Stream filter that puts a DOCTYPE declaration at the start of the
    stream, or directly after the XML declaration when there is one.
    """

    def __init__(self, doctype):
        """Initialize the filter.

        :param doctype: DOCTYPE as a string or DocType object.
        """
        if isinstance(doctype, basestring):
            doctype = DocType.get(doctype)
        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))

    def __call__(self, stream):
        """Yield the events of `stream` with the DOCTYPE event inserted.

        An empty stream produces just the DOCTYPE event.
        """
        seen_first = False
        for kind, data, pos in stream:
            if seen_first:
                yield (kind, data, pos)
                continue
            # only the very first event decides where the DOCTYPE goes
            seen_first = True
            if kind is XML_DECL:
                yield (kind, data, pos)
                yield self.doctype_event
            else:
                yield self.doctype_event
                yield (kind, data, pos)

        if not seen_first:
            yield self.doctype_event
diff --git a/genshi/path.py b/genshi/path.py
deleted file mode 100644
index 122fbf0..0000000
--- a/genshi/path.py
+++ /dev/null
@@ -1,1528 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Basic support for evaluating XPath expressions against streams.
-
->>> from genshi.input import XML
->>> doc = XML('''<doc>
-... <items count="4">
-... <item status="new">
-... <summary>Foo</summary>
-... </item>
-... <item status="closed">
-... <summary>Bar</summary>
-... </item>
-... <item status="closed" resolution="invalid">
-... <summary>Baz</summary>
-... </item>
-... <item status="closed" resolution="fixed">
-... <summary>Waz</summary>
-... </item>
-... </items>
-... </doc>''')
->>> print(doc.select('items/item[@status="closed" and '
-... '(@resolution="invalid" or not(@resolution))]/summary/text()'))
-BarBaz
-
-Because the XPath engine operates on markup streams (as opposed to tree
-structures), it only implements a subset of the full XPath 1.0 language.
-"""
-
-from collections import deque
-try:
- reduce # builtin in Python < 3
-except NameError:
- from functools import reduce
-from math import ceil, floor
-import operator
-import re
-from itertools import chain
-
-from genshi.core import Stream, Attrs, Namespace, QName
-from genshi.core import START, END, TEXT, START_NS, END_NS, COMMENT, PI, \
- START_CDATA, END_CDATA
-
-__all__ = ['Path', 'PathSyntaxError']
-__docformat__ = 'restructuredtext en'
-
-
class Axis(object):
    """Namespace for the names of the XPath axes this module supports."""

    ATTRIBUTE = 'attribute'
    CHILD = 'child'
    DESCENDANT = 'descendant'
    DESCENDANT_OR_SELF = 'descendant-or-self'
    SELF = 'self'

    @classmethod
    def forname(cls, name):
        """Look up an axis constant by its XPath name.

        :param name: the axis name as written in an expression, for example
                     ``descendant-or-self``
        :return: the matching class attribute, or `None` if there is none
        """
        attr_name = name.replace('-', '_').upper()
        return getattr(cls, attr_name, None)


# module-level shortcuts for the axis constants
ATTRIBUTE = Axis.ATTRIBUTE
CHILD = Axis.CHILD
DESCENDANT = Axis.DESCENDANT
DESCENDANT_OR_SELF = Axis.DESCENDANT_OR_SELF
SELF = Axis.SELF
-
-
class GenericStrategy(object):
    """Path matching strategy that accepts every path.

    `supports()` always returns true, so this strategy can act as the
    fallback when no more specialized strategy applies.
    """

    @classmethod
    def supports(cls, path):
        """Return `True`: this strategy claims to handle any path."""
        return True

    def __init__(self, path):
        # list of (axis, nodetest, predicates) steps
        self.path = path

    def test(self, ignore_context):
        """Return a stateful function that tests stream events against the
        path.

        :param ignore_context: if true, the first step is rewritten so that
                               it is searched on the descendant-or-self axis
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning the match for the event, or
                 `None`
        """
        p = self.path
        if ignore_context:
            if p[0][0] is ATTRIBUTE:
                steps = [_DOTSLASHSLASH] + p
            else:
                steps = [(DESCENDANT_OR_SELF, p[0][1], p[0][2])] + p[1:]
        elif p[0][0] is CHILD or p[0][0] is ATTRIBUTE \
                or p[0][0] is DESCENDANT:
            steps = [_DOTSLASH] + p
        else:
            steps = p

        # for node it contains all positions of xpath expression
        # where its child should start checking for matches
        # with list of corresponding context counters
        # there can be many of them, because position that is from
        # descendant-like axis can be achieved from different nodes
        # for example <a><a><b/></a></a> should match both //a//b[1]
        # and //a//b[2]
        # positions always form increasing sequence (invariant)
        stack = [[(0, [[]])]]

        def _test(event, namespaces, variables, updateonly=False):
            # NOTE: `updateonly` is accepted but never read in this function
            kind, data, pos = event[:3]
            retval = None

            # Manage the stack that tells us "where we are" in the stream
            if kind is END:
                if stack:
                    stack.pop()
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                # should we make namespaces work?
                return None

            # NOTE(review): on Python 2 the list comprehension variable `pos`
            # leaks and overwrites the event position unpacked above -
            # confirm that no node test or predicate relies on `pos`.
            pos_queue = deque([(pos, cou, []) for pos, cou in stack[-1]])
            next_pos = []

            # length of real part of path - we omit attribute axis
            # (`x or 1 and 0` parses as `x or (1 and 0)`, i.e. subtract 1
            # only when the last step is on the attribute axis)
            real_len = len(steps) - ((steps[-1][0] == ATTRIBUTE) or 1 and 0)
            last_checked = -1  # never read afterwards

            # places where we have to check for match, are these
            # provided by parent
            while pos_queue:
                x, pcou, mcou = pos_queue.popleft()
                axis, nodetest, predicates = steps[x]

                # we need to push descendant-like positions from parent
                # further
                if (axis is DESCENDANT or axis is DESCENDANT_OR_SELF) and pcou:
                    if next_pos and next_pos[-1][0] == x:
                        next_pos[-1][1].extend(pcou)
                    else:
                        next_pos.append((x, pcou))

                # nodetest first
                if not nodetest(kind, data, pos, namespaces, variables):
                    continue

                # counters packs that were already bad
                missed = set()
                counters_len = len(pcou) + len(mcou)

                # number of counters - we have to create one
                # for every context position based predicate
                cnum = 0

                # tells if we have match with position x
                matched = True

                if predicates:
                    for predicate in predicates:
                        pretval = predicate(kind, data, pos,
                                            namespaces,
                                            variables)
                        if type(pretval) is float: # FIXME <- need to check
                                                   # this for other types that
                                                   # can be coerced to float

                            # each counter pack needs to be checked
                            for i, cou in enumerate(chain(pcou, mcou)):
                                # it was bad before
                                if i in missed:
                                    continue

                                if len(cou) < cnum + 1:
                                    cou.append(0)
                                cou[cnum] += 1

                                # it is bad now
                                if cou[cnum] != int(pretval):
                                    missed.add(i)

                            # none of counters pack was good
                            if len(missed) == counters_len:
                                pretval = False
                            cnum += 1

                        if not pretval:
                            matched = False
                            break

                if not matched:
                    continue

                # counter for next position with current node as context node
                child_counter = []

                if x + 1 == real_len:
                    # we reached end of expression, because x + 1
                    # is equal to the length of expression
                    matched = True
                    axis, nodetest, predicates = steps[-1]
                    if axis is ATTRIBUTE:
                        matched = nodetest(kind, data, pos, namespaces,
                                           variables)
                    if matched:
                        retval = matched
                else:
                    next_axis = steps[x + 1][0]

                    # if next axis allows matching self we have
                    # to add next position to our queue
                    if next_axis is DESCENDANT_OR_SELF or next_axis is SELF:
                        if not pos_queue or pos_queue[0][0] > x + 1:
                            pos_queue.appendleft((x + 1, [], [child_counter]))
                        else:
                            pos_queue[0][2].append(child_counter)

                    # if axis is not self we have to add it to child's list
                    if next_axis is not SELF:
                        next_pos.append((x + 1, [child_counter]))

            # only START events open a new level whose children need positions
            if kind is START:
                stack.append(next_pos)

            return retval

        return _test
-
-
class SimplePathStrategy(object):
    """Strategy for path with only local names, attributes and text nodes.

    The path is split into fragments of consecutive ``child::`` steps, and
    each fragment is matched against the chain of open elements using a
    KMP-style partial match table.
    """

    @classmethod
    def supports(cls, path):
        """Return whether `path` is simple enough for this strategy.

        A path is rejected if it starts on the attribute axis, if any step
        has predicates, or if any node test is not a local name, comment or
        text node test.
        """
        if path[0][0] is ATTRIBUTE:
            return False
        allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest)
        for _, nodetest, predicates in path:
            if predicates:
                return False
            if not isinstance(nodetest, allowed_tests):
                return False
        return True

    def __init__(self, path):
        """Precompute the fragment list for `path`.

        :param path: list of ``(axis, nodetest, predicates)`` steps
        """
        # fragments is list of tuples (fragment, pi, attr, self_beginning)
        # fragment is list of nodetests for fragment of path with only
        # child:: axes between
        # pi is KMP partial match table for this fragment
        # attr is attribute nodetest if fragment ends with @ and None otherwise
        # self_beginning is True if axis for first fragment element
        # was self (first fragment) or descendant-or-self (farther fragment)
        self.fragments = []

        self_beginning = False
        fragment = []

        def nodes_equal(node1, node2):
            """Tests if two node tests are equal"""
            if type(node1) is not type(node2):
                return False
            if type(node1) == LocalNameTest:
                return node1.name == node2.name
            return True

        def calculate_pi(f):
            """KMP prefix calculation for table"""
            # the indexes in prefix table are shifted by one
            # in comparison with common implementations
            # pi[i] = NORMAL_PI[i + 1]
            if len(f) == 0:
                return []
            pi = [0]
            s = 0
            for i in range(1, len(f)):
                while s > 0 and not nodes_equal(f[s], f[i]):
                    s = pi[s-1]
                if nodes_equal(f[s], f[i]):
                    s += 1
                pi.append(s)
            return pi

        for axis in path:
            if axis[0] is SELF:
                if len(fragment) != 0:
                    # if element is not first in fragment it has to be
                    # the same as previous one
                    # for example child::a/self::b is always wrong
                    # NOTE(review): `fragment` holds node tests, so indexing
                    # `fragment[-1][1]` looks wrong (nodes_equal() may have
                    # been intended) - TODO confirm.
                    if axis[1] != fragment[-1][1]:
                        # NOTE(review): test() calls len() on this attribute
                        # before its `is None` check - TODO confirm.
                        self.fragments = None
                        return
                else:
                    self_beginning = True
                    fragment.append(axis[1])
            elif axis[0] is CHILD:
                fragment.append(axis[1])
            elif axis[0] is ATTRIBUTE:
                pi = calculate_pi(fragment)
                self.fragments.append((fragment, pi, axis[1], self_beginning))
                # attribute has always to be at the end, so we can jump out
                return
            else:
                pi = calculate_pi(fragment)
                self.fragments.append((fragment, pi, None, self_beginning))
                fragment = [axis[1]]
                if axis[0] is DESCENDANT:
                    self_beginning = False
                else: # DESCENDANT_OR_SELF
                    self_beginning = True
        pi = calculate_pi(fragment)
        self.fragments.append((fragment, pi, None, self_beginning))

    def test(self, ignore_context):
        """Return a stateful function that tests stream events against the
        path.

        :param ignore_context: if true, the first fragment may start matching
                               at any depth
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning `True`, the attribute test
                 result, or `None`
        """
        # stack of triples (fid, p, ic)
        # fid is index of current fragment
        # p is position in this fragment
        # ic is if we ignore context in this fragment
        stack = []
        stack_push = stack.append
        stack_pop = stack.pop
        frags = self.fragments
        frags_len = len(frags)

        def _test(event, namespaces, variables, updateonly=False):
            # NOTE: `updateonly` is accepted but never read in this function

            # expression found impossible during init
            if frags is None:
                return None

            kind, data, pos = event[:3]

            # skip events we don't care about
            if kind is END:
                if stack:
                    stack_pop()
                return None
            if kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                return None

            if not stack:
                # root node, nothing on stack, special case
                fid = 0
                # skip empty fragments (there can be actually only one)
                while not frags[fid][0]:
                    fid += 1
                p = 0
                # empty fragment means descendant node at beginning
                ic = ignore_context or (fid > 0)

                # expression can match first node, if first axis is self::,
                # descendant-or-self:: or if ignore_context is True and
                # axis is not descendant::
                if not frags[fid][3] and (not ignore_context or fid > 0):
                    # axis is not self-beginning, we have to skip this node
                    stack_push((fid, p, ic))
                    return None
            else:
                # take position of parent
                fid, p, ic = stack[-1]

            if fid is not None and not ic:
                # fragment not ignoring context - we can't jump back
                frag, pi, attrib, _ = frags[fid]
                frag_len = len(frag)

                if p == frag_len:
                    # that probably means empty first fragment
                    pass
                elif frag[p](kind, data, pos, namespaces, variables):
                    # match, so we can go further
                    p += 1
                else:
                    # not matched, so there will be no match in subtree
                    fid, p = None, None

                if p == frag_len and fid + 1 != frags_len:
                    # we made it to end of fragment, we can go to following
                    fid += 1
                    p = 0
                    ic = True

            if fid is None:
                # there was no match in fragment not ignoring context
                if kind is START:
                    stack_push((fid, p, ic))
                return None

            if ic:
                # we are in fragment ignoring context
                while True:
                    frag, pi, attrib, _ = frags[fid]
                    frag_len = len(frag)

                    # KMP new "character"
                    while p > 0 and (p >= frag_len or not \
                            frag[p](kind, data, pos, namespaces, variables)):
                        p = pi[p-1]
                    if frag[p](kind, data, pos, namespaces, variables):
                        p += 1

                    if p == frag_len:
                        # end of fragment reached
                        if fid + 1 == frags_len:
                            # that was last fragment
                            break
                        else:
                            fid += 1
                            p = 0
                            ic = True
                            if not frags[fid][3]:
                                # next fragment not self-beginning
                                break
                    else:
                        break

            if kind is START:
                # we have to put new position on stack, for children

                if not ic and fid + 1 == frags_len and p == frag_len:
                    # it is end of the only, not context ignoring fragment
                    # so there will be no matches in subtree
                    stack_push((None, None, ic))
                else:
                    stack_push((fid, p, ic))

            # have we reached the end of the last fragment?
            if fid + 1 == frags_len and p == frag_len:
                if attrib: # attribute ended path, return value
                    return attrib(kind, data, pos, namespaces, variables)
                return True

            return None

        return _test
-
-
class SingleStepStrategy(object):
    """Path matching strategy for paths that consist of exactly one step."""

    @classmethod
    def supports(cls, path):
        """Return whether `path` has exactly one location step."""
        return len(path) == 1

    def __init__(self, path):
        # list holding the single (axis, nodetest, predicates) step
        self.path = path

    def test(self, ignore_context):
        """Return a stateful function that tests stream events against the
        single step.

        :param ignore_context: if true, the depth of the event in the stream
                               is not taken into account
        :return: a function ``_test(event, namespaces, variables,
                 updateonly=False)`` returning `True`, the attribute test
                 result, or `None`
        """
        steps = self.path
        if steps[0][0] is ATTRIBUTE:
            # an attribute step is evaluated against its owning element
            steps = [_DOTSLASH] + steps
        select_attr = steps[-1][0] is ATTRIBUTE and steps[-1][1] or None

        # for every position in expression stores counters' list
        # it is used for position based predicates
        counters = []
        # one-element list so the nested function can update the depth
        depth = [0]

        def _test(event, namespaces, variables, updateonly=False):
            # NOTE: `updateonly` is accepted but never read in this function
            kind, data, pos = event[:3]

            # Manage the stack that tells us "where we are" in the stream
            if kind is END:
                if not ignore_context:
                    depth[0] -= 1
                return None
            elif kind is START_NS or kind is END_NS \
                    or kind is START_CDATA or kind is END_CDATA:
                # should we make namespaces work?
                return None

            if not ignore_context:
                # decide from the depth *before* this event whether the
                # event can be reached on the step's axis at all
                outside = (steps[0][0] is SELF and depth[0] != 0) \
                       or (steps[0][0] is CHILD and depth[0] != 1) \
                       or (steps[0][0] is DESCENDANT and depth[0] < 1)
                if kind is START:
                    depth[0] += 1
                if outside:
                    return None

            axis, nodetest, predicates = steps[0]
            if not nodetest(kind, data, pos, namespaces, variables):
                return None

            if predicates:
                cnum = 0
                for predicate in predicates:
                    pretval = predicate(kind, data, pos, namespaces, variables)
                    if type(pretval) is float: # FIXME <- need to check this
                                               # for other types that can be
                                               # coerced to float
                        # a float predicate is a position test: count the
                        # candidates seen so far and compare
                        if len(counters) < cnum + 1:
                            counters.append(0)
                        counters[cnum] += 1
                        if counters[cnum] != int(pretval):
                            pretval = False
                        cnum += 1
                    if not pretval:
                        return None

            if select_attr:
                return select_attr(kind, data, pos, namespaces, variables)

            return True

        return _test
-
-
class Path(object):
    """Implements basic XPath support on streams.

    Instances of this class represent a "compiled" XPath expression, and
    provide methods for testing the path against a stream, as well as
    extracting a substream matching that path.
    """

    # tried in order; the first strategy that supports a path is used
    STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy)

    def __init__(self, text, filename=None, lineno=-1):
        """Create the path object from a string.

        :param text: the path expression
        :param filename: the name of the file in which the path expression was
                         found (used in error messages)
        :param lineno: the line on which the expression was found
        :raises NotImplementedError: if no strategy supports one of the
                                     parsed paths
        """
        self.source = text
        self.paths = PathParser(text, filename, lineno).parse()
        self.strategies = []
        for path in self.paths:
            for strategy_class in self.STRATEGIES:
                if strategy_class.supports(path):
                    self.strategies.append(strategy_class(path))
                    break
            else:
                # `NotImplemented` is a comparison sentinel, not an
                # exception; raising it would itself fail with a TypeError.
                raise NotImplementedError('No strategy found for path')

    def __repr__(self):
        paths = []
        for path in self.paths:
            steps = []
            for axis, nodetest, predicates in path:
                steps.append('%s::%s' % (axis, nodetest))
                for predicate in predicates:
                    steps[-1] += '[%s]' % predicate
            paths.append('/'.join(steps))
        return '<%s "%s">' % (type(self).__name__, '|'.join(paths))

    def select(self, stream, namespaces=None, variables=None):
        """Returns a substream of the given stream that matches the path.

        If there are no matches, this method returns an empty stream.

        >>> from genshi.input import XML
        >>> xml = XML('<root><elem><child>Text</child></elem></root>')

        >>> print(Path('.//child').select(xml))
        <child>Text</child>

        >>> print(Path('.//child/text()').select(xml))
        Text

        :param stream: the stream to select from
        :param namespaces: (optional) a mapping of namespace prefixes to URIs
        :param variables: (optional) a mapping of variable names to values
        :return: the substream matching the path, or an empty stream
        :rtype: `Stream`
        """
        if namespaces is None:
            namespaces = {}
        if variables is None:
            variables = {}
        stream = iter(stream)

        def _generate(stream=stream, ns=namespaces, vs=variables):
            test = self.test()
            for event in stream:
                result = test(event, ns, vs)
                if result is True:
                    yield event
                    if event[0] is START:
                        # Emit the whole subtree of the matched element.
                        # Iterating with `for` (rather than calling the
                        # iterator's next method) works on Python 2 and 3
                        # and ends cleanly if the stream is cut short.
                        depth = 1
                        for subevent in stream:
                            if subevent[0] is START:
                                depth += 1
                            elif subevent[0] is END:
                                depth -= 1
                            yield subevent
                            # keep the matcher's state in sync
                            test(subevent, ns, vs, updateonly=True)
                            if depth <= 0:
                                break
                elif result:
                    yield result

        # NOTE(review): `stream` was rebound to a plain iterator above, so
        # this getattr presumably always yields None - confirm intent.
        return Stream(_generate(),
                      serializer=getattr(stream, 'serializer', None))

    def test(self, ignore_context=False):
        """Returns a function that can be used to track whether the path matches
        a specific stream event.

        The function returned expects the positional arguments ``event``,
        ``namespaces`` and ``variables``. The first is a stream event, while the
        latter two are a mapping of namespace prefixes to URIs, and a mapping
        of variable names to values, respectively. In addition, the function
        accepts an ``updateonly`` keyword argument that defaults to ``False``.
        If it is set to ``True``, the function only updates its internal state,
        but does not perform any tests or return a result.

        If the path matches the event, the function returns the match (for
        example, a `START` or `TEXT` event.) Otherwise, it returns ``None``.

        >>> from genshi.input import XML
        >>> xml = XML('<root><elem><child id="1"/></elem><child id="2"/></root>')
        >>> test = Path('child').test()
        >>> namespaces, variables = {}, {}
        >>> for event in xml:
        ...     if test(event, namespaces, variables):
        ...         print('%s %r' % (event[0], event[1]))
        START (QName('child'), Attrs([(QName('id'), u'2')]))

        :param ignore_context: if `True`, the path is interpreted like a pattern
                               in XSLT, meaning for example that it will match
                               at any depth
        :return: a function that can be used to test individual events in a
                 stream against the path
        :rtype: ``function``
        """
        tests = [s.test(ignore_context) for s in self.strategies]
        if len(tests) == 1:
            return tests[0]

        def _multi(event, namespaces, variables, updateonly=False):
            # Every sub-test must see every event to keep its state current;
            # the first non-None result wins.
            retval = None
            for test in tests:
                val = test(event, namespaces, variables, updateonly=updateonly)
                if retval is None:
                    retval = val
            return retval
        return _multi
-
-
class PathSyntaxError(Exception):
    """Raised for XPath expressions that cannot be parsed."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        :param message: the error message
        :param filename: name of the file containing the expression; when
                         given, it and `lineno` are appended to the message
        :param lineno: line on which the expression was found
        :param offset: stored on the instance as given
        """
        self.filename = filename
        self.lineno = lineno
        self.offset = offset
        text = message
        if filename:
            text = '%s (%s, line %d)' % (message, filename, lineno)
        Exception.__init__(self, text)
-
-
class PathParser(object):
    """Tokenizes and parses an XPath expression."""

    _QUOTES = (("'", "'"), ('"', '"'))
    # multi-character tokens must precede their single-character prefixes
    _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
               '=', '!=', '!', '|', ',', '>=', '>', '<=', '<', '$')
    # groups: double-quoted string, single-quoted string, decimal number,
    # operator token, name; whitespace matches but is captured by no group
    _tokenize = re.compile(
        r'("[^"]*")|(\'[^\']*\')|((?:\d+)?\.\d+)|(%s)|([^%s\s]+)|\s+' % (
            '|'.join([re.escape(t) for t in _TOKENS]),
            ''.join([re.escape(t[0]) for t in _TOKENS]))).findall

    def __init__(self, text, filename=None, lineno=-1):
        """Tokenize the expression.

        :param text: the path expression
        :param filename: file name reported in syntax errors
        :param lineno: line number reported in syntax errors
        """
        self.filename = filename
        self.lineno = lineno
        # each match has at most one non-empty group; whitespace has none
        self.tokens = [t for t in [dqstr or sqstr or number or token or name
                                   for dqstr, sqstr, number, token, name in
                                   self._tokenize(text)] if t]
        self.pos = 0

    # Tokenizer

    @property
    def at_end(self):
        """Whether the current token is the last one."""
        return self.pos == len(self.tokens) - 1

    @property
    def cur_token(self):
        """The token at the current position."""
        return self.tokens[self.pos]

    def next_token(self):
        """Advance to the next token and return it."""
        self.pos += 1
        return self.tokens[self.pos]

    def peek_token(self):
        """Return the token after the current one without advancing, or
        `None` when the current token is the last one.
        """
        if not self.at_end:
            return self.tokens[self.pos + 1]
        return None

    # Recursive descent parser

    def parse(self):
        """Parses the XPath expression and returns a list of location path
        tests.

        For union expressions (such as `*|text()`), this function returns one
        test for each operand in the union. For path expressions that don't
        use the union operator, the function always returns a list of size 1.

        Each path test in turn is a sequence of tests that correspond to the
        location steps, each tuples of the form `(axis, testfunc, predicates)`

        :raises PathSyntaxError: if tokens remain after the expression
        """
        paths = [self._location_path()]
        while self.cur_token == '|':
            self.next_token()
            paths.append(self._location_path())
        if not self.at_end:
            raise PathSyntaxError('Unexpected token %r after end of expression'
                                  % self.cur_token, self.filename, self.lineno)
        return paths

    def _location_path(self):
        """Parse one location path into a list of steps."""
        steps = []
        while True:
            if self.cur_token.startswith('/'):
                if not steps:
                    if self.cur_token == '//':
                        # hack to make //* match every node - also root
                        self.next_token()
                        axis, nodetest, predicates = self._location_step()
                        steps.append((DESCENDANT_OR_SELF, nodetest,
                                      predicates))
                        if self.at_end or not self.cur_token.startswith('/'):
                            break
                        continue
                    else:
                        raise PathSyntaxError('Absolute location paths not '
                                              'supported', self.filename,
                                              self.lineno)
                elif self.cur_token == '//':
                    steps.append((DESCENDANT_OR_SELF, NodeTest(), []))
                self.next_token()

            axis, nodetest, predicates = self._location_step()
            if not axis:
                axis = CHILD
            steps.append((axis, nodetest, predicates))
            if self.at_end or not self.cur_token.startswith('/'):
                break

        return steps

    def _location_step(self):
        """Parse one step and return ``(axis, nodetest, predicates)``; `axis`
        is `None` when the step names no axis.
        """
        if self.cur_token == '@':
            axis = ATTRIBUTE
            self.next_token()
        elif self.cur_token == '.':
            axis = SELF
        elif self.cur_token == '..':
            raise PathSyntaxError('Unsupported axis "parent"', self.filename,
                                  self.lineno)
        elif self.peek_token() == '::':
            axis = Axis.forname(self.cur_token)
            if axis is None:
                # report the name that was written; `axis` is always None
                # at this point
                raise PathSyntaxError('Unsupport axis "%s"' % self.cur_token,
                                      self.filename, self.lineno)
            self.next_token()
            self.next_token()
        else:
            axis = None
        nodetest = self._node_test(axis or CHILD)
        predicates = []
        while self.cur_token == '[':
            predicates.append(self._predicate())
        return axis, nodetest, predicates

    def _node_test(self, axis=None):
        """Parse a node test (node type, qualified name or plain name)."""
        test = prefix = None
        next_token = self.peek_token()
        if next_token in ('(', '()'): # Node type test
            test = self._node_type()

        elif next_token == ':': # Namespace prefix
            prefix = self.cur_token
            self.next_token()
            localname = self.next_token()
            if localname == '*':
                test = QualifiedPrincipalTypeTest(axis, prefix)
            else:
                test = QualifiedNameTest(axis, prefix, localname)

        else: # Name test
            if self.cur_token == '*':
                test = PrincipalTypeTest(axis)
            elif self.cur_token == '.':
                test = NodeTest()
            else:
                test = LocalNameTest(axis, self.cur_token)

        if not self.at_end:
            self.next_token()
        return test

    def _node_type(self):
        """Parse a node type test such as ``text()`` or
        ``processing-instruction('name')``.
        """
        name = self.cur_token
        self.next_token()

        args = []
        if self.cur_token != '()':
            # The processing-instruction() function optionally accepts the
            # name of the PI as argument, which must be a literal string
            self.next_token() # (
            if self.cur_token != ')':
                string = self.cur_token
                if (string[0], string[-1]) in self._QUOTES:
                    string = string[1:-1]
                args.append(string)

        cls = _nodetest_map.get(name)
        if not cls:
            raise PathSyntaxError('%s() not allowed here' % name, self.filename,
                                  self.lineno)
        return cls(*args)

    def _predicate(self):
        """Parse a ``[...]`` predicate and return its expression."""
        assert self.cur_token == '['
        self.next_token()
        expr = self._or_expr()
        if self.cur_token != ']':
            raise PathSyntaxError('Expected "]" to close predicate, '
                                  'but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
        if not self.at_end:
            self.next_token()
        return expr

    def _or_expr(self):
        expr = self._and_expr()
        while self.cur_token == 'or':
            self.next_token()
            expr = OrOperator(expr, self._and_expr())
        return expr

    def _and_expr(self):
        expr = self._equality_expr()
        while self.cur_token == 'and':
            self.next_token()
            expr = AndOperator(expr, self._equality_expr())
        return expr

    def _equality_expr(self):
        expr = self._relational_expr()
        while self.cur_token in ('=', '!='):
            op = _operator_map[self.cur_token]
            self.next_token()
            expr = op(expr, self._relational_expr())
        return expr

    def _relational_expr(self):
        expr = self._sub_expr()
        # All four relational operators; '<=' used to be missing because
        # '>=' was listed twice.
        while self.cur_token in ('>', '>=', '<', '<='):
            # NOTE(review): assumes _operator_map has an entry for every
            # operator listed above - confirm; a missing one is reported as
            # a syntax error instead of a KeyError.
            op = _operator_map.get(self.cur_token)
            if op is None:
                raise PathSyntaxError('Unsupported operator "%s"'
                                      % self.cur_token,
                                      self.filename, self.lineno)
            self.next_token()
            expr = op(expr, self._sub_expr())
        return expr

    def _sub_expr(self):
        """Parse a parenthesized sub-expression or a primary expression."""
        token = self.cur_token
        if token != '(':
            return self._primary_expr()
        self.next_token()
        expr = self._or_expr()
        if self.cur_token != ')':
            raise PathSyntaxError('Expected ")" to close sub-expression, '
                                  'but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
        self.next_token()
        return expr

    def _primary_expr(self):
        """Parse a literal, variable reference, function call or node test."""
        token = self.cur_token
        if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
            self.next_token()
            return StringLiteral(token[1:-1])
        elif token[0].isdigit() or token[0] == '.':
            self.next_token()
            return NumberLiteral(as_float(token))
        elif token == '$':
            token = self.next_token()
            self.next_token()
            return VariableReference(token)
        elif not self.at_end and self.peek_token().startswith('('):
            return self._function_call()
        else:
            axis = None
            if token == '@':
                axis = ATTRIBUTE
                self.next_token()
            return self._node_test(axis)

    def _function_call(self):
        """Parse a function call and return the function object built from
        its arguments.
        """
        name = self.cur_token
        if self.next_token() == '()':
            args = []
        else:
            assert self.cur_token == '('
            self.next_token()
            args = [self._or_expr()]
            while self.cur_token == ',':
                self.next_token()
                args.append(self._or_expr())
            if not self.cur_token == ')':
                raise PathSyntaxError('Expected ")" to close function argument '
                                      'list, but found "%s"' % self.cur_token,
                                      self.filename, self.lineno)
        self.next_token()
        cls = _function_map.get(name)
        if not cls:
            raise PathSyntaxError('Unsupported function "%s"' % name,
                                  self.filename, self.lineno)
        return cls(*args)
-
-
-# Type coercion
-
def as_scalar(value):
    """Unwrap a single-attribute `Attrs` object into that attribute's value.

    Any value that is not an `Attrs` instance is returned unchanged. An
    `Attrs` object must contain exactly one attribute.
    """
    if not isinstance(value, Attrs):
        return value
    assert len(value) == 1
    return value[0][1]
-
def as_float(value):
    """Coerce `value` (after `as_scalar` unwrapping) to a float."""
    # FIXME - if value is a bool it will be coerced to 0.0 and consequently
    # compared as a float. This is probably not ideal.
    scalar = as_scalar(value)
    return float(scalar)
-
def as_long(value):
    """Coerce `value` (after `as_scalar` unwrapping) to a long integer."""
    scalar = as_scalar(value)
    return long(scalar)
-
def as_string(value):
    """Coerce `value` (after `as_scalar` unwrapping) to a unicode string.

    The value `False` is converted to the empty string.
    """
    scalar = as_scalar(value)
    if scalar is False:
        return ''
    return unicode(scalar)
-
def as_bool(value):
    """Coerce `value` (after `as_scalar` unwrapping) to a boolean."""
    scalar = as_scalar(value)
    return bool(scalar)
-
-
-# Node tests
-
class PrincipalTypeTest(object):
    """Node test for ``*``: matches any `START` event of the principal type."""
    __slots__ = ['principal_type']

    def __init__(self, principal_type):
        self.principal_type = principal_type

    def __call__(self, kind, data, pos, namespaces, variables):
        """Return the element's attributes (or `None` if it has none) on the
        attribute axis, `True` for any other axis, and `None` for events that
        are not `START` events.
        """
        if kind is not START:
            return None
        if self.principal_type is ATTRIBUTE:
            return data[1] or None
        return True

    def __repr__(self):
        return '*'
-
class QualifiedPrincipalTypeTest(object):
    """Node test for ``prefix:*``: matches any `START` event of the principal
    type whose name is in the namespace bound to the prefix."""
    __slots__ = ['principal_type', 'prefix']

    def __init__(self, principal_type, prefix):
        self.principal_type = principal_type
        self.prefix = prefix

    def __call__(self, kind, data, pos, namespaces, variables):
        """Return the attributes in the namespace (or `None` if there are
        none) on the attribute axis when the element has attributes;
        otherwise return whether the element name is in the namespace.
        Events other than `START` yield `None`.
        """
        namespace = Namespace(namespaces.get(self.prefix))
        if kind is not START:
            return None
        attrs = data[1]
        if self.principal_type is ATTRIBUTE and attrs:
            selected = [(name, value) for name, value in attrs
                        if name in namespace]
            return Attrs(selected) or None
        return data[0] in namespace

    def __repr__(self):
        return '%s:*' % self.prefix
-
-class LocalNameTest(object):
- """Node test that matches any event with the given principal type and
- local name.
- """
- __slots__ = ['principal_type', 'name']
- def __init__(self, principal_type, name):
- self.principal_type = principal_type
- self.name = name
- def __call__(self, kind, data, pos, namespaces, variables):
- if kind is START:
- if self.principal_type is ATTRIBUTE and self.name in data[1]:
- return Attrs([(self.name, data[1].get(self.name))])
- else:
- return data[0].localname == self.name
- def __repr__(self):
- return self.name
-
-class QualifiedNameTest(object):
- """Node test that matches any event with the given principal type and
- qualified name.
- """
- __slots__ = ['principal_type', 'prefix', 'name']
- def __init__(self, principal_type, prefix, name):
- self.principal_type = principal_type
- self.prefix = prefix
- self.name = name
- def __call__(self, kind, data, pos, namespaces, variables):
- qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
- if kind is START:
- if self.principal_type is ATTRIBUTE and qname in data[1]:
- return Attrs([(self.name, data[1].get(self.name))])
- else:
- return data[0] == qname
- def __repr__(self):
- return '%s:%s' % (self.prefix, self.name)
-
-class CommentNodeTest(object):
- """Node test that matches any comment events."""
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- return kind is COMMENT
- def __repr__(self):
- return 'comment()'
-
-class NodeTest(object):
- """Node test that matches any node."""
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- if kind is START:
- return True
- return kind, data, pos
- def __repr__(self):
- return 'node()'
-
-class ProcessingInstructionNodeTest(object):
- """Node test that matches any processing instruction event."""
- __slots__ = ['target']
- def __init__(self, target=None):
- self.target = target
- def __call__(self, kind, data, pos, namespaces, variables):
- return kind is PI and (not self.target or data[0] == self.target)
- def __repr__(self):
- arg = ''
- if self.target:
- arg = '"' + self.target + '"'
- return 'processing-instruction(%s)' % arg
-
-class TextNodeTest(object):
- """Node test that matches any text event."""
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- return kind is TEXT
- def __repr__(self):
- return 'text()'
-
-_nodetest_map = {'comment': CommentNodeTest, 'node': NodeTest,
- 'processing-instruction': ProcessingInstructionNodeTest,
- 'text': TextNodeTest}
-
-# Functions
-
-class Function(object):
- """Base class for function nodes in XPath expressions."""
-
-class BooleanFunction(Function):
- """The `boolean` function, which converts its argument to a boolean
- value.
- """
- __slots__ = ['expr']
- _return_type = bool
- def __init__(self, expr):
- self.expr = expr
- def __call__(self, kind, data, pos, namespaces, variables):
- val = self.expr(kind, data, pos, namespaces, variables)
- return as_bool(val)
- def __repr__(self):
- return 'boolean(%r)' % self.expr
-
-class CeilingFunction(Function):
- """The `ceiling` function, which returns the nearest lower integer number
- for the given number.
- """
- __slots__ = ['number']
- def __init__(self, number):
- self.number = number
- def __call__(self, kind, data, pos, namespaces, variables):
- number = self.number(kind, data, pos, namespaces, variables)
- return ceil(as_float(number))
- def __repr__(self):
- return 'ceiling(%r)' % self.number
-
-class ConcatFunction(Function):
- """The `concat` function, which concatenates (joins) the variable number of
- strings it gets as arguments.
- """
- __slots__ = ['exprs']
- def __init__(self, *exprs):
- self.exprs = exprs
- def __call__(self, kind, data, pos, namespaces, variables):
- strings = []
- for item in [expr(kind, data, pos, namespaces, variables)
- for expr in self.exprs]:
- strings.append(as_string(item))
- return ''.join(strings)
- def __repr__(self):
- return 'concat(%s)' % ', '.join([repr(expr) for expr in self.exprs])
-
-class ContainsFunction(Function):
- """The `contains` function, which returns whether a string contains a given
- substring.
- """
- __slots__ = ['string1', 'string2']
- def __init__(self, string1, string2):
- self.string1 = string1
- self.string2 = string2
- def __call__(self, kind, data, pos, namespaces, variables):
- string1 = self.string1(kind, data, pos, namespaces, variables)
- string2 = self.string2(kind, data, pos, namespaces, variables)
- return as_string(string2) in as_string(string1)
- def __repr__(self):
- return 'contains(%r, %r)' % (self.string1, self.string2)
-
-class MatchesFunction(Function):
- """The `matches` function, which returns whether a string matches a regular
- expression.
- """
- __slots__ = ['string1', 'string2']
- flag_mapping = {'s': re.S, 'm': re.M, 'i': re.I, 'x': re.X}
-
- def __init__(self, string1, string2, flags=''):
- self.string1 = string1
- self.string2 = string2
- self.flags = self._map_flags(flags)
- def __call__(self, kind, data, pos, namespaces, variables):
- string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
- string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
- return re.search(string2, string1, self.flags)
- def _map_flags(self, flags):
- return reduce(operator.or_,
- [self.flag_map[flag] for flag in flags], re.U)
- def __repr__(self):
- return 'contains(%r, %r)' % (self.string1, self.string2)
-
-class FalseFunction(Function):
- """The `false` function, which always returns the boolean `false` value."""
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- return False
- def __repr__(self):
- return 'false()'
-
-class FloorFunction(Function):
- """The `ceiling` function, which returns the nearest higher integer number
- for the given number.
- """
- __slots__ = ['number']
- def __init__(self, number):
- self.number = number
- def __call__(self, kind, data, pos, namespaces, variables):
- number = self.number(kind, data, pos, namespaces, variables)
- return floor(as_float(number))
- def __repr__(self):
- return 'floor(%r)' % self.number
-
-class LocalNameFunction(Function):
- """The `local-name` function, which returns the local name of the current
- element.
- """
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- if kind is START:
- return data[0].localname
- def __repr__(self):
- return 'local-name()'
-
-class NameFunction(Function):
- """The `name` function, which returns the qualified name of the current
- element.
- """
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- if kind is START:
- return data[0]
- def __repr__(self):
- return 'name()'
-
-class NamespaceUriFunction(Function):
- """The `namespace-uri` function, which returns the namespace URI of the
- current element.
- """
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- if kind is START:
- return data[0].namespace
- def __repr__(self):
- return 'namespace-uri()'
-
-class NotFunction(Function):
- """The `not` function, which returns the negated boolean value of its
- argument.
- """
- __slots__ = ['expr']
- def __init__(self, expr):
- self.expr = expr
- def __call__(self, kind, data, pos, namespaces, variables):
- return not as_bool(self.expr(kind, data, pos, namespaces, variables))
- def __repr__(self):
- return 'not(%s)' % self.expr
-
-class NormalizeSpaceFunction(Function):
- """The `normalize-space` function, which removes leading and trailing
- whitespace in the given string, and replaces multiple adjacent whitespace
- characters inside the string with a single space.
- """
- __slots__ = ['expr']
- _normalize = re.compile(r'\s{2,}').sub
- def __init__(self, expr):
- self.expr = expr
- def __call__(self, kind, data, pos, namespaces, variables):
- string = self.expr(kind, data, pos, namespaces, variables)
- return self._normalize(' ', as_string(string).strip())
- def __repr__(self):
- return 'normalize-space(%s)' % repr(self.expr)
-
-class NumberFunction(Function):
- """The `number` function that converts its argument to a number."""
- __slots__ = ['expr']
- def __init__(self, expr):
- self.expr = expr
- def __call__(self, kind, data, pos, namespaces, variables):
- val = self.expr(kind, data, pos, namespaces, variables)
- return as_float(val)
- def __repr__(self):
- return 'number(%r)' % self.expr
-
-class RoundFunction(Function):
- """The `round` function, which returns the nearest integer number for the
- given number.
- """
- __slots__ = ['number']
- def __init__(self, number):
- self.number = number
- def __call__(self, kind, data, pos, namespaces, variables):
- number = self.number(kind, data, pos, namespaces, variables)
- return round(as_float(number))
- def __repr__(self):
- return 'round(%r)' % self.number
-
-class StartsWithFunction(Function):
- """The `starts-with` function that returns whether one string starts with
- a given substring.
- """
- __slots__ = ['string1', 'string2']
- def __init__(self, string1, string2):
- self.string1 = string1
- self.string2 = string2
- def __call__(self, kind, data, pos, namespaces, variables):
- string1 = self.string1(kind, data, pos, namespaces, variables)
- string2 = self.string2(kind, data, pos, namespaces, variables)
- return as_string(string1).startswith(as_string(string2))
- def __repr__(self):
- return 'starts-with(%r, %r)' % (self.string1, self.string2)
-
-class StringLengthFunction(Function):
- """The `string-length` function that returns the length of the given
- string.
- """
- __slots__ = ['expr']
- def __init__(self, expr):
- self.expr = expr
- def __call__(self, kind, data, pos, namespaces, variables):
- string = self.expr(kind, data, pos, namespaces, variables)
- return len(as_string(string))
- def __repr__(self):
- return 'string-length(%r)' % self.expr
-
-class SubstringFunction(Function):
- """The `substring` function that returns the part of a string that starts
- at the given offset, and optionally limited to the given length.
- """
- __slots__ = ['string', 'start', 'length']
- def __init__(self, string, start, length=None):
- self.string = string
- self.start = start
- self.length = length
- def __call__(self, kind, data, pos, namespaces, variables):
- string = self.string(kind, data, pos, namespaces, variables)
- start = self.start(kind, data, pos, namespaces, variables)
- length = 0
- if self.length is not None:
- length = self.length(kind, data, pos, namespaces, variables)
- return string[as_long(start):len(as_string(string)) - as_long(length)]
- def __repr__(self):
- if self.length is not None:
- return 'substring(%r, %r, %r)' % (self.string, self.start,
- self.length)
- else:
- return 'substring(%r, %r)' % (self.string, self.start)
-
-class SubstringAfterFunction(Function):
- """The `substring-after` function that returns the part of a string that
- is found after the given substring.
- """
- __slots__ = ['string1', 'string2']
- def __init__(self, string1, string2):
- self.string1 = string1
- self.string2 = string2
- def __call__(self, kind, data, pos, namespaces, variables):
- string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
- string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
- index = string1.find(string2)
- if index >= 0:
- return string1[index + len(string2):]
- return ''
- def __repr__(self):
- return 'substring-after(%r, %r)' % (self.string1, self.string2)
-
-class SubstringBeforeFunction(Function):
- """The `substring-before` function that returns the part of a string that
- is found before the given substring.
- """
- __slots__ = ['string1', 'string2']
- def __init__(self, string1, string2):
- self.string1 = string1
- self.string2 = string2
- def __call__(self, kind, data, pos, namespaces, variables):
- string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
- string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
- index = string1.find(string2)
- if index >= 0:
- return string1[:index]
- return ''
- def __repr__(self):
- return 'substring-after(%r, %r)' % (self.string1, self.string2)
-
-class TranslateFunction(Function):
- """The `translate` function that translates a set of characters in a
- string to target set of characters.
- """
- __slots__ = ['string', 'fromchars', 'tochars']
- def __init__(self, string, fromchars, tochars):
- self.string = string
- self.fromchars = fromchars
- self.tochars = tochars
- def __call__(self, kind, data, pos, namespaces, variables):
- string = as_string(self.string(kind, data, pos, namespaces, variables))
- fromchars = as_string(self.fromchars(kind, data, pos, namespaces, variables))
- tochars = as_string(self.tochars(kind, data, pos, namespaces, variables))
- table = dict(zip([ord(c) for c in fromchars],
- [ord(c) for c in tochars]))
- return string.translate(table)
- def __repr__(self):
- return 'translate(%r, %r, %r)' % (self.string, self.fromchars,
- self.tochars)
-
-class TrueFunction(Function):
- """The `true` function, which always returns the boolean `true` value."""
- __slots__ = []
- def __call__(self, kind, data, pos, namespaces, variables):
- return True
- def __repr__(self):
- return 'true()'
-
-_function_map = {'boolean': BooleanFunction, 'ceiling': CeilingFunction,
- 'concat': ConcatFunction, 'contains': ContainsFunction,
- 'matches': MatchesFunction, 'false': FalseFunction, 'floor':
- FloorFunction, 'local-name': LocalNameFunction, 'name':
- NameFunction, 'namespace-uri': NamespaceUriFunction,
- 'normalize-space': NormalizeSpaceFunction, 'not': NotFunction,
- 'number': NumberFunction, 'round': RoundFunction,
- 'starts-with': StartsWithFunction, 'string-length':
- StringLengthFunction, 'substring': SubstringFunction,
- 'substring-after': SubstringAfterFunction, 'substring-before':
- SubstringBeforeFunction, 'translate': TranslateFunction,
- 'true': TrueFunction}
-
-# Literals & Variables
-
-class Literal(object):
- """Abstract base class for literal nodes."""
-
-class StringLiteral(Literal):
- """A string literal node."""
- __slots__ = ['text']
- def __init__(self, text):
- self.text = text
- def __call__(self, kind, data, pos, namespaces, variables):
- return self.text
- def __repr__(self):
- return '"%s"' % self.text
-
-class NumberLiteral(Literal):
- """A number literal node."""
- __slots__ = ['number']
- def __init__(self, number):
- self.number = number
- def __call__(self, kind, data, pos, namespaces, variables):
- return self.number
- def __repr__(self):
- return str(self.number)
-
-class VariableReference(Literal):
- """A variable reference node."""
- __slots__ = ['name']
- def __init__(self, name):
- self.name = name
- def __call__(self, kind, data, pos, namespaces, variables):
- return variables.get(self.name)
- def __repr__(self):
- return str(self.name)
-
-# Operators
-
-class AndOperator(object):
- """The boolean operator `and`."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = as_bool(self.lval(kind, data, pos, namespaces, variables))
- if not lval:
- return False
- rval = self.rval(kind, data, pos, namespaces, variables)
- return as_bool(rval)
- def __repr__(self):
- return '%s and %s' % (self.lval, self.rval)
-
-class EqualsOperator(object):
- """The equality operator `=`."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = as_scalar(self.lval(kind, data, pos, namespaces, variables))
- rval = as_scalar(self.rval(kind, data, pos, namespaces, variables))
- return lval == rval
- def __repr__(self):
- return '%s=%s' % (self.lval, self.rval)
-
-class NotEqualsOperator(object):
- """The equality operator `!=`."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = as_scalar(self.lval(kind, data, pos, namespaces, variables))
- rval = as_scalar(self.rval(kind, data, pos, namespaces, variables))
- return lval != rval
- def __repr__(self):
- return '%s!=%s' % (self.lval, self.rval)
-
-class OrOperator(object):
- """The boolean operator `or`."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = as_bool(self.lval(kind, data, pos, namespaces, variables))
- if lval:
- return True
- rval = self.rval(kind, data, pos, namespaces, variables)
- return as_bool(rval)
- def __repr__(self):
- return '%s or %s' % (self.lval, self.rval)
-
-class GreaterThanOperator(object):
- """The relational operator `>` (greater than)."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = self.lval(kind, data, pos, namespaces, variables)
- rval = self.rval(kind, data, pos, namespaces, variables)
- return as_float(lval) > as_float(rval)
- def __repr__(self):
- return '%s>%s' % (self.lval, self.rval)
-
-class GreaterThanOrEqualOperator(object):
- """The relational operator `>=` (greater than or equal)."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = self.lval(kind, data, pos, namespaces, variables)
- rval = self.rval(kind, data, pos, namespaces, variables)
- return as_float(lval) >= as_float(rval)
- def __repr__(self):
- return '%s>=%s' % (self.lval, self.rval)
-
-class LessThanOperator(object):
- """The relational operator `<` (less than)."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = self.lval(kind, data, pos, namespaces, variables)
- rval = self.rval(kind, data, pos, namespaces, variables)
- return as_float(lval) < as_float(rval)
- def __repr__(self):
- return '%s<%s' % (self.lval, self.rval)
-
-class LessThanOrEqualOperator(object):
- """The relational operator `<=` (less than or equal)."""
- __slots__ = ['lval', 'rval']
- def __init__(self, lval, rval):
- self.lval = lval
- self.rval = rval
- def __call__(self, kind, data, pos, namespaces, variables):
- lval = self.lval(kind, data, pos, namespaces, variables)
- rval = self.rval(kind, data, pos, namespaces, variables)
- return as_float(lval) <= as_float(rval)
- def __repr__(self):
- return '%s<=%s' % (self.lval, self.rval)
-
-_operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator,
- '>': GreaterThanOperator, '>=': GreaterThanOrEqualOperator,
- '<': LessThanOperator, '>=': LessThanOrEqualOperator}
-
-
-_DOTSLASHSLASH = (DESCENDANT_OR_SELF, PrincipalTypeTest(None), ())
-_DOTSLASH = (SELF, PrincipalTypeTest(None), ())
diff --git a/genshi/template/__init__.py b/genshi/template/__init__.py
deleted file mode 100644
index 47a9310..0000000
--- a/genshi/template/__init__.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2007 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Implementation of the template engine."""
-
-from genshi.template.base import Context, Template, TemplateError, \
- TemplateRuntimeError, TemplateSyntaxError, \
- BadDirectiveError
-from genshi.template.loader import TemplateLoader, TemplateNotFound
-from genshi.template.markup import MarkupTemplate
-from genshi.template.text import TextTemplate, OldTextTemplate, NewTextTemplate
-
-__docformat__ = 'restructuredtext en'
diff --git a/genshi/template/_ast24.py b/genshi/template/_ast24.py
deleted file mode 100644
index 05d241b..0000000
--- a/genshi/template/_ast24.py
+++ /dev/null
@@ -1,446 +0,0 @@
-# Generated automatically, please do not edit
-# Generator can be found in Genshi SVN, scripts/ast-generator.py
-
-__version__ = 43614
-
-class AST(object):
- _fields = None
- __doc__ = None
-
-class operator(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class Add(operator):
- _fields = None
- __doc__ = None
-
-class boolop(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class And(boolop):
- _fields = None
- __doc__ = None
-
-class stmt(AST):
- _fields = None
- __doc__ = None
- _attributes = ['lineno', 'col_offset']
-class Assert(stmt):
- _fields = ('test', 'msg')
- __doc__ = None
-
-class Assign(stmt):
- _fields = ('targets', 'value')
- __doc__ = None
-
-class expr(AST):
- _fields = None
- __doc__ = None
- _attributes = ['lineno', 'col_offset']
-class Attribute(expr):
- _fields = ('value', 'attr', 'ctx')
- __doc__ = None
-
-class AugAssign(stmt):
- _fields = ('target', 'op', 'value')
- __doc__ = None
-
-class expr_context(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class AugLoad(expr_context):
- _fields = None
- __doc__ = None
-
-class AugStore(expr_context):
- _fields = None
- __doc__ = None
-
-class BinOp(expr):
- _fields = ('left', 'op', 'right')
- __doc__ = None
-
-class BitAnd(operator):
- _fields = None
- __doc__ = None
-
-class BitOr(operator):
- _fields = None
- __doc__ = None
-
-class BitXor(operator):
- _fields = None
- __doc__ = None
-
-class BoolOp(expr):
- _fields = ('op', 'values')
- __doc__ = None
-
-class Break(stmt):
- _fields = None
- __doc__ = None
-
-class Call(expr):
- _fields = ('func', 'args', 'keywords', 'starargs', 'kwargs')
- __doc__ = None
-
-class ClassDef(stmt):
- _fields = ('name', 'bases', 'body')
- __doc__ = None
-
-class Compare(expr):
- _fields = ('left', 'ops', 'comparators')
- __doc__ = None
-
-class Continue(stmt):
- _fields = None
- __doc__ = None
-
-class Del(expr_context):
- _fields = None
- __doc__ = None
-
-class Delete(stmt):
- _fields = ('targets',)
- __doc__ = None
-
-class Dict(expr):
- _fields = ('keys', 'values')
- __doc__ = None
-
-class Div(operator):
- _fields = None
- __doc__ = None
-
-class slice(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class Ellipsis(slice):
- _fields = None
- __doc__ = None
-
-class cmpop(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class Eq(cmpop):
- _fields = None
- __doc__ = None
-
-class Exec(stmt):
- _fields = ('body', 'globals', 'locals')
- __doc__ = None
-
-class Expr(stmt):
- _fields = ('value',)
- __doc__ = None
-
-class mod(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class Expression(mod):
- _fields = ('body',)
- __doc__ = None
-
-class ExtSlice(slice):
- _fields = ('dims',)
- __doc__ = None
-
-class FloorDiv(operator):
- _fields = None
- __doc__ = None
-
-class For(stmt):
- _fields = ('target', 'iter', 'body', 'orelse')
- __doc__ = None
-
-class FunctionDef(stmt):
- _fields = ('name', 'args', 'body', 'decorators')
- __doc__ = None
-
-class GeneratorExp(expr):
- _fields = ('elt', 'generators')
- __doc__ = None
-
-class Global(stmt):
- _fields = ('names',)
- __doc__ = None
-
-class Gt(cmpop):
- _fields = None
- __doc__ = None
-
-class GtE(cmpop):
- _fields = None
- __doc__ = None
-
-class If(stmt):
- _fields = ('test', 'body', 'orelse')
- __doc__ = None
-
-class IfExp(expr):
- _fields = ('test', 'body', 'orelse')
- __doc__ = None
-
-class Import(stmt):
- _fields = ('names',)
- __doc__ = None
-
-class ImportFrom(stmt):
- _fields = ('module', 'names', 'level')
- __doc__ = None
-
-class In(cmpop):
- _fields = None
- __doc__ = None
-
-class Index(slice):
- _fields = ('value',)
- __doc__ = None
-
-class Interactive(mod):
- _fields = ('body',)
- __doc__ = None
-
-class unaryop(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-class Invert(unaryop):
- _fields = None
- __doc__ = None
-
-class Is(cmpop):
- _fields = None
- __doc__ = None
-
-class IsNot(cmpop):
- _fields = None
- __doc__ = None
-
-class LShift(operator):
- _fields = None
- __doc__ = None
-
-class Lambda(expr):
- _fields = ('args', 'body')
- __doc__ = None
-
-class List(expr):
- _fields = ('elts', 'ctx')
- __doc__ = None
-
-class ListComp(expr):
- _fields = ('elt', 'generators')
- __doc__ = None
-
-class Load(expr_context):
- _fields = None
- __doc__ = None
-
-class Lt(cmpop):
- _fields = None
- __doc__ = None
-
-class LtE(cmpop):
- _fields = None
- __doc__ = None
-
-class Mod(operator):
- _fields = None
- __doc__ = None
-
-class Module(mod):
- _fields = ('body',)
- __doc__ = None
-
-class Mult(operator):
- _fields = None
- __doc__ = None
-
-class Name(expr):
- _fields = ('id', 'ctx')
- __doc__ = None
-
-class Not(unaryop):
- _fields = None
- __doc__ = None
-
-class NotEq(cmpop):
- _fields = None
- __doc__ = None
-
-class NotIn(cmpop):
- _fields = None
- __doc__ = None
-
-class Num(expr):
- _fields = ('n',)
- __doc__ = None
-
-class Or(boolop):
- _fields = None
- __doc__ = None
-
-class Param(expr_context):
- _fields = None
- __doc__ = None
-
-class Pass(stmt):
- _fields = None
- __doc__ = None
-
-class Pow(operator):
- _fields = None
- __doc__ = None
-
-class Print(stmt):
- _fields = ('dest', 'values', 'nl')
- __doc__ = None
-
-class RShift(operator):
- _fields = None
- __doc__ = None
-
-class Raise(stmt):
- _fields = ('type', 'inst', 'tback')
- __doc__ = None
-
-class Repr(expr):
- _fields = ('value',)
- __doc__ = None
-
-class Return(stmt):
- _fields = ('value',)
- __doc__ = None
-
-class Slice(slice):
- _fields = ('lower', 'upper', 'step')
- __doc__ = None
-
-class Store(expr_context):
- _fields = None
- __doc__ = None
-
-class Str(expr):
- _fields = ('s',)
- __doc__ = None
-
-class Sub(operator):
- _fields = None
- __doc__ = None
-
-class Subscript(expr):
- _fields = ('value', 'slice', 'ctx')
- __doc__ = None
-
-class Suite(mod):
- _fields = ('body',)
- __doc__ = None
-
-class TryExcept(stmt):
- _fields = ('body', 'handlers', 'orelse')
- __doc__ = None
-
-class TryFinally(stmt):
- _fields = ('body', 'finalbody')
- __doc__ = None
-
-class Tuple(expr):
- _fields = ('elts', 'ctx')
- __doc__ = None
-
-class UAdd(unaryop):
- _fields = None
- __doc__ = None
-
-class USub(unaryop):
- _fields = None
- __doc__ = None
-
-class UnaryOp(expr):
- _fields = ('op', 'operand')
- __doc__ = None
-
-class While(stmt):
- _fields = ('test', 'body', 'orelse')
- __doc__ = None
-
-class With(stmt):
- _fields = ('context_expr', 'optional_vars', 'body')
- __doc__ = None
-
-class Yield(expr):
- _fields = ('value',)
- __doc__ = None
-
-class alias(AST):
- _fields = ('name', 'asname')
- __doc__ = None
-
-class arguments(AST):
- _fields = ('args', 'vararg', 'kwarg', 'defaults')
- __doc__ = None
-
-class boolop(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
-class cmpop(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
-class comprehension(AST):
- _fields = ('target', 'iter', 'ifs')
- __doc__ = None
-
-class excepthandler(AST):
- _fields = ('type', 'name', 'body', 'lineno', 'col_offset')
- __doc__ = None
-
-class expr(AST):
- _fields = None
- __doc__ = None
- _attributes = ['lineno', 'col_offset']
-
-class expr_context(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
-class keyword(AST):
- _fields = ('arg', 'value')
- __doc__ = None
-
-class mod(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
-class operator(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
-class slice(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
-class stmt(AST):
- _fields = None
- __doc__ = None
- _attributes = ['lineno', 'col_offset']
-
-class unaryop(AST):
- _fields = None
- __doc__ = None
- _attributes = []
-
diff --git a/genshi/template/ast24.py b/genshi/template/ast24.py
deleted file mode 100644
index af6dce9..0000000
--- a/genshi/template/ast24.py
+++ /dev/null
@@ -1,505 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2008-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Emulation of the proper abstract syntax tree API for Python 2.4."""
-
-import compiler
-import compiler.ast
-
-from genshi.template import _ast24 as _ast
-
-__all__ = ['_ast', 'parse']
-__docformat__ = 'restructuredtext en'
-
-
-def _new(cls, *args, **kwargs):
- ret = cls()
- if ret._fields:
- for attr, value in zip(ret._fields, args):
- if attr in kwargs:
- raise ValueError('Field set both in args and kwargs')
- setattr(ret, attr, value)
- for attr in kwargs:
- if (getattr(ret, '_fields', None) and attr in ret._fields) \
- or (getattr(ret, '_attributes', None) and
- attr in ret._attributes):
- setattr(ret, attr, kwargs[attr])
- return ret
-
-
-class ASTUpgrader(object):
- """Transformer changing structure of Python 2.4 ASTs to
- Python 2.5 ones.
-
- Transforms ``compiler.ast`` Abstract Syntax Tree to builtin ``_ast``.
- It can use fake`` _ast`` classes and this way allow ``_ast`` emulation
- in Python 2.4.
- """
-
- def __init__(self):
- self.out_flags = None
- self.lines = [-1]
-
- def _new(self, *args, **kwargs):
- return _new(lineno = self.lines[-1], *args, **kwargs)
-
- def visit(self, node):
- if node is None:
- return None
- if type(node) is tuple:
- return tuple([self.visit(n) for n in node])
- lno = getattr(node, 'lineno', None)
- if lno is not None:
- self.lines.append(lno)
- visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
- if visitor is None:
- raise Exception('Unhandled node type %r' % type(node))
-
- retval = visitor(node)
- if lno is not None:
- self.lines.pop()
- return retval
-
- def visit_Module(self, node):
- body = self.visit(node.node)
- if node.doc:
- body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
- return self._new(_ast.Module, body)
-
- def visit_Expression(self, node):
- return self._new(_ast.Expression, self.visit(node.node))
-
- def _extract_args(self, node):
- tab = node.argnames[:]
- if node.flags & compiler.ast.CO_VARKEYWORDS:
- kwarg = tab[-1]
- tab = tab[:-1]
- else:
- kwarg = None
-
- if node.flags & compiler.ast.CO_VARARGS:
- vararg = tab[-1]
- tab = tab[:-1]
- else:
- vararg = None
-
- def _tup(t):
- if isinstance(t, str):
- return self._new(_ast.Name, t, _ast.Store())
- elif isinstance(t, tuple):
- elts = [_tup(x) for x in t]
- return self._new(_ast.Tuple, elts, _ast.Store())
- else:
- raise NotImplemented
-
- args = []
- for arg in tab:
- if isinstance(arg, str):
- args.append(self._new(_ast.Name, arg, _ast.Param()))
- elif isinstance(arg, tuple):
- args.append(_tup(arg))
- else:
- assert False, node.__class__
-
- defaults = [self.visit(d) for d in node.defaults]
- return self._new(_ast.arguments, args, vararg, kwarg, defaults)
-
-
- def visit_Function(self, node):
- if getattr(node, 'decorators', ()):
- decorators = [self.visit(d) for d in node.decorators.nodes]
- else:
- decorators = []
-
- args = self._extract_args(node)
- body = self.visit(node.code)
- if node.doc:
- body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
- return self._new(_ast.FunctionDef, node.name, args, body, decorators)
-
- def visit_Class(self, node):
- #self.name_types.append(_ast.Load)
- bases = [self.visit(b) for b in node.bases]
- #self.name_types.pop()
- body = self.visit(node.code)
- if node.doc:
- body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body
- return self._new(_ast.ClassDef, node.name, bases, body)
-
- def visit_Return(self, node):
- return self._new(_ast.Return, self.visit(node.value))
-
- def visit_Assign(self, node):
- #self.name_types.append(_ast.Store)
- targets = [self.visit(t) for t in node.nodes]
- #self.name_types.pop()
- return self._new(_ast.Assign, targets, self.visit(node.expr))
-
- aug_operators = {
- '+=': _ast.Add,
- '/=': _ast.Div,
- '//=': _ast.FloorDiv,
- '<<=': _ast.LShift,
- '%=': _ast.Mod,
- '*=': _ast.Mult,
- '**=': _ast.Pow,
- '>>=': _ast.RShift,
- '-=': _ast.Sub,
- }
-
- def visit_AugAssign(self, node):
- target = self.visit(node.node)
-
- # Because it's AugAssign target can't be list nor tuple
- # so we only have to change context of one node
- target.ctx = _ast.Store()
- op = self.aug_operators[node.op]()
- return self._new(_ast.AugAssign, target, op, self.visit(node.expr))
-
- def _visit_Print(nl):
- def _visit(self, node):
- values = [self.visit(v) for v in node.nodes]
- return self._new(_ast.Print, self.visit(node.dest), values, nl)
- return _visit
-
- visit_Print = _visit_Print(False)
- visit_Printnl = _visit_Print(True)
- del _visit_Print
-
- def visit_For(self, node):
- return self._new(_ast.For, self.visit(node.assign), self.visit(node.list),
- self.visit(node.body), self.visit(node.else_))
-
- def visit_While(self, node):
- return self._new(_ast.While, self.visit(node.test), self.visit(node.body),
- self.visit(node.else_))
-
- def visit_If(self, node):
- def _level(tests, else_):
- test = self.visit(tests[0][0])
- body = self.visit(tests[0][1])
- if len(tests) == 1:
- orelse = self.visit(else_)
- else:
- orelse = [_level(tests[1:], else_)]
- return self._new(_ast.If, test, body, orelse)
- return _level(node.tests, node.else_)
-
- def visit_With(self, node):
- return self._new(_ast.With, self.visit(node.expr),
- self.visit(node.vars), self.visit(node.body))
-
- def visit_Raise(self, node):
- return self._new(_ast.Raise, self.visit(node.expr1),
- self.visit(node.expr2), self.visit(node.expr3))
-
- def visit_TryExcept(self, node):
- handlers = []
- for type, name, body in node.handlers:
- handlers.append(self._new(_ast.excepthandler, self.visit(type),
- self.visit(name), self.visit(body)))
- return self._new(_ast.TryExcept, self.visit(node.body),
- handlers, self.visit(node.else_))
-
- def visit_TryFinally(self, node):
- return self._new(_ast.TryFinally, self.visit(node.body),
- self.visit(node.final))
-
- def visit_Assert(self, node):
- return self._new(_ast.Assert, self.visit(node.test), self.visit(node.fail))
-
- def visit_Import(self, node):
- names = [self._new(_ast.alias, n[0], n[1]) for n in node.names]
- return self._new(_ast.Import, names)
-
- def visit_From(self, node):
- names = [self._new(_ast.alias, n[0], n[1]) for n in node.names]
- return self._new(_ast.ImportFrom, node.modname, names, 0)
-
- def visit_Exec(self, node):
- return self._new(_ast.Exec, self.visit(node.expr),
- self.visit(node.locals), self.visit(node.globals))
-
- def visit_Global(self, node):
- return self._new(_ast.Global, node.names[:])
-
- def visit_Discard(self, node):
- return self._new(_ast.Expr, self.visit(node.expr))
-
- def _map_class(to):
- def _visit(self, node):
- return self._new(to)
- return _visit
-
- visit_Pass = _map_class(_ast.Pass)
- visit_Break = _map_class(_ast.Break)
- visit_Continue = _map_class(_ast.Continue)
-
- def _visit_BinOperator(opcls):
- def _visit(self, node):
- return self._new(_ast.BinOp, self.visit(node.left),
- opcls(), self.visit(node.right))
- return _visit
- visit_Add = _visit_BinOperator(_ast.Add)
- visit_Div = _visit_BinOperator(_ast.Div)
- visit_FloorDiv = _visit_BinOperator(_ast.FloorDiv)
- visit_LeftShift = _visit_BinOperator(_ast.LShift)
- visit_Mod = _visit_BinOperator(_ast.Mod)
- visit_Mul = _visit_BinOperator(_ast.Mult)
- visit_Power = _visit_BinOperator(_ast.Pow)
- visit_RightShift = _visit_BinOperator(_ast.RShift)
- visit_Sub = _visit_BinOperator(_ast.Sub)
- del _visit_BinOperator
-
- def _visit_BitOperator(opcls):
- def _visit(self, node):
- def _make(nodes):
- if len(nodes) == 1:
- return self.visit(nodes[0])
- left = _make(nodes[:-1])
- right = self.visit(nodes[-1])
- return self._new(_ast.BinOp, left, opcls(), right)
- return _make(node.nodes)
- return _visit
- visit_Bitand = _visit_BitOperator(_ast.BitAnd)
- visit_Bitor = _visit_BitOperator(_ast.BitOr)
- visit_Bitxor = _visit_BitOperator(_ast.BitXor)
- del _visit_BitOperator
-
- def _visit_UnaryOperator(opcls):
- def _visit(self, node):
- return self._new(_ast.UnaryOp, opcls(), self.visit(node.expr))
- return _visit
-
- visit_Invert = _visit_UnaryOperator(_ast.Invert)
- visit_Not = _visit_UnaryOperator(_ast.Not)
- visit_UnaryAdd = _visit_UnaryOperator(_ast.UAdd)
- visit_UnarySub = _visit_UnaryOperator(_ast.USub)
- del _visit_UnaryOperator
-
- def _visit_BoolOperator(opcls):
- def _visit(self, node):
- values = [self.visit(n) for n in node.nodes]
- return self._new(_ast.BoolOp, opcls(), values)
- return _visit
- visit_And = _visit_BoolOperator(_ast.And)
- visit_Or = _visit_BoolOperator(_ast.Or)
- del _visit_BoolOperator
-
- cmp_operators = {
- '==': _ast.Eq,
- '!=': _ast.NotEq,
- '<': _ast.Lt,
- '<=': _ast.LtE,
- '>': _ast.Gt,
- '>=': _ast.GtE,
- 'is': _ast.Is,
- 'is not': _ast.IsNot,
- 'in': _ast.In,
- 'not in': _ast.NotIn,
- }
-
- def visit_Compare(self, node):
- left = self.visit(node.expr)
- ops = []
- comparators = []
- for optype, expr in node.ops:
- ops.append(self.cmp_operators[optype]())
- comparators.append(self.visit(expr))
- return self._new(_ast.Compare, left, ops, comparators)
-
- def visit_Lambda(self, node):
- args = self._extract_args(node)
- body = self.visit(node.code)
- return self._new(_ast.Lambda, args, body)
-
- def visit_IfExp(self, node):
- return self._new(_ast.IfExp, self.visit(node.test), self.visit(node.then),
- self.visit(node.else_))
-
- def visit_Dict(self, node):
- keys = [self.visit(x[0]) for x in node.items]
- values = [self.visit(x[1]) for x in node.items]
- return self._new(_ast.Dict, keys, values)
-
- def visit_ListComp(self, node):
- generators = [self.visit(q) for q in node.quals]
- return self._new(_ast.ListComp, self.visit(node.expr), generators)
-
- def visit_GenExprInner(self, node):
- generators = [self.visit(q) for q in node.quals]
- return self._new(_ast.GeneratorExp, self.visit(node.expr), generators)
-
- def visit_GenExpr(self, node):
- return self.visit(node.code)
-
- def visit_GenExprFor(self, node):
- ifs = [self.visit(i) for i in node.ifs]
- return self._new(_ast.comprehension, self.visit(node.assign),
- self.visit(node.iter), ifs)
-
- def visit_ListCompFor(self, node):
- ifs = [self.visit(i) for i in node.ifs]
- return self._new(_ast.comprehension, self.visit(node.assign),
- self.visit(node.list), ifs)
-
- def visit_GenExprIf(self, node):
- return self.visit(node.test)
- visit_ListCompIf = visit_GenExprIf
-
- def visit_Yield(self, node):
- return self._new(_ast.Yield, self.visit(node.value))
-
- def visit_CallFunc(self, node):
- args = []
- keywords = []
- for arg in node.args:
- if isinstance(arg, compiler.ast.Keyword):
- keywords.append(self._new(_ast.keyword, arg.name,
- self.visit(arg.expr)))
- else:
- args.append(self.visit(arg))
- return self._new(_ast.Call, self.visit(node.node), args, keywords,
- self.visit(node.star_args), self.visit(node.dstar_args))
-
- def visit_Backquote(self, node):
- return self._new(_ast.Repr, self.visit(node.expr))
-
- def visit_Const(self, node):
- if node.value is None: # appears in slices
- return None
- elif isinstance(node.value, basestring):
- return self._new(_ast.Str, node.value)
- else:
- return self._new(_ast.Num, node.value)
-
- def visit_Name(self, node):
- return self._new(_ast.Name, node.name, _ast.Load())
-
- def visit_Getattr(self, node):
- return self._new(_ast.Attribute, self.visit(node.expr), node.attrname,
- _ast.Load())
-
- def visit_Tuple(self, node):
- nodes = [self.visit(n) for n in node.nodes]
- return self._new(_ast.Tuple, nodes, _ast.Load())
-
- def visit_List(self, node):
- nodes = [self.visit(n) for n in node.nodes]
- return self._new(_ast.List, nodes, _ast.Load())
-
- def get_ctx(self, flags):
- if flags == 'OP_DELETE':
- return _ast.Del()
- elif flags == 'OP_APPLY':
- return _ast.Load()
- elif flags == 'OP_ASSIGN':
- return _ast.Store()
- else:
- # FIXME Exception here
- assert False, repr(flags)
-
- def visit_AssName(self, node):
- self.out_flags = node.flags
- ctx = self.get_ctx(node.flags)
- return self._new(_ast.Name, node.name, ctx)
-
- def visit_AssAttr(self, node):
- self.out_flags = node.flags
- ctx = self.get_ctx(node.flags)
- return self._new(_ast.Attribute, self.visit(node.expr),
- node.attrname, ctx)
-
- def _visit_AssCollection(cls):
- def _visit(self, node):
- flags = None
- elts = []
- for n in node.nodes:
- elts.append(self.visit(n))
- if flags is None:
- flags = self.out_flags
- else:
- assert flags == self.out_flags
- self.out_flags = flags
- ctx = self.get_ctx(flags)
- return self._new(cls, elts, ctx)
- return _visit
-
- visit_AssList = _visit_AssCollection(_ast.List)
- visit_AssTuple = _visit_AssCollection(_ast.Tuple)
- del _visit_AssCollection
-
- def visit_Slice(self, node):
- lower = self.visit(node.lower)
- upper = self.visit(node.upper)
- ctx = self.get_ctx(node.flags)
- self.out_flags = node.flags
- return self._new(_ast.Subscript, self.visit(node.expr),
- self._new(_ast.Slice, lower, upper, None), ctx)
-
- def visit_Subscript(self, node):
- ctx = self.get_ctx(node.flags)
- subs = [self.visit(s) for s in node.subs]
-
- advanced = (_ast.Slice, _ast.Ellipsis)
- slices = []
- nonindex = False
- for sub in subs:
- if isinstance(sub, advanced):
- nonindex = True
- slices.append(sub)
- else:
- slices.append(self._new(_ast.Index, sub))
- if len(slices) == 1:
- slice = slices[0]
- elif nonindex:
- slice = self._new(_ast.ExtSlice, slices)
- else:
- slice = self._new(_ast.Tuple, slices, _ast.Load())
-
- self.out_flags = node.flags
- return self._new(_ast.Subscript, self.visit(node.expr), slice, ctx)
-
- def visit_Sliceobj(self, node):
- a = [self.visit(n) for n in node.nodes + [None]*(3 - len(node.nodes))]
- return self._new(_ast.Slice, a[0], a[1], a[2])
-
- def visit_Ellipsis(self, node):
- return self._new(_ast.Ellipsis)
-
- def visit_Stmt(self, node):
- def _check_del(n):
- # del x is just AssName('x', 'OP_DELETE')
- # we want to transform it to Delete([Name('x', Del())])
- dcls = (_ast.Name, _ast.List, _ast.Subscript, _ast.Attribute)
- if isinstance(n, dcls) and isinstance(n.ctx, _ast.Del):
- return self._new(_ast.Delete, [n])
- elif isinstance(n, _ast.Tuple) and isinstance(n.ctx, _ast.Del):
- # unpack last tuple to avoid making del (x, y, z,);
- # out of del x, y, z; (there's no difference between
- # this two in compiler.ast)
- return self._new(_ast.Delete, n.elts)
- else:
- return n
- def _keep(n):
- if isinstance(n, _ast.Expr) and n.value is None:
- return False
- else:
- return True
- return [s for s in [_check_del(self.visit(n)) for n in node.nodes]
- if _keep(s)]
-
-
-def parse(source, mode):
- node = compiler.parse(source, mode)
- return ASTUpgrader().visit(node)
diff --git a/genshi/template/astutil.py b/genshi/template/astutil.py
deleted file mode 100644
index c3ad107..0000000
--- a/genshi/template/astutil.py
+++ /dev/null
@@ -1,784 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2008-2010 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Support classes for generating code from abstract syntax trees."""
-
-try:
- import _ast
-except ImportError:
- from genshi.template.ast24 import _ast, parse
-else:
- def parse(source, mode):
- return compile(source, '', mode, _ast.PyCF_ONLY_AST)
-
-
-__docformat__ = 'restructuredtext en'
-
-
-class ASTCodeGenerator(object):
- """General purpose base class for AST transformations.
-
- Every visitor method can be overridden to return an AST node that has been
- altered or replaced in some way.
- """
- def __init__(self, tree):
- self.lines_info = []
- self.line_info = None
- self.code = ''
- self.line = None
- self.last = None
- self.indent = 0
- self.blame_stack = []
- self.visit(tree)
- if self.line.strip():
- self.code += self.line + '\n'
- self.lines_info.append(self.line_info)
- self.line = None
- self.line_info = None
-
- def _change_indent(self, delta):
- self.indent += delta
-
- def _new_line(self):
- if self.line is not None:
- self.code += self.line + '\n'
- self.lines_info.append(self.line_info)
- self.line = ' '*4*self.indent
- if len(self.blame_stack) == 0:
- self.line_info = []
- self.last = None
- else:
- self.line_info = [(0, self.blame_stack[-1],)]
- self.last = self.blame_stack[-1]
-
- def _write(self, s):
- if len(s) == 0:
- return
- if len(self.blame_stack) == 0:
- if self.last is not None:
- self.last = None
- self.line_info.append((len(self.line), self.last))
- else:
- if self.last != self.blame_stack[-1]:
- self.last = self.blame_stack[-1]
- self.line_info.append((len(self.line), self.last))
- self.line += s
-
- def visit(self, node):
- if node is None:
- return None
- if type(node) is tuple:
- return tuple([self.visit(n) for n in node])
- try:
- self.blame_stack.append((node.lineno, node.col_offset,))
- info = True
- except AttributeError:
- info = False
- visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
- if visitor is None:
- raise Exception('Unhandled node type %r' % type(node))
- ret = visitor(node)
- if info:
- self.blame_stack.pop()
- return ret
-
- def visit_Module(self, node):
- for n in node.body:
- self.visit(n)
- visit_Interactive = visit_Module
- visit_Suite = visit_Module
-
- def visit_Expression(self, node):
- self._new_line()
- return self.visit(node.body)
-
- # arguments = (expr* args, identifier? vararg,
- # identifier? kwarg, expr* defaults)
- def visit_arguments(self, node):
- first = True
- no_default_count = len(node.args) - len(node.defaults)
- for i, arg in enumerate(node.args):
- if not first:
- self._write(', ')
- else:
- first = False
- self.visit(arg)
- if i >= no_default_count:
- self._write('=')
- self.visit(node.defaults[i - no_default_count])
- if getattr(node, 'vararg', None):
- if not first:
- self._write(', ')
- else:
- first = False
- self._write('*' + node.vararg)
- if getattr(node, 'kwarg', None):
- if not first:
- self._write(', ')
- else:
- first = False
- self._write('**' + node.kwarg)
-
- # FunctionDef(identifier name, arguments args,
- # stmt* body, expr* decorator_list)
- def visit_FunctionDef(self, node):
- decarators = ()
- if hasattr(node, 'decorator_list'):
- decorators = getattr(node, 'decorator_list')
- else: # different name in earlier Python versions
- decorators = getattr(node, 'decorators', ())
- for decorator in decorators:
- self._new_line()
- self._write('@')
- self.visit(decorator)
- self._new_line()
- self._write('def ' + node.name + '(')
- self.visit(node.args)
- self._write('):')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
-
- # ClassDef(identifier name, expr* bases, stmt* body)
- def visit_ClassDef(self, node):
- self._new_line()
- self._write('class ' + node.name)
- if node.bases:
- self._write('(')
- self.visit(node.bases[0])
- for base in node.bases[1:]:
- self._write(', ')
- self.visit(base)
- self._write(')')
- self._write(':')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
-
- # Return(expr? value)
- def visit_Return(self, node):
- self._new_line()
- self._write('return')
- if getattr(node, 'value', None):
- self._write(' ')
- self.visit(node.value)
-
- # Delete(expr* targets)
- def visit_Delete(self, node):
- self._new_line()
- self._write('del ')
- self.visit(node.targets[0])
- for target in node.targets[1:]:
- self._write(', ')
- self.visit(target)
-
- # Assign(expr* targets, expr value)
- def visit_Assign(self, node):
- self._new_line()
- for target in node.targets:
- self.visit(target)
- self._write(' = ')
- self.visit(node.value)
-
- # AugAssign(expr target, operator op, expr value)
- def visit_AugAssign(self, node):
- self._new_line()
- self.visit(node.target)
- self._write(' ' + self.binary_operators[node.op.__class__] + '= ')
- self.visit(node.value)
-
- # Print(expr? dest, expr* values, bool nl)
- def visit_Print(self, node):
- self._new_line()
- self._write('print')
- if getattr(node, 'dest', None):
- self._write(' >> ')
- self.visit(node.dest)
- if getattr(node, 'values', None):
- self._write(', ')
- else:
- self._write(' ')
- if getattr(node, 'values', None):
- self.visit(node.values[0])
- for value in node.values[1:]:
- self._write(', ')
- self.visit(value)
- if not node.nl:
- self._write(',')
-
- # For(expr target, expr iter, stmt* body, stmt* orelse)
- def visit_For(self, node):
- self._new_line()
- self._write('for ')
- self.visit(node.target)
- self._write(' in ')
- self.visit(node.iter)
- self._write(':')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
- if getattr(node, 'orelse', None):
- self._new_line()
- self._write('else:')
- self._change_indent(1)
- for statement in node.orelse:
- self.visit(statement)
- self._change_indent(-1)
-
- # While(expr test, stmt* body, stmt* orelse)
- def visit_While(self, node):
- self._new_line()
- self._write('while ')
- self.visit(node.test)
- self._write(':')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
- if getattr(node, 'orelse', None):
- self._new_line()
- self._write('else:')
- self._change_indent(1)
- for statement in node.orelse:
- self.visit(statement)
- self._change_indent(-1)
-
- # If(expr test, stmt* body, stmt* orelse)
- def visit_If(self, node):
- self._new_line()
- self._write('if ')
- self.visit(node.test)
- self._write(':')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
- if getattr(node, 'orelse', None):
- self._new_line()
- self._write('else:')
- self._change_indent(1)
- for statement in node.orelse:
- self.visit(statement)
- self._change_indent(-1)
-
- # With(expr context_expr, expr? optional_vars, stmt* body)
- def visit_With(self, node):
- self._new_line()
- self._write('with ')
- self.visit(node.context_expr)
- if getattr(node, 'optional_vars', None):
- self._write(' as ')
- self.visit(node.optional_vars)
- self._write(':')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
-
-
- # Raise(expr? type, expr? inst, expr? tback)
- def visit_Raise(self, node):
- self._new_line()
- self._write('raise')
- if not node.type:
- return
- self._write(' ')
- self.visit(node.type)
- if not node.inst:
- return
- self._write(', ')
- self.visit(node.inst)
- if not node.tback:
- return
- self._write(', ')
- self.visit(node.tback)
-
- # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
- def visit_TryExcept(self, node):
- self._new_line()
- self._write('try:')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
- if getattr(node, 'handlers', None):
- for handler in node.handlers:
- self.visit(handler)
- self._new_line()
- if getattr(node, 'orelse', None):
- self._write('else:')
- self._change_indent(1)
- for statement in node.orelse:
- self.visit(statement)
- self._change_indent(-1)
-
- # excepthandler = (expr? type, expr? name, stmt* body)
- def visit_ExceptHandler(self, node):
- self._new_line()
- self._write('except')
- if getattr(node, 'type', None):
- self._write(' ')
- self.visit(node.type)
- if getattr(node, 'name', None):
- self._write(', ')
- self.visit(node.name)
- self._write(':')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
- visit_excepthandler = visit_ExceptHandler
-
- # TryFinally(stmt* body, stmt* finalbody)
- def visit_TryFinally(self, node):
- self._new_line()
- self._write('try:')
- self._change_indent(1)
- for statement in node.body:
- self.visit(statement)
- self._change_indent(-1)
-
- if getattr(node, 'finalbody', None):
- self._new_line()
- self._write('finally:')
- self._change_indent(1)
- for statement in node.finalbody:
- self.visit(statement)
- self._change_indent(-1)
-
- # Assert(expr test, expr? msg)
- def visit_Assert(self, node):
- self._new_line()
- self._write('assert ')
- self.visit(node.test)
- if getattr(node, 'msg', None):
- self._write(', ')
- self.visit(node.msg)
-
- def visit_alias(self, node):
- self._write(node.name)
- if getattr(node, 'asname', None):
- self._write(' as ')
- self._write(node.asname)
-
- # Import(alias* names)
- def visit_Import(self, node):
- self._new_line()
- self._write('import ')
- self.visit(node.names[0])
- for name in node.names[1:]:
- self._write(', ')
- self.visit(name)
-
- # ImportFrom(identifier module, alias* names, int? level)
- def visit_ImportFrom(self, node):
- self._new_line()
- self._write('from ')
- if node.level:
- self._write('.' * node.level)
- self._write(node.module)
- self._write(' import ')
- self.visit(node.names[0])
- for name in node.names[1:]:
- self._write(', ')
- self.visit(name)
-
- # Exec(expr body, expr? globals, expr? locals)
- def visit_Exec(self, node):
- self._new_line()
- self._write('exec ')
- self.visit(node.body)
- if not node.globals:
- return
- self._write(', ')
- self.visit(node.globals)
- if not node.locals:
- return
- self._write(', ')
- self.visit(node.locals)
-
- # Global(identifier* names)
- def visit_Global(self, node):
- self._new_line()
- self._write('global ')
- self.visit(node.names[0])
- for name in node.names[1:]:
- self._write(', ')
- self.visit(name)
-
- # Expr(expr value)
- def visit_Expr(self, node):
- self._new_line()
- self.visit(node.value)
-
- # Pass
- def visit_Pass(self, node):
- self._new_line()
- self._write('pass')
-
- # Break
- def visit_Break(self, node):
- self._new_line()
- self._write('break')
-
- # Continue
- def visit_Continue(self, node):
- self._new_line()
- self._write('continue')
-
- ### EXPRESSIONS
- def with_parens(f):
- def _f(self, node):
- self._write('(')
- f(self, node)
- self._write(')')
- return _f
-
- bool_operators = {_ast.And: 'and', _ast.Or: 'or'}
-
- # BoolOp(boolop op, expr* values)
- @with_parens
- def visit_BoolOp(self, node):
- joiner = ' ' + self.bool_operators[node.op.__class__] + ' '
- self.visit(node.values[0])
- for value in node.values[1:]:
- self._write(joiner)
- self.visit(value)
-
- binary_operators = {
- _ast.Add: '+',
- _ast.Sub: '-',
- _ast.Mult: '*',
- _ast.Div: '/',
- _ast.Mod: '%',
- _ast.Pow: '**',
- _ast.LShift: '<<',
- _ast.RShift: '>>',
- _ast.BitOr: '|',
- _ast.BitXor: '^',
- _ast.BitAnd: '&',
- _ast.FloorDiv: '//'
- }
-
- # BinOp(expr left, operator op, expr right)
- @with_parens
- def visit_BinOp(self, node):
- self.visit(node.left)
- self._write(' ' + self.binary_operators[node.op.__class__] + ' ')
- self.visit(node.right)
-
- unary_operators = {
- _ast.Invert: '~',
- _ast.Not: 'not',
- _ast.UAdd: '+',
- _ast.USub: '-',
- }
-
- # UnaryOp(unaryop op, expr operand)
- def visit_UnaryOp(self, node):
- self._write(self.unary_operators[node.op.__class__] + ' ')
- self.visit(node.operand)
-
- # Lambda(arguments args, expr body)
- @with_parens
- def visit_Lambda(self, node):
- self._write('lambda ')
- self.visit(node.args)
- self._write(': ')
- self.visit(node.body)
-
- # IfExp(expr test, expr body, expr orelse)
- @with_parens
- def visit_IfExp(self, node):
- self.visit(node.body)
- self._write(' if ')
- self.visit(node.test)
- self._write(' else ')
- self.visit(node.orelse)
-
- # Dict(expr* keys, expr* values)
- def visit_Dict(self, node):
- self._write('{')
- for key, value in zip(node.keys, node.values):
- self.visit(key)
- self._write(': ')
- self.visit(value)
- self._write(', ')
- self._write('}')
-
- # ListComp(expr elt, comprehension* generators)
- def visit_ListComp(self, node):
- self._write('[')
- self.visit(node.elt)
- for generator in node.generators:
- # comprehension = (expr target, expr iter, expr* ifs)
- self._write(' for ')
- self.visit(generator.target)
- self._write(' in ')
- self.visit(generator.iter)
- for ifexpr in generator.ifs:
- self._write(' if ')
- self.visit(ifexpr)
- self._write(']')
-
- # GeneratorExp(expr elt, comprehension* generators)
- def visit_GeneratorExp(self, node):
- self._write('(')
- self.visit(node.elt)
- for generator in node.generators:
- # comprehension = (expr target, expr iter, expr* ifs)
- self._write(' for ')
- self.visit(generator.target)
- self._write(' in ')
- self.visit(generator.iter)
- for ifexpr in generator.ifs:
- self._write(' if ')
- self.visit(ifexpr)
- self._write(')')
-
- # Yield(expr? value)
- def visit_Yield(self, node):
- self._write('yield')
- if getattr(node, 'value', None):
- self._write(' ')
- self.visit(node.value)
-
- comparision_operators = {
- _ast.Eq: '==',
- _ast.NotEq: '!=',
- _ast.Lt: '<',
- _ast.LtE: '<=',
- _ast.Gt: '>',
- _ast.GtE: '>=',
- _ast.Is: 'is',
- _ast.IsNot: 'is not',
- _ast.In: 'in',
- _ast.NotIn: 'not in',
- }
-
- # Compare(expr left, cmpop* ops, expr* comparators)
- @with_parens
- def visit_Compare(self, node):
- self.visit(node.left)
- for op, comparator in zip(node.ops, node.comparators):
- self._write(' ' + self.comparision_operators[op.__class__] + ' ')
- self.visit(comparator)
-
- # Call(expr func, expr* args, keyword* keywords,
- # expr? starargs, expr? kwargs)
- def visit_Call(self, node):
- self.visit(node.func)
- self._write('(')
- first = True
- for arg in node.args:
- if not first:
- self._write(', ')
- first = False
- self.visit(arg)
-
- for keyword in node.keywords:
- if not first:
- self._write(', ')
- first = False
- # keyword = (identifier arg, expr value)
- self._write(keyword.arg)
- self._write('=')
- self.visit(keyword.value)
- if getattr(node, 'starargs', None):
- if not first:
- self._write(', ')
- first = False
- self._write('*')
- self.visit(node.starargs)
-
- if getattr(node, 'kwargs', None):
- if not first:
- self._write(', ')
- first = False
- self._write('**')
- self.visit(node.kwargs)
- self._write(')')
-
- # Repr(expr value)
- def visit_Repr(self, node):
- self._write('`')
- self.visit(node.value)
- self._write('`')
-
- # Num(object n)
- def visit_Num(self, node):
- self._write(repr(node.n))
-
- # Str(string s)
- def visit_Str(self, node):
- self._write(repr(node.s))
-
- # Attribute(expr value, identifier attr, expr_context ctx)
- def visit_Attribute(self, node):
- self.visit(node.value)
- self._write('.')
- self._write(node.attr)
-
- # Subscript(expr value, slice slice, expr_context ctx)
- def visit_Subscript(self, node):
- self.visit(node.value)
- self._write('[')
- def _process_slice(node):
- if isinstance(node, _ast.Ellipsis):
- self._write('...')
- elif isinstance(node, _ast.Slice):
- if getattr(node, 'lower', 'None'):
- self.visit(node.lower)
- self._write(':')
- if getattr(node, 'upper', None):
- self.visit(node.upper)
- if getattr(node, 'step', None):
- self._write(':')
- self.visit(node.step)
- elif isinstance(node, _ast.Index):
- self.visit(node.value)
- elif isinstance(node, _ast.ExtSlice):
- self.visit(node.dims[0])
- for dim in node.dims[1:]:
- self._write(', ')
- self.visit(dim)
- else:
- raise NotImplemented('Slice type not implemented')
- _process_slice(node.slice)
- self._write(']')
-
- # Name(identifier id, expr_context ctx)
- def visit_Name(self, node):
- self._write(node.id)
-
- # List(expr* elts, expr_context ctx)
- def visit_List(self, node):
- self._write('[')
- for elt in node.elts:
- self.visit(elt)
- self._write(', ')
- self._write(']')
-
- # Tuple(expr *elts, expr_context ctx)
- def visit_Tuple(self, node):
- self._write('(')
- for elt in node.elts:
- self.visit(elt)
- self._write(', ')
- self._write(')')
-
-
-class ASTTransformer(object):
- """General purpose base class for AST transformations.
-
- Every visitor method can be overridden to return an AST node that has been
- altered or replaced in some way.
- """
-
- def visit(self, node):
- if node is None:
- return None
- if type(node) is tuple:
- return tuple([self.visit(n) for n in node])
- visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None)
- if visitor is None:
- return node
- return visitor(node)
-
- def _clone(self, node):
- clone = node.__class__()
- for name in getattr(clone, '_attributes', ()):
- try:
- setattr(clone, 'name', getattr(node, name))
- except AttributeError:
- pass
- for name in clone._fields:
- try:
- value = getattr(node, name)
- except AttributeError:
- pass
- else:
- if value is None:
- pass
- elif isinstance(value, list):
- value = [self.visit(x) for x in value]
- elif isinstance(value, tuple):
- value = tuple(self.visit(x) for x in value)
- else:
- value = self.visit(value)
- setattr(clone, name, value)
- return clone
-
- visit_Module = _clone
- visit_Interactive = _clone
- visit_Expression = _clone
- visit_Suite = _clone
-
- visit_FunctionDef = _clone
- visit_ClassDef = _clone
- visit_Return = _clone
- visit_Delete = _clone
- visit_Assign = _clone
- visit_AugAssign = _clone
- visit_Print = _clone
- visit_For = _clone
- visit_While = _clone
- visit_If = _clone
- visit_With = _clone
- visit_Raise = _clone
- visit_TryExcept = _clone
- visit_TryFinally = _clone
- visit_Assert = _clone
- visit_ExceptHandler = _clone
-
- visit_Import = _clone
- visit_ImportFrom = _clone
- visit_Exec = _clone
- visit_Global = _clone
- visit_Expr = _clone
- # Pass, Break, Continue don't need to be copied
-
- visit_BoolOp = _clone
- visit_BinOp = _clone
- visit_UnaryOp = _clone
- visit_Lambda = _clone
- visit_IfExp = _clone
- visit_Dict = _clone
- visit_ListComp = _clone
- visit_GeneratorExp = _clone
- visit_Yield = _clone
- visit_Compare = _clone
- visit_Call = _clone
- visit_Repr = _clone
- # Num, Str don't need to be copied
-
- visit_Attribute = _clone
- visit_Subscript = _clone
- visit_Name = _clone
- visit_List = _clone
- visit_Tuple = _clone
-
- visit_comprehension = _clone
- visit_excepthandler = _clone
- visit_arguments = _clone
- visit_keyword = _clone
- visit_alias = _clone
-
- visit_Slice = _clone
- visit_ExtSlice = _clone
- visit_Index = _clone
-
- del _clone
diff --git a/genshi/template/base.py b/genshi/template/base.py
deleted file mode 100644
index 202faae..0000000
--- a/genshi/template/base.py
+++ /dev/null
@@ -1,634 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2010 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Basic templating functionality."""
-
-from collections import deque
-import os
-from StringIO import StringIO
-import sys
-
-from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
-from genshi.input import ParseError
-
-__all__ = ['Context', 'DirectiveFactory', 'Template', 'TemplateError',
- 'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError']
-__docformat__ = 'restructuredtext en'
-
-
-class TemplateError(Exception):
- """Base exception class for errors related to template processing."""
-
- def __init__(self, message, filename=None, lineno=-1, offset=-1):
- """Create the exception.
-
- :param message: the error message
- :param filename: the filename of the template
- :param lineno: the number of line in the template at which the error
- occurred
- :param offset: the column number at which the error occurred
- """
- if filename is None:
- filename = '<string>'
- self.msg = message #: the error message string
- if filename != '<string>' or lineno >= 0:
- message = '%s (%s, line %d)' % (self.msg, filename, lineno)
- Exception.__init__(self, message)
- self.filename = filename #: the name of the template file
- self.lineno = lineno #: the number of the line containing the error
- self.offset = offset #: the offset on the line
-
-
-class TemplateSyntaxError(TemplateError):
- """Exception raised when an expression in a template causes a Python syntax
- error, or the template is not well-formed.
- """
-
- def __init__(self, message, filename=None, lineno=-1, offset=-1):
- """Create the exception
-
- :param message: the error message
- :param filename: the filename of the template
- :param lineno: the number of line in the template at which the error
- occurred
- :param offset: the column number at which the error occurred
- """
- if isinstance(message, SyntaxError) and message.lineno is not None:
- message = str(message).replace(' (line %d)' % message.lineno, '')
- TemplateError.__init__(self, message, filename, lineno)
-
-
-class BadDirectiveError(TemplateSyntaxError):
- """Exception raised when an unknown directive is encountered when parsing
- a template.
-
- An unknown directive is any attribute using the namespace for directives,
- with a local name that doesn't match any registered directive.
- """
-
- def __init__(self, name, filename=None, lineno=-1):
- """Create the exception
-
- :param name: the name of the directive
- :param filename: the filename of the template
- :param lineno: the number of line in the template at which the error
- occurred
- """
- TemplateSyntaxError.__init__(self, 'bad directive "%s"' % name,
- filename, lineno)
-
-
-class TemplateRuntimeError(TemplateError):
- """Exception raised when an the evaluation of a Python expression in a
- template causes an error.
- """
-
-
-class Context(object):
- """Container for template input data.
-
- A context provides a stack of scopes (represented by dictionaries).
-
- Template directives such as loops can push a new scope on the stack with
- data that should only be available inside the loop. When the loop
- terminates, that scope can get popped off the stack again.
-
- >>> ctxt = Context(one='foo', other=1)
- >>> ctxt.get('one')
- 'foo'
- >>> ctxt.get('other')
- 1
- >>> ctxt.push(dict(one='frost'))
- >>> ctxt.get('one')
- 'frost'
- >>> ctxt.get('other')
- 1
- >>> ctxt.pop()
- {'one': 'frost'}
- >>> ctxt.get('one')
- 'foo'
- """
-
- def __init__(self, **data):
- """Initialize the template context with the given keyword arguments as
- data.
- """
- self.frames = deque([data])
- self.pop = self.frames.popleft
- self.push = self.frames.appendleft
- self._match_templates = []
- self._choice_stack = []
-
- # Helper functions for use in expressions
- def defined(name):
- """Return whether a variable with the specified name exists in the
- expression scope."""
- return name in self
- def value_of(name, default=None):
- """If a variable of the specified name is defined, return its value.
- Otherwise, return the provided default value, or ``None``."""
- return self.get(name, default)
- data.setdefault('defined', defined)
- data.setdefault('value_of', value_of)
-
- def __repr__(self):
- return repr(list(self.frames))
-
- def __contains__(self, key):
- """Return whether a variable exists in any of the scopes.
-
- :param key: the name of the variable
- """
- return self._find(key)[1] is not None
- has_key = __contains__
-
- def __delitem__(self, key):
- """Remove a variable from all scopes.
-
- :param key: the name of the variable
- """
- for frame in self.frames:
- if key in frame:
- del frame[key]
-
- def __getitem__(self, key):
- """Get a variables's value, starting at the current scope and going
- upward.
-
- :param key: the name of the variable
- :return: the variable value
- :raises KeyError: if the requested variable wasn't found in any scope
- """
- value, frame = self._find(key)
- if frame is None:
- raise KeyError(key)
- return value
-
- def __len__(self):
- """Return the number of distinctly named variables in the context.
-
- :return: the number of variables in the context
- """
- return len(self.items())
-
- def __setitem__(self, key, value):
- """Set a variable in the current scope.
-
- :param key: the name of the variable
- :param value: the variable value
- """
- self.frames[0][key] = value
-
- def _find(self, key, default=None):
- """Retrieve a given variable's value and the frame it was found in.
-
- Intended primarily for internal use by directives.
-
- :param key: the name of the variable
- :param default: the default value to return when the variable is not
- found
- """
- for frame in self.frames:
- if key in frame:
- return frame[key], frame
- return default, None
-
- def get(self, key, default=None):
- """Get a variable's value, starting at the current scope and going
- upward.
-
- :param key: the name of the variable
- :param default: the default value to return when the variable is not
- found
- """
- for frame in self.frames:
- if key in frame:
- return frame[key]
- return default
-
- def keys(self):
- """Return the name of all variables in the context.
-
- :return: a list of variable names
- """
- keys = []
- for frame in self.frames:
- keys += [key for key in frame if key not in keys]
- return keys
-
- def items(self):
- """Return a list of ``(name, value)`` tuples for all variables in the
- context.
-
- :return: a list of variables
- """
- return [(key, self.get(key)) for key in self.keys()]
-
- def update(self, mapping):
- """Update the context from the mapping provided."""
- self.frames[0].update(mapping)
-
- def push(self, data):
- """Push a new scope on the stack.
-
- :param data: the data dictionary to push on the context stack.
- """
-
- def pop(self):
- """Pop the top-most scope from the stack."""
-
-
-def _apply_directives(stream, directives, ctxt, vars):
- """Apply the given directives to the stream.
-
- :param stream: the stream the directives should be applied to
- :param directives: the list of directives to apply
- :param ctxt: the `Context`
- :param vars: additional variables that should be available when Python
- code is executed
- :return: the stream with the given directives applied
- """
- if directives:
- stream = directives[0](iter(stream), directives[1:], ctxt, **vars)
- return stream
-
-
-def _eval_expr(expr, ctxt, vars=None):
- """Evaluate the given `Expression` object.
-
- :param expr: the expression to evaluate
- :param ctxt: the `Context`
- :param vars: additional variables that should be available to the
- expression
- :return: the result of the evaluation
- """
- if vars:
- ctxt.push(vars)
- retval = expr.evaluate(ctxt)
- if vars:
- ctxt.pop()
- return retval
-
-
-def _exec_suite(suite, ctxt, vars=None):
- """Execute the given `Suite` object.
-
- :param suite: the code suite to execute
- :param ctxt: the `Context`
- :param vars: additional variables that should be available to the
- code
- """
- if vars:
- ctxt.push(vars)
- ctxt.push({})
- suite.execute(ctxt)
- if vars:
- top = ctxt.pop()
- ctxt.pop()
- ctxt.frames[0].update(top)
-
-
-class DirectiveFactoryMeta(type):
- """Meta class for directive factories."""
-
- def __new__(cls, name, bases, d):
- if 'directives' in d:
- d['_dir_by_name'] = dict(d['directives'])
- d['_dir_order'] = [directive[1] for directive in d['directives']]
-
- return type.__new__(cls, name, bases, d)
-
-
-class DirectiveFactory(object):
- """Base for classes that provide a set of template directives.
-
- :since: version 0.6
- """
- __metaclass__ = DirectiveFactoryMeta
-
- directives = []
- """A list of ``(name, cls)`` tuples that define the set of directives
- provided by this factory.
- """
-
- def get_directive(self, name):
- """Return the directive class for the given name.
-
- :param name: the directive name as used in the template
- :return: the directive class
- :see: `Directive`
- """
- return self._dir_by_name.get(name)
-
- def get_directive_index(self, dir_cls):
- """Return a key for the given directive class that should be used to
- sort it among other directives on the same `SUB` event.
-
- The default implementation simply returns the index of the directive in
- the `directives` list.
-
- :param dir_cls: the directive class
- :return: the sort key
- """
- if dir_cls in self._dir_order:
- return self._dir_order.index(dir_cls)
- return len(self._dir_order)
-
-
-class Template(DirectiveFactory):
- """Abstract template base class.
-
- This class implements most of the template processing model, but does not
- specify the syntax of templates.
- """
-
- EXEC = StreamEventKind('EXEC')
- """Stream event kind representing a Python code suite to execute."""
-
- EXPR = StreamEventKind('EXPR')
- """Stream event kind representing a Python expression."""
-
- INCLUDE = StreamEventKind('INCLUDE')
- """Stream event kind representing the inclusion of another template."""
-
- SUB = StreamEventKind('SUB')
- """Stream event kind representing a nested stream to which one or more
- directives should be applied.
- """
-
- serializer = None
- _number_conv = unicode # function used to convert numbers to event data
-
- def __init__(self, source, filepath=None, filename=None, loader=None,
- encoding=None, lookup='strict', allow_exec=True):
- """Initialize a template from either a string, a file-like object, or
- an already parsed markup stream.
-
- :param source: a string, file-like object, or markup stream to read the
- template from
- :param filepath: the absolute path to the template file
- :param filename: the path to the template file relative to the search
- path
- :param loader: the `TemplateLoader` to use for loading included
- templates
- :param encoding: the encoding of the `source`
- :param lookup: the variable lookup mechanism; either "strict" (the
- default), "lenient", or a custom lookup class
- :param allow_exec: whether Python code blocks in templates should be
- allowed
-
- :note: Changed in 0.5: Added the `allow_exec` argument
- """
- self.filepath = filepath or filename
- self.filename = filename
- self.loader = loader
- self.lookup = lookup
- self.allow_exec = allow_exec
- self._init_filters()
- self._init_loader()
- self._prepared = False
-
- if isinstance(source, basestring):
- source = StringIO(source)
- else:
- source = source
- try:
- self._stream = self._parse(source, encoding)
- except ParseError, e:
- raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset)
-
- def __getstate__(self):
- state = self.__dict__.copy()
- state['filters'] = []
- return state
-
- def __setstate__(self, state):
- self.__dict__ = state
- self._init_filters()
-
- def __repr__(self):
- return '<%s "%s">' % (type(self).__name__, self.filename)
-
- def _init_filters(self):
- self.filters = [self._flatten, self._include]
-
- def _init_loader(self):
- if self.loader is None:
- from genshi.template.loader import TemplateLoader
- if self.filename:
- if self.filepath != self.filename:
- basedir = os.path.normpath(self.filepath)[:-len(
- os.path.normpath(self.filename))
- ]
- else:
- basedir = os.path.dirname(self.filename)
- else:
- basedir = '.'
- self.loader = TemplateLoader([os.path.abspath(basedir)])
-
- @property
- def stream(self):
- if not self._prepared:
- self._stream = list(self._prepare(self._stream))
- self._prepared = True
- return self._stream
-
- def _parse(self, source, encoding):
- """Parse the template.
-
- The parsing stage parses the template and constructs a list of
- directives that will be executed in the render stage. The input is
- split up into literal output (text that does not depend on the context
- data) and directives or expressions.
-
- :param source: a file-like object containing the XML source of the
- template, or an XML event stream
- :param encoding: the encoding of the `source`
- """
- raise NotImplementedError
-
- def _prepare(self, stream):
- """Call the `attach` method of every directive found in the template.
-
- :param stream: the event stream of the template
- """
- from genshi.template.loader import TemplateNotFound
-
- for kind, data, pos in stream:
- if kind is SUB:
- directives = []
- substream = data[1]
- for _, cls, value, namespaces, pos in sorted(data[0]):
- directive, substream = cls.attach(self, substream, value,
- namespaces, pos)
- if directive:
- directives.append(directive)
- substream = self._prepare(substream)
- if directives:
- yield kind, (directives, list(substream)), pos
- else:
- for event in substream:
- yield event
- else:
- if kind is INCLUDE:
- href, cls, fallback = data
- if isinstance(href, basestring) and \
- not getattr(self.loader, 'auto_reload', True):
- # If the path to the included template is static, and
- # auto-reloading is disabled on the template loader,
- # the template is inlined into the stream
- try:
- tmpl = self.loader.load(href, relative_to=pos[0],
- cls=cls or self.__class__)
- for event in tmpl.stream:
- yield event
- except TemplateNotFound:
- if fallback is None:
- raise
- for event in self._prepare(fallback):
- yield event
- continue
- elif fallback:
- # Otherwise the include is performed at run time
- data = href, cls, list(self._prepare(fallback))
-
- yield kind, data, pos
-
- def generate(self, *args, **kwargs):
- """Apply the template to the given context data.
-
- Any keyword arguments are made available to the template as context
- data.
-
- Only one positional argument is accepted: if it is provided, it must be
- an instance of the `Context` class, and keyword arguments are ignored.
- This calling style is used for internal processing.
-
- :return: a markup event stream representing the result of applying
- the template to the context data.
- """
- vars = {}
- if args:
- assert len(args) == 1
- ctxt = args[0]
- if ctxt is None:
- ctxt = Context(**kwargs)
- else:
- vars = kwargs
- assert isinstance(ctxt, Context)
- else:
- ctxt = Context(**kwargs)
-
- stream = self.stream
- for filter_ in self.filters:
- stream = filter_(iter(stream), ctxt, **vars)
- return Stream(stream, self.serializer)
-
- def _flatten(self, stream, ctxt, **vars):
- number_conv = self._number_conv
- stack = []
- push = stack.append
- pop = stack.pop
- stream = iter(stream)
-
- while 1:
- for kind, data, pos in stream:
-
- if kind is START and data[1]:
- # Attributes may still contain expressions in start tags at
- # this point, so do some evaluation
- tag, attrs = data
- new_attrs = []
- for name, value in attrs:
- if type(value) is list: # this is an interpolated string
- values = [event[1]
- for event in self._flatten(value, ctxt, **vars)
- if event[0] is TEXT and event[1] is not None
- ]
- if not values:
- continue
- value = ''.join(values)
- new_attrs.append((name, value))
- yield kind, (tag, Attrs(new_attrs)), pos
-
- elif kind is EXPR:
- result = _eval_expr(data, ctxt, vars)
- if result is not None:
- # First check for a string, otherwise the iterable test
- # below succeeds, and the string will be chopped up into
- # individual characters
- if isinstance(result, basestring):
- yield TEXT, result, pos
- elif isinstance(result, (int, float, long)):
- yield TEXT, number_conv(result), pos
- elif hasattr(result, '__iter__'):
- push(stream)
- stream = _ensure(result)
- break
- else:
- yield TEXT, unicode(result), pos
-
- elif kind is SUB:
- # This event is a list of directives and a list of nested
- # events to which those directives should be applied
- push(stream)
- stream = _apply_directives(data[1], data[0], ctxt, vars)
- break
-
- elif kind is EXEC:
- _exec_suite(data, ctxt, vars)
-
- else:
- yield kind, data, pos
-
- else:
- if not stack:
- break
- stream = pop()
-
- def _include(self, stream, ctxt, **vars):
- """Internal stream filter that performs inclusion of external
- template files.
- """
- from genshi.template.loader import TemplateNotFound
-
- for event in stream:
- if event[0] is INCLUDE:
- href, cls, fallback = event[1]
- if not isinstance(href, basestring):
- parts = []
- for subkind, subdata, subpos in self._flatten(href, ctxt,
- **vars):
- if subkind is TEXT:
- parts.append(subdata)
- href = ''.join([x for x in parts if x is not None])
- try:
- tmpl = self.loader.load(href, relative_to=event[2][0],
- cls=cls or self.__class__)
- for event in tmpl.generate(ctxt, **vars):
- yield event
- except TemplateNotFound:
- if fallback is None:
- raise
- for filter_ in self.filters:
- fallback = filter_(iter(fallback), ctxt, **vars)
- for event in fallback:
- yield event
- else:
- yield event
-
-
-EXEC = Template.EXEC
-EXPR = Template.EXPR
-INCLUDE = Template.INCLUDE
-SUB = Template.SUB
diff --git a/genshi/template/directives.py b/genshi/template/directives.py
deleted file mode 100644
index e2c9424..0000000
--- a/genshi/template/directives.py
+++ /dev/null
@@ -1,725 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Implementation of the various template directives."""
-
-from genshi.core import QName, Stream
-from genshi.path import Path
-from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \
- EXPR, _apply_directives, _eval_expr
-from genshi.template.eval import Expression, ExpressionASTTransformer, \
- _ast, _parse
-
-__all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective',
- 'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective',
- 'OtherwiseDirective', 'ReplaceDirective', 'StripDirective',
- 'WhenDirective', 'WithDirective']
-__docformat__ = 'restructuredtext en'
-
-
-class DirectiveMeta(type):
- """Meta class for template directives."""
-
- def __new__(cls, name, bases, d):
- d['tagname'] = name.lower().replace('directive', '')
- return type.__new__(cls, name, bases, d)
-
-
-class Directive(object):
- """Abstract base class for template directives.
-
- A directive is basically a callable that takes three positional arguments:
- ``ctxt`` is the template data context, ``stream`` is an iterable over the
- events that the directive applies to, and ``directives`` is is a list of
- other directives on the same stream that need to be applied.
-
- Directives can be "anonymous" or "registered". Registered directives can be
- applied by the template author using an XML attribute with the
- corresponding name in the template. Such directives should be subclasses of
- this base class that can be instantiated with the value of the directive
- attribute as parameter.
-
- Anonymous directives are simply functions conforming to the protocol
- described above, and can only be applied programmatically (for example by
- template filters).
- """
- __metaclass__ = DirectiveMeta
- __slots__ = ['expr']
-
- def __init__(self, value, template=None, namespaces=None, lineno=-1,
- offset=-1):
- self.expr = self._parse_expr(value, template, lineno, offset)
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- """Called after the template stream has been completely parsed.
-
- :param template: the `Template` object
- :param stream: the event stream associated with the directive
- :param value: the argument value for the directive; if the directive was
- specified as an element, this will be an `Attrs` instance
- with all specified attributes, otherwise it will be a
- `unicode` object with just the attribute value
- :param namespaces: a mapping of namespace URIs to prefixes
- :param pos: a ``(filename, lineno, offset)`` tuple describing the
- location where the directive was found in the source
-
- This class method should return a ``(directive, stream)`` tuple. If
- ``directive`` is not ``None``, it should be an instance of the `Directive`
- class, and gets added to the list of directives applied to the substream
- at runtime. `stream` is an event stream that replaces the original
- stream associated with the directive.
- """
- return cls(value, template, namespaces, *pos[1:]), stream
-
- def __call__(self, stream, directives, ctxt, **vars):
- """Apply the directive to the given stream.
-
- :param stream: the event stream
- :param directives: a list of the remaining directives that should
- process the stream
- :param ctxt: the context data
- :param vars: additional variables that should be made available when
- Python code is executed
- """
- raise NotImplementedError
-
- def __repr__(self):
- expr = ''
- if getattr(self, 'expr', None) is not None:
- expr = ' "%s"' % self.expr.source
- return '<%s%s>' % (type(self).__name__, expr)
-
- @classmethod
- def _parse_expr(cls, expr, template, lineno=-1, offset=-1):
- """Parses the given expression, raising a useful error message when a
- syntax error is encountered.
- """
- try:
- return expr and Expression(expr, template.filepath, lineno,
- lookup=template.lookup) or None
- except SyntaxError, err:
- err.msg += ' in expression "%s" of "%s" directive' % (expr,
- cls.tagname)
- raise TemplateSyntaxError(err, template.filepath, lineno,
- offset + (err.offset or 0))
-
-
-def _assignment(ast):
- """Takes the AST representation of an assignment, and returns a
- function that applies the assignment of a given value to a dictionary.
- """
- def _names(node):
- if isinstance(node, _ast.Tuple):
- return tuple([_names(child) for child in node.elts])
- elif isinstance(node, _ast.Name):
- return node.id
- def _assign(data, value, names=_names(ast)):
- if type(names) is tuple:
- for idx in range(len(names)):
- _assign(data, value[idx], names[idx])
- else:
- data[names] = value
- return _assign
-
-
-class AttrsDirective(Directive):
- """Implementation of the ``py:attrs`` template directive.
-
- The value of the ``py:attrs`` attribute should be a dictionary or a sequence
- of ``(name, value)`` tuples. The items in that dictionary or sequence are
- added as attributes to the element:
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
- ... <li py:attrs="foo">Bar</li>
- ... </ul>''')
- >>> print(tmpl.generate(foo={'class': 'collapse'}))
- <ul>
- <li class="collapse">Bar</li>
- </ul>
- >>> print(tmpl.generate(foo=[('class', 'collapse')]))
- <ul>
- <li class="collapse">Bar</li>
- </ul>
-
- If the value evaluates to ``None`` (or any other non-truth value), no
- attributes are added:
-
- >>> print(tmpl.generate(foo=None))
- <ul>
- <li>Bar</li>
- </ul>
- """
- __slots__ = []
-
- def __call__(self, stream, directives, ctxt, **vars):
- def _generate():
- kind, (tag, attrib), pos = stream.next()
- attrs = _eval_expr(self.expr, ctxt, vars)
- if attrs:
- if isinstance(attrs, Stream):
- try:
- attrs = iter(attrs).next()
- except StopIteration:
- attrs = []
- elif not isinstance(attrs, list): # assume it's a dict
- attrs = attrs.items()
- attrib -= [name for name, val in attrs if val is None]
- attrib |= [(QName(name), unicode(val).strip()) for name, val
- in attrs if val is not None]
- yield kind, (tag, attrib), pos
- for event in stream:
- yield event
-
- return _apply_directives(_generate(), directives, ctxt, vars)
-
-
-class ContentDirective(Directive):
- """Implementation of the ``py:content`` template directive.
-
- This directive replaces the content of the element with the result of
- evaluating the value of the ``py:content`` attribute:
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
- ... <li py:content="bar">Hello</li>
- ... </ul>''')
- >>> print(tmpl.generate(bar='Bye'))
- <ul>
- <li>Bye</li>
- </ul>
- """
- __slots__ = []
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- raise TemplateSyntaxError('The content directive can not be used '
- 'as an element', template.filepath,
- *pos[1:])
- expr = cls._parse_expr(value, template, *pos[1:])
- return None, [stream[0], (EXPR, expr, pos), stream[-1]]
-
-
-class DefDirective(Directive):
- """Implementation of the ``py:def`` template directive.
-
- This directive can be used to create "Named Template Functions", which
- are template snippets that are not actually output during normal
- processing, but rather can be expanded from expressions in other places
- in the template.
-
- A named template function can be used just like a normal Python function
- from template expressions:
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <p py:def="echo(greeting, name='world')" class="message">
- ... ${greeting}, ${name}!
- ... </p>
- ... ${echo('Hi', name='you')}
- ... </div>''')
- >>> print(tmpl.generate(bar='Bye'))
- <div>
- <p class="message">
- Hi, you!
- </p>
- </div>
-
- If a function does not require parameters, the parenthesis can be omitted
- in the definition:
-
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <p py:def="helloworld" class="message">
- ... Hello, world!
- ... </p>
- ... ${helloworld()}
- ... </div>''')
- >>> print(tmpl.generate(bar='Bye'))
- <div>
- <p class="message">
- Hello, world!
- </p>
- </div>
- """
- __slots__ = ['name', 'args', 'star_args', 'dstar_args', 'defaults']
-
- def __init__(self, args, template, namespaces=None, lineno=-1, offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- ast = _parse(args).body
- self.args = []
- self.star_args = None
- self.dstar_args = None
- self.defaults = {}
- if isinstance(ast, _ast.Call):
- self.name = ast.func.id
- for arg in ast.args:
- # only names
- self.args.append(arg.id)
- for kwd in ast.keywords:
- self.args.append(kwd.arg)
- exp = Expression(kwd.value, template.filepath,
- lineno, lookup=template.lookup)
- self.defaults[kwd.arg] = exp
- if getattr(ast, 'starargs', None):
- self.star_args = ast.starargs.id
- if getattr(ast, 'kwargs', None):
- self.dstar_args = ast.kwargs.id
- else:
- self.name = ast.id
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('function')
- return super(DefDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- stream = list(stream)
-
- def function(*args, **kwargs):
- scope = {}
- args = list(args) # make mutable
- for name in self.args:
- if args:
- scope[name] = args.pop(0)
- else:
- if name in kwargs:
- val = kwargs.pop(name)
- else:
- val = _eval_expr(self.defaults.get(name), ctxt, vars)
- scope[name] = val
- if not self.star_args is None:
- scope[self.star_args] = args
- if not self.dstar_args is None:
- scope[self.dstar_args] = kwargs
- ctxt.push(scope)
- for event in _apply_directives(stream, directives, ctxt, vars):
- yield event
- ctxt.pop()
- function.__name__ = self.name
-
- # Store the function reference in the bottom context frame so that it
- # doesn't get popped off before processing the template has finished
- # FIXME: this makes context data mutable as a side-effect
- ctxt.frames[-1][self.name] = function
-
- return []
-
- def __repr__(self):
- return '<%s "%s">' % (type(self).__name__, self.name)
-
-
-class ForDirective(Directive):
- """Implementation of the ``py:for`` template directive for repeating an
- element based on an iterable in the context data.
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
- ... <li py:for="item in items">${item}</li>
- ... </ul>''')
- >>> print(tmpl.generate(items=[1, 2, 3]))
- <ul>
- <li>1</li><li>2</li><li>3</li>
- </ul>
- """
- __slots__ = ['assign', 'filename']
-
- def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
- if ' in ' not in value:
- raise TemplateSyntaxError('"in" keyword missing in "for" directive',
- template.filepath, lineno, offset)
- assign, value = value.split(' in ', 1)
- ast = _parse(assign, 'exec')
- value = 'iter(%s)' % value.strip()
- self.assign = _assignment(ast.body[0].value)
- self.filename = template.filepath
- Directive.__init__(self, value, template, namespaces, lineno, offset)
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('each')
- return super(ForDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- iterable = _eval_expr(self.expr, ctxt, vars)
- if iterable is None:
- return
-
- assign = self.assign
- scope = {}
- stream = list(stream)
- for item in iterable:
- assign(scope, item)
- ctxt.push(scope)
- for event in _apply_directives(stream, directives, ctxt, vars):
- yield event
- ctxt.pop()
-
- def __repr__(self):
- return '<%s>' % type(self).__name__
-
-
-class IfDirective(Directive):
- """Implementation of the ``py:if`` template directive for conditionally
- excluding elements from being output.
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <b py:if="foo">${bar}</b>
- ... </div>''')
- >>> print(tmpl.generate(foo=True, bar='Hello'))
- <div>
- <b>Hello</b>
- </div>
- """
- __slots__ = []
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('test')
- return super(IfDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- value = _eval_expr(self.expr, ctxt, vars)
- if value:
- return _apply_directives(stream, directives, ctxt, vars)
- return []
-
-
-class MatchDirective(Directive):
- """Implementation of the ``py:match`` template directive.
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <span py:match="greeting">
- ... Hello ${select('@name')}
- ... </span>
- ... <greeting name="Dude" />
- ... </div>''')
- >>> print(tmpl.generate())
- <div>
- <span>
- Hello Dude
- </span>
- </div>
- """
- __slots__ = ['path', 'namespaces', 'hints']
-
- def __init__(self, value, template, hints=None, namespaces=None,
- lineno=-1, offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- self.path = Path(value, template.filepath, lineno)
- self.namespaces = namespaces or {}
- self.hints = hints or ()
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- hints = []
- if type(value) is dict:
- if value.get('buffer', '').lower() == 'false':
- hints.append('not_buffered')
- if value.get('once', '').lower() == 'true':
- hints.append('match_once')
- if value.get('recursive', '').lower() == 'false':
- hints.append('not_recursive')
- value = value.get('path')
- return cls(value, template, frozenset(hints), namespaces, *pos[1:]), \
- stream
-
- def __call__(self, stream, directives, ctxt, **vars):
- ctxt._match_templates.append((self.path.test(ignore_context=True),
- self.path, list(stream), self.hints,
- self.namespaces, directives))
- return []
-
- def __repr__(self):
- return '<%s "%s">' % (type(self).__name__, self.path.source)
-
-
-class ReplaceDirective(Directive):
- """Implementation of the ``py:replace`` template directive.
-
- This directive replaces the element with the result of evaluating the
- value of the ``py:replace`` attribute:
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <span py:replace="bar">Hello</span>
- ... </div>''')
- >>> print(tmpl.generate(bar='Bye'))
- <div>
- Bye
- </div>
-
- This directive is equivalent to ``py:content`` combined with ``py:strip``,
- providing a less verbose way to achieve the same effect:
-
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <span py:content="bar" py:strip="">Hello</span>
- ... </div>''')
- >>> print(tmpl.generate(bar='Bye'))
- <div>
- Bye
- </div>
- """
- __slots__ = []
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('value')
- if not value:
- raise TemplateSyntaxError('missing value for "replace" directive',
- template.filepath, *pos[1:])
- expr = cls._parse_expr(value, template, *pos[1:])
- return None, [(EXPR, expr, pos)]
-
-
-class StripDirective(Directive):
- """Implementation of the ``py:strip`` template directive.
-
- When the value of the ``py:strip`` attribute evaluates to ``True``, the
- element is stripped from the output
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <div py:strip="True"><b>foo</b></div>
- ... </div>''')
- >>> print(tmpl.generate())
- <div>
- <b>foo</b>
- </div>
-
- Leaving the attribute value empty is equivalent to a truth value.
-
- This directive is particulary interesting for named template functions or
- match templates that do not generate a top-level element:
-
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <div py:def="echo(what)" py:strip="">
- ... <b>${what}</b>
- ... </div>
- ... ${echo('foo')}
- ... </div>''')
- >>> print(tmpl.generate())
- <div>
- <b>foo</b>
- </div>
- """
- __slots__ = []
-
- def __call__(self, stream, directives, ctxt, **vars):
- def _generate():
- if not self.expr or _eval_expr(self.expr, ctxt, vars):
- stream.next() # skip start tag
- previous = stream.next()
- for event in stream:
- yield previous
- previous = event
- else:
- for event in stream:
- yield event
- return _apply_directives(_generate(), directives, ctxt, vars)
-
-
-class ChooseDirective(Directive):
- """Implementation of the ``py:choose`` directive for conditionally selecting
- one of several body elements to display.
-
- If the ``py:choose`` expression is empty the expressions of nested
- ``py:when`` directives are tested for truth. The first true ``py:when``
- body is output. If no ``py:when`` directive is matched then the fallback
- directive ``py:otherwise`` will be used.
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
- ... py:choose="">
- ... <span py:when="0 == 1">0</span>
- ... <span py:when="1 == 1">1</span>
- ... <span py:otherwise="">2</span>
- ... </div>''')
- >>> print(tmpl.generate())
- <div>
- <span>1</span>
- </div>
-
- If the ``py:choose`` directive contains an expression, the nested
- ``py:when`` directives are tested for equality to the ``py:choose``
- expression:
-
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
- ... py:choose="2">
- ... <span py:when="1">1</span>
- ... <span py:when="2">2</span>
- ... </div>''')
- >>> print(tmpl.generate())
- <div>
- <span>2</span>
- </div>
-
- Behavior is undefined if a ``py:choose`` block contains content outside a
- ``py:when`` or ``py:otherwise`` block. Behavior is also undefined if a
- ``py:otherwise`` occurs before ``py:when`` blocks.
- """
- __slots__ = ['matched', 'value']
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('test')
- return super(ChooseDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- info = [False, bool(self.expr), None]
- if self.expr:
- info[2] = _eval_expr(self.expr, ctxt, vars)
- ctxt._choice_stack.append(info)
- for event in _apply_directives(stream, directives, ctxt, vars):
- yield event
- ctxt._choice_stack.pop()
-
-
-class WhenDirective(Directive):
- """Implementation of the ``py:when`` directive for nesting in a parent with
- the ``py:choose`` directive.
-
- See the documentation of the `ChooseDirective` for usage.
- """
- __slots__ = ['filename']
-
- def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
- Directive.__init__(self, value, template, namespaces, lineno, offset)
- self.filename = template.filepath
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('test')
- return super(WhenDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- info = ctxt._choice_stack and ctxt._choice_stack[-1]
- if not info:
- raise TemplateRuntimeError('"when" directives can only be used '
- 'inside a "choose" directive',
- self.filename, *stream.next()[2][1:])
- if info[0]:
- return []
- if not self.expr and not info[1]:
- raise TemplateRuntimeError('either "choose" or "when" directive '
- 'must have a test expression',
- self.filename, *stream.next()[2][1:])
- if info[1]:
- value = info[2]
- if self.expr:
- matched = value == _eval_expr(self.expr, ctxt, vars)
- else:
- matched = bool(value)
- else:
- matched = bool(_eval_expr(self.expr, ctxt, vars))
- info[0] = matched
- if not matched:
- return []
-
- return _apply_directives(stream, directives, ctxt, vars)
-
-
-class OtherwiseDirective(Directive):
- """Implementation of the ``py:otherwise`` directive for nesting in a parent
- with the ``py:choose`` directive.
-
- See the documentation of `ChooseDirective` for usage.
- """
- __slots__ = ['filename']
-
- def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- self.filename = template.filepath
-
- def __call__(self, stream, directives, ctxt, **vars):
- info = ctxt._choice_stack and ctxt._choice_stack[-1]
- if not info:
- raise TemplateRuntimeError('an "otherwise" directive can only be '
- 'used inside a "choose" directive',
- self.filename, *stream.next()[2][1:])
- if info[0]:
- return []
- info[0] = True
-
- return _apply_directives(stream, directives, ctxt, vars)
-
-
-class WithDirective(Directive):
- """Implementation of the ``py:with`` template directive, which allows
- shorthand access to variables and expressions.
-
- >>> from genshi.template import MarkupTemplate
- >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
- ... <span py:with="y=7; z=x+10">$x $y $z</span>
- ... </div>''')
- >>> print(tmpl.generate(x=42))
- <div>
- <span>42 7 52</span>
- </div>
- """
- __slots__ = ['vars']
-
- def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
- Directive.__init__(self, None, template, namespaces, lineno, offset)
- self.vars = []
- value = value.strip()
- try:
- ast = _parse(value, 'exec')
- for node in ast.body:
- if not isinstance(node, _ast.Assign):
- raise TemplateSyntaxError('only assignment allowed in '
- 'value of the "with" directive',
- template.filepath, lineno, offset)
- self.vars.append(([_assignment(n) for n in node.targets],
- Expression(node.value, template.filepath,
- lineno, lookup=template.lookup)))
- except SyntaxError, err:
- err.msg += ' in expression "%s" of "%s" directive' % (value,
- self.tagname)
- raise TemplateSyntaxError(err, template.filepath, lineno,
- offset + (err.offset or 0))
-
- @classmethod
- def attach(cls, template, stream, value, namespaces, pos):
- if type(value) is dict:
- value = value.get('vars')
- return super(WithDirective, cls).attach(template, stream, value,
- namespaces, pos)
-
- def __call__(self, stream, directives, ctxt, **vars):
- frame = {}
- ctxt.push(frame)
- for targets, expr in self.vars:
- value = _eval_expr(expr, ctxt, vars)
- for assign in targets:
- assign(frame, value)
- for event in _apply_directives(stream, directives, ctxt, vars):
- yield event
- ctxt.pop()
-
- def __repr__(self):
- return '<%s>' % (type(self).__name__)
diff --git a/genshi/template/eval.py b/genshi/template/eval.py
deleted file mode 100644
index 8593aaa..0000000
--- a/genshi/template/eval.py
+++ /dev/null
@@ -1,629 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2010 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Support for "safe" evaluation of Python expressions."""
-
-import __builtin__
-
-from textwrap import dedent
-from types import CodeType
-
-from genshi.core import Markup
-from genshi.template.astutil import ASTTransformer, ASTCodeGenerator, \
- _ast, parse
-from genshi.template.base import TemplateRuntimeError
-from genshi.util import flatten
-
-__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup',
- 'Undefined', 'UndefinedError']
-__docformat__ = 'restructuredtext en'
-
-
-# Check for a Python 2.4 bug in the eval loop
-has_star_import_bug = False
-try:
- class _FakeMapping(object):
- __getitem__ = __setitem__ = lambda *a: None
- exec 'from sys import *' in {}, _FakeMapping()
-except SystemError:
- has_star_import_bug = True
-del _FakeMapping
-
-
-def _star_import_patch(mapping, modname):
- """This function is used as helper if a Python version with a broken
- star-import opcode is in use.
- """
- module = __import__(modname, None, None, ['__all__'])
- if hasattr(module, '__all__'):
- members = module.__all__
- else:
- members = [x for x in module.__dict__ if not x.startswith('_')]
- mapping.update([(name, getattr(module, name)) for name in members])
-
-
-class Code(object):
- """Abstract base class for the `Expression` and `Suite` classes."""
- __slots__ = ['source', 'code', 'ast', '_globals']
-
- def __init__(self, source, filename=None, lineno=-1, lookup='strict',
- xform=None):
- """Create the code object, either from a string, or from an AST node.
-
- :param source: either a string containing the source code, or an AST
- node
- :param filename: the (preferably absolute) name of the file containing
- the code
- :param lineno: the number of the line on which the code was found
- :param lookup: the lookup class that defines how variables are looked
- up in the context; can be either "strict" (the default),
- "lenient", or a custom lookup class
- :param xform: the AST transformer that should be applied to the code;
- if `None`, the appropriate transformation is chosen
- depending on the mode
- """
- if isinstance(source, basestring):
- self.source = source
- node = _parse(source, mode=self.mode)
- else:
- assert isinstance(source, _ast.AST), \
- 'Expected string or AST node, but got %r' % source
- self.source = '?'
- if self.mode == 'eval':
- node = _ast.Expression()
- node.body = source
- else:
- node = _ast.Module()
- node.body = [source]
-
- self.ast = node
- self.code = _compile(node, self.source, mode=self.mode,
- filename=filename, lineno=lineno, xform=xform)
- if lookup is None:
- lookup = LenientLookup
- elif isinstance(lookup, basestring):
- lookup = {'lenient': LenientLookup, 'strict': StrictLookup}[lookup]
- self._globals = lookup.globals
-
- def __getstate__(self):
- state = {'source': self.source, 'ast': self.ast,
- 'lookup': self._globals.im_self}
- c = self.code
- state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code,
- c.co_consts, c.co_names, c.co_varnames, c.co_filename,
- c.co_name, c.co_firstlineno, c.co_lnotab, (), ())
- return state
-
- def __setstate__(self, state):
- self.source = state['source']
- self.ast = state['ast']
- self.code = CodeType(0, *state['code'])
- self._globals = state['lookup'].globals
-
- def __eq__(self, other):
- return (type(other) == type(self)) and (self.code == other.code)
-
- def __hash__(self):
- return hash(self.code)
-
- def __ne__(self, other):
- return not self == other
-
- def __repr__(self):
- return '%s(%r)' % (type(self).__name__, self.source)
-
-
-class Expression(Code):
- """Evaluates Python expressions used in templates.
-
- >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
- >>> Expression('test').evaluate(data)
- 'Foo'
-
- >>> Expression('items[0]').evaluate(data)
- 1
- >>> Expression('items[-1]').evaluate(data)
- 3
- >>> Expression('dict["some"]').evaluate(data)
- 'thing'
-
- Similar to e.g. Javascript, expressions in templates can use the dot
- notation for attribute access to access items in mappings:
-
- >>> Expression('dict.some').evaluate(data)
- 'thing'
-
- This also works the other way around: item access can be used to access
- any object attribute:
-
- >>> class MyClass(object):
- ... myattr = 'Bar'
- >>> data = dict(mine=MyClass(), key='myattr')
- >>> Expression('mine.myattr').evaluate(data)
- 'Bar'
- >>> Expression('mine["myattr"]').evaluate(data)
- 'Bar'
- >>> Expression('mine[key]').evaluate(data)
- 'Bar'
-
- All of the standard Python operators are available to template expressions.
- Built-in functions such as ``len()`` are also available in template
- expressions:
-
- >>> data = dict(items=[1, 2, 3])
- >>> Expression('len(items)').evaluate(data)
- 3
- """
- __slots__ = []
- mode = 'eval'
-
- def evaluate(self, data):
- """Evaluate the expression against the given data dictionary.
-
- :param data: a mapping containing the data to evaluate against
- :return: the result of the evaluation
- """
- __traceback_hide__ = 'before_and_this'
- _globals = self._globals(data)
- return eval(self.code, _globals, {'__data__': data})
-
-
-class Suite(Code):
- """Executes Python statements used in templates.
-
- >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
- >>> Suite("foo = dict['some']").execute(data)
- >>> data['foo']
- 'thing'
- """
- __slots__ = []
- mode = 'exec'
-
- def execute(self, data):
- """Execute the suite in the given data dictionary.
-
- :param data: a mapping containing the data to execute in
- """
- __traceback_hide__ = 'before_and_this'
- _globals = self._globals(data)
- exec self.code in _globals, data
-
-
-UNDEFINED = object()
-
-
-class UndefinedError(TemplateRuntimeError):
- """Exception thrown when a template expression attempts to access a variable
- not defined in the context.
-
- :see: `LenientLookup`, `StrictLookup`
- """
- def __init__(self, name, owner=UNDEFINED):
- if owner is not UNDEFINED:
- message = '%s has no member named "%s"' % (repr(owner), name)
- else:
- message = '"%s" not defined' % name
- TemplateRuntimeError.__init__(self, message)
-
-
-class Undefined(object):
- """Represents a reference to an undefined variable.
-
- Unlike the Python runtime, template expressions can refer to an undefined
- variable without causing a `NameError` to be raised. The result will be an
- instance of the `Undefined` class, which is treated the same as ``False`` in
- conditions, but raise an exception on any other operation:
-
- >>> foo = Undefined('foo')
- >>> bool(foo)
- False
- >>> list(foo)
- []
- >>> print(foo)
- undefined
-
- However, calling an undefined variable, or trying to access an attribute
- of that variable, will raise an exception that includes the name used to
- reference that undefined variable.
-
- >>> foo('bar')
- Traceback (most recent call last):
- ...
- UndefinedError: "foo" not defined
-
- >>> foo.bar
- Traceback (most recent call last):
- ...
- UndefinedError: "foo" not defined
-
- :see: `LenientLookup`
- """
- __slots__ = ['_name', '_owner']
-
- def __init__(self, name, owner=UNDEFINED):
- """Initialize the object.
-
- :param name: the name of the reference
- :param owner: the owning object, if the variable is accessed as a member
- """
- self._name = name
- self._owner = owner
-
- def __iter__(self):
- return iter([])
-
- def __nonzero__(self):
- return False
-
- def __repr__(self):
- return '<%s %r>' % (type(self).__name__, self._name)
-
- def __str__(self):
- return 'undefined'
-
- def _die(self, *args, **kwargs):
- """Raise an `UndefinedError`."""
- __traceback_hide__ = True
- raise UndefinedError(self._name, self._owner)
- __call__ = __getattr__ = __getitem__ = _die
-
- # Hack around some behavior introduced in Python 2.6.2
- # http://genshi.edgewall.org/ticket/324
- __length_hint__ = None
-
-
-class LookupBase(object):
- """Abstract base class for variable lookup implementations."""
-
- @classmethod
- def globals(cls, data):
- """Construct the globals dictionary to use as the execution context for
- the expression or suite.
- """
- return {
- '__data__': data,
- '_lookup_name': cls.lookup_name,
- '_lookup_attr': cls.lookup_attr,
- '_lookup_item': cls.lookup_item,
- '_star_import_patch': _star_import_patch,
- 'UndefinedError': UndefinedError,
- }
-
- @classmethod
- def lookup_name(cls, data, name):
- __traceback_hide__ = True
- val = data.get(name, UNDEFINED)
- if val is UNDEFINED:
- val = BUILTINS.get(name, val)
- if val is UNDEFINED:
- val = cls.undefined(name)
- return val
-
- @classmethod
- def lookup_attr(cls, obj, key):
- __traceback_hide__ = True
- try:
- val = getattr(obj, key)
- except AttributeError:
- if hasattr(obj.__class__, key):
- raise
- else:
- try:
- val = obj[key]
- except (KeyError, TypeError):
- val = cls.undefined(key, owner=obj)
- return val
-
- @classmethod
- def lookup_item(cls, obj, key):
- __traceback_hide__ = True
- if len(key) == 1:
- key = key[0]
- try:
- return obj[key]
- except (AttributeError, KeyError, IndexError, TypeError), e:
- if isinstance(key, basestring):
- val = getattr(obj, key, UNDEFINED)
- if val is UNDEFINED:
- val = cls.undefined(key, owner=obj)
- return val
- raise
-
- @classmethod
- def undefined(cls, key, owner=UNDEFINED):
- """Can be overridden by subclasses to specify behavior when undefined
- variables are accessed.
-
- :param key: the name of the variable
- :param owner: the owning object, if the variable is accessed as a member
- """
- raise NotImplementedError
-
-
-class LenientLookup(LookupBase):
- """Default variable lookup mechanism for expressions.
-
- When an undefined variable is referenced using this lookup style, the
- reference evaluates to an instance of the `Undefined` class:
-
- >>> expr = Expression('nothing', lookup='lenient')
- >>> undef = expr.evaluate({})
- >>> undef
- <Undefined 'nothing'>
-
- The same will happen when a non-existing attribute or item is accessed on
- an existing object:
-
- >>> expr = Expression('something.nil', lookup='lenient')
- >>> expr.evaluate({'something': dict()})
- <Undefined 'nil'>
-
- See the documentation of the `Undefined` class for details on the behavior
- of such objects.
-
- :see: `StrictLookup`
- """
-
- @classmethod
- def undefined(cls, key, owner=UNDEFINED):
- """Return an ``Undefined`` object."""
- __traceback_hide__ = True
- return Undefined(key, owner=owner)
-
-
-class StrictLookup(LookupBase):
- """Strict variable lookup mechanism for expressions.
-
- Referencing an undefined variable using this lookup style will immediately
- raise an ``UndefinedError``:
-
- >>> expr = Expression('nothing', lookup='strict')
- >>> expr.evaluate({})
- Traceback (most recent call last):
- ...
- UndefinedError: "nothing" not defined
-
- The same happens when a non-existing attribute or item is accessed on an
- existing object:
-
- >>> expr = Expression('something.nil', lookup='strict')
- >>> expr.evaluate({'something': dict()})
- Traceback (most recent call last):
- ...
- UndefinedError: {} has no member named "nil"
- """
-
- @classmethod
- def undefined(cls, key, owner=UNDEFINED):
- """Raise an ``UndefinedError`` immediately."""
- __traceback_hide__ = True
- raise UndefinedError(key, owner=owner)
-
-
-def _parse(source, mode='eval'):
- source = source.strip()
- if mode == 'exec':
- lines = [line.expandtabs() for line in source.splitlines()]
- if lines:
- first = lines[0]
- rest = dedent('\n'.join(lines[1:])).rstrip()
- if first.rstrip().endswith(':') and not rest[0].isspace():
- rest = '\n'.join([' %s' % line for line in rest.splitlines()])
- source = '\n'.join([first, rest])
- if isinstance(source, unicode):
- source = '\xef\xbb\xbf' + source.encode('utf-8')
- return parse(source, mode)
-
-
-def _compile(node, source=None, mode='eval', filename=None, lineno=-1,
- xform=None):
- if isinstance(filename, unicode):
- # unicode file names not allowed for code objects
- filename = filename.encode('utf-8', 'replace')
- elif not filename:
- filename = '<string>'
- if lineno <= 0:
- lineno = 1
-
- if xform is None:
- xform = {
- 'eval': ExpressionASTTransformer
- }.get(mode, TemplateASTTransformer)
- tree = xform().visit(node)
-
- if mode == 'eval':
- name = '<Expression %r>' % (source or '?')
- else:
- lines = source.splitlines()
- if not lines:
- extract = ''
- else:
- extract = lines[0]
- if len(lines) > 1:
- extract += ' ...'
- name = '<Suite %r>' % (extract)
- new_source = ASTCodeGenerator(tree).code
- code = compile(new_source, filename, mode)
-
- try:
- # We'd like to just set co_firstlineno, but it's readonly. So we need
- # to clone the code object while adjusting the line number
- return CodeType(0, code.co_nlocals, code.co_stacksize,
- code.co_flags | 0x0040, code.co_code, code.co_consts,
- code.co_names, code.co_varnames, filename, name,
- lineno, code.co_lnotab, (), ())
- except RuntimeError:
- return code
-
-
-def _new(class_, *args, **kwargs):
- ret = class_()
- for attr, value in zip(ret._fields, args):
- if attr in kwargs:
- raise ValueError('Field set both in args and kwargs')
- setattr(ret, attr, value)
- for attr, value in kwargs:
- setattr(ret, attr, value)
- return ret
-
-
-BUILTINS = __builtin__.__dict__.copy()
-BUILTINS.update({'Markup': Markup, 'Undefined': Undefined})
-CONSTANTS = frozenset(['False', 'True', 'None', 'NotImplemented', 'Ellipsis'])
-
-
-class TemplateASTTransformer(ASTTransformer):
- """Concrete AST transformer that implements the AST transformations needed
- for code embedded in templates.
- """
-
- def __init__(self):
- self.locals = [CONSTANTS]
-
- def _extract_names(self, node):
- names = set()
- def _process(node):
- if isinstance(node, _ast.Name):
- names.add(node.id)
- elif isinstance(node, _ast.alias):
- names.add(node.asname or node.name)
- elif isinstance(node, _ast.Tuple):
- for elt in node.elts:
- _process(elt)
- if hasattr(node, 'args'):
- for arg in node.args:
- _process(arg)
- if hasattr(node, 'vararg'):
- names.add(node.vararg)
- if hasattr(node, 'kwarg'):
- names.add(node.kwarg)
- elif hasattr(node, 'names'):
- for elt in node.names:
- _process(elt)
- return names
-
- def visit_Str(self, node):
- if isinstance(node.s, str):
- try: # If the string is ASCII, return a `str` object
- node.s.decode('ascii')
- except ValueError: # Otherwise return a `unicode` object
- return _new(_ast.Str, node.s.decode('utf-8'))
- return node
-
- def visit_ClassDef(self, node):
- if len(self.locals) > 1:
- self.locals[-1].add(node.name)
- self.locals.append(set())
- try:
- return ASTTransformer.visit_ClassDef(self, node)
- finally:
- self.locals.pop()
-
- def visit_Import(self, node):
- if len(self.locals) > 1:
- self.locals[-1].update(self._extract_names(node))
- return ASTTransformer.visit_Import(self, node)
-
- def visit_ImportFrom(self, node):
- if [a.name for a in node.names] == ['*']:
- if has_star_import_bug:
- # This is a Python 2.4 bug. Only if we have a broken Python
- # version do we need to apply this hack
- node = _new(_ast.Expr, _new(_ast.Call,
- _new(_ast.Name, '_star_import_patch'), [
- _new(_ast.Name, '__data__'),
- _new(_ast.Str, node.module)
- ], (), ()))
- return node
- if len(self.locals) > 1:
- self.locals[-1].update(self._extract_names(node))
- return ASTTransformer.visit_ImportFrom(self, node)
-
- def visit_FunctionDef(self, node):
- if len(self.locals) > 1:
- self.locals[-1].add(node.name)
-
- self.locals.append(self._extract_names(node.args))
- try:
- return ASTTransformer.visit_FunctionDef(self, node)
- finally:
- self.locals.pop()
-
- # GeneratorExp(expr elt, comprehension* generators)
- def visit_GeneratorExp(self, node):
- gens = []
- for generator in node.generators:
- # comprehension = (expr target, expr iter, expr* ifs)
- self.locals.append(set())
- gen = _new(_ast.comprehension, self.visit(generator.target),
- self.visit(generator.iter),
- [self.visit(if_) for if_ in generator.ifs])
- gens.append(gen)
-
- # use node.__class__ to make it reusable as ListComp
- ret = _new(node.__class__, self.visit(node.elt), gens)
- #delete inserted locals
- del self.locals[-len(node.generators):]
- return ret
-
- # ListComp(expr elt, comprehension* generators)
- visit_ListComp = visit_GeneratorExp
-
- def visit_Lambda(self, node):
- self.locals.append(self._extract_names(node.args))
- try:
- return ASTTransformer.visit_Lambda(self, node)
- finally:
- self.locals.pop()
-
- def visit_Name(self, node):
- # If the name refers to a local inside a lambda, list comprehension, or
- # generator expression, leave it alone
- if isinstance(node.ctx, _ast.Load) and \
- node.id not in flatten(self.locals):
- # Otherwise, translate the name ref into a context lookup
- name = _new(_ast.Name, '_lookup_name', _ast.Load())
- namearg = _new(_ast.Name, '__data__', _ast.Load())
- strarg = _new(_ast.Str, node.id)
- node = _new(_ast.Call, name, [namearg, strarg], [])
- elif isinstance(node.ctx, _ast.Store):
- if len(self.locals) > 1:
- self.locals[-1].add(node.id)
-
- return node
-
-
-class ExpressionASTTransformer(TemplateASTTransformer):
- """Concrete AST transformer that implements the AST transformations needed
- for code embedded in templates.
- """
-
- def visit_Attribute(self, node):
- if not isinstance(node.ctx, _ast.Load):
- return ASTTransformer.visit_Attribute(self, node)
-
- func = _new(_ast.Name, '_lookup_attr', _ast.Load())
- args = [self.visit(node.value), _new(_ast.Str, node.attr)]
- return _new(_ast.Call, func, args, [])
-
- def visit_Subscript(self, node):
- if not isinstance(node.ctx, _ast.Load) or \
- not isinstance(node.slice, _ast.Index):
- return ASTTransformer.visit_Subscript(self, node)
-
- func = _new(_ast.Name, '_lookup_item', _ast.Load())
- args = [
- self.visit(node.value),
- _new(_ast.Tuple, (self.visit(node.slice.value),), _ast.Load())
- ]
- return _new(_ast.Call, func, args, [])
diff --git a/genshi/template/interpolation.py b/genshi/template/interpolation.py
deleted file mode 100644
index 1e1a385..0000000
--- a/genshi/template/interpolation.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2007-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""String interpolation routines, i.e. the splitting up a given text into some
-parts that are literal strings, and others that are Python expressions.
-"""
-
-from itertools import chain
-import os
-import re
-from tokenize import PseudoToken
-
-from genshi.core import TEXT
-from genshi.template.base import TemplateSyntaxError, EXPR
-from genshi.template.eval import Expression
-
-__all__ = ['interpolate']
-__docformat__ = 'restructuredtext en'
-
-NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
-NAMECHARS = NAMESTART + '.0123456789'
-PREFIX = '$'
-
-token_re = re.compile('%s|%s(?s)' % (
- r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
- PseudoToken
-))
-
-
-def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
- """Parse the given string and extract expressions.
-
- This function is a generator that yields `TEXT` events for literal strings,
- and `EXPR` events for expressions, depending on the results of parsing the
- string.
-
- >>> for kind, data, pos in interpolate("hey ${foo}bar"):
- ... print('%s %r' % (kind, data))
- TEXT 'hey '
- EXPR Expression('foo')
- TEXT 'bar'
-
- :param text: the text to parse
- :param filepath: absolute path to the file in which the text was found
- (optional)
- :param lineno: the line number at which the text was found (optional)
- :param offset: the column number at which the text starts in the source
- (optional)
- :param lookup: the variable lookup mechanism; either "lenient" (the
- default), "strict", or a custom lookup class
- :return: a list of `TEXT` and `EXPR` events
- :raise TemplateSyntaxError: when a syntax error in an expression is
- encountered
- """
- pos = [filepath, lineno, offset]
-
- textbuf = []
- textpos = None
- for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
- if is_expr:
- if textbuf:
- yield TEXT, ''.join(textbuf), textpos
- del textbuf[:]
- textpos = None
- if chunk:
- try:
- expr = Expression(chunk.strip(), pos[0], pos[1],
- lookup=lookup)
- yield EXPR, expr, tuple(pos)
- except SyntaxError, err:
- raise TemplateSyntaxError(err, filepath, pos[1],
- pos[2] + (err.offset or 0))
- else:
- textbuf.append(chunk)
- if textpos is None:
- textpos = tuple(pos)
-
- if '\n' in chunk:
- lines = chunk.splitlines()
- pos[1] += len(lines) - 1
- pos[2] += len(lines[-1])
- else:
- pos[2] += len(chunk)
-
-
-def lex(text, textpos, filepath):
- offset = pos = 0
- end = len(text)
- escaped = False
-
- while 1:
- if escaped:
- offset = text.find(PREFIX, offset + 2)
- escaped = False
- else:
- offset = text.find(PREFIX, pos)
- if offset < 0 or offset == end - 1:
- break
- next = text[offset + 1]
-
- if next == '{':
- if offset > pos:
- yield False, text[pos:offset]
- pos = offset + 2
- level = 1
- while level:
- match = token_re.match(text, pos)
- if match is None:
- raise TemplateSyntaxError('invalid syntax', filepath,
- *textpos[1:])
- pos = match.end()
- tstart, tend = match.regs[3]
- token = text[tstart:tend]
- if token == '{':
- level += 1
- elif token == '}':
- level -= 1
- yield True, text[offset + 2:pos - 1]
-
- elif next in NAMESTART:
- if offset > pos:
- yield False, text[pos:offset]
- pos = offset
- pos += 1
- while pos < end:
- char = text[pos]
- if char not in NAMECHARS:
- break
- pos += 1
- yield True, text[offset + 1:pos].strip()
-
- elif not escaped and next == PREFIX:
- if offset > pos:
- yield False, text[pos:offset]
- escaped = True
- pos = offset + 1
-
- else:
- yield False, text[pos:offset + 1]
- pos = offset + 1
-
- if pos < end:
- yield False, text[pos:]
diff --git a/genshi/template/loader.py b/genshi/template/loader.py
deleted file mode 100644
index 0e7cda7..0000000
--- a/genshi/template/loader.py
+++ /dev/null
@@ -1,344 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2010 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Template loading and caching."""
-
-import os
-try:
- import threading
-except ImportError:
- import dummy_threading as threading
-
-from genshi.template.base import TemplateError
-from genshi.util import LRUCache
-
-__all__ = ['TemplateLoader', 'TemplateNotFound', 'directory', 'package',
- 'prefixed']
-__docformat__ = 'restructuredtext en'
-
-
-class TemplateNotFound(TemplateError):
- """Exception raised when a specific template file could not be found."""
-
- def __init__(self, name, search_path):
- """Create the exception.
-
- :param name: the filename of the template
- :param search_path: the search path used to lookup the template
- """
- TemplateError.__init__(self, 'Template "%s" not found' % name)
- self.search_path = search_path
-
-
-class TemplateLoader(object):
- """Responsible for loading templates from files on the specified search
- path.
-
- >>> import tempfile
- >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template')
- >>> os.write(fd, '<p>$var</p>')
- 11
- >>> os.close(fd)
-
- The template loader accepts a list of directory paths that are then used
- when searching for template files, in the given order:
-
- >>> loader = TemplateLoader([os.path.dirname(path)])
-
- The `load()` method first checks the template cache whether the requested
- template has already been loaded. If not, it attempts to locate the
- template file, and returns the corresponding `Template` object:
-
- >>> from genshi.template import MarkupTemplate
- >>> template = loader.load(os.path.basename(path))
- >>> isinstance(template, MarkupTemplate)
- True
-
- Template instances are cached: requesting a template with the same name
- results in the same instance being returned:
-
- >>> loader.load(os.path.basename(path)) is template
- True
-
- The `auto_reload` option can be used to control whether a template should
- be automatically reloaded when the file it was loaded from has been
- changed. Disable this automatic reloading to improve performance.
-
- >>> os.remove(path)
- """
- def __init__(self, search_path=None, auto_reload=False,
- default_encoding=None, max_cache_size=25, default_class=None,
- variable_lookup='strict', allow_exec=True, callback=None):
- """Create the template laoder.
-
- :param search_path: a list of absolute path names that should be
- searched for template files, or a string containing
- a single absolute path; alternatively, any item on
- the list may be a ''load function'' that is passed
- a filename and returns a file-like object and some
- metadata
- :param auto_reload: whether to check the last modification time of
- template files, and reload them if they have changed
- :param default_encoding: the default encoding to assume when loading
- templates; defaults to UTF-8
- :param max_cache_size: the maximum number of templates to keep in the
- cache
- :param default_class: the default `Template` subclass to use when
- instantiating templates
- :param variable_lookup: the variable lookup mechanism; either "strict"
- (the default), "lenient", or a custom lookup
- class
- :param allow_exec: whether to allow Python code blocks in templates
- :param callback: (optional) a callback function that is invoked after a
- template was initialized by this loader; the function
- is passed the template object as only argument. This
- callback can be used for example to add any desired
- filters to the template
- :see: `LenientLookup`, `StrictLookup`
-
- :note: Changed in 0.5: Added the `allow_exec` argument
- """
- from genshi.template.markup import MarkupTemplate
-
- self.search_path = search_path
- if self.search_path is None:
- self.search_path = []
- elif not isinstance(self.search_path, (list, tuple)):
- self.search_path = [self.search_path]
-
- self.auto_reload = auto_reload
- """Whether templates should be reloaded when the underlying file is
- changed"""
-
- self.default_encoding = default_encoding
- self.default_class = default_class or MarkupTemplate
- self.variable_lookup = variable_lookup
- self.allow_exec = allow_exec
- if callback is not None and not hasattr(callback, '__call__'):
- raise TypeError('The "callback" parameter needs to be callable')
- self.callback = callback
- self._cache = LRUCache(max_cache_size)
- self._uptodate = {}
- self._lock = threading.RLock()
-
- def __getstate__(self):
- state = self.__dict__.copy()
- state['_lock'] = None
- return state
-
- def __setstate__(self, state):
- self.__dict__ = state
- self._lock = threading.RLock()
-
- def load(self, filename, relative_to=None, cls=None, encoding=None):
- """Load the template with the given name.
-
- If the `filename` parameter is relative, this method searches the
- search path trying to locate a template matching the given name. If the
- file name is an absolute path, the search path is ignored.
-
- If the requested template is not found, a `TemplateNotFound` exception
- is raised. Otherwise, a `Template` object is returned that represents
- the parsed template.
-
- Template instances are cached to avoid having to parse the same
- template file more than once. Thus, subsequent calls of this method
- with the same template file name will return the same `Template`
- object (unless the ``auto_reload`` option is enabled and the file was
- changed since the last parse.)
-
- If the `relative_to` parameter is provided, the `filename` is
- interpreted as being relative to that path.
-
- :param filename: the relative path of the template file to load
- :param relative_to: the filename of the template from which the new
- template is being loaded, or ``None`` if the
- template is being loaded directly
- :param cls: the class of the template object to instantiate
- :param encoding: the encoding of the template to load; defaults to the
- ``default_encoding`` of the loader instance
- :return: the loaded `Template` instance
- :raises TemplateNotFound: if a template with the given name could not
- be found
- """
- if cls is None:
- cls = self.default_class
- search_path = self.search_path
-
- # Make the filename relative to the template file its being loaded
- # from, but only if that file is specified as a relative path, or no
- # search path has been set up
- if relative_to and (not search_path or not os.path.isabs(relative_to)):
- filename = os.path.join(os.path.dirname(relative_to), filename)
-
- filename = os.path.normpath(filename)
- cachekey = filename
-
- self._lock.acquire()
- try:
- # First check the cache to avoid reparsing the same file
- try:
- tmpl = self._cache[cachekey]
- if not self.auto_reload:
- return tmpl
- uptodate = self._uptodate[cachekey]
- if uptodate is not None and uptodate():
- return tmpl
- except (KeyError, OSError):
- pass
-
- isabs = False
-
- if os.path.isabs(filename):
- # Bypass the search path if the requested filename is absolute
- search_path = [os.path.dirname(filename)]
- isabs = True
-
- elif relative_to and os.path.isabs(relative_to):
- # Make sure that the directory containing the including
- # template is on the search path
- dirname = os.path.dirname(relative_to)
- if dirname not in search_path:
- search_path = list(search_path) + [dirname]
- isabs = True
-
- elif not search_path:
- # Uh oh, don't know where to look for the template
- raise TemplateError('Search path for templates not configured')
-
- for loadfunc in search_path:
- if isinstance(loadfunc, basestring):
- loadfunc = directory(loadfunc)
- try:
- filepath, filename, fileobj, uptodate = loadfunc(filename)
- except IOError:
- continue
- else:
- try:
- if isabs:
- # If the filename of either the included or the
- # including template is absolute, make sure the
- # included template gets an absolute path, too,
- # so that nested includes work properly without a
- # search path
- filename = filepath
- tmpl = self._instantiate(cls, fileobj, filepath,
- filename, encoding=encoding)
- if self.callback:
- self.callback(tmpl)
- self._cache[cachekey] = tmpl
- self._uptodate[cachekey] = uptodate
- finally:
- if hasattr(fileobj, 'close'):
- fileobj.close()
- return tmpl
-
- raise TemplateNotFound(filename, search_path)
-
- finally:
- self._lock.release()
-
- def _instantiate(self, cls, fileobj, filepath, filename, encoding=None):
- """Instantiate and return the `Template` object based on the given
- class and parameters.
-
- This function is intended for subclasses to override if they need to
- implement special template instantiation logic. Code that just uses
- the `TemplateLoader` should use the `load` method instead.
-
- :param cls: the class of the template object to instantiate
- :param fileobj: a readable file-like object containing the template
- source
- :param filepath: the absolute path to the template file
- :param filename: the path to the template file relative to the search
- path
- :param encoding: the encoding of the template to load; defaults to the
- ``default_encoding`` of the loader instance
- :return: the loaded `Template` instance
- :rtype: `Template`
- """
- if encoding is None:
- encoding = self.default_encoding
- return cls(fileobj, filepath=filepath, filename=filename, loader=self,
- encoding=encoding, lookup=self.variable_lookup,
- allow_exec=self.allow_exec)
-
- @staticmethod
- def directory(path):
- """Loader factory for loading templates from a local directory.
-
- :param path: the path to the local directory containing the templates
- :return: the loader function to load templates from the given directory
- :rtype: ``function``
- """
- def _load_from_directory(filename):
- filepath = os.path.join(path, filename)
- fileobj = open(filepath, 'U')
- mtime = os.path.getmtime(filepath)
- def _uptodate():
- return mtime == os.path.getmtime(filepath)
- return filepath, filename, fileobj, _uptodate
- return _load_from_directory
-
- @staticmethod
- def package(name, path):
- """Loader factory for loading templates from egg package data.
-
- :param name: the name of the package containing the resources
- :param path: the path inside the package data
- :return: the loader function to load templates from the given package
- :rtype: ``function``
- """
- from pkg_resources import resource_stream
- def _load_from_package(filename):
- filepath = os.path.join(path, filename)
- return filepath, filename, resource_stream(name, filepath), None
- return _load_from_package
-
- @staticmethod
- def prefixed(**delegates):
- """Factory for a load function that delegates to other loaders
- depending on the prefix of the requested template path.
-
- The prefix is stripped from the filename when passing on the load
- request to the delegate.
-
- >>> load = prefixed(
- ... app1 = lambda filename: ('app1', filename, None, None),
- ... app2 = lambda filename: ('app2', filename, None, None)
- ... )
- >>> print(load('app1/foo.html'))
- ('app1', 'app1/foo.html', None, None)
- >>> print(load('app2/bar.html'))
- ('app2', 'app2/bar.html', None, None)
-
- :param delegates: mapping of path prefixes to loader functions
- :return: the loader function
- :rtype: ``function``
- """
- def _dispatch_by_prefix(filename):
- for prefix, delegate in delegates.items():
- if filename.startswith(prefix):
- if isinstance(delegate, basestring):
- delegate = directory(delegate)
- filepath, _, fileobj, uptodate = delegate(
- filename[len(prefix):].lstrip('/\\')
- )
- return filepath, filename, fileobj, uptodate
- raise TemplateNotFound(filename, list(delegates.keys()))
- return _dispatch_by_prefix
-
-
-directory = TemplateLoader.directory
-package = TemplateLoader.package
-prefixed = TemplateLoader.prefixed
diff --git a/genshi/template/markup.py b/genshi/template/markup.py
deleted file mode 100644
index 0e31632..0000000
--- a/genshi/template/markup.py
+++ /dev/null
@@ -1,397 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2010 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Markup templating engine."""
-
-from itertools import chain
-
-from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind
-from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT
-from genshi.input import XMLParser
-from genshi.template.base import BadDirectiveError, Template, \
- TemplateSyntaxError, _apply_directives, \
- EXEC, INCLUDE, SUB
-from genshi.template.eval import Suite
-from genshi.template.interpolation import interpolate
-from genshi.template.directives import *
-from genshi.template.text import NewTextTemplate
-
-__all__ = ['MarkupTemplate']
-__docformat__ = 'restructuredtext en'
-
-
-class MarkupTemplate(Template):
- """Implementation of the template language for XML-based templates.
-
- >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
- ... <li py:for="item in items">${item}</li>
- ... </ul>''')
- >>> print(tmpl.generate(items=[1, 2, 3]))
- <ul>
- <li>1</li><li>2</li><li>3</li>
- </ul>
- """
-
- DIRECTIVE_NAMESPACE = 'http://genshi.edgewall.org/'
- XINCLUDE_NAMESPACE = 'http://www.w3.org/2001/XInclude'
-
- directives = [('def', DefDirective),
- ('match', MatchDirective),
- ('when', WhenDirective),
- ('otherwise', OtherwiseDirective),
- ('for', ForDirective),
- ('if', IfDirective),
- ('choose', ChooseDirective),
- ('with', WithDirective),
- ('replace', ReplaceDirective),
- ('content', ContentDirective),
- ('attrs', AttrsDirective),
- ('strip', StripDirective)]
- serializer = 'xml'
- _number_conv = Markup
-
- def __init__(self, source, filepath=None, filename=None, loader=None,
- encoding=None, lookup='strict', allow_exec=True):
- Template.__init__(self, source, filepath=filepath, filename=filename,
- loader=loader, encoding=encoding, lookup=lookup,
- allow_exec=allow_exec)
- self.add_directives(self.DIRECTIVE_NAMESPACE, self)
-
- def _init_filters(self):
- Template._init_filters(self)
- # Make sure the include filter comes after the match filter
- self.filters.remove(self._include)
- self.filters += [self._match, self._include]
-
- def _parse(self, source, encoding):
- if not isinstance(source, Stream):
- source = XMLParser(source, filename=self.filename,
- encoding=encoding)
- stream = []
-
- for kind, data, pos in source:
-
- if kind is TEXT:
- for kind, data, pos in interpolate(data, self.filepath, pos[1],
- pos[2], lookup=self.lookup):
- stream.append((kind, data, pos))
-
- elif kind is PI and data[0] == 'python':
- if not self.allow_exec:
- raise TemplateSyntaxError('Python code blocks not allowed',
- self.filepath, *pos[1:])
- try:
- suite = Suite(data[1], self.filepath, pos[1],
- lookup=self.lookup)
- except SyntaxError, err:
- raise TemplateSyntaxError(err, self.filepath,
- pos[1] + (err.lineno or 1) - 1,
- pos[2] + (err.offset or 0))
- stream.append((EXEC, suite, pos))
-
- elif kind is COMMENT:
- if not data.lstrip().startswith('!'):
- stream.append((kind, data, pos))
-
- else:
- stream.append((kind, data, pos))
-
- return stream
-
- def _extract_directives(self, stream, namespace, factory):
- depth = 0
- dirmap = {} # temporary mapping of directives to elements
- new_stream = []
- ns_prefix = {} # namespace prefixes in use
-
- for kind, data, pos in stream:
-
- if kind is START:
- tag, attrs = data
- directives = []
- strip = False
-
- if tag.namespace == namespace:
- cls = factory.get_directive(tag.localname)
- if cls is None:
- raise BadDirectiveError(tag.localname,
- self.filepath, pos[1])
- args = dict([(name.localname, value) for name, value
- in attrs if not name.namespace])
- directives.append((factory.get_directive_index(cls), cls,
- args, ns_prefix.copy(), pos))
- strip = True
-
- new_attrs = []
- for name, value in attrs:
- if name.namespace == namespace:
- cls = factory.get_directive(name.localname)
- if cls is None:
- raise BadDirectiveError(name.localname,
- self.filepath, pos[1])
- if type(value) is list and len(value) == 1:
- value = value[0][1]
- directives.append((factory.get_directive_index(cls),
- cls, value, ns_prefix.copy(), pos))
- else:
- new_attrs.append((name, value))
- new_attrs = Attrs(new_attrs)
-
- if directives:
- directives.sort()
- dirmap[(depth, tag)] = (directives, len(new_stream),
- strip)
-
- new_stream.append((kind, (tag, new_attrs), pos))
- depth += 1
-
- elif kind is END:
- depth -= 1
- new_stream.append((kind, data, pos))
-
- # If there have have directive attributes with the
- # corresponding start tag, move the events inbetween into
- # a "subprogram"
- if (depth, data) in dirmap:
- directives, offset, strip = dirmap.pop((depth, data))
- substream = new_stream[offset:]
- if strip:
- substream = substream[1:-1]
- new_stream[offset:] = [
- (SUB, (directives, substream), pos)
- ]
-
- elif kind is SUB:
- directives, substream = data
- substream = self._extract_directives(substream, namespace,
- factory)
-
- if len(substream) == 1 and substream[0][0] is SUB:
- added_directives, substream = substream[0][1]
- directives += added_directives
-
- new_stream.append((kind, (directives, substream), pos))
-
- elif kind is START_NS:
- # Strip out the namespace declaration for template
- # directives
- prefix, uri = data
- ns_prefix[prefix] = uri
- if uri != namespace:
- new_stream.append((kind, data, pos))
-
- elif kind is END_NS:
- uri = ns_prefix.pop(data, None)
- if uri and uri != namespace:
- new_stream.append((kind, data, pos))
-
- else:
- new_stream.append((kind, data, pos))
-
- return new_stream
-
- def _extract_includes(self, stream):
- streams = [[]] # stacked lists of events of the "compiled" template
- prefixes = {}
- fallbacks = []
- includes = []
- xinclude_ns = Namespace(self.XINCLUDE_NAMESPACE)
-
- for kind, data, pos in stream:
- stream = streams[-1]
-
- if kind is START:
- # Record any directive attributes in start tags
- tag, attrs = data
- if tag in xinclude_ns:
- if tag.localname == 'include':
- include_href = attrs.get('href')
- if not include_href:
- raise TemplateSyntaxError('Include misses required '
- 'attribute "href"',
- self.filepath, *pos[1:])
- includes.append((include_href, attrs.get('parse')))
- streams.append([])
- elif tag.localname == 'fallback':
- streams.append([])
- fallbacks.append(streams[-1])
- else:
- stream.append((kind, (tag, attrs), pos))
-
- elif kind is END:
- if fallbacks and data == xinclude_ns['fallback']:
- assert streams.pop() is fallbacks[-1]
- elif data == xinclude_ns['include']:
- fallback = None
- if len(fallbacks) == len(includes):
- fallback = fallbacks.pop()
- streams.pop() # discard anything between the include tags
- # and the fallback element
- stream = streams[-1]
- href, parse = includes.pop()
- try:
- cls = {
- 'xml': MarkupTemplate,
- 'text': NewTextTemplate
- }.get(parse) or self.__class__
- except KeyError:
- raise TemplateSyntaxError('Invalid value for "parse" '
- 'attribute of include',
- self.filepath, *pos[1:])
- stream.append((INCLUDE, (href, cls, fallback), pos))
- else:
- stream.append((kind, data, pos))
-
- elif kind is START_NS and data[1] == xinclude_ns:
- # Strip out the XInclude namespace
- prefixes[data[0]] = data[1]
-
- elif kind is END_NS and data in prefixes:
- prefixes.pop(data)
-
- else:
- stream.append((kind, data, pos))
-
- assert len(streams) == 1
- return streams[0]
-
- def _interpolate_attrs(self, stream):
- for kind, data, pos in stream:
-
- if kind is START:
- # Record any directive attributes in start tags
- tag, attrs = data
- new_attrs = []
- for name, value in attrs:
- if value:
- value = list(interpolate(value, self.filepath, pos[1],
- pos[2], lookup=self.lookup))
- if len(value) == 1 and value[0][0] is TEXT:
- value = value[0][1]
- new_attrs.append((name, value))
- data = tag, Attrs(new_attrs)
-
- yield kind, data, pos
-
- def _prepare(self, stream):
- return Template._prepare(self,
- self._extract_includes(self._interpolate_attrs(stream))
- )
-
- def add_directives(self, namespace, factory):
- """Register a custom `DirectiveFactory` for a given namespace.
-
- :param namespace: the namespace URI
- :type namespace: `basestring`
- :param factory: the directive factory to register
- :type factory: `DirectiveFactory`
- :since: version 0.6
- """
- assert not self._prepared, 'Too late for adding directives, ' \
- 'template already prepared'
- self._stream = self._extract_directives(self._stream, namespace,
- factory)
-
- def _match(self, stream, ctxt, start=0, end=None, **vars):
- """Internal stream filter that applies any defined match templates
- to the stream.
- """
- match_templates = ctxt._match_templates
-
- tail = []
- def _strip(stream, append=tail.append):
- depth = 1
- next = stream.next
- while 1:
- event = next()
- if event[0] is START:
- depth += 1
- elif event[0] is END:
- depth -= 1
- if depth > 0:
- yield event
- else:
- append(event)
- break
-
- for event in stream:
-
- # We (currently) only care about start and end events for matching
- # We might care about namespace events in the future, though
- if not match_templates or (event[0] is not START and
- event[0] is not END):
- yield event
- continue
-
- for idx, (test, path, template, hints, namespaces, directives) \
- in enumerate(match_templates):
- if idx < start or end is not None and idx >= end:
- continue
-
- if test(event, namespaces, ctxt) is True:
- if 'match_once' in hints:
- del match_templates[idx]
- idx -= 1
-
- # Let the remaining match templates know about the event so
- # they get a chance to update their internal state
- for test in [mt[0] for mt in match_templates[idx + 1:]]:
- test(event, namespaces, ctxt, updateonly=True)
-
- # Consume and store all events until an end event
- # corresponding to this start event is encountered
- pre_end = idx + 1
- if 'match_once' not in hints and 'not_recursive' in hints:
- pre_end -= 1
- inner = _strip(stream)
- if pre_end > 0:
- inner = self._match(inner, ctxt, start=start,
- end=pre_end, **vars)
- content = self._include(chain([event], inner, tail), ctxt)
- if 'not_buffered' not in hints:
- content = list(content)
- content = Stream(content)
-
- # Make the select() function available in the body of the
- # match template
- selected = [False]
- def select(path):
- selected[0] = True
- return content.select(path, namespaces, ctxt)
- vars = dict(select=select)
-
- # Recursively process the output
- template = _apply_directives(template, directives, ctxt,
- vars)
- for event in self._match(self._flatten(template, ctxt,
- **vars),
- ctxt, start=idx + 1, **vars):
- yield event
-
- # If the match template did not actually call select to
- # consume the matched stream, the original events need to
- # be consumed here or they'll get appended to the output
- if not selected[0]:
- for event in content:
- pass
-
- # Let the remaining match templates know about the last
- # event in the matched content, so they can update their
- # internal state accordingly
- for test in [mt[0] for mt in match_templates[idx + 1:]]:
- test(tail[0], namespaces, ctxt, updateonly=True)
-
- break
-
- else: # no matches
- yield event
diff --git a/genshi/template/plugin.py b/genshi/template/plugin.py
deleted file mode 100644
index 70d56af..0000000
--- a/genshi/template/plugin.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# Copyright (C) 2006 Matthew Good
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Basic support for the template engine plugin API used by TurboGears and
-CherryPy/Buffet.
-"""
-
-from genshi.input import ET, HTML, XML
-from genshi.output import DocType
-from genshi.template.base import Template
-from genshi.template.loader import TemplateLoader
-from genshi.template.markup import MarkupTemplate
-from genshi.template.text import TextTemplate, NewTextTemplate
-
-__all__ = ['ConfigurationError', 'AbstractTemplateEnginePlugin',
- 'MarkupTemplateEnginePlugin', 'TextTemplateEnginePlugin']
-__docformat__ = 'restructuredtext en'
-
-
-class ConfigurationError(ValueError):
- """Exception raised when invalid plugin options are encountered."""
-
-
-class AbstractTemplateEnginePlugin(object):
- """Implementation of the plugin API."""
-
- template_class = None
- extension = None
-
- def __init__(self, extra_vars_func=None, options=None):
- self.get_extra_vars = extra_vars_func
- if options is None:
- options = {}
- self.options = options
-
- self.default_encoding = options.get('genshi.default_encoding', 'utf-8')
- auto_reload = options.get('genshi.auto_reload', '1')
- if isinstance(auto_reload, basestring):
- auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true')
- search_path = [p for p in
- options.get('genshi.search_path', '').split(':') if p]
- self.use_package_naming = not search_path
- try:
- max_cache_size = int(options.get('genshi.max_cache_size', 25))
- except ValueError:
- raise ConfigurationError('Invalid value for max_cache_size: "%s"' %
- options.get('genshi.max_cache_size'))
-
- loader_callback = options.get('genshi.loader_callback', None)
- if loader_callback and not hasattr(loader_callback, '__call__'):
- raise ConfigurationError('loader callback must be a function')
-
- lookup_errors = options.get('genshi.lookup_errors', 'strict')
- if lookup_errors not in ('lenient', 'strict'):
- raise ConfigurationError('Unknown lookup errors mode "%s"' %
- lookup_errors)
-
- try:
- allow_exec = bool(options.get('genshi.allow_exec', True))
- except ValueError:
- raise ConfigurationError('Invalid value for allow_exec "%s"' %
- options.get('genshi.allow_exec'))
-
- self.loader = TemplateLoader([p for p in search_path if p],
- auto_reload=auto_reload,
- max_cache_size=max_cache_size,
- default_class=self.template_class,
- variable_lookup=lookup_errors,
- allow_exec=allow_exec,
- callback=loader_callback)
-
- def load_template(self, templatename, template_string=None):
- """Find a template specified in python 'dot' notation, or load one from
- a string.
- """
- if template_string is not None:
- return self.template_class(template_string)
-
- if self.use_package_naming:
- divider = templatename.rfind('.')
- if divider >= 0:
- from pkg_resources import resource_filename
- package = templatename[:divider]
- basename = templatename[divider + 1:] + self.extension
- templatename = resource_filename(package, basename)
-
- return self.loader.load(templatename)
-
- def _get_render_options(self, format=None, fragment=False):
- if format is None:
- format = self.default_format
- kwargs = {'method': format}
- if self.default_encoding:
- kwargs['encoding'] = self.default_encoding
- return kwargs
-
- def render(self, info, format=None, fragment=False, template=None):
- """Render the template to a string using the provided info."""
- kwargs = self._get_render_options(format=format, fragment=fragment)
- return self.transform(info, template).render(**kwargs)
-
- def transform(self, info, template):
- """Render the output to an event stream."""
- if not isinstance(template, Template):
- template = self.load_template(template)
- return template.generate(**info)
-
-
-class MarkupTemplateEnginePlugin(AbstractTemplateEnginePlugin):
- """Implementation of the plugin API for markup templates."""
-
- template_class = MarkupTemplate
- extension = '.html'
-
- def __init__(self, extra_vars_func=None, options=None):
- AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)
-
- default_doctype = self.options.get('genshi.default_doctype')
- if default_doctype:
- doctype = DocType.get(default_doctype)
- if doctype is None:
- raise ConfigurationError('Unknown doctype %r' % default_doctype)
- self.default_doctype = doctype
- else:
- self.default_doctype = None
-
- format = self.options.get('genshi.default_format', 'html').lower()
- if format not in ('html', 'xhtml', 'xml', 'text'):
- raise ConfigurationError('Unknown output format %r' % format)
- self.default_format = format
-
- def _get_render_options(self, format=None, fragment=False):
- kwargs = super(MarkupTemplateEnginePlugin,
- self)._get_render_options(format, fragment)
- if self.default_doctype and not fragment:
- kwargs['doctype'] = self.default_doctype
- return kwargs
-
- def transform(self, info, template):
- """Render the output to an event stream."""
- data = {'ET': ET, 'HTML': HTML, 'XML': XML}
- if self.get_extra_vars:
- data.update(self.get_extra_vars())
- data.update(info)
- return super(MarkupTemplateEnginePlugin, self).transform(data, template)
-
-
-class TextTemplateEnginePlugin(AbstractTemplateEnginePlugin):
- """Implementation of the plugin API for text templates."""
-
- template_class = TextTemplate
- extension = '.txt'
- default_format = 'text'
-
- def __init__(self, extra_vars_func=None, options=None):
- if options is None:
- options = {}
-
- new_syntax = options.get('genshi.new_text_syntax')
- if isinstance(new_syntax, basestring):
- new_syntax = new_syntax.lower() in ('1', 'on', 'yes', 'true')
- if new_syntax:
- self.template_class = NewTextTemplate
-
- AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)
diff --git a/genshi/template/text.py b/genshi/template/text.py
deleted file mode 100644
index 746226c..0000000
--- a/genshi/template/text.py
+++ /dev/null
@@ -1,333 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Plain text templating engine.
-
-This module implements two template language syntaxes, at least for a certain
-transitional period. `OldTextTemplate` (aliased to just `TextTemplate`) defines
-a syntax that was inspired by Cheetah/Velocity. `NewTextTemplate` on the other
-hand is inspired by the syntax of the Django template language, which has more
-explicit delimiting of directives, and is more flexible with regards to
-white space and line breaks.
-
-In a future release, `OldTextTemplate` will be phased out in favor of
-`NewTextTemplate`, as the names imply. Therefore the new syntax is strongly
-recommended for new projects, and existing projects may want to migrate to the
-new syntax to remain compatible with future Genshi releases.
-"""
-
-import re
-
-from genshi.core import TEXT
-from genshi.template.base import BadDirectiveError, Template, \
- TemplateSyntaxError, EXEC, INCLUDE, SUB
-from genshi.template.eval import Suite
-from genshi.template.directives import *
-from genshi.template.directives import Directive
-from genshi.template.interpolation import interpolate
-
-__all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate']
-__docformat__ = 'restructuredtext en'
-
-
-class NewTextTemplate(Template):
- r"""Implementation of a simple text-based template engine. This class will
- replace `OldTextTemplate` in a future release.
-
- It uses a more explicit delimiting style for directives: instead of the old
- style which required putting directives on separate lines that were prefixed
- with a ``#`` sign, directives and commenbtsr are enclosed in delimiter pairs
- (by default ``{% ... %}`` and ``{# ... #}``, respectively).
-
- Variable substitution uses the same interpolation syntax as for markup
- languages: simple references are prefixed with a dollar sign, more complex
- expression enclosed in curly braces.
-
- >>> tmpl = NewTextTemplate('''Dear $name,
- ...
- ... {# This is a comment #}
- ... We have the following items for you:
- ... {% for item in items %}
- ... * ${'Item %d' % item}
- ... {% end %}
- ... ''')
- >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
- Dear Joe,
- <BLANKLINE>
- <BLANKLINE>
- We have the following items for you:
- <BLANKLINE>
- * Item 1
- <BLANKLINE>
- * Item 2
- <BLANKLINE>
- * Item 3
- <BLANKLINE>
- <BLANKLINE>
-
- By default, no spaces or line breaks are removed. If a line break should
- not be included in the output, prefix it with a backslash:
-
- >>> tmpl = NewTextTemplate('''Dear $name,
- ...
- ... {# This is a comment #}\
- ... We have the following items for you:
- ... {% for item in items %}\
- ... * $item
- ... {% end %}\
- ... ''')
- >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
- Dear Joe,
- <BLANKLINE>
- We have the following items for you:
- * 1
- * 2
- * 3
- <BLANKLINE>
-
- Backslashes are also used to escape the start delimiter of directives and
- comments:
-
- >>> tmpl = NewTextTemplate('''Dear $name,
- ...
- ... \{# This is a comment #}
- ... We have the following items for you:
- ... {% for item in items %}\
- ... * $item
- ... {% end %}\
- ... ''')
- >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
- Dear Joe,
- <BLANKLINE>
- {# This is a comment #}
- We have the following items for you:
- * 1
- * 2
- * 3
- <BLANKLINE>
-
- :since: version 0.5
- """
- directives = [('def', DefDirective),
- ('when', WhenDirective),
- ('otherwise', OtherwiseDirective),
- ('for', ForDirective),
- ('if', IfDirective),
- ('choose', ChooseDirective),
- ('with', WithDirective)]
- serializer = 'text'
-
- _DIRECTIVE_RE = r'((?<!\\)%s\s*(\w+)\s*(.*?)\s*%s|(?<!\\)%s.*?%s)'
- _ESCAPE_RE = r'\\\n|\\(\\)|\\(%s)|\\(%s)'
-
- def __init__(self, source, filepath=None, filename=None, loader=None,
- encoding=None, lookup='strict', allow_exec=False,
- delims=('{%', '%}', '{#', '#}')):
- self.delimiters = delims
- Template.__init__(self, source, filepath=filepath, filename=filename,
- loader=loader, encoding=encoding, lookup=lookup)
-
- def _get_delims(self):
- return self._delims
- def _set_delims(self, delims):
- if len(delims) != 4:
- raise ValueError('delimiers tuple must have exactly four elements')
- self._delims = delims
- self._directive_re = re.compile(self._DIRECTIVE_RE % tuple(
- [re.escape(d) for d in delims]
- ), re.DOTALL)
- self._escape_re = re.compile(self._ESCAPE_RE % tuple(
- [re.escape(d) for d in delims[::2]]
- ))
- delimiters = property(_get_delims, _set_delims, """\
- The delimiters for directives and comments. This should be a four item tuple
- of the form ``(directive_start, directive_end, comment_start,
- comment_end)``, where each item is a string.
- """)
-
- def _parse(self, source, encoding):
- """Parse the template from text input."""
- stream = [] # list of events of the "compiled" template
- dirmap = {} # temporary mapping of directives to elements
- depth = 0
-
- source = source.read()
- if isinstance(source, str):
- source = source.decode(encoding or 'utf-8', 'replace')
- offset = 0
- lineno = 1
-
- _escape_sub = self._escape_re.sub
- def _escape_repl(mo):
- groups = [g for g in mo.groups() if g]
- if not groups:
- return ''
- return groups[0]
-
- for idx, mo in enumerate(self._directive_re.finditer(source)):
- start, end = mo.span(1)
- if start > offset:
- text = _escape_sub(_escape_repl, source[offset:start])
- for kind, data, pos in interpolate(text, self.filepath, lineno,
- lookup=self.lookup):
- stream.append((kind, data, pos))
- lineno += len(text.splitlines())
-
- lineno += len(source[start:end].splitlines())
- command, value = mo.group(2, 3)
-
- if command == 'include':
- pos = (self.filename, lineno, 0)
- value = list(interpolate(value, self.filepath, lineno, 0,
- lookup=self.lookup))
- if len(value) == 1 and value[0][0] is TEXT:
- value = value[0][1]
- stream.append((INCLUDE, (value, None, []), pos))
-
- elif command == 'python':
- if not self.allow_exec:
- raise TemplateSyntaxError('Python code blocks not allowed',
- self.filepath, lineno)
- try:
- suite = Suite(value, self.filepath, lineno,
- lookup=self.lookup)
- except SyntaxError, err:
- raise TemplateSyntaxError(err, self.filepath,
- lineno + (err.lineno or 1) - 1)
- pos = (self.filename, lineno, 0)
- stream.append((EXEC, suite, pos))
-
- elif command == 'end':
- depth -= 1
- if depth in dirmap:
- directive, start_offset = dirmap.pop(depth)
- substream = stream[start_offset:]
- stream[start_offset:] = [(SUB, ([directive], substream),
- (self.filepath, lineno, 0))]
-
- elif command:
- cls = self.get_directive(command)
- if cls is None:
- raise BadDirectiveError(command)
- directive = 0, cls, value, None, (self.filepath, lineno, 0)
- dirmap[depth] = (directive, len(stream))
- depth += 1
-
- offset = end
-
- if offset < len(source):
- text = _escape_sub(_escape_repl, source[offset:])
- for kind, data, pos in interpolate(text, self.filepath, lineno,
- lookup=self.lookup):
- stream.append((kind, data, pos))
-
- return stream
-
-
-class OldTextTemplate(Template):
- """Legacy implementation of the old syntax text-based templates. This class
- is provided in a transition phase for backwards compatibility. New code
- should use the `NewTextTemplate` class and the improved syntax it provides.
-
- >>> tmpl = OldTextTemplate('''Dear $name,
- ...
- ... We have the following items for you:
- ... #for item in items
- ... * $item
- ... #end
- ...
- ... All the best,
- ... Foobar''')
- >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None))
- Dear Joe,
- <BLANKLINE>
- We have the following items for you:
- * 1
- * 2
- * 3
- <BLANKLINE>
- All the best,
- Foobar
- """
- directives = [('def', DefDirective),
- ('when', WhenDirective),
- ('otherwise', OtherwiseDirective),
- ('for', ForDirective),
- ('if', IfDirective),
- ('choose', ChooseDirective),
- ('with', WithDirective)]
- serializer = 'text'
-
- _DIRECTIVE_RE = re.compile(r'(?:^[ \t]*(?<!\\)#(end).*\n?)|'
- r'(?:^[ \t]*(?<!\\)#((?:\w+|#).*)\n?)',
- re.MULTILINE)
-
- def _parse(self, source, encoding):
- """Parse the template from text input."""
- stream = [] # list of events of the "compiled" template
- dirmap = {} # temporary mapping of directives to elements
- depth = 0
-
- source = source.read()
- if isinstance(source, str):
- source = source.decode(encoding or 'utf-8', 'replace')
- offset = 0
- lineno = 1
-
- for idx, mo in enumerate(self._DIRECTIVE_RE.finditer(source)):
- start, end = mo.span()
- if start > offset:
- text = source[offset:start]
- for kind, data, pos in interpolate(text, self.filepath, lineno,
- lookup=self.lookup):
- stream.append((kind, data, pos))
- lineno += len(text.splitlines())
-
- text = source[start:end].lstrip()[1:]
- lineno += len(text.splitlines())
- directive = text.split(None, 1)
- if len(directive) > 1:
- command, value = directive
- else:
- command, value = directive[0], None
-
- if command == 'end':
- depth -= 1
- if depth in dirmap:
- directive, start_offset = dirmap.pop(depth)
- substream = stream[start_offset:]
- stream[start_offset:] = [(SUB, ([directive], substream),
- (self.filepath, lineno, 0))]
- elif command == 'include':
- pos = (self.filename, lineno, 0)
- stream.append((INCLUDE, (value.strip(), None, []), pos))
- elif command != '#':
- cls = self.get_directive(command)
- if cls is None:
- raise BadDirectiveError(command)
- directive = 0, cls, value, None, (self.filepath, lineno, 0)
- dirmap[depth] = (directive, len(stream))
- depth += 1
-
- offset = end
-
- if offset < len(source):
- text = source[offset:].replace('\\#', '#')
- for kind, data, pos in interpolate(text, self.filepath, lineno,
- lookup=self.lookup):
- stream.append((kind, data, pos))
-
- return stream
-
-
-TextTemplate = OldTextTemplate
diff --git a/genshi/util.py b/genshi/util.py
deleted file mode 100644
index b964a01..0000000
--- a/genshi/util.py
+++ /dev/null
@@ -1,274 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-"""Various utility classes and functions."""
-
-import htmlentitydefs as entities
-import re
-
-__docformat__ = 'restructuredtext en'
-
-
-class LRUCache(dict):
- """A dictionary-like object that stores only a certain number of items, and
- discards its least recently used item when full.
-
- >>> cache = LRUCache(3)
- >>> cache['A'] = 0
- >>> cache['B'] = 1
- >>> cache['C'] = 2
- >>> len(cache)
- 3
-
- >>> cache['A']
- 0
-
- Adding new items to the cache does not increase its size. Instead, the least
- recently used item is dropped:
-
- >>> cache['D'] = 3
- >>> len(cache)
- 3
- >>> 'B' in cache
- False
-
- Iterating over the cache returns the keys, starting with the most recently
- used:
-
- >>> for key in cache:
- ... print(key)
- D
- A
- C
-
- This code is based on the LRUCache class from ``myghtyutils.util``, written
- by Mike Bayer and released under the MIT license. See:
-
- http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
- """
-
- class _Item(object):
- def __init__(self, key, value):
- self.prv = self.nxt = None
- self.key = key
- self.value = value
- def __repr__(self):
- return repr(self.value)
-
- def __init__(self, capacity):
- self._dict = dict()
- self.capacity = capacity
- self.head = None
- self.tail = None
-
- def __contains__(self, key):
- return key in self._dict
-
- def __iter__(self):
- cur = self.head
- while cur:
- yield cur.key
- cur = cur.nxt
-
- def __len__(self):
- return len(self._dict)
-
- def __getitem__(self, key):
- item = self._dict[key]
- self._update_item(item)
- return item.value
-
- def __setitem__(self, key, value):
- item = self._dict.get(key)
- if item is None:
- item = self._Item(key, value)
- self._dict[key] = item
- self._insert_item(item)
- else:
- item.value = value
- self._update_item(item)
- self._manage_size()
-
- def __repr__(self):
- return repr(self._dict)
-
- def _insert_item(self, item):
- item.prv = None
- item.nxt = self.head
- if self.head is not None:
- self.head.prv = item
- else:
- self.tail = item
- self.head = item
- self._manage_size()
-
- def _manage_size(self):
- while len(self._dict) > self.capacity:
- olditem = self._dict[self.tail.key]
- del self._dict[self.tail.key]
- if self.tail != self.head:
- self.tail = self.tail.prv
- self.tail.nxt = None
- else:
- self.head = self.tail = None
-
- def _update_item(self, item):
- if self.head == item:
- return
-
- prv = item.prv
- prv.nxt = item.nxt
- if item.nxt is not None:
- item.nxt.prv = prv
- else:
- self.tail = prv
-
- item.prv = None
- item.nxt = self.head
- self.head.prv = self.head = item
-
-
-def flatten(items):
- """Flattens a potentially nested sequence into a flat list.
-
- :param items: the sequence to flatten
-
- >>> flatten((1, 2))
- [1, 2]
- >>> flatten([1, (2, 3), 4])
- [1, 2, 3, 4]
- >>> flatten([1, (2, [3, 4]), 5])
- [1, 2, 3, 4, 5]
- """
- retval = []
- for item in items:
- if isinstance(item, (frozenset, list, set, tuple)):
- retval += flatten(item)
- else:
- retval.append(item)
- return retval
-
-
-def plaintext(text, keeplinebreaks=True):
- """Return the text with all entities and tags removed.
-
- >>> plaintext('<b>1 &lt; 2</b>')
- u'1 < 2'
-
- The `keeplinebreaks` parameter can be set to ``False`` to replace any line
- breaks by simple spaces:
-
- >>> plaintext('''<b>1
- ... &lt;
- ... 2</b>''', keeplinebreaks=False)
- u'1 < 2'
-
- :param text: the text to convert to plain text
- :param keeplinebreaks: whether line breaks in the text should be kept intact
- :return: the text with tags and entities removed
- """
- text = stripentities(striptags(text))
- if not keeplinebreaks:
- text = text.replace('\n', ' ')
- return text
-
-
-_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
-def stripentities(text, keepxmlentities=False):
- """Return a copy of the given text with any character or numeric entities
- replaced by the equivalent UTF-8 characters.
-
- >>> stripentities('1 &lt; 2')
- u'1 < 2'
- >>> stripentities('more &hellip;')
- u'more \u2026'
- >>> stripentities('&#8230;')
- u'\u2026'
- >>> stripentities('&#x2026;')
- u'\u2026'
-
- If the `keepxmlentities` parameter is provided and is a truth value, the
- core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.
-
- >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
- u'1 &lt; 2 \u2026'
- """
- def _replace_entity(match):
- if match.group(1): # numeric entity
- ref = match.group(1)
- if ref.startswith('x'):
- ref = int(ref[1:], 16)
- else:
- ref = int(ref, 10)
- return unichr(ref)
- else: # character entity
- ref = match.group(2)
- if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
- return '&%s;' % ref
- try:
- return unichr(entities.name2codepoint[ref])
- except KeyError:
- if keepxmlentities:
- return '&amp;%s;' % ref
- else:
- return ref
- return _STRIPENTITIES_RE.sub(_replace_entity, text)
-
-
-_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)')
-def striptags(text):
- """Return a copy of the text with any XML/HTML tags removed.
-
- >>> striptags('<span>Foo</span> bar')
- 'Foo bar'
- >>> striptags('<span class="bar">Foo</span>')
- 'Foo'
- >>> striptags('Foo<br />')
- 'Foo'
-
- HTML/XML comments are stripped, too:
-
- >>> striptags('<!-- <blub>hehe</blah> -->test')
- 'test'
-
- :param text: the string to remove tags from
- :return: the text with tags removed
- """
- return _STRIPTAGS_RE.sub('', text)
-
-
-def stringrepr(string):
- ascii = string.encode('ascii', 'backslashreplace')
- quoted = "'" + ascii.replace("'", "\\'") + "'"
- if len(ascii) > len(string):
- return 'u' + quoted
- return quoted
-
-
-# Compatibility fallback implementations for older Python versions
-
-try:
- all = all
- any = any
-except NameError:
- def any(S):
- for x in S:
- if x:
- return True
- return False
-
- def all(S):
- for x in S:
- if not x:
- return False
- return True