diff options
author | Sebastian Silva <sebastian@sugarlabs.org> | 2011-09-28 00:19:33 (GMT) |
---|---|---|
committer | Sebastian Silva <sebastian@sugarlabs.org> | 2011-09-28 06:54:34 (GMT) |
commit | 5861585e94a32b3032ac473804bf90c6e1363940 (patch) | |
tree | fb3a5bab0d75bf8eb780e749737fea87369754db /genshi | |
parent | be7aa93d7ba3682d5189e1a7d72169c0b02a1ec1 (diff) |
Migrated to Flask, added JQuery sugar theme, fixed race condition
Diffstat (limited to 'genshi')
-rw-r--r-- | genshi/__init__.py | 26 | ||||
-rw-r--r-- | genshi/builder.py | 359 | ||||
-rw-r--r-- | genshi/core.py | 727 | ||||
-rw-r--r-- | genshi/filters/__init__.py | 20 | ||||
-rw-r--r-- | genshi/filters/html.py | 453 | ||||
-rw-r--r-- | genshi/filters/i18n.py | 1238 | ||||
-rw-r--r-- | genshi/filters/transform.py | 1310 | ||||
-rw-r--r-- | genshi/input.py | 443 | ||||
-rw-r--r-- | genshi/output.py | 838 | ||||
-rw-r--r-- | genshi/path.py | 1528 | ||||
-rw-r--r-- | genshi/template/__init__.py | 23 | ||||
-rw-r--r-- | genshi/template/_ast24.py | 446 | ||||
-rw-r--r-- | genshi/template/ast24.py | 505 | ||||
-rw-r--r-- | genshi/template/astutil.py | 784 | ||||
-rw-r--r-- | genshi/template/base.py | 634 | ||||
-rw-r--r-- | genshi/template/directives.py | 725 | ||||
-rw-r--r-- | genshi/template/eval.py | 629 | ||||
-rw-r--r-- | genshi/template/interpolation.py | 153 | ||||
-rw-r--r-- | genshi/template/loader.py | 344 | ||||
-rw-r--r-- | genshi/template/markup.py | 397 | ||||
-rw-r--r-- | genshi/template/plugin.py | 176 | ||||
-rw-r--r-- | genshi/template/text.py | 333 | ||||
-rw-r--r-- | genshi/util.py | 274 |
23 files changed, 0 insertions, 12365 deletions
diff --git a/genshi/__init__.py b/genshi/__init__.py deleted file mode 100644 index 02f4347..0000000 --- a/genshi/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""This package provides various means for generating and processing web markup -(XML or HTML). - -The design is centered around the concept of streams of markup events (similar -in concept to SAX parsing events) which can be processed in a uniform manner -independently of where or how they are produced. -""" - -__docformat__ = 'restructuredtext en' -__version__ = '0.6' - -from genshi.core import * -from genshi.input import ParseError, XML, HTML diff --git a/genshi/builder.py b/genshi/builder.py deleted file mode 100644 index 724e364..0000000 --- a/genshi/builder.py +++ /dev/null @@ -1,359 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Support for programmatically generating markup streams from Python code using -a very simple syntax. 
The main entry point to this module is the `tag` object -(which is actually an instance of the ``ElementFactory`` class). You should -rarely (if ever) need to directly import and use any of the other classes in -this module. - -Elements can be created using the `tag` object using attribute access. For -example: - ->>> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.') ->>> doc -<Element "p"> - -This produces an `Element` instance which can be further modified to add child -nodes and attributes. This is done by "calling" the element: positional -arguments are added as child nodes (alternatively, the `Element.append` method -can be used for that purpose), whereas keywords arguments are added as -attributes: - ->>> doc(tag.br) -<Element "p"> ->>> print(doc) -<p>Some text and <a href="http://example.org/">a link</a>.<br/></p> - -If an attribute name collides with a Python keyword, simply append an underscore -to the name: - ->>> doc(class_='intro') -<Element "p"> ->>> print(doc) -<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p> - -As shown above, an `Element` can easily be directly rendered to XML text by -printing it or using the Python ``str()`` function. This is basically a -shortcut for converting the `Element` to a stream and serializing that -stream: - ->>> stream = doc.generate() ->>> stream #doctest: +ELLIPSIS -<genshi.core.Stream object at ...> ->>> print(stream) -<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p> - - -The `tag` object also allows creating "fragments", which are basically lists -of nodes (elements or text) that don't have a parent element. This can be useful -for creating snippets of markup that are attached to a parent element later (for -example in a template). 
Fragments are created by calling the `tag` object, which -returns an object of type `Fragment`: - ->>> fragment = tag('Hello, ', tag.em('world'), '!') ->>> fragment -<Fragment> ->>> print(fragment) -Hello, <em>world</em>! -""" - -from genshi.core import Attrs, Markup, Namespace, QName, Stream, \ - START, END, TEXT - -__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag'] -__docformat__ = 'restructuredtext en' - - -class Fragment(object): - """Represents a markup fragment, which is basically just a list of element - or text nodes. - """ - __slots__ = ['children'] - - def __init__(self): - """Create a new fragment.""" - self.children = [] - - def __add__(self, other): - return Fragment()(self, other) - - def __call__(self, *args): - """Append any positional arguments as child nodes. - - :see: `append` - """ - for arg in args: - self.append(arg) - return self - - def __iter__(self): - return self._generate() - - def __repr__(self): - return '<%s>' % type(self).__name__ - - def __str__(self): - return str(self.generate()) - - def __unicode__(self): - return unicode(self.generate()) - - def __html__(self): - return Markup(self.generate()) - - def append(self, node): - """Append an element or string as child node. 
- - :param node: the node to append; can be an `Element`, `Fragment`, or a - `Stream`, or a Python string or number - """ - if isinstance(node, (Stream, Element, basestring, int, float, long)): - # For objects of a known/primitive type, we avoid the check for - # whether it is iterable for better performance - self.children.append(node) - elif isinstance(node, Fragment): - self.children.extend(node.children) - elif node is not None: - try: - for child in node: - self.append(child) - except TypeError: - self.children.append(node) - - def _generate(self): - for child in self.children: - if isinstance(child, Fragment): - for event in child._generate(): - yield event - elif isinstance(child, Stream): - for event in child: - yield event - else: - if not isinstance(child, basestring): - child = unicode(child) - yield TEXT, child, (None, -1, -1) - - def generate(self): - """Return a markup event stream for the fragment. - - :rtype: `Stream` - """ - return Stream(self._generate()) - - -def _kwargs_to_attrs(kwargs): - attrs = [] - names = set() - for name, value in kwargs.items(): - name = name.rstrip('_').replace('_', '-') - if value is not None and name not in names: - attrs.append((QName(name), unicode(value))) - names.add(name) - return Attrs(attrs) - - -class Element(Fragment): - """Simple XML output generator based on the builder pattern. - - Construct XML elements by passing the tag name to the constructor: - - >>> print(Element('strong')) - <strong/> - - Attributes can be specified using keyword arguments. The values of the - arguments will be converted to strings and any special XML characters - escaped: - - >>> print(Element('textarea', rows=10, cols=60)) - <textarea rows="10" cols="60"/> - >>> print(Element('span', title='1 < 2')) - <span title="1 &lt; 2"/> - >>> print(Element('span', title='"baz"')) - <span title="&#34;baz&#34;"/> - - The " character is escaped using a numerical entity. - The order in which attributes are rendered is undefined. 
- - If an attribute value evaluates to `None`, that attribute is not included - in the output: - - >>> print(Element('a', name=None)) - <a/> - - Attribute names that conflict with Python keywords can be specified by - appending an underscore: - - >>> print(Element('div', class_='warning')) - <div class="warning"/> - - Nested elements can be added to an element using item access notation. - The call notation can also be used for this and for adding attributes - using keyword arguments, as one would do in the constructor. - - >>> print(Element('ul')(Element('li'), Element('li'))) - <ul><li/><li/></ul> - >>> print(Element('a')('Label')) - <a>Label</a> - >>> print(Element('a')('Label', href="target")) - <a href="target">Label</a> - - Text nodes can be nested in an element by adding strings instead of - elements. Any special characters in the strings are escaped automatically: - - >>> print(Element('em')('Hello world')) - <em>Hello world</em> - >>> print(Element('em')(42)) - <em>42</em> - >>> print(Element('em')('1 < 2')) - <em>1 &lt; 2</em> - - This technique also allows mixed content: - - >>> print(Element('p')('Hello ', Element('b')('world'))) - <p>Hello <b>world</b></p> - - Quotes are not escaped inside text nodes: - >>> print(Element('p')('"Hello"')) - <p>"Hello"</p> - - Elements can also be combined with other elements or strings using the - addition operator, which results in a `Fragment` object that contains the - operands: - - >>> print(Element('br') + 'some text' + Element('br')) - <br/>some text<br/> - - Elements with a namespace can be generated using the `Namespace` and/or - `QName` classes: - - >>> from genshi.core import Namespace - >>> xhtml = Namespace('http://www.w3.org/1999/xhtml') - >>> print(Element(xhtml.html, lang='en')) - <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/> - """ - __slots__ = ['tag', 'attrib'] - - def __init__(self, tag_, **attrib): - Fragment.__init__(self) - self.tag = QName(tag_) - self.attrib = _kwargs_to_attrs(attrib) - - 
def __call__(self, *args, **kwargs): - """Append any positional arguments as child nodes, and keyword arguments - as attributes. - - :return: the element itself so that calls can be chained - :rtype: `Element` - :see: `Fragment.append` - """ - self.attrib |= _kwargs_to_attrs(kwargs) - Fragment.__call__(self, *args) - return self - - def __repr__(self): - return '<%s "%s">' % (type(self).__name__, self.tag) - - def _generate(self): - yield START, (self.tag, self.attrib), (None, -1, -1) - for kind, data, pos in Fragment._generate(self): - yield kind, data, pos - yield END, self.tag, (None, -1, -1) - - def generate(self): - """Return a markup event stream for the fragment. - - :rtype: `Stream` - """ - return Stream(self._generate()) - - -class ElementFactory(object): - """Factory for `Element` objects. - - A new element is created simply by accessing a correspondingly named - attribute of the factory object: - - >>> factory = ElementFactory() - >>> print(factory.foo) - <foo/> - >>> print(factory.foo(id=2)) - <foo id="2"/> - - Markup fragments (lists of nodes without a parent element) can be created - by calling the factory: - - >>> print(factory('Hello, ', factory.em('world'), '!')) - Hello, <em>world</em>! - - A factory can also be bound to a specific namespace: - - >>> factory = ElementFactory('http://www.w3.org/1999/xhtml') - >>> print(factory.html(lang="en")) - <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/> - - The namespace for a specific element can be altered on an existing factory - by specifying the new namespace using item access: - - >>> factory = ElementFactory() - >>> print(factory.html(factory['http://www.w3.org/2000/svg'].g(id=3))) - <html><g xmlns="http://www.w3.org/2000/svg" id="3"/></html> - - Usually, the `ElementFactory` class is not be used directly. Rather, the - `tag` instance should be used to create elements. - """ - - def __init__(self, namespace=None): - """Create the factory, optionally bound to the given namespace. 
- - :param namespace: the namespace URI for any created elements, or `None` - for no namespace - """ - if namespace and not isinstance(namespace, Namespace): - namespace = Namespace(namespace) - self.namespace = namespace - - def __call__(self, *args): - """Create a fragment that has the given positional arguments as child - nodes. - - :return: the created `Fragment` - :rtype: `Fragment` - """ - return Fragment()(*args) - - def __getitem__(self, namespace): - """Return a new factory that is bound to the specified namespace. - - :param namespace: the namespace URI or `Namespace` object - :return: an `ElementFactory` that produces elements bound to the given - namespace - :rtype: `ElementFactory` - """ - return ElementFactory(namespace) - - def __getattr__(self, name): - """Create an `Element` with the given name. - - :param name: the tag name of the element to create - :return: an `Element` with the specified name - :rtype: `Element` - """ - return Element(self.namespace and self.namespace[name] or name) - - -tag = ElementFactory() -"""Global `ElementFactory` bound to the default namespace. - -:type: `ElementFactory` -""" diff --git a/genshi/core.py b/genshi/core.py deleted file mode 100644 index f7cddff..0000000 --- a/genshi/core.py +++ /dev/null @@ -1,727 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. 
- -"""Core classes for markup processing.""" - -try: - reduce # builtin in Python < 3 -except NameError: - from functools import reduce -from itertools import chain -import operator - -from genshi.util import plaintext, stripentities, striptags, stringrepr - -__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace', - 'QName'] -__docformat__ = 'restructuredtext en' - - -class StreamEventKind(str): - """A kind of event on a markup stream.""" - __slots__ = [] - _instances = {} - - def __new__(cls, val): - return cls._instances.setdefault(val, str.__new__(cls, val)) - - -class Stream(object): - """Represents a stream of markup events. - - This class is basically an iterator over the events. - - Stream events are tuples of the form:: - - (kind, data, position) - - where ``kind`` is the event kind (such as `START`, `END`, `TEXT`, etc), - ``data`` depends on the kind of event, and ``position`` is a - ``(filename, line, offset)`` tuple that contains the location of the - original element or text in the input. If the original location is unknown, - ``position`` is ``(None, -1, -1)``. - - Also provided are ways to serialize the stream to text. The `serialize()` - method will return an iterator over generated strings, while `render()` - returns the complete generated text at once. Both accept various parameters - that impact the way the stream is serialized. 
- """ - __slots__ = ['events', 'serializer'] - - START = StreamEventKind('START') #: a start tag - END = StreamEventKind('END') #: an end tag - TEXT = StreamEventKind('TEXT') #: literal text - XML_DECL = StreamEventKind('XML_DECL') #: XML declaration - DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration - START_NS = StreamEventKind('START_NS') #: start namespace mapping - END_NS = StreamEventKind('END_NS') #: end namespace mapping - START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section - END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section - PI = StreamEventKind('PI') #: processing instruction - COMMENT = StreamEventKind('COMMENT') #: comment - - def __init__(self, events, serializer=None): - """Initialize the stream with a sequence of markup events. - - :param events: a sequence or iterable providing the events - :param serializer: the default serialization method to use for this - stream - - :note: Changed in 0.5: added the `serializer` argument - """ - self.events = events #: The underlying iterable producing the events - self.serializer = serializer #: The default serializion method - - def __iter__(self): - return iter(self.events) - - def __or__(self, function): - """Override the "bitwise or" operator to apply filters or serializers - to the stream, providing a syntax similar to pipes on Unix shells. 
- - Assume the following stream produced by the `HTML` function: - - >>> from genshi.input import HTML - >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''') - >>> print(html) - <p onclick="alert('Whoa')">Hello, world!</p> - - A filter such as the HTML sanitizer can be applied to that stream using - the pipe notation as follows: - - >>> from genshi.filters import HTMLSanitizer - >>> sanitizer = HTMLSanitizer() - >>> print(html | sanitizer) - <p>Hello, world!</p> - - Filters can be any function that accepts and produces a stream (where - a stream is anything that iterates over events): - - >>> def uppercase(stream): - ... for kind, data, pos in stream: - ... if kind is TEXT: - ... data = data.upper() - ... yield kind, data, pos - >>> print(html | sanitizer | uppercase) - <p>HELLO, WORLD!</p> - - Serializers can also be used with this notation: - - >>> from genshi.output import TextSerializer - >>> output = TextSerializer() - >>> print(html | sanitizer | uppercase | output) - HELLO, WORLD! - - Commonly, serializers should be used at the end of the "pipeline"; - using them somewhere in the middle may produce unexpected results. - - :param function: the callable object that should be applied as a filter - :return: the filtered stream - :rtype: `Stream` - """ - return Stream(_ensure(function(self)), serializer=self.serializer) - - def filter(self, *filters): - """Apply filters to the stream. - - This method returns a new stream with the given filters applied. The - filters must be callables that accept the stream object as parameter, - and return the filtered stream. 
- - The call:: - - stream.filter(filter1, filter2) - - is equivalent to:: - - stream | filter1 | filter2 - - :param filters: one or more callable objects that should be applied as - filters - :return: the filtered stream - :rtype: `Stream` - """ - return reduce(operator.or_, (self,) + filters) - - def render(self, method=None, encoding='utf-8', out=None, **kwargs): - """Return a string representation of the stream. - - Any additional keyword arguments are passed to the serializer, and thus - depend on the `method` parameter value. - - :param method: determines how the stream is serialized; can be either - "xml", "xhtml", "html", "text", or a custom serializer - class; if `None`, the default serialization method of - the stream is used - :param encoding: how the output string should be encoded; if set to - `None`, this method returns a `unicode` object - :param out: a file-like object that the output should be written to - instead of being returned as one big string; note that if - this is a file or socket (or similar), the `encoding` must - not be `None` (that is, the output must be encoded) - :return: a `str` or `unicode` object (depending on the `encoding` - parameter), or `None` if the `out` parameter is provided - :rtype: `basestring` - - :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer - :note: Changed in 0.5: added the `out` parameter - """ - from genshi.output import encode - if method is None: - method = self.serializer or 'xml' - generator = self.serialize(method=method, **kwargs) - return encode(generator, method=method, encoding=encoding, out=out) - - def select(self, path, namespaces=None, variables=None): - """Return a new stream that contains the events matching the given - XPath expression. 
- - >>> from genshi import HTML - >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>') - >>> print(stream.select('elem')) - <elem>foo</elem><elem>bar</elem> - >>> print(stream.select('elem/text()')) - foobar - - Note that the outermost element of the stream becomes the *context - node* for the XPath test. That means that the expression "doc" would - not match anything in the example above, because it only tests against - child elements of the outermost element: - - >>> print(stream.select('doc')) - <BLANKLINE> - - You can use the "." expression to match the context node itself - (although that usually makes little sense): - - >>> print(stream.select('.')) - <doc><elem>foo</elem><elem>bar</elem></doc> - - :param path: a string containing the XPath expression - :param namespaces: mapping of namespace prefixes used in the path - :param variables: mapping of variable names to values - :return: the selected substream - :rtype: `Stream` - :raises PathSyntaxError: if the given path expression is invalid or not - supported - """ - from genshi.path import Path - return Path(path).select(self, namespaces, variables) - - def serialize(self, method='xml', **kwargs): - """Generate strings corresponding to a specific serialization of the - stream. - - Unlike the `render()` method, this method is a generator that returns - the serialized output incrementally, as opposed to returning a single - string. - - Any additional keyword arguments are passed to the serializer, and thus - depend on the `method` parameter value. 
- - :param method: determines how the stream is serialized; can be either - "xml", "xhtml", "html", "text", or a custom serializer - class; if `None`, the default serialization method of - the stream is used - :return: an iterator over the serialization results (`Markup` or - `unicode` objects, depending on the serialization method) - :rtype: ``iterator`` - :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer - """ - from genshi.output import get_serializer - if method is None: - method = self.serializer or 'xml' - return get_serializer(method, **kwargs)(_ensure(self)) - - def __str__(self): - return self.render() - - def __unicode__(self): - return self.render(encoding=None) - - def __html__(self): - return self - - -START = Stream.START -END = Stream.END -TEXT = Stream.TEXT -XML_DECL = Stream.XML_DECL -DOCTYPE = Stream.DOCTYPE -START_NS = Stream.START_NS -END_NS = Stream.END_NS -START_CDATA = Stream.START_CDATA -END_CDATA = Stream.END_CDATA -PI = Stream.PI -COMMENT = Stream.COMMENT - - -def _ensure(stream): - """Ensure that every item on the stream is actually a markup event.""" - stream = iter(stream) - event = stream.next() - - # Check whether the iterable is a real markup event stream by examining the - # first item it yields; if it's not we'll need to do some conversion - if type(event) is not tuple or len(event) != 3: - for event in chain([event], stream): - if hasattr(event, 'totuple'): - event = event.totuple() - else: - event = TEXT, unicode(event), (None, -1, -1) - yield event - return - - # This looks like a markup event stream, so we'll just pass it through - # unchanged - yield event - for event in stream: - yield event - - -class Attrs(tuple): - """Immutable sequence type that stores the attributes of an element. - - Ordering of the attributes is preserved, while access by name is also - supported. 
- - >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) - >>> attrs - Attrs([('href', '#'), ('title', 'Foo')]) - - >>> 'href' in attrs - True - >>> 'tabindex' in attrs - False - >>> attrs.get('title') - 'Foo' - - Instances may not be manipulated directly. Instead, the operators ``|`` and - ``-`` can be used to produce new instances that have specific attributes - added, replaced or removed. - - To remove an attribute, use the ``-`` operator. The right hand side can be - either a string or a set/sequence of strings, identifying the name(s) of - the attribute(s) to remove: - - >>> attrs - 'title' - Attrs([('href', '#')]) - >>> attrs - ('title', 'href') - Attrs() - - The original instance is not modified, but the operator can of course be - used with an assignment: - - >>> attrs - Attrs([('href', '#'), ('title', 'Foo')]) - >>> attrs -= 'title' - >>> attrs - Attrs([('href', '#')]) - - To add a new attribute, use the ``|`` operator, where the right hand value - is a sequence of ``(name, value)`` tuples (which includes `Attrs` - instances): - - >>> attrs | [('title', 'Bar')] - Attrs([('href', '#'), ('title', 'Bar')]) - - If the attributes already contain an attribute with a given name, the value - of that attribute is replaced: - - >>> attrs | [('href', 'http://example.org/')] - Attrs([('href', 'http://example.org/')]) - """ - __slots__ = [] - - def __contains__(self, name): - """Return whether the list includes an attribute with the specified - name. - - :return: `True` if the list includes the attribute - :rtype: `bool` - """ - for attr, _ in self: - if attr == name: - return True - - def __getitem__(self, i): - """Return an item or slice of the attributes list. 
- - >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) - >>> attrs[1] - ('title', 'Foo') - >>> attrs[1:] - Attrs([('title', 'Foo')]) - """ - items = tuple.__getitem__(self, i) - if type(i) is slice: - return Attrs(items) - return items - - def __getslice__(self, i, j): - """Return a slice of the attributes list. - - >>> attrs = Attrs([('href', '#'), ('title', 'Foo')]) - >>> attrs[1:] - Attrs([('title', 'Foo')]) - """ - return Attrs(tuple.__getslice__(self, i, j)) - - def __or__(self, attrs): - """Return a new instance that contains the attributes in `attrs` in - addition to any already existing attributes. - - :return: a new instance with the merged attributes - :rtype: `Attrs` - """ - repl = dict([(an, av) for an, av in attrs if an in self]) - return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] + - [(an, av) for an, av in attrs if an not in self]) - - def __repr__(self): - if not self: - return 'Attrs()' - return 'Attrs([%s])' % ', '.join([repr(item) for item in self]) - - def __sub__(self, names): - """Return a new instance with all attributes with a name in `names` are - removed. - - :param names: the names of the attributes to remove - :return: a new instance with the attribute removed - :rtype: `Attrs` - """ - if isinstance(names, basestring): - names = (names,) - return Attrs([(name, val) for name, val in self if name not in names]) - - def get(self, name, default=None): - """Return the value of the attribute with the specified name, or the - value of the `default` parameter if no such attribute is found. - - :param name: the name of the attribute - :param default: the value to return when the attribute does not exist - :return: the attribute value, or the `default` value if that attribute - does not exist - :rtype: `object` - """ - for attr, value in self: - if attr == name: - return value - return default - - def totuple(self): - """Return the attributes as a markup event. 
- - The returned event is a `TEXT` event, the data is the value of all - attributes joined together. - - >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple() - ('TEXT', '#Foo', (None, -1, -1)) - - :return: a `TEXT` event - :rtype: `tuple` - """ - return TEXT, ''.join([x[1] for x in self]), (None, -1, -1) - - -class Markup(unicode): - """Marks a string as being safe for inclusion in HTML/XML output without - needing to be escaped. - """ - __slots__ = [] - - def __add__(self, other): - return Markup(unicode.__add__(self, escape(other))) - - def __radd__(self, other): - return Markup(unicode.__add__(escape(other), self)) - - def __mod__(self, args): - if isinstance(args, dict): - args = dict(zip(args.keys(), map(escape, args.values()))) - elif isinstance(args, (list, tuple)): - args = tuple(map(escape, args)) - else: - args = escape(args) - return Markup(unicode.__mod__(self, args)) - - def __mul__(self, num): - return Markup(unicode.__mul__(self, num)) - __rmul__ = __mul__ - - def __repr__(self): - return "<%s %s>" % (type(self).__name__, unicode.__repr__(self)) - - def join(self, seq, escape_quotes=True): - """Return a `Markup` object which is the concatenation of the strings - in the given sequence, where this `Markup` object is the separator - between the joined elements. - - Any element in the sequence that is not a `Markup` instance is - automatically escaped. - - :param seq: the sequence of strings to join - :param escape_quotes: whether double quote characters in the elements - should be escaped - :return: the joined `Markup` object - :rtype: `Markup` - :see: `escape` - """ - return Markup(unicode.join(self, [escape(item, quotes=escape_quotes) - for item in seq])) - - @classmethod - def escape(cls, text, quotes=True): - """Create a Markup instance from a string and escape special characters - it may contain (<, >, & and \"). - - >>> escape('"1 < 2"') - <Markup u'&#34;1 &lt; 2&#34;'> - - If the `quotes` parameter is set to `False`, the \" character is left - as is. 
Escaping quotes is generally only required for strings that are - to be used in attribute values. - - >>> escape('"1 < 2"', quotes=False) - <Markup u'"1 &lt; 2"'> - - :param text: the text to escape - :param quotes: if ``True``, double quote characters are escaped in - addition to the other special characters - :return: the escaped `Markup` string - :rtype: `Markup` - """ - if not text: - return cls() - if type(text) is cls: - return text - if hasattr(text, '__html__'): - return Markup(text.__html__()) - - text = text.replace('&', '&amp;') \ - .replace('<', '&lt;') \ - .replace('>', '&gt;') - if quotes: - text = text.replace('"', '&#34;') - return cls(text) - - def unescape(self): - """Reverse-escapes &, <, >, and \" and returns a `unicode` object. - - >>> Markup('1 &lt; 2').unescape() - u'1 < 2' - - :return: the unescaped string - :rtype: `unicode` - :see: `genshi.core.unescape` - """ - if not self: - return '' - return unicode(self).replace('&#34;', '"') \ - .replace('&gt;', '>') \ - .replace('&lt;', '<') \ - .replace('&amp;', '&') - - def stripentities(self, keepxmlentities=False): - """Return a copy of the text with any character or numeric entities - replaced by the equivalent UTF-8 characters. - - If the `keepxmlentities` parameter is provided and evaluates to `True`, - the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and - ``&quot;``) are not stripped. - - :return: a `Markup` instance with entities removed - :rtype: `Markup` - :see: `genshi.util.stripentities` - """ - return Markup(stripentities(self, keepxmlentities=keepxmlentities)) - - def striptags(self): - """Return a copy of the text with all XML/HTML tags removed. - - :return: a `Markup` instance with all tags removed - :rtype: `Markup` - :see: `genshi.util.striptags` - """ - return Markup(striptags(self)) - - -try: - from genshi._speedups import Markup -except ImportError: - pass # just use the Python implementation - - -escape = Markup.escape - - -def unescape(text): - """Reverse-escapes &, <, >, and \" and returns a `unicode` object. 
- - >>> unescape(Markup('1 &lt; 2')) - u'1 < 2' - - If the provided `text` object is not a `Markup` instance, it is returned - unchanged. - - >>> unescape('1 &lt; 2') - '1 &lt; 2' - - :param text: the text to unescape - :return: the unescsaped string - :rtype: `unicode` - """ - if not isinstance(text, Markup): - return text - return text.unescape() - - -class Namespace(object): - """Utility class creating and testing elements with a namespace. - - Internally, namespace URIs are encoded in the `QName` of any element or - attribute, the namespace URI being enclosed in curly braces. This class - helps create and test these strings. - - A `Namespace` object is instantiated with the namespace URI. - - >>> html = Namespace('http://www.w3.org/1999/xhtml') - >>> html - Namespace('http://www.w3.org/1999/xhtml') - >>> html.uri - u'http://www.w3.org/1999/xhtml' - - The `Namespace` object can than be used to generate `QName` objects with - that namespace: - - >>> html.body - QName('http://www.w3.org/1999/xhtml}body') - >>> html.body.localname - u'body' - >>> html.body.namespace - u'http://www.w3.org/1999/xhtml' - - The same works using item access notation, which is useful for element or - attribute names that are not valid Python identifiers: - - >>> html['body'] - QName('http://www.w3.org/1999/xhtml}body') - - A `Namespace` object can also be used to test whether a specific `QName` - belongs to that namespace using the ``in`` operator: - - >>> qname = html.body - >>> qname in html - True - >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2') - False - """ - def __new__(cls, uri): - if type(uri) is cls: - return uri - return object.__new__(cls) - - def __getnewargs__(self): - return (self.uri,) - - def __getstate__(self): - return self.uri - - def __setstate__(self, uri): - self.uri = uri - - def __init__(self, uri): - self.uri = unicode(uri) - - def __contains__(self, qname): - return qname.namespace == self.uri - - def __ne__(self, other): - return not self == other - - def 
__eq__(self, other): - if isinstance(other, Namespace): - return self.uri == other.uri - return self.uri == other - - def __getitem__(self, name): - return QName(self.uri + '}' + name) - __getattr__ = __getitem__ - - def __hash__(self): - return hash(self.uri) - - def __repr__(self): - return '%s(%s)' % (type(self).__name__, stringrepr(self.uri)) - - def __str__(self): - return self.uri.encode('utf-8') - - def __unicode__(self): - return self.uri - - -# The namespace used by attributes such as xml:lang and xml:space -XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace') - - -class QName(unicode): - """A qualified element or attribute name. - - The unicode value of instances of this class contains the qualified name of - the element or attribute, in the form ``{namespace-uri}local-name``. The - namespace URI can be obtained through the additional `namespace` attribute, - while the local name can be accessed through the `localname` attribute. - - >>> qname = QName('foo') - >>> qname - QName('foo') - >>> qname.localname - u'foo' - >>> qname.namespace - - >>> qname = QName('http://www.w3.org/1999/xhtml}body') - >>> qname - QName('http://www.w3.org/1999/xhtml}body') - >>> qname.localname - u'body' - >>> qname.namespace - u'http://www.w3.org/1999/xhtml' - """ - __slots__ = ['namespace', 'localname'] - - def __new__(cls, qname): - """Create the `QName` instance. 
- - :param qname: the qualified name as a string of the form - ``{namespace-uri}local-name``, where the leading curly - brace is optional - """ - if type(qname) is cls: - return qname - - parts = qname.lstrip('{').split('}', 1) - if len(parts) > 1: - self = unicode.__new__(cls, '{%s' % qname) - self.namespace, self.localname = map(unicode, parts) - else: - self = unicode.__new__(cls, qname) - self.namespace, self.localname = None, unicode(qname) - return self - - def __getnewargs__(self): - return (self.lstrip('{'),) - - def __repr__(self): - return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{'))) diff --git a/genshi/filters/__init__.py b/genshi/filters/__init__.py deleted file mode 100644 index efc2565..0000000 --- a/genshi/filters/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2007-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Implementation of a number of stream filters.""" - -from genshi.filters.html import HTMLFormFiller, HTMLSanitizer -from genshi.filters.i18n import Translator -from genshi.filters.transform import Transformer - -__docformat__ = 'restructuredtext en' diff --git a/genshi/filters/html.py b/genshi/filters/html.py deleted file mode 100644 index d554a54..0000000 --- a/genshi/filters/html.py +++ /dev/null @@ -1,453 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. 
The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Implementation of a number of stream filters.""" - -try: - any -except NameError: - from genshi.util import any -import re - -from genshi.core import Attrs, QName, stripentities -from genshi.core import END, START, TEXT, COMMENT - -__all__ = ['HTMLFormFiller', 'HTMLSanitizer'] -__docformat__ = 'restructuredtext en' - - -class HTMLFormFiller(object): - """A stream filter that can populate HTML forms from a dictionary of values. - - >>> from genshi.input import HTML - >>> html = HTML('''<form> - ... <p><input type="text" name="foo" /></p> - ... </form>''') - >>> filler = HTMLFormFiller(data={'foo': 'bar'}) - >>> print(html | filler) - <form> - <p><input type="text" name="foo" value="bar"/></p> - </form> - """ - # TODO: only select the first radio button, and the first select option - # (if not in a multiple-select) - # TODO: only apply to elements in the XHTML namespace (or no namespace)? - - def __init__(self, name=None, id=None, data=None, passwords=False): - """Create the filter. - - :param name: The name of the form that should be populated. If this - parameter is given, only forms where the ``name`` attribute - value matches the parameter are processed. - :param id: The ID of the form that should be populated. If this - parameter is given, only forms where the ``id`` attribute - value matches the parameter are processed. - :param data: The dictionary of form values, where the keys are the names - of the form fields, and the values are the values to fill - in. - :param passwords: Whether password input fields should be populated. 
- This is off by default for security reasons (for - example, a password may end up in the browser cache) - :note: Changed in 0.5.2: added the `passwords` option - """ - self.name = name - self.id = id - if data is None: - data = {} - self.data = data - self.passwords = passwords - - def __call__(self, stream): - """Apply the filter to the given stream. - - :param stream: the markup event stream to filter - """ - in_form = in_select = in_option = in_textarea = False - select_value = option_value = textarea_value = None - option_start = None - option_text = [] - no_option_value = False - - for kind, data, pos in stream: - - if kind is START: - tag, attrs = data - tagname = tag.localname - - if tagname == 'form' and ( - self.name and attrs.get('name') == self.name or - self.id and attrs.get('id') == self.id or - not (self.id or self.name)): - in_form = True - - elif in_form: - if tagname == 'input': - type = attrs.get('type', '').lower() - if type in ('checkbox', 'radio'): - name = attrs.get('name') - if name and name in self.data: - value = self.data[name] - declval = attrs.get('value') - checked = False - if isinstance(value, (list, tuple)): - if declval: - checked = declval in [unicode(v) for v - in value] - else: - checked = any(value) - else: - if declval: - checked = declval == unicode(value) - elif type == 'checkbox': - checked = bool(value) - if checked: - attrs |= [(QName('checked'), 'checked')] - elif 'checked' in attrs: - attrs -= 'checked' - elif type in ('', 'hidden', 'text') \ - or type == 'password' and self.passwords: - name = attrs.get('name') - if name and name in self.data: - value = self.data[name] - if isinstance(value, (list, tuple)): - value = value[0] - if value is not None: - attrs |= [ - (QName('value'), unicode(value)) - ] - elif tagname == 'select': - name = attrs.get('name') - if name in self.data: - select_value = self.data[name] - in_select = True - elif tagname == 'textarea': - name = attrs.get('name') - if name in self.data: - 
textarea_value = self.data.get(name) - if isinstance(textarea_value, (list, tuple)): - textarea_value = textarea_value[0] - in_textarea = True - elif in_select and tagname == 'option': - option_start = kind, data, pos - option_value = attrs.get('value') - if option_value is None: - no_option_value = True - option_value = '' - in_option = True - continue - yield kind, (tag, attrs), pos - - elif in_form and kind is TEXT: - if in_select and in_option: - if no_option_value: - option_value += data - option_text.append((kind, data, pos)) - continue - elif in_textarea: - continue - yield kind, data, pos - - elif in_form and kind is END: - tagname = data.localname - if tagname == 'form': - in_form = False - elif tagname == 'select': - in_select = False - select_value = None - elif in_select and tagname == 'option': - if isinstance(select_value, (tuple, list)): - selected = option_value in [unicode(v) for v - in select_value] - else: - selected = option_value == unicode(select_value) - okind, (tag, attrs), opos = option_start - if selected: - attrs |= [(QName('selected'), 'selected')] - elif 'selected' in attrs: - attrs -= 'selected' - yield okind, (tag, attrs), opos - if option_text: - for event in option_text: - yield event - in_option = False - no_option_value = False - option_start = option_value = None - option_text = [] - elif tagname == 'textarea': - if textarea_value: - yield TEXT, unicode(textarea_value), pos - in_textarea = False - yield kind, data, pos - - else: - yield kind, data, pos - - -class HTMLSanitizer(object): - """A filter that removes potentially dangerous HTML tags and attributes - from the stream. - - >>> from genshi import HTML - >>> html = HTML('<div><script>alert(document.cookie)</script></div>') - >>> print(html | HTMLSanitizer()) - <div/> - - The default set of safe tags and attributes can be modified when the filter - is instantiated. 
For example, to allow inline ``style`` attributes, the - following instantation would work: - - >>> html = HTML('<div style="background: #000"></div>') - >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) - >>> print(html | sanitizer) - <div style="background: #000"/> - - Note that even in this case, the filter *does* attempt to remove dangerous - constructs from style attributes: - - >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>') - >>> print(html | sanitizer) - <div style="color: #000"/> - - This handles HTML entities, unicode escapes in CSS and Javascript text, as - well as a lot of other things. However, the style tag is still excluded by - default because it is very hard for such sanitizing to be completely safe, - especially considering how much error recovery current web browsers perform. - - It also does some basic filtering of CSS properties that may be used for - typical phishing attacks. For more sophisticated filtering, this class - provides a couple of hooks that can be overridden in sub-classes. - - :warn: Note that this special processing of CSS is currently only applied to - style attributes, **not** style elements. 
- """ - - SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b', - 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', - 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', - 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', - 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', - 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', - 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', - 'ul', 'var']) - - SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey', - 'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding', - 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class', - 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', - 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', - 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', - 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name', - 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', - 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', - 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', - 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) - - SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) - - URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc', - 'src']) - - def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS, - safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS): - """Create the sanitizer. - - The exact set of allowed elements and attributes can be configured. 
- - :param safe_tags: a set of tag names that are considered safe - :param safe_attrs: a set of attribute names that are considered safe - :param safe_schemes: a set of URI schemes that are considered safe - :param uri_attrs: a set of names of attributes that contain URIs - """ - self.safe_tags = safe_tags - "The set of tag names that are considered safe." - self.safe_attrs = safe_attrs - "The set of attribute names that are considered safe." - self.uri_attrs = uri_attrs - "The set of names of attributes that may contain URIs." - self.safe_schemes = safe_schemes - "The set of URI schemes that are considered safe." - - def __call__(self, stream): - """Apply the filter to the given stream. - - :param stream: the markup event stream to filter - """ - waiting_for = None - - for kind, data, pos in stream: - if kind is START: - if waiting_for: - continue - tag, attrs = data - if not self.is_safe_elem(tag, attrs): - waiting_for = tag - continue - - new_attrs = [] - for attr, value in attrs: - value = stripentities(value) - if attr not in self.safe_attrs: - continue - elif attr in self.uri_attrs: - # Don't allow URI schemes such as "javascript:" - if not self.is_safe_uri(value): - continue - elif attr == 'style': - # Remove dangerous CSS declarations from inline styles - decls = self.sanitize_css(value) - if not decls: - continue - value = '; '.join(decls) - new_attrs.append((attr, value)) - - yield kind, (tag, Attrs(new_attrs)), pos - - elif kind is END: - tag = data - if waiting_for: - if waiting_for == tag: - waiting_for = None - else: - yield kind, data, pos - - elif kind is not COMMENT: - if not waiting_for: - yield kind, data, pos - - def is_safe_css(self, propname, value): - """Determine whether the given css property declaration is to be - considered safe for inclusion in the output. 
- - :param propname: the CSS property name - :param value: the value of the property - :return: whether the property value should be considered safe - :rtype: bool - :since: version 0.6 - """ - if propname == 'position': - return False - if propname.startswith('margin') and '-' in value: - # Negative margins can be used for phishing - return False - return True - - def is_safe_elem(self, tag, attrs): - """Determine whether the given element should be considered safe for - inclusion in the output. - - :param tag: the tag name of the element - :type tag: QName - :param attrs: the element attributes - :type attrs: Attrs - :return: whether the element should be considered safe - :rtype: bool - :since: version 0.6 - """ - if tag not in self.safe_tags: - return False - if tag.localname == 'input': - input_type = attrs.get('type', '').lower() - if input_type == 'password': - return False - return True - - def is_safe_uri(self, uri): - """Determine whether the given URI is to be considered safe for - inclusion in the output. - - The default implementation checks whether the scheme of the URI is in - the set of allowed URIs (`safe_schemes`). - - >>> sanitizer = HTMLSanitizer() - >>> sanitizer.is_safe_uri('http://example.org/') - True - >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)') - False - - :param uri: the URI to check - :return: `True` if the URI can be considered safe, `False` otherwise - :rtype: `bool` - :since: version 0.4.3 - """ - if '#' in uri: - uri = uri.split('#', 1)[0] # Strip out the fragment identifier - if ':' not in uri: - return True # This is a relative URI - chars = [char for char in uri.split(':', 1)[0] if char.isalnum()] - return ''.join(chars).lower() in self.safe_schemes - - def sanitize_css(self, text): - """Remove potentially dangerous property declarations from CSS code. 
- - In particular, properties using the CSS ``url()`` function with a scheme - that is not considered safe are removed: - - >>> sanitizer = HTMLSanitizer() - >>> sanitizer.sanitize_css(u''' - ... background: url(javascript:alert("foo")); - ... color: #000; - ... ''') - [u'color: #000'] - - Also, the proprietary Internet Explorer function ``expression()`` is - always stripped: - - >>> sanitizer.sanitize_css(u''' - ... background: #fff; - ... color: #000; - ... width: e/**/xpression(alert("foo")); - ... ''') - [u'background: #fff', u'color: #000'] - - :param text: the CSS text; this is expected to be `unicode` and to not - contain any character or numeric references - :return: a list of declarations that are considered safe - :rtype: `list` - :since: version 0.4.3 - """ - decls = [] - text = self._strip_css_comments(self._replace_unicode_escapes(text)) - for decl in text.split(';'): - decl = decl.strip() - if not decl: - continue - try: - propname, value = decl.split(':', 1) - except ValueError: - continue - if not self.is_safe_css(propname.strip().lower(), value.strip()): - continue - is_evil = False - if 'expression' in value: - is_evil = True - for match in re.finditer(r'url\s*\(([^)]+)', value): - if not self.is_safe_uri(match.group(1)): - is_evil = True - break - if not is_evil: - decls.append(decl.strip()) - return decls - - _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub - _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub - - def _replace_unicode_escapes(self, text): - def _repl(match): - return unichr(int(match.group(1), 16)) - return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text)) - - _CSS_COMMENTS = re.compile(r'/\*.*?\*/').sub - - def _strip_css_comments(self, text): - return self._CSS_COMMENTS('', text) diff --git a/genshi/filters/i18n.py b/genshi/filters/i18n.py deleted file mode 100644 index 7852875..0000000 --- a/genshi/filters/i18n.py +++ /dev/null @@ -1,1238 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2007-2010 
Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Directives and utilities for internationalization and localization of -templates. - -:since: version 0.4 -:note: Directives support added since version 0.6 -""" - -try: - any -except NameError: - from genshi.util import any -from gettext import NullTranslations -import os -import re -from types import FunctionType - -from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \ - XML_NAMESPACE, _ensure, StreamEventKind -from genshi.template.eval import _ast -from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives -from genshi.template.directives import Directive, StripDirective -from genshi.template.markup import MarkupTemplate, EXEC - -__all__ = ['Translator', 'extract'] -__docformat__ = 'restructuredtext en' - - -I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') - -MSGBUF = StreamEventKind('MSGBUF') -SUB_START = StreamEventKind('SUB_START') -SUB_END = StreamEventKind('SUB_END') - -GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', - 'ugettext', 'ungettext') - - -class I18NDirective(Directive): - """Simple interface for i18n directives to support messages extraction.""" - - def __call__(self, stream, directives, ctxt, **vars): - return _apply_directives(stream, directives, ctxt, vars) - - -class ExtractableI18NDirective(I18NDirective): - """Simple interface for directives to support messages extraction.""" - - def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, - search_text=True, comment_stack=None): - raise 
NotImplementedError - - -class CommentDirective(I18NDirective): - """Implementation of the ``i18n:comment`` template directive which adds - translation comments. - - >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <p i18n:comment="As in Foo Bar">Foo</p> - ... </html>''') - >>> translator = Translator() - >>> translator.setup(tmpl) - >>> list(translator.extract(tmpl.stream)) - [(2, None, u'Foo', [u'As in Foo Bar'])] - """ - __slots__ = ['comment'] - - def __init__(self, value, template=None, namespaces=None, lineno=-1, - offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.comment = value - - -class MsgDirective(ExtractableI18NDirective): - r"""Implementation of the ``i18n:msg`` directive which marks inner content - as translatable. Consider the following examples: - - >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <div i18n:msg=""> - ... <p>Foo</p> - ... <p>Bar</p> - ... </div> - ... <p i18n:msg="">Foo <em>bar</em>!</p> - ... </html>''') - - >>> translator = Translator() - >>> translator.setup(tmpl) - >>> list(translator.extract(tmpl.stream)) - [(2, None, u'[1:Foo]\n [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])] - >>> print(tmpl.generate().render()) - <html> - <div><p>Foo</p> - <p>Bar</p></div> - <p>Foo <em>bar</em>!</p> - </html> - - >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <div i18n:msg="fname, lname"> - ... <p>First Name: ${fname}</p> - ... <p>Last Name: ${lname}</p> - ... </div> - ... <p i18n:msg="">Foo <em>bar</em>!</p> - ... </html>''') - >>> translator.setup(tmpl) - >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE - [(2, None, u'[1:First Name: %(fname)s]\n [2:Last Name: %(lname)s]', []), - (6, None, u'Foo [1:bar]!', [])] - - >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <div i18n:msg="fname, lname"> - ... 
<p>First Name: ${fname}</p> - ... <p>Last Name: ${lname}</p> - ... </div> - ... <p i18n:msg="">Foo <em>bar</em>!</p> - ... </html>''') - >>> translator.setup(tmpl) - >>> print(tmpl.generate(fname='John', lname='Doe').render()) - <html> - <div><p>First Name: John</p> - <p>Last Name: Doe</p></div> - <p>Foo <em>bar</em>!</p> - </html> - - Starting and ending white-space is stripped of to make it simpler for - translators. Stripping it is not that important since it's on the html - source, the rendered output will remain the same. - """ - __slots__ = ['params', 'lineno'] - - def __init__(self, value, template=None, namespaces=None, lineno=-1, - offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.params = [param.strip() for param in value.split(',') if param] - self.lineno = lineno - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('params', '').strip() - return super(MsgDirective, cls).attach(template, stream, value.strip(), - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - gettext = ctxt.get('_i18n.gettext') - if ctxt.get('_i18n.domain'): - dgettext = ctxt.get('_i18n.dgettext') - assert hasattr(dgettext, '__call__'), \ - 'No domain gettext function passed' - gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) - - def _generate(): - msgbuf = MessageBuffer(self) - previous = stream.next() - if previous[0] is START: - yield previous - else: - msgbuf.append(*previous) - previous = stream.next() - for kind, data, pos in stream: - msgbuf.append(*previous) - previous = kind, data, pos - if previous[0] is not END: - msgbuf.append(*previous) - previous = None - for event in msgbuf.translate(gettext(msgbuf.format())): - yield event - if previous: - yield previous - - return _apply_directives(_generate(), directives, ctxt, vars) - - def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, - search_text=True, 
comment_stack=None): - msgbuf = MessageBuffer(self) - strip = False - - stream = iter(stream) - previous = stream.next() - if previous[0] is START: - for message in translator._extract_attrs(previous, - gettext_functions, - search_text=search_text): - yield message - previous = stream.next() - strip = True - for event in stream: - if event[0] is START: - for message in translator._extract_attrs(event, - gettext_functions, - search_text=search_text): - yield message - msgbuf.append(*previous) - previous = event - if not strip: - msgbuf.append(*previous) - - yield self.lineno, None, msgbuf.format(), comment_stack[-1:] - - -class ChooseBranchDirective(I18NDirective): - __slots__ = ['params'] - - def __call__(self, stream, directives, ctxt, **vars): - self.params = ctxt.get('_i18n.choose.params', [])[:] - msgbuf = MessageBuffer(self) - stream = _apply_directives(stream, directives, ctxt, vars) - - previous = stream.next() - if previous[0] is START: - yield previous - else: - msgbuf.append(*previous) - - try: - previous = stream.next() - except StopIteration: - # For example <i18n:singular> or <i18n:plural> directives - yield MSGBUF, (), -1 # the place holder for msgbuf output - ctxt['_i18n.choose.%s' % self.tagname] = msgbuf - return - - for event in stream: - msgbuf.append(*previous) - previous = event - yield MSGBUF, (), -1 # the place holder for msgbuf output - - if previous[0] is END: - yield previous # the outer end tag - else: - msgbuf.append(*previous) - ctxt['_i18n.choose.%s' % self.tagname] = msgbuf - - def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, - search_text=True, comment_stack=None, msgbuf=None): - stream = iter(stream) - previous = stream.next() - - if previous[0] is START: - # skip the enclosing element - for message in translator._extract_attrs(previous, - gettext_functions, - search_text=search_text): - yield message - previous = stream.next() - - for event in stream: - if previous[0] is START: - for message in 
translator._extract_attrs(previous, - gettext_functions, - search_text=search_text): - yield message - msgbuf.append(*previous) - previous = event - - if previous[0] is not END: - msgbuf.append(*previous) - - -class SingularDirective(ChooseBranchDirective): - """Implementation of the ``i18n:singular`` directive to be used with the - ``i18n:choose`` directive.""" - - -class PluralDirective(ChooseBranchDirective): - """Implementation of the ``i18n:plural`` directive to be used with the - ``i18n:choose`` directive.""" - - -class ChooseDirective(ExtractableI18NDirective): - """Implementation of the ``i18n:choose`` directive which provides plural - internationalisation of strings. - - This directive requires at least one parameter, the one which evaluates to - an integer which will allow to choose the plural/singular form. If you also - have expressions inside the singular and plural version of the string you - also need to pass a name for those parameters. Consider the following - examples: - - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <div i18n:choose="num; num"> - ... <p i18n:singular="">There is $num coin</p> - ... <p i18n:plural="">There are $num coins</p> - ... </div> - ... </html>''') - >>> translator = Translator() - >>> translator.setup(tmpl) - >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE - [(2, 'ngettext', (u'There is %(num)s coin', - u'There are %(num)s coins'), [])] - - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <div i18n:choose="num; num"> - ... <p i18n:singular="">There is $num coin</p> - ... <p i18n:plural="">There are $num coins</p> - ... </div> - ... 
</html>''') - >>> translator.setup(tmpl) - >>> print(tmpl.generate(num=1).render()) - <html> - <div> - <p>There is 1 coin</p> - </div> - </html> - >>> print(tmpl.generate(num=2).render()) - <html> - <div> - <p>There are 2 coins</p> - </div> - </html> - - When used as a element and not as an attribute: - - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <i18n:choose numeral="num" params="num"> - ... <p i18n:singular="">There is $num coin</p> - ... <p i18n:plural="">There are $num coins</p> - ... </i18n:choose> - ... </html>''') - >>> translator.setup(tmpl) - >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE - [(2, 'ngettext', (u'There is %(num)s coin', - u'There are %(num)s coins'), [])] - """ - __slots__ = ['numeral', 'params', 'lineno'] - - def __init__(self, value, template=None, namespaces=None, lineno=-1, - offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - params = [v.strip() for v in value.split(';')] - self.numeral = self._parse_expr(params.pop(0), template, lineno, offset) - self.params = params and [name.strip() for name in - params[0].split(',') if name] or [] - self.lineno = lineno - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - numeral = value.get('numeral', '').strip() - assert numeral is not '', "at least pass the numeral param" - params = [v.strip() for v in value.get('params', '').split(',')] - value = '%s; ' % numeral + ', '.join(params) - return super(ChooseDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - ctxt.push({'_i18n.choose.params': self.params, - '_i18n.choose.singular': None, - '_i18n.choose.plural': None}) - - ngettext = ctxt.get('_i18n.ngettext') - assert hasattr(ngettext, '__call__'), 'No ngettext function available' - dngettext = ctxt.get('_i18n.dngettext') - if not dngettext: - dngettext = lambda d, s, 
p, n: ngettext(s, p, n) - - new_stream = [] - singular_stream = None - singular_msgbuf = None - plural_stream = None - plural_msgbuf = None - - numeral = self.numeral.evaluate(ctxt) - is_plural = self._is_plural(numeral, ngettext) - - for event in stream: - if event[0] is SUB and any(isinstance(d, ChooseBranchDirective) - for d in event[1][0]): - subdirectives, substream = event[1] - - if isinstance(subdirectives[0], SingularDirective): - singular_stream = list(_apply_directives(substream, - subdirectives, - ctxt, vars)) - new_stream.append((MSGBUF, None, (None, -1, -1))) - - elif isinstance(subdirectives[0], PluralDirective): - if is_plural: - plural_stream = list(_apply_directives(substream, - subdirectives, - ctxt, vars)) - - else: - new_stream.append(event) - - if ctxt.get('_i18n.domain'): - ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), - s, p, n) - - singular_msgbuf = ctxt.get('_i18n.choose.singular') - if is_plural: - plural_msgbuf = ctxt.get('_i18n.choose.plural') - msgbuf, choice = plural_msgbuf, plural_stream - else: - msgbuf, choice = singular_msgbuf, singular_stream - plural_msgbuf = MessageBuffer(self) - - for kind, data, pos in new_stream: - if kind is MSGBUF: - for event in choice: - if event[0] is MSGBUF: - translation = ngettext(singular_msgbuf.format(), - plural_msgbuf.format(), - numeral) - for subevent in msgbuf.translate(translation): - yield subevent - else: - yield event - else: - yield kind, data, pos - - ctxt.pop() - - def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, - search_text=True, comment_stack=None): - strip = False - stream = iter(stream) - previous = stream.next() - - if previous[0] is START: - # skip the enclosing element - for message in translator._extract_attrs(previous, - gettext_functions, - search_text=search_text): - yield message - previous = stream.next() - strip = True - - singular_msgbuf = MessageBuffer(self) - plural_msgbuf = MessageBuffer(self) - - for event in stream: - if 
previous[0] is SUB: - directives, substream = previous[1] - for directive in directives: - if isinstance(directive, SingularDirective): - for message in directive.extract(translator, - substream, gettext_functions, search_text, - comment_stack, msgbuf=singular_msgbuf): - yield message - elif isinstance(directive, PluralDirective): - for message in directive.extract(translator, - substream, gettext_functions, search_text, - comment_stack, msgbuf=plural_msgbuf): - yield message - elif not isinstance(directive, StripDirective): - singular_msgbuf.append(*previous) - plural_msgbuf.append(*previous) - else: - if previous[0] is START: - for message in translator._extract_attrs(previous, - gettext_functions, - search_text): - yield message - singular_msgbuf.append(*previous) - plural_msgbuf.append(*previous) - previous = event - - if not strip: - singular_msgbuf.append(*previous) - plural_msgbuf.append(*previous) - - yield self.lineno, 'ngettext', \ - (singular_msgbuf.format(), plural_msgbuf.format()), \ - comment_stack[-1:] - - def _is_plural(self, numeral, ngettext): - # XXX: should we test which form was chosen like this!?!?!? - # There should be no match in any catalogue for these singular and - # plural test strings - singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' - plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00' - return ngettext(singular, plural, numeral) == plural - - -class DomainDirective(I18NDirective): - """Implementation of the ``i18n:domain`` directive which allows choosing - another i18n domain(catalog) to translate from. - - >>> from genshi.filters.tests.i18n import DummyTranslations - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> - ... <p i18n:msg="">Bar</p> - ... <div i18n:domain="foo"> - ... <p i18n:msg="">FooBar</p> - ... <p>Bar</p> - ... <p i18n:domain="bar" i18n:msg="">Bar</p> - ... <p i18n:domain="">Bar</p> - ... </div> - ... <p>Bar</p> - ... 
</html>''') - - >>> translations = DummyTranslations({'Bar': 'Voh'}) - >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'}) - >>> translations.add_domain('bar', {'Bar': 'bar_Bar'}) - >>> translator = Translator(translations) - >>> translator.setup(tmpl) - - >>> print(tmpl.generate().render()) - <html> - <p>Voh</p> - <div> - <p>BarFoo</p> - <p>foo_Bar</p> - <p>bar_Bar</p> - <p>Voh</p> - </div> - <p>Voh</p> - </html> - """ - __slots__ = ['domain'] - - def __init__(self, value, template=None, namespaces=None, lineno=-1, - offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.domain = value and value.strip() or '__DEFAULT__' - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('name') - return super(DomainDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - ctxt.push({'_i18n.domain': self.domain}) - for event in _apply_directives(stream, directives, ctxt, vars): - yield event - ctxt.pop() - - -class Translator(DirectiveFactory): - """Can extract and translate localizable strings from markup streams and - templates. - - For example, assume the following template: - - >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> - ... <head> - ... <title>Example</title> - ... </head> - ... <body> - ... <h1>Example</h1> - ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> - ... </body> - ... </html>''', filename='example.html') - - For demonstration, we define a dummy ``gettext``-style function with a - hard-coded translation table, and pass that to the `Translator` initializer: - - >>> def pseudo_gettext(string): - ... return { - ... 'Example': 'Beispiel', - ... 'Hello, %(name)s': 'Hallo, %(name)s' - ... 
}[string] - >>> translator = Translator(pseudo_gettext) - - Next, the translator needs to be prepended to any already defined filters - on the template: - - >>> tmpl.filters.insert(0, translator) - - When generating the template output, our hard-coded translations should be - applied as expected: - - >>> print(tmpl.generate(username='Hans', _=pseudo_gettext)) - <html> - <head> - <title>Beispiel</title> - </head> - <body> - <h1>Beispiel</h1> - <p>Hallo, Hans</p> - </body> - </html> - - Note that elements defining ``xml:lang`` attributes that do not contain - variable expressions are ignored by this filter. That can be used to - exclude specific parts of a template from being extracted and translated. - """ - - directives = [ - ('domain', DomainDirective), - ('comment', CommentDirective), - ('msg', MsgDirective), - ('choose', ChooseDirective), - ('singular', SingularDirective), - ('plural', PluralDirective) - ] - - IGNORE_TAGS = frozenset([ - QName('script'), QName('http://www.w3.org/1999/xhtml}script'), - QName('style'), QName('http://www.w3.org/1999/xhtml}style') - ]) - INCLUDE_ATTRS = frozenset([ - 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title' - ]) - NAMESPACE = I18N_NAMESPACE - - def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, - include_attrs=INCLUDE_ATTRS, extract_text=True): - """Initialize the translator. - - :param translate: the translation function, for example ``gettext`` or - ``ugettext``. 
- :param ignore_tags: a set of tag names that should not be localized - :param include_attrs: a set of attribute names should be localized - :param extract_text: whether the content of text nodes should be - extracted, or only text in explicit ``gettext`` - function calls - - :note: Changed in 0.6: the `translate` parameter can now be either - a ``gettext``-style function, or an object compatible with the - ``NullTransalations`` or ``GNUTranslations`` interface - """ - self.translate = translate - self.ignore_tags = ignore_tags - self.include_attrs = include_attrs - self.extract_text = extract_text - - def __call__(self, stream, ctxt=None, translate_text=True, - translate_attrs=True): - """Translate any localizable strings in the given stream. - - This function shouldn't be called directly. Instead, an instance of - the `Translator` class should be registered as a filter with the - `Template` or the `TemplateLoader`, or applied as a regular stream - filter. If used as a template filter, it should be inserted in front of - all the default filters. 
- - :param stream: the markup event stream - :param ctxt: the template context (not used) - :param translate_text: whether text nodes should be translated (used - internally) - :param translate_attrs: whether attribute values should be translated - (used internally) - :return: the localized stream - """ - ignore_tags = self.ignore_tags - include_attrs = self.include_attrs - skip = 0 - xml_lang = XML_NAMESPACE['lang'] - if not self.extract_text: - translate_text = False - translate_attrs = False - - if type(self.translate) is FunctionType: - gettext = self.translate - if ctxt: - ctxt['_i18n.gettext'] = gettext - else: - gettext = self.translate.ugettext - ngettext = self.translate.ungettext - try: - dgettext = self.translate.dugettext - dngettext = self.translate.dungettext - except AttributeError: - dgettext = lambda _, y: gettext(y) - dngettext = lambda _, s, p, n: ngettext(s, p, n) - if ctxt: - ctxt['_i18n.gettext'] = gettext - ctxt['_i18n.ngettext'] = ngettext - ctxt['_i18n.dgettext'] = dgettext - ctxt['_i18n.dngettext'] = dngettext - - if ctxt and ctxt.get('_i18n.domain'): - gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) - - for kind, data, pos in stream: - - # skip chunks that should not be localized - if skip: - if kind is START: - skip += 1 - elif kind is END: - skip -= 1 - yield kind, data, pos - continue - - # handle different events that can be localized - if kind is START: - tag, attrs = data - if tag in self.ignore_tags or \ - isinstance(attrs.get(xml_lang), basestring): - skip += 1 - yield kind, data, pos - continue - - new_attrs = [] - changed = False - - for name, value in attrs: - newval = value - if isinstance(value, basestring): - if translate_attrs and name in include_attrs: - newval = gettext(value) - else: - newval = list( - self(_ensure(value), ctxt, translate_text=False) - ) - if newval != value: - value = newval - changed = True - new_attrs.append((name, value)) - if changed: - attrs = Attrs(new_attrs) - - yield kind, (tag, 
attrs), pos - - elif translate_text and kind is TEXT: - text = data.strip() - if text: - data = data.replace(text, unicode(gettext(text))) - yield kind, data, pos - - elif kind is SUB: - directives, substream = data - current_domain = None - for idx, directive in enumerate(directives): - # Organize directives to make everything work - # FIXME: There's got to be a better way to do this! - if isinstance(directive, DomainDirective): - # Grab current domain and update context - current_domain = directive.domain - ctxt.push({'_i18n.domain': current_domain}) - # Put domain directive as the first one in order to - # update context before any other directives evaluation - directives.insert(0, directives.pop(idx)) - - # If this is an i18n directive, no need to translate text - # nodes here - is_i18n_directive = any([ - isinstance(d, ExtractableI18NDirective) - for d in directives - ]) - substream = list(self(substream, ctxt, - translate_text=not is_i18n_directive, - translate_attrs=translate_attrs)) - yield kind, (directives, substream), pos - - if current_domain: - ctxt.pop() - else: - yield kind, data, pos - - def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, - search_text=True, comment_stack=None): - """Extract localizable strings from the given template stream. - - For every string found, this function yields a ``(lineno, function, - message, comments)`` tuple, where: - - * ``lineno`` is the number of the line on which the string was found, - * ``function`` is the name of the ``gettext`` function used (if the - string was extracted from embedded Python code), and - * ``message`` is the string itself (a ``unicode`` object, or a tuple - of ``unicode`` objects for functions with multiple string - arguments). - * ``comments`` is a list of comments related to the message, extracted - from ``i18n:comment`` attributes found in the markup - - >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> - ... <head> - ... <title>Example</title> - ... 
</head> - ... <body> - ... <h1>Example</h1> - ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> - ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> - ... </body> - ... </html>''', filename='example.html') - >>> for line, func, msg, comments in Translator().extract(tmpl.stream): - ... print('%d, %r, %r' % (line, func, msg)) - 3, None, u'Example' - 6, None, u'Example' - 7, '_', u'Hello, %(name)s' - 8, 'ngettext', (u'You have %d item', u'You have %d items', None) - - :param stream: the event stream to extract strings from; can be a - regular stream or a template stream - :param gettext_functions: a sequence of function names that should be - treated as gettext-style localization - functions - :param search_text: whether the content of text nodes should be - extracted (used internally) - - :note: Changed in 0.4.1: For a function with multiple string arguments - (such as ``ngettext``), a single item with a tuple of strings is - yielded, instead an item for each string argument. - :note: Changed in 0.6: The returned tuples now include a fourth - element, which is a list of comments for the translator. 
- """ - if not self.extract_text: - search_text = False - if comment_stack is None: - comment_stack = [] - skip = 0 - - xml_lang = XML_NAMESPACE['lang'] - - for kind, data, pos in stream: - if skip: - if kind is START: - skip += 1 - if kind is END: - skip -= 1 - - if kind is START and not skip: - tag, attrs = data - if tag in self.ignore_tags or \ - isinstance(attrs.get(xml_lang), basestring): - skip += 1 - continue - - for message in self._extract_attrs((kind, data, pos), - gettext_functions, - search_text=search_text): - yield message - - elif not skip and search_text and kind is TEXT: - text = data.strip() - if text and [ch for ch in text if ch.isalpha()]: - yield pos[1], None, text, comment_stack[-1:] - - elif kind is EXPR or kind is EXEC: - for funcname, strings in extract_from_code(data, - gettext_functions): - # XXX: Do we need to grab i18n:comment from comment_stack ??? - yield pos[1], funcname, strings, [] - - elif kind is SUB: - directives, substream = data - in_comment = False - - for idx, directive in enumerate(directives): - # Do a first loop to see if there's a comment directive - # If there is update context and pop it from directives - if isinstance(directive, CommentDirective): - in_comment = True - comment_stack.append(directive.comment) - if len(directives) == 1: - # in case we're in the presence of something like: - # <p i18n:comment="foo">Foo</p> - for message in self.extract( - substream, gettext_functions, - search_text=search_text and not skip, - comment_stack=comment_stack): - yield message - directives.pop(idx) - elif not isinstance(directive, I18NDirective): - # Remove all other non i18n directives from the process - directives.pop(idx) - - if not directives and not in_comment: - # Extract content if there's no directives because - # strip was pop'ed and not because comment was pop'ed. - # Extraction in this case has been taken care of. 
- for message in self.extract( - substream, gettext_functions, - search_text=search_text and not skip): - yield message - - for directive in directives: - if isinstance(directive, ExtractableI18NDirective): - for message in directive.extract(self, - substream, gettext_functions, - search_text=search_text and not skip, - comment_stack=comment_stack): - yield message - else: - for message in self.extract( - substream, gettext_functions, - search_text=search_text and not skip, - comment_stack=comment_stack): - yield message - - if in_comment: - comment_stack.pop() - - def get_directive_index(self, dir_cls): - total = len(self._dir_order) - if dir_cls in self._dir_order: - return self._dir_order.index(dir_cls) - total - return total - - def setup(self, template): - """Convenience function to register the `Translator` filter and the - related directives with the given template. - - :param template: a `Template` instance - """ - template.filters.insert(0, self) - if hasattr(template, 'add_directives'): - template.add_directives(Translator.NAMESPACE, self) - - def _extract_attrs(self, event, gettext_functions, search_text): - for name, value in event[1][1]: - if search_text and isinstance(value, basestring): - if name in self.include_attrs: - text = value.strip() - if text: - yield event[2][1], None, text, [] - else: - for message in self.extract(_ensure(value), gettext_functions, - search_text=False): - yield message - - -class MessageBuffer(object): - """Helper class for managing internationalized mixed content. - - :since: version 0.5 - """ - - def __init__(self, directive=None): - """Initialize the message buffer. 
- - :param directive: the directive owning the buffer - :type directive: I18NDirective - """ - # params list needs to be copied so that directives can be evaluated - # more than once - self.orig_params = self.params = directive.params[:] - self.directive = directive - self.string = [] - self.events = {} - self.values = {} - self.depth = 1 - self.order = 1 - self.stack = [0] - self.subdirectives = {} - - def append(self, kind, data, pos): - """Append a stream event to the buffer. - - :param kind: the stream event kind - :param data: the event data - :param pos: the position of the event in the source - """ - if kind is SUB: - # The order needs to be +1 because a new START kind event will - # happen and we we need to wrap those events into our custom kind(s) - order = self.stack[-1] + 1 - subdirectives, substream = data - # Store the directives that should be applied after translation - self.subdirectives.setdefault(order, []).extend(subdirectives) - self.events.setdefault(order, []).append((SUB_START, None, pos)) - for skind, sdata, spos in substream: - self.append(skind, sdata, spos) - self.events.setdefault(order, []).append((SUB_END, None, pos)) - elif kind is TEXT: - if '[' in data or ']' in data: - # Quote [ and ] if it ain't us adding it, ie, if the user is - # using those chars in his templates, escape them - data = data.replace('[', '\[').replace(']', '\]') - self.string.append(data) - self.events.setdefault(self.stack[-1], []).append((kind, data, pos)) - elif kind is EXPR: - if self.params: - param = self.params.pop(0) - else: - params = ', '.join(['"%s"' % p for p in self.orig_params if p]) - if params: - params = "(%s)" % params - raise IndexError("%d parameters%s given to 'i18n:%s' but " - "%d or more expressions used in '%s', line %s" - % (len(self.orig_params), params, - self.directive.tagname, - len(self.orig_params) + 1, - os.path.basename(pos[0] or - 'In-memory Template'), - pos[1])) - self.string.append('%%(%s)s' % param) - 
self.events.setdefault(self.stack[-1], []).append((kind, data, pos)) - self.values[param] = (kind, data, pos) - else: - if kind is START: - self.string.append('[%d:' % self.order) - self.stack.append(self.order) - self.events.setdefault(self.stack[-1], - []).append((kind, data, pos)) - self.depth += 1 - self.order += 1 - elif kind is END: - self.depth -= 1 - if self.depth: - self.events[self.stack[-1]].append((kind, data, pos)) - self.string.append(']') - self.stack.pop() - - def format(self): - """Return a message identifier representing the content in the - buffer. - """ - return ''.join(self.string).strip() - - def translate(self, string, regex=re.compile(r'%\((\w+)\)s')): - """Interpolate the given message translation with the events in the - buffer and return the translated stream. - - :param string: the translated message string - """ - substream = None - - def yield_parts(string): - for idx, part in enumerate(regex.split(string)): - if idx % 2: - yield self.values[part] - elif part: - yield (TEXT, - part.replace('\[', '[').replace('\]', ']'), - (None, -1, -1) - ) - - parts = parse_msg(string) - parts_counter = {} - for order, string in parts: - parts_counter.setdefault(order, []).append(None) - - while parts: - order, string = parts.pop(0) - if len(parts_counter[order]) == 1: - events = self.events[order] - else: - events = [self.events[order].pop(0)] - parts_counter[order].pop() - - for event in events: - if event[0] is SUB_START: - substream = [] - elif event[0] is SUB_END: - # Yield a substream which might have directives to be - # applied to it (after translation events) - yield SUB, (self.subdirectives[order], substream), event[2] - substream = None - elif event[0] is TEXT: - if string: - for part in yield_parts(string): - if substream is not None: - substream.append(part) - else: - yield part - # String handled, reset it - string = None - elif event[0] is START: - if substream is not None: - substream.append(event) - else: - yield event - if string: - 
for part in yield_parts(string): - if substream is not None: - substream.append(part) - else: - yield part - # String handled, reset it - string = None - elif event[0] is END: - if string: - for part in yield_parts(string): - if substream is not None: - substream.append(part) - else: - yield part - # String handled, reset it - string = None - if substream is not None: - substream.append(event) - else: - yield event - elif event[0] is EXPR: - # These are handled on the strings itself - continue - else: - if string: - for part in yield_parts(string): - if substream is not None: - substream.append(part) - else: - yield part - # String handled, reset it - string = None - if substream is not None: - substream.append(event) - else: - yield event - - -def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')): - """Parse a translated message using Genshi mixed content message - formatting. - - >>> parse_msg("See [1:Help].") - [(0, 'See '), (1, 'Help'), (0, '.')] - - >>> parse_msg("See [1:our [2:Help] page] for details.") - [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')] - - >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].") - [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')] - - >>> parse_msg("[1:] Bilder pro Seite anzeigen.") - [(1, ''), (0, ' Bilder pro Seite anzeigen.')] - - :param string: the translated message string - :return: a list of ``(order, string)`` tuples - :rtype: `list` - """ - parts = [] - stack = [0] - while True: - mo = regex.search(string) - if not mo: - break - - if mo.start() or stack[-1]: - parts.append((stack[-1], string[:mo.start()])) - string = string[mo.end():] - - orderno = mo.group(1) - if orderno is not None: - stack.append(int(orderno)) - else: - stack.pop() - if not stack: - break - - if string: - parts.append((stack[-1], string)) - - return parts - - -def extract_from_code(code, gettext_functions): - """Extract strings from Python bytecode. 
- - >>> from genshi.template.eval import Expression - >>> expr = Expression('_("Hello")') - >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS)) - [('_', u'Hello')] - - >>> expr = Expression('ngettext("You have %(num)s item", ' - ... '"You have %(num)s items", num)') - >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS)) - [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))] - - :param code: the `Code` object - :type code: `genshi.template.eval.Code` - :param gettext_functions: a sequence of function names - :since: version 0.5 - """ - def _walk(node): - if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \ - and node.func.id in gettext_functions: - strings = [] - def _add(arg): - if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring): - strings.append(unicode(arg.s, 'utf-8')) - elif arg: - strings.append(None) - [_add(arg) for arg in node.args] - _add(node.starargs) - _add(node.kwargs) - if len(strings) == 1: - strings = strings[0] - else: - strings = tuple(strings) - yield node.func.id, strings - elif node._fields: - children = [] - for field in node._fields: - child = getattr(node, field, None) - if isinstance(child, list): - for elem in child: - children.append(elem) - elif isinstance(child, _ast.AST): - children.append(child) - for child in children: - for funcname, strings in _walk(child): - yield funcname, strings - return _walk(code.ast) - - -def extract(fileobj, keywords, comment_tags, options): - """Babel extraction method for Genshi templates. - - :param fileobj: the file-like object the messages should be extracted from - :param keywords: a list of keywords (i.e. 
function names) that should be - recognized as translation functions - :param comment_tags: a list of translator tags to search for and include - in the results - :param options: a dictionary of additional options (optional) - :return: an iterator over ``(lineno, funcname, message, comments)`` tuples - :rtype: ``iterator`` - """ - template_class = options.get('template_class', MarkupTemplate) - if isinstance(template_class, basestring): - module, clsname = template_class.split(':', 1) - template_class = getattr(__import__(module, {}, {}, [clsname]), clsname) - encoding = options.get('encoding', None) - - extract_text = options.get('extract_text', True) - if isinstance(extract_text, basestring): - extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true') - - ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS) - if isinstance(ignore_tags, basestring): - ignore_tags = ignore_tags.split() - ignore_tags = [QName(tag) for tag in ignore_tags] - - include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS) - if isinstance(include_attrs, basestring): - include_attrs = include_attrs.split() - include_attrs = [QName(attr) for attr in include_attrs] - - tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None), - encoding=encoding) - tmpl.loader = None - - translator = Translator(None, ignore_tags, include_attrs, extract_text) - if hasattr(tmpl, 'add_directives'): - tmpl.add_directives(Translator.NAMESPACE, translator) - for message in translator.extract(tmpl.stream, gettext_functions=keywords): - yield message diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py deleted file mode 100644 index 9b75b06..0000000 --- a/genshi/filters/transform.py +++ /dev/null @@ -1,1310 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2007-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. 
The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""A filter for functional-style transformations of markup streams. - -The `Transformer` filter provides a variety of transformations that can be -applied to parts of streams that match given XPath expressions. These -transformations can be chained to achieve results that would be comparitively -tedious to achieve by writing stream filters by hand. The approach of chaining -node selection and transformation has been inspired by the `jQuery`_ Javascript -library. - - .. _`jQuery`: http://jquery.com/ - -For example, the following transformation removes the ``<title>`` element from -the ``<head>`` of the input document: - ->>> from genshi.builder import tag ->>> html = HTML('''<html> -... <head><title>Some Title</title></head> -... <body> -... Some <em>body</em> text. -... </body> -... </html>''') ->>> print(html | Transformer('body/em').map(unicode.upper, TEXT) -... .unwrap().wrap(tag.u)) -<html> - <head><title>Some Title</title></head> - <body> - Some <u>BODY</u> text. - </body> -</html> - -The ``Transformer`` support a large number of useful transformations out of the -box, but custom transformations can be added easily. 
- -:since: version 0.5 -""" - -import re -import sys - -from genshi.builder import Element -from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup -from genshi.path import Path - -__all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER', - 'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK'] - - -class TransformMark(str): - """A mark on a transformation stream.""" - __slots__ = [] - _instances = {} - - def __new__(cls, val): - return cls._instances.setdefault(val, str.__new__(cls, val)) - - -ENTER = TransformMark('ENTER') -"""Stream augmentation mark indicating that a selected element is being -entered.""" - -INSIDE = TransformMark('INSIDE') -"""Stream augmentation mark indicating that processing is currently inside a -selected element.""" - -OUTSIDE = TransformMark('OUTSIDE') -"""Stream augmentation mark indicating that a match occurred outside a selected -element.""" - -ATTR = TransformMark('ATTR') -"""Stream augmentation mark indicating a selected element attribute.""" - -EXIT = TransformMark('EXIT') -"""Stream augmentation mark indicating that a selected element is being -exited.""" - -BREAK = TransformMark('BREAK') -"""Stream augmentation mark indicating a break between two otherwise contiguous -blocks of marked events. - -This is used primarily by the cut() transform to provide later transforms with -an opportunity to operate on the cut buffer. -""" - - -class PushBackStream(object): - """Allows a single event to be pushed back onto the stream and re-consumed. 
- """ - def __init__(self, stream): - self.stream = iter(stream) - self.peek = None - - def push(self, event): - assert self.peek is None - self.peek = event - - def __iter__(self): - while True: - if self.peek is not None: - peek = self.peek - self.peek = None - yield peek - else: - try: - event = self.stream.next() - yield event - except StopIteration: - if self.peek is None: - raise - - -class Transformer(object): - """Stream filter that can apply a variety of different transformations to - a stream. - - This is achieved by selecting the events to be transformed using XPath, - then applying the transformations to the events matched by the path - expression. Each marked event is in the form (mark, (kind, data, pos)), - where mark can be any of `ENTER`, `INSIDE`, `EXIT`, `OUTSIDE`, or `None`. - - The first three marks match `START` and `END` events, and any events - contained `INSIDE` any selected XML/HTML element. A non-element match - outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE` - mark. - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - - Transformations act on selected stream events matching an XPath expression. - Here's an example of removing some markup (the title, in this case) - selected by an expression: - - >>> print(html | Transformer('head/title').remove()) - <html><head/><body>Some <em>body</em> text.</body></html> - - Inserted content can be passed in the form of a string, or a markup event - stream, which includes streams generated programmatically via the - `builder` module: - - >>> from genshi.builder import tag - >>> print(html | Transformer('body').prepend(tag.h1('Document Title'))) - <html><head><title>Some Title</title></head><body><h1>Document - Title</h1>Some <em>body</em> text.</body></html> - - Each XPath expression determines the set of tags that will be acted upon by - subsequent transformations. 
In this example we select the ``<title>`` text, - copy it into a buffer, then select the ``<body>`` element and paste the - copied text into the body as ``<h1>`` enclosed text: - - >>> buffer = StreamBuffer() - >>> print(html | Transformer('head/title/text()').copy(buffer) - ... .end().select('body').prepend(tag.h1(buffer))) - <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some - <em>body</em> text.</body></html> - - Transformations can also be assigned and reused, although care must be - taken when using buffers, to ensure that buffers are cleared between - transforms: - - >>> emphasis = Transformer('body//em').attr('class', 'emphasis') - >>> print(html | emphasis) - <html><head><title>Some Title</title></head><body>Some <em - class="emphasis">body</em> text.</body></html> - """ - - __slots__ = ['transforms'] - - def __init__(self, path='.'): - """Construct a new transformation filter. - - :param path: an XPath expression (as string) or a `Path` instance - """ - self.transforms = [SelectTransformation(path)] - - def __call__(self, stream, keep_marks=False): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - :param keep_marks: Do not strip transformer selection marks from the - stream. Useful for testing. - :return: the transformed stream - :rtype: `Stream` - """ - transforms = self._mark(stream) - for link in self.transforms: - transforms = link(transforms) - if not keep_marks: - transforms = self._unmark(transforms) - return Stream(transforms, - serializer=getattr(stream, 'serializer', None)) - - def apply(self, function): - """Apply a transformation to the stream. - - Transformations can be chained, similar to stream filters. Any callable - accepting a marked stream can be used as a transform. - - As an example, here is a simple `TEXT` event upper-casing transform: - - >>> def upper(stream): - ... for mark, (kind, data, pos) in stream: - ... if mark and kind is TEXT: - ... 
yield mark, (kind, data.upper(), pos) - ... else: - ... yield mark, (kind, data, pos) - >>> short_stream = HTML('<body>Some <em>test</em> text</body>') - >>> print(short_stream | Transformer('.//em/text()').apply(upper)) - <body>Some <em>TEST</em> text</body> - """ - transformer = Transformer() - transformer.transforms = self.transforms[:] - if isinstance(function, Transformer): - transformer.transforms.extend(function.transforms) - else: - transformer.transforms.append(function) - return transformer - - #{ Selection operations - - def select(self, path): - """Mark events matching the given XPath expression, within the current - selection. - - >>> html = HTML('<body>Some <em>test</em> text</body>') - >>> print(html | Transformer().select('.//em').trace()) - (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) - (None, ('TEXT', u'Some ', (None, 1, 6))) - ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) - ('INSIDE', ('TEXT', u'test', (None, 1, 15))) - ('EXIT', ('END', QName('em'), (None, 1, 19))) - (None, ('TEXT', u' text', (None, 1, 24))) - (None, ('END', QName('body'), (None, 1, 29))) - <body>Some <em>test</em> text</body> - - :param path: an XPath expression (as string) or a `Path` instance - :return: the stream augmented by transformation marks - :rtype: `Transformer` - """ - return self.apply(SelectTransformation(path)) - - def invert(self): - """Invert selection so that marked events become unmarked, and vice - versa. - - Specificaly, all marks are converted to null marks, and all null marks - are converted to OUTSIDE marks. 
- - >>> html = HTML('<body>Some <em>test</em> text</body>') - >>> print(html | Transformer('//em').invert().trace()) - ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) - ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) - (None, ('START', (QName('em'), Attrs()), (None, 1, 11))) - (None, ('TEXT', u'test', (None, 1, 15))) - (None, ('END', QName('em'), (None, 1, 19))) - ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) - ('OUTSIDE', ('END', QName('body'), (None, 1, 29))) - <body>Some <em>test</em> text</body> - - :rtype: `Transformer` - """ - return self.apply(InvertTransformation()) - - def end(self): - """End current selection, allowing all events to be selected. - - Example: - - >>> html = HTML('<body>Some <em>test</em> text</body>') - >>> print(html | Transformer('//em').end().trace()) - ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) - ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) - ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11))) - ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) - ('OUTSIDE', ('END', QName('em'), (None, 1, 19))) - ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) - ('OUTSIDE', ('END', QName('body'), (None, 1, 29))) - <body>Some <em>test</em> text</body> - - :return: the stream augmented by transformation marks - :rtype: `Transformer` - """ - return self.apply(EndTransformation()) - - #{ Deletion operations - - def empty(self): - """Empty selected elements of all content. - - Example: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em').empty()) - <html><head><title>Some Title</title></head><body>Some <em/> - text.</body></html> - - :rtype: `Transformer` - """ - return self.apply(EmptyTransformation()) - - def remove(self): - """Remove selection from the stream. - - Example: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... 
'<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em').remove()) - <html><head><title>Some Title</title></head><body>Some - text.</body></html> - - :rtype: `Transformer` - """ - return self.apply(RemoveTransformation()) - - #{ Direct element operations - - def unwrap(self): - """Remove outermost enclosing elements from selection. - - Example: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em').unwrap()) - <html><head><title>Some Title</title></head><body>Some body - text.</body></html> - - :rtype: `Transformer` - """ - return self.apply(UnwrapTransformation()) - - def wrap(self, element): - """Wrap selection in an element. - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em').wrap('strong')) - <html><head><title>Some Title</title></head><body>Some - <strong><em>body</em></strong> text.</body></html> - - :param element: either a tag name (as string) or an `Element` object - :rtype: `Transformer` - """ - return self.apply(WrapTransformation(element)) - - #{ Content insertion operations - - def replace(self, content): - """Replace selection with content. - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//title/text()').replace('New Title')) - <html><head><title>New Title</title></head><body>Some <em>body</em> - text.</body></html> - - :param content: Either a callable, an iterable of events, or a string - to insert. - :rtype: `Transformer` - """ - return self.apply(ReplaceTransformation(content)) - - def before(self, content): - """Insert content before selection. - - In this example we insert the word 'emphasised' before the <em> opening - tag: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... 
'<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em').before('emphasised ')) - <html><head><title>Some Title</title></head><body>Some emphasised - <em>body</em> text.</body></html> - - :param content: Either a callable, an iterable of events, or a string - to insert. - :rtype: `Transformer` - """ - return self.apply(BeforeTransformation(content)) - - def after(self, content): - """Insert content after selection. - - Here, we insert some text after the </em> closing tag: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em').after(' rock')) - <html><head><title>Some Title</title></head><body>Some <em>body</em> - rock text.</body></html> - - :param content: Either a callable, an iterable of events, or a string - to insert. - :rtype: `Transformer` - """ - return self.apply(AfterTransformation(content)) - - def prepend(self, content): - """Insert content after the ENTER event of the selection. - - Inserting some new text at the start of the <body>: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//body').prepend('Some new body text. ')) - <html><head><title>Some Title</title></head><body>Some new body text. - Some <em>body</em> text.</body></html> - - :param content: Either a callable, an iterable of events, or a string - to insert. - :rtype: `Transformer` - """ - return self.apply(PrependTransformation(content)) - - def append(self, content): - """Insert content before the END event of the selection. - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//body').append(' Some new body text.')) - <html><head><title>Some Title</title></head><body>Some <em>body</em> - text. 
Some new body text.</body></html> - - :param content: Either a callable, an iterable of events, or a string - to insert. - :rtype: `Transformer` - """ - return self.apply(AppendTransformation(content)) - - #{ Attribute manipulation - - def attr(self, name, value): - """Add, replace or delete an attribute on selected elements. - - If `value` evaulates to `None` the attribute will be deleted from the - element: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' - ... '</html>') - >>> print(html | Transformer('body/em').attr('class', None)) - <html><head><title>Some Title</title></head><body>Some <em>body</em> - <em>text</em>.</body></html> - - Otherwise the attribute will be set to `value`: - - >>> print(html | Transformer('body/em').attr('class', 'emphasis')) - <html><head><title>Some Title</title></head><body>Some <em - class="emphasis">body</em> <em class="emphasis">text</em>.</body></html> - - If `value` is a callable it will be called with the attribute name and - the `START` event for the matching element. Its return value will then - be used to set the attribute: - - >>> def print_attr(name, event): - ... attrs = event[1][1] - ... print(attrs) - ... return attrs.get(name) - >>> print(html | Transformer('body/em').attr('class', print_attr)) - Attrs([(QName('class'), u'before')]) - Attrs() - <html><head><title>Some Title</title></head><body>Some <em - class="before">body</em> <em>text</em>.</body></html> - - :param name: the name of the attribute - :param value: the value that should be set for the attribute. - :rtype: `Transformer` - """ - return self.apply(AttrTransformation(name, value)) - - #{ Buffer operations - - def copy(self, buffer, accumulate=False): - """Copy selection into buffer. - - The buffer is replaced by each *contiguous* selection before being passed - to the next transformation. 
If accumulate=True, further selections will - be appended to the buffer rather than replacing it. - - >>> from genshi.builder import tag - >>> buffer = StreamBuffer() - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('head/title/text()').copy(buffer) - ... .end().select('body').prepend(tag.h1(buffer))) - <html><head><title>Some Title</title></head><body><h1>Some - Title</h1>Some <em>body</em> text.</body></html> - - This example illustrates that only a single contiguous selection will - be buffered: - - >>> print(html | Transformer('head/title/text()').copy(buffer) - ... .end().select('body/em').copy(buffer).end().select('body') - ... .prepend(tag.h1(buffer))) - <html><head><title>Some Title</title></head><body><h1>Some - Title</h1>Some <em>body</em> text.</body></html> - >>> print(buffer) - <em>body</em> - - Element attributes can also be copied for later use: - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body><em>Some</em> <em class="before">body</em>' - ... '<em>text</em>.</body></html>') - >>> buffer = StreamBuffer() - >>> def apply_attr(name, entry): - ... return list(buffer)[0][1][1].get('class') - >>> print(html | Transformer('body/em[@class]/@class').copy(buffer) - ... .end().buffer().select('body/em[not(@class)]') - ... .attr('class', apply_attr)) - <html><head><title>Some Title</title></head><body><em - class="before">Some</em> <em class="before">body</em><em - class="before">text</em>.</body></html> - - - :param buffer: the `StreamBuffer` in which the selection should be - stored - :rtype: `Transformer` - :note: Copy (and cut) copy each individual selected object into the - buffer before passing to the next transform. For example, the - XPath ``*|text()`` will select all elements and text, each - instance of which will be copied to the buffer individually - before passing to the next transform. 
This has implications for - how ``StreamBuffer`` objects can be used, so some - experimentation may be required. - - """ - return self.apply(CopyTransformation(buffer, accumulate)) - - def cut(self, buffer, accumulate=False): - """Copy selection into buffer and remove the selection from the stream. - - >>> from genshi.builder import tag - >>> buffer = StreamBuffer() - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('.//em/text()').cut(buffer) - ... .end().select('.//em').after(tag.h1(buffer))) - <html><head><title>Some Title</title></head><body>Some - <em/><h1>body</h1> text.</body></html> - - Specifying accumulate=True, appends all selected intervals onto the - buffer. Combining this with the .buffer() operation allows us operate - on all copied events rather than per-segment. See the documentation on - buffer() for more information. - - :param buffer: the `StreamBuffer` in which the selection should be - stored - :rtype: `Transformer` - :note: this transformation will buffer the entire input stream - """ - return self.apply(CutTransformation(buffer, accumulate)) - - def buffer(self): - """Buffer the entire stream (can consume a considerable amount of - memory). - - Useful in conjunction with copy(accumulate=True) and - cut(accumulate=True) to ensure that all marked events in the entire - stream are copied to the buffer before further transformations are - applied. - - For example, to move all <note> elements inside a <notes> tag at the - top of the document: - - >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' - ... 'text <note>two</note>.</body></doc>') - >>> buffer = StreamBuffer() - >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) - ... 
.end().buffer().select('notes').prepend(buffer)) - <doc><notes><note>one</note><note>two</note></notes><body>Some text - .</body></doc> - - """ - return self.apply(list) - - #{ Miscellaneous operations - - def filter(self, filter): - """Apply a normal stream filter to the selection. The filter is called - once for each contiguous block of marked events. - - >>> from genshi.filters.html import HTMLSanitizer - >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' - ... '</script> and some more text</body></html>') - >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) - <html><body>Some text and some more text</body></html> - - :param filter: The stream filter to apply. - :rtype: `Transformer` - """ - return self.apply(FilterTransformation(filter)) - - def map(self, function, kind): - """Applies a function to the ``data`` element of events of ``kind`` in - the selection. - - >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') - >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) - <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> - text.</body></html> - - :param function: the function to apply - :param kind: the kind of event the function should be applied to - :rtype: `Transformer` - """ - return self.apply(MapTransformation(function, kind)) - - def substitute(self, pattern, replace, count=1): - """Replace text matching a regular expression. - - Refer to the documentation for ``re.sub()`` for details. - - >>> html = HTML('<html><body>Some text, some more text and ' - ... '<b>some bold text</b>\\n' - ... '<i>some italicised text</i></body></html>') - >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) - <html><body>Some text, some more text and <b>SOME bold text</b> - <i>some italicised text</i></body></html> - >>> tags = tag.html(tag.body('Some text, some more text and\\n', - ... 
Markup('<b>some bold text</b>'))) - >>> print(tags.generate() | Transformer('body').substitute( - ... '(?i)some', 'SOME')) - <html><body>SOME text, some more text and - <b>SOME bold text</b></body></html> - - :param pattern: A regular expression object or string. - :param replace: Replacement pattern. - :param count: Number of replacements to make in each text fragment. - :rtype: `Transformer` - """ - return self.apply(SubstituteTransformation(pattern, replace, count)) - - def rename(self, name): - """Rename matching elements. - - >>> html = HTML('<html><body>Some text, some more text and ' - ... '<b>some bold text</b></body></html>') - >>> print(html | Transformer('body/b').rename('strong')) - <html><body>Some text, some more text and <strong>some bold text</strong></body></html> - """ - return self.apply(RenameTransformation(name)) - - def trace(self, prefix='', fileobj=None): - """Print events as they pass through the transform. - - >>> html = HTML('<body>Some <em>test</em> text</body>') - >>> print(html | Transformer('em').trace()) - (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) - (None, ('TEXT', u'Some ', (None, 1, 6))) - ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) - ('INSIDE', ('TEXT', u'test', (None, 1, 15))) - ('EXIT', ('END', QName('em'), (None, 1, 19))) - (None, ('TEXT', u' text', (None, 1, 24))) - (None, ('END', QName('body'), (None, 1, 29))) - <body>Some <em>test</em> text</body> - - :param prefix: a string to prefix each event with in the output - :param fileobj: the writable file-like object to write to; defaults to - the standard output stream - :rtype: `Transformer` - """ - return self.apply(TraceTransformation(prefix, fileobj=fileobj)) - - # Internal methods - - def _mark(self, stream): - for event in stream: - yield OUTSIDE, event - - def _unmark(self, stream): - for mark, event in stream: - kind = event[0] - if not (kind is None or kind is ATTR or kind is BREAK): - yield event - - -class SelectTransformation(object): - 
"""Select and mark events that match an XPath expression.""" - - def __init__(self, path): - """Create selection. - - :param path: an XPath expression (as string) or a `Path` object - """ - if not isinstance(path, Path): - path = Path(path) - self.path = path - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - """ - namespaces = {} - variables = {} - test = self.path.test() - stream = iter(stream) - next = stream.next - for mark, event in stream: - if mark is None: - yield mark, event - continue - result = test(event, namespaces, variables) - # XXX This is effectively genshi.core._ensure() for transform - # streams. - if result is True: - if event[0] is START: - yield ENTER, event - depth = 1 - while depth > 0: - mark, subevent = next() - if subevent[0] is START: - depth += 1 - elif subevent[0] is END: - depth -= 1 - if depth == 0: - yield EXIT, subevent - else: - yield INSIDE, subevent - test(subevent, namespaces, variables, updateonly=True) - else: - yield OUTSIDE, event - elif isinstance(result, Attrs): - # XXX Selected *attributes* are given a "kind" of None to - # indicate they are not really part of the stream. - yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2]) - yield None, event - elif isinstance(result, tuple): - yield OUTSIDE, result - elif result: - # XXX Assume everything else is "text"? - yield None, (TEXT, unicode(result), (None, -1, -1)) - else: - yield None, event - - -class InvertTransformation(object): - """Invert selection so that marked events become unmarked, and vice versa. - - Specificaly, all input marks are converted to null marks, and all input - null marks are converted to OUTSIDE marks. - """ - - def __call__(self, stream): - """Apply the transform filter to the marked stream. 
- - :param stream: the marked event stream to filter - """ - for mark, event in stream: - if mark: - yield None, event - else: - yield OUTSIDE, event - - -class EndTransformation(object): - """End the current selection.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - """ - for mark, event in stream: - yield OUTSIDE, event - - -class EmptyTransformation(object): - """Empty selected elements of all content.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - """ - for mark, event in stream: - yield mark, event - if mark is ENTER: - for mark, event in stream: - if mark is EXIT: - yield mark, event - break - - -class RemoveTransformation(object): - """Remove selection from the stream.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - """ - for mark, event in stream: - if mark is None: - yield mark, event - - -class UnwrapTransformation(object): - """Remove outtermost enclosing elements from selection.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. 
- - :param stream: the marked event stream to filter - """ - for mark, event in stream: - if mark not in (ENTER, EXIT): - yield mark, event - - -class WrapTransformation(object): - """Wrap selection in an element.""" - - def __init__(self, element): - if isinstance(element, Element): - self.element = element - else: - self.element = Element(element) - - def __call__(self, stream): - for mark, event in stream: - if mark: - element = list(self.element.generate()) - for prefix in element[:-1]: - yield None, prefix - yield mark, event - start = mark - stopped = False - for mark, event in stream: - if start is ENTER and mark is EXIT: - yield mark, event - stopped = True - break - if not mark: - break - yield mark, event - else: - stopped = True - yield None, element[-1] - if not stopped: - yield mark, event - else: - yield mark, event - - -class TraceTransformation(object): - """Print events as they pass through the transform.""" - - def __init__(self, prefix='', fileobj=None): - """Trace constructor. - - :param prefix: text to prefix each traced line with. - :param fileobj: the writable file-like object to write to - """ - self.prefix = prefix - self.fileobj = fileobj or sys.stdout - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - """ - for event in stream: - self.fileobj.write('%s%s\n' % (self.prefix, event)) - yield event - - -class FilterTransformation(object): - """Apply a normal stream filter to the selection. The filter is called once - for each selection.""" - - def __init__(self, filter): - """Create the transform. - - :param filter: The stream filter to apply. - """ - self.filter = filter - - def __call__(self, stream): - """Apply the transform filter to the marked stream. 
- - :param stream: The marked event stream to filter - """ - def flush(queue): - if queue: - for event in self.filter(queue): - yield OUTSIDE, event - del queue[:] - - queue = [] - for mark, event in stream: - if mark is ENTER: - queue.append(event) - for mark, event in stream: - queue.append(event) - if mark is EXIT: - break - for queue_event in flush(queue): - yield queue_event - elif mark is OUTSIDE: - stopped = False - queue.append(event) - for mark, event in stream: - if mark is not OUTSIDE: - break - queue.append(event) - else: - stopped = True - for queue_event in flush(queue): - yield queue_event - if not stopped: - yield mark, event - else: - yield mark, event - for queue_event in flush(queue): - yield queue_event - - -class MapTransformation(object): - """Apply a function to the `data` element of events of ``kind`` in the - selection. - """ - - def __init__(self, function, kind): - """Create the transform. - - :param function: the function to apply; the function must take one - argument, the `data` element of each selected event - :param kind: the stream event ``kind`` to apply the `function` to - """ - self.function = function - self.kind = kind - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - for mark, (kind, data, pos) in stream: - if mark and self.kind in (None, kind): - yield mark, (kind, self.function(data), pos) - else: - yield mark, (kind, data, pos) - - -class SubstituteTransformation(object): - """Replace text matching a regular expression. - - Refer to the documentation for ``re.sub()`` for details. - """ - def __init__(self, pattern, replace, count=0): - """Create the transform. - - :param pattern: A regular expression object, or string. - :param replace: Replacement pattern. - :param count: Number of replacements to make in each text fragment. 
- """ - if isinstance(pattern, basestring): - self.pattern = re.compile(pattern) - else: - self.pattern = pattern - self.count = count - self.replace = replace - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - for mark, (kind, data, pos) in stream: - if mark is not None and kind is TEXT: - new_data = self.pattern.sub(self.replace, data, self.count) - if isinstance(data, Markup): - data = Markup(new_data) - else: - data = new_data - yield mark, (kind, data, pos) - - -class RenameTransformation(object): - """Rename matching elements.""" - def __init__(self, name): - """Create the transform. - - :param name: New element name. - """ - self.name = QName(name) - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - for mark, (kind, data, pos) in stream: - if mark is ENTER: - data = self.name, data[1] - elif mark is EXIT: - data = self.name - yield mark, (kind, data, pos) - - -class InjectorTransformation(object): - """Abstract base class for transformations that inject content into a - stream. - - >>> class Top(InjectorTransformation): - ... def __call__(self, stream): - ... for event in self._inject(): - ... yield event - ... for event in stream: - ... yield event - >>> html = HTML('<body>Some <em>test</em> text</body>') - >>> print(html | Transformer('.//em').apply(Top('Prefix '))) - Prefix <body>Some <em>test</em> text</body> - """ - def __init__(self, content): - """Create a new injector. - - :param content: An iterable of Genshi stream events, or a string to be - injected. 
- """ - self.content = content - - def _inject(self): - content = self.content - if hasattr(content, '__call__'): - content = content() - for event in _ensure(content): - yield None, event - - -class ReplaceTransformation(InjectorTransformation): - """Replace selection with content.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - stream = PushBackStream(stream) - for mark, event in stream: - if mark is not None: - start = mark - for subevent in self._inject(): - yield subevent - for mark, event in stream: - if start is ENTER: - if mark is EXIT: - break - elif mark != start: - stream.push((mark, event)) - break - else: - yield mark, event - - -class BeforeTransformation(InjectorTransformation): - """Insert content before selection.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - stream = PushBackStream(stream) - for mark, event in stream: - if mark is not None: - start = mark - for subevent in self._inject(): - yield subevent - yield mark, event - for mark, event in stream: - if mark != start and start is not ENTER: - stream.push((mark, event)) - break - yield mark, event - if start is ENTER and mark is EXIT: - break - else: - yield mark, event - - -class AfterTransformation(InjectorTransformation): - """Insert content after selection.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. 
- - :param stream: The marked event stream to filter - """ - stream = PushBackStream(stream) - for mark, event in stream: - yield mark, event - if mark: - start = mark - for mark, event in stream: - if start is not ENTER and mark != start: - stream.push((mark, event)) - break - yield mark, event - if start is ENTER and mark is EXIT: - break - for subevent in self._inject(): - yield subevent - - -class PrependTransformation(InjectorTransformation): - """Prepend content to the inside of selected elements.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - for mark, event in stream: - yield mark, event - if mark is ENTER: - for subevent in self._inject(): - yield subevent - - -class AppendTransformation(InjectorTransformation): - """Append content after the content of selected elements.""" - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: The marked event stream to filter - """ - for mark, event in stream: - yield mark, event - if mark is ENTER: - for mark, event in stream: - if mark is EXIT: - break - yield mark, event - for subevent in self._inject(): - yield subevent - yield mark, event - - -class AttrTransformation(object): - """Set an attribute on selected elements.""" - - def __init__(self, name, value): - """Construct transform. - - :param name: name of the attribute that should be set - :param value: the value to set - """ - self.name = name - self.value = value - - def __call__(self, stream): - """Apply the transform filter to the marked stream. 
- - :param stream: The marked event stream to filter - """ - callable_value = hasattr(self.value, '__call__') - for mark, (kind, data, pos) in stream: - if mark is ENTER: - if callable_value: - value = self.value(self.name, (kind, data, pos)) - else: - value = self.value - if value is None: - attrs = data[1] - [QName(self.name)] - else: - attrs = data[1] | [(QName(self.name), value)] - data = (data[0], attrs) - yield mark, (kind, data, pos) - - - -class StreamBuffer(Stream): - """Stream event buffer used for cut and copy transformations.""" - - def __init__(self): - """Create the buffer.""" - Stream.__init__(self, []) - - def append(self, event): - """Add an event to the buffer. - - :param event: the markup event to add - """ - self.events.append(event) - - def reset(self): - """Empty the buffer of events.""" - del self.events[:] - - -class CopyTransformation(object): - """Copy selected events into a buffer for later insertion.""" - - def __init__(self, buffer, accumulate=False): - """Create the copy transformation. - - :param buffer: the `StreamBuffer` in which the selection should be - stored - """ - if not accumulate: - buffer.reset() - self.buffer = buffer - self.accumulate = accumulate - - def __call__(self, stream): - """Apply the transformation to the marked stream. - - :param stream: the marked event stream to filter - """ - stream = PushBackStream(stream) - - for mark, event in stream: - if mark: - if not self.accumulate: - self.buffer.reset() - events = [(mark, event)] - self.buffer.append(event) - start = mark - for mark, event in stream: - if start is not ENTER and mark != start: - stream.push((mark, event)) - break - events.append((mark, event)) - self.buffer.append(event) - if start is ENTER and mark is EXIT: - break - for i in events: - yield i - else: - yield mark, event - - -class CutTransformation(object): - """Cut selected events into a buffer for later insertion and remove the - selection. 
- """ - - def __init__(self, buffer, accumulate=False): - """Create the cut transformation. - - :param buffer: the `StreamBuffer` in which the selection should be - stored - """ - self.buffer = buffer - self.accumulate = accumulate - - - def __call__(self, stream): - """Apply the transform filter to the marked stream. - - :param stream: the marked event stream to filter - """ - attributes = [] - stream = PushBackStream(stream) - broken = False - if not self.accumulate: - self.buffer.reset() - for mark, event in stream: - if mark: - # Send a BREAK event if there was no other event sent between - if not self.accumulate: - if not broken and self.buffer: - yield BREAK, (BREAK, None, None) - self.buffer.reset() - self.buffer.append(event) - start = mark - if mark is ATTR: - attributes.extend([name for name, _ in event[1][1]]) - for mark, event in stream: - if start is mark is ATTR: - attributes.extend([name for name, _ in event[1][1]]) - # Handle non-element contiguous selection - if start is not ENTER and mark != start: - # Operating on the attributes of a START event - if start is ATTR: - kind, data, pos = event - assert kind is START - data = (data[0], data[1] - attributes) - attributes = None - stream.push((mark, (kind, data, pos))) - else: - stream.push((mark, event)) - break - self.buffer.append(event) - if start is ENTER and mark is EXIT: - break - broken = False - else: - broken = True - yield mark, event - if not broken and self.buffer: - yield BREAK, (BREAK, None, None) diff --git a/genshi/input.py b/genshi/input.py deleted file mode 100644 index 039e5e5..0000000 --- a/genshi/input.py +++ /dev/null @@ -1,443 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. 
-# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Support for constructing markup streams from files, strings, or other -sources. -""" - -from itertools import chain -import htmlentitydefs as entities -import HTMLParser as html -from StringIO import StringIO -from xml.parsers import expat - -from genshi.core import Attrs, QName, Stream, stripentities -from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \ - END_NS, START_CDATA, END_CDATA, PI, COMMENT - -__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML'] -__docformat__ = 'restructuredtext en' - - -def ET(element): - """Convert a given ElementTree element to a markup stream. - - :param element: an ElementTree element - :return: a markup stream - """ - tag_name = QName(element.tag.lstrip('{')) - attrs = Attrs([(QName(attr.lstrip('{')), value) - for attr, value in element.items()]) - - yield START, (tag_name, attrs), (None, -1, -1) - if element.text: - yield TEXT, element.text, (None, -1, -1) - for child in element.getchildren(): - for item in ET(child): - yield item - yield END, tag_name, (None, -1, -1) - if element.tail: - yield TEXT, element.tail, (None, -1, -1) - - -class ParseError(Exception): - """Exception raised when fatal syntax errors are found in the input being - parsed. - """ - - def __init__(self, message, filename=None, lineno=-1, offset=-1): - """Exception initializer. 
- - :param message: the error message from the parser - :param filename: the path to the file that was parsed - :param lineno: the number of the line on which the error was encountered - :param offset: the column number where the error was encountered - """ - self.msg = message - if filename: - message += ', in ' + filename - Exception.__init__(self, message) - self.filename = filename or '<string>' - self.lineno = lineno - self.offset = offset - - -class XMLParser(object): - """Generator-based XML parser based on roughly equivalent code in - Kid/ElementTree. - - The parsing is initiated by iterating over the parser object: - - >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>')) - >>> for kind, data, pos in parser: - ... print('%s %s' % (kind, data)) - START (QName('root'), Attrs([(QName('id'), u'2')])) - START (QName('child'), Attrs()) - TEXT Foo - END child - END root - """ - - _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in - entities.name2codepoint.items()] - _external_dtd = '\n'.join(_entitydefs) - - def __init__(self, source, filename=None, encoding=None): - """Initialize the parser for the given XML input. 
- - :param source: the XML text as a file-like object - :param filename: the name of the file, if appropriate - :param encoding: the encoding of the file; if not specified, the - encoding is assumed to be ASCII, UTF-8, or UTF-16, or - whatever the encoding specified in the XML declaration - (if any) - """ - self.source = source - self.filename = filename - - # Setup the Expat parser - parser = expat.ParserCreate(encoding, '}') - parser.buffer_text = True - parser.returns_unicode = True - parser.ordered_attributes = True - - parser.StartElementHandler = self._handle_start - parser.EndElementHandler = self._handle_end - parser.CharacterDataHandler = self._handle_data - parser.StartDoctypeDeclHandler = self._handle_doctype - parser.StartNamespaceDeclHandler = self._handle_start_ns - parser.EndNamespaceDeclHandler = self._handle_end_ns - parser.StartCdataSectionHandler = self._handle_start_cdata - parser.EndCdataSectionHandler = self._handle_end_cdata - parser.ProcessingInstructionHandler = self._handle_pi - parser.XmlDeclHandler = self._handle_xml_decl - parser.CommentHandler = self._handle_comment - - # Tell Expat that we'll handle non-XML entities ourselves - # (in _handle_other) - parser.DefaultHandler = self._handle_other - parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) - parser.UseForeignDTD() - parser.ExternalEntityRefHandler = self._build_foreign - - self.expat = parser - self._queue = [] - - def parse(self): - """Generator that parses the XML source, yielding markup events. 
- - :return: a markup event stream - :raises ParseError: if the XML text is not well formed - """ - def _generate(): - try: - bufsize = 4 * 1024 # 4K - done = False - while 1: - while not done and len(self._queue) == 0: - data = self.source.read(bufsize) - if data == '': # end of data - if hasattr(self, 'expat'): - self.expat.Parse('', True) - del self.expat # get rid of circular references - done = True - else: - if isinstance(data, unicode): - data = data.encode('utf-8') - self.expat.Parse(data, False) - for event in self._queue: - yield event - self._queue = [] - if done: - break - except expat.ExpatError, e: - msg = str(e) - raise ParseError(msg, self.filename, e.lineno, e.offset) - return Stream(_generate()).filter(_coalesce) - - def __iter__(self): - return iter(self.parse()) - - def _build_foreign(self, context, base, sysid, pubid): - parser = self.expat.ExternalEntityParserCreate(context) - parser.ParseFile(StringIO(self._external_dtd)) - return 1 - - def _enqueue(self, kind, data=None, pos=None): - if pos is None: - pos = self._getpos() - if kind is TEXT: - # Expat reports the *end* of the text event as current position. We - # try to fix that up here as much as possible. Unfortunately, the - # offset is only valid for single-line text. 
For multi-line text, - # it is apparently not possible to determine at what offset it - # started - if '\n' in data: - lines = data.splitlines() - lineno = pos[1] - len(lines) + 1 - offset = -1 - else: - lineno = pos[1] - offset = pos[2] - len(data) - pos = (pos[0], lineno, offset) - self._queue.append((kind, data, pos)) - - def _getpos_unknown(self): - return (self.filename, -1, -1) - - def _getpos(self): - return (self.filename, self.expat.CurrentLineNumber, - self.expat.CurrentColumnNumber) - - def _handle_start(self, tag, attrib): - attrs = Attrs([(QName(name), value) for name, value in - zip(*[iter(attrib)] * 2)]) - self._enqueue(START, (QName(tag), attrs)) - - def _handle_end(self, tag): - self._enqueue(END, QName(tag)) - - def _handle_data(self, text): - self._enqueue(TEXT, text) - - def _handle_xml_decl(self, version, encoding, standalone): - self._enqueue(XML_DECL, (version, encoding, standalone)) - - def _handle_doctype(self, name, sysid, pubid, has_internal_subset): - self._enqueue(DOCTYPE, (name, pubid, sysid)) - - def _handle_start_ns(self, prefix, uri): - self._enqueue(START_NS, (prefix or '', uri)) - - def _handle_end_ns(self, prefix): - self._enqueue(END_NS, prefix or '') - - def _handle_start_cdata(self): - self._enqueue(START_CDATA) - - def _handle_end_cdata(self): - self._enqueue(END_CDATA) - - def _handle_pi(self, target, data): - self._enqueue(PI, (target, data)) - - def _handle_comment(self, text): - self._enqueue(COMMENT, text) - - def _handle_other(self, text): - if text.startswith('&'): - # deal with undefined entities - try: - text = unichr(entities.name2codepoint[text[1:-1]]) - self._enqueue(TEXT, text) - except KeyError: - filename, lineno, offset = self._getpos() - error = expat.error('undefined entity "%s": line %d, column %d' - % (text, lineno, offset)) - error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY - error.lineno = lineno - error.offset = offset - raise error - - -def XML(text): - """Parse the given XML source and return a 
markup stream. - - Unlike with `XMLParser`, the returned stream is reusable, meaning it can be - iterated over multiple times: - - >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>') - >>> print(xml) - <doc><elem>Foo</elem><elem>Bar</elem></doc> - >>> print(xml.select('elem')) - <elem>Foo</elem><elem>Bar</elem> - >>> print(xml.select('elem/text()')) - FooBar - - :param text: the XML source - :return: the parsed XML event stream - :raises ParseError: if the XML text is not well-formed - """ - return Stream(list(XMLParser(StringIO(text)))) - - -class HTMLParser(html.HTMLParser, object): - """Parser for HTML input based on the Python `HTMLParser` module. - - This class provides the same interface for generating stream events as - `XMLParser`, and attempts to automatically balance tags. - - The parsing is initiated by iterating over the parser object: - - >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>')) - >>> for kind, data, pos in parser: - ... print('%s %s' % (kind, data)) - START (QName('ul'), Attrs([(QName('compact'), u'compact')])) - START (QName('li'), Attrs()) - TEXT Foo - END li - END ul - """ - - _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', - 'hr', 'img', 'input', 'isindex', 'link', 'meta', - 'param']) - - def __init__(self, source, filename=None, encoding='utf-8'): - """Initialize the parser for the given HTML input. - - :param source: the HTML text as a file-like object - :param filename: the name of the file, if known - :param filename: encoding of the file; ignored if the input is unicode - """ - html.HTMLParser.__init__(self) - self.source = source - self.filename = filename - self.encoding = encoding - self._queue = [] - self._open_tags = [] - - def parse(self): - """Generator that parses the HTML source, yielding markup events. 
- - :return: a markup event stream - :raises ParseError: if the HTML text is not well formed - """ - def _generate(): - try: - bufsize = 4 * 1024 # 4K - done = False - while 1: - while not done and len(self._queue) == 0: - data = self.source.read(bufsize) - if data == '': # end of data - self.close() - done = True - else: - self.feed(data) - for kind, data, pos in self._queue: - yield kind, data, pos - self._queue = [] - if done: - open_tags = self._open_tags - open_tags.reverse() - for tag in open_tags: - yield END, QName(tag), pos - break - except html.HTMLParseError, e: - msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) - raise ParseError(msg, self.filename, e.lineno, e.offset) - return Stream(_generate()).filter(_coalesce) - - def __iter__(self): - return iter(self.parse()) - - def _enqueue(self, kind, data, pos=None): - if pos is None: - pos = self._getpos() - self._queue.append((kind, data, pos)) - - def _getpos(self): - lineno, column = self.getpos() - return (self.filename, lineno, column) - - def handle_starttag(self, tag, attrib): - fixed_attrib = [] - for name, value in attrib: # Fixup minimized attributes - if value is None: - value = unicode(name) - elif not isinstance(value, unicode): - value = value.decode(self.encoding, 'replace') - fixed_attrib.append((QName(name), stripentities(value))) - - self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) - if tag in self._EMPTY_ELEMS: - self._enqueue(END, QName(tag)) - else: - self._open_tags.append(tag) - - def handle_endtag(self, tag): - if tag not in self._EMPTY_ELEMS: - while self._open_tags: - open_tag = self._open_tags.pop() - self._enqueue(END, QName(open_tag)) - if open_tag.lower() == tag.lower(): - break - - def handle_data(self, text): - if not isinstance(text, unicode): - text = text.decode(self.encoding, 'replace') - self._enqueue(TEXT, text) - - def handle_charref(self, name): - if name.lower().startswith('x'): - text = unichr(int(name[1:], 16)) - else: - text = unichr(int(name)) - 
self._enqueue(TEXT, text) - - def handle_entityref(self, name): - try: - text = unichr(entities.name2codepoint[name]) - except KeyError: - text = '&%s;' % name - self._enqueue(TEXT, text) - - def handle_pi(self, data): - target, data = data.split(None, 1) - if data.endswith('?'): - data = data[:-1] - self._enqueue(PI, (target.strip(), data.strip())) - - def handle_comment(self, text): - self._enqueue(COMMENT, text) - - -def HTML(text, encoding='utf-8'): - """Parse the given HTML source and return a markup stream. - - Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be - iterated over multiple times: - - >>> html = HTML('<body><h1>Foo</h1></body>') - >>> print(html) - <body><h1>Foo</h1></body> - >>> print(html.select('h1')) - <h1>Foo</h1> - >>> print(html.select('h1/text()')) - Foo - - :param text: the HTML source - :return: the parsed XML event stream - :raises ParseError: if the HTML text is not well-formed, and error recovery - fails - """ - return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) - - -def _coalesce(stream): - """Coalesces adjacent TEXT events into a single event.""" - textbuf = [] - textpos = None - for kind, data, pos in chain(stream, [(None, None, None)]): - if kind is TEXT: - textbuf.append(data) - if textpos is None: - textpos = pos - else: - if textbuf: - yield TEXT, ''.join(textbuf), textpos - del textbuf[:] - textpos = None - if kind: - yield kind, data, pos diff --git a/genshi/output.py b/genshi/output.py deleted file mode 100644 index 2ebb38b..0000000 --- a/genshi/output.py +++ /dev/null @@ -1,838 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. 
For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""This module provides different kinds of serialization methods for XML event -streams. -""" - -from itertools import chain -import re - -from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind -from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ - START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE - -__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', - 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] -__docformat__ = 'restructuredtext en' - - -def encode(iterator, method='xml', encoding='utf-8', out=None): - """Encode serializer output into a string. - - :param iterator: the iterator returned from serializing a stream (basically - any iterator that yields unicode objects) - :param method: the serialization method; determines how characters not - representable in the specified encoding are treated - :param encoding: how the output string should be encoded; if set to `None`, - this method returns a `unicode` object - :param out: a file-like object that the output should be written to - instead of being returned as one big string; note that if - this is a file or socket (or similar), the `encoding` must - not be `None` (that is, the output must be encoded) - :return: a `str` or `unicode` object (depending on the `encoding` - parameter), or `None` if the `out` parameter is provided - - :since: version 0.4.1 - :note: Changed in 0.5: added the `out` parameter - """ - if encoding is not None: - errors = 'replace' - if method != 'text' and not isinstance(method, TextSerializer): - errors = 'xmlcharrefreplace' - _encode = lambda string: string.encode(encoding, errors) - else: - _encode = lambda string: string - if out is None: - return _encode(''.join(list(iterator))) - for chunk in iterator: - out.write(_encode(chunk)) - - -def get_serializer(method='xml', **kwargs): - 
"""Return a serializer object for the given method. - - :param method: the serialization method; can be either "xml", "xhtml", - "html", "text", or a custom serializer class - - Any additional keyword arguments are passed to the serializer, and thus - depend on the `method` parameter value. - - :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer` - :since: version 0.4.1 - """ - if isinstance(method, basestring): - method = {'xml': XMLSerializer, - 'xhtml': XHTMLSerializer, - 'html': HTMLSerializer, - 'text': TextSerializer}[method.lower()] - return method(**kwargs) - - -class DocType(object): - """Defines a number of commonly used DOCTYPE declarations as constants.""" - - HTML_STRICT = ( - 'html', '-//W3C//DTD HTML 4.01//EN', - 'http://www.w3.org/TR/html4/strict.dtd' - ) - HTML_TRANSITIONAL = ( - 'html', '-//W3C//DTD HTML 4.01 Transitional//EN', - 'http://www.w3.org/TR/html4/loose.dtd' - ) - HTML_FRAMESET = ( - 'html', '-//W3C//DTD HTML 4.01 Frameset//EN', - 'http://www.w3.org/TR/html4/frameset.dtd' - ) - HTML = HTML_STRICT - - HTML5 = ('html', None, None) - - XHTML_STRICT = ( - 'html', '-//W3C//DTD XHTML 1.0 Strict//EN', - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' - ) - XHTML_TRANSITIONAL = ( - 'html', '-//W3C//DTD XHTML 1.0 Transitional//EN', - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' - ) - XHTML_FRAMESET = ( - 'html', '-//W3C//DTD XHTML 1.0 Frameset//EN', - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd' - ) - XHTML = XHTML_STRICT - - XHTML11 = ( - 'html', '-//W3C//DTD XHTML 1.1//EN', - 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' - ) - - SVG_FULL = ( - 'svg', '-//W3C//DTD SVG 1.1//EN', - 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd' - ) - SVG_BASIC = ( - 'svg', '-//W3C//DTD SVG Basic 1.1//EN', - 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd' - ) - SVG_TINY = ( - 'svg', '-//W3C//DTD SVG Tiny 1.1//EN', - 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd' - ) - SVG = SVG_FULL - - 
@classmethod - def get(cls, name): - """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE`` - declaration for the specified name. - - The following names are recognized in this version: - * "html" or "html-strict" for the HTML 4.01 strict DTD - * "html-transitional" for the HTML 4.01 transitional DTD - * "html-frameset" for the HTML 4.01 frameset DTD - * "html5" for the ``DOCTYPE`` proposed for HTML5 - * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD - * "xhtml-transitional" for the XHTML 1.0 transitional DTD - * "xhtml-frameset" for the XHTML 1.0 frameset DTD - * "xhtml11" for the XHTML 1.1 DTD - * "svg" or "svg-full" for the SVG 1.1 DTD - * "svg-basic" for the SVG Basic 1.1 DTD - * "svg-tiny" for the SVG Tiny 1.1 DTD - - :param name: the name of the ``DOCTYPE`` - :return: the ``(name, pubid, sysid)`` tuple for the requested - ``DOCTYPE``, or ``None`` if the name is not recognized - :since: version 0.4.1 - """ - return { - 'html': cls.HTML, 'html-strict': cls.HTML_STRICT, - 'html-transitional': DocType.HTML_TRANSITIONAL, - 'html-frameset': DocType.HTML_FRAMESET, - 'html5': cls.HTML5, - 'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT, - 'xhtml-transitional': cls.XHTML_TRANSITIONAL, - 'xhtml-frameset': cls.XHTML_FRAMESET, - 'xhtml11': cls.XHTML11, - 'svg': cls.SVG, 'svg-full': cls.SVG_FULL, - 'svg-basic': cls.SVG_BASIC, - 'svg-tiny': cls.SVG_TINY - }.get(name.lower()) - - -class XMLSerializer(object): - """Produces XML text from an event stream. - - >>> from genshi.builder import tag - >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) - >>> print(''.join(XMLSerializer()(elem.generate()))) - <div><a href="foo"/><br/><hr noshade="True"/></div> - """ - - _PRESERVE_SPACE = frozenset() - - def __init__(self, doctype=None, strip_whitespace=True, - namespace_prefixes=None, cache=True): - """Initialize the XML serializer. 
- - :param doctype: a ``(name, pubid, sysid)`` tuple that represents the - DOCTYPE declaration that should be included at the top - of the generated output, or the name of a DOCTYPE as - defined in `DocType.get` - :param strip_whitespace: whether extraneous whitespace should be - stripped from the output - :param cache: whether to cache the text output per event, which - improves performance for repetitive markup - :note: Changed in 0.4.2: The `doctype` parameter can now be a string. - :note: Changed in 0.6: The `cache` parameter was added - """ - self.filters = [EmptyTagFilter()] - if strip_whitespace: - self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) - self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes, - cache=cache)) - if doctype: - self.filters.append(DocTypeInserter(doctype)) - self.cache = cache - - def __call__(self, stream): - have_decl = have_doctype = False - in_cdata = False - - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output): - cache[kind, input] = output - return output - else: - def _emit(kind, input, output): - return output - - for filter_ in self.filters: - stream = filter_(stream) - for kind, data, pos in stream: - cached = cache_get((kind, data)) - if cached is not None: - yield cached - - elif kind is START or kind is EMPTY: - tag, attrib = data - buf = ['<', tag] - for attr, value in attrib: - buf += [' ', attr, '="', escape(value), '"'] - buf.append(kind is EMPTY and '/>' or '>') - yield _emit(kind, data, Markup(''.join(buf))) - - elif kind is END: - yield _emit(kind, data, Markup('</%s>' % data)) - - elif kind is TEXT: - if in_cdata: - yield _emit(kind, data, data) - else: - yield _emit(kind, data, escape(data, quotes=False)) - - elif kind is COMMENT: - yield _emit(kind, data, Markup('<!--%s-->' % data)) - - elif kind is XML_DECL and not have_decl: - version, encoding, standalone = data - buf = ['<?xml version="%s"' % version] - if encoding: - buf.append(' encoding="%s"' % 
encoding) - if standalone != -1: - standalone = standalone and 'yes' or 'no' - buf.append(' standalone="%s"' % standalone) - buf.append('?>\n') - yield Markup(''.join(buf)) - have_decl = True - - elif kind is DOCTYPE and not have_doctype: - name, pubid, sysid = data - buf = ['<!DOCTYPE %s'] - if pubid: - buf.append(' PUBLIC "%s"') - elif sysid: - buf.append(' SYSTEM') - if sysid: - buf.append(' "%s"') - buf.append('>\n') - yield Markup(''.join(buf)) % tuple([p for p in data if p]) - have_doctype = True - - elif kind is START_CDATA: - yield Markup('<![CDATA[') - in_cdata = True - - elif kind is END_CDATA: - yield Markup(']]>') - in_cdata = False - - elif kind is PI: - yield _emit(kind, data, Markup('<?%s %s?>' % data)) - - -class XHTMLSerializer(XMLSerializer): - """Produces XHTML text from an event stream. - - >>> from genshi.builder import tag - >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) - >>> print(''.join(XHTMLSerializer()(elem.generate()))) - <div><a href="foo"></a><br /><hr noshade="noshade" /></div> - """ - - _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', - 'hr', 'img', 'input', 'isindex', 'link', 'meta', - 'param']) - _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare', - 'defer', 'disabled', 'ismap', 'multiple', - 'nohref', 'noresize', 'noshade', 'nowrap']) - _PRESERVE_SPACE = frozenset([ - QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), - QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') - ]) - - def __init__(self, doctype=None, strip_whitespace=True, - namespace_prefixes=None, drop_xml_decl=True, cache=True): - super(XHTMLSerializer, self).__init__(doctype, False) - self.filters = [EmptyTagFilter()] - if strip_whitespace: - self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) - namespace_prefixes = namespace_prefixes or {} - namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' - self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes, - 
cache=cache)) - if doctype: - self.filters.append(DocTypeInserter(doctype)) - self.drop_xml_decl = drop_xml_decl - self.cache = cache - - def __call__(self, stream): - boolean_attrs = self._BOOLEAN_ATTRS - empty_elems = self._EMPTY_ELEMS - drop_xml_decl = self.drop_xml_decl - have_decl = have_doctype = False - in_cdata = False - - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output): - cache[kind, input] = output - return output - else: - def _emit(kind, input, output): - return output - - for filter_ in self.filters: - stream = filter_(stream) - for kind, data, pos in stream: - cached = cache_get((kind, data)) - if cached is not None: - yield cached - - elif kind is START or kind is EMPTY: - tag, attrib = data - buf = ['<', tag] - for attr, value in attrib: - if attr in boolean_attrs: - value = attr - elif attr == 'xml:lang' and 'lang' not in attrib: - buf += [' lang="', escape(value), '"'] - elif attr == 'xml:space': - continue - buf += [' ', attr, '="', escape(value), '"'] - if kind is EMPTY: - if tag in empty_elems: - buf.append(' />') - else: - buf.append('></%s>' % tag) - else: - buf.append('>') - yield _emit(kind, data, Markup(''.join(buf))) - - elif kind is END: - yield _emit(kind, data, Markup('</%s>' % data)) - - elif kind is TEXT: - if in_cdata: - yield _emit(kind, data, data) - else: - yield _emit(kind, data, escape(data, quotes=False)) - - elif kind is COMMENT: - yield _emit(kind, data, Markup('<!--%s-->' % data)) - - elif kind is DOCTYPE and not have_doctype: - name, pubid, sysid = data - buf = ['<!DOCTYPE %s'] - if pubid: - buf.append(' PUBLIC "%s"') - elif sysid: - buf.append(' SYSTEM') - if sysid: - buf.append(' "%s"') - buf.append('>\n') - yield Markup(''.join(buf)) % tuple([p for p in data if p]) - have_doctype = True - - elif kind is XML_DECL and not have_decl and not drop_xml_decl: - version, encoding, standalone = data - buf = ['<?xml version="%s"' % version] - if encoding: - buf.append(' encoding="%s"' % 
encoding) - if standalone != -1: - standalone = standalone and 'yes' or 'no' - buf.append(' standalone="%s"' % standalone) - buf.append('?>\n') - yield Markup(''.join(buf)) - have_decl = True - - elif kind is START_CDATA: - yield Markup('<![CDATA[') - in_cdata = True - - elif kind is END_CDATA: - yield Markup(']]>') - in_cdata = False - - elif kind is PI: - yield _emit(kind, data, Markup('<?%s %s?>' % data)) - - -class HTMLSerializer(XHTMLSerializer): - """Produces HTML text from an event stream. - - >>> from genshi.builder import tag - >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) - >>> print(''.join(HTMLSerializer()(elem.generate()))) - <div><a href="foo"></a><br><hr noshade></div> - """ - - _NOESCAPE_ELEMS = frozenset([ - QName('script'), QName('http://www.w3.org/1999/xhtml}script'), - QName('style'), QName('http://www.w3.org/1999/xhtml}style') - ]) - - def __init__(self, doctype=None, strip_whitespace=True, cache=True): - """Initialize the HTML serializer. - - :param doctype: a ``(name, pubid, sysid)`` tuple that represents the - DOCTYPE declaration that should be included at the top - of the generated output - :param strip_whitespace: whether extraneous whitespace should be - stripped from the output - :param cache: whether to cache the text output per event, which - improves performance for repetitive markup - :note: Changed in 0.6: The `cache` parameter was added - """ - super(HTMLSerializer, self).__init__(doctype, False) - self.filters = [EmptyTagFilter()] - if strip_whitespace: - self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, - self._NOESCAPE_ELEMS)) - self.filters.append(NamespaceFlattener(prefixes={ - 'http://www.w3.org/1999/xhtml': '' - }, cache=cache)) - if doctype: - self.filters.append(DocTypeInserter(doctype)) - self.cache = True - - def __call__(self, stream): - boolean_attrs = self._BOOLEAN_ATTRS - empty_elems = self._EMPTY_ELEMS - noescape_elems = self._NOESCAPE_ELEMS - have_doctype = False - noescape = False - 
- cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output): - cache[kind, input] = output - return output - else: - def _emit(kind, input, output): - return output - - for filter_ in self.filters: - stream = filter_(stream) - for kind, data, _ in stream: - output = cache_get((kind, data)) - if output is not None: - yield output - if (kind is START or kind is EMPTY) \ - and data[0] in noescape_elems: - noescape = True - elif kind is END: - noescape = False - - elif kind is START or kind is EMPTY: - tag, attrib = data - buf = ['<', tag] - for attr, value in attrib: - if attr in boolean_attrs: - if value: - buf += [' ', attr] - elif ':' in attr: - if attr == 'xml:lang' and 'lang' not in attrib: - buf += [' lang="', escape(value), '"'] - elif attr != 'xmlns': - buf += [' ', attr, '="', escape(value), '"'] - buf.append('>') - if kind is EMPTY: - if tag not in empty_elems: - buf.append('</%s>' % tag) - yield _emit(kind, data, Markup(''.join(buf))) - if tag in noescape_elems: - noescape = True - - elif kind is END: - yield _emit(kind, data, Markup('</%s>' % data)) - noescape = False - - elif kind is TEXT: - if noescape: - yield _emit(kind, data, data) - else: - yield _emit(kind, data, escape(data, quotes=False)) - - elif kind is COMMENT: - yield _emit(kind, data, Markup('<!--%s-->' % data)) - - elif kind is DOCTYPE and not have_doctype: - name, pubid, sysid = data - buf = ['<!DOCTYPE %s'] - if pubid: - buf.append(' PUBLIC "%s"') - elif sysid: - buf.append(' SYSTEM') - if sysid: - buf.append(' "%s"') - buf.append('>\n') - yield Markup(''.join(buf)) % tuple([p for p in data if p]) - have_doctype = True - - elif kind is PI: - yield _emit(kind, data, Markup('<?%s %s?>' % data)) - - -class TextSerializer(object): - """Produces plain text from an event stream. - - Only text events are included in the output. 
Unlike the other serializer, - special XML characters are not escaped: - - >>> from genshi.builder import tag - >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br) - >>> print(elem) - <div><a href="foo"><Hello!></a><br/></div> - >>> print(''.join(TextSerializer()(elem.generate()))) - <Hello!> - - If text events contain literal markup (instances of the `Markup` class), - that markup is by default passed through unchanged: - - >>> elem = tag.div(Markup('<a href="foo">Hello & Bye!</a><br/>')) - >>> print(elem.generate().render(TextSerializer, encoding=None)) - <a href="foo">Hello & Bye!</a><br/> - - You can use the ``strip_markup`` to change this behavior, so that tags and - entities are stripped from the output (or in the case of entities, - replaced with the equivalent character): - - >>> print(elem.generate().render(TextSerializer, strip_markup=True, - ... encoding=None)) - Hello & Bye! - """ - - def __init__(self, strip_markup=False): - """Create the serializer. - - :param strip_markup: whether markup (tags and encoded characters) found - in the text should be removed - """ - self.strip_markup = strip_markup - - def __call__(self, stream): - strip_markup = self.strip_markup - for event in stream: - if event[0] is TEXT: - data = event[1] - if strip_markup and type(data) is Markup: - data = data.striptags().stripentities() - yield unicode(data) - - -class EmptyTagFilter(object): - """Combines `START` and `STOP` events into `EMPTY` events for elements that - have no contents. 
- """ - - EMPTY = StreamEventKind('EMPTY') - - def __call__(self, stream): - prev = (None, None, None) - for ev in stream: - if prev[0] is START: - if ev[0] is END: - prev = EMPTY, prev[1], prev[2] - yield prev - continue - else: - yield prev - if ev[0] is not START: - yield ev - prev = ev - - -EMPTY = EmptyTagFilter.EMPTY - - -class NamespaceFlattener(object): - r"""Output stream filter that removes namespace information from the stream, - instead adding namespace attributes and prefixes as needed. - - :param prefixes: optional mapping of namespace URIs to prefixes - - >>> from genshi.input import XML - >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2"> - ... <two:item/> - ... </doc>''') - >>> for kind, data, pos in NamespaceFlattener()(xml): - ... print('%s %r' % (kind, data)) - START (u'doc', Attrs([('xmlns', u'NS1'), (u'xmlns:two', u'NS2')])) - TEXT u'\n ' - START (u'two:item', Attrs()) - END u'two:item' - TEXT u'\n' - END u'doc' - """ - - def __init__(self, prefixes=None, cache=True): - self.prefixes = {XML_NAMESPACE.uri: 'xml'} - if prefixes is not None: - self.prefixes.update(prefixes) - self.cache = cache - - def __call__(self, stream): - cache = {} - cache_get = cache.get - if self.cache: - def _emit(kind, input, output, pos): - cache[kind, input] = output - return kind, output, pos - else: - def _emit(kind, input, output, pos): - return output - - prefixes = dict([(v, [k]) for k, v in self.prefixes.items()]) - namespaces = {XML_NAMESPACE.uri: ['xml']} - def _push_ns(prefix, uri): - namespaces.setdefault(uri, []).append(prefix) - prefixes.setdefault(prefix, []).append(uri) - cache.clear() - def _pop_ns(prefix): - uris = prefixes.get(prefix) - uri = uris.pop() - if not uris: - del prefixes[prefix] - if uri not in uris or uri != uris[-1]: - uri_prefixes = namespaces[uri] - uri_prefixes.pop() - if not uri_prefixes: - del namespaces[uri] - cache.clear() - return uri - - ns_attrs = [] - _push_ns_attr = ns_attrs.append - def _make_ns_attr(prefix, uri): - return 
'xmlns%s' % (prefix and ':%s' % prefix or ''), uri - - def _gen_prefix(): - val = 0 - while 1: - val += 1 - yield 'ns%d' % val - _gen_prefix = _gen_prefix().next - - for kind, data, pos in stream: - output = cache_get((kind, data)) - if output is not None: - yield kind, output, pos - - elif kind is START or kind is EMPTY: - tag, attrs = data - - tagname = tag.localname - tagns = tag.namespace - if tagns: - if tagns in namespaces: - prefix = namespaces[tagns][-1] - if prefix: - tagname = '%s:%s' % (prefix, tagname) - else: - _push_ns_attr(('xmlns', tagns)) - _push_ns('', tagns) - - new_attrs = [] - for attr, value in attrs: - attrname = attr.localname - attrns = attr.namespace - if attrns: - if attrns not in namespaces: - prefix = _gen_prefix() - _push_ns(prefix, attrns) - _push_ns_attr(('xmlns:%s' % prefix, attrns)) - else: - prefix = namespaces[attrns][-1] - if prefix: - attrname = '%s:%s' % (prefix, attrname) - new_attrs.append((attrname, value)) - - yield _emit(kind, data, (tagname, Attrs(ns_attrs + new_attrs)), pos) - del ns_attrs[:] - - elif kind is END: - tagname = data.localname - tagns = data.namespace - if tagns: - prefix = namespaces[tagns][-1] - if prefix: - tagname = '%s:%s' % (prefix, tagname) - yield _emit(kind, data, tagname, pos) - - elif kind is START_NS: - prefix, uri = data - if uri not in namespaces: - prefix = prefixes.get(uri, [prefix])[-1] - _push_ns_attr(_make_ns_attr(prefix, uri)) - _push_ns(prefix, uri) - - elif kind is END_NS: - if data in prefixes: - uri = _pop_ns(data) - if ns_attrs: - attr = _make_ns_attr(data, uri) - if attr in ns_attrs: - ns_attrs.remove(attr) - - else: - yield kind, data, pos - - -class WhitespaceFilter(object): - """A filter that removes extraneous ignorable white space from the - stream. - """ - - def __init__(self, preserve=None, noescape=None): - """Initialize the filter. 
class DocTypeInserter(object):
    """Stream filter that adds a DOCTYPE event to the stream: directly after
    a leading XML declaration if the stream starts with one, otherwise as the
    very first event.
    """
    def __init__(self, doctype):
        """Set up the filter.

        :param doctype: the DOCTYPE, either as a string (which is resolved
                        through `DocType.get`) or as a ready-made value
        """
        if isinstance(doctype, basestring):
            doctype = DocType.get(doctype)
        position = (None, -1, -1)
        self.doctype_event = (DOCTYPE, doctype, position)

    def __call__(self, stream):
        # `pending` stays true until the DOCTYPE event has been emitted
        pending = True
        for kind, data, pos in stream:
            if pending:
                pending = False
                if kind is not XML_DECL:
                    yield self.doctype_event
                else:
                    # keep the XML declaration in front of the DOCTYPE
                    yield (kind, data, pos)
                    yield self.doctype_event
                    continue
            yield (kind, data, pos)

        # an empty stream still gets its DOCTYPE
        if pending:
            yield self.doctype_event
class Axis(object):
    """Namespace holding the names of the XPath axes this module supports."""

    ATTRIBUTE = 'attribute'
    CHILD = 'child'
    DESCENDANT = 'descendant'
    DESCENDANT_OR_SELF = 'descendant-or-self'
    SELF = 'self'

    @classmethod
    def forname(cls, name):
        """Look up the axis constant that belongs to the given axis name.

        The name is mapped to the constant's attribute name by replacing
        dashes with underscores and upper-casing it; `None` is returned when
        the class has no such attribute.
        """
        constant_name = name.replace('-', '_').upper()
        return getattr(cls, constant_name, None)
- kind, data, pos = event[:3] - retval = None - - # Manage the stack that tells us "where we are" in the stream - if kind is END: - if stack: - stack.pop() - return None - if kind is START_NS or kind is END_NS \ - or kind is START_CDATA or kind is END_CDATA: - # should we make namespaces work? - return None - - pos_queue = deque([(pos, cou, []) for pos, cou in stack[-1]]) - next_pos = [] - - # length of real part of path - we omit attribute axis - real_len = len(steps) - ((steps[-1][0] == ATTRIBUTE) or 1 and 0) - last_checked = -1 - - # places where we have to check for match, are these - # provided by parent - while pos_queue: - x, pcou, mcou = pos_queue.popleft() - axis, nodetest, predicates = steps[x] - - # we need to push descendant-like positions from parent - # further - if (axis is DESCENDANT or axis is DESCENDANT_OR_SELF) and pcou: - if next_pos and next_pos[-1][0] == x: - next_pos[-1][1].extend(pcou) - else: - next_pos.append((x, pcou)) - - # nodetest first - if not nodetest(kind, data, pos, namespaces, variables): - continue - - # counters packs that were already bad - missed = set() - counters_len = len(pcou) + len(mcou) - - # number of counters - we have to create one - # for every context position based predicate - cnum = 0 - - # tells if we have match with position x - matched = True - - if predicates: - for predicate in predicates: - pretval = predicate(kind, data, pos, - namespaces, - variables) - if type(pretval) is float: # FIXME <- need to check - # this for other types that - # can be coerced to float - - # each counter pack needs to be checked - for i, cou in enumerate(chain(pcou, mcou)): - # it was bad before - if i in missed: - continue - - if len(cou) < cnum + 1: - cou.append(0) - cou[cnum] += 1 - - # it is bad now - if cou[cnum] != int(pretval): - missed.add(i) - - # none of counters pack was good - if len(missed) == counters_len: - pretval = False - cnum += 1 - - if not pretval: - matched = False - break - - if not matched: - continue - - 
# counter for next position with current node as context node - child_counter = [] - - if x + 1 == real_len: - # we reached end of expression, because x + 1 - # is equal to the length of expression - matched = True - axis, nodetest, predicates = steps[-1] - if axis is ATTRIBUTE: - matched = nodetest(kind, data, pos, namespaces, - variables) - if matched: - retval = matched - else: - next_axis = steps[x + 1][0] - - # if next axis allows matching self we have - # to add next position to our queue - if next_axis is DESCENDANT_OR_SELF or next_axis is SELF: - if not pos_queue or pos_queue[0][0] > x + 1: - pos_queue.appendleft((x + 1, [], [child_counter])) - else: - pos_queue[0][2].append(child_counter) - - # if axis is not self we have to add it to child's list - if next_axis is not SELF: - next_pos.append((x + 1, [child_counter])) - - if kind is START: - stack.append(next_pos) - - return retval - - return _test - - -class SimplePathStrategy(object): - """Strategy for path with only local names, attributes and text nodes.""" - - @classmethod - def supports(cls, path): - if path[0][0] is ATTRIBUTE: - return False - allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest) - for _, nodetest, predicates in path: - if predicates: - return False - if not isinstance(nodetest, allowed_tests): - return False - return True - - def __init__(self, path): - # fragments is list of tuples (fragment, pi, attr, self_beginning) - # fragment is list of nodetests for fragment of path with only - # child:: axes between - # pi is KMP partial match table for this fragment - # attr is attribute nodetest if fragment ends with @ and None otherwise - # self_beginning is True if axis for first fragment element - # was self (first fragment) or descendant-or-self (farther fragment) - self.fragments = [] - - self_beginning = False - fragment = [] - - def nodes_equal(node1, node2): - """Tests if two node tests are equal""" - if type(node1) is not type(node2): - return False - if type(node1) == 
LocalNameTest: - return node1.name == node2.name - return True - - def calculate_pi(f): - """KMP prefix calculation for table""" - # the indexes in prefix table are shifted by one - # in comparision with common implementations - # pi[i] = NORMAL_PI[i + 1] - if len(f) == 0: - return [] - pi = [0] - s = 0 - for i in range(1, len(f)): - while s > 0 and not nodes_equal(f[s], f[i]): - s = pi[s-1] - if nodes_equal(f[s], f[i]): - s += 1 - pi.append(s) - return pi - - for axis in path: - if axis[0] is SELF: - if len(fragment) != 0: - # if element is not first in fragment it has to be - # the same as previous one - # for example child::a/self::b is always wrong - if axis[1] != fragment[-1][1]: - self.fragments = None - return - else: - self_beginning = True - fragment.append(axis[1]) - elif axis[0] is CHILD: - fragment.append(axis[1]) - elif axis[0] is ATTRIBUTE: - pi = calculate_pi(fragment) - self.fragments.append((fragment, pi, axis[1], self_beginning)) - # attribute has always to be at the end, so we can jump out - return - else: - pi = calculate_pi(fragment) - self.fragments.append((fragment, pi, None, self_beginning)) - fragment = [axis[1]] - if axis[0] is DESCENDANT: - self_beginning = False - else: # DESCENDANT_OR_SELF - self_beginning = True - pi = calculate_pi(fragment) - self.fragments.append((fragment, pi, None, self_beginning)) - - def test(self, ignore_context): - # stack of triples (fid, p, ic) - # fid is index of current fragment - # p is position in this fragment - # ic is if we ignore context in this fragment - stack = [] - stack_push = stack.append - stack_pop = stack.pop - frags = self.fragments - frags_len = len(frags) - - def _test(event, namespaces, variables, updateonly=False): - # expression found impossible during init - if frags is None: - return None - - kind, data, pos = event[:3] - - # skip events we don't care about - if kind is END: - if stack: - stack_pop() - return None - if kind is START_NS or kind is END_NS \ - or kind is START_CDATA or 
kind is END_CDATA: - return None - - if not stack: - # root node, nothing on stack, special case - fid = 0 - # skip empty fragments (there can be actually only one) - while not frags[fid][0]: - fid += 1 - p = 0 - # empty fragment means descendant node at beginning - ic = ignore_context or (fid > 0) - - # expression can match first node, if first axis is self::, - # descendant-or-self:: or if ignore_context is True and - # axis is not descendant:: - if not frags[fid][3] and (not ignore_context or fid > 0): - # axis is not self-beggining, we have to skip this node - stack_push((fid, p, ic)) - return None - else: - # take position of parent - fid, p, ic = stack[-1] - - if fid is not None and not ic: - # fragment not ignoring context - we can't jump back - frag, pi, attrib, _ = frags[fid] - frag_len = len(frag) - - if p == frag_len: - # that probably means empty first fragment - pass - elif frag[p](kind, data, pos, namespaces, variables): - # match, so we can go further - p += 1 - else: - # not matched, so there will be no match in subtree - fid, p = None, None - - if p == frag_len and fid + 1 != frags_len: - # we made it to end of fragment, we can go to following - fid += 1 - p = 0 - ic = True - - if fid is None: - # there was no match in fragment not ignoring context - if kind is START: - stack_push((fid, p, ic)) - return None - - if ic: - # we are in fragment ignoring context - while True: - frag, pi, attrib, _ = frags[fid] - frag_len = len(frag) - - # KMP new "character" - while p > 0 and (p >= frag_len or not \ - frag[p](kind, data, pos, namespaces, variables)): - p = pi[p-1] - if frag[p](kind, data, pos, namespaces, variables): - p += 1 - - if p == frag_len: - # end of fragment reached - if fid + 1 == frags_len: - # that was last fragment - break - else: - fid += 1 - p = 0 - ic = True - if not frags[fid][3]: - # next fragment not self-beginning - break - else: - break - - if kind is START: - # we have to put new position on stack, for children - - if not ic and 
fid + 1 == frags_len and p == frag_len: - # it is end of the only, not context ignoring fragment - # so there will be no matches in subtree - stack_push((None, None, ic)) - else: - stack_push((fid, p, ic)) - - # have we reached the end of the last fragment? - if fid + 1 == frags_len and p == frag_len: - if attrib: # attribute ended path, return value - return attrib(kind, data, pos, namespaces, variables) - return True - - return None - - return _test - - -class SingleStepStrategy(object): - - @classmethod - def supports(cls, path): - return len(path) == 1 - - def __init__(self, path): - self.path = path - - def test(self, ignore_context): - steps = self.path - if steps[0][0] is ATTRIBUTE: - steps = [_DOTSLASH] + steps - select_attr = steps[-1][0] is ATTRIBUTE and steps[-1][1] or None - - # for every position in expression stores counters' list - # it is used for position based predicates - counters = [] - depth = [0] - - def _test(event, namespaces, variables, updateonly=False): - kind, data, pos = event[:3] - - # Manage the stack that tells us "where we are" in the stream - if kind is END: - if not ignore_context: - depth[0] -= 1 - return None - elif kind is START_NS or kind is END_NS \ - or kind is START_CDATA or kind is END_CDATA: - # should we make namespaces work? 
- return None - - if not ignore_context: - outside = (steps[0][0] is SELF and depth[0] != 0) \ - or (steps[0][0] is CHILD and depth[0] != 1) \ - or (steps[0][0] is DESCENDANT and depth[0] < 1) - if kind is START: - depth[0] += 1 - if outside: - return None - - axis, nodetest, predicates = steps[0] - if not nodetest(kind, data, pos, namespaces, variables): - return None - - if predicates: - cnum = 0 - for predicate in predicates: - pretval = predicate(kind, data, pos, namespaces, variables) - if type(pretval) is float: # FIXME <- need to check this - # for other types that can be - # coerced to float - if len(counters) < cnum + 1: - counters.append(0) - counters[cnum] += 1 - if counters[cnum] != int(pretval): - pretval = False - cnum += 1 - if not pretval: - return None - - if select_attr: - return select_attr(kind, data, pos, namespaces, variables) - - return True - - return _test - - -class Path(object): - """Implements basic XPath support on streams. - - Instances of this class represent a "compiled" XPath expression, and - provide methods for testing the path against a stream, as well as - extracting a substream matching that path. - """ - - STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy) - - def __init__(self, text, filename=None, lineno=-1): - """Create the path object from a string. 
- - :param text: the path expression - :param filename: the name of the file in which the path expression was - found (used in error messages) - :param lineno: the line on which the expression was found - """ - self.source = text - self.paths = PathParser(text, filename, lineno).parse() - self.strategies = [] - for path in self.paths: - for strategy_class in self.STRATEGIES: - if strategy_class.supports(path): - self.strategies.append(strategy_class(path)) - break - else: - raise NotImplemented('No strategy found for path') - - def __repr__(self): - paths = [] - for path in self.paths: - steps = [] - for axis, nodetest, predicates in path: - steps.append('%s::%s' % (axis, nodetest)) - for predicate in predicates: - steps[-1] += '[%s]' % predicate - paths.append('/'.join(steps)) - return '<%s "%s">' % (type(self).__name__, '|'.join(paths)) - - def select(self, stream, namespaces=None, variables=None): - """Returns a substream of the given stream that matches the path. - - If there are no matches, this method returns an empty stream. 
- - >>> from genshi.input import XML - >>> xml = XML('<root><elem><child>Text</child></elem></root>') - - >>> print(Path('.//child').select(xml)) - <child>Text</child> - - >>> print(Path('.//child/text()').select(xml)) - Text - - :param stream: the stream to select from - :param namespaces: (optional) a mapping of namespace prefixes to URIs - :param variables: (optional) a mapping of variable names to values - :return: the substream matching the path, or an empty stream - :rtype: `Stream` - """ - if namespaces is None: - namespaces = {} - if variables is None: - variables = {} - stream = iter(stream) - def _generate(stream=stream, ns=namespaces, vs=variables): - next = stream.next - test = self.test() - for event in stream: - result = test(event, ns, vs) - if result is True: - yield event - if event[0] is START: - depth = 1 - while depth > 0: - subevent = next() - if subevent[0] is START: - depth += 1 - elif subevent[0] is END: - depth -= 1 - yield subevent - test(subevent, ns, vs, updateonly=True) - elif result: - yield result - return Stream(_generate(), - serializer=getattr(stream, 'serializer', None)) - - def test(self, ignore_context=False): - """Returns a function that can be used to track whether the path matches - a specific stream event. - - The function returned expects the positional arguments ``event``, - ``namespaces`` and ``variables``. The first is a stream event, while the - latter two are a mapping of namespace prefixes to URIs, and a mapping - of variable names to values, respectively. In addition, the function - accepts an ``updateonly`` keyword argument that default to ``False``. If - it is set to ``True``, the function only updates its internal state, - but does not perform any tests or return a result. - - If the path matches the event, the function returns the match (for - example, a `START` or `TEXT` event.) Otherwise, it returns ``None``. 
class PathParser(object):
    """Tokenizes and parses an XPath expression.

    The constructor splits the expression into a flat token list; `parse()`
    then walks that list with a recursive descent parser. The cursor
    (`self.pos`) always points at the current token.
    """

    _QUOTES = (("'", "'"), ('"', '"'))
    _TOKENS = ('::', ':', '..', '.', '//', '/', '[', ']', '()', '(', ')', '@',
               '=', '!=', '!', '|', ',', '>=', '>', '<=', '<', '$')
    # Alternatives, in order: double-quoted string, single-quoted string,
    # number containing a decimal point, one of _TOKENS, any other run of
    # characters (names, integers), and whitespace (which is not captured).
    _tokenize = re.compile(
        r'("[^"]*")|(\'[^\']*\')|((?:\d+)?\.\d+)|(%s)|([^%s\s]+)|\s+' % (
            '|'.join([re.escape(t) for t in _TOKENS]),
            ''.join([re.escape(t[0]) for t in _TOKENS]))).findall

    def __init__(self, text, filename=None, lineno=-1):
        """Tokenize the expression.

        :param text: the path expression
        :param filename: file name reported in `PathSyntaxError` messages
        :param lineno: line number reported in `PathSyntaxError` messages
        """
        self.filename = filename
        self.lineno = lineno
        # every match yields one non-empty group, except for whitespace
        # matches, which yield none and are dropped here
        self.tokens = [t for t in [dqstr or sqstr or number or token or name
                                   for dqstr, sqstr, number, token, name in
                                   self._tokenize(text)] if t]
        self.pos = 0

    # Tokenizer

    @property
    def at_end(self):
        """Whether the cursor is on the last token (not past it)."""
        return self.pos == len(self.tokens) - 1

    @property
    def cur_token(self):
        """The token under the cursor."""
        return self.tokens[self.pos]

    def next_token(self):
        """Advance the cursor by one token and return the new current token."""
        self.pos += 1
        return self.tokens[self.pos]

    def peek_token(self):
        """Return the token after the current one without advancing, or
        `None` if the cursor is on the last token.
        """
        if not self.at_end:
            return self.tokens[self.pos + 1]
        return None

    # Recursive descent parser

    def parse(self):
        """Parses the XPath expression and returns a list of location path
        tests.

        For union expressions (such as `*|text()`), this function returns one
        test for each operand in the union. For path expressions that don't
        use the union operator, the function always returns a list of size 1.

        Each path test in turn is a sequence of tests that correspond to the
        location steps, each tuples of the form `(axis, testfunc, predicates)`

        :raises PathSyntaxError: if tokens remain after the last location path
        """
        paths = [self._location_path()]
        while self.cur_token == '|':
            self.next_token()
            paths.append(self._location_path())
        if not self.at_end:
            raise PathSyntaxError('Unexpected token %r after end of expression'
                                  % self.cur_token, self.filename, self.lineno)
        return paths

    def _location_path(self):
        """Parse one location path into a list of
        `(axis, nodetest, predicates)` steps.
        """
        steps = []
        while True:
            if self.cur_token.startswith('/'):
                if not steps:
                    if self.cur_token == '//':
                        # hack to make //* match every node - also root
                        self.next_token()
                        axis, nodetest, predicates = self._location_step()
                        steps.append((DESCENDANT_OR_SELF, nodetest,
                                      predicates))
                        if self.at_end or not self.cur_token.startswith('/'):
                            break
                        continue
                    else:
                        raise PathSyntaxError('Absolute location paths not '
                                              'supported', self.filename,
                                              self.lineno)
                elif self.cur_token == '//':
                    steps.append((DESCENDANT_OR_SELF, NodeTest(), []))
                # skip the path separator itself
                self.next_token()

            axis, nodetest, predicates = self._location_step()
            if not axis:
                axis = CHILD
            steps.append((axis, nodetest, predicates))
            if self.at_end or not self.cur_token.startswith('/'):
                break

        return steps

    def _location_step(self):
        """Parse a single location step.

        :return: an `(axis, nodetest, predicates)` tuple; `axis` is `None`
                 when the step does not name an axis
        :raises PathSyntaxError: for `..` and for unknown axis names
        """
        if self.cur_token == '@':
            axis = ATTRIBUTE
            self.next_token()
        elif self.cur_token == '.':
            axis = SELF
        elif self.cur_token == '..':
            raise PathSyntaxError('Unsupported axis "parent"', self.filename,
                                  self.lineno)
        elif self.peek_token() == '::':
            axis = Axis.forname(self.cur_token)
            if axis is None:
                # report the name that was written, `axis` is None here
                raise PathSyntaxError('Unsupported axis "%s"' % self.cur_token,
                                      self.filename, self.lineno)
            self.next_token()
            self.next_token()
        else:
            axis = None
        nodetest = self._node_test(axis or CHILD)
        predicates = []
        while self.cur_token == '[':
            predicates.append(self._predicate())
        return axis, nodetest, predicates

    def _node_test(self, axis=None):
        """Parse a node test (node type test, qualified name or plain name)
        and advance past it unless it is the last token.
        """
        test = prefix = None
        next_token = self.peek_token()
        if next_token in ('(', '()'): # Node type test
            test = self._node_type()

        elif next_token == ':': # Namespace prefix
            prefix = self.cur_token
            self.next_token()
            localname = self.next_token()
            if localname == '*':
                test = QualifiedPrincipalTypeTest(axis, prefix)
            else:
                test = QualifiedNameTest(axis, prefix, localname)

        else: # Name test
            if self.cur_token == '*':
                test = PrincipalTypeTest(axis)
            elif self.cur_token == '.':
                test = NodeTest()
            else:
                test = LocalNameTest(axis, self.cur_token)

        if not self.at_end:
            self.next_token()
        return test

    def _node_type(self):
        """Parse a node type test such as `text()` and instantiate the class
        registered for it in `_nodetest_map`.

        :raises PathSyntaxError: if the name is not a known node type
        """
        name = self.cur_token
        self.next_token()

        args = []
        if self.cur_token != '()':
            # The processing-instruction() function optionally accepts the
            # name of the PI as argument, which must be a literal string
            self.next_token() # (
            if self.cur_token != ')':
                string = self.cur_token
                if (string[0], string[-1]) in self._QUOTES:
                    string = string[1:-1]
                args.append(string)

        cls = _nodetest_map.get(name)
        if not cls:
            raise PathSyntaxError('%s() not allowed here' % name, self.filename,
                                  self.lineno)
        return cls(*args)

    def _predicate(self):
        """Parse a `[...]` predicate and return its expression."""
        assert self.cur_token == '['
        self.next_token()
        expr = self._or_expr()
        if self.cur_token != ']':
            raise PathSyntaxError('Expected "]" to close predicate, '
                                  'but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
        if not self.at_end:
            self.next_token()
        return expr

    def _or_expr(self):
        """Parse a chain of `or` operands (lowest precedence)."""
        expr = self._and_expr()
        while self.cur_token == 'or':
            self.next_token()
            expr = OrOperator(expr, self._and_expr())
        return expr

    def _and_expr(self):
        """Parse a chain of `and` operands."""
        expr = self._equality_expr()
        while self.cur_token == 'and':
            self.next_token()
            expr = AndOperator(expr, self._equality_expr())
        return expr

    def _equality_expr(self):
        """Parse a chain of `=` / `!=` comparisons."""
        expr = self._relational_expr()
        while self.cur_token in ('=', '!='):
            op = _operator_map[self.cur_token]
            self.next_token()
            expr = op(expr, self._relational_expr())
        return expr

    def _relational_expr(self):
        """Parse a chain of `>`, `>=`, `<` and `<=` comparisons.

        :raises PathSyntaxError: if `_operator_map` has no entry for the
                                 operator
        """
        expr = self._sub_expr()
        while self.cur_token in ('>', '>=', '<', '<='):
            # `_operator_map` lives elsewhere in this module; look the
            # operator up defensively so that a missing entry is reported as
            # a syntax error rather than a KeyError
            op = _operator_map.get(self.cur_token)
            if op is None:
                raise PathSyntaxError('Unsupported operator "%s"'
                                      % self.cur_token, self.filename,
                                      self.lineno)
            self.next_token()
            expr = op(expr, self._sub_expr())
        return expr

    def _sub_expr(self):
        """Parse either a parenthesized expression or a primary expression."""
        token = self.cur_token
        if token != '(':
            return self._primary_expr()
        self.next_token()
        expr = self._or_expr()
        if self.cur_token != ')':
            raise PathSyntaxError('Expected ")" to close sub-expression, '
                                  'but found "%s"' % self.cur_token,
                                  self.filename, self.lineno)
        self.next_token()
        return expr

    def _primary_expr(self):
        """Parse a string or number literal, a `$variable` reference, a
        function call, or a node test.
        """
        token = self.cur_token
        if len(token) > 1 and (token[0], token[-1]) in self._QUOTES:
            self.next_token()
            return StringLiteral(token[1:-1])
        elif token[0].isdigit() or token[0] == '.':
            self.next_token()
            return NumberLiteral(as_float(token))
        elif token == '$':
            token = self.next_token()
            self.next_token()
            return VariableReference(token)
        elif not self.at_end and self.peek_token().startswith('('):
            return self._function_call()
        else:
            axis = None
            if token == '@':
                axis = ATTRIBUTE
                self.next_token()
            return self._node_test(axis)

    def _function_call(self):
        """Parse a function call and instantiate the class registered for the
        function name in `_function_map` with the parsed arguments.

        :raises PathSyntaxError: if the argument list is not closed or the
                                 function name is unknown
        """
        name = self.cur_token
        if self.next_token() == '()':
            args = []
        else:
            assert self.cur_token == '('
            self.next_token()
            args = [self._or_expr()]
            while self.cur_token == ',':
                self.next_token()
                args.append(self._or_expr())
            if not self.cur_token == ')':
                raise PathSyntaxError('Expected ")" to close function argument '
                                      'list, but found "%s"' % self.cur_token,
                                      self.filename, self.lineno)
        self.next_token()
        cls = _function_map.get(name)
        if not cls:
            raise PathSyntaxError('Unsupported function "%s"' % name,
                                  self.filename, self.lineno)
        return cls(*args)
class QualifiedNameTest(object):
    """Node test that matches any event with the given principal type and
    qualified name.

    The qualified name is built at call time from the URI that the
    `namespaces` mapping holds for the prefix, and the local name.
    """
    __slots__ = ['principal_type', 'prefix', 'name']
    def __init__(self, principal_type, prefix, name):
        self.principal_type = principal_type
        self.prefix = prefix
        self.name = name
    def __call__(self, kind, data, pos, namespaces, variables):
        """Test a stream event.

        For `START` events: on the attribute axis, return a one-item `Attrs`
        with the matching attribute if the element has it; otherwise return
        whether the element name equals the qualified name. Other event kinds
        yield `None`.
        """
        qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
        if kind is START:
            if self.principal_type is ATTRIBUTE and qname in data[1]:
                # look the value up under the same qualified name that was
                # just tested for, not under the bare local name
                return Attrs([(qname, data[1].get(qname))])
            else:
                return data[0] == qname
    def __repr__(self):
        return '%s:%s' % (self.prefix, self.name)
class CeilingFunction(Function):
    """The `ceiling` function, which returns the smallest integer number that
    is not less than the given number.
    """
    __slots__ = ['number']
    def __init__(self, number):
        self.number = number
    def __call__(self, kind, data, pos, namespaces, variables):
        # evaluate the argument expression, coerce it to float, round up
        evaluated = self.number(kind, data, pos, namespaces, variables)
        as_number = as_float(evaluated)
        return ceil(as_number)
    def __repr__(self):
        return 'ceiling(%r)' % self.number
class MatchesFunction(Function):
    """The `matches` function, which returns whether a string matches a regular
    expression.

    The second argument is used as the pattern for `re.search` against the
    first argument. `flags` is iterated character by character; each
    character must be a key of `flag_mapping`.
    """
    __slots__ = ['string1', 'string2', 'flags']
    flag_mapping = {'s': re.S, 'm': re.M, 'i': re.I, 'x': re.X}

    def __init__(self, string1, string2, flags=''):
        self.string1 = string1
        self.string2 = string2
        self.flags = self._map_flags(flags)
    def __call__(self, kind, data, pos, namespaces, variables):
        """Return the result of `re.search` (a match object or `None`)."""
        string1 = as_string(self.string1(kind, data, pos, namespaces, variables))
        string2 = as_string(self.string2(kind, data, pos, namespaces, variables))
        return re.search(string2, string1, self.flags)
    def _map_flags(self, flags):
        """Combine the `re` flags for the given flag characters; `re.U` is
        always included.
        """
        # the lookup table is `flag_mapping`; the previous `flag_map` did not
        # exist, so every construction raised AttributeError
        return reduce(operator.or_,
                      [self.flag_mapping[flag] for flag in flags], re.U)
    def __repr__(self):
        return 'matches(%r, %r)' % (self.string1, self.string2)
- """ - __slots__ = [] - def __call__(self, kind, data, pos, namespaces, variables): - if kind is START: - return data[0] - def __repr__(self): - return 'name()' - -class NamespaceUriFunction(Function): - """The `namespace-uri` function, which returns the namespace URI of the - current element. - """ - __slots__ = [] - def __call__(self, kind, data, pos, namespaces, variables): - if kind is START: - return data[0].namespace - def __repr__(self): - return 'namespace-uri()' - -class NotFunction(Function): - """The `not` function, which returns the negated boolean value of its - argument. - """ - __slots__ = ['expr'] - def __init__(self, expr): - self.expr = expr - def __call__(self, kind, data, pos, namespaces, variables): - return not as_bool(self.expr(kind, data, pos, namespaces, variables)) - def __repr__(self): - return 'not(%s)' % self.expr - -class NormalizeSpaceFunction(Function): - """The `normalize-space` function, which removes leading and trailing - whitespace in the given string, and replaces multiple adjacent whitespace - characters inside the string with a single space. - """ - __slots__ = ['expr'] - _normalize = re.compile(r'\s{2,}').sub - def __init__(self, expr): - self.expr = expr - def __call__(self, kind, data, pos, namespaces, variables): - string = self.expr(kind, data, pos, namespaces, variables) - return self._normalize(' ', as_string(string).strip()) - def __repr__(self): - return 'normalize-space(%s)' % repr(self.expr) - -class NumberFunction(Function): - """The `number` function that converts its argument to a number.""" - __slots__ = ['expr'] - def __init__(self, expr): - self.expr = expr - def __call__(self, kind, data, pos, namespaces, variables): - val = self.expr(kind, data, pos, namespaces, variables) - return as_float(val) - def __repr__(self): - return 'number(%r)' % self.expr - -class RoundFunction(Function): - """The `round` function, which returns the nearest integer number for the - given number. 
- """ - __slots__ = ['number'] - def __init__(self, number): - self.number = number - def __call__(self, kind, data, pos, namespaces, variables): - number = self.number(kind, data, pos, namespaces, variables) - return round(as_float(number)) - def __repr__(self): - return 'round(%r)' % self.number - -class StartsWithFunction(Function): - """The `starts-with` function that returns whether one string starts with - a given substring. - """ - __slots__ = ['string1', 'string2'] - def __init__(self, string1, string2): - self.string1 = string1 - self.string2 = string2 - def __call__(self, kind, data, pos, namespaces, variables): - string1 = self.string1(kind, data, pos, namespaces, variables) - string2 = self.string2(kind, data, pos, namespaces, variables) - return as_string(string1).startswith(as_string(string2)) - def __repr__(self): - return 'starts-with(%r, %r)' % (self.string1, self.string2) - -class StringLengthFunction(Function): - """The `string-length` function that returns the length of the given - string. - """ - __slots__ = ['expr'] - def __init__(self, expr): - self.expr = expr - def __call__(self, kind, data, pos, namespaces, variables): - string = self.expr(kind, data, pos, namespaces, variables) - return len(as_string(string)) - def __repr__(self): - return 'string-length(%r)' % self.expr - -class SubstringFunction(Function): - """The `substring` function that returns the part of a string that starts - at the given offset, and optionally limited to the given length. 
- """ - __slots__ = ['string', 'start', 'length'] - def __init__(self, string, start, length=None): - self.string = string - self.start = start - self.length = length - def __call__(self, kind, data, pos, namespaces, variables): - string = self.string(kind, data, pos, namespaces, variables) - start = self.start(kind, data, pos, namespaces, variables) - length = 0 - if self.length is not None: - length = self.length(kind, data, pos, namespaces, variables) - return string[as_long(start):len(as_string(string)) - as_long(length)] - def __repr__(self): - if self.length is not None: - return 'substring(%r, %r, %r)' % (self.string, self.start, - self.length) - else: - return 'substring(%r, %r)' % (self.string, self.start) - -class SubstringAfterFunction(Function): - """The `substring-after` function that returns the part of a string that - is found after the given substring. - """ - __slots__ = ['string1', 'string2'] - def __init__(self, string1, string2): - self.string1 = string1 - self.string2 = string2 - def __call__(self, kind, data, pos, namespaces, variables): - string1 = as_string(self.string1(kind, data, pos, namespaces, variables)) - string2 = as_string(self.string2(kind, data, pos, namespaces, variables)) - index = string1.find(string2) - if index >= 0: - return string1[index + len(string2):] - return '' - def __repr__(self): - return 'substring-after(%r, %r)' % (self.string1, self.string2) - -class SubstringBeforeFunction(Function): - """The `substring-before` function that returns the part of a string that - is found before the given substring. 
- """ - __slots__ = ['string1', 'string2'] - def __init__(self, string1, string2): - self.string1 = string1 - self.string2 = string2 - def __call__(self, kind, data, pos, namespaces, variables): - string1 = as_string(self.string1(kind, data, pos, namespaces, variables)) - string2 = as_string(self.string2(kind, data, pos, namespaces, variables)) - index = string1.find(string2) - if index >= 0: - return string1[:index] - return '' - def __repr__(self): - return 'substring-after(%r, %r)' % (self.string1, self.string2) - -class TranslateFunction(Function): - """The `translate` function that translates a set of characters in a - string to target set of characters. - """ - __slots__ = ['string', 'fromchars', 'tochars'] - def __init__(self, string, fromchars, tochars): - self.string = string - self.fromchars = fromchars - self.tochars = tochars - def __call__(self, kind, data, pos, namespaces, variables): - string = as_string(self.string(kind, data, pos, namespaces, variables)) - fromchars = as_string(self.fromchars(kind, data, pos, namespaces, variables)) - tochars = as_string(self.tochars(kind, data, pos, namespaces, variables)) - table = dict(zip([ord(c) for c in fromchars], - [ord(c) for c in tochars])) - return string.translate(table) - def __repr__(self): - return 'translate(%r, %r, %r)' % (self.string, self.fromchars, - self.tochars) - -class TrueFunction(Function): - """The `true` function, which always returns the boolean `true` value.""" - __slots__ = [] - def __call__(self, kind, data, pos, namespaces, variables): - return True - def __repr__(self): - return 'true()' - -_function_map = {'boolean': BooleanFunction, 'ceiling': CeilingFunction, - 'concat': ConcatFunction, 'contains': ContainsFunction, - 'matches': MatchesFunction, 'false': FalseFunction, 'floor': - FloorFunction, 'local-name': LocalNameFunction, 'name': - NameFunction, 'namespace-uri': NamespaceUriFunction, - 'normalize-space': NormalizeSpaceFunction, 'not': NotFunction, - 'number': 
NumberFunction, 'round': RoundFunction, - 'starts-with': StartsWithFunction, 'string-length': - StringLengthFunction, 'substring': SubstringFunction, - 'substring-after': SubstringAfterFunction, 'substring-before': - SubstringBeforeFunction, 'translate': TranslateFunction, - 'true': TrueFunction} - -# Literals & Variables - -class Literal(object): - """Abstract base class for literal nodes.""" - -class StringLiteral(Literal): - """A string literal node.""" - __slots__ = ['text'] - def __init__(self, text): - self.text = text - def __call__(self, kind, data, pos, namespaces, variables): - return self.text - def __repr__(self): - return '"%s"' % self.text - -class NumberLiteral(Literal): - """A number literal node.""" - __slots__ = ['number'] - def __init__(self, number): - self.number = number - def __call__(self, kind, data, pos, namespaces, variables): - return self.number - def __repr__(self): - return str(self.number) - -class VariableReference(Literal): - """A variable reference node.""" - __slots__ = ['name'] - def __init__(self, name): - self.name = name - def __call__(self, kind, data, pos, namespaces, variables): - return variables.get(self.name) - def __repr__(self): - return str(self.name) - -# Operators - -class AndOperator(object): - """The boolean operator `and`.""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = as_bool(self.lval(kind, data, pos, namespaces, variables)) - if not lval: - return False - rval = self.rval(kind, data, pos, namespaces, variables) - return as_bool(rval) - def __repr__(self): - return '%s and %s' % (self.lval, self.rval) - -class EqualsOperator(object): - """The equality operator `=`.""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = as_scalar(self.lval(kind, data, pos, 
namespaces, variables)) - rval = as_scalar(self.rval(kind, data, pos, namespaces, variables)) - return lval == rval - def __repr__(self): - return '%s=%s' % (self.lval, self.rval) - -class NotEqualsOperator(object): - """The equality operator `!=`.""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = as_scalar(self.lval(kind, data, pos, namespaces, variables)) - rval = as_scalar(self.rval(kind, data, pos, namespaces, variables)) - return lval != rval - def __repr__(self): - return '%s!=%s' % (self.lval, self.rval) - -class OrOperator(object): - """The boolean operator `or`.""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = as_bool(self.lval(kind, data, pos, namespaces, variables)) - if lval: - return True - rval = self.rval(kind, data, pos, namespaces, variables) - return as_bool(rval) - def __repr__(self): - return '%s or %s' % (self.lval, self.rval) - -class GreaterThanOperator(object): - """The relational operator `>` (greater than).""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = self.lval(kind, data, pos, namespaces, variables) - rval = self.rval(kind, data, pos, namespaces, variables) - return as_float(lval) > as_float(rval) - def __repr__(self): - return '%s>%s' % (self.lval, self.rval) - -class GreaterThanOrEqualOperator(object): - """The relational operator `>=` (greater than or equal).""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = self.lval(kind, data, pos, namespaces, variables) - rval = self.rval(kind, data, pos, namespaces, variables) - return 
as_float(lval) >= as_float(rval) - def __repr__(self): - return '%s>=%s' % (self.lval, self.rval) - -class LessThanOperator(object): - """The relational operator `<` (less than).""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = self.lval(kind, data, pos, namespaces, variables) - rval = self.rval(kind, data, pos, namespaces, variables) - return as_float(lval) < as_float(rval) - def __repr__(self): - return '%s<%s' % (self.lval, self.rval) - -class LessThanOrEqualOperator(object): - """The relational operator `<=` (less than or equal).""" - __slots__ = ['lval', 'rval'] - def __init__(self, lval, rval): - self.lval = lval - self.rval = rval - def __call__(self, kind, data, pos, namespaces, variables): - lval = self.lval(kind, data, pos, namespaces, variables) - rval = self.rval(kind, data, pos, namespaces, variables) - return as_float(lval) <= as_float(rval) - def __repr__(self): - return '%s<=%s' % (self.lval, self.rval) - -_operator_map = {'=': EqualsOperator, '!=': NotEqualsOperator, - '>': GreaterThanOperator, '>=': GreaterThanOrEqualOperator, - '<': LessThanOperator, '>=': LessThanOrEqualOperator} - - -_DOTSLASHSLASH = (DESCENDANT_OR_SELF, PrincipalTypeTest(None), ()) -_DOTSLASH = (SELF, PrincipalTypeTest(None), ()) diff --git a/genshi/template/__init__.py b/genshi/template/__init__.py deleted file mode 100644 index 47a9310..0000000 --- a/genshi/template/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2007 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. 
For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Implementation of the template engine.""" - -from genshi.template.base import Context, Template, TemplateError, \ - TemplateRuntimeError, TemplateSyntaxError, \ - BadDirectiveError -from genshi.template.loader import TemplateLoader, TemplateNotFound -from genshi.template.markup import MarkupTemplate -from genshi.template.text import TextTemplate, OldTextTemplate, NewTextTemplate - -__docformat__ = 'restructuredtext en' diff --git a/genshi/template/_ast24.py b/genshi/template/_ast24.py deleted file mode 100644 index 05d241b..0000000 --- a/genshi/template/_ast24.py +++ /dev/null @@ -1,446 +0,0 @@ -# Generated automatically, please do not edit -# Generator can be found in Genshi SVN, scripts/ast-generator.py - -__version__ = 43614 - -class AST(object): - _fields = None - __doc__ = None - -class operator(AST): - _fields = None - __doc__ = None - _attributes = [] -class Add(operator): - _fields = None - __doc__ = None - -class boolop(AST): - _fields = None - __doc__ = None - _attributes = [] -class And(boolop): - _fields = None - __doc__ = None - -class stmt(AST): - _fields = None - __doc__ = None - _attributes = ['lineno', 'col_offset'] -class Assert(stmt): - _fields = ('test', 'msg') - __doc__ = None - -class Assign(stmt): - _fields = ('targets', 'value') - __doc__ = None - -class expr(AST): - _fields = None - __doc__ = None - _attributes = ['lineno', 'col_offset'] -class Attribute(expr): - _fields = ('value', 'attr', 'ctx') - __doc__ = None - -class AugAssign(stmt): - _fields = ('target', 'op', 'value') - __doc__ = None - -class expr_context(AST): - _fields = None - __doc__ = None - _attributes = [] -class AugLoad(expr_context): - _fields = None - __doc__ = None - -class AugStore(expr_context): - _fields = None - __doc__ = None - -class BinOp(expr): - _fields = ('left', 'op', 'right') - __doc__ = None - -class BitAnd(operator): - _fields = 
None - __doc__ = None - -class BitOr(operator): - _fields = None - __doc__ = None - -class BitXor(operator): - _fields = None - __doc__ = None - -class BoolOp(expr): - _fields = ('op', 'values') - __doc__ = None - -class Break(stmt): - _fields = None - __doc__ = None - -class Call(expr): - _fields = ('func', 'args', 'keywords', 'starargs', 'kwargs') - __doc__ = None - -class ClassDef(stmt): - _fields = ('name', 'bases', 'body') - __doc__ = None - -class Compare(expr): - _fields = ('left', 'ops', 'comparators') - __doc__ = None - -class Continue(stmt): - _fields = None - __doc__ = None - -class Del(expr_context): - _fields = None - __doc__ = None - -class Delete(stmt): - _fields = ('targets',) - __doc__ = None - -class Dict(expr): - _fields = ('keys', 'values') - __doc__ = None - -class Div(operator): - _fields = None - __doc__ = None - -class slice(AST): - _fields = None - __doc__ = None - _attributes = [] -class Ellipsis(slice): - _fields = None - __doc__ = None - -class cmpop(AST): - _fields = None - __doc__ = None - _attributes = [] -class Eq(cmpop): - _fields = None - __doc__ = None - -class Exec(stmt): - _fields = ('body', 'globals', 'locals') - __doc__ = None - -class Expr(stmt): - _fields = ('value',) - __doc__ = None - -class mod(AST): - _fields = None - __doc__ = None - _attributes = [] -class Expression(mod): - _fields = ('body',) - __doc__ = None - -class ExtSlice(slice): - _fields = ('dims',) - __doc__ = None - -class FloorDiv(operator): - _fields = None - __doc__ = None - -class For(stmt): - _fields = ('target', 'iter', 'body', 'orelse') - __doc__ = None - -class FunctionDef(stmt): - _fields = ('name', 'args', 'body', 'decorators') - __doc__ = None - -class GeneratorExp(expr): - _fields = ('elt', 'generators') - __doc__ = None - -class Global(stmt): - _fields = ('names',) - __doc__ = None - -class Gt(cmpop): - _fields = None - __doc__ = None - -class GtE(cmpop): - _fields = None - __doc__ = None - -class If(stmt): - _fields = ('test', 'body', 'orelse') 
- __doc__ = None - -class IfExp(expr): - _fields = ('test', 'body', 'orelse') - __doc__ = None - -class Import(stmt): - _fields = ('names',) - __doc__ = None - -class ImportFrom(stmt): - _fields = ('module', 'names', 'level') - __doc__ = None - -class In(cmpop): - _fields = None - __doc__ = None - -class Index(slice): - _fields = ('value',) - __doc__ = None - -class Interactive(mod): - _fields = ('body',) - __doc__ = None - -class unaryop(AST): - _fields = None - __doc__ = None - _attributes = [] -class Invert(unaryop): - _fields = None - __doc__ = None - -class Is(cmpop): - _fields = None - __doc__ = None - -class IsNot(cmpop): - _fields = None - __doc__ = None - -class LShift(operator): - _fields = None - __doc__ = None - -class Lambda(expr): - _fields = ('args', 'body') - __doc__ = None - -class List(expr): - _fields = ('elts', 'ctx') - __doc__ = None - -class ListComp(expr): - _fields = ('elt', 'generators') - __doc__ = None - -class Load(expr_context): - _fields = None - __doc__ = None - -class Lt(cmpop): - _fields = None - __doc__ = None - -class LtE(cmpop): - _fields = None - __doc__ = None - -class Mod(operator): - _fields = None - __doc__ = None - -class Module(mod): - _fields = ('body',) - __doc__ = None - -class Mult(operator): - _fields = None - __doc__ = None - -class Name(expr): - _fields = ('id', 'ctx') - __doc__ = None - -class Not(unaryop): - _fields = None - __doc__ = None - -class NotEq(cmpop): - _fields = None - __doc__ = None - -class NotIn(cmpop): - _fields = None - __doc__ = None - -class Num(expr): - _fields = ('n',) - __doc__ = None - -class Or(boolop): - _fields = None - __doc__ = None - -class Param(expr_context): - _fields = None - __doc__ = None - -class Pass(stmt): - _fields = None - __doc__ = None - -class Pow(operator): - _fields = None - __doc__ = None - -class Print(stmt): - _fields = ('dest', 'values', 'nl') - __doc__ = None - -class RShift(operator): - _fields = None - __doc__ = None - -class Raise(stmt): - _fields = ('type', 
'inst', 'tback') - __doc__ = None - -class Repr(expr): - _fields = ('value',) - __doc__ = None - -class Return(stmt): - _fields = ('value',) - __doc__ = None - -class Slice(slice): - _fields = ('lower', 'upper', 'step') - __doc__ = None - -class Store(expr_context): - _fields = None - __doc__ = None - -class Str(expr): - _fields = ('s',) - __doc__ = None - -class Sub(operator): - _fields = None - __doc__ = None - -class Subscript(expr): - _fields = ('value', 'slice', 'ctx') - __doc__ = None - -class Suite(mod): - _fields = ('body',) - __doc__ = None - -class TryExcept(stmt): - _fields = ('body', 'handlers', 'orelse') - __doc__ = None - -class TryFinally(stmt): - _fields = ('body', 'finalbody') - __doc__ = None - -class Tuple(expr): - _fields = ('elts', 'ctx') - __doc__ = None - -class UAdd(unaryop): - _fields = None - __doc__ = None - -class USub(unaryop): - _fields = None - __doc__ = None - -class UnaryOp(expr): - _fields = ('op', 'operand') - __doc__ = None - -class While(stmt): - _fields = ('test', 'body', 'orelse') - __doc__ = None - -class With(stmt): - _fields = ('context_expr', 'optional_vars', 'body') - __doc__ = None - -class Yield(expr): - _fields = ('value',) - __doc__ = None - -class alias(AST): - _fields = ('name', 'asname') - __doc__ = None - -class arguments(AST): - _fields = ('args', 'vararg', 'kwarg', 'defaults') - __doc__ = None - -class boolop(AST): - _fields = None - __doc__ = None - _attributes = [] - -class cmpop(AST): - _fields = None - __doc__ = None - _attributes = [] - -class comprehension(AST): - _fields = ('target', 'iter', 'ifs') - __doc__ = None - -class excepthandler(AST): - _fields = ('type', 'name', 'body', 'lineno', 'col_offset') - __doc__ = None - -class expr(AST): - _fields = None - __doc__ = None - _attributes = ['lineno', 'col_offset'] - -class expr_context(AST): - _fields = None - __doc__ = None - _attributes = [] - -class keyword(AST): - _fields = ('arg', 'value') - __doc__ = None - -class mod(AST): - _fields = None - __doc__ 
= None - _attributes = [] - -class operator(AST): - _fields = None - __doc__ = None - _attributes = [] - -class slice(AST): - _fields = None - __doc__ = None - _attributes = [] - -class stmt(AST): - _fields = None - __doc__ = None - _attributes = ['lineno', 'col_offset'] - -class unaryop(AST): - _fields = None - __doc__ = None - _attributes = [] - diff --git a/genshi/template/ast24.py b/genshi/template/ast24.py deleted file mode 100644 index af6dce9..0000000 --- a/genshi/template/ast24.py +++ /dev/null @@ -1,505 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2008-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Emulation of the proper abstract syntax tree API for Python 2.4.""" - -import compiler -import compiler.ast - -from genshi.template import _ast24 as _ast - -__all__ = ['_ast', 'parse'] -__docformat__ = 'restructuredtext en' - - -def _new(cls, *args, **kwargs): - ret = cls() - if ret._fields: - for attr, value in zip(ret._fields, args): - if attr in kwargs: - raise ValueError('Field set both in args and kwargs') - setattr(ret, attr, value) - for attr in kwargs: - if (getattr(ret, '_fields', None) and attr in ret._fields) \ - or (getattr(ret, '_attributes', None) and - attr in ret._attributes): - setattr(ret, attr, kwargs[attr]) - return ret - - -class ASTUpgrader(object): - """Transformer changing structure of Python 2.4 ASTs to - Python 2.5 ones. - - Transforms ``compiler.ast`` Abstract Syntax Tree to builtin ``_ast``. - It can use fake`` _ast`` classes and this way allow ``_ast`` emulation - in Python 2.4. 
- """ - - def __init__(self): - self.out_flags = None - self.lines = [-1] - - def _new(self, *args, **kwargs): - return _new(lineno = self.lines[-1], *args, **kwargs) - - def visit(self, node): - if node is None: - return None - if type(node) is tuple: - return tuple([self.visit(n) for n in node]) - lno = getattr(node, 'lineno', None) - if lno is not None: - self.lines.append(lno) - visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None) - if visitor is None: - raise Exception('Unhandled node type %r' % type(node)) - - retval = visitor(node) - if lno is not None: - self.lines.pop() - return retval - - def visit_Module(self, node): - body = self.visit(node.node) - if node.doc: - body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body - return self._new(_ast.Module, body) - - def visit_Expression(self, node): - return self._new(_ast.Expression, self.visit(node.node)) - - def _extract_args(self, node): - tab = node.argnames[:] - if node.flags & compiler.ast.CO_VARKEYWORDS: - kwarg = tab[-1] - tab = tab[:-1] - else: - kwarg = None - - if node.flags & compiler.ast.CO_VARARGS: - vararg = tab[-1] - tab = tab[:-1] - else: - vararg = None - - def _tup(t): - if isinstance(t, str): - return self._new(_ast.Name, t, _ast.Store()) - elif isinstance(t, tuple): - elts = [_tup(x) for x in t] - return self._new(_ast.Tuple, elts, _ast.Store()) - else: - raise NotImplemented - - args = [] - for arg in tab: - if isinstance(arg, str): - args.append(self._new(_ast.Name, arg, _ast.Param())) - elif isinstance(arg, tuple): - args.append(_tup(arg)) - else: - assert False, node.__class__ - - defaults = [self.visit(d) for d in node.defaults] - return self._new(_ast.arguments, args, vararg, kwarg, defaults) - - - def visit_Function(self, node): - if getattr(node, 'decorators', ()): - decorators = [self.visit(d) for d in node.decorators.nodes] - else: - decorators = [] - - args = self._extract_args(node) - body = self.visit(node.code) - if node.doc: - body = 
[self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body - return self._new(_ast.FunctionDef, node.name, args, body, decorators) - - def visit_Class(self, node): - #self.name_types.append(_ast.Load) - bases = [self.visit(b) for b in node.bases] - #self.name_types.pop() - body = self.visit(node.code) - if node.doc: - body = [self._new(_ast.Expr, self._new(_ast.Str, node.doc))] + body - return self._new(_ast.ClassDef, node.name, bases, body) - - def visit_Return(self, node): - return self._new(_ast.Return, self.visit(node.value)) - - def visit_Assign(self, node): - #self.name_types.append(_ast.Store) - targets = [self.visit(t) for t in node.nodes] - #self.name_types.pop() - return self._new(_ast.Assign, targets, self.visit(node.expr)) - - aug_operators = { - '+=': _ast.Add, - '/=': _ast.Div, - '//=': _ast.FloorDiv, - '<<=': _ast.LShift, - '%=': _ast.Mod, - '*=': _ast.Mult, - '**=': _ast.Pow, - '>>=': _ast.RShift, - '-=': _ast.Sub, - } - - def visit_AugAssign(self, node): - target = self.visit(node.node) - - # Because it's AugAssign target can't be list nor tuple - # so we only have to change context of one node - target.ctx = _ast.Store() - op = self.aug_operators[node.op]() - return self._new(_ast.AugAssign, target, op, self.visit(node.expr)) - - def _visit_Print(nl): - def _visit(self, node): - values = [self.visit(v) for v in node.nodes] - return self._new(_ast.Print, self.visit(node.dest), values, nl) - return _visit - - visit_Print = _visit_Print(False) - visit_Printnl = _visit_Print(True) - del _visit_Print - - def visit_For(self, node): - return self._new(_ast.For, self.visit(node.assign), self.visit(node.list), - self.visit(node.body), self.visit(node.else_)) - - def visit_While(self, node): - return self._new(_ast.While, self.visit(node.test), self.visit(node.body), - self.visit(node.else_)) - - def visit_If(self, node): - def _level(tests, else_): - test = self.visit(tests[0][0]) - body = self.visit(tests[0][1]) - if len(tests) == 1: - orelse = 
self.visit(else_) - else: - orelse = [_level(tests[1:], else_)] - return self._new(_ast.If, test, body, orelse) - return _level(node.tests, node.else_) - - def visit_With(self, node): - return self._new(_ast.With, self.visit(node.expr), - self.visit(node.vars), self.visit(node.body)) - - def visit_Raise(self, node): - return self._new(_ast.Raise, self.visit(node.expr1), - self.visit(node.expr2), self.visit(node.expr3)) - - def visit_TryExcept(self, node): - handlers = [] - for type, name, body in node.handlers: - handlers.append(self._new(_ast.excepthandler, self.visit(type), - self.visit(name), self.visit(body))) - return self._new(_ast.TryExcept, self.visit(node.body), - handlers, self.visit(node.else_)) - - def visit_TryFinally(self, node): - return self._new(_ast.TryFinally, self.visit(node.body), - self.visit(node.final)) - - def visit_Assert(self, node): - return self._new(_ast.Assert, self.visit(node.test), self.visit(node.fail)) - - def visit_Import(self, node): - names = [self._new(_ast.alias, n[0], n[1]) for n in node.names] - return self._new(_ast.Import, names) - - def visit_From(self, node): - names = [self._new(_ast.alias, n[0], n[1]) for n in node.names] - return self._new(_ast.ImportFrom, node.modname, names, 0) - - def visit_Exec(self, node): - return self._new(_ast.Exec, self.visit(node.expr), - self.visit(node.locals), self.visit(node.globals)) - - def visit_Global(self, node): - return self._new(_ast.Global, node.names[:]) - - def visit_Discard(self, node): - return self._new(_ast.Expr, self.visit(node.expr)) - - def _map_class(to): - def _visit(self, node): - return self._new(to) - return _visit - - visit_Pass = _map_class(_ast.Pass) - visit_Break = _map_class(_ast.Break) - visit_Continue = _map_class(_ast.Continue) - - def _visit_BinOperator(opcls): - def _visit(self, node): - return self._new(_ast.BinOp, self.visit(node.left), - opcls(), self.visit(node.right)) - return _visit - visit_Add = _visit_BinOperator(_ast.Add) - visit_Div = 
_visit_BinOperator(_ast.Div) - visit_FloorDiv = _visit_BinOperator(_ast.FloorDiv) - visit_LeftShift = _visit_BinOperator(_ast.LShift) - visit_Mod = _visit_BinOperator(_ast.Mod) - visit_Mul = _visit_BinOperator(_ast.Mult) - visit_Power = _visit_BinOperator(_ast.Pow) - visit_RightShift = _visit_BinOperator(_ast.RShift) - visit_Sub = _visit_BinOperator(_ast.Sub) - del _visit_BinOperator - - def _visit_BitOperator(opcls): - def _visit(self, node): - def _make(nodes): - if len(nodes) == 1: - return self.visit(nodes[0]) - left = _make(nodes[:-1]) - right = self.visit(nodes[-1]) - return self._new(_ast.BinOp, left, opcls(), right) - return _make(node.nodes) - return _visit - visit_Bitand = _visit_BitOperator(_ast.BitAnd) - visit_Bitor = _visit_BitOperator(_ast.BitOr) - visit_Bitxor = _visit_BitOperator(_ast.BitXor) - del _visit_BitOperator - - def _visit_UnaryOperator(opcls): - def _visit(self, node): - return self._new(_ast.UnaryOp, opcls(), self.visit(node.expr)) - return _visit - - visit_Invert = _visit_UnaryOperator(_ast.Invert) - visit_Not = _visit_UnaryOperator(_ast.Not) - visit_UnaryAdd = _visit_UnaryOperator(_ast.UAdd) - visit_UnarySub = _visit_UnaryOperator(_ast.USub) - del _visit_UnaryOperator - - def _visit_BoolOperator(opcls): - def _visit(self, node): - values = [self.visit(n) for n in node.nodes] - return self._new(_ast.BoolOp, opcls(), values) - return _visit - visit_And = _visit_BoolOperator(_ast.And) - visit_Or = _visit_BoolOperator(_ast.Or) - del _visit_BoolOperator - - cmp_operators = { - '==': _ast.Eq, - '!=': _ast.NotEq, - '<': _ast.Lt, - '<=': _ast.LtE, - '>': _ast.Gt, - '>=': _ast.GtE, - 'is': _ast.Is, - 'is not': _ast.IsNot, - 'in': _ast.In, - 'not in': _ast.NotIn, - } - - def visit_Compare(self, node): - left = self.visit(node.expr) - ops = [] - comparators = [] - for optype, expr in node.ops: - ops.append(self.cmp_operators[optype]()) - comparators.append(self.visit(expr)) - return self._new(_ast.Compare, left, ops, comparators) - - def 
visit_Lambda(self, node): - args = self._extract_args(node) - body = self.visit(node.code) - return self._new(_ast.Lambda, args, body) - - def visit_IfExp(self, node): - return self._new(_ast.IfExp, self.visit(node.test), self.visit(node.then), - self.visit(node.else_)) - - def visit_Dict(self, node): - keys = [self.visit(x[0]) for x in node.items] - values = [self.visit(x[1]) for x in node.items] - return self._new(_ast.Dict, keys, values) - - def visit_ListComp(self, node): - generators = [self.visit(q) for q in node.quals] - return self._new(_ast.ListComp, self.visit(node.expr), generators) - - def visit_GenExprInner(self, node): - generators = [self.visit(q) for q in node.quals] - return self._new(_ast.GeneratorExp, self.visit(node.expr), generators) - - def visit_GenExpr(self, node): - return self.visit(node.code) - - def visit_GenExprFor(self, node): - ifs = [self.visit(i) for i in node.ifs] - return self._new(_ast.comprehension, self.visit(node.assign), - self.visit(node.iter), ifs) - - def visit_ListCompFor(self, node): - ifs = [self.visit(i) for i in node.ifs] - return self._new(_ast.comprehension, self.visit(node.assign), - self.visit(node.list), ifs) - - def visit_GenExprIf(self, node): - return self.visit(node.test) - visit_ListCompIf = visit_GenExprIf - - def visit_Yield(self, node): - return self._new(_ast.Yield, self.visit(node.value)) - - def visit_CallFunc(self, node): - args = [] - keywords = [] - for arg in node.args: - if isinstance(arg, compiler.ast.Keyword): - keywords.append(self._new(_ast.keyword, arg.name, - self.visit(arg.expr))) - else: - args.append(self.visit(arg)) - return self._new(_ast.Call, self.visit(node.node), args, keywords, - self.visit(node.star_args), self.visit(node.dstar_args)) - - def visit_Backquote(self, node): - return self._new(_ast.Repr, self.visit(node.expr)) - - def visit_Const(self, node): - if node.value is None: # appears in slices - return None - elif isinstance(node.value, basestring): - return 
self._new(_ast.Str, node.value) - else: - return self._new(_ast.Num, node.value) - - def visit_Name(self, node): - return self._new(_ast.Name, node.name, _ast.Load()) - - def visit_Getattr(self, node): - return self._new(_ast.Attribute, self.visit(node.expr), node.attrname, - _ast.Load()) - - def visit_Tuple(self, node): - nodes = [self.visit(n) for n in node.nodes] - return self._new(_ast.Tuple, nodes, _ast.Load()) - - def visit_List(self, node): - nodes = [self.visit(n) for n in node.nodes] - return self._new(_ast.List, nodes, _ast.Load()) - - def get_ctx(self, flags): - if flags == 'OP_DELETE': - return _ast.Del() - elif flags == 'OP_APPLY': - return _ast.Load() - elif flags == 'OP_ASSIGN': - return _ast.Store() - else: - # FIXME Exception here - assert False, repr(flags) - - def visit_AssName(self, node): - self.out_flags = node.flags - ctx = self.get_ctx(node.flags) - return self._new(_ast.Name, node.name, ctx) - - def visit_AssAttr(self, node): - self.out_flags = node.flags - ctx = self.get_ctx(node.flags) - return self._new(_ast.Attribute, self.visit(node.expr), - node.attrname, ctx) - - def _visit_AssCollection(cls): - def _visit(self, node): - flags = None - elts = [] - for n in node.nodes: - elts.append(self.visit(n)) - if flags is None: - flags = self.out_flags - else: - assert flags == self.out_flags - self.out_flags = flags - ctx = self.get_ctx(flags) - return self._new(cls, elts, ctx) - return _visit - - visit_AssList = _visit_AssCollection(_ast.List) - visit_AssTuple = _visit_AssCollection(_ast.Tuple) - del _visit_AssCollection - - def visit_Slice(self, node): - lower = self.visit(node.lower) - upper = self.visit(node.upper) - ctx = self.get_ctx(node.flags) - self.out_flags = node.flags - return self._new(_ast.Subscript, self.visit(node.expr), - self._new(_ast.Slice, lower, upper, None), ctx) - - def visit_Subscript(self, node): - ctx = self.get_ctx(node.flags) - subs = [self.visit(s) for s in node.subs] - - advanced = (_ast.Slice, _ast.Ellipsis) - 
slices = [] - nonindex = False - for sub in subs: - if isinstance(sub, advanced): - nonindex = True - slices.append(sub) - else: - slices.append(self._new(_ast.Index, sub)) - if len(slices) == 1: - slice = slices[0] - elif nonindex: - slice = self._new(_ast.ExtSlice, slices) - else: - slice = self._new(_ast.Tuple, slices, _ast.Load()) - - self.out_flags = node.flags - return self._new(_ast.Subscript, self.visit(node.expr), slice, ctx) - - def visit_Sliceobj(self, node): - a = [self.visit(n) for n in node.nodes + [None]*(3 - len(node.nodes))] - return self._new(_ast.Slice, a[0], a[1], a[2]) - - def visit_Ellipsis(self, node): - return self._new(_ast.Ellipsis) - - def visit_Stmt(self, node): - def _check_del(n): - # del x is just AssName('x', 'OP_DELETE') - # we want to transform it to Delete([Name('x', Del())]) - dcls = (_ast.Name, _ast.List, _ast.Subscript, _ast.Attribute) - if isinstance(n, dcls) and isinstance(n.ctx, _ast.Del): - return self._new(_ast.Delete, [n]) - elif isinstance(n, _ast.Tuple) and isinstance(n.ctx, _ast.Del): - # unpack last tuple to avoid making del (x, y, z,); - # out of del x, y, z; (there's no difference between - # this two in compiler.ast) - return self._new(_ast.Delete, n.elts) - else: - return n - def _keep(n): - if isinstance(n, _ast.Expr) and n.value is None: - return False - else: - return True - return [s for s in [_check_del(self.visit(n)) for n in node.nodes] - if _keep(s)] - - -def parse(source, mode): - node = compiler.parse(source, mode) - return ASTUpgrader().visit(node) diff --git a/genshi/template/astutil.py b/genshi/template/astutil.py deleted file mode 100644 index c3ad107..0000000 --- a/genshi/template/astutil.py +++ /dev/null @@ -1,784 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2008-2010 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. 
The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Support classes for generating code from abstract syntax trees.""" - -try: - import _ast -except ImportError: - from genshi.template.ast24 import _ast, parse -else: - def parse(source, mode): - return compile(source, '', mode, _ast.PyCF_ONLY_AST) - - -__docformat__ = 'restructuredtext en' - - -class ASTCodeGenerator(object): - """General purpose base class for AST transformations. - - Every visitor method can be overridden to return an AST node that has been - altered or replaced in some way. - """ - def __init__(self, tree): - self.lines_info = [] - self.line_info = None - self.code = '' - self.line = None - self.last = None - self.indent = 0 - self.blame_stack = [] - self.visit(tree) - if self.line.strip(): - self.code += self.line + '\n' - self.lines_info.append(self.line_info) - self.line = None - self.line_info = None - - def _change_indent(self, delta): - self.indent += delta - - def _new_line(self): - if self.line is not None: - self.code += self.line + '\n' - self.lines_info.append(self.line_info) - self.line = ' '*4*self.indent - if len(self.blame_stack) == 0: - self.line_info = [] - self.last = None - else: - self.line_info = [(0, self.blame_stack[-1],)] - self.last = self.blame_stack[-1] - - def _write(self, s): - if len(s) == 0: - return - if len(self.blame_stack) == 0: - if self.last is not None: - self.last = None - self.line_info.append((len(self.line), self.last)) - else: - if self.last != self.blame_stack[-1]: - self.last = self.blame_stack[-1] - self.line_info.append((len(self.line), self.last)) - self.line += s - - def visit(self, node): - if node is None: - return None - if type(node) is tuple: - return tuple([self.visit(n) for n in node]) - try: - 
self.blame_stack.append((node.lineno, node.col_offset,)) - info = True - except AttributeError: - info = False - visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None) - if visitor is None: - raise Exception('Unhandled node type %r' % type(node)) - ret = visitor(node) - if info: - self.blame_stack.pop() - return ret - - def visit_Module(self, node): - for n in node.body: - self.visit(n) - visit_Interactive = visit_Module - visit_Suite = visit_Module - - def visit_Expression(self, node): - self._new_line() - return self.visit(node.body) - - # arguments = (expr* args, identifier? vararg, - # identifier? kwarg, expr* defaults) - def visit_arguments(self, node): - first = True - no_default_count = len(node.args) - len(node.defaults) - for i, arg in enumerate(node.args): - if not first: - self._write(', ') - else: - first = False - self.visit(arg) - if i >= no_default_count: - self._write('=') - self.visit(node.defaults[i - no_default_count]) - if getattr(node, 'vararg', None): - if not first: - self._write(', ') - else: - first = False - self._write('*' + node.vararg) - if getattr(node, 'kwarg', None): - if not first: - self._write(', ') - else: - first = False - self._write('**' + node.kwarg) - - # FunctionDef(identifier name, arguments args, - # stmt* body, expr* decorator_list) - def visit_FunctionDef(self, node): - decarators = () - if hasattr(node, 'decorator_list'): - decorators = getattr(node, 'decorator_list') - else: # different name in earlier Python versions - decorators = getattr(node, 'decorators', ()) - for decorator in decorators: - self._new_line() - self._write('@') - self.visit(decorator) - self._new_line() - self._write('def ' + node.name + '(') - self.visit(node.args) - self._write('):') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - - # ClassDef(identifier name, expr* bases, stmt* body) - def visit_ClassDef(self, node): - self._new_line() - self._write('class ' + node.name) - 
if node.bases: - self._write('(') - self.visit(node.bases[0]) - for base in node.bases[1:]: - self._write(', ') - self.visit(base) - self._write(')') - self._write(':') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - - # Return(expr? value) - def visit_Return(self, node): - self._new_line() - self._write('return') - if getattr(node, 'value', None): - self._write(' ') - self.visit(node.value) - - # Delete(expr* targets) - def visit_Delete(self, node): - self._new_line() - self._write('del ') - self.visit(node.targets[0]) - for target in node.targets[1:]: - self._write(', ') - self.visit(target) - - # Assign(expr* targets, expr value) - def visit_Assign(self, node): - self._new_line() - for target in node.targets: - self.visit(target) - self._write(' = ') - self.visit(node.value) - - # AugAssign(expr target, operator op, expr value) - def visit_AugAssign(self, node): - self._new_line() - self.visit(node.target) - self._write(' ' + self.binary_operators[node.op.__class__] + '= ') - self.visit(node.value) - - # Print(expr? 
dest, expr* values, bool nl) - def visit_Print(self, node): - self._new_line() - self._write('print') - if getattr(node, 'dest', None): - self._write(' >> ') - self.visit(node.dest) - if getattr(node, 'values', None): - self._write(', ') - else: - self._write(' ') - if getattr(node, 'values', None): - self.visit(node.values[0]) - for value in node.values[1:]: - self._write(', ') - self.visit(value) - if not node.nl: - self._write(',') - - # For(expr target, expr iter, stmt* body, stmt* orelse) - def visit_For(self, node): - self._new_line() - self._write('for ') - self.visit(node.target) - self._write(' in ') - self.visit(node.iter) - self._write(':') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - if getattr(node, 'orelse', None): - self._new_line() - self._write('else:') - self._change_indent(1) - for statement in node.orelse: - self.visit(statement) - self._change_indent(-1) - - # While(expr test, stmt* body, stmt* orelse) - def visit_While(self, node): - self._new_line() - self._write('while ') - self.visit(node.test) - self._write(':') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - if getattr(node, 'orelse', None): - self._new_line() - self._write('else:') - self._change_indent(1) - for statement in node.orelse: - self.visit(statement) - self._change_indent(-1) - - # If(expr test, stmt* body, stmt* orelse) - def visit_If(self, node): - self._new_line() - self._write('if ') - self.visit(node.test) - self._write(':') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - if getattr(node, 'orelse', None): - self._new_line() - self._write('else:') - self._change_indent(1) - for statement in node.orelse: - self.visit(statement) - self._change_indent(-1) - - # With(expr context_expr, expr? 
optional_vars, stmt* body) - def visit_With(self, node): - self._new_line() - self._write('with ') - self.visit(node.context_expr) - if getattr(node, 'optional_vars', None): - self._write(' as ') - self.visit(node.optional_vars) - self._write(':') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - - - # Raise(expr? type, expr? inst, expr? tback) - def visit_Raise(self, node): - self._new_line() - self._write('raise') - if not node.type: - return - self._write(' ') - self.visit(node.type) - if not node.inst: - return - self._write(', ') - self.visit(node.inst) - if not node.tback: - return - self._write(', ') - self.visit(node.tback) - - # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) - def visit_TryExcept(self, node): - self._new_line() - self._write('try:') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - if getattr(node, 'handlers', None): - for handler in node.handlers: - self.visit(handler) - self._new_line() - if getattr(node, 'orelse', None): - self._write('else:') - self._change_indent(1) - for statement in node.orelse: - self.visit(statement) - self._change_indent(-1) - - # excepthandler = (expr? type, expr? 
name, stmt* body) - def visit_ExceptHandler(self, node): - self._new_line() - self._write('except') - if getattr(node, 'type', None): - self._write(' ') - self.visit(node.type) - if getattr(node, 'name', None): - self._write(', ') - self.visit(node.name) - self._write(':') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - visit_excepthandler = visit_ExceptHandler - - # TryFinally(stmt* body, stmt* finalbody) - def visit_TryFinally(self, node): - self._new_line() - self._write('try:') - self._change_indent(1) - for statement in node.body: - self.visit(statement) - self._change_indent(-1) - - if getattr(node, 'finalbody', None): - self._new_line() - self._write('finally:') - self._change_indent(1) - for statement in node.finalbody: - self.visit(statement) - self._change_indent(-1) - - # Assert(expr test, expr? msg) - def visit_Assert(self, node): - self._new_line() - self._write('assert ') - self.visit(node.test) - if getattr(node, 'msg', None): - self._write(', ') - self.visit(node.msg) - - def visit_alias(self, node): - self._write(node.name) - if getattr(node, 'asname', None): - self._write(' as ') - self._write(node.asname) - - # Import(alias* names) - def visit_Import(self, node): - self._new_line() - self._write('import ') - self.visit(node.names[0]) - for name in node.names[1:]: - self._write(', ') - self.visit(name) - - # ImportFrom(identifier module, alias* names, int? level) - def visit_ImportFrom(self, node): - self._new_line() - self._write('from ') - if node.level: - self._write('.' * node.level) - self._write(node.module) - self._write(' import ') - self.visit(node.names[0]) - for name in node.names[1:]: - self._write(', ') - self.visit(name) - - # Exec(expr body, expr? globals, expr? 
locals) - def visit_Exec(self, node): - self._new_line() - self._write('exec ') - self.visit(node.body) - if not node.globals: - return - self._write(', ') - self.visit(node.globals) - if not node.locals: - return - self._write(', ') - self.visit(node.locals) - - # Global(identifier* names) - def visit_Global(self, node): - self._new_line() - self._write('global ') - self.visit(node.names[0]) - for name in node.names[1:]: - self._write(', ') - self.visit(name) - - # Expr(expr value) - def visit_Expr(self, node): - self._new_line() - self.visit(node.value) - - # Pass - def visit_Pass(self, node): - self._new_line() - self._write('pass') - - # Break - def visit_Break(self, node): - self._new_line() - self._write('break') - - # Continue - def visit_Continue(self, node): - self._new_line() - self._write('continue') - - ### EXPRESSIONS - def with_parens(f): - def _f(self, node): - self._write('(') - f(self, node) - self._write(')') - return _f - - bool_operators = {_ast.And: 'and', _ast.Or: 'or'} - - # BoolOp(boolop op, expr* values) - @with_parens - def visit_BoolOp(self, node): - joiner = ' ' + self.bool_operators[node.op.__class__] + ' ' - self.visit(node.values[0]) - for value in node.values[1:]: - self._write(joiner) - self.visit(value) - - binary_operators = { - _ast.Add: '+', - _ast.Sub: '-', - _ast.Mult: '*', - _ast.Div: '/', - _ast.Mod: '%', - _ast.Pow: '**', - _ast.LShift: '<<', - _ast.RShift: '>>', - _ast.BitOr: '|', - _ast.BitXor: '^', - _ast.BitAnd: '&', - _ast.FloorDiv: '//' - } - - # BinOp(expr left, operator op, expr right) - @with_parens - def visit_BinOp(self, node): - self.visit(node.left) - self._write(' ' + self.binary_operators[node.op.__class__] + ' ') - self.visit(node.right) - - unary_operators = { - _ast.Invert: '~', - _ast.Not: 'not', - _ast.UAdd: '+', - _ast.USub: '-', - } - - # UnaryOp(unaryop op, expr operand) - def visit_UnaryOp(self, node): - self._write(self.unary_operators[node.op.__class__] + ' ') - self.visit(node.operand) - - # 
Lambda(arguments args, expr body) - @with_parens - def visit_Lambda(self, node): - self._write('lambda ') - self.visit(node.args) - self._write(': ') - self.visit(node.body) - - # IfExp(expr test, expr body, expr orelse) - @with_parens - def visit_IfExp(self, node): - self.visit(node.body) - self._write(' if ') - self.visit(node.test) - self._write(' else ') - self.visit(node.orelse) - - # Dict(expr* keys, expr* values) - def visit_Dict(self, node): - self._write('{') - for key, value in zip(node.keys, node.values): - self.visit(key) - self._write(': ') - self.visit(value) - self._write(', ') - self._write('}') - - # ListComp(expr elt, comprehension* generators) - def visit_ListComp(self, node): - self._write('[') - self.visit(node.elt) - for generator in node.generators: - # comprehension = (expr target, expr iter, expr* ifs) - self._write(' for ') - self.visit(generator.target) - self._write(' in ') - self.visit(generator.iter) - for ifexpr in generator.ifs: - self._write(' if ') - self.visit(ifexpr) - self._write(']') - - # GeneratorExp(expr elt, comprehension* generators) - def visit_GeneratorExp(self, node): - self._write('(') - self.visit(node.elt) - for generator in node.generators: - # comprehension = (expr target, expr iter, expr* ifs) - self._write(' for ') - self.visit(generator.target) - self._write(' in ') - self.visit(generator.iter) - for ifexpr in generator.ifs: - self._write(' if ') - self.visit(ifexpr) - self._write(')') - - # Yield(expr? 
value) - def visit_Yield(self, node): - self._write('yield') - if getattr(node, 'value', None): - self._write(' ') - self.visit(node.value) - - comparision_operators = { - _ast.Eq: '==', - _ast.NotEq: '!=', - _ast.Lt: '<', - _ast.LtE: '<=', - _ast.Gt: '>', - _ast.GtE: '>=', - _ast.Is: 'is', - _ast.IsNot: 'is not', - _ast.In: 'in', - _ast.NotIn: 'not in', - } - - # Compare(expr left, cmpop* ops, expr* comparators) - @with_parens - def visit_Compare(self, node): - self.visit(node.left) - for op, comparator in zip(node.ops, node.comparators): - self._write(' ' + self.comparision_operators[op.__class__] + ' ') - self.visit(comparator) - - # Call(expr func, expr* args, keyword* keywords, - # expr? starargs, expr? kwargs) - def visit_Call(self, node): - self.visit(node.func) - self._write('(') - first = True - for arg in node.args: - if not first: - self._write(', ') - first = False - self.visit(arg) - - for keyword in node.keywords: - if not first: - self._write(', ') - first = False - # keyword = (identifier arg, expr value) - self._write(keyword.arg) - self._write('=') - self.visit(keyword.value) - if getattr(node, 'starargs', None): - if not first: - self._write(', ') - first = False - self._write('*') - self.visit(node.starargs) - - if getattr(node, 'kwargs', None): - if not first: - self._write(', ') - first = False - self._write('**') - self.visit(node.kwargs) - self._write(')') - - # Repr(expr value) - def visit_Repr(self, node): - self._write('`') - self.visit(node.value) - self._write('`') - - # Num(object n) - def visit_Num(self, node): - self._write(repr(node.n)) - - # Str(string s) - def visit_Str(self, node): - self._write(repr(node.s)) - - # Attribute(expr value, identifier attr, expr_context ctx) - def visit_Attribute(self, node): - self.visit(node.value) - self._write('.') - self._write(node.attr) - - # Subscript(expr value, slice slice, expr_context ctx) - def visit_Subscript(self, node): - self.visit(node.value) - self._write('[') - def 
_process_slice(node): - if isinstance(node, _ast.Ellipsis): - self._write('...') - elif isinstance(node, _ast.Slice): - if getattr(node, 'lower', 'None'): - self.visit(node.lower) - self._write(':') - if getattr(node, 'upper', None): - self.visit(node.upper) - if getattr(node, 'step', None): - self._write(':') - self.visit(node.step) - elif isinstance(node, _ast.Index): - self.visit(node.value) - elif isinstance(node, _ast.ExtSlice): - self.visit(node.dims[0]) - for dim in node.dims[1:]: - self._write(', ') - self.visit(dim) - else: - raise NotImplemented('Slice type not implemented') - _process_slice(node.slice) - self._write(']') - - # Name(identifier id, expr_context ctx) - def visit_Name(self, node): - self._write(node.id) - - # List(expr* elts, expr_context ctx) - def visit_List(self, node): - self._write('[') - for elt in node.elts: - self.visit(elt) - self._write(', ') - self._write(']') - - # Tuple(expr *elts, expr_context ctx) - def visit_Tuple(self, node): - self._write('(') - for elt in node.elts: - self.visit(elt) - self._write(', ') - self._write(')') - - -class ASTTransformer(object): - """General purpose base class for AST transformations. - - Every visitor method can be overridden to return an AST node that has been - altered or replaced in some way. 
- """ - - def visit(self, node): - if node is None: - return None - if type(node) is tuple: - return tuple([self.visit(n) for n in node]) - visitor = getattr(self, 'visit_%s' % node.__class__.__name__, None) - if visitor is None: - return node - return visitor(node) - - def _clone(self, node): - clone = node.__class__() - for name in getattr(clone, '_attributes', ()): - try: - setattr(clone, 'name', getattr(node, name)) - except AttributeError: - pass - for name in clone._fields: - try: - value = getattr(node, name) - except AttributeError: - pass - else: - if value is None: - pass - elif isinstance(value, list): - value = [self.visit(x) for x in value] - elif isinstance(value, tuple): - value = tuple(self.visit(x) for x in value) - else: - value = self.visit(value) - setattr(clone, name, value) - return clone - - visit_Module = _clone - visit_Interactive = _clone - visit_Expression = _clone - visit_Suite = _clone - - visit_FunctionDef = _clone - visit_ClassDef = _clone - visit_Return = _clone - visit_Delete = _clone - visit_Assign = _clone - visit_AugAssign = _clone - visit_Print = _clone - visit_For = _clone - visit_While = _clone - visit_If = _clone - visit_With = _clone - visit_Raise = _clone - visit_TryExcept = _clone - visit_TryFinally = _clone - visit_Assert = _clone - visit_ExceptHandler = _clone - - visit_Import = _clone - visit_ImportFrom = _clone - visit_Exec = _clone - visit_Global = _clone - visit_Expr = _clone - # Pass, Break, Continue don't need to be copied - - visit_BoolOp = _clone - visit_BinOp = _clone - visit_UnaryOp = _clone - visit_Lambda = _clone - visit_IfExp = _clone - visit_Dict = _clone - visit_ListComp = _clone - visit_GeneratorExp = _clone - visit_Yield = _clone - visit_Compare = _clone - visit_Call = _clone - visit_Repr = _clone - # Num, Str don't need to be copied - - visit_Attribute = _clone - visit_Subscript = _clone - visit_Name = _clone - visit_List = _clone - visit_Tuple = _clone - - visit_comprehension = _clone - 
visit_excepthandler = _clone - visit_arguments = _clone - visit_keyword = _clone - visit_alias = _clone - - visit_Slice = _clone - visit_ExtSlice = _clone - visit_Index = _clone - - del _clone diff --git a/genshi/template/base.py b/genshi/template/base.py deleted file mode 100644 index 202faae..0000000 --- a/genshi/template/base.py +++ /dev/null @@ -1,634 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2010 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Basic templating functionality.""" - -from collections import deque -import os -from StringIO import StringIO -import sys - -from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure -from genshi.input import ParseError - -__all__ = ['Context', 'DirectiveFactory', 'Template', 'TemplateError', - 'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError'] -__docformat__ = 'restructuredtext en' - - -class TemplateError(Exception): - """Base exception class for errors related to template processing.""" - - def __init__(self, message, filename=None, lineno=-1, offset=-1): - """Create the exception. 
- - :param message: the error message - :param filename: the filename of the template - :param lineno: the number of line in the template at which the error - occurred - :param offset: the column number at which the error occurred - """ - if filename is None: - filename = '<string>' - self.msg = message #: the error message string - if filename != '<string>' or lineno >= 0: - message = '%s (%s, line %d)' % (self.msg, filename, lineno) - Exception.__init__(self, message) - self.filename = filename #: the name of the template file - self.lineno = lineno #: the number of the line containing the error - self.offset = offset #: the offset on the line - - -class TemplateSyntaxError(TemplateError): - """Exception raised when an expression in a template causes a Python syntax - error, or the template is not well-formed. - """ - - def __init__(self, message, filename=None, lineno=-1, offset=-1): - """Create the exception - - :param message: the error message - :param filename: the filename of the template - :param lineno: the number of line in the template at which the error - occurred - :param offset: the column number at which the error occurred - """ - if isinstance(message, SyntaxError) and message.lineno is not None: - message = str(message).replace(' (line %d)' % message.lineno, '') - TemplateError.__init__(self, message, filename, lineno) - - -class BadDirectiveError(TemplateSyntaxError): - """Exception raised when an unknown directive is encountered when parsing - a template. - - An unknown directive is any attribute using the namespace for directives, - with a local name that doesn't match any registered directive. 
- """ - - def __init__(self, name, filename=None, lineno=-1): - """Create the exception - - :param name: the name of the directive - :param filename: the filename of the template - :param lineno: the number of line in the template at which the error - occurred - """ - TemplateSyntaxError.__init__(self, 'bad directive "%s"' % name, - filename, lineno) - - -class TemplateRuntimeError(TemplateError): - """Exception raised when an the evaluation of a Python expression in a - template causes an error. - """ - - -class Context(object): - """Container for template input data. - - A context provides a stack of scopes (represented by dictionaries). - - Template directives such as loops can push a new scope on the stack with - data that should only be available inside the loop. When the loop - terminates, that scope can get popped off the stack again. - - >>> ctxt = Context(one='foo', other=1) - >>> ctxt.get('one') - 'foo' - >>> ctxt.get('other') - 1 - >>> ctxt.push(dict(one='frost')) - >>> ctxt.get('one') - 'frost' - >>> ctxt.get('other') - 1 - >>> ctxt.pop() - {'one': 'frost'} - >>> ctxt.get('one') - 'foo' - """ - - def __init__(self, **data): - """Initialize the template context with the given keyword arguments as - data. - """ - self.frames = deque([data]) - self.pop = self.frames.popleft - self.push = self.frames.appendleft - self._match_templates = [] - self._choice_stack = [] - - # Helper functions for use in expressions - def defined(name): - """Return whether a variable with the specified name exists in the - expression scope.""" - return name in self - def value_of(name, default=None): - """If a variable of the specified name is defined, return its value. 
- Otherwise, return the provided default value, or ``None``.""" - return self.get(name, default) - data.setdefault('defined', defined) - data.setdefault('value_of', value_of) - - def __repr__(self): - return repr(list(self.frames)) - - def __contains__(self, key): - """Return whether a variable exists in any of the scopes. - - :param key: the name of the variable - """ - return self._find(key)[1] is not None - has_key = __contains__ - - def __delitem__(self, key): - """Remove a variable from all scopes. - - :param key: the name of the variable - """ - for frame in self.frames: - if key in frame: - del frame[key] - - def __getitem__(self, key): - """Get a variables's value, starting at the current scope and going - upward. - - :param key: the name of the variable - :return: the variable value - :raises KeyError: if the requested variable wasn't found in any scope - """ - value, frame = self._find(key) - if frame is None: - raise KeyError(key) - return value - - def __len__(self): - """Return the number of distinctly named variables in the context. - - :return: the number of variables in the context - """ - return len(self.items()) - - def __setitem__(self, key, value): - """Set a variable in the current scope. - - :param key: the name of the variable - :param value: the variable value - """ - self.frames[0][key] = value - - def _find(self, key, default=None): - """Retrieve a given variable's value and the frame it was found in. - - Intended primarily for internal use by directives. - - :param key: the name of the variable - :param default: the default value to return when the variable is not - found - """ - for frame in self.frames: - if key in frame: - return frame[key], frame - return default, None - - def get(self, key, default=None): - """Get a variable's value, starting at the current scope and going - upward. 
- - :param key: the name of the variable - :param default: the default value to return when the variable is not - found - """ - for frame in self.frames: - if key in frame: - return frame[key] - return default - - def keys(self): - """Return the name of all variables in the context. - - :return: a list of variable names - """ - keys = [] - for frame in self.frames: - keys += [key for key in frame if key not in keys] - return keys - - def items(self): - """Return a list of ``(name, value)`` tuples for all variables in the - context. - - :return: a list of variables - """ - return [(key, self.get(key)) for key in self.keys()] - - def update(self, mapping): - """Update the context from the mapping provided.""" - self.frames[0].update(mapping) - - def push(self, data): - """Push a new scope on the stack. - - :param data: the data dictionary to push on the context stack. - """ - - def pop(self): - """Pop the top-most scope from the stack.""" - - -def _apply_directives(stream, directives, ctxt, vars): - """Apply the given directives to the stream. - - :param stream: the stream the directives should be applied to - :param directives: the list of directives to apply - :param ctxt: the `Context` - :param vars: additional variables that should be available when Python - code is executed - :return: the stream with the given directives applied - """ - if directives: - stream = directives[0](iter(stream), directives[1:], ctxt, **vars) - return stream - - -def _eval_expr(expr, ctxt, vars=None): - """Evaluate the given `Expression` object. - - :param expr: the expression to evaluate - :param ctxt: the `Context` - :param vars: additional variables that should be available to the - expression - :return: the result of the evaluation - """ - if vars: - ctxt.push(vars) - retval = expr.evaluate(ctxt) - if vars: - ctxt.pop() - return retval - - -def _exec_suite(suite, ctxt, vars=None): - """Execute the given `Suite` object. 
- - :param suite: the code suite to execute - :param ctxt: the `Context` - :param vars: additional variables that should be available to the - code - """ - if vars: - ctxt.push(vars) - ctxt.push({}) - suite.execute(ctxt) - if vars: - top = ctxt.pop() - ctxt.pop() - ctxt.frames[0].update(top) - - -class DirectiveFactoryMeta(type): - """Meta class for directive factories.""" - - def __new__(cls, name, bases, d): - if 'directives' in d: - d['_dir_by_name'] = dict(d['directives']) - d['_dir_order'] = [directive[1] for directive in d['directives']] - - return type.__new__(cls, name, bases, d) - - -class DirectiveFactory(object): - """Base for classes that provide a set of template directives. - - :since: version 0.6 - """ - __metaclass__ = DirectiveFactoryMeta - - directives = [] - """A list of ``(name, cls)`` tuples that define the set of directives - provided by this factory. - """ - - def get_directive(self, name): - """Return the directive class for the given name. - - :param name: the directive name as used in the template - :return: the directive class - :see: `Directive` - """ - return self._dir_by_name.get(name) - - def get_directive_index(self, dir_cls): - """Return a key for the given directive class that should be used to - sort it among other directives on the same `SUB` event. - - The default implementation simply returns the index of the directive in - the `directives` list. - - :param dir_cls: the directive class - :return: the sort key - """ - if dir_cls in self._dir_order: - return self._dir_order.index(dir_cls) - return len(self._dir_order) - - -class Template(DirectiveFactory): - """Abstract template base class. - - This class implements most of the template processing model, but does not - specify the syntax of templates. 
- """ - - EXEC = StreamEventKind('EXEC') - """Stream event kind representing a Python code suite to execute.""" - - EXPR = StreamEventKind('EXPR') - """Stream event kind representing a Python expression.""" - - INCLUDE = StreamEventKind('INCLUDE') - """Stream event kind representing the inclusion of another template.""" - - SUB = StreamEventKind('SUB') - """Stream event kind representing a nested stream to which one or more - directives should be applied. - """ - - serializer = None - _number_conv = unicode # function used to convert numbers to event data - - def __init__(self, source, filepath=None, filename=None, loader=None, - encoding=None, lookup='strict', allow_exec=True): - """Initialize a template from either a string, a file-like object, or - an already parsed markup stream. - - :param source: a string, file-like object, or markup stream to read the - template from - :param filepath: the absolute path to the template file - :param filename: the path to the template file relative to the search - path - :param loader: the `TemplateLoader` to use for loading included - templates - :param encoding: the encoding of the `source` - :param lookup: the variable lookup mechanism; either "strict" (the - default), "lenient", or a custom lookup class - :param allow_exec: whether Python code blocks in templates should be - allowed - - :note: Changed in 0.5: Added the `allow_exec` argument - """ - self.filepath = filepath or filename - self.filename = filename - self.loader = loader - self.lookup = lookup - self.allow_exec = allow_exec - self._init_filters() - self._init_loader() - self._prepared = False - - if isinstance(source, basestring): - source = StringIO(source) - else: - source = source - try: - self._stream = self._parse(source, encoding) - except ParseError, e: - raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset) - - def __getstate__(self): - state = self.__dict__.copy() - state['filters'] = [] - return state - - def __setstate__(self, state): 
- self.__dict__ = state - self._init_filters() - - def __repr__(self): - return '<%s "%s">' % (type(self).__name__, self.filename) - - def _init_filters(self): - self.filters = [self._flatten, self._include] - - def _init_loader(self): - if self.loader is None: - from genshi.template.loader import TemplateLoader - if self.filename: - if self.filepath != self.filename: - basedir = os.path.normpath(self.filepath)[:-len( - os.path.normpath(self.filename)) - ] - else: - basedir = os.path.dirname(self.filename) - else: - basedir = '.' - self.loader = TemplateLoader([os.path.abspath(basedir)]) - - @property - def stream(self): - if not self._prepared: - self._stream = list(self._prepare(self._stream)) - self._prepared = True - return self._stream - - def _parse(self, source, encoding): - """Parse the template. - - The parsing stage parses the template and constructs a list of - directives that will be executed in the render stage. The input is - split up into literal output (text that does not depend on the context - data) and directives or expressions. - - :param source: a file-like object containing the XML source of the - template, or an XML event stream - :param encoding: the encoding of the `source` - """ - raise NotImplementedError - - def _prepare(self, stream): - """Call the `attach` method of every directive found in the template. 
- - :param stream: the event stream of the template - """ - from genshi.template.loader import TemplateNotFound - - for kind, data, pos in stream: - if kind is SUB: - directives = [] - substream = data[1] - for _, cls, value, namespaces, pos in sorted(data[0]): - directive, substream = cls.attach(self, substream, value, - namespaces, pos) - if directive: - directives.append(directive) - substream = self._prepare(substream) - if directives: - yield kind, (directives, list(substream)), pos - else: - for event in substream: - yield event - else: - if kind is INCLUDE: - href, cls, fallback = data - if isinstance(href, basestring) and \ - not getattr(self.loader, 'auto_reload', True): - # If the path to the included template is static, and - # auto-reloading is disabled on the template loader, - # the template is inlined into the stream - try: - tmpl = self.loader.load(href, relative_to=pos[0], - cls=cls or self.__class__) - for event in tmpl.stream: - yield event - except TemplateNotFound: - if fallback is None: - raise - for event in self._prepare(fallback): - yield event - continue - elif fallback: - # Otherwise the include is performed at run time - data = href, cls, list(self._prepare(fallback)) - - yield kind, data, pos - - def generate(self, *args, **kwargs): - """Apply the template to the given context data. - - Any keyword arguments are made available to the template as context - data. - - Only one positional argument is accepted: if it is provided, it must be - an instance of the `Context` class, and keyword arguments are ignored. - This calling style is used for internal processing. - - :return: a markup event stream representing the result of applying - the template to the context data. 
- """ - vars = {} - if args: - assert len(args) == 1 - ctxt = args[0] - if ctxt is None: - ctxt = Context(**kwargs) - else: - vars = kwargs - assert isinstance(ctxt, Context) - else: - ctxt = Context(**kwargs) - - stream = self.stream - for filter_ in self.filters: - stream = filter_(iter(stream), ctxt, **vars) - return Stream(stream, self.serializer) - - def _flatten(self, stream, ctxt, **vars): - number_conv = self._number_conv - stack = [] - push = stack.append - pop = stack.pop - stream = iter(stream) - - while 1: - for kind, data, pos in stream: - - if kind is START and data[1]: - # Attributes may still contain expressions in start tags at - # this point, so do some evaluation - tag, attrs = data - new_attrs = [] - for name, value in attrs: - if type(value) is list: # this is an interpolated string - values = [event[1] - for event in self._flatten(value, ctxt, **vars) - if event[0] is TEXT and event[1] is not None - ] - if not values: - continue - value = ''.join(values) - new_attrs.append((name, value)) - yield kind, (tag, Attrs(new_attrs)), pos - - elif kind is EXPR: - result = _eval_expr(data, ctxt, vars) - if result is not None: - # First check for a string, otherwise the iterable test - # below succeeds, and the string will be chopped up into - # individual characters - if isinstance(result, basestring): - yield TEXT, result, pos - elif isinstance(result, (int, float, long)): - yield TEXT, number_conv(result), pos - elif hasattr(result, '__iter__'): - push(stream) - stream = _ensure(result) - break - else: - yield TEXT, unicode(result), pos - - elif kind is SUB: - # This event is a list of directives and a list of nested - # events to which those directives should be applied - push(stream) - stream = _apply_directives(data[1], data[0], ctxt, vars) - break - - elif kind is EXEC: - _exec_suite(data, ctxt, vars) - - else: - yield kind, data, pos - - else: - if not stack: - break - stream = pop() - - def _include(self, stream, ctxt, **vars): - """Internal 
stream filter that performs inclusion of external - template files. - """ - from genshi.template.loader import TemplateNotFound - - for event in stream: - if event[0] is INCLUDE: - href, cls, fallback = event[1] - if not isinstance(href, basestring): - parts = [] - for subkind, subdata, subpos in self._flatten(href, ctxt, - **vars): - if subkind is TEXT: - parts.append(subdata) - href = ''.join([x for x in parts if x is not None]) - try: - tmpl = self.loader.load(href, relative_to=event[2][0], - cls=cls or self.__class__) - for event in tmpl.generate(ctxt, **vars): - yield event - except TemplateNotFound: - if fallback is None: - raise - for filter_ in self.filters: - fallback = filter_(iter(fallback), ctxt, **vars) - for event in fallback: - yield event - else: - yield event - - -EXEC = Template.EXEC -EXPR = Template.EXPR -INCLUDE = Template.INCLUDE -SUB = Template.SUB diff --git a/genshi/template/directives.py b/genshi/template/directives.py deleted file mode 100644 index e2c9424..0000000 --- a/genshi/template/directives.py +++ /dev/null @@ -1,725 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. 
- -"""Implementation of the various template directives.""" - -from genshi.core import QName, Stream -from genshi.path import Path -from genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \ - EXPR, _apply_directives, _eval_expr -from genshi.template.eval import Expression, ExpressionASTTransformer, \ - _ast, _parse - -__all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective', - 'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective', - 'OtherwiseDirective', 'ReplaceDirective', 'StripDirective', - 'WhenDirective', 'WithDirective'] -__docformat__ = 'restructuredtext en' - - -class DirectiveMeta(type): - """Meta class for template directives.""" - - def __new__(cls, name, bases, d): - d['tagname'] = name.lower().replace('directive', '') - return type.__new__(cls, name, bases, d) - - -class Directive(object): - """Abstract base class for template directives. - - A directive is basically a callable that takes three positional arguments: - ``ctxt`` is the template data context, ``stream`` is an iterable over the - events that the directive applies to, and ``directives`` is is a list of - other directives on the same stream that need to be applied. - - Directives can be "anonymous" or "registered". Registered directives can be - applied by the template author using an XML attribute with the - corresponding name in the template. Such directives should be subclasses of - this base class that can be instantiated with the value of the directive - attribute as parameter. - - Anonymous directives are simply functions conforming to the protocol - described above, and can only be applied programmatically (for example by - template filters). 
- """ - __metaclass__ = DirectiveMeta - __slots__ = ['expr'] - - def __init__(self, value, template=None, namespaces=None, lineno=-1, - offset=-1): - self.expr = self._parse_expr(value, template, lineno, offset) - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - """Called after the template stream has been completely parsed. - - :param template: the `Template` object - :param stream: the event stream associated with the directive - :param value: the argument value for the directive; if the directive was - specified as an element, this will be an `Attrs` instance - with all specified attributes, otherwise it will be a - `unicode` object with just the attribute value - :param namespaces: a mapping of namespace URIs to prefixes - :param pos: a ``(filename, lineno, offset)`` tuple describing the - location where the directive was found in the source - - This class method should return a ``(directive, stream)`` tuple. If - ``directive`` is not ``None``, it should be an instance of the `Directive` - class, and gets added to the list of directives applied to the substream - at runtime. `stream` is an event stream that replaces the original - stream associated with the directive. - """ - return cls(value, template, namespaces, *pos[1:]), stream - - def __call__(self, stream, directives, ctxt, **vars): - """Apply the directive to the given stream. 
- - :param stream: the event stream - :param directives: a list of the remaining directives that should - process the stream - :param ctxt: the context data - :param vars: additional variables that should be made available when - Python code is executed - """ - raise NotImplementedError - - def __repr__(self): - expr = '' - if getattr(self, 'expr', None) is not None: - expr = ' "%s"' % self.expr.source - return '<%s%s>' % (type(self).__name__, expr) - - @classmethod - def _parse_expr(cls, expr, template, lineno=-1, offset=-1): - """Parses the given expression, raising a useful error message when a - syntax error is encountered. - """ - try: - return expr and Expression(expr, template.filepath, lineno, - lookup=template.lookup) or None - except SyntaxError, err: - err.msg += ' in expression "%s" of "%s" directive' % (expr, - cls.tagname) - raise TemplateSyntaxError(err, template.filepath, lineno, - offset + (err.offset or 0)) - - -def _assignment(ast): - """Takes the AST representation of an assignment, and returns a - function that applies the assignment of a given value to a dictionary. - """ - def _names(node): - if isinstance(node, _ast.Tuple): - return tuple([_names(child) for child in node.elts]) - elif isinstance(node, _ast.Name): - return node.id - def _assign(data, value, names=_names(ast)): - if type(names) is tuple: - for idx in range(len(names)): - _assign(data, value[idx], names[idx]) - else: - data[names] = value - return _assign - - -class AttrsDirective(Directive): - """Implementation of the ``py:attrs`` template directive. - - The value of the ``py:attrs`` attribute should be a dictionary or a sequence - of ``(name, value)`` tuples. The items in that dictionary or sequence are - added as attributes to the element: - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> - ... <li py:attrs="foo">Bar</li> - ... 
</ul>''') - >>> print(tmpl.generate(foo={'class': 'collapse'})) - <ul> - <li class="collapse">Bar</li> - </ul> - >>> print(tmpl.generate(foo=[('class', 'collapse')])) - <ul> - <li class="collapse">Bar</li> - </ul> - - If the value evaluates to ``None`` (or any other non-truth value), no - attributes are added: - - >>> print(tmpl.generate(foo=None)) - <ul> - <li>Bar</li> - </ul> - """ - __slots__ = [] - - def __call__(self, stream, directives, ctxt, **vars): - def _generate(): - kind, (tag, attrib), pos = stream.next() - attrs = _eval_expr(self.expr, ctxt, vars) - if attrs: - if isinstance(attrs, Stream): - try: - attrs = iter(attrs).next() - except StopIteration: - attrs = [] - elif not isinstance(attrs, list): # assume it's a dict - attrs = attrs.items() - attrib -= [name for name, val in attrs if val is None] - attrib |= [(QName(name), unicode(val).strip()) for name, val - in attrs if val is not None] - yield kind, (tag, attrib), pos - for event in stream: - yield event - - return _apply_directives(_generate(), directives, ctxt, vars) - - -class ContentDirective(Directive): - """Implementation of the ``py:content`` template directive. - - This directive replaces the content of the element with the result of - evaluating the value of the ``py:content`` attribute: - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> - ... <li py:content="bar">Hello</li> - ... </ul>''') - >>> print(tmpl.generate(bar='Bye')) - <ul> - <li>Bye</li> - </ul> - """ - __slots__ = [] - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - raise TemplateSyntaxError('The content directive can not be used ' - 'as an element', template.filepath, - *pos[1:]) - expr = cls._parse_expr(value, template, *pos[1:]) - return None, [stream[0], (EXPR, expr, pos), stream[-1]] - - -class DefDirective(Directive): - """Implementation of the ``py:def`` template directive. 
- - This directive can be used to create "Named Template Functions", which - are template snippets that are not actually output during normal - processing, but rather can be expanded from expressions in other places - in the template. - - A named template function can be used just like a normal Python function - from template expressions: - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <p py:def="echo(greeting, name='world')" class="message"> - ... ${greeting}, ${name}! - ... </p> - ... ${echo('Hi', name='you')} - ... </div>''') - >>> print(tmpl.generate(bar='Bye')) - <div> - <p class="message"> - Hi, you! - </p> - </div> - - If a function does not require parameters, the parenthesis can be omitted - in the definition: - - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <p py:def="helloworld" class="message"> - ... Hello, world! - ... </p> - ... ${helloworld()} - ... </div>''') - >>> print(tmpl.generate(bar='Bye')) - <div> - <p class="message"> - Hello, world! 
- </p> - </div> - """ - __slots__ = ['name', 'args', 'star_args', 'dstar_args', 'defaults'] - - def __init__(self, args, template, namespaces=None, lineno=-1, offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - ast = _parse(args).body - self.args = [] - self.star_args = None - self.dstar_args = None - self.defaults = {} - if isinstance(ast, _ast.Call): - self.name = ast.func.id - for arg in ast.args: - # only names - self.args.append(arg.id) - for kwd in ast.keywords: - self.args.append(kwd.arg) - exp = Expression(kwd.value, template.filepath, - lineno, lookup=template.lookup) - self.defaults[kwd.arg] = exp - if getattr(ast, 'starargs', None): - self.star_args = ast.starargs.id - if getattr(ast, 'kwargs', None): - self.dstar_args = ast.kwargs.id - else: - self.name = ast.id - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('function') - return super(DefDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - stream = list(stream) - - def function(*args, **kwargs): - scope = {} - args = list(args) # make mutable - for name in self.args: - if args: - scope[name] = args.pop(0) - else: - if name in kwargs: - val = kwargs.pop(name) - else: - val = _eval_expr(self.defaults.get(name), ctxt, vars) - scope[name] = val - if not self.star_args is None: - scope[self.star_args] = args - if not self.dstar_args is None: - scope[self.dstar_args] = kwargs - ctxt.push(scope) - for event in _apply_directives(stream, directives, ctxt, vars): - yield event - ctxt.pop() - function.__name__ = self.name - - # Store the function reference in the bottom context frame so that it - # doesn't get popped off before processing the template has finished - # FIXME: this makes context data mutable as a side-effect - ctxt.frames[-1][self.name] = function - - return [] - - def __repr__(self): - return '<%s "%s">' % 
(type(self).__name__, self.name) - - -class ForDirective(Directive): - """Implementation of the ``py:for`` template directive for repeating an - element based on an iterable in the context data. - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> - ... <li py:for="item in items">${item}</li> - ... </ul>''') - >>> print(tmpl.generate(items=[1, 2, 3])) - <ul> - <li>1</li><li>2</li><li>3</li> - </ul> - """ - __slots__ = ['assign', 'filename'] - - def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): - if ' in ' not in value: - raise TemplateSyntaxError('"in" keyword missing in "for" directive', - template.filepath, lineno, offset) - assign, value = value.split(' in ', 1) - ast = _parse(assign, 'exec') - value = 'iter(%s)' % value.strip() - self.assign = _assignment(ast.body[0].value) - self.filename = template.filepath - Directive.__init__(self, value, template, namespaces, lineno, offset) - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('each') - return super(ForDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - iterable = _eval_expr(self.expr, ctxt, vars) - if iterable is None: - return - - assign = self.assign - scope = {} - stream = list(stream) - for item in iterable: - assign(scope, item) - ctxt.push(scope) - for event in _apply_directives(stream, directives, ctxt, vars): - yield event - ctxt.pop() - - def __repr__(self): - return '<%s>' % type(self).__name__ - - -class IfDirective(Directive): - """Implementation of the ``py:if`` template directive for conditionally - excluding elements from being output. - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <b py:if="foo">${bar}</b> - ... 
</div>''') - >>> print(tmpl.generate(foo=True, bar='Hello')) - <div> - <b>Hello</b> - </div> - """ - __slots__ = [] - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('test') - return super(IfDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - value = _eval_expr(self.expr, ctxt, vars) - if value: - return _apply_directives(stream, directives, ctxt, vars) - return [] - - -class MatchDirective(Directive): - """Implementation of the ``py:match`` template directive. - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <span py:match="greeting"> - ... Hello ${select('@name')} - ... </span> - ... <greeting name="Dude" /> - ... </div>''') - >>> print(tmpl.generate()) - <div> - <span> - Hello Dude - </span> - </div> - """ - __slots__ = ['path', 'namespaces', 'hints'] - - def __init__(self, value, template, hints=None, namespaces=None, - lineno=-1, offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.path = Path(value, template.filepath, lineno) - self.namespaces = namespaces or {} - self.hints = hints or () - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - hints = [] - if type(value) is dict: - if value.get('buffer', '').lower() == 'false': - hints.append('not_buffered') - if value.get('once', '').lower() == 'true': - hints.append('match_once') - if value.get('recursive', '').lower() == 'false': - hints.append('not_recursive') - value = value.get('path') - return cls(value, template, frozenset(hints), namespaces, *pos[1:]), \ - stream - - def __call__(self, stream, directives, ctxt, **vars): - ctxt._match_templates.append((self.path.test(ignore_context=True), - self.path, list(stream), self.hints, - self.namespaces, directives)) - return [] - - def __repr__(self): - return '<%s 
"%s">' % (type(self).__name__, self.path.source) - - -class ReplaceDirective(Directive): - """Implementation of the ``py:replace`` template directive. - - This directive replaces the element with the result of evaluating the - value of the ``py:replace`` attribute: - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <span py:replace="bar">Hello</span> - ... </div>''') - >>> print(tmpl.generate(bar='Bye')) - <div> - Bye - </div> - - This directive is equivalent to ``py:content`` combined with ``py:strip``, - providing a less verbose way to achieve the same effect: - - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <span py:content="bar" py:strip="">Hello</span> - ... </div>''') - >>> print(tmpl.generate(bar='Bye')) - <div> - Bye - </div> - """ - __slots__ = [] - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('value') - if not value: - raise TemplateSyntaxError('missing value for "replace" directive', - template.filepath, *pos[1:]) - expr = cls._parse_expr(value, template, *pos[1:]) - return None, [(EXPR, expr, pos)] - - -class StripDirective(Directive): - """Implementation of the ``py:strip`` template directive. - - When the value of the ``py:strip`` attribute evaluates to ``True``, the - element is stripped from the output - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <div py:strip="True"><b>foo</b></div> - ... </div>''') - >>> print(tmpl.generate()) - <div> - <b>foo</b> - </div> - - Leaving the attribute value empty is equivalent to a truth value. - - This directive is particulary interesting for named template functions or - match templates that do not generate a top-level element: - - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... 
<div py:def="echo(what)" py:strip=""> - ... <b>${what}</b> - ... </div> - ... ${echo('foo')} - ... </div>''') - >>> print(tmpl.generate()) - <div> - <b>foo</b> - </div> - """ - __slots__ = [] - - def __call__(self, stream, directives, ctxt, **vars): - def _generate(): - if not self.expr or _eval_expr(self.expr, ctxt, vars): - stream.next() # skip start tag - previous = stream.next() - for event in stream: - yield previous - previous = event - else: - for event in stream: - yield event - return _apply_directives(_generate(), directives, ctxt, vars) - - -class ChooseDirective(Directive): - """Implementation of the ``py:choose`` directive for conditionally selecting - one of several body elements to display. - - If the ``py:choose`` expression is empty the expressions of nested - ``py:when`` directives are tested for truth. The first true ``py:when`` - body is output. If no ``py:when`` directive is matched then the fallback - directive ``py:otherwise`` will be used. - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/" - ... py:choose=""> - ... <span py:when="0 == 1">0</span> - ... <span py:when="1 == 1">1</span> - ... <span py:otherwise="">2</span> - ... </div>''') - >>> print(tmpl.generate()) - <div> - <span>1</span> - </div> - - If the ``py:choose`` directive contains an expression, the nested - ``py:when`` directives are tested for equality to the ``py:choose`` - expression: - - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/" - ... py:choose="2"> - ... <span py:when="1">1</span> - ... <span py:when="2">2</span> - ... </div>''') - >>> print(tmpl.generate()) - <div> - <span>2</span> - </div> - - Behavior is undefined if a ``py:choose`` block contains content outside a - ``py:when`` or ``py:otherwise`` block. Behavior is also undefined if a - ``py:otherwise`` occurs before ``py:when`` blocks. 
- """ - __slots__ = ['matched', 'value'] - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('test') - return super(ChooseDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - info = [False, bool(self.expr), None] - if self.expr: - info[2] = _eval_expr(self.expr, ctxt, vars) - ctxt._choice_stack.append(info) - for event in _apply_directives(stream, directives, ctxt, vars): - yield event - ctxt._choice_stack.pop() - - -class WhenDirective(Directive): - """Implementation of the ``py:when`` directive for nesting in a parent with - the ``py:choose`` directive. - - See the documentation of the `ChooseDirective` for usage. - """ - __slots__ = ['filename'] - - def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): - Directive.__init__(self, value, template, namespaces, lineno, offset) - self.filename = template.filepath - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('test') - return super(WhenDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - info = ctxt._choice_stack and ctxt._choice_stack[-1] - if not info: - raise TemplateRuntimeError('"when" directives can only be used ' - 'inside a "choose" directive', - self.filename, *stream.next()[2][1:]) - if info[0]: - return [] - if not self.expr and not info[1]: - raise TemplateRuntimeError('either "choose" or "when" directive ' - 'must have a test expression', - self.filename, *stream.next()[2][1:]) - if info[1]: - value = info[2] - if self.expr: - matched = value == _eval_expr(self.expr, ctxt, vars) - else: - matched = bool(value) - else: - matched = bool(_eval_expr(self.expr, ctxt, vars)) - info[0] = matched - if not matched: - return [] - - return _apply_directives(stream, directives, ctxt, vars) - - -class 
OtherwiseDirective(Directive): - """Implementation of the ``py:otherwise`` directive for nesting in a parent - with the ``py:choose`` directive. - - See the documentation of `ChooseDirective` for usage. - """ - __slots__ = ['filename'] - - def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.filename = template.filepath - - def __call__(self, stream, directives, ctxt, **vars): - info = ctxt._choice_stack and ctxt._choice_stack[-1] - if not info: - raise TemplateRuntimeError('an "otherwise" directive can only be ' - 'used inside a "choose" directive', - self.filename, *stream.next()[2][1:]) - if info[0]: - return [] - info[0] = True - - return _apply_directives(stream, directives, ctxt, vars) - - -class WithDirective(Directive): - """Implementation of the ``py:with`` template directive, which allows - shorthand access to variables and expressions. - - >>> from genshi.template import MarkupTemplate - >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"> - ... <span py:with="y=7; z=x+10">$x $y $z</span> - ... 
</div>''') - >>> print(tmpl.generate(x=42)) - <div> - <span>42 7 52</span> - </div> - """ - __slots__ = ['vars'] - - def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.vars = [] - value = value.strip() - try: - ast = _parse(value, 'exec') - for node in ast.body: - if not isinstance(node, _ast.Assign): - raise TemplateSyntaxError('only assignment allowed in ' - 'value of the "with" directive', - template.filepath, lineno, offset) - self.vars.append(([_assignment(n) for n in node.targets], - Expression(node.value, template.filepath, - lineno, lookup=template.lookup))) - except SyntaxError, err: - err.msg += ' in expression "%s" of "%s" directive' % (value, - self.tagname) - raise TemplateSyntaxError(err, template.filepath, lineno, - offset + (err.offset or 0)) - - @classmethod - def attach(cls, template, stream, value, namespaces, pos): - if type(value) is dict: - value = value.get('vars') - return super(WithDirective, cls).attach(template, stream, value, - namespaces, pos) - - def __call__(self, stream, directives, ctxt, **vars): - frame = {} - ctxt.push(frame) - for targets, expr in self.vars: - value = _eval_expr(expr, ctxt, vars) - for assign in targets: - assign(frame, value) - for event in _apply_directives(stream, directives, ctxt, vars): - yield event - ctxt.pop() - - def __repr__(self): - return '<%s>' % (type(self).__name__) diff --git a/genshi/template/eval.py b/genshi/template/eval.py deleted file mode 100644 index 8593aaa..0000000 --- a/genshi/template/eval.py +++ /dev/null @@ -1,629 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2010 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. 
-# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Support for "safe" evaluation of Python expressions.""" - -import __builtin__ - -from textwrap import dedent -from types import CodeType - -from genshi.core import Markup -from genshi.template.astutil import ASTTransformer, ASTCodeGenerator, \ - _ast, parse -from genshi.template.base import TemplateRuntimeError -from genshi.util import flatten - -__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup', - 'Undefined', 'UndefinedError'] -__docformat__ = 'restructuredtext en' - - -# Check for a Python 2.4 bug in the eval loop -has_star_import_bug = False -try: - class _FakeMapping(object): - __getitem__ = __setitem__ = lambda *a: None - exec 'from sys import *' in {}, _FakeMapping() -except SystemError: - has_star_import_bug = True -del _FakeMapping - - -def _star_import_patch(mapping, modname): - """This function is used as helper if a Python version with a broken - star-import opcode is in use. - """ - module = __import__(modname, None, None, ['__all__']) - if hasattr(module, '__all__'): - members = module.__all__ - else: - members = [x for x in module.__dict__ if not x.startswith('_')] - mapping.update([(name, getattr(module, name)) for name in members]) - - -class Code(object): - """Abstract base class for the `Expression` and `Suite` classes.""" - __slots__ = ['source', 'code', 'ast', '_globals'] - - def __init__(self, source, filename=None, lineno=-1, lookup='strict', - xform=None): - """Create the code object, either from a string, or from an AST node. 
- - :param source: either a string containing the source code, or an AST - node - :param filename: the (preferably absolute) name of the file containing - the code - :param lineno: the number of the line on which the code was found - :param lookup: the lookup class that defines how variables are looked - up in the context; can be either "strict" (the default), - "lenient", or a custom lookup class - :param xform: the AST transformer that should be applied to the code; - if `None`, the appropriate transformation is chosen - depending on the mode - """ - if isinstance(source, basestring): - self.source = source - node = _parse(source, mode=self.mode) - else: - assert isinstance(source, _ast.AST), \ - 'Expected string or AST node, but got %r' % source - self.source = '?' - if self.mode == 'eval': - node = _ast.Expression() - node.body = source - else: - node = _ast.Module() - node.body = [source] - - self.ast = node - self.code = _compile(node, self.source, mode=self.mode, - filename=filename, lineno=lineno, xform=xform) - if lookup is None: - lookup = LenientLookup - elif isinstance(lookup, basestring): - lookup = {'lenient': LenientLookup, 'strict': StrictLookup}[lookup] - self._globals = lookup.globals - - def __getstate__(self): - state = {'source': self.source, 'ast': self.ast, - 'lookup': self._globals.im_self} - c = self.code - state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code, - c.co_consts, c.co_names, c.co_varnames, c.co_filename, - c.co_name, c.co_firstlineno, c.co_lnotab, (), ()) - return state - - def __setstate__(self, state): - self.source = state['source'] - self.ast = state['ast'] - self.code = CodeType(0, *state['code']) - self._globals = state['lookup'].globals - - def __eq__(self, other): - return (type(other) == type(self)) and (self.code == other.code) - - def __hash__(self): - return hash(self.code) - - def __ne__(self, other): - return not self == other - - def __repr__(self): - return '%s(%r)' % (type(self).__name__, 
self.source) - - -class Expression(Code): - """Evaluates Python expressions used in templates. - - >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'}) - >>> Expression('test').evaluate(data) - 'Foo' - - >>> Expression('items[0]').evaluate(data) - 1 - >>> Expression('items[-1]').evaluate(data) - 3 - >>> Expression('dict["some"]').evaluate(data) - 'thing' - - Similar to e.g. Javascript, expressions in templates can use the dot - notation for attribute access to access items in mappings: - - >>> Expression('dict.some').evaluate(data) - 'thing' - - This also works the other way around: item access can be used to access - any object attribute: - - >>> class MyClass(object): - ... myattr = 'Bar' - >>> data = dict(mine=MyClass(), key='myattr') - >>> Expression('mine.myattr').evaluate(data) - 'Bar' - >>> Expression('mine["myattr"]').evaluate(data) - 'Bar' - >>> Expression('mine[key]').evaluate(data) - 'Bar' - - All of the standard Python operators are available to template expressions. - Built-in functions such as ``len()`` are also available in template - expressions: - - >>> data = dict(items=[1, 2, 3]) - >>> Expression('len(items)').evaluate(data) - 3 - """ - __slots__ = [] - mode = 'eval' - - def evaluate(self, data): - """Evaluate the expression against the given data dictionary. - - :param data: a mapping containing the data to evaluate against - :return: the result of the evaluation - """ - __traceback_hide__ = 'before_and_this' - _globals = self._globals(data) - return eval(self.code, _globals, {'__data__': data}) - - -class Suite(Code): - """Executes Python statements used in templates. - - >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'}) - >>> Suite("foo = dict['some']").execute(data) - >>> data['foo'] - 'thing' - """ - __slots__ = [] - mode = 'exec' - - def execute(self, data): - """Execute the suite in the given data dictionary. 
- - :param data: a mapping containing the data to execute in - """ - __traceback_hide__ = 'before_and_this' - _globals = self._globals(data) - exec self.code in _globals, data - - -UNDEFINED = object() - - -class UndefinedError(TemplateRuntimeError): - """Exception thrown when a template expression attempts to access a variable - not defined in the context. - - :see: `LenientLookup`, `StrictLookup` - """ - def __init__(self, name, owner=UNDEFINED): - if owner is not UNDEFINED: - message = '%s has no member named "%s"' % (repr(owner), name) - else: - message = '"%s" not defined' % name - TemplateRuntimeError.__init__(self, message) - - -class Undefined(object): - """Represents a reference to an undefined variable. - - Unlike the Python runtime, template expressions can refer to an undefined - variable without causing a `NameError` to be raised. The result will be an - instance of the `Undefined` class, which is treated the same as ``False`` in - conditions, but raise an exception on any other operation: - - >>> foo = Undefined('foo') - >>> bool(foo) - False - >>> list(foo) - [] - >>> print(foo) - undefined - - However, calling an undefined variable, or trying to access an attribute - of that variable, will raise an exception that includes the name used to - reference that undefined variable. - - >>> foo('bar') - Traceback (most recent call last): - ... - UndefinedError: "foo" not defined - - >>> foo.bar - Traceback (most recent call last): - ... - UndefinedError: "foo" not defined - - :see: `LenientLookup` - """ - __slots__ = ['_name', '_owner'] - - def __init__(self, name, owner=UNDEFINED): - """Initialize the object. 
- - :param name: the name of the reference - :param owner: the owning object, if the variable is accessed as a member - """ - self._name = name - self._owner = owner - - def __iter__(self): - return iter([]) - - def __nonzero__(self): - return False - - def __repr__(self): - return '<%s %r>' % (type(self).__name__, self._name) - - def __str__(self): - return 'undefined' - - def _die(self, *args, **kwargs): - """Raise an `UndefinedError`.""" - __traceback_hide__ = True - raise UndefinedError(self._name, self._owner) - __call__ = __getattr__ = __getitem__ = _die - - # Hack around some behavior introduced in Python 2.6.2 - # http://genshi.edgewall.org/ticket/324 - __length_hint__ = None - - -class LookupBase(object): - """Abstract base class for variable lookup implementations.""" - - @classmethod - def globals(cls, data): - """Construct the globals dictionary to use as the execution context for - the expression or suite. - """ - return { - '__data__': data, - '_lookup_name': cls.lookup_name, - '_lookup_attr': cls.lookup_attr, - '_lookup_item': cls.lookup_item, - '_star_import_patch': _star_import_patch, - 'UndefinedError': UndefinedError, - } - - @classmethod - def lookup_name(cls, data, name): - __traceback_hide__ = True - val = data.get(name, UNDEFINED) - if val is UNDEFINED: - val = BUILTINS.get(name, val) - if val is UNDEFINED: - val = cls.undefined(name) - return val - - @classmethod - def lookup_attr(cls, obj, key): - __traceback_hide__ = True - try: - val = getattr(obj, key) - except AttributeError: - if hasattr(obj.__class__, key): - raise - else: - try: - val = obj[key] - except (KeyError, TypeError): - val = cls.undefined(key, owner=obj) - return val - - @classmethod - def lookup_item(cls, obj, key): - __traceback_hide__ = True - if len(key) == 1: - key = key[0] - try: - return obj[key] - except (AttributeError, KeyError, IndexError, TypeError), e: - if isinstance(key, basestring): - val = getattr(obj, key, UNDEFINED) - if val is UNDEFINED: - val = 
cls.undefined(key, owner=obj) - return val - raise - - @classmethod - def undefined(cls, key, owner=UNDEFINED): - """Can be overridden by subclasses to specify behavior when undefined - variables are accessed. - - :param key: the name of the variable - :param owner: the owning object, if the variable is accessed as a member - """ - raise NotImplementedError - - -class LenientLookup(LookupBase): - """Default variable lookup mechanism for expressions. - - When an undefined variable is referenced using this lookup style, the - reference evaluates to an instance of the `Undefined` class: - - >>> expr = Expression('nothing', lookup='lenient') - >>> undef = expr.evaluate({}) - >>> undef - <Undefined 'nothing'> - - The same will happen when a non-existing attribute or item is accessed on - an existing object: - - >>> expr = Expression('something.nil', lookup='lenient') - >>> expr.evaluate({'something': dict()}) - <Undefined 'nil'> - - See the documentation of the `Undefined` class for details on the behavior - of such objects. - - :see: `StrictLookup` - """ - - @classmethod - def undefined(cls, key, owner=UNDEFINED): - """Return an ``Undefined`` object.""" - __traceback_hide__ = True - return Undefined(key, owner=owner) - - -class StrictLookup(LookupBase): - """Strict variable lookup mechanism for expressions. - - Referencing an undefined variable using this lookup style will immediately - raise an ``UndefinedError``: - - >>> expr = Expression('nothing', lookup='strict') - >>> expr.evaluate({}) - Traceback (most recent call last): - ... - UndefinedError: "nothing" not defined - - The same happens when a non-existing attribute or item is accessed on an - existing object: - - >>> expr = Expression('something.nil', lookup='strict') - >>> expr.evaluate({'something': dict()}) - Traceback (most recent call last): - ... 
- UndefinedError: {} has no member named "nil" - """ - - @classmethod - def undefined(cls, key, owner=UNDEFINED): - """Raise an ``UndefinedError`` immediately.""" - __traceback_hide__ = True - raise UndefinedError(key, owner=owner) - - -def _parse(source, mode='eval'): - source = source.strip() - if mode == 'exec': - lines = [line.expandtabs() for line in source.splitlines()] - if lines: - first = lines[0] - rest = dedent('\n'.join(lines[1:])).rstrip() - if first.rstrip().endswith(':') and not rest[0].isspace(): - rest = '\n'.join([' %s' % line for line in rest.splitlines()]) - source = '\n'.join([first, rest]) - if isinstance(source, unicode): - source = '\xef\xbb\xbf' + source.encode('utf-8') - return parse(source, mode) - - -def _compile(node, source=None, mode='eval', filename=None, lineno=-1, - xform=None): - if isinstance(filename, unicode): - # unicode file names not allowed for code objects - filename = filename.encode('utf-8', 'replace') - elif not filename: - filename = '<string>' - if lineno <= 0: - lineno = 1 - - if xform is None: - xform = { - 'eval': ExpressionASTTransformer - }.get(mode, TemplateASTTransformer) - tree = xform().visit(node) - - if mode == 'eval': - name = '<Expression %r>' % (source or '?') - else: - lines = source.splitlines() - if not lines: - extract = '' - else: - extract = lines[0] - if len(lines) > 1: - extract += ' ...' - name = '<Suite %r>' % (extract) - new_source = ASTCodeGenerator(tree).code - code = compile(new_source, filename, mode) - - try: - # We'd like to just set co_firstlineno, but it's readonly. 
So we need - # to clone the code object while adjusting the line number - return CodeType(0, code.co_nlocals, code.co_stacksize, - code.co_flags | 0x0040, code.co_code, code.co_consts, - code.co_names, code.co_varnames, filename, name, - lineno, code.co_lnotab, (), ()) - except RuntimeError: - return code - - -def _new(class_, *args, **kwargs): - ret = class_() - for attr, value in zip(ret._fields, args): - if attr in kwargs: - raise ValueError('Field set both in args and kwargs') - setattr(ret, attr, value) - for attr, value in kwargs: - setattr(ret, attr, value) - return ret - - -BUILTINS = __builtin__.__dict__.copy() -BUILTINS.update({'Markup': Markup, 'Undefined': Undefined}) -CONSTANTS = frozenset(['False', 'True', 'None', 'NotImplemented', 'Ellipsis']) - - -class TemplateASTTransformer(ASTTransformer): - """Concrete AST transformer that implements the AST transformations needed - for code embedded in templates. - """ - - def __init__(self): - self.locals = [CONSTANTS] - - def _extract_names(self, node): - names = set() - def _process(node): - if isinstance(node, _ast.Name): - names.add(node.id) - elif isinstance(node, _ast.alias): - names.add(node.asname or node.name) - elif isinstance(node, _ast.Tuple): - for elt in node.elts: - _process(elt) - if hasattr(node, 'args'): - for arg in node.args: - _process(arg) - if hasattr(node, 'vararg'): - names.add(node.vararg) - if hasattr(node, 'kwarg'): - names.add(node.kwarg) - elif hasattr(node, 'names'): - for elt in node.names: - _process(elt) - return names - - def visit_Str(self, node): - if isinstance(node.s, str): - try: # If the string is ASCII, return a `str` object - node.s.decode('ascii') - except ValueError: # Otherwise return a `unicode` object - return _new(_ast.Str, node.s.decode('utf-8')) - return node - - def visit_ClassDef(self, node): - if len(self.locals) > 1: - self.locals[-1].add(node.name) - self.locals.append(set()) - try: - return ASTTransformer.visit_ClassDef(self, node) - finally: - 
self.locals.pop() - - def visit_Import(self, node): - if len(self.locals) > 1: - self.locals[-1].update(self._extract_names(node)) - return ASTTransformer.visit_Import(self, node) - - def visit_ImportFrom(self, node): - if [a.name for a in node.names] == ['*']: - if has_star_import_bug: - # This is a Python 2.4 bug. Only if we have a broken Python - # version do we need to apply this hack - node = _new(_ast.Expr, _new(_ast.Call, - _new(_ast.Name, '_star_import_patch'), [ - _new(_ast.Name, '__data__'), - _new(_ast.Str, node.module) - ], (), ())) - return node - if len(self.locals) > 1: - self.locals[-1].update(self._extract_names(node)) - return ASTTransformer.visit_ImportFrom(self, node) - - def visit_FunctionDef(self, node): - if len(self.locals) > 1: - self.locals[-1].add(node.name) - - self.locals.append(self._extract_names(node.args)) - try: - return ASTTransformer.visit_FunctionDef(self, node) - finally: - self.locals.pop() - - # GeneratorExp(expr elt, comprehension* generators) - def visit_GeneratorExp(self, node): - gens = [] - for generator in node.generators: - # comprehension = (expr target, expr iter, expr* ifs) - self.locals.append(set()) - gen = _new(_ast.comprehension, self.visit(generator.target), - self.visit(generator.iter), - [self.visit(if_) for if_ in generator.ifs]) - gens.append(gen) - - # use node.__class__ to make it reusable as ListComp - ret = _new(node.__class__, self.visit(node.elt), gens) - #delete inserted locals - del self.locals[-len(node.generators):] - return ret - - # ListComp(expr elt, comprehension* generators) - visit_ListComp = visit_GeneratorExp - - def visit_Lambda(self, node): - self.locals.append(self._extract_names(node.args)) - try: - return ASTTransformer.visit_Lambda(self, node) - finally: - self.locals.pop() - - def visit_Name(self, node): - # If the name refers to a local inside a lambda, list comprehension, or - # generator expression, leave it alone - if isinstance(node.ctx, _ast.Load) and \ - node.id not in 
flatten(self.locals): - # Otherwise, translate the name ref into a context lookup - name = _new(_ast.Name, '_lookup_name', _ast.Load()) - namearg = _new(_ast.Name, '__data__', _ast.Load()) - strarg = _new(_ast.Str, node.id) - node = _new(_ast.Call, name, [namearg, strarg], []) - elif isinstance(node.ctx, _ast.Store): - if len(self.locals) > 1: - self.locals[-1].add(node.id) - - return node - - -class ExpressionASTTransformer(TemplateASTTransformer): - """Concrete AST transformer that implements the AST transformations needed - for code embedded in templates. - """ - - def visit_Attribute(self, node): - if not isinstance(node.ctx, _ast.Load): - return ASTTransformer.visit_Attribute(self, node) - - func = _new(_ast.Name, '_lookup_attr', _ast.Load()) - args = [self.visit(node.value), _new(_ast.Str, node.attr)] - return _new(_ast.Call, func, args, []) - - def visit_Subscript(self, node): - if not isinstance(node.ctx, _ast.Load) or \ - not isinstance(node.slice, _ast.Index): - return ASTTransformer.visit_Subscript(self, node) - - func = _new(_ast.Name, '_lookup_item', _ast.Load()) - args = [ - self.visit(node.value), - _new(_ast.Tuple, (self.visit(node.slice.value),), _ast.Load()) - ] - return _new(_ast.Call, func, args, []) diff --git a/genshi/template/interpolation.py b/genshi/template/interpolation.py deleted file mode 100644 index 1e1a385..0000000 --- a/genshi/template/interpolation.py +++ /dev/null @@ -1,153 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2007-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""String interpolation routines, i.e. 
the splitting up a given text into some -parts that are literal strings, and others that are Python expressions. -""" - -from itertools import chain -import os -import re -from tokenize import PseudoToken - -from genshi.core import TEXT -from genshi.template.base import TemplateSyntaxError, EXPR -from genshi.template.eval import Expression - -__all__ = ['interpolate'] -__docformat__ = 'restructuredtext en' - -NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' -NAMECHARS = NAMESTART + '.0123456789' -PREFIX = '$' - -token_re = re.compile('%s|%s(?s)' % ( - r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1', - PseudoToken -)) - - -def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'): - """Parse the given string and extract expressions. - - This function is a generator that yields `TEXT` events for literal strings, - and `EXPR` events for expressions, depending on the results of parsing the - string. - - >>> for kind, data, pos in interpolate("hey ${foo}bar"): - ... 
print('%s %r' % (kind, data)) - TEXT 'hey ' - EXPR Expression('foo') - TEXT 'bar' - - :param text: the text to parse - :param filepath: absolute path to the file in which the text was found - (optional) - :param lineno: the line number at which the text was found (optional) - :param offset: the column number at which the text starts in the source - (optional) - :param lookup: the variable lookup mechanism; either "lenient" (the - default), "strict", or a custom lookup class - :return: a list of `TEXT` and `EXPR` events - :raise TemplateSyntaxError: when a syntax error in an expression is - encountered - """ - pos = [filepath, lineno, offset] - - textbuf = [] - textpos = None - for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]): - if is_expr: - if textbuf: - yield TEXT, ''.join(textbuf), textpos - del textbuf[:] - textpos = None - if chunk: - try: - expr = Expression(chunk.strip(), pos[0], pos[1], - lookup=lookup) - yield EXPR, expr, tuple(pos) - except SyntaxError, err: - raise TemplateSyntaxError(err, filepath, pos[1], - pos[2] + (err.offset or 0)) - else: - textbuf.append(chunk) - if textpos is None: - textpos = tuple(pos) - - if '\n' in chunk: - lines = chunk.splitlines() - pos[1] += len(lines) - 1 - pos[2] += len(lines[-1]) - else: - pos[2] += len(chunk) - - -def lex(text, textpos, filepath): - offset = pos = 0 - end = len(text) - escaped = False - - while 1: - if escaped: - offset = text.find(PREFIX, offset + 2) - escaped = False - else: - offset = text.find(PREFIX, pos) - if offset < 0 or offset == end - 1: - break - next = text[offset + 1] - - if next == '{': - if offset > pos: - yield False, text[pos:offset] - pos = offset + 2 - level = 1 - while level: - match = token_re.match(text, pos) - if match is None: - raise TemplateSyntaxError('invalid syntax', filepath, - *textpos[1:]) - pos = match.end() - tstart, tend = match.regs[3] - token = text[tstart:tend] - if token == '{': - level += 1 - elif token == '}': - level -= 1 - yield True, 
text[offset + 2:pos - 1] - - elif next in NAMESTART: - if offset > pos: - yield False, text[pos:offset] - pos = offset - pos += 1 - while pos < end: - char = text[pos] - if char not in NAMECHARS: - break - pos += 1 - yield True, text[offset + 1:pos].strip() - - elif not escaped and next == PREFIX: - if offset > pos: - yield False, text[pos:offset] - escaped = True - pos = offset + 1 - - else: - yield False, text[pos:offset + 1] - pos = offset + 1 - - if pos < end: - yield False, text[pos:] diff --git a/genshi/template/loader.py b/genshi/template/loader.py deleted file mode 100644 index 0e7cda7..0000000 --- a/genshi/template/loader.py +++ /dev/null @@ -1,344 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2010 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Template loading and caching.""" - -import os -try: - import threading -except ImportError: - import dummy_threading as threading - -from genshi.template.base import TemplateError -from genshi.util import LRUCache - -__all__ = ['TemplateLoader', 'TemplateNotFound', 'directory', 'package', - 'prefixed'] -__docformat__ = 'restructuredtext en' - - -class TemplateNotFound(TemplateError): - """Exception raised when a specific template file could not be found.""" - - def __init__(self, name, search_path): - """Create the exception. 
- - :param name: the filename of the template - :param search_path: the search path used to lookup the template - """ - TemplateError.__init__(self, 'Template "%s" not found' % name) - self.search_path = search_path - - -class TemplateLoader(object): - """Responsible for loading templates from files on the specified search - path. - - >>> import tempfile - >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template') - >>> os.write(fd, '<p>$var</p>') - 11 - >>> os.close(fd) - - The template loader accepts a list of directory paths that are then used - when searching for template files, in the given order: - - >>> loader = TemplateLoader([os.path.dirname(path)]) - - The `load()` method first checks the template cache whether the requested - template has already been loaded. If not, it attempts to locate the - template file, and returns the corresponding `Template` object: - - >>> from genshi.template import MarkupTemplate - >>> template = loader.load(os.path.basename(path)) - >>> isinstance(template, MarkupTemplate) - True - - Template instances are cached: requesting a template with the same name - results in the same instance being returned: - - >>> loader.load(os.path.basename(path)) is template - True - - The `auto_reload` option can be used to control whether a template should - be automatically reloaded when the file it was loaded from has been - changed. Disable this automatic reloading to improve performance. - - >>> os.remove(path) - """ - def __init__(self, search_path=None, auto_reload=False, - default_encoding=None, max_cache_size=25, default_class=None, - variable_lookup='strict', allow_exec=True, callback=None): - """Create the template laoder. 
- - :param search_path: a list of absolute path names that should be - searched for template files, or a string containing - a single absolute path; alternatively, any item on - the list may be a ''load function'' that is passed - a filename and returns a file-like object and some - metadata - :param auto_reload: whether to check the last modification time of - template files, and reload them if they have changed - :param default_encoding: the default encoding to assume when loading - templates; defaults to UTF-8 - :param max_cache_size: the maximum number of templates to keep in the - cache - :param default_class: the default `Template` subclass to use when - instantiating templates - :param variable_lookup: the variable lookup mechanism; either "strict" - (the default), "lenient", or a custom lookup - class - :param allow_exec: whether to allow Python code blocks in templates - :param callback: (optional) a callback function that is invoked after a - template was initialized by this loader; the function - is passed the template object as only argument. 
This - callback can be used for example to add any desired - filters to the template - :see: `LenientLookup`, `StrictLookup` - - :note: Changed in 0.5: Added the `allow_exec` argument - """ - from genshi.template.markup import MarkupTemplate - - self.search_path = search_path - if self.search_path is None: - self.search_path = [] - elif not isinstance(self.search_path, (list, tuple)): - self.search_path = [self.search_path] - - self.auto_reload = auto_reload - """Whether templates should be reloaded when the underlying file is - changed""" - - self.default_encoding = default_encoding - self.default_class = default_class or MarkupTemplate - self.variable_lookup = variable_lookup - self.allow_exec = allow_exec - if callback is not None and not hasattr(callback, '__call__'): - raise TypeError('The "callback" parameter needs to be callable') - self.callback = callback - self._cache = LRUCache(max_cache_size) - self._uptodate = {} - self._lock = threading.RLock() - - def __getstate__(self): - state = self.__dict__.copy() - state['_lock'] = None - return state - - def __setstate__(self, state): - self.__dict__ = state - self._lock = threading.RLock() - - def load(self, filename, relative_to=None, cls=None, encoding=None): - """Load the template with the given name. - - If the `filename` parameter is relative, this method searches the - search path trying to locate a template matching the given name. If the - file name is an absolute path, the search path is ignored. - - If the requested template is not found, a `TemplateNotFound` exception - is raised. Otherwise, a `Template` object is returned that represents - the parsed template. - - Template instances are cached to avoid having to parse the same - template file more than once. Thus, subsequent calls of this method - with the same template file name will return the same `Template` - object (unless the ``auto_reload`` option is enabled and the file was - changed since the last parse.) 
- - If the `relative_to` parameter is provided, the `filename` is - interpreted as being relative to that path. - - :param filename: the relative path of the template file to load - :param relative_to: the filename of the template from which the new - template is being loaded, or ``None`` if the - template is being loaded directly - :param cls: the class of the template object to instantiate - :param encoding: the encoding of the template to load; defaults to the - ``default_encoding`` of the loader instance - :return: the loaded `Template` instance - :raises TemplateNotFound: if a template with the given name could not - be found - """ - if cls is None: - cls = self.default_class - search_path = self.search_path - - # Make the filename relative to the template file its being loaded - # from, but only if that file is specified as a relative path, or no - # search path has been set up - if relative_to and (not search_path or not os.path.isabs(relative_to)): - filename = os.path.join(os.path.dirname(relative_to), filename) - - filename = os.path.normpath(filename) - cachekey = filename - - self._lock.acquire() - try: - # First check the cache to avoid reparsing the same file - try: - tmpl = self._cache[cachekey] - if not self.auto_reload: - return tmpl - uptodate = self._uptodate[cachekey] - if uptodate is not None and uptodate(): - return tmpl - except (KeyError, OSError): - pass - - isabs = False - - if os.path.isabs(filename): - # Bypass the search path if the requested filename is absolute - search_path = [os.path.dirname(filename)] - isabs = True - - elif relative_to and os.path.isabs(relative_to): - # Make sure that the directory containing the including - # template is on the search path - dirname = os.path.dirname(relative_to) - if dirname not in search_path: - search_path = list(search_path) + [dirname] - isabs = True - - elif not search_path: - # Uh oh, don't know where to look for the template - raise TemplateError('Search path for templates not 
configured') - - for loadfunc in search_path: - if isinstance(loadfunc, basestring): - loadfunc = directory(loadfunc) - try: - filepath, filename, fileobj, uptodate = loadfunc(filename) - except IOError: - continue - else: - try: - if isabs: - # If the filename of either the included or the - # including template is absolute, make sure the - # included template gets an absolute path, too, - # so that nested includes work properly without a - # search path - filename = filepath - tmpl = self._instantiate(cls, fileobj, filepath, - filename, encoding=encoding) - if self.callback: - self.callback(tmpl) - self._cache[cachekey] = tmpl - self._uptodate[cachekey] = uptodate - finally: - if hasattr(fileobj, 'close'): - fileobj.close() - return tmpl - - raise TemplateNotFound(filename, search_path) - - finally: - self._lock.release() - - def _instantiate(self, cls, fileobj, filepath, filename, encoding=None): - """Instantiate and return the `Template` object based on the given - class and parameters. - - This function is intended for subclasses to override if they need to - implement special template instantiation logic. Code that just uses - the `TemplateLoader` should use the `load` method instead. - - :param cls: the class of the template object to instantiate - :param fileobj: a readable file-like object containing the template - source - :param filepath: the absolute path to the template file - :param filename: the path to the template file relative to the search - path - :param encoding: the encoding of the template to load; defaults to the - ``default_encoding`` of the loader instance - :return: the loaded `Template` instance - :rtype: `Template` - """ - if encoding is None: - encoding = self.default_encoding - return cls(fileobj, filepath=filepath, filename=filename, loader=self, - encoding=encoding, lookup=self.variable_lookup, - allow_exec=self.allow_exec) - - @staticmethod - def directory(path): - """Loader factory for loading templates from a local directory. 
- - :param path: the path to the local directory containing the templates - :return: the loader function to load templates from the given directory - :rtype: ``function`` - """ - def _load_from_directory(filename): - filepath = os.path.join(path, filename) - fileobj = open(filepath, 'U') - mtime = os.path.getmtime(filepath) - def _uptodate(): - return mtime == os.path.getmtime(filepath) - return filepath, filename, fileobj, _uptodate - return _load_from_directory - - @staticmethod - def package(name, path): - """Loader factory for loading templates from egg package data. - - :param name: the name of the package containing the resources - :param path: the path inside the package data - :return: the loader function to load templates from the given package - :rtype: ``function`` - """ - from pkg_resources import resource_stream - def _load_from_package(filename): - filepath = os.path.join(path, filename) - return filepath, filename, resource_stream(name, filepath), None - return _load_from_package - - @staticmethod - def prefixed(**delegates): - """Factory for a load function that delegates to other loaders - depending on the prefix of the requested template path. - - The prefix is stripped from the filename when passing on the load - request to the delegate. - - >>> load = prefixed( - ... app1 = lambda filename: ('app1', filename, None, None), - ... app2 = lambda filename: ('app2', filename, None, None) - ... 
) - >>> print(load('app1/foo.html')) - ('app1', 'app1/foo.html', None, None) - >>> print(load('app2/bar.html')) - ('app2', 'app2/bar.html', None, None) - - :param delegates: mapping of path prefixes to loader functions - :return: the loader function - :rtype: ``function`` - """ - def _dispatch_by_prefix(filename): - for prefix, delegate in delegates.items(): - if filename.startswith(prefix): - if isinstance(delegate, basestring): - delegate = directory(delegate) - filepath, _, fileobj, uptodate = delegate( - filename[len(prefix):].lstrip('/\\') - ) - return filepath, filename, fileobj, uptodate - raise TemplateNotFound(filename, list(delegates.keys())) - return _dispatch_by_prefix - - -directory = TemplateLoader.directory -package = TemplateLoader.package -prefixed = TemplateLoader.prefixed diff --git a/genshi/template/markup.py b/genshi/template/markup.py deleted file mode 100644 index 0e31632..0000000 --- a/genshi/template/markup.py +++ /dev/null @@ -1,397 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2010 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. 
- -"""Markup templating engine.""" - -from itertools import chain - -from genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind -from genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT -from genshi.input import XMLParser -from genshi.template.base import BadDirectiveError, Template, \ - TemplateSyntaxError, _apply_directives, \ - EXEC, INCLUDE, SUB -from genshi.template.eval import Suite -from genshi.template.interpolation import interpolate -from genshi.template.directives import * -from genshi.template.text import NewTextTemplate - -__all__ = ['MarkupTemplate'] -__docformat__ = 'restructuredtext en' - - -class MarkupTemplate(Template): - """Implementation of the template language for XML-based templates. - - >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/"> - ... <li py:for="item in items">${item}</li> - ... </ul>''') - >>> print(tmpl.generate(items=[1, 2, 3])) - <ul> - <li>1</li><li>2</li><li>3</li> - </ul> - """ - - DIRECTIVE_NAMESPACE = 'http://genshi.edgewall.org/' - XINCLUDE_NAMESPACE = 'http://www.w3.org/2001/XInclude' - - directives = [('def', DefDirective), - ('match', MatchDirective), - ('when', WhenDirective), - ('otherwise', OtherwiseDirective), - ('for', ForDirective), - ('if', IfDirective), - ('choose', ChooseDirective), - ('with', WithDirective), - ('replace', ReplaceDirective), - ('content', ContentDirective), - ('attrs', AttrsDirective), - ('strip', StripDirective)] - serializer = 'xml' - _number_conv = Markup - - def __init__(self, source, filepath=None, filename=None, loader=None, - encoding=None, lookup='strict', allow_exec=True): - Template.__init__(self, source, filepath=filepath, filename=filename, - loader=loader, encoding=encoding, lookup=lookup, - allow_exec=allow_exec) - self.add_directives(self.DIRECTIVE_NAMESPACE, self) - - def _init_filters(self): - Template._init_filters(self) - # Make sure the include filter comes after the match filter - self.filters.remove(self._include) - 
self.filters += [self._match, self._include] - - def _parse(self, source, encoding): - if not isinstance(source, Stream): - source = XMLParser(source, filename=self.filename, - encoding=encoding) - stream = [] - - for kind, data, pos in source: - - if kind is TEXT: - for kind, data, pos in interpolate(data, self.filepath, pos[1], - pos[2], lookup=self.lookup): - stream.append((kind, data, pos)) - - elif kind is PI and data[0] == 'python': - if not self.allow_exec: - raise TemplateSyntaxError('Python code blocks not allowed', - self.filepath, *pos[1:]) - try: - suite = Suite(data[1], self.filepath, pos[1], - lookup=self.lookup) - except SyntaxError, err: - raise TemplateSyntaxError(err, self.filepath, - pos[1] + (err.lineno or 1) - 1, - pos[2] + (err.offset or 0)) - stream.append((EXEC, suite, pos)) - - elif kind is COMMENT: - if not data.lstrip().startswith('!'): - stream.append((kind, data, pos)) - - else: - stream.append((kind, data, pos)) - - return stream - - def _extract_directives(self, stream, namespace, factory): - depth = 0 - dirmap = {} # temporary mapping of directives to elements - new_stream = [] - ns_prefix = {} # namespace prefixes in use - - for kind, data, pos in stream: - - if kind is START: - tag, attrs = data - directives = [] - strip = False - - if tag.namespace == namespace: - cls = factory.get_directive(tag.localname) - if cls is None: - raise BadDirectiveError(tag.localname, - self.filepath, pos[1]) - args = dict([(name.localname, value) for name, value - in attrs if not name.namespace]) - directives.append((factory.get_directive_index(cls), cls, - args, ns_prefix.copy(), pos)) - strip = True - - new_attrs = [] - for name, value in attrs: - if name.namespace == namespace: - cls = factory.get_directive(name.localname) - if cls is None: - raise BadDirectiveError(name.localname, - self.filepath, pos[1]) - if type(value) is list and len(value) == 1: - value = value[0][1] - directives.append((factory.get_directive_index(cls), - cls, value, 
ns_prefix.copy(), pos)) - else: - new_attrs.append((name, value)) - new_attrs = Attrs(new_attrs) - - if directives: - directives.sort() - dirmap[(depth, tag)] = (directives, len(new_stream), - strip) - - new_stream.append((kind, (tag, new_attrs), pos)) - depth += 1 - - elif kind is END: - depth -= 1 - new_stream.append((kind, data, pos)) - - # If there have have directive attributes with the - # corresponding start tag, move the events inbetween into - # a "subprogram" - if (depth, data) in dirmap: - directives, offset, strip = dirmap.pop((depth, data)) - substream = new_stream[offset:] - if strip: - substream = substream[1:-1] - new_stream[offset:] = [ - (SUB, (directives, substream), pos) - ] - - elif kind is SUB: - directives, substream = data - substream = self._extract_directives(substream, namespace, - factory) - - if len(substream) == 1 and substream[0][0] is SUB: - added_directives, substream = substream[0][1] - directives += added_directives - - new_stream.append((kind, (directives, substream), pos)) - - elif kind is START_NS: - # Strip out the namespace declaration for template - # directives - prefix, uri = data - ns_prefix[prefix] = uri - if uri != namespace: - new_stream.append((kind, data, pos)) - - elif kind is END_NS: - uri = ns_prefix.pop(data, None) - if uri and uri != namespace: - new_stream.append((kind, data, pos)) - - else: - new_stream.append((kind, data, pos)) - - return new_stream - - def _extract_includes(self, stream): - streams = [[]] # stacked lists of events of the "compiled" template - prefixes = {} - fallbacks = [] - includes = [] - xinclude_ns = Namespace(self.XINCLUDE_NAMESPACE) - - for kind, data, pos in stream: - stream = streams[-1] - - if kind is START: - # Record any directive attributes in start tags - tag, attrs = data - if tag in xinclude_ns: - if tag.localname == 'include': - include_href = attrs.get('href') - if not include_href: - raise TemplateSyntaxError('Include misses required ' - 'attribute "href"', - self.filepath, 
*pos[1:]) - includes.append((include_href, attrs.get('parse'))) - streams.append([]) - elif tag.localname == 'fallback': - streams.append([]) - fallbacks.append(streams[-1]) - else: - stream.append((kind, (tag, attrs), pos)) - - elif kind is END: - if fallbacks and data == xinclude_ns['fallback']: - assert streams.pop() is fallbacks[-1] - elif data == xinclude_ns['include']: - fallback = None - if len(fallbacks) == len(includes): - fallback = fallbacks.pop() - streams.pop() # discard anything between the include tags - # and the fallback element - stream = streams[-1] - href, parse = includes.pop() - try: - cls = { - 'xml': MarkupTemplate, - 'text': NewTextTemplate - }.get(parse) or self.__class__ - except KeyError: - raise TemplateSyntaxError('Invalid value for "parse" ' - 'attribute of include', - self.filepath, *pos[1:]) - stream.append((INCLUDE, (href, cls, fallback), pos)) - else: - stream.append((kind, data, pos)) - - elif kind is START_NS and data[1] == xinclude_ns: - # Strip out the XInclude namespace - prefixes[data[0]] = data[1] - - elif kind is END_NS and data in prefixes: - prefixes.pop(data) - - else: - stream.append((kind, data, pos)) - - assert len(streams) == 1 - return streams[0] - - def _interpolate_attrs(self, stream): - for kind, data, pos in stream: - - if kind is START: - # Record any directive attributes in start tags - tag, attrs = data - new_attrs = [] - for name, value in attrs: - if value: - value = list(interpolate(value, self.filepath, pos[1], - pos[2], lookup=self.lookup)) - if len(value) == 1 and value[0][0] is TEXT: - value = value[0][1] - new_attrs.append((name, value)) - data = tag, Attrs(new_attrs) - - yield kind, data, pos - - def _prepare(self, stream): - return Template._prepare(self, - self._extract_includes(self._interpolate_attrs(stream)) - ) - - def add_directives(self, namespace, factory): - """Register a custom `DirectiveFactory` for a given namespace. 
- - :param namespace: the namespace URI - :type namespace: `basestring` - :param factory: the directive factory to register - :type factory: `DirectiveFactory` - :since: version 0.6 - """ - assert not self._prepared, 'Too late for adding directives, ' \ - 'template already prepared' - self._stream = self._extract_directives(self._stream, namespace, - factory) - - def _match(self, stream, ctxt, start=0, end=None, **vars): - """Internal stream filter that applies any defined match templates - to the stream. - """ - match_templates = ctxt._match_templates - - tail = [] - def _strip(stream, append=tail.append): - depth = 1 - next = stream.next - while 1: - event = next() - if event[0] is START: - depth += 1 - elif event[0] is END: - depth -= 1 - if depth > 0: - yield event - else: - append(event) - break - - for event in stream: - - # We (currently) only care about start and end events for matching - # We might care about namespace events in the future, though - if not match_templates or (event[0] is not START and - event[0] is not END): - yield event - continue - - for idx, (test, path, template, hints, namespaces, directives) \ - in enumerate(match_templates): - if idx < start or end is not None and idx >= end: - continue - - if test(event, namespaces, ctxt) is True: - if 'match_once' in hints: - del match_templates[idx] - idx -= 1 - - # Let the remaining match templates know about the event so - # they get a chance to update their internal state - for test in [mt[0] for mt in match_templates[idx + 1:]]: - test(event, namespaces, ctxt, updateonly=True) - - # Consume and store all events until an end event - # corresponding to this start event is encountered - pre_end = idx + 1 - if 'match_once' not in hints and 'not_recursive' in hints: - pre_end -= 1 - inner = _strip(stream) - if pre_end > 0: - inner = self._match(inner, ctxt, start=start, - end=pre_end, **vars) - content = self._include(chain([event], inner, tail), ctxt) - if 'not_buffered' not in hints: - content 
= list(content) - content = Stream(content) - - # Make the select() function available in the body of the - # match template - selected = [False] - def select(path): - selected[0] = True - return content.select(path, namespaces, ctxt) - vars = dict(select=select) - - # Recursively process the output - template = _apply_directives(template, directives, ctxt, - vars) - for event in self._match(self._flatten(template, ctxt, - **vars), - ctxt, start=idx + 1, **vars): - yield event - - # If the match template did not actually call select to - # consume the matched stream, the original events need to - # be consumed here or they'll get appended to the output - if not selected[0]: - for event in content: - pass - - # Let the remaining match templates know about the last - # event in the matched content, so they can update their - # internal state accordingly - for test in [mt[0] for mt in match_templates[idx + 1:]]: - test(tail[0], namespaces, ctxt, updateonly=True) - - break - - else: # no matches - yield event diff --git a/genshi/template/plugin.py b/genshi/template/plugin.py deleted file mode 100644 index 70d56af..0000000 --- a/genshi/template/plugin.py +++ /dev/null @@ -1,176 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# Copyright (C) 2006 Matthew Good -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Basic support for the template engine plugin API used by TurboGears and -CherryPy/Buffet. 
-""" - -from genshi.input import ET, HTML, XML -from genshi.output import DocType -from genshi.template.base import Template -from genshi.template.loader import TemplateLoader -from genshi.template.markup import MarkupTemplate -from genshi.template.text import TextTemplate, NewTextTemplate - -__all__ = ['ConfigurationError', 'AbstractTemplateEnginePlugin', - 'MarkupTemplateEnginePlugin', 'TextTemplateEnginePlugin'] -__docformat__ = 'restructuredtext en' - - -class ConfigurationError(ValueError): - """Exception raised when invalid plugin options are encountered.""" - - -class AbstractTemplateEnginePlugin(object): - """Implementation of the plugin API.""" - - template_class = None - extension = None - - def __init__(self, extra_vars_func=None, options=None): - self.get_extra_vars = extra_vars_func - if options is None: - options = {} - self.options = options - - self.default_encoding = options.get('genshi.default_encoding', 'utf-8') - auto_reload = options.get('genshi.auto_reload', '1') - if isinstance(auto_reload, basestring): - auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true') - search_path = [p for p in - options.get('genshi.search_path', '').split(':') if p] - self.use_package_naming = not search_path - try: - max_cache_size = int(options.get('genshi.max_cache_size', 25)) - except ValueError: - raise ConfigurationError('Invalid value for max_cache_size: "%s"' % - options.get('genshi.max_cache_size')) - - loader_callback = options.get('genshi.loader_callback', None) - if loader_callback and not hasattr(loader_callback, '__call__'): - raise ConfigurationError('loader callback must be a function') - - lookup_errors = options.get('genshi.lookup_errors', 'strict') - if lookup_errors not in ('lenient', 'strict'): - raise ConfigurationError('Unknown lookup errors mode "%s"' % - lookup_errors) - - try: - allow_exec = bool(options.get('genshi.allow_exec', True)) - except ValueError: - raise ConfigurationError('Invalid value for allow_exec "%s"' % - 
options.get('genshi.allow_exec')) - - self.loader = TemplateLoader([p for p in search_path if p], - auto_reload=auto_reload, - max_cache_size=max_cache_size, - default_class=self.template_class, - variable_lookup=lookup_errors, - allow_exec=allow_exec, - callback=loader_callback) - - def load_template(self, templatename, template_string=None): - """Find a template specified in python 'dot' notation, or load one from - a string. - """ - if template_string is not None: - return self.template_class(template_string) - - if self.use_package_naming: - divider = templatename.rfind('.') - if divider >= 0: - from pkg_resources import resource_filename - package = templatename[:divider] - basename = templatename[divider + 1:] + self.extension - templatename = resource_filename(package, basename) - - return self.loader.load(templatename) - - def _get_render_options(self, format=None, fragment=False): - if format is None: - format = self.default_format - kwargs = {'method': format} - if self.default_encoding: - kwargs['encoding'] = self.default_encoding - return kwargs - - def render(self, info, format=None, fragment=False, template=None): - """Render the template to a string using the provided info.""" - kwargs = self._get_render_options(format=format, fragment=fragment) - return self.transform(info, template).render(**kwargs) - - def transform(self, info, template): - """Render the output to an event stream.""" - if not isinstance(template, Template): - template = self.load_template(template) - return template.generate(**info) - - -class MarkupTemplateEnginePlugin(AbstractTemplateEnginePlugin): - """Implementation of the plugin API for markup templates.""" - - template_class = MarkupTemplate - extension = '.html' - - def __init__(self, extra_vars_func=None, options=None): - AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options) - - default_doctype = self.options.get('genshi.default_doctype') - if default_doctype: - doctype = DocType.get(default_doctype) - if 
doctype is None: - raise ConfigurationError('Unknown doctype %r' % default_doctype) - self.default_doctype = doctype - else: - self.default_doctype = None - - format = self.options.get('genshi.default_format', 'html').lower() - if format not in ('html', 'xhtml', 'xml', 'text'): - raise ConfigurationError('Unknown output format %r' % format) - self.default_format = format - - def _get_render_options(self, format=None, fragment=False): - kwargs = super(MarkupTemplateEnginePlugin, - self)._get_render_options(format, fragment) - if self.default_doctype and not fragment: - kwargs['doctype'] = self.default_doctype - return kwargs - - def transform(self, info, template): - """Render the output to an event stream.""" - data = {'ET': ET, 'HTML': HTML, 'XML': XML} - if self.get_extra_vars: - data.update(self.get_extra_vars()) - data.update(info) - return super(MarkupTemplateEnginePlugin, self).transform(data, template) - - -class TextTemplateEnginePlugin(AbstractTemplateEnginePlugin): - """Implementation of the plugin API for text templates.""" - - template_class = TextTemplate - extension = '.txt' - default_format = 'text' - - def __init__(self, extra_vars_func=None, options=None): - if options is None: - options = {} - - new_syntax = options.get('genshi.new_text_syntax') - if isinstance(new_syntax, basestring): - new_syntax = new_syntax.lower() in ('1', 'on', 'yes', 'true') - if new_syntax: - self.template_class = NewTextTemplate - - AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options) diff --git a/genshi/template/text.py b/genshi/template/text.py deleted file mode 100644 index 746226c..0000000 --- a/genshi/template/text.py +++ /dev/null @@ -1,333 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. 
-# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -"""Plain text templating engine. - -This module implements two template language syntaxes, at least for a certain -transitional period. `OldTextTemplate` (aliased to just `TextTemplate`) defines -a syntax that was inspired by Cheetah/Velocity. `NewTextTemplate` on the other -hand is inspired by the syntax of the Django template language, which has more -explicit delimiting of directives, and is more flexible with regards to -white space and line breaks. - -In a future release, `OldTextTemplate` will be phased out in favor of -`NewTextTemplate`, as the names imply. Therefore the new syntax is strongly -recommended for new projects, and existing projects may want to migrate to the -new syntax to remain compatible with future Genshi releases. -""" - -import re - -from genshi.core import TEXT -from genshi.template.base import BadDirectiveError, Template, \ - TemplateSyntaxError, EXEC, INCLUDE, SUB -from genshi.template.eval import Suite -from genshi.template.directives import * -from genshi.template.directives import Directive -from genshi.template.interpolation import interpolate - -__all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate'] -__docformat__ = 'restructuredtext en' - - -class NewTextTemplate(Template): - r"""Implementation of a simple text-based template engine. This class will - replace `OldTextTemplate` in a future release. - - It uses a more explicit delimiting style for directives: instead of the old - style which required putting directives on separate lines that were prefixed - with a ``#`` sign, directives and commenbtsr are enclosed in delimiter pairs - (by default ``{% ... %}`` and ``{# ... #}``, respectively). 
- - Variable substitution uses the same interpolation syntax as for markup - languages: simple references are prefixed with a dollar sign, more complex - expression enclosed in curly braces. - - >>> tmpl = NewTextTemplate('''Dear $name, - ... - ... {# This is a comment #} - ... We have the following items for you: - ... {% for item in items %} - ... * ${'Item %d' % item} - ... {% end %} - ... ''') - >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) - Dear Joe, - <BLANKLINE> - <BLANKLINE> - We have the following items for you: - <BLANKLINE> - * Item 1 - <BLANKLINE> - * Item 2 - <BLANKLINE> - * Item 3 - <BLANKLINE> - <BLANKLINE> - - By default, no spaces or line breaks are removed. If a line break should - not be included in the output, prefix it with a backslash: - - >>> tmpl = NewTextTemplate('''Dear $name, - ... - ... {# This is a comment #}\ - ... We have the following items for you: - ... {% for item in items %}\ - ... * $item - ... {% end %}\ - ... ''') - >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) - Dear Joe, - <BLANKLINE> - We have the following items for you: - * 1 - * 2 - * 3 - <BLANKLINE> - - Backslashes are also used to escape the start delimiter of directives and - comments: - - >>> tmpl = NewTextTemplate('''Dear $name, - ... - ... \{# This is a comment #} - ... We have the following items for you: - ... {% for item in items %}\ - ... * $item - ... {% end %}\ - ... 
''') - >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) - Dear Joe, - <BLANKLINE> - {# This is a comment #} - We have the following items for you: - * 1 - * 2 - * 3 - <BLANKLINE> - - :since: version 0.5 - """ - directives = [('def', DefDirective), - ('when', WhenDirective), - ('otherwise', OtherwiseDirective), - ('for', ForDirective), - ('if', IfDirective), - ('choose', ChooseDirective), - ('with', WithDirective)] - serializer = 'text' - - _DIRECTIVE_RE = r'((?<!\\)%s\s*(\w+)\s*(.*?)\s*%s|(?<!\\)%s.*?%s)' - _ESCAPE_RE = r'\\\n|\\(\\)|\\(%s)|\\(%s)' - - def __init__(self, source, filepath=None, filename=None, loader=None, - encoding=None, lookup='strict', allow_exec=False, - delims=('{%', '%}', '{#', '#}')): - self.delimiters = delims - Template.__init__(self, source, filepath=filepath, filename=filename, - loader=loader, encoding=encoding, lookup=lookup) - - def _get_delims(self): - return self._delims - def _set_delims(self, delims): - if len(delims) != 4: - raise ValueError('delimiers tuple must have exactly four elements') - self._delims = delims - self._directive_re = re.compile(self._DIRECTIVE_RE % tuple( - [re.escape(d) for d in delims] - ), re.DOTALL) - self._escape_re = re.compile(self._ESCAPE_RE % tuple( - [re.escape(d) for d in delims[::2]] - )) - delimiters = property(_get_delims, _set_delims, """\ - The delimiters for directives and comments. This should be a four item tuple - of the form ``(directive_start, directive_end, comment_start, - comment_end)``, where each item is a string. 
- """) - - def _parse(self, source, encoding): - """Parse the template from text input.""" - stream = [] # list of events of the "compiled" template - dirmap = {} # temporary mapping of directives to elements - depth = 0 - - source = source.read() - if isinstance(source, str): - source = source.decode(encoding or 'utf-8', 'replace') - offset = 0 - lineno = 1 - - _escape_sub = self._escape_re.sub - def _escape_repl(mo): - groups = [g for g in mo.groups() if g] - if not groups: - return '' - return groups[0] - - for idx, mo in enumerate(self._directive_re.finditer(source)): - start, end = mo.span(1) - if start > offset: - text = _escape_sub(_escape_repl, source[offset:start]) - for kind, data, pos in interpolate(text, self.filepath, lineno, - lookup=self.lookup): - stream.append((kind, data, pos)) - lineno += len(text.splitlines()) - - lineno += len(source[start:end].splitlines()) - command, value = mo.group(2, 3) - - if command == 'include': - pos = (self.filename, lineno, 0) - value = list(interpolate(value, self.filepath, lineno, 0, - lookup=self.lookup)) - if len(value) == 1 and value[0][0] is TEXT: - value = value[0][1] - stream.append((INCLUDE, (value, None, []), pos)) - - elif command == 'python': - if not self.allow_exec: - raise TemplateSyntaxError('Python code blocks not allowed', - self.filepath, lineno) - try: - suite = Suite(value, self.filepath, lineno, - lookup=self.lookup) - except SyntaxError, err: - raise TemplateSyntaxError(err, self.filepath, - lineno + (err.lineno or 1) - 1) - pos = (self.filename, lineno, 0) - stream.append((EXEC, suite, pos)) - - elif command == 'end': - depth -= 1 - if depth in dirmap: - directive, start_offset = dirmap.pop(depth) - substream = stream[start_offset:] - stream[start_offset:] = [(SUB, ([directive], substream), - (self.filepath, lineno, 0))] - - elif command: - cls = self.get_directive(command) - if cls is None: - raise BadDirectiveError(command) - directive = 0, cls, value, None, (self.filepath, lineno, 0) - 
dirmap[depth] = (directive, len(stream)) - depth += 1 - - offset = end - - if offset < len(source): - text = _escape_sub(_escape_repl, source[offset:]) - for kind, data, pos in interpolate(text, self.filepath, lineno, - lookup=self.lookup): - stream.append((kind, data, pos)) - - return stream - - -class OldTextTemplate(Template): - """Legacy implementation of the old syntax text-based templates. This class - is provided in a transition phase for backwards compatibility. New code - should use the `NewTextTemplate` class and the improved syntax it provides. - - >>> tmpl = OldTextTemplate('''Dear $name, - ... - ... We have the following items for you: - ... #for item in items - ... * $item - ... #end - ... - ... All the best, - ... Foobar''') - >>> print(tmpl.generate(name='Joe', items=[1, 2, 3]).render(encoding=None)) - Dear Joe, - <BLANKLINE> - We have the following items for you: - * 1 - * 2 - * 3 - <BLANKLINE> - All the best, - Foobar - """ - directives = [('def', DefDirective), - ('when', WhenDirective), - ('otherwise', OtherwiseDirective), - ('for', ForDirective), - ('if', IfDirective), - ('choose', ChooseDirective), - ('with', WithDirective)] - serializer = 'text' - - _DIRECTIVE_RE = re.compile(r'(?:^[ \t]*(?<!\\)#(end).*\n?)|' - r'(?:^[ \t]*(?<!\\)#((?:\w+|#).*)\n?)', - re.MULTILINE) - - def _parse(self, source, encoding): - """Parse the template from text input.""" - stream = [] # list of events of the "compiled" template - dirmap = {} # temporary mapping of directives to elements - depth = 0 - - source = source.read() - if isinstance(source, str): - source = source.decode(encoding or 'utf-8', 'replace') - offset = 0 - lineno = 1 - - for idx, mo in enumerate(self._DIRECTIVE_RE.finditer(source)): - start, end = mo.span() - if start > offset: - text = source[offset:start] - for kind, data, pos in interpolate(text, self.filepath, lineno, - lookup=self.lookup): - stream.append((kind, data, pos)) - lineno += len(text.splitlines()) - - text = 
source[start:end].lstrip()[1:] - lineno += len(text.splitlines()) - directive = text.split(None, 1) - if len(directive) > 1: - command, value = directive - else: - command, value = directive[0], None - - if command == 'end': - depth -= 1 - if depth in dirmap: - directive, start_offset = dirmap.pop(depth) - substream = stream[start_offset:] - stream[start_offset:] = [(SUB, ([directive], substream), - (self.filepath, lineno, 0))] - elif command == 'include': - pos = (self.filename, lineno, 0) - stream.append((INCLUDE, (value.strip(), None, []), pos)) - elif command != '#': - cls = self.get_directive(command) - if cls is None: - raise BadDirectiveError(command) - directive = 0, cls, value, None, (self.filepath, lineno, 0) - dirmap[depth] = (directive, len(stream)) - depth += 1 - - offset = end - - if offset < len(source): - text = source[offset:].replace('\\#', '#') - for kind, data, pos in interpolate(text, self.filepath, lineno, - lookup=self.lookup): - stream.append((kind, data, pos)) - - return stream - - -TextTemplate = OldTextTemplate diff --git a/genshi/util.py b/genshi/util.py deleted file mode 100644 index b964a01..0000000 --- a/genshi/util.py +++ /dev/null @@ -1,274 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. 
- -"""Various utility classes and functions.""" - -import htmlentitydefs as entities -import re - -__docformat__ = 'restructuredtext en' - - -class LRUCache(dict): - """A dictionary-like object that stores only a certain number of items, and - discards its least recently used item when full. - - >>> cache = LRUCache(3) - >>> cache['A'] = 0 - >>> cache['B'] = 1 - >>> cache['C'] = 2 - >>> len(cache) - 3 - - >>> cache['A'] - 0 - - Adding new items to the cache does not increase its size. Instead, the least - recently used item is dropped: - - >>> cache['D'] = 3 - >>> len(cache) - 3 - >>> 'B' in cache - False - - Iterating over the cache returns the keys, starting with the most recently - used: - - >>> for key in cache: - ... print(key) - D - A - C - - This code is based on the LRUCache class from ``myghtyutils.util``, written - by Mike Bayer and released under the MIT license. See: - - http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py - """ - - class _Item(object): - def __init__(self, key, value): - self.prv = self.nxt = None - self.key = key - self.value = value - def __repr__(self): - return repr(self.value) - - def __init__(self, capacity): - self._dict = dict() - self.capacity = capacity - self.head = None - self.tail = None - - def __contains__(self, key): - return key in self._dict - - def __iter__(self): - cur = self.head - while cur: - yield cur.key - cur = cur.nxt - - def __len__(self): - return len(self._dict) - - def __getitem__(self, key): - item = self._dict[key] - self._update_item(item) - return item.value - - def __setitem__(self, key, value): - item = self._dict.get(key) - if item is None: - item = self._Item(key, value) - self._dict[key] = item - self._insert_item(item) - else: - item.value = value - self._update_item(item) - self._manage_size() - - def __repr__(self): - return repr(self._dict) - - def _insert_item(self, item): - item.prv = None - item.nxt = self.head - if self.head is not None: - self.head.prv = item - else: - 
self.tail = item - self.head = item - self._manage_size() - - def _manage_size(self): - while len(self._dict) > self.capacity: - olditem = self._dict[self.tail.key] - del self._dict[self.tail.key] - if self.tail != self.head: - self.tail = self.tail.prv - self.tail.nxt = None - else: - self.head = self.tail = None - - def _update_item(self, item): - if self.head == item: - return - - prv = item.prv - prv.nxt = item.nxt - if item.nxt is not None: - item.nxt.prv = prv - else: - self.tail = prv - - item.prv = None - item.nxt = self.head - self.head.prv = self.head = item - - -def flatten(items): - """Flattens a potentially nested sequence into a flat list. - - :param items: the sequence to flatten - - >>> flatten((1, 2)) - [1, 2] - >>> flatten([1, (2, 3), 4]) - [1, 2, 3, 4] - >>> flatten([1, (2, [3, 4]), 5]) - [1, 2, 3, 4, 5] - """ - retval = [] - for item in items: - if isinstance(item, (frozenset, list, set, tuple)): - retval += flatten(item) - else: - retval.append(item) - return retval - - -def plaintext(text, keeplinebreaks=True): - """Return the text with all entities and tags removed. - - >>> plaintext('<b>1 < 2</b>') - u'1 < 2' - - The `keeplinebreaks` parameter can be set to ``False`` to replace any line - breaks by simple spaces: - - >>> plaintext('''<b>1 - ... < - ... 2</b>''', keeplinebreaks=False) - u'1 < 2' - - :param text: the text to convert to plain text - :param keeplinebreaks: whether line breaks in the text should be kept intact - :return: the text with tags and entities removed - """ - text = stripentities(striptags(text)) - if not keeplinebreaks: - text = text.replace('\n', ' ') - return text - - -_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)') -def stripentities(text, keepxmlentities=False): - """Return a copy of the given text with any character or numeric entities - replaced by the equivalent UTF-8 characters. 
- - >>> stripentities('1 < 2') - u'1 < 2' - >>> stripentities('more …') - u'more \u2026' - >>> stripentities('…') - u'\u2026' - >>> stripentities('…') - u'\u2026' - - If the `keepxmlentities` parameter is provided and is a truth value, the - core XML entities (&, ', >, < and ") are left intact. - - >>> stripentities('1 < 2 …', keepxmlentities=True) - u'1 < 2 \u2026' - """ - def _replace_entity(match): - if match.group(1): # numeric entity - ref = match.group(1) - if ref.startswith('x'): - ref = int(ref[1:], 16) - else: - ref = int(ref, 10) - return unichr(ref) - else: # character entity - ref = match.group(2) - if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): - return '&%s;' % ref - try: - return unichr(entities.name2codepoint[ref]) - except KeyError: - if keepxmlentities: - return '&%s;' % ref - else: - return ref - return _STRIPENTITIES_RE.sub(_replace_entity, text) - - -_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)') -def striptags(text): - """Return a copy of the text with any XML/HTML tags removed. - - >>> striptags('<span>Foo</span> bar') - 'Foo bar' - >>> striptags('<span class="bar">Foo</span>') - 'Foo' - >>> striptags('Foo<br />') - 'Foo' - - HTML/XML comments are stripped, too: - - >>> striptags('<!-- <blub>hehe</blah> -->test') - 'test' - - :param text: the string to remove tags from - :return: the text with tags removed - """ - return _STRIPTAGS_RE.sub('', text) - - -def stringrepr(string): - ascii = string.encode('ascii', 'backslashreplace') - quoted = "'" + ascii.replace("'", "\\'") + "'" - if len(ascii) > len(string): - return 'u' + quoted - return quoted - - -# Compatibility fallback implementations for older Python versions - -try: - all = all - any = any -except NameError: - def any(S): - for x in S: - if x: - return True - return False - - def all(S): - for x in S: - if not x: - return False - return True |