From 570a268e7562303690ef6b599ea244945a3100ce Mon Sep 17 00:00:00 2001 From: Sebastian Silva Date: Sat, 09 Jul 2011 00:17:44 +0000 Subject: Still importing WebSDK. Need to read up on GIT. --- (limited to 'genshi/util.py') diff --git a/genshi/util.py b/genshi/util.py new file mode 100644 index 0000000..b964a01 --- /dev/null +++ b/genshi/util.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""Various utility classes and functions.""" + +import htmlentitydefs as entities +import re + +__docformat__ = 'restructuredtext en' + + +class LRUCache(dict): + """A dictionary-like object that stores only a certain number of items, and + discards its least recently used item when full. + + >>> cache = LRUCache(3) + >>> cache['A'] = 0 + >>> cache['B'] = 1 + >>> cache['C'] = 2 + >>> len(cache) + 3 + + >>> cache['A'] + 0 + + Adding new items to the cache does not increase its size. Instead, the least + recently used item is dropped: + + >>> cache['D'] = 3 + >>> len(cache) + 3 + >>> 'B' in cache + False + + Iterating over the cache returns the keys, starting with the most recently + used: + + >>> for key in cache: + ... print(key) + D + A + C + + This code is based on the LRUCache class from ``myghtyutils.util``, written + by Mike Bayer and released under the MIT license. See: + + http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py + """ + + class _Item(object): + def __init__(self, key, value): + self.prv = self.nxt = None + self.key = key + self.value = value + def __repr__(self): + return repr(self.value) + + def __init__(self, capacity): + self._dict = dict() + self.capacity = capacity + self.head = None + self.tail = None + + def __contains__(self, key): + return key in self._dict + + def __iter__(self): + cur = self.head + while cur: + yield cur.key + cur = cur.nxt + + def __len__(self): + return len(self._dict) + + def __getitem__(self, key): + item = self._dict[key] + self._update_item(item) + return item.value + + def __setitem__(self, key, value): + item = self._dict.get(key) + if item is None: + item = self._Item(key, value) + self._dict[key] = item + self._insert_item(item) + else: + item.value = value + self._update_item(item) + self._manage_size() + + def __repr__(self): + return repr(self._dict) + + def _insert_item(self, item): + item.prv = None + item.nxt = self.head + if self.head is not None: + self.head.prv = item + else: + self.tail = item + self.head = item + self._manage_size() + + def _manage_size(self): + while len(self._dict) > self.capacity: + olditem = self._dict[self.tail.key] + del self._dict[self.tail.key] + if self.tail != self.head: + self.tail = self.tail.prv + self.tail.nxt = None + else: + self.head = self.tail = None + + def _update_item(self, item): + if self.head == item: + return + + prv = item.prv + prv.nxt = item.nxt + if item.nxt is not None: + item.nxt.prv = prv + else: + self.tail = prv + + item.prv = None + item.nxt = self.head + self.head.prv = self.head = item + + +def flatten(items): + """Flattens a potentially nested sequence into a flat list. + + :param items: the sequence to flatten + + >>> flatten((1, 2)) + [1, 2] + >>> flatten([1, (2, 3), 4]) + [1, 2, 3, 4] + >>> flatten([1, (2, [3, 4]), 5]) + [1, 2, 3, 4, 5] + """ + retval = [] + for item in items: + if isinstance(item, (frozenset, list, set, tuple)): + retval += flatten(item) + else: + retval.append(item) + return retval + + +def plaintext(text, keeplinebreaks=True): + """Return the text with all entities and tags removed. + + >>> plaintext('1 < 2') + u'1 < 2' + + The `keeplinebreaks` parameter can be set to ``False`` to replace any line + breaks by simple spaces: + + >>> plaintext('''1 + ... < + ... 2''', keeplinebreaks=False) + u'1 < 2' + + :param text: the text to convert to plain text + :param keeplinebreaks: whether line breaks in the text should be kept intact + :return: the text with tags and entities removed + """ + text = stripentities(striptags(text)) + if not keeplinebreaks: + text = text.replace('\n', ' ') + return text + + +_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)') +def stripentities(text, keepxmlentities=False): + """Return a copy of the given text with any character or numeric entities + replaced by the equivalent UTF-8 characters. + + >>> stripentities('1 < 2') + u'1 < 2' + >>> stripentities('more …') + u'more \u2026' + >>> stripentities('…') + u'\u2026' + >>> stripentities('…') + u'\u2026' + + If the `keepxmlentities` parameter is provided and is a truth value, the + core XML entities (&, ', >, < and ") are left intact. + + >>> stripentities('1 < 2 …', keepxmlentities=True) + u'1 < 2 \u2026' + """ + def _replace_entity(match): + if match.group(1): # numeric entity + ref = match.group(1) + if ref.startswith('x'): + ref = int(ref[1:], 16) + else: + ref = int(ref, 10) + return unichr(ref) + else: # character entity + ref = match.group(2) + if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): + return '&%s;' % ref + try: + return unichr(entities.name2codepoint[ref]) + except KeyError: + if keepxmlentities: + return '&%s;' % ref + else: + return ref + return _STRIPENTITIES_RE.sub(_replace_entity, text) + + +_STRIPTAGS_RE = re.compile(r'(|<[^>]*>)') +def striptags(text): + """Return a copy of the text with any XML/HTML tags removed. + + >>> striptags('Foo bar') + 'Foo bar' + >>> striptags('Foo') + 'Foo' + >>> striptags('Foo
') + 'Foo' + + HTML/XML comments are stripped, too: + + >>> striptags('test') + 'test' + + :param text: the string to remove tags from + :return: the text with tags removed + """ + return _STRIPTAGS_RE.sub('', text) + + +def stringrepr(string): + ascii = string.encode('ascii', 'backslashreplace') + quoted = "'" + ascii.replace("'", "\\'") + "'" + if len(ascii) > len(string): + return 'u' + quoted + return quoted + + +# Compatibility fallback implementations for older Python versions + +try: + all = all + any = any +except NameError: + def any(S): + for x in S: + if x: + return True + return False + + def all(S): + for x in S: + if not x: + return False + return True -- cgit v0.9.1