diff options
Diffstat (limited to 'babel/messages/catalog.py')
-rw-r--r-- | babel/messages/catalog.py | 721 |
1 files changed, 0 insertions, 721 deletions
diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py deleted file mode 100644 index b8a066c..0000000 --- a/babel/messages/catalog.py +++ /dev/null @@ -1,721 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2007 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://babel.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://babel.edgewall.org/log/. - -"""Data structures for message catalogs.""" - -from cgi import parse_header -from datetime import datetime -from difflib import get_close_matches -from email import message_from_string -from copy import copy -import re -try: - set -except NameError: - from sets import Set as set -import time - -from babel import __version__ as VERSION -from babel.core import Locale -from babel.dates import format_datetime -from babel.messages.plurals import get_plural -from babel.util import odict, distinct, LOCALTZ, UTC, FixedOffsetTimezone - -__all__ = ['Message', 'Catalog', 'TranslationError'] -__docformat__ = 'restructuredtext en' - - -PYTHON_FORMAT = re.compile(r'''(?x) - \% - (?:\(([\w]*)\))? - ( - [-#0\ +]?(?:\*|[\d]+)? - (?:\.(?:\*|[\d]+))? - [hlL]? - ) - ([diouxXeEfFgGcrs%]) -''') - - -class Message(object): - """Representation of a single message in a catalog.""" - - def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=(), lineno=None): - """Create the message object. - - :param id: the message ID, or a ``(singular, plural)`` tuple for - pluralizable messages - :param string: the translated message string, or a - ``(singular, plural)`` tuple for pluralizable messages - :param locations: a sequence of ``(filenname, lineno)`` tuples - :param flags: a set or sequence of flags - :param auto_comments: a sequence of automatic comments for the message - :param user_comments: a sequence of user comments for the message - :param previous_id: the previous message ID, or a ``(singular, plural)`` - tuple for pluralizable messages - :param lineno: the line number on which the msgid line was found in the - PO file, if any - """ - self.id = id #: The message ID - if not string and self.pluralizable: - string = (u'', u'') - self.string = string #: The message translation - self.locations = list(distinct(locations)) - self.flags = set(flags) - if id and self.python_format: - self.flags.add('python-format') - else: - self.flags.discard('python-format') - self.auto_comments = list(distinct(auto_comments)) - self.user_comments = list(distinct(user_comments)) - if isinstance(previous_id, basestring): - self.previous_id = [previous_id] - else: - self.previous_id = list(previous_id) - self.lineno = lineno - - def __repr__(self): - return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, - list(self.flags)) - - def __cmp__(self, obj): - """Compare Messages, taking into account plural ids""" - if isinstance(obj, Message): - plural = self.pluralizable - obj_plural = obj.pluralizable - if plural and obj_plural: - return cmp(self.id[0], obj.id[0]) - elif plural: - return cmp(self.id[0], obj.id) - elif obj_plural: - return cmp(self.id, obj.id[0]) - return cmp(self.id, obj.id) - - def clone(self): - return Message(*map(copy, (self.id, self.string, self.locations, - self.flags, self.auto_comments, - self.user_comments, self.previous_id, - self.lineno))) - - def check(self, catalog=None): - """Run various validation checks on the message. Some validations - are only performed if the catalog is provided. This method returns - a sequence of `TranslationError` objects. - - :rtype: ``iterator`` - :param catalog: A catalog instance that is passed to the checkers - :see: `Catalog.check` for a way to perform checks for all messages - in a catalog. - """ - from babel.messages.checkers import checkers - errors = [] - for checker in checkers: - try: - checker(catalog, self) - except TranslationError, e: - errors.append(e) - return errors - - def fuzzy(self): - return 'fuzzy' in self.flags - fuzzy = property(fuzzy, doc="""\ - Whether the translation is fuzzy. - - >>> Message('foo').fuzzy - False - >>> msg = Message('foo', 'foo', flags=['fuzzy']) - >>> msg.fuzzy - True - >>> msg - <Message 'foo' (flags: ['fuzzy'])> - - :type: `bool` - """) - - def pluralizable(self): - return isinstance(self.id, (list, tuple)) - pluralizable = property(pluralizable, doc="""\ - Whether the message is plurizable. - - >>> Message('foo').pluralizable - False - >>> Message(('foo', 'bar')).pluralizable - True - - :type: `bool` - """) - - def python_format(self): - ids = self.id - if not isinstance(ids, (list, tuple)): - ids = [ids] - return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids])) - python_format = property(python_format, doc="""\ - Whether the message contains Python-style parameters. - - >>> Message('foo %(name)s bar').python_format - True - >>> Message(('foo %(name)s', 'foo %(name)s')).python_format - True - - :type: `bool` - """) - - -class TranslationError(Exception): - """Exception thrown by translation checkers when invalid message - translations are encountered.""" - - -DEFAULT_HEADER = u"""\ -# Translations template for PROJECT. -# Copyright (C) YEAR ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. -#""" - - -class Catalog(object): - """Representation of a message catalog.""" - - def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, - project=None, version=None, copyright_holder=None, - msgid_bugs_address=None, creation_date=None, - revision_date=None, last_translator=None, language_team=None, - charset='utf-8', fuzzy=True): - """Initialize the catalog object. - - :param locale: the locale identifier or `Locale` object, or `None` - if the catalog is not bound to a locale (which basically - means it's a template) - :param domain: the message domain - :param header_comment: the header comment as string, or `None` for the - default header - :param project: the project's name - :param version: the project's version - :param copyright_holder: the copyright holder of the catalog - :param msgid_bugs_address: the email address or URL to submit bug - reports to - :param creation_date: the date the catalog was created - :param revision_date: the date the catalog was revised - :param last_translator: the name and email of the last translator - :param language_team: the name and email of the language team - :param charset: the encoding to use in the output - :param fuzzy: the fuzzy bit on the catalog header - """ - self.domain = domain #: The message domain - if locale: - locale = Locale.parse(locale) - self.locale = locale #: The locale or `None` - self._header_comment = header_comment - self._messages = odict() - - self.project = project or 'PROJECT' #: The project name - self.version = version or 'VERSION' #: The project version - self.copyright_holder = copyright_holder or 'ORGANIZATION' - self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' - - self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>' - """Name and email address of the last translator.""" - self.language_team = language_team or 'LANGUAGE <LL@li.org>' - """Name and email address of the language team.""" - - self.charset = charset or 'utf-8' - - if creation_date is None: - creation_date = datetime.now(LOCALTZ) - elif isinstance(creation_date, datetime) and not creation_date.tzinfo: - creation_date = creation_date.replace(tzinfo=LOCALTZ) - self.creation_date = creation_date #: Creation date of the template - if revision_date is None: - revision_date = datetime.now(LOCALTZ) - elif isinstance(revision_date, datetime) and not revision_date.tzinfo: - revision_date = revision_date.replace(tzinfo=LOCALTZ) - self.revision_date = revision_date #: Last revision date of the catalog - self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`) - - self.obsolete = odict() #: Dictionary of obsolete messages - self._num_plurals = None - self._plural_expr = None - - def _get_header_comment(self): - comment = self._header_comment - comment = comment.replace('PROJECT', self.project) \ - .replace('VERSION', self.version) \ - .replace('YEAR', self.revision_date.strftime('%Y')) \ - .replace('ORGANIZATION', self.copyright_holder) - if self.locale: - comment = comment.replace('Translations template', '%s translations' - % self.locale.english_name) - return comment - - def _set_header_comment(self, string): - self._header_comment = string - - header_comment = property(_get_header_comment, _set_header_comment, doc="""\ - The header comment for the catalog. - - >>> catalog = Catalog(project='Foobar', version='1.0', - ... copyright_holder='Foo Company') - >>> print catalog.header_comment #doctest: +ELLIPSIS - # Translations template for Foobar. - # Copyright (C) ... Foo Company - # This file is distributed under the same license as the Foobar project. - # FIRST AUTHOR <EMAIL@ADDRESS>, .... - # - - The header can also be set from a string. Any known upper-case variables - will be replaced when the header is retrieved again: - - >>> catalog = Catalog(project='Foobar', version='1.0', - ... copyright_holder='Foo Company') - >>> catalog.header_comment = '''\\ - ... # The POT for my really cool PROJECT project. - ... # Copyright (C) 1990-2003 ORGANIZATION - ... # This file is distributed under the same license as the PROJECT - ... # project. - ... #''' - >>> print catalog.header_comment - # The POT for my really cool Foobar project. - # Copyright (C) 1990-2003 Foo Company - # This file is distributed under the same license as the Foobar - # project. - # - - :type: `unicode` - """) - - def _get_mime_headers(self): - headers = [] - headers.append(('Project-Id-Version', - '%s %s' % (self.project, self.version))) - headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address)) - headers.append(('POT-Creation-Date', - format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', - locale='en'))) - if self.locale is None: - headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE')) - headers.append(('Last-Translator', 'FULL NAME <EMAIL@ADDRESS>')) - headers.append(('Language-Team', 'LANGUAGE <LL@li.org>')) - else: - headers.append(('PO-Revision-Date', - format_datetime(self.revision_date, - 'yyyy-MM-dd HH:mmZ', locale='en'))) - headers.append(('Last-Translator', self.last_translator)) - headers.append(('Language-Team', - self.language_team.replace('LANGUAGE', - str(self.locale)))) - headers.append(('Plural-Forms', self.plural_forms)) - headers.append(('MIME-Version', '1.0')) - headers.append(('Content-Type', - 'text/plain; charset=%s' % self.charset)) - headers.append(('Content-Transfer-Encoding', '8bit')) - headers.append(('Generated-By', 'Babel %s\n' % VERSION)) - return headers - - def _set_mime_headers(self, headers): - for name, value in headers: - if name.lower() == 'content-type': - mimetype, params = parse_header(value) - if 'charset' in params: - self.charset = params['charset'].lower() - break - for name, value in headers: - name = name.lower().decode(self.charset) - value = value.decode(self.charset) - if name == 'project-id-version': - parts = value.split(' ') - self.project = u' '.join(parts[:-1]) - self.version = parts[-1] - elif name == 'report-msgid-bugs-to': - self.msgid_bugs_address = value - elif name == 'last-translator': - self.last_translator = value - elif name == 'language-team': - self.language_team = value - elif name == 'plural-forms': - _, params = parse_header(' ;' + value) - self._num_plurals = int(params.get('nplurals', 2)) - self._plural_expr = params.get('plural', '(n != 1)') - elif name == 'pot-creation-date': - # FIXME: this should use dates.parse_datetime as soon as that - # is ready - value, tzoffset, _ = re.split('[+-](\d{4})$', value, 1) - tt = time.strptime(value, '%Y-%m-%d %H:%M') - ts = time.mktime(tt) - tzoffset = FixedOffsetTimezone(int(tzoffset[:2]) * 60 + - int(tzoffset[2:])) - dt = datetime.fromtimestamp(ts) - self.creation_date = dt.replace(tzinfo=tzoffset) - - mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ - The MIME headers of the catalog, used for the special ``msgid ""`` entry. - - The behavior of this property changes slightly depending on whether a locale - is set or not, the latter indicating that the catalog is actually a template - for actual translations. - - Here's an example of the output for such a catalog template: - - >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) - >>> catalog = Catalog(project='Foobar', version='1.0', - ... creation_date=created) - >>> for name, value in catalog.mime_headers: - ... print '%s: %s' % (name, value) - Project-Id-Version: Foobar 1.0 - Report-Msgid-Bugs-To: EMAIL@ADDRESS - POT-Creation-Date: 1990-04-01 15:30+0000 - PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE - Last-Translator: FULL NAME <EMAIL@ADDRESS> - Language-Team: LANGUAGE <LL@li.org> - MIME-Version: 1.0 - Content-Type: text/plain; charset=utf-8 - Content-Transfer-Encoding: 8bit - Generated-By: Babel ... - - And here's an example of the output when the locale is set: - - >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) - >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', - ... creation_date=created, revision_date=revised, - ... last_translator='John Doe <jd@example.com>', - ... language_team='de_DE <de@example.com>') - >>> for name, value in catalog.mime_headers: - ... print '%s: %s' % (name, value) - Project-Id-Version: Foobar 1.0 - Report-Msgid-Bugs-To: EMAIL@ADDRESS - POT-Creation-Date: 1990-04-01 15:30+0000 - PO-Revision-Date: 1990-08-03 12:00+0000 - Last-Translator: John Doe <jd@example.com> - Language-Team: de_DE <de@example.com> - Plural-Forms: nplurals=2; plural=(n != 1) - MIME-Version: 1.0 - Content-Type: text/plain; charset=utf-8 - Content-Transfer-Encoding: 8bit - Generated-By: Babel ... - - :type: `list` - """) - - def num_plurals(self): - if self._num_plurals is None: - num = 2 - if self.locale: - num = get_plural(self.locale)[0] - self._num_plurals = num - return self._num_plurals - num_plurals = property(num_plurals, doc="""\ - The number of plurals used by the catalog or locale. - - >>> Catalog(locale='en').num_plurals - 2 - >>> Catalog(locale='ga').num_plurals - 3 - - :type: `int` - """) - - def plural_expr(self): - if self._plural_expr is None: - expr = '(n != 1)' - if self.locale: - expr = get_plural(self.locale)[1] - self._plural_expr = expr - return self._plural_expr - plural_expr = property(plural_expr, doc="""\ - The plural expression used by the catalog or locale. - - >>> Catalog(locale='en').plural_expr - '(n != 1)' - >>> Catalog(locale='ga').plural_expr - '(n==1 ? 0 : n==2 ? 1 : 2)' - - :type: `basestring` - """) - - def plural_forms(self): - return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr) - plural_forms = property(plural_forms, doc="""\ - Return the plural forms declaration for the locale. - - >>> Catalog(locale='en').plural_forms - 'nplurals=2; plural=(n != 1)' - >>> Catalog(locale='pt_BR').plural_forms - 'nplurals=2; plural=(n > 1)' - - :type: `str` - """) - - def __contains__(self, id): - """Return whether the catalog has a message with the specified ID.""" - return self._key_for(id) in self._messages - - def __len__(self): - """The number of messages in the catalog. - - This does not include the special ``msgid ""`` entry. - """ - return len(self._messages) - - def __iter__(self): - """Iterates through all the entries in the catalog, in the order they - were added, yielding a `Message` object for every entry. - - :rtype: ``iterator`` - """ - buf = [] - for name, value in self.mime_headers: - buf.append('%s: %s' % (name, value)) - flags = set() - if self.fuzzy: - flags |= set(['fuzzy']) - yield Message(u'', '\n'.join(buf), flags=flags) - for key in self._messages: - yield self._messages[key] - - def __repr__(self): - locale = '' - if self.locale: - locale = ' %s' % self.locale - return '<%s %r%s>' % (type(self).__name__, self.domain, locale) - - def __delitem__(self, id): - """Delete the message with the specified ID.""" - key = self._key_for(id) - if key in self._messages: - del self._messages[key] - - def __getitem__(self, id): - """Return the message with the specified ID. - - :param id: the message ID - :return: the message with the specified ID, or `None` if no such message - is in the catalog - :rtype: `Message` - """ - return self._messages.get(self._key_for(id)) - - def __setitem__(self, id, message): - """Add or update the message with the specified ID. - - >>> catalog = Catalog() - >>> catalog[u'foo'] = Message(u'foo') - >>> catalog[u'foo'] - <Message u'foo' (flags: [])> - - If a message with that ID is already in the catalog, it is updated - to include the locations and flags of the new message. - - >>> catalog = Catalog() - >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) - >>> catalog[u'foo'].locations - [('main.py', 1)] - >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) - >>> catalog[u'foo'].locations - [('main.py', 1), ('utils.py', 5)] - - :param id: the message ID - :param message: the `Message` object - """ - assert isinstance(message, Message), 'expected a Message object' - key = self._key_for(id) - current = self._messages.get(key) - if current: - if message.pluralizable and not current.pluralizable: - # The new message adds pluralization - current.id = message.id - current.string = message.string - current.locations = list(distinct(current.locations + - message.locations)) - current.auto_comments = list(distinct(current.auto_comments + - message.auto_comments)) - current.user_comments = list(distinct(current.user_comments + - message.user_comments)) - current.flags |= message.flags - message = current - elif id == '': - # special treatment for the header message - headers = message_from_string(message.string.encode(self.charset)) - self.mime_headers = headers.items() - self.header_comment = '\n'.join(['# %s' % comment for comment - in message.user_comments]) - self.fuzzy = message.fuzzy - else: - if isinstance(id, (list, tuple)): - assert isinstance(message.string, (list, tuple)), \ - 'Expected sequence but got %s' % type(message.string) - self._messages[key] = message - - def add(self, id, string=None, locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=(), lineno=None): - """Add or update the message with the specified ID. - - >>> catalog = Catalog() - >>> catalog.add(u'foo') - >>> catalog[u'foo'] - <Message u'foo' (flags: [])> - - This method simply constructs a `Message` object with the given - arguments and invokes `__setitem__` with that object. - - :param id: the message ID, or a ``(singular, plural)`` tuple for - pluralizable messages - :param string: the translated message string, or a - ``(singular, plural)`` tuple for pluralizable messages - :param locations: a sequence of ``(filenname, lineno)`` tuples - :param flags: a set or sequence of flags - :param auto_comments: a sequence of automatic comments - :param user_comments: a sequence of user comments - :param previous_id: the previous message ID, or a ``(singular, plural)`` - tuple for pluralizable messages - :param lineno: the line number on which the msgid line was found in the - PO file, if any - """ - self[id] = Message(id, string, list(locations), flags, auto_comments, - user_comments, previous_id, lineno=lineno) - - def check(self): - """Run various validation checks on the translations in the catalog. - - For every message which fails validation, this method yield a - ``(message, errors)`` tuple, where ``message`` is the `Message` object - and ``errors`` is a sequence of `TranslationError` objects. - - :rtype: ``iterator`` - """ - for message in self._messages.values(): - errors = message.check(catalog=self) - if errors: - yield message, errors - - def update(self, template, no_fuzzy_matching=False): - """Update the catalog based on the given template catalog. - - >>> from babel.messages import Catalog - >>> template = Catalog() - >>> template.add('green', locations=[('main.py', 99)]) - >>> template.add('blue', locations=[('main.py', 100)]) - >>> template.add(('salad', 'salads'), locations=[('util.py', 42)]) - >>> catalog = Catalog(locale='de_DE') - >>> catalog.add('blue', u'blau', locations=[('main.py', 98)]) - >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) - >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), - ... locations=[('util.py', 38)]) - - >>> catalog.update(template) - >>> len(catalog) - 3 - - >>> msg1 = catalog['green'] - >>> msg1.string - >>> msg1.locations - [('main.py', 99)] - - >>> msg2 = catalog['blue'] - >>> msg2.string - u'blau' - >>> msg2.locations - [('main.py', 100)] - - >>> msg3 = catalog['salad'] - >>> msg3.string - (u'Salat', u'Salate') - >>> msg3.locations - [('util.py', 42)] - - Messages that are in the catalog but not in the template are removed - from the main collection, but can still be accessed via the `obsolete` - member: - - >>> 'head' in catalog - False - >>> catalog.obsolete.values() - [<Message 'head' (flags: [])>] - - :param template: the reference catalog, usually read from a POT file - :param no_fuzzy_matching: whether to use fuzzy matching of message IDs - """ - messages = self._messages - remaining = messages.copy() - self._messages = odict() - - # Prepare for fuzzy matching - fuzzy_candidates = [] - if not no_fuzzy_matching: - fuzzy_candidates = [ - self._key_for(msgid) for msgid in messages - if msgid and messages[msgid].string - ] - fuzzy_matches = set() - - def _merge(message, oldkey, newkey): - message = message.clone() - fuzzy = False - if oldkey != newkey: - fuzzy = True - fuzzy_matches.add(oldkey) - oldmsg = messages.get(oldkey) - if isinstance(oldmsg.id, basestring): - message.previous_id = [oldmsg.id] - else: - message.previous_id = list(oldmsg.id) - else: - oldmsg = remaining.pop(oldkey, None) - message.string = oldmsg.string - if isinstance(message.id, (list, tuple)): - if not isinstance(message.string, (list, tuple)): - fuzzy = True - message.string = tuple( - [message.string] + ([u''] * (len(message.id) - 1)) - ) - elif len(message.string) != self.num_plurals: - fuzzy = True - message.string = tuple(message.string[:len(oldmsg.string)]) - elif isinstance(message.string, (list, tuple)): - fuzzy = True - message.string = message.string[0] - message.flags |= oldmsg.flags - if fuzzy: - message.flags |= set([u'fuzzy']) - self[message.id] = message - - for message in template: - if message.id: - key = self._key_for(message.id) - if key in messages: - _merge(message, key, key) - else: - if no_fuzzy_matching is False: - # do some fuzzy matching with difflib - matches = get_close_matches(key.lower().strip(), - fuzzy_candidates, 1) - if matches: - _merge(message, matches[0], key) - continue - - self[message.id] = message - - self.obsolete = odict() - for msgid in remaining: - if no_fuzzy_matching or msgid not in fuzzy_matches: - self.obsolete[msgid] = remaining[msgid] - - def _key_for(self, id): - """The key for a message is just the singular ID even for pluralizable - messages. - """ - key = id - if isinstance(key, (list, tuple)): - key = id[0] - return key |