Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/babel/messages/pofile.py
diff options
context:
space:
mode:
Diffstat (limited to 'babel/messages/pofile.py')
-rw-r--r--babel/messages/pofile.py455
1 files changed, 455 insertions, 0 deletions
diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py
new file mode 100644
index 0000000..c92f991
--- /dev/null
+++ b/babel/messages/pofile.py
@@ -0,0 +1,455 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Reading and writing of files in the ``gettext`` PO (portable object)
+format.
+
+:see: `The Format of PO Files
+ <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
+"""
+
+from datetime import date, datetime
+import os
+import re
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+from babel import __version__ as VERSION
+from babel.messages.catalog import Catalog, Message
+from babel.util import wraptext, LOCALTZ
+
+__all__ = ['read_po', 'write_po']
+__docformat__ = 'restructuredtext en'
+
def unescape(string):
    r"""Reverse `escape` the given string.

    >>> print(unescape('"Say:\\n  \\"hello, world!\\"\\n"'))
    Say:
      "hello, world!"
    <BLANKLINE>

    Escape sequences are decoded in a single left-to-right pass, so an
    escaped backslash is never re-interpreted together with the character
    that follows it:

    >>> print(unescape('"C:\\\\new"'))
    C:\new

    :param string: the string to unescape, including the enclosing double
                   quotes (the first and last character are dropped)
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def _decode(match):
        # A backslash or double quote after the backslash stands for itself.
        char = match.group(1)
        return control_chars.get(char, char)

    # Chained ``str.replace`` calls would re-scan their own output, turning
    # the text ``\\n`` (escaped backslash + "n") into a newline. A single
    # regex pass consumes each escape sequence exactly once.
    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
+
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"'''))
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    A multi-line value does not have to start with an empty string; every
    physical line is unquoted on its own:

    >>> print(denormalize(r'''"Say: "
    ... "hello"'''))
    Say: hello

    :param string: the string to denormalize: one double-quoted string per
                   line, lines separated by newlines
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' not in string:
        return unescape(string)

    escaped_lines = string.splitlines()
    if string.startswith('""'):
        # The leading empty string only introduces a multi-line value and
        # contributes no text.
        escaped_lines = escaped_lines[1:]
    # Each line carries its own pair of quotes, so it must be unescaped
    # separately; unescaping the joined text would keep the inner quotes.
    return ''.join([unescape(line) for line in escaped_lines])
+
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: the catalog populated with the messages read from the file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state shared with the nested helpers. Scalars are kept in
    # one-element lists so the helpers can update them in place.
    counter = [0]           # number of messages added so far
    offset = [0]            # 0-based line index of the current ``msgid``
    messages = []           # raw msgid (and msgid_plural) of the current entry
    translations = []       # [plural index, raw msgstr] pairs
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]      # whether the current entry is a ``#~`` entry
    in_msgid = [False]
    in_msgstr = [False]

    def _add_message():
        # Turn the collected state into a `Message`, store it in the catalog
        # and reset the state for the next entry.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    # Missing plural forms are filled with empty strings.
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        # Pass copies of the comment lists: the originals are emptied below
        # and must not be shared with the message object.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line, is_obsolete=False):
        # Handle a ``msgid``/``msgid_plural``/``msgstr`` line or a quoted
        # continuation line (with any ``#~`` marker already removed).
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            messages.append(line[12:].lstrip())
        elif line.startswith('msgid'):
            in_msgid[0] = True
            txt = line[5:].lstrip()
            if messages:
                # Flush the previous entry *before* recording the new line
                # offset, otherwise it would get this entry's line number.
                _add_message()
            offset[0] = lineno
            obsolete[0] = is_obsolete
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip().decode(catalog.charset)
        if line.startswith('#~'):
            # Obsolete entries are regular entries behind a ``#~`` marker.
            # They must not go through the comment handling below: that
            # resets the continuation state and flushes the pending entry,
            # which loses continuation lines and splits plural entries.
            _process_message_line(lineno, line[2:].lstrip(), is_obsolete=True)
        elif line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            marker = line[1:2]
            if marker == ':':
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
            elif marker == ',':
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif marker == '.':
                # These are called auto-comments
                comment = line[2:].strip()
                if comment: # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
+
# Pattern that splits a line into chunks at which it may be wrapped. The
# alternatives are wrapped in one capturing group, so ``WORD_SEP.split()``
# keeps the separators in its result.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',   # em-dash
]))
+
def escape(string):
    r"""Quote the given string for use as a double-quoted ``PO`` file string.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences, and the result is wrapped
    in double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # Order matters: backslashes are doubled first so that the backslashes
    # introduced by the later substitutions are not escaped again.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('"', '\\"'),
    )
    escaped = string
    for plain, sequence in substitutions:
        escaped = escaped.replace(plain, sequence)
    return '"%s"' % escaped
+
def normalize(string, prefix='', width=76):
    r"""Render a string the way it is written to a .po file.

    A value that fits on a single line is simply escaped. Anything else is
    emitted as an empty string followed by one quoted string per line.

    >>> print(normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if not (width and width > 0):
        # Wrapping disabled: only the line breaks of the input matter.
        lines = string.splitlines(True)
    else:
        indent = len(prefix)
        lines = []
        for raw_line in string.splitlines(True):
            if len(escape(raw_line)) + indent <= width:
                lines.append(raw_line)
                continue
            # The escaped line is too wide: refill it chunk by chunk.
            # Chunks are consumed from the end of the reversed list.
            pending = WORD_SEP.split(raw_line)
            pending.reverse()
            while pending:
                pieces = []
                used = 2  # the two enclosing double quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2 + indent
                    if used + cost >= width:
                        if not pieces:
                            # handle long chunks by putting them on a
                            # separate line
                            pieces.append(pending.pop())
                        break
                    pieces.append(pending.pop())
                    used += cost
                lines.append(u''.join(pieces))

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    quoted = [prefix + escape(entry) for entry in lines]
    return u'""\n' + u'\n'.join(quoted)
+
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        # Quote/wrap the string and encode it in the catalog's charset.
        return normalize(key, prefix=prefix, width=width) \
            .encode(catalog.charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        if width and width > 0:
            lines = wraptext(comment, width)
        else:
            # Wrapping is disabled: write the comment's own lines unchanged.
            # The string must not be iterated directly, as that would emit
            # one comment line per character.
            lines = comment.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Fewer translations than plural forms: pad with ''.
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(lambda x,y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            header_lines = []
            for line in catalog.header_comment.splitlines():
                if width and width > 0:
                    header_lines += wraptext(line, width=width,
                                             subsequent_indent='# ')
                else:
                    header_lines.append(line)
            # Always terminate the header comment with a newline, also when
            # wrapping is disabled, so that the ``msgid ""`` line that
            # follows starts on a line of its own.
            _write(u'\n'.join(header_lines) + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # The leading empty item produces the "#, flag, flag" form.
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')