Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/babel/messages/extract.py
diff options
context:
space:
mode:
Diffstat (limited to 'babel/messages/extract.py')
-rw-r--r--babel/messages/extract.py554
1 files changed, 0 insertions, 554 deletions
diff --git a/babel/messages/extract.py b/babel/messages/extract.py
deleted file mode 100644
index 1f3a662..0000000
--- a/babel/messages/extract.py
+++ /dev/null
@@ -1,554 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2007 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://babel.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://babel.edgewall.org/log/.
-
-"""Basic infrastructure for extracting localizable messages from source files.
-
-This module defines an extensible system for collecting localizable message
-strings from a variety of sources. A native extractor for Python source files
-is built in; extractors for other sources can be added using very simple plugins.
-
-The main entry points into the extraction functionality are the functions
-`extract_from_dir` and `extract_from_file`.
-"""
-
-import os
-try:
- set
-except NameError:
- from sets import Set as set
-import sys
-from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
-
-from babel.util import parse_encoding, pathmatch, relpath
-from textwrap import dedent
-
-__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
-__docformat__ = 'restructuredtext en'
-
-GROUP_NAME = 'babel.extractors'
-
-DEFAULT_KEYWORDS = {
- '_': None,
- 'gettext': None,
- 'ngettext': (1, 2),
- 'ugettext': None,
- 'ungettext': (1, 2),
- 'dgettext': (2,),
- 'dngettext': (2, 3),
- 'N_': None
-}
-
-DEFAULT_MAPPING = [('**.py', 'python')]
-
-empty_msgid_warning = (
-'%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") '
-'returns the header entry with meta information, not the empty string.')
-
-
-def _strip_comment_tags(comments, tags):
- """Helper function for `extract` that strips comment tags from strings
- in a list of comment lines. This functions operates in-place.
- """
- def _strip(line):
- for tag in tags:
- if line.startswith(tag):
- return line[len(tag):].strip()
- return line
- comments[:] = map(_strip, comments)
-
-
-def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
- options_map=None, keywords=DEFAULT_KEYWORDS,
- comment_tags=(), callback=None, strip_comment_tags=False):
- """Extract messages from any source files found in the given directory.
-
- This function generates tuples of the form:
-
- ``(filename, lineno, message, comments)``
-
- Which extraction method is used per file is determined by the `method_map`
- parameter, which maps extended glob patterns to extraction method names.
- For example, the following is the default mapping:
-
- >>> method_map = [
- ... ('**.py', 'python')
- ... ]
-
- This basically says that files with the filename extension ".py" at any
- level inside the directory should be processed by the "python" extraction
- method. Files that don't match any of the mapping patterns are ignored. See
- the documentation of the `pathmatch` function for details on the pattern
- syntax.
-
- The following extended mapping would also use the "genshi" extraction
- method on any file in "templates" subdirectory:
-
- >>> method_map = [
- ... ('**/templates/**.*', 'genshi'),
- ... ('**.py', 'python')
- ... ]
-
- The dictionary provided by the optional `options_map` parameter augments
- these mappings. It uses extended glob patterns as keys, and the values are
- dictionaries mapping options names to option values (both strings).
-
- The glob patterns of the `options_map` do not necessarily need to be the
- same as those used in the method mapping. For example, while all files in
- the ``templates`` folders in an application may be Genshi applications, the
- options for those files may differ based on extension:
-
- >>> options_map = {
- ... '**/templates/**.txt': {
- ... 'template_class': 'genshi.template:TextTemplate',
- ... 'encoding': 'latin-1'
- ... },
- ... '**/templates/**.html': {
- ... 'include_attrs': ''
- ... }
- ... }
-
- :param dirname: the path to the directory to extract messages from
-    :param method_map: a list of ``(pattern, method)`` tuples that maps
-                       extended glob patterns to extraction method names
- :param options_map: a dictionary of additional options (optional)
- :param keywords: a dictionary mapping keywords (i.e. names of functions
- that should be recognized as translation functions) to
- tuples that specify which of their arguments contain
- localizable strings
- :param comment_tags: a list of tags of translator comments to search for
- and include in the results
- :param callback: a function that is called for every file that message are
- extracted from, just before the extraction itself is
- performed; the function is passed the filename, the name
-                     of the extraction method and the options dictionary as
- positional arguments, in that order
- :param strip_comment_tags: a flag that if set to `True` causes all comment
- tags to be removed from the collected comments.
- :return: an iterator over ``(filename, lineno, funcname, message)`` tuples
- :rtype: ``iterator``
- :see: `pathmatch`
- """
- if options_map is None:
- options_map = {}
-
- absname = os.path.abspath(dirname)
- for root, dirnames, filenames in os.walk(absname):
- for subdir in dirnames:
- if subdir.startswith('.') or subdir.startswith('_'):
- dirnames.remove(subdir)
- dirnames.sort()
- filenames.sort()
- for filename in filenames:
- filename = relpath(
- os.path.join(root, filename).replace(os.sep, '/'),
- dirname
- )
- for pattern, method in method_map:
- if pathmatch(pattern, filename):
- filepath = os.path.join(absname, filename)
- options = {}
- for opattern, odict in options_map.items():
- if pathmatch(opattern, filename):
- options = odict
- if callback:
- callback(filename, method, options)
- for lineno, message, comments in \
- extract_from_file(method, filepath,
- keywords=keywords,
- comment_tags=comment_tags,
- options=options,
- strip_comment_tags=
- strip_comment_tags):
- yield filename, lineno, message, comments
- break
-
-
-def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
- comment_tags=(), options=None, strip_comment_tags=False):
- """Extract messages from a specific file.
-
- This function returns a list of tuples of the form:
-
- ``(lineno, funcname, message)``
-
- :param filename: the path to the file to extract messages from
-    :param method: a string specifying the extraction method (e.g. "python")
- :param keywords: a dictionary mapping keywords (i.e. names of functions
- that should be recognized as translation functions) to
- tuples that specify which of their arguments contain
- localizable strings
- :param comment_tags: a list of translator tags to search for and include
- in the results
- :param strip_comment_tags: a flag that if set to `True` causes all comment
- tags to be removed from the collected comments.
- :param options: a dictionary of additional options (optional)
- :return: the list of extracted messages
- :rtype: `list`
- """
- fileobj = open(filename, 'U')
- try:
- return list(extract(method, fileobj, keywords, comment_tags, options,
- strip_comment_tags))
- finally:
- fileobj.close()
-
-
-def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
- options=None, strip_comment_tags=False):
- """Extract messages from the given file-like object using the specified
- extraction method.
-
- This function returns a list of tuples of the form:
-
- ``(lineno, message, comments)``
-
- The implementation dispatches the actual extraction to plugins, based on the
- value of the ``method`` parameter.
-
- >>> source = '''# foo module
- ... def run(argv):
- ... print _('Hello, world!')
- ... '''
-
- >>> from StringIO import StringIO
- >>> for message in extract('python', StringIO(source)):
- ... print message
- (3, u'Hello, world!', [])
-
-    :param method: a string specifying the extraction method (e.g. "python");
- if this is a simple name, the extraction function will be
- looked up by entry point; if it is an explicit reference
- to a function (of the form ``package.module:funcname`` or
- ``package.module.funcname``), the corresponding function
- will be imported and used
- :param fileobj: the file-like object the messages should be extracted from
- :param keywords: a dictionary mapping keywords (i.e. names of functions
- that should be recognized as translation functions) to
- tuples that specify which of their arguments contain
- localizable strings
- :param comment_tags: a list of translator tags to search for and include
- in the results
- :param options: a dictionary of additional options (optional)
- :param strip_comment_tags: a flag that if set to `True` causes all comment
- tags to be removed from the collected comments.
- :return: the list of extracted messages
- :rtype: `list`
- :raise ValueError: if the extraction method is not registered
- """
- func = None
- if ':' in method or '.' in method:
- if ':' not in method:
- lastdot = method.rfind('.')
- module, attrname = method[:lastdot], method[lastdot + 1:]
- else:
- module, attrname = method.split(':', 1)
- func = getattr(__import__(module, {}, {}, [attrname]), attrname)
- else:
- try:
- from pkg_resources import working_set
- except ImportError:
- # pkg_resources is not available, so we resort to looking up the
- # builtin extractors directly
- builtin = {'ignore': extract_nothing, 'python': extract_python}
- func = builtin.get(method)
- else:
- for entry_point in working_set.iter_entry_points(GROUP_NAME,
- method):
- func = entry_point.load(require=True)
- break
- if func is None:
- raise ValueError('Unknown extraction method %r' % method)
-
- results = func(fileobj, keywords.keys(), comment_tags,
- options=options or {})
-
- for lineno, funcname, messages, comments in results:
- if funcname:
- spec = keywords[funcname] or (1,)
- else:
- spec = (1,)
- if not isinstance(messages, (list, tuple)):
- messages = [messages]
- if not messages:
- continue
-
- # Validate the messages against the keyword's specification
- msgs = []
- invalid = False
- # last_index is 1 based like the keyword spec
- last_index = len(messages)
- for index in spec:
- if last_index < index:
- # Not enough arguments
- invalid = True
- break
- message = messages[index - 1]
- if message is None:
- invalid = True
- break
- msgs.append(message)
- if invalid:
- continue
-
- first_msg_index = spec[0] - 1
- if not messages[first_msg_index]:
- # An empty string msgid isn't valid, emit a warning
- where = '%s:%i' % (hasattr(fileobj, 'name') and \
- fileobj.name or '(unknown)', lineno)
- print >> sys.stderr, empty_msgid_warning % where
- continue
-
- messages = tuple(msgs)
- if len(messages) == 1:
- messages = messages[0]
-
- if strip_comment_tags:
- _strip_comment_tags(comments, comment_tags)
- yield lineno, messages, comments
-
-
-def extract_nothing(fileobj, keywords, comment_tags, options):
- """Pseudo extractor that does not actually extract anything, but simply
- returns an empty list.
- """
- return []
-
-
-def extract_python(fileobj, keywords, comment_tags, options):
- """Extract messages from Python source code.
-
- :param fileobj: the seekable, file-like object the messages should be
- extracted from
- :param keywords: a list of keywords (i.e. function names) that should be
- recognized as translation functions
- :param comment_tags: a list of translator tags to search for and include
- in the results
- :param options: a dictionary of additional options (optional)
- :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
- :rtype: ``iterator``
- """
- funcname = lineno = message_lineno = None
- call_stack = -1
- buf = []
- messages = []
- translator_comments = []
- in_def = in_translator_comments = False
- comment_tag = None
-
- encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
-
- tokens = generate_tokens(fileobj.readline)
- for tok, value, (lineno, _), _, _ in tokens:
- if call_stack == -1 and tok == NAME and value in ('def', 'class'):
- in_def = True
- elif tok == OP and value == '(':
- if in_def:
- # Avoid false positives for declarations such as:
- # def gettext(arg='message'):
- in_def = False
- continue
- if funcname:
- message_lineno = lineno
- call_stack += 1
- elif in_def and tok == OP and value == ':':
- # End of a class definition without parens
- in_def = False
- continue
- elif call_stack == -1 and tok == COMMENT:
- # Strip the comment token from the line
- value = value.decode(encoding)[1:].strip()
- if in_translator_comments and \
- translator_comments[-1][0] == lineno - 1:
- # We're already inside a translator comment, continue appending
- translator_comments.append((lineno, value))
- continue
- # If execution reaches this point, let's see if comment line
- # starts with one of the comment tags
- for comment_tag in comment_tags:
- if value.startswith(comment_tag):
- in_translator_comments = True
- translator_comments.append((lineno, value))
- break
- elif funcname and call_stack == 0:
- if tok == OP and value == ')':
- if buf:
- messages.append(''.join(buf))
- del buf[:]
- else:
- messages.append(None)
-
- if len(messages) > 1:
- messages = tuple(messages)
- else:
- messages = messages[0]
-                    # Comments don't apply unless they immediately precede the
- # message
- if translator_comments and \
- translator_comments[-1][0] < message_lineno - 1:
- translator_comments = []
-
- yield (message_lineno, funcname, messages,
- [comment[1] for comment in translator_comments])
-
- funcname = lineno = message_lineno = None
- call_stack = -1
- messages = []
- translator_comments = []
- in_translator_comments = False
- elif tok == STRING:
- # Unwrap quotes in a safe manner, maintaining the string's
- # encoding
- # https://sourceforge.net/tracker/?func=detail&atid=355470&
- # aid=617979&group_id=5470
- value = eval('# coding=%s\n%s' % (encoding, value),
- {'__builtins__':{}}, {})
- if isinstance(value, str):
- value = value.decode(encoding)
- buf.append(value)
- elif tok == OP and value == ',':
- if buf:
- messages.append(''.join(buf))
- del buf[:]
- else:
- messages.append(None)
- if translator_comments:
- # We have translator comments, and since we're on a
- # comma(,) user is allowed to break into a new line
- # Let's increase the last comment's lineno in order
- # for the comment to still be a valid one
- old_lineno, old_comment = translator_comments.pop()
- translator_comments.append((old_lineno+1, old_comment))
- elif call_stack > 0 and tok == OP and value == ')':
- call_stack -= 1
- elif funcname and call_stack == -1:
- funcname = None
- elif tok == NAME and value in keywords:
- funcname = value
-
-
-def extract_javascript(fileobj, keywords, comment_tags, options):
- """Extract messages from JavaScript source code.
-
- :param fileobj: the seekable, file-like object the messages should be
- extracted from
- :param keywords: a list of keywords (i.e. function names) that should be
- recognized as translation functions
- :param comment_tags: a list of translator tags to search for and include
- in the results
- :param options: a dictionary of additional options (optional)
- :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
- :rtype: ``iterator``
- """
- from babel.messages.jslexer import tokenize, unquote_string
- funcname = message_lineno = None
- messages = []
- last_argument = None
- translator_comments = []
- concatenate_next = False
- encoding = options.get('encoding', 'utf-8')
- last_token = None
- call_stack = -1
-
- for token in tokenize(fileobj.read().decode(encoding)):
- if token.type == 'operator' and token.value == '(':
- if funcname:
- message_lineno = token.lineno
- call_stack += 1
-
- elif call_stack == -1 and token.type == 'linecomment':
- value = token.value[2:].strip()
- if translator_comments and \
- translator_comments[-1][0] == token.lineno - 1:
- translator_comments.append((token.lineno, value))
- continue
-
- for comment_tag in comment_tags:
- if value.startswith(comment_tag):
- translator_comments.append((token.lineno, value.strip()))
- break
-
- elif token.type == 'multilinecomment':
-        # only one multi-line comment may precede a translation
- translator_comments = []
- value = token.value[2:-2].strip()
- for comment_tag in comment_tags:
- if value.startswith(comment_tag):
- lines = value.splitlines()
- if lines:
- lines[0] = lines[0].strip()
- lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
- for offset, line in enumerate(lines):
- translator_comments.append((token.lineno + offset,
- line))
- break
-
- elif funcname and call_stack == 0:
- if token.type == 'operator' and token.value == ')':
- if last_argument is not None:
- messages.append(last_argument)
- if len(messages) > 1:
- messages = tuple(messages)
- elif messages:
- messages = messages[0]
- else:
- messages = None
-
-                # Comments don't apply unless they immediately precede the
- # message
- if translator_comments and \
- translator_comments[-1][0] < message_lineno - 1:
- translator_comments = []
-
- if messages is not None:
- yield (message_lineno, funcname, messages,
- [comment[1] for comment in translator_comments])
-
- funcname = message_lineno = last_argument = None
- concatenate_next = False
- translator_comments = []
- messages = []
- call_stack = -1
-
- elif token.type == 'string':
- new_value = unquote_string(token.value)
- if concatenate_next:
- last_argument = (last_argument or '') + new_value
- concatenate_next = False
- else:
- last_argument = new_value
-
- elif token.type == 'operator':
- if token.value == ',':
- if last_argument is not None:
- messages.append(last_argument)
- last_argument = None
- else:
- messages.append(None)
- concatenate_next = False
- elif token.value == '+':
- concatenate_next = True
-
- elif call_stack > 0 and token.type == 'operator' \
- and token.value == ')':
- call_stack -= 1
-
- elif funcname and call_stack == -1:
- funcname = None
-
- elif call_stack == -1 and token.type == 'name' and \
- token.value in keywords and \
- (last_token is None or last_token.type != 'name' or
- last_token.value != 'function'):
- funcname = token.value
-
- last_token = token