diff options
Diffstat (limited to 'babel/core.py')
-rw-r--r-- | babel/core.py | 784 |
1 files changed, 784 insertions, 0 deletions
diff --git a/babel/core.py b/babel/core.py new file mode 100644 index 0000000..cc677d0 --- /dev/null +++ b/babel/core.py @@ -0,0 +1,784 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Core locale representation and locale data access.""" + +import os +import pickle + +from babel import localedata + +__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale', + 'parse_locale'] +__docformat__ = 'restructuredtext en' + +_global_data = None + +def get_global(key): + """Return the dictionary for the given key in the global data. + + The global data is stored in the ``babel/global.dat`` file and contains + information independent of individual locales. + + >>> get_global('zone_aliases')['UTC'] + 'Etc/GMT' + >>> get_global('zone_territories')['Europe/Berlin'] + 'DE' + + :param key: the data key + :return: the dictionary found in the global data under the given key + :rtype: `dict` + :since: version 0.9 + """ + global _global_data + if _global_data is None: + dirname = os.path.join(os.path.dirname(__file__)) + filename = os.path.join(dirname, 'global.dat') + fileobj = open(filename, 'rb') + try: + _global_data = pickle.load(fileobj) + finally: + fileobj.close() + return _global_data.get(key, {}) + + +LOCALE_ALIASES = { + 'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ', + 'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', + 'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES', + 'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT', + 'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV', + 'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL', + 'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI', + 'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA' +} + + +class UnknownLocaleError(Exception): + """Exception thrown when a locale is requested for which no locale data + is available. + """ + + def __init__(self, identifier): + """Create the exception. + + :param identifier: the identifier string of the unsupported locale + """ + Exception.__init__(self, 'unknown locale %r' % identifier) + self.identifier = identifier + + +class Locale(object): + """Representation of a specific locale. + + >>> locale = Locale('en', 'US') + >>> repr(locale) + '<Locale "en_US">' + >>> locale.display_name + u'English (United States)' + + A `Locale` object can also be instantiated from a raw locale string: + + >>> locale = Locale.parse('en-US', sep='-') + >>> repr(locale) + '<Locale "en_US">' + + `Locale` objects provide access to a collection of locale data, such as + territory and language names, number and date format patterns, and more: + + >>> locale.number_symbols['decimal'] + u'.' + + If a locale is requested for which no locale data is available, an + `UnknownLocaleError` is raised: + + >>> Locale.parse('en_DE') + Traceback (most recent call last): + ... + UnknownLocaleError: unknown locale 'en_DE' + + :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_ + """ + + def __init__(self, language, territory=None, script=None, variant=None): + """Initialize the locale object from the given identifier components. + + >>> locale = Locale('en', 'US') + >>> locale.language + 'en' + >>> locale.territory + 'US' + + :param language: the language code + :param territory: the territory (country or region) code + :param script: the script code + :param variant: the variant code + :raise `UnknownLocaleError`: if no locale data is available for the + requested locale + """ + self.language = language + self.territory = territory + self.script = script + self.variant = variant + self.__data = None + + identifier = str(self) + if not localedata.exists(identifier): + raise UnknownLocaleError(identifier) + + def default(cls, category=None, aliases=LOCALE_ALIASES): + """Return the system default locale for the specified category. + + >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']: + ... os.environ[name] = '' + >>> os.environ['LANG'] = 'fr_FR.UTF-8' + >>> Locale.default('LC_MESSAGES') + <Locale "fr_FR"> + + :param category: one of the ``LC_XXX`` environment variable names + :param aliases: a dictionary of aliases for locale identifiers + :return: the value of the variable, or any of the fallbacks + (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``) + :rtype: `Locale` + :see: `default_locale` + """ + return cls(default_locale(category, aliases=aliases)) + default = classmethod(default) + + def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES): + """Find the best match between available and requested locale strings. + + >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT']) + <Locale "de_DE"> + >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de']) + <Locale "de"> + >>> Locale.negotiate(['de_DE', 'de'], ['en_US']) + + You can specify the character used in the locale identifiers to separate + the differnet components. This separator is applied to both lists. Also, + case is ignored in the comparison: + + >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-') + <Locale "de_DE"> + + :param preferred: the list of locale identifers preferred by the user + :param available: the list of locale identifiers available + :param aliases: a dictionary of aliases for locale identifiers + :return: the `Locale` object for the best match, or `None` if no match + was found + :rtype: `Locale` + :see: `negotiate_locale` + """ + identifier = negotiate_locale(preferred, available, sep=sep, + aliases=aliases) + if identifier: + return Locale.parse(identifier, sep=sep) + negotiate = classmethod(negotiate) + + def parse(cls, identifier, sep='_'): + """Create a `Locale` instance for the given locale identifier. + + >>> l = Locale.parse('de-DE', sep='-') + >>> l.display_name + u'Deutsch (Deutschland)' + + If the `identifier` parameter is not a string, but actually a `Locale` + object, that object is returned: + + >>> Locale.parse(l) + <Locale "de_DE"> + + :param identifier: the locale identifier string + :param sep: optional component separator + :return: a corresponding `Locale` instance + :rtype: `Locale` + :raise `ValueError`: if the string does not appear to be a valid locale + identifier + :raise `UnknownLocaleError`: if no locale data is available for the + requested locale + :see: `parse_locale` + """ + if isinstance(identifier, basestring): + return cls(*parse_locale(identifier, sep=sep)) + return identifier + parse = classmethod(parse) + + def __eq__(self, other): + return str(self) == str(other) + + def __repr__(self): + return '<Locale "%s">' % str(self) + + def __str__(self): + return '_'.join(filter(None, [self.language, self.script, + self.territory, self.variant])) + + def _data(self): + if self.__data is None: + self.__data = localedata.LocaleDataDict(localedata.load(str(self))) + return self.__data + _data = property(_data) + + def get_display_name(self, locale=None): + """Return the display name of the locale using the given locale. + + The display name will include the language, territory, script, and + variant, if those are specified. + + >>> Locale('zh', 'CN', script='Hans').get_display_name('en') + u'Chinese (Simplified Han, China)' + + :param locale: the locale to use + :return: the display name + """ + if locale is None: + locale = self + locale = Locale.parse(locale) + retval = locale.languages.get(self.language) + if self.territory or self.script or self.variant: + details = [] + if self.script: + details.append(locale.scripts.get(self.script)) + if self.territory: + details.append(locale.territories.get(self.territory)) + if self.variant: + details.append(locale.variants.get(self.variant)) + details = filter(None, details) + if details: + retval += ' (%s)' % u', '.join(details) + return retval + + display_name = property(get_display_name, doc="""\ + The localized display name of the locale. + + >>> Locale('en').display_name + u'English' + >>> Locale('en', 'US').display_name + u'English (United States)' + >>> Locale('sv').display_name + u'svenska' + + :type: `unicode` + """) + + def english_name(self): + return self.get_display_name(Locale('en')) + english_name = property(english_name, doc="""\ + The english display name of the locale. + + >>> Locale('de').english_name + u'German' + >>> Locale('de', 'DE').english_name + u'German (Germany)' + + :type: `unicode` + """) + + #{ General Locale Display Names + + def languages(self): + return self._data['languages'] + languages = property(languages, doc="""\ + Mapping of language codes to translated language names. + + >>> Locale('de', 'DE').languages['ja'] + u'Japanisch' + + :type: `dict` + :see: `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_ + """) + + def scripts(self): + return self._data['scripts'] + scripts = property(scripts, doc="""\ + Mapping of script codes to translated script names. + + >>> Locale('en', 'US').scripts['Hira'] + u'Hiragana' + + :type: `dict` + :see: `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_ + """) + + def territories(self): + return self._data['territories'] + territories = property(territories, doc="""\ + Mapping of script codes to translated script names. + + >>> Locale('es', 'CO').territories['DE'] + u'Alemania' + + :type: `dict` + :see: `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_ + """) + + def variants(self): + return self._data['variants'] + variants = property(variants, doc="""\ + Mapping of script codes to translated script names. + + >>> Locale('de', 'DE').variants['1901'] + u'Alte deutsche Rechtschreibung' + + :type: `dict` + """) + + #{ Number Formatting + + def currencies(self): + return self._data['currency_names'] + currencies = property(currencies, doc="""\ + Mapping of currency codes to translated currency names. + + >>> Locale('en').currencies['COP'] + u'Colombian Peso' + >>> Locale('de', 'DE').currencies['COP'] + u'Kolumbianischer Peso' + + :type: `dict` + """) + + def currency_symbols(self): + return self._data['currency_symbols'] + currency_symbols = property(currency_symbols, doc="""\ + Mapping of currency codes to symbols. + + >>> Locale('en', 'US').currency_symbols['USD'] + u'$' + >>> Locale('es', 'CO').currency_symbols['USD'] + u'US$' + + :type: `dict` + """) + + def number_symbols(self): + return self._data['number_symbols'] + number_symbols = property(number_symbols, doc="""\ + Symbols used in number formatting. + + >>> Locale('fr', 'FR').number_symbols['decimal'] + u',' + + :type: `dict` + """) + + def decimal_formats(self): + return self._data['decimal_formats'] + decimal_formats = property(decimal_formats, doc="""\ + Locale patterns for decimal number formatting. + + >>> Locale('en', 'US').decimal_formats[None] + <NumberPattern u'#,##0.###'> + + :type: `dict` + """) + + def currency_formats(self): + return self._data['currency_formats'] + currency_formats = property(currency_formats, doc=r"""\ + Locale patterns for currency number formatting. + + >>> print Locale('en', 'US').currency_formats[None] + <NumberPattern u'\xa4#,##0.00'> + + :type: `dict` + """) + + def percent_formats(self): + return self._data['percent_formats'] + percent_formats = property(percent_formats, doc="""\ + Locale patterns for percent number formatting. + + >>> Locale('en', 'US').percent_formats[None] + <NumberPattern u'#,##0%'> + + :type: `dict` + """) + + def scientific_formats(self): + return self._data['scientific_formats'] + scientific_formats = property(scientific_formats, doc="""\ + Locale patterns for scientific number formatting. + + >>> Locale('en', 'US').scientific_formats[None] + <NumberPattern u'#E0'> + + :type: `dict` + """) + + #{ Calendar Information and Date Formatting + + def periods(self): + return self._data['periods'] + periods = property(periods, doc="""\ + Locale display names for day periods (AM/PM). + + >>> Locale('en', 'US').periods['am'] + u'AM' + + :type: `dict` + """) + + def days(self): + return self._data['days'] + days = property(days, doc="""\ + Locale display names for weekdays. + + >>> Locale('de', 'DE').days['format']['wide'][3] + u'Donnerstag' + + :type: `dict` + """) + + def months(self): + return self._data['months'] + months = property(months, doc="""\ + Locale display names for months. + + >>> Locale('de', 'DE').months['format']['wide'][10] + u'Oktober' + + :type: `dict` + """) + + def quarters(self): + return self._data['quarters'] + quarters = property(quarters, doc="""\ + Locale display names for quarters. + + >>> Locale('de', 'DE').quarters['format']['wide'][1] + u'1. Quartal' + + :type: `dict` + """) + + def eras(self): + return self._data['eras'] + eras = property(eras, doc="""\ + Locale display names for eras. + + >>> Locale('en', 'US').eras['wide'][1] + u'Anno Domini' + >>> Locale('en', 'US').eras['abbreviated'][0] + u'BC' + + :type: `dict` + """) + + def time_zones(self): + return self._data['time_zones'] + time_zones = property(time_zones, doc="""\ + Locale display names for time zones. + + >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] + u'British Summer Time' + >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] + u"St. John's" + + :type: `dict` + """) + + def meta_zones(self): + return self._data['meta_zones'] + meta_zones = property(meta_zones, doc="""\ + Locale display names for meta time zones. + + Meta time zones are basically groups of different Olson time zones that + have the same GMT offset and daylight savings time. + + >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight'] + u'Central European Summer Time' + + :type: `dict` + :since: version 0.9 + """) + + def zone_formats(self): + return self._data['zone_formats'] + zone_formats = property(zone_formats, doc=r"""\ + Patterns related to the formatting of time zones. + + >>> Locale('en', 'US').zone_formats['fallback'] + u'%(1)s (%(0)s)' + >>> Locale('pt', 'BR').zone_formats['region'] + u'Hor\xe1rio %s' + + :type: `dict` + :since: version 0.9 + """) + + def first_week_day(self): + return self._data['week_data']['first_day'] + first_week_day = property(first_week_day, doc="""\ + The first day of a week, with 0 being Monday. + + >>> Locale('de', 'DE').first_week_day + 0 + >>> Locale('en', 'US').first_week_day + 6 + + :type: `int` + """) + + def weekend_start(self): + return self._data['week_data']['weekend_start'] + weekend_start = property(weekend_start, doc="""\ + The day the weekend starts, with 0 being Monday. + + >>> Locale('de', 'DE').weekend_start + 5 + + :type: `int` + """) + + def weekend_end(self): + return self._data['week_data']['weekend_end'] + weekend_end = property(weekend_end, doc="""\ + The day the weekend ends, with 0 being Monday. + + >>> Locale('de', 'DE').weekend_end + 6 + + :type: `int` + """) + + def min_week_days(self): + return self._data['week_data']['min_days'] + min_week_days = property(min_week_days, doc="""\ + The minimum number of days in a week so that the week is counted as the + first week of a year or month. + + >>> Locale('de', 'DE').min_week_days + 4 + + :type: `int` + """) + + def date_formats(self): + return self._data['date_formats'] + date_formats = property(date_formats, doc="""\ + Locale patterns for date formatting. + + >>> Locale('en', 'US').date_formats['short'] + <DateTimePattern u'M/d/yy'> + >>> Locale('fr', 'FR').date_formats['long'] + <DateTimePattern u'd MMMM yyyy'> + + :type: `dict` + """) + + def time_formats(self): + return self._data['time_formats'] + time_formats = property(time_formats, doc="""\ + Locale patterns for time formatting. + + >>> Locale('en', 'US').time_formats['short'] + <DateTimePattern u'h:mm a'> + >>> Locale('fr', 'FR').time_formats['long'] + <DateTimePattern u'HH:mm:ss z'> + + :type: `dict` + """) + + def datetime_formats(self): + return self._data['datetime_formats'] + datetime_formats = property(datetime_formats, doc="""\ + Locale patterns for datetime formatting. + + >>> Locale('en').datetime_formats[None] + u'{1} {0}' + >>> Locale('th').datetime_formats[None] + u'{1}, {0}' + + :type: `dict` + """) + + +def default_locale(category=None, aliases=LOCALE_ALIASES): + """Returns the system default locale for a given category, based on + environment variables. + + >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']: + ... os.environ[name] = '' + >>> os.environ['LANG'] = 'fr_FR.UTF-8' + >>> default_locale('LC_MESSAGES') + 'fr_FR' + + The "C" or "POSIX" pseudo-locales are treated as aliases for the + "en_US_POSIX" locale: + + >>> os.environ['LC_MESSAGES'] = 'POSIX' + >>> default_locale('LC_MESSAGES') + 'en_US_POSIX' + + :param category: one of the ``LC_XXX`` environment variable names + :param aliases: a dictionary of aliases for locale identifiers + :return: the value of the variable, or any of the fallbacks (``LANGUAGE``, + ``LC_ALL``, ``LC_CTYPE``, and ``LANG``) + :rtype: `str` + """ + varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG') + for name in filter(None, varnames): + locale = os.getenv(name) + if locale: + if name == 'LANGUAGE' and ':' in locale: + # the LANGUAGE variable may contain a colon-separated list of + # language codes; we just pick the language on the list + locale = locale.split(':')[0] + if locale in ('C', 'POSIX'): + locale = 'en_US_POSIX' + elif aliases and locale in aliases: + locale = aliases[locale] + return '_'.join(filter(None, parse_locale(locale))) + +def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES): + """Find the best match between available and requested locale strings. + + >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT']) + 'de_DE' + >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de']) + 'de' + + Case is ignored by the algorithm, the result uses the case of the preferred + locale identifier: + + >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at']) + 'de_DE' + + >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at']) + 'de_DE' + + By default, some web browsers unfortunately do not include the territory + in the locale identifier for many locales, and some don't even allow the + user to easily add the territory. So while you may prefer using qualified + locale identifiers in your web-application, they would not normally match + the language-only locale sent by such browsers. To workaround that, this + function uses a default mapping of commonly used langauge-only locale + identifiers to identifiers including the territory: + + >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US']) + 'ja_JP' + + Some browsers even use an incorrect or outdated language code, such as "no" + for Norwegian, where the correct locale identifier would actually be "nb_NO" + (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of + such cases, too: + + >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE']) + 'nb_NO' + + You can override this default mapping by passing a different `aliases` + dictionary to this function, or you can bypass the behavior althogher by + setting the `aliases` parameter to `None`. + + :param preferred: the list of locale strings preferred by the user + :param available: the list of locale strings available + :param sep: character that separates the different parts of the locale + strings + :param aliases: a dictionary of aliases for locale identifiers + :return: the locale identifier for the best match, or `None` if no match + was found + :rtype: `str` + """ + available = [a.lower() for a in available if a] + for locale in preferred: + ll = locale.lower() + if ll in available: + return locale + if aliases: + alias = aliases.get(ll) + if alias: + alias = alias.replace('_', sep) + if alias.lower() in available: + return alias + parts = locale.split(sep) + if len(parts) > 1 and parts[0].lower() in available: + return parts[0] + return None + +def parse_locale(identifier, sep='_'): + """Parse a locale identifier into a tuple of the form:: + + ``(language, territory, script, variant)`` + + >>> parse_locale('zh_CN') + ('zh', 'CN', None, None) + >>> parse_locale('zh_Hans_CN') + ('zh', 'CN', 'Hans', None) + + The default component separator is "_", but a different separator can be + specified using the `sep` parameter: + + >>> parse_locale('zh-CN', sep='-') + ('zh', 'CN', None, None) + + If the identifier cannot be parsed into a locale, a `ValueError` exception + is raised: + + >>> parse_locale('not_a_LOCALE_String') + Traceback (most recent call last): + ... + ValueError: 'not_a_LOCALE_String' is not a valid locale identifier + + Encoding information and locale modifiers are removed from the identifier: + + >>> parse_locale('it_IT@euro') + ('it', 'IT', None, None) + >>> parse_locale('en_US.UTF-8') + ('en', 'US', None, None) + >>> parse_locale('de_DE.iso885915@euro') + ('de', 'DE', None, None) + + :param identifier: the locale identifier string + :param sep: character that separates the different components of the locale + identifier + :return: the ``(language, territory, script, variant)`` tuple + :rtype: `tuple` + :raise `ValueError`: if the string does not appear to be a valid locale + identifier + + :see: `IETF RFC 4646 <http://www.ietf.org/rfc/rfc4646.txt>`_ + """ + if '.' in identifier: + # this is probably the charset/encoding, which we don't care about + identifier = identifier.split('.', 1)[0] + if '@' in identifier: + # this is a locale modifier such as @euro, which we don't care about + # either + identifier = identifier.split('@', 1)[0] + + parts = identifier.split(sep) + lang = parts.pop(0).lower() + if not lang.isalpha(): + raise ValueError('expected only letters, got %r' % lang) + + script = territory = variant = None + if parts: + if len(parts[0]) == 4 and parts[0].isalpha(): + script = parts.pop(0).title() + + if parts: + if len(parts[0]) == 2 and parts[0].isalpha(): + territory = parts.pop(0).upper() + elif len(parts[0]) == 3 and parts[0].isdigit(): + territory = parts.pop(0) + + if parts: + if len(parts[0]) == 4 and parts[0][0].isdigit() or \ + len(parts[0]) >= 5 and parts[0][0].isalpha(): + variant = parts.pop() + + if parts: + raise ValueError('%r is not a valid locale identifier' % identifier) + + return lang, territory, script, variant |