diff options
Diffstat (limited to 'babel/numbers.py')
-rw-r--r-- | babel/numbers.py | 583 |
1 files changed, 583 insertions, 0 deletions
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
#
# Reconstructed from: diff --git a/babel/numbers.py b/babel/numbers.py
# (new file mode 100644, index 0000000..1a52074)

"""Locale dependent formatting and parsing of numeric data.

The default locale for the functions in this module is determined by the
following environment variables, in that order:

 * ``LC_NUMERIC``,
 * ``LC_ALL``, and
 * ``LANG``
"""
# TODO:
#  Padding and rounding increments in pattern:
#  - http://www.unicode.org/reports/tr35/ (Appendix G.6)
import math
import re
try:
    from decimal import Decimal
    have_decimal = True
except ImportError:
    have_decimal = False

from babel.core import default_locale, Locale

__all__ = ['format_number', 'format_decimal', 'format_currency',
           'format_percent', 'format_scientific', 'parse_number',
           'parse_decimal', 'NumberFormatError']
__docformat__ = 'restructuredtext en'

LC_NUMERIC = default_locale('LC_NUMERIC')


def get_currency_name(currency, locale=LC_NUMERIC):
    """Return the name used by the locale for the specified currency.

    >>> get_currency_name('USD', 'en_US')
    u'US Dollar'

    If the locale has no name for the currency, the currency code itself is
    returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency name
    :rtype: `unicode`
    :since: version 0.9.4
    """
    return Locale.parse(locale).currencies.get(currency, currency)


def get_currency_symbol(currency, locale=LC_NUMERIC):
    """Return the symbol used by the locale for the specified currency.

    >>> get_currency_symbol('USD', 'en_US')
    u'$'

    If the locale has no symbol for the currency, the currency code itself is
    returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).currency_symbols.get(currency, currency)


def get_decimal_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate decimal fractions.

    >>> get_decimal_symbol('en_US')
    u'.'

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('decimal', u'.')


def get_plus_sign_symbol(locale=LC_NUMERIC):
    """Return the plus sign symbol used by the current locale.

    >>> get_plus_sign_symbol('en_US')
    u'+'

    :param locale: the `Locale` object or locale identifier
    :return: the plus sign symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('plusSign', u'+')


def get_minus_sign_symbol(locale=LC_NUMERIC):
    """Return the minus sign symbol used by the current locale.

    >>> get_minus_sign_symbol('en_US')
    u'-'

    :param locale: the `Locale` object or locale identifier
    :return: the minus sign symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('minusSign', u'-')


def get_exponential_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate mantissa and exponent.

    >>> get_exponential_symbol('en_US')
    u'E'

    :param locale: the `Locale` object or locale identifier
    :return: the exponential symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('exponential', u'E')


def get_group_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate groups of thousands.

    >>> get_group_symbol('en_US')
    u','

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('group', u',')


def format_number(number, locale=LC_NUMERIC):
    """Return the given number formatted for a specific locale.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Do we really need this one?
    return format_decimal(number, locale=locale)


def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Return the given decimal number formatted for a specific locale.

    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2346, locale='en_US')
    u'1.235'
    >>> format_decimal(-1.2346, locale='en_US')
    u'-1.235'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de')
    u'12.345'

    The appropriate thousands grouping and the decimal separator are used for
    each locale:

    >>> format_decimal(12345.5, locale='en_US')
    u'12,345.5'

    :param number: the number to format
    :param format: the number pattern (string or `NumberPattern`) to use; if
                   empty, it is looked up in ``locale.decimal_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.decimal_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


def format_currency(number, currency, format=None, locale=LC_NUMERIC):
    u"""Return formatted currency value.

    >>> format_currency(1099.98, 'USD', locale='en_US')
    u'$1,099.98'
    >>> format_currency(1099.98, 'USD', locale='es_CO')
    u'US$\\xa01.099,98'
    >>> format_currency(1099.98, 'EUR', locale='de_DE')
    u'1.099,98\\xa0\\u20ac'

    The pattern can also be specified explicitly:

    >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
    u'EUR 1,099.98'

    :param number: the number to format
    :param currency: the currency code
    :param format: the number pattern (string or `NumberPattern`) to use; if
                   empty, it is looked up in ``locale.currency_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.currency_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale, currency=currency)


def format_percent(number, format=None, locale=LC_NUMERIC):
    """Return formatted percent value for a specific locale.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512\\xa0%'

    The format pattern can also be specified explicitly:

    >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US')
    u'25,123\u2030'

    :param number: the percent number to format
    :param format: the number pattern (string or `NumberPattern`) to use; if
                   empty, it is looked up in ``locale.percent_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.percent_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


def format_scientific(number, format=None, locale=LC_NUMERIC):
    """Return value formatted in scientific notation for a specific locale.

    >>> format_scientific(10000, locale='en_US')
    u'1E4'

    The format pattern can also be specified explicitly:

    >>> format_scientific(1234567, u'##0E00', locale='en_US')
    u'1.23E06'

    :param number: the number to format
    :param format: the number pattern (string or `NumberPattern`) to use; if
                   empty, it is looked up in ``locale.scientific_formats``
    :param locale: the `Locale` object or locale identifier
    :return: value formatted in scientific notation.
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.scientific_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


class NumberFormatError(ValueError):
    """Exception raised when a string cannot be parsed into a number."""


def parse_number(string, locale=LC_NUMERIC):
    """Parse localized number string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    When the given string cannot be parsed, an exception is raised:

    >>> parse_number('1.099,98', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '1.099,98' is not a valid number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `NumberFormatError`: if the string can not be converted to a number
    """
    try:
        # Group separators carry no value; drop them before converting.
        return long(string.replace(get_group_symbol(locale), ''))
    except ValueError:
        raise NumberFormatError('%r is not a valid number' % string)


def parse_decimal(string, locale=LC_NUMERIC):
    """Parse localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de')
    1099.98

    When the given string cannot be parsed, an exception is raised:

    >>> parse_decimal('2,109,998', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '2,109,998' is not a valid decimal number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `NumberFormatError`: if the string can not be converted to a
                                decimal number
    """
    locale = Locale.parse(locale)
    try:
        # Strip the group separators first, then normalise the locale's
        # decimal separator to the '.' that float() understands.
        return float(string.replace(get_group_symbol(locale), '')
                           .replace(get_decimal_symbol(locale), '.'))
    except ValueError:
        raise NumberFormatError('%r is not a valid decimal number' % string)


# A pattern is split into "prefix", "number" and "suffix": the prefix is
# everything (including quoted literals) up to the first number token.
PREFIX_END = r'[^0-9@#.,]'
NUMBER_TOKEN = r'[0-9@#.\-,E+]'

PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
SUFFIX_PATTERN = r"(?P<suffix>.*)"

number_re = re.compile(r"%s%s%s" % (PREFIX_PATTERN, NUMBER_PATTERN,
                                    SUFFIX_PATTERN))


def split_number(value):
    """Convert a number into a (intasstring, fractionasstring) tuple.

    `Decimal` values are rendered with ``str()``; everything else is rendered
    with nine decimal places and trailing zeros are stripped, so at most nine
    fraction digits are ever reported for non-`Decimal` values.
    """
    if have_decimal and isinstance(value, Decimal):
        text = str(value)
    else:
        text = ('%.9f' % value).rstrip('0')
    if '.' in text:
        a, b = text.split('.', 1)
        if b == '0':
            b = ''
    else:
        a, b = text, ''
    return a, b


def bankersround(value, ndigits=0):
    """Round a number to a given precision.

    Works like round() except that the round-half-even (banker's rounding)
    algorithm is used instead of round-half-up.

    >>> bankersround(5.5, 0)
    6.0
    >>> bankersround(6.5, 0)
    6.0
    >>> bankersround(-6.5, 0)
    -6.0
    >>> bankersround(1234.0, -2)
    1200.0

    Only an exact tie is rounded to the even neighbour; anything above the
    half-way point is rounded up:

    >>> bankersround(2.51, 0)
    3.0

    For non-`Decimal` values the result is built from the digit string
    produced by `split_number` (nine decimal places), so binary floating
    point noise cannot change the outcome:

    >>> bankersround(1.15, 2)
    1.15

    :param value: the number to round
    :param ndigits: the number of fraction digits to keep; negative values
                    round to the left of the decimal point
    :return: the rounded value; a `Decimal` if one was passed in, otherwise a
             `float`
    """
    if value < 0:
        sign = -1
    else:
        sign = 1
    value = abs(value)
    a, b = split_number(value)
    digits = a + b
    # Index of the first digit that gets dropped.
    i = len(a) + ndigits
    add = 0
    if 0 <= i < len(digits):
        # The digit left of position 0 is an implicit zero; do not let the
        # index wrap around to the end of the string.
        if i > 0:
            previous = digits[i - 1]
        else:
            previous = '0'
        if digits[i] > '5':
            add = 1
        elif digits[i] == '5':
            # Exact tie only if nothing but zeros follows the '5';
            # otherwise the value lies above the half-way point.
            if previous in '13579' or digits[i + 1:].strip('0'):
                add = 1
    scale = 10**ndigits
    if have_decimal and isinstance(value, Decimal):
        return Decimal(int(value * scale + add)) / scale * sign
    # Take the kept digits from the string rather than from
    # ``int(value * scale)``, which truncates e.g. 1.15 * 100 to 114.
    if i > 0:
        kept = int(digits[:i].ljust(i, '0'))
    else:
        kept = 0
    return float(kept + add) / scale * sign


def parse_pattern(pattern):
    """Parse number format patterns.

    :param pattern: the pattern string; a `NumberPattern` instance is
                    returned unchanged
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: for significant digit patterns that contain both
                         "." and "0"
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = number_re.search(pattern).groups()
        neg_prefix, _, neg_suffix = number_re.search(neg_pattern).groups()
    else:
        pos_prefix, number, pos_suffix = number_re.search(pattern).groups()
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix
    if 'E' in number:
        number, exp = number.split('E', 1)
    else:
        exp = None
    if '@' in number:
        # NOTE(review): the message does not match the condition being
        # tested ("." and "0"); left unchanged -- confirm intended wording.
        if '.' in number and '0' in number:
            raise ValueError('Significant digit patterns can not contain '
                             '"@" or "0"')
    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer = number
        fraction = ''

    def parse_precision(p):
        """Calculate the min and max allowed digits"""
        min = max = 0
        for c in p:
            if c in '@0':
                min += 1
                max += 1
            elif c == '#':
                max += 1
            elif c == ',':
                continue
            else:
                break
        return min, max

    def parse_grouping(p):
        """Parse primary and secondary digit grouping

        A pattern without any grouping separator yields a group size of
        1000 for both, which effectively disables grouping.

        >>> parse_grouping('##')
        (1000, 1000)
        >>> parse_grouping('#,###')
        (3, 3)
        >>> parse_grouping('#,####,###')
        (3, 4)
        """
        width = len(p)
        g1 = p.rfind(',')
        if g1 == -1:
            return 1000, 1000
        g1 = width - g1 - 1
        g2 = p[:-g1 - 1].rfind(',')
        if g2 == -1:
            return g1, g1
        g2 = width - g1 - g2 - 2
        return g1, g2

    int_prec = parse_precision(integer)
    frac_prec = parse_precision(fraction)
    if exp:
        # In scientific patterns the mantissa precision is counted over the
        # integer and fraction parts together.
        frac_prec = parse_precision(integer+fraction)
        exp_plus = exp.startswith('+')
        exp = exp.lstrip('+')
        exp_prec = parse_precision(exp)
    else:
        exp_plus = None
        exp_prec = None
    grouping = parse_grouping(integer)
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_prec, frac_prec,
                         exp_prec, exp_plus)


class NumberPattern(object):
    """A parsed number format pattern that can be applied to values.

    Instances are normally created by `parse_pattern`.
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_prec, frac_prec, exp_prec, exp_plus):
        """Store the parsed pattern components.

        :param pattern: the (positive) pattern string
        :param prefix: ``(positive, negative)`` prefix strings
        :param suffix: ``(positive, negative)`` suffix strings
        :param grouping: ``(primary, secondary)`` group sizes
        :param int_prec: ``(min, max)`` integer digits
        :param frac_prec: ``(min, max)`` fraction digits
        :param exp_prec: ``(min, max)`` exponent digits, or `None` when the
                         pattern is not scientific
        :param exp_plus: whether positive exponents get an explicit plus sign
        """
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_prec = int_prec
        self.frac_prec = frac_prec
        self.exp_prec = exp_prec
        self.exp_plus = exp_plus
        # Percent and per-mille patterns scale the value before formatting.
        if '%' in ''.join(self.prefix + self.suffix):
            self.scale = 100
        elif u'‰' in ''.join(self.prefix + self.suffix):
            self.scale = 1000
        else:
            self.scale = 1

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale, currency=None):
        """Format `value` according to this pattern.

        :param value: the number to format
        :param locale: the `Locale` object or locale identifier supplying the
                       number symbols
        :param currency: the currency code; it is required when the pattern
                         contains a currency placeholder, because it is
                         upper-cased and looked up for the replacement
        :return: the formatted number
        :rtype: `unicode`
        """
        value *= self.scale
        # Used as an index into the (positive, negative) affix tuples.
        is_negative = int(value < 0)
        if self.exp_prec: # Scientific notation
            value = abs(value)
            if value:
                # log10() is exact for powers of ten, whereas log(x, 10)
                # yields e.g. 2.9999999999999996 for 1000.
                exp = int(math.floor(math.log10(value)))
            else:
                exp = 0
            # Minimum number of integer digits
            if self.int_prec[0] == self.int_prec[1]:
                exp -= self.int_prec[0] - 1
            # Exponent grouping
            elif self.int_prec[1]:
                exp = int(exp) // self.int_prec[1] * self.int_prec[1]
            if not have_decimal or not isinstance(value, Decimal):
                value = float(value)
            if exp < 0:
                value = value * 10**(-exp)
            else:
                value = value / 10**exp
            exp_sign = ''
            if exp < 0:
                exp_sign = get_minus_sign_symbol(locale)
            elif self.exp_plus:
                exp_sign = get_plus_sign_symbol(locale)
            exp = abs(exp)
            number = u'%s%s%s%s' % \
                 (self._format_sigdig(value, self.frac_prec[0],
                                      self.frac_prec[1]),
                  get_exponential_symbol(locale), exp_sign,
                  self._format_int(str(exp), self.exp_prec[0],
                                   self.exp_prec[1], locale))
        elif '@' in self.pattern: # Is it a significant digits pattern?
            text = self._format_sigdig(abs(value),
                                       self.int_prec[0],
                                       self.int_prec[1])
            if '.' in text:
                a, b = text.split('.')
                a = self._format_int(a, 0, 1000, locale)
                if b:
                    b = get_decimal_symbol(locale) + b
                number = a + b
            else:
                number = self._format_int(text, 0, 1000, locale)
        else: # A normal number pattern
            a, b = split_number(bankersround(abs(value),
                                             self.frac_prec[1]))
            b = b or '0'
            a = self._format_int(a, self.int_prec[0],
                                 self.int_prec[1], locale)
            b = self._format_frac(b, locale)
            number = a + b
        retval = u'%s%s%s' % (self.prefix[is_negative], number,
                              self.suffix[is_negative])
        if u'¤' in retval:
            # A doubled placeholder is replaced by the currency code, a
            # single one by the locale's currency symbol.
            retval = retval.replace(u'¤¤', currency.upper())
            retval = retval.replace(u'¤', get_currency_symbol(currency, locale))
        return retval

    def _format_sigdig(self, value, min, max):
        """Convert value to a string.

        The resulting string will contain between (min, max) number of
        significant digits.
        """
        a, b = split_number(value)
        ndecimals = len(a)
        if a == '0' and b != '':
            # Leading fraction zeros are not significant: shift the rounding
            # position to the right by one for each of them.
            ndecimals = 0
            while b.startswith('0'):
                b = b[1:]
                ndecimals -= 1
        a, b = split_number(bankersround(value, max - ndecimals))
        digits = len((a + b).lstrip('0'))
        if not digits:
            digits = 1
        # Figure out if we need to add any trailing '0':s
        if len(a) >= max and a != '0':
            return a
        if digits < min:
            b += ('0' * (min - digits))
        if b:
            return '%s.%s' % (a, b)
        return a

    def _format_int(self, value, min, max, locale):
        """Zero-pad the integer digit string to `min` and insert separators.

        The rightmost group uses the primary group size, all further groups
        the secondary one. `max` is accepted but not used here.
        """
        width = len(value)
        if width < min:
            value = '0' * (min - width) + value
        gsize = self.grouping[0]
        ret = ''
        symbol = get_group_symbol(locale)
        while len(value) > gsize:
            ret = symbol + value[-gsize:] + ret
            value = value[:-gsize]
            gsize = self.grouping[1]
        return value + ret

    def _format_frac(self, value, locale):
        """Return the fraction part including the locale's decimal symbol.

        The digit string is padded with zeros up to the minimum number of
        fraction digits, and trailing zeros beyond that minimum are removed.
        An empty string is returned when no fraction should be shown.
        """
        min, max = self.frac_prec
        if len(value) < min:
            value += ('0' * (min - len(value)))
        if max == 0 or (min == 0 and int(value) == 0):
            return ''
        while len(value) > min and value[-1] == '0':
            value = value[:-1]
        return get_decimal_symbol(locale) + value