# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Locale dependent formatting and parsing of numeric data.

The default locale for the functions in this module is determined by the
following environment variables, in that order:

 * ``LC_NUMERIC``,
 * ``LC_ALL``, and
 * ``LANG``
"""
# TODO:
#  Padding and rounding increments in pattern:
#  - http://www.unicode.org/reports/tr35/ (Appendix G.6)
import math
import re
try:
    from decimal import Decimal
    have_decimal = True
except ImportError:
    have_decimal = False

from babel.core import default_locale, Locale

__all__ = ['format_number', 'format_decimal', 'format_currency',
           'format_percent', 'format_scientific', 'parse_number',
           'parse_decimal', 'NumberFormatError']
__docformat__ = 'restructuredtext en'

LC_NUMERIC = default_locale('LC_NUMERIC')


def get_currency_name(currency, locale=LC_NUMERIC):
    """Return the name used by the locale for the specified currency.

    >>> get_currency_name('USD', 'en_US')
    u'US Dollar'

    If the locale has no name for the currency, the currency code itself is
    returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency name
    :rtype: `unicode`
    :since: version 0.9.4
    """
    return Locale.parse(locale).currencies.get(currency, currency)


def get_currency_symbol(currency, locale=LC_NUMERIC):
    """Return the symbol used by the locale for the specified currency.

    >>> get_currency_symbol('USD', 'en_US')
    u'$'

    If the locale has no symbol for the currency, the currency code itself
    is returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).currency_symbols.get(currency, currency)


def get_decimal_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate decimal fractions.

    >>> get_decimal_symbol('en_US')
    u'.'

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('decimal', u'.')


def get_plus_sign_symbol(locale=LC_NUMERIC):
    """Return the plus sign symbol used by the current locale.

    >>> get_plus_sign_symbol('en_US')
    u'+'

    :param locale: the `Locale` object or locale identifier
    :return: the plus sign symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('plusSign', u'+')


def get_minus_sign_symbol(locale=LC_NUMERIC):
    """Return the minus sign symbol used by the current locale.

    >>> get_minus_sign_symbol('en_US')
    u'-'

    :param locale: the `Locale` object or locale identifier
    :return: the minus sign symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('minusSign', u'-')


def get_exponential_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate mantissa and exponent.

    >>> get_exponential_symbol('en_US')
    u'E'

    :param locale: the `Locale` object or locale identifier
    :return: the exponential symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('exponential', u'E')


def get_group_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate groups of thousands.

    >>> get_group_symbol('en_US')
    u','

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('group', u',')


def format_number(number, locale=LC_NUMERIC):
    """Return the given number formatted for a specific locale.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Do we really need this one?
    return format_decimal(number, locale=locale)


def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Return the given decimal number formatted for a specific locale.

    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2346, locale='en_US')
    u'1.235'
    >>> format_decimal(-1.2346, locale='en_US')
    u'-1.235'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de')
    u'12.345'

    The appropriate thousands grouping and the decimal separator are used for
    each locale:

    >>> format_decimal(12345.5, locale='en_US')
    u'12,345.5'

    :param number: the number to format
    :param format: a number pattern string or `NumberPattern`; when empty or
                   ``None`` the pattern is looked up in the locale's
                   ``decimal_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.decimal_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


def format_currency(number, currency, format=None, locale=LC_NUMERIC):
    u"""Return formatted currency value.

    >>> format_currency(1099.98, 'USD', locale='en_US')
    u'$1,099.98'
    >>> format_currency(1099.98, 'USD', locale='es_CO')
    u'US$\\xa01.099,98'
    >>> format_currency(1099.98, 'EUR', locale='de_DE')
    u'1.099,98\\xa0\\u20ac'

    The pattern can also be specified explicitly:

    >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
    u'EUR 1,099.98'

    :param number: the number to format
    :param currency: the currency code
    :param format: a number pattern string or `NumberPattern`; when empty or
                   ``None`` the pattern is looked up in the locale's
                   ``currency_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.currency_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale, currency=currency)


def format_percent(number, format=None, locale=LC_NUMERIC):
    """Return formatted percent value for a specific locale.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512\\xa0%'

    The format pattern can also be specified explicitly:

    >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US')
    u'25,123\u2030'

    :param number: the percent number to format
    :param format: a number pattern string or `NumberPattern`; when empty or
                   ``None`` the pattern is looked up in the locale's
                   ``percent_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.percent_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


def format_scientific(number, format=None, locale=LC_NUMERIC):
    """Return value formatted in scientific notation for a specific locale.

    >>> format_scientific(10000, locale='en_US')
    u'1E4'

    The format pattern can also be specified explicitly:

    >>> format_scientific(1234567, u'##0E00', locale='en_US')
    u'1.23E06'

    :param number: the number to format
    :param format: a number pattern string or `NumberPattern`; when empty or
                   ``None`` the pattern is looked up in the locale's
                   ``scientific_formats``
    :param locale: the `Locale` object or locale identifier
    :return: value formatted in scientific notation.
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.scientific_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


class NumberFormatError(ValueError):
    """Exception raised when a string cannot be parsed into a number."""


def parse_number(string, locale=LC_NUMERIC):
    """Parse localized number string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    When the given string cannot be parsed, an exception is raised:

    >>> parse_number('1.099,98', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '1.099,98' is not a valid number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `NumberFormatError`: if the string can not be converted to a number
    """
    try:
        # Only the grouping symbol is stripped; anything else that is not a
        # digit makes the conversion fail.
        return long(string.replace(get_group_symbol(locale), ''))
    except ValueError:
        raise NumberFormatError('%r is not a valid number' % string)


def parse_decimal(string, locale=LC_NUMERIC):
    """Parse localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de')
    1099.98

    When the given string cannot be parsed, an exception is raised:

    >>> parse_decimal('2,109,998', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '2,109,998' is not a valid decimal number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `NumberFormatError`: if the string can not be converted to a
                                decimal number
    """
    locale = Locale.parse(locale)
    try:
        # Drop the grouping symbol first, then normalise the locale's decimal
        # symbol to the '.' that float() understands.
        return float(string.replace(get_group_symbol(locale), '')
                           .replace(get_decimal_symbol(locale), '.'))
    except ValueError:
        raise NumberFormatError('%r is not a valid decimal number' % string)


# A number pattern is split into prefix, number part and suffix.  The groups
# are only consumed positionally (via ``.groups()``), but ``(?P<name>...)``
# requires a name to be a valid regular expression.
PREFIX_END = r'[^0-9@#.,]'
NUMBER_TOKEN = r'[0-9@#.\-,E+]'

PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
SUFFIX_PATTERN = r"(?P<suffix>.*)"

number_re = re.compile(r"%s%s%s" % (PREFIX_PATTERN, NUMBER_PATTERN,
                                    SUFFIX_PATTERN))


def split_number(value):
    """Convert a number into a (intasstring, fractionasstring) tuple

    `Decimal` values are rendered with ``str()``; everything else is rendered
    with nine fractional digits and has its trailing zeros removed.  The
    fraction is the empty string when there are no fractional digits.
    """
    if have_decimal and isinstance(value, Decimal):
        text = str(value)
    else:
        text = ('%.9f' % value).rstrip('0')
    if '.' in text:
        a, b = text.split('.', 1)
        if b == '0':
            b = ''
    else:
        a, b = text, ''
    return a, b


def bankersround(value, ndigits=0):
    """Round a number to a given precision.

    Works like round() except that the round-half-even (banker's rounding)
    algorithm is used instead of round-half-up.

    >>> bankersround(5.5, 0)
    6.0
    >>> bankersround(6.5, 0)
    6.0
    >>> bankersround(-6.5, 0)
    -6.0
    >>> bankersround(1234.0, -2)
    1200.0

    Only an exact half is resolved towards the even neighbour; a value that
    lies above the half-way point is always rounded up:

    >>> bankersround(2.51, 0)
    3.0

    :param value: the number to round (`float`, `int` or `Decimal`)
    :param ndigits: number of fractional digits to keep; negative values
                    round to tens, hundreds, ...
    :return: the rounded number, a `Decimal` if `value` was one, otherwise
             a `float`
    """
    if value < 0:
        sign = -1
    else:
        sign = 1
    value = abs(value)
    a, b = split_number(value)
    digits = a + b
    add = 0
    # Index in ``digits`` of the first digit that is going to be dropped.
    i = len(a) + ndigits
    if i < 0 or i >= len(digits):
        pass
    elif digits[i] > '5':
        add = 1
    elif digits[i] == '5':
        if digits[i + 1:].strip('0'):
            # Non-zero digits follow the 5: the value is above the half-way
            # point, so this is not a tie.
            add = 1
        elif i > 0 and digits[i - 1] in '13579':
            # Exact tie: round up only if the last kept digit is odd.  When
            # i == 0 there is no kept digit (implicitly 0, which is even);
            # indexing digits[-1] here would wrongly inspect the last digit.
            add = 1
    scale = 10**ndigits
    if have_decimal and isinstance(value, Decimal):
        return Decimal(int(value * scale + add)) / scale * sign
    else:
        return float(int(value * scale + add)) / scale * sign


def _match_pattern(pattern):
    """Split a (sub)pattern into its (prefix, number, suffix) parts.

    :raise `ValueError`: if the pattern contains no number part
    """
    match = number_re.search(pattern)
    if match is None:
        raise ValueError('Invalid number pattern %r' % pattern)
    return match.groups()


def parse_pattern(pattern):
    """Parse number format patterns

    :param pattern: a number pattern string, or an already parsed
                    `NumberPattern` (which is returned unchanged)
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: if the pattern has no number part, or is a
                         significant digits pattern that contains both a
                         ``.`` and a ``0``
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = _match_pattern(pattern)
        neg_prefix, _, neg_suffix = _match_pattern(neg_pattern)
    else:
        pos_prefix, number, pos_suffix = _match_pattern(pattern)
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix
    if 'E' in number:
        number, exp = number.split('E', 1)
    else:
        exp = None
    if '@' in number:
        if '.' in number and '0' in number:
            raise ValueError('Significant digit patterns can not contain '
                             '"@" or "0"')
    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer = number
        fraction = ''

    def parse_precision(p):
        """Calculate the min and max allowed digits"""
        lo = hi = 0
        for c in p:
            if c in '@0':
                # Required digit: counts towards both bounds.
                lo += 1
                hi += 1
            elif c == '#':
                # Optional digit: only raises the upper bound.
                hi += 1
            elif c == ',':
                continue
            else:
                break
        return lo, hi

    def parse_grouping(p):
        """Parse primary and secondary digit grouping

        Returns a ``(primary, secondary)`` tuple of group sizes:

         * ``'##'`` gives ``(1000, 1000)`` (no group separator in the
           pattern, so the size is large enough that nothing gets grouped),
         * ``'#,###'`` gives ``(3, 3)``,
         * ``'#,####,###'`` gives ``(3, 4)``.
        """
        width = len(p)
        g1 = p.rfind(',')
        if g1 == -1:
            return 1000, 1000
        g1 = width - g1 - 1
        g2 = p[:-g1 - 1].rfind(',')
        if g2 == -1:
            return g1, g1
        g2 = width - g1 - g2 - 2
        return g1, g2

    int_prec = parse_precision(integer)
    frac_prec = parse_precision(fraction)
    if exp:
        # In scientific patterns the significant digits of the mantissa are
        # taken from the integer and fraction parts together.
        frac_prec = parse_precision(integer + fraction)
        exp_plus = exp.startswith('+')
        exp = exp.lstrip('+')
        exp_prec = parse_precision(exp)
    else:
        exp_plus = None
        exp_prec = None
    grouping = parse_grouping(integer)
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_prec, frac_prec,
                         exp_prec, exp_plus)


class NumberPattern(object):
    """A parsed number format pattern that can be applied to numbers.

    Instances are normally created by `parse_pattern`.
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_prec, frac_prec, exp_prec, exp_plus):
        """Store the parsed pattern components.

        :param pattern: the (positive) pattern string
        :param prefix: ``(positive, negative)`` prefix strings
        :param suffix: ``(positive, negative)`` suffix strings
        :param grouping: ``(primary, secondary)`` group sizes
        :param int_prec: ``(min, max)`` number of integer digits
        :param frac_prec: ``(min, max)`` number of fraction digits
        :param exp_prec: ``(min, max)`` number of exponent digits, or
                         ``None`` if the pattern is not scientific
        :param exp_plus: whether positive exponents get an explicit plus
                         sign
        """
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_prec = int_prec
        self.frac_prec = frac_prec
        self.exp_prec = exp_prec
        self.exp_plus = exp_plus
        # Percent and per-mille patterns multiply the value before
        # formatting it.
        affixes = ''.join(self.prefix + self.suffix)
        if '%' in affixes:
            self.scale = 100
        elif u'‰' in affixes:
            self.scale = 1000
        else:
            self.scale = 1

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale, currency=None):
        """Format `value` according to this pattern.

        :param value: the number to format
        :param locale: the `Locale` object or locale identifier providing
                       the number symbols
        :param currency: the currency code substituted for the currency
                         placeholders in the pattern, if there are any
        :return: the formatted number
        :rtype: `unicode`
        """
        value *= self.scale
        # Used as an index into the (positive, negative) affix tuples.
        is_negative = int(value < 0)
        if self.exp_prec:  # Scientific notation
            value = abs(value)
            if value:
                exp = int(math.floor(math.log(value, 10)))
            else:
                exp = 0
            # Minimum number of integer digits
            if self.int_prec[0] == self.int_prec[1]:
                exp -= self.int_prec[0] - 1
            # Exponent grouping
            elif self.int_prec[1]:
                # Floor division keeps the exponent an integer multiple of
                # the group size even where ``/`` is true division.
                exp = int(exp) // self.int_prec[1] * self.int_prec[1]
            if not have_decimal or not isinstance(value, Decimal):
                value = float(value)
            if exp < 0:
                value = value * 10**(-exp)
            else:
                value = value / 10**exp
            exp_sign = ''
            if exp < 0:
                exp_sign = get_minus_sign_symbol(locale)
            elif self.exp_plus:
                exp_sign = get_plus_sign_symbol(locale)
            exp = abs(exp)
            number = u'%s%s%s%s' % \
                 (self._format_sigdig(value, self.frac_prec[0],
                                      self.frac_prec[1]),
                  get_exponential_symbol(locale),
                  exp_sign,
                  self._format_int(str(exp), self.exp_prec[0],
                                   self.exp_prec[1], locale))
        elif '@' in self.pattern:  # Is it a significant digits pattern?
            text = self._format_sigdig(abs(value),
                                       self.int_prec[0],
                                       self.int_prec[1])
            if '.' in text:
                a, b = text.split('.')
                a = self._format_int(a, 0, 1000, locale)
                if b:
                    b = get_decimal_symbol(locale) + b
                number = a + b
            else:
                number = self._format_int(text, 0, 1000, locale)
        else:  # A normal number pattern
            a, b = split_number(bankersround(abs(value),
                                             self.frac_prec[1]))
            b = b or '0'
            a = self._format_int(a, self.int_prec[0],
                                 self.int_prec[1], locale)
            b = self._format_frac(b, locale)
            number = a + b
        retval = u'%s%s%s' % (self.prefix[is_negative], number,
                              self.suffix[is_negative])
        if u'¤' in retval:
            # The doubled placeholder must be replaced first, otherwise the
            # single-placeholder replacement would consume it.
            retval = retval.replace(u'¤¤', currency.upper())
            retval = retval.replace(u'¤', get_currency_symbol(currency,
                                                              locale))
        return retval

    def _format_sigdig(self, value, min, max):
        """Convert value to a string.

        The resulting string will contain between (min, max) number of
        significant digits.
        """
        a, b = split_number(value)
        ndecimals = len(a)
        if a == '0' and b != '':
            # For values below 1, leading zeros of the fraction are not
            # significant; each one shifts the rounding position right.
            ndecimals = 0
            while b.startswith('0'):
                b = b[1:]
                ndecimals -= 1
        a, b = split_number(bankersround(value, max - ndecimals))
        digits = len((a + b).lstrip('0'))
        if not digits:
            digits = 1
        # Figure out if we need to add any trailing '0':s
        if len(a) >= max and a != '0':
            return a
        if digits < min:
            b += ('0' * (min - digits))
        if b:
            return '%s.%s' % (a, b)
        return a

    def _format_int(self, value, min, max, locale):
        """Zero-pad the integer digits in `value` to `min` digits and insert
        the locale's group symbol according to ``self.grouping``.
        """
        width = len(value)
        if width < min:
            value = '0' * (min - width) + value
        gsize = self.grouping[0]
        ret = ''
        symbol = get_group_symbol(locale)
        while len(value) > gsize:
            ret = symbol + value[-gsize:] + ret
            value = value[:-gsize]
            # Every group after the first uses the secondary size.
            gsize = self.grouping[1]
        return value + ret

    def _format_frac(self, value, locale):
        """Format the fraction digits in `value`, including the leading
        decimal symbol; returns an empty string when no fraction is shown.
        """
        min_digits, max_digits = self.frac_prec
        if len(value) < min_digits:
            value += ('0' * (min_digits - len(value)))
        if max_digits == 0 or (min_digits == 0 and int(value) == 0):
            return ''
        # Strip optional trailing zeros, keeping the required minimum.
        while len(value) > min_digits and value[-1] == '0':
            value = value[:-1]
        return get_decimal_symbol(locale) + value