diff options
Diffstat (limited to 'i18n/html2po.py')
-rwxr-xr-x | i18n/html2po.py | 119 |
1 files changed, 119 insertions, 0 deletions
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright Peter Gijsels, 2010
# under the MIT license http://www.opensource.org/licenses/mit-license.php

"""Extract translatable strings from an HTML file into a .po template.

Every element whose ``class`` attribute contains the word ``translate`` is
located, its inner markup is taken as a message id, and the sorted message
ids are written out with empty ``msgstr`` entries below a template header.
"""

import sys
# Make the BeautifulSoup copy bundled next to this script importable.
sys.path.append(sys.path[0] + '/BeautifulSoup-3.0.8')

import re
import time


def tag_text(tag):
    """Return the inner markup of *tag*: ``str()`` of each child, joined."""
    return ''.join([str(c) for c in tag.contents])


# Adapted from pygettext.py in the standard Python distribution

pot_header = '''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: html2po.py %(version)s\\n"

'''

__version__ = '1.0'
EMPTYSTRING = ''

# Escape table indexed by character code (0..255); filled by make_escapes().
escapes = []


def make_escapes(pass_iso8859):
    """(Re)build the module-level ``escapes`` table.

    pass_iso8859 -- when true, allow iso-8859 characters to pass through so
        that e.g. 'msgid "Höhe"' would not result in 'msgid "H\\366he"'.
        Otherwise any character outside the 32..126 range is written as a
        three-digit octal escape.

    The table is replaced in place, so the function may be called more than
    once and existing references to ``escapes`` observe the new contents.
    """
    if pass_iso8859:
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Slice-assign rather than append: appending on a repeated call would
    # leave the stale first 256 entries in the positions escape() reads.
    escapes[:] = table


make_escapes(True)


def escape(s):
    """Return *s* with every character replaced by its ``escapes`` entry.

    Characters whose code point lies beyond the table are passed through
    unchanged instead of raising IndexError.
    """
    table = escapes
    size = len(table)
    return EMPTYSTRING.join(
        [table[ord(c)] if ord(c) < size else c for c in s])


def normalize(s):
    """Return *s* as a quoted .po string, much closer to C style.

    A string without newlines becomes one quoted string.  Otherwise the
    result starts with an empty ``""`` line followed by one quoted string
    per input line.
    """
    lines = s.split('\n')
    if len(lines) == 1:
        return '"' + escape(s) + '"'
    if not lines[-1]:
        # The text ended with a newline: drop the empty tail produced by
        # split() and put the newline back on the real last line.
        del lines[-1]
        lines[-1] = lines[-1] + '\n'
    lines = [escape(line) for line in lines]
    lineterm = '\\n"\n"'
    return '""\n"' + lineterm.join(lines) + '"'


def write(list, fp):
    """Write a .po template to *fp*.

    list -- iterable of message id strings (the parameter name is kept for
        backward compatibility even though it shadows the builtin); it is
        not modified.
    fp -- object with a ``write`` method that accepts strings.

    The header is written first, then one ``msgid`` / empty ``msgstr`` pair
    per message id, in sorted order.
    """
    timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
    # The time stamp in the header doesn't have the same format as that
    # generated by xgettext...
    fp.write(pot_header % {'time': timestamp, 'version': __version__} + '\n')
    # sorted() accepts any iterable and leaves the caller's sequence alone.
    for k in sorted(list):
        fp.write('msgid ' + normalize(k) + '\n')
        fp.write('msgstr ""\n\n')

# End of the code adapted from pygettext.py


def html2po(html, po):
    """Write the translatable strings of the HTML file *html* to *po*.

    html -- path of the HTML file to read.
    po -- path of the .po file to create or overwrite.
    """
    # Imported here so the escaping helpers above stay importable when the
    # bundled parser is not available.
    import BeautifulSoup

    with open(html) as source:
        soup = BeautifulSoup.BeautifulSoup(source)
    translatables = soup.findAll(
        attrs={'class': re.compile(r'\btranslate\b')}
    )
    with open(po, 'w') as target:
        write([tag_text(tag) for tag in translatables], target)


def main():
    """Parse the command line options and run the conversion."""
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-i', '--input', dest='html',
                      help='input html file', metavar='FILE',
                      default='index.html')
    parser.add_option('-o', '--output', dest='po',
                      help='output po file', metavar='FILE',
                      default='messages.po')
    (options, args) = parser.parse_args()
    html2po(options.html, options.po)


if __name__ == '__main__':
    main()