Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/i18n/html2po.py
diff options
context:
space:
mode:
Diffstat (limited to 'i18n/html2po.py')
-rwxr-xr-x i18n/html2po.py 119
1 files changed, 119 insertions, 0 deletions
diff --git a/i18n/html2po.py b/i18n/html2po.py
new file mode 100755
index 0000000..9d1b2ef
--- /dev/null
+++ b/i18n/html2po.py
@@ -0,0 +1,119 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright Peter Gijsels, 2010
+# under the MIT license http://www.opensource.org/licenses/mit-license.php
+
+import sys
+sys.path.append(sys.path[0] + '/BeautifulSoup-3.0.8')
+
+import re
+import time
+import BeautifulSoup
+
+def tag_text(tag):
+ return ''.join([str(c) for c in tag.contents])
+
+
+# Adapted from pygettext.py in the standard Python distribution
+
+pot_header = '''\
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\\n"
+"POT-Creation-Date: %(time)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL@li.org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=CHARSET\\n"
+"Content-Transfer-Encoding: ENCODING\\n"
+"Generated-By: html2po.py %(version)s\\n"
+
+'''  # PO header template; write() fills in %(time)s and %(version)s
+
+__version__ = '1.0'  # substituted for %(version)s in pot_header by write()
+EMPTYSTRING = ''  # separator used by escape() when joining characters
+
+escapes = []  # table indexed by character code; filled by make_escapes()
+
+def make_escapes(pass_iso8859):
+ global escapes
+ if pass_iso8859:
+ # Allow iso-8859 characters to pass through so that e.g. 'msgid
+ # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
+ # escape any character outside the 32..126 range.
+ mod = 128
+ else:
+ mod = 256
+ for i in range(256):
+ if 32 <= (i % mod) <= 126:
+ escapes.append(chr(i))
+ else:
+ escapes.append("\\%03o" % i)
+ escapes[ord('\\')] = '\\\\'
+ escapes[ord('\t')] = '\\t'
+ escapes[ord('\r')] = '\\r'
+ escapes[ord('\n')] = '\\n'
+ escapes[ord('\"')] = '\\"'
+
+make_escapes(True)
+
+def escape(s):
+ global escapes
+ s = list(s)
+ for i in range(len(s)):
+ s[i] = escapes[ord(s[i])]
+ return EMPTYSTRING.join(s)
+
+def normalize(s):
+ # This converts the various Python string types into a format that is
+ # appropriate for .po files, namely much closer to C style.
+ lines = s.split('\n')
+ if len(lines) == 1:
+ s = '"' + escape(s) + '"'
+ else:
+ if not lines[-1]:
+ del lines[-1]
+ lines[-1] = lines[-1] + '\n'
+ for i in range(len(lines)):
+ lines[i] = escape(lines[i])
+ lineterm = '\\n"\n"'
+ s = '""\n"' + lineterm.join(lines) + '"'
+ return s
+
+def write(list, fp):
+ timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
+ # The time stamp in the header doesn't have the same format as that
+ # generated by xgettext...
+ print >> fp, pot_header % {'time': timestamp, 'version': __version__}
+ list.sort()
+ for k in list:
+ print >> fp, 'msgid', normalize(k)
+ print >> fp, 'msgstr ""\n'
+
+# End of the code adapted from pygettext.py
+
+
+def html2po(html, po):
+ soup = BeautifulSoup.BeautifulSoup(open(html))
+ translatables = soup.findAll(
+ attrs = {'class' : re.compile('\\btranslate\\b')}
+ )
+ write(map(tag_text, translatables), open(po, 'w'))
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+ parser = OptionParser()
+ parser.add_option('-i', '--input', dest='html',
+ help='input html file', metavar='FILE',
+ default='index.html')
+ parser.add_option('-o', '--output', dest='po',
+ help='output po file', metavar='FILE',
+ default='messages.po')
+ (options, args) = parser.parse_args()
+ html2po(options.html, options.po)