diff options
Diffstat (limited to 'translate-toolkit-1.3.0/translate/tools')
34 files changed, 3863 insertions, 0 deletions
diff --git a/translate-toolkit-1.3.0/translate/tools/TODO b/translate-toolkit-1.3.0/translate/tools/TODO new file mode 100644 index 0000000..315c246 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/TODO @@ -0,0 +1,32 @@ +pomerge + * allow status of input PO to override destination ie if input is not fuzzy + and destination is fuzzy that is you corrected it then make merged message + not fuzzy + - A header is currently skipped. When we can handle header merging + a header should be merged to indicates last translators info, etc but merging + should be optional with ignoring being the default. + - msgblock headers ie #: should remain unchanged. There seems to be a bug + that adds entries on one line when the output templates have them on + separate lines. + - Allow merge without a template and thus us the output as a template. Or + at least don't complain when you don't find the templates. + +New tools + Some ideas for tools that would be helpful + podiff - normal diff show too much spurious data. We need something that + can ignore the location and just shaw the changes in text. So you + could receive an updated PO file but the diff would not show spurious + changes in the header and could match strings in different location. + The output could then be something like diff -u + glossary_creator - take input text or PO file and extract a) the most + frequent words and b) the most frequent word pairs. So things like + "Proxy Server" are extracted. The idea being that you can create a + glossary for a single PO file or for the whole project like KDE. + Translators get to define important words before beginning to translate + pomigrate - need a python based tool that takes an input file the new + template and outputs the new migrated PO files. Initialising as needed + and merging if possible. 
+ +pocount + - Fix po class so that it handles KDE style plurals there not in pocount + - Count word\nword and word<tag>word as 2 words diff --git a/translate-toolkit-1.3.0/translate/tools/__init__.py b/translate-toolkit-1.3.0/translate/tools/__init__.py new file mode 100644 index 0000000..3bba49b --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/__init__.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2004-2006 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""translate.tools is part of the translate package +It contains code to perform various operations (mostly on po files)""" + diff --git a/translate-toolkit-1.3.0/translate/tools/build_tmdb b/translate-toolkit-1.3.0/translate/tools/build_tmdb new file mode 100755 index 0000000..f84c8b2 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/build_tmdb @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2008 Zuza Software Foundation +# +# This file is part of translate. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. + +"""Import units from translations files into tmdb.""" + +from translate.tools import build_tmdb + +if __name__ == '__main__': + build_tmdb.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/build_tmdb.py b/translate-toolkit-1.3.0/translate/tools/build_tmdb.py new file mode 100755 index 0000000..9ff6613 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/build_tmdb.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2008 Zuza Software Foundation +# +# This file is part of Virtaal. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ +"""Import units from translations files into tmdb.""" + +import sys +import os +from optparse import OptionParser +from translate.storage import factory +from translate.storage import tmdb + + +class Builder: + def __init__(self, tmdbfile, source_lang, target_lang, filenames): + self.tmdb = tmdb.TMDB(tmdbfile) + self.source_lang = source_lang + self.target_lang = target_lang + + for filename in filenames: + if not os.path.exists(filename): + print >> sys.stderr, "cannot process %s: does not exist" % filename + continue + elif os.path.isdir(filename): + self.handledir(filename) + else: + self.handlefile(filename) + self.tmdb.connection.commit() + + + def handlefile(self, filename): + try: + store = factory.getobject(filename) + except Exception, e: + print >> sys.stderr, str(e) + return + # do something useful with the store and db + try: + self.tmdb.add_store(store, self.source_lang, self.target_lang, commit=False) + except Exception, e: + print e + print "new file:", filename + + + def handlefiles(self, dirname, filenames): + for filename in filenames: + pathname = os.path.join(dirname, filename) + if os.path.isdir(pathname): + self.handledir(pathname) + else: + self.handlefile(pathname) + + + def handledir(self, dirname): + path, name = os.path.split(dirname) + if name in ["CVS", ".svn", "_darcs", ".git", ".hg", ".bzr"]: + return + entries = os.listdir(dirname) + self.handlefiles(dirname, entries) + +def main(): + try: + import psyco + psyco.full() + except Exception: + pass + parser = OptionParser() + parser.add_option("-d", "--tmdb", dest="tmdbfile", + help="translation memory database file") + parser.add_option("-s", "--import-source-lang", dest="source_lang", + help="source language of translation files") + parser.add_option("-t", "--import-target-lang", dest="target_lang", + help="target language of translation files") + (options, args) = parser.parse_args() + + Builder(options.tmdbfile, options.source_lang, options.target_lang, args) + +if __name__ == 
'__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/poclean b/translate-toolkit-1.3.0/translate/tools/poclean new file mode 100755 index 0000000..c7d3a8a --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poclean @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2003, 2004 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""produces a clean file from an unclean file (Trados/Wordfast)""" + +from translate.tools import poclean + +if __name__ == '__main__': + poclean.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/poclean.py b/translate-toolkit-1.3.0/translate/tools/poclean.py new file mode 100644 index 0000000..b7066cb --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poclean.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Produces a clean file from an unclean file (Trados/Wordfast) by stripping +out the tw4win indicators. + +This does not convert an RTF file to PO/XLIFF, but produces the target file +with only the target text in from a text version of the RTF. +""" + +from translate.storage import factory +from translate.misc.multistring import multistring +import re + +tw4winre = re.compile(r"\{0>.*?<\}\d{1,3}\{>(.*?)<0\}", re.M | re.S) + +def cleanunit(unit): + """cleans the targets in the given unit""" + if isinstance(unit.target, multistring): + strings = unit.target.strings + else: + strings = [unit.target] + for index, string in enumerate(strings): + string = string.replace("\par", "") + strings[index] = tw4winre.sub(r"\1", string) + if len(strings) == 1: + unit.target = strings[0] + else: + unit.target = strings + +def cleanfile(thefile): + """cleans the given file""" + for unit in thefile.units: + cleanunit(unit) + return thefile + +def runclean(inputfile, outputfile, templatefile): + """reads in inputfile, cleans, writes to outputfile""" + fromfile = factory.getobject(inputfile) + + cleanfile(fromfile) +# if fromfile.isempty(): +# return False + outputfile.write(str(fromfile)) + return True + +def main(): + from translate.convert import convert + formats = {"po":("po", runclean), "xlf":("xlf", runclean), None:("po", runclean)} + parser = convert.ConvertOptionParser(formats, usetemplates=False, description=__doc__) + parser.run() + +if __name__ == '__main__': + main() diff --git 
a/translate-toolkit-1.3.0/translate/tools/pocompile b/translate-toolkit-1.3.0/translate/tools/pocompile new file mode 100755 index 0000000..142eacc --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pocompile @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2005 Zuza Software Foundation +# +# This file is part of the translate-toolkit +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""simple script to convert a gettext .po localization file to a comma-separated values (.csv) file""" + +from translate.tools import pocompile + +if __name__ == '__main__': + pocompile.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/pocompile.py b/translate-toolkit-1.3.0/translate/tools/pocompile.py new file mode 100644 index 0000000..1fb915c --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pocompile.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2005, 2006 Zuza Software Foundation +# +# This file is part of the translate-toolkit +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Compile XLIFF and Gettext PO localization files into Gettext MO (Machine Object) files + +See: http://translate.sourceforge.net/wiki/toolkit/pocompile for examples and +usage instructions +""" + +from translate.storage import factory +from translate.storage import po +from translate.storage import mo + +class POCompile: + + def convertstore(self, inputfile, includefuzzy=False): + outputfile = mo.mofile() + for unit in inputfile.units: + if unit.istranslated() or (unit.isfuzzy() and includefuzzy and unit.target): + mounit = mo.mounit() + if unit.isheader(): + mounit.source = "" + else: + mounit.source = unit.source + if hasattr(unit, "msgidcomments"): + mounit.source.strings[0] = po.unquotefrompo(unit.msgidcomments) + mounit.source.strings[0] + if hasattr(unit, "msgctxt"): + mounit.msgctxt = po.unquotefrompo(unit.msgctxt) + mounit.target = unit.target + outputfile.addunit(mounit) + return str(outputfile) + +def convertmo(inputfile, outputfile, templatefile, includefuzzy=False): + """reads in a base class derived inputfile, converts using pocompile, writes to outputfile""" + # note that templatefile is not used, but it is required by the converter... 
+ inputstore = factory.getobject(inputfile) + if inputstore.isempty(): + return 0 + convertor = POCompile() + outputmo = convertor.convertstore(inputstore, includefuzzy) + # We have to make sure that we write the files in binary mode, therefore we + # reopen the file accordingly + outputfile.close() + outputfile = open(outputfile.name, 'wb') + outputfile.write(outputmo) + return 1 + +def main(): + from translate.convert import convert + formats = {"po":("mo", convertmo), "xlf":("mo", convertmo)} + parser = convert.ConvertOptionParser(formats, usepots=False, description=__doc__) + parser.add_fuzzy_option() + parser.run() + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/poconflicts b/translate-toolkit-1.3.0/translate/tools/poconflicts new file mode 100755 index 0000000..2b1e7ff --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poconflicts @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2005 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""conflict finder for gettext .po localization files""" + +from translate.tools import poconflicts + +if __name__ == '__main__': + poconflicts.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/poconflicts.py b/translate-toolkit-1.3.0/translate/tools/poconflicts.py new file mode 100644 index 0000000..5ce2330 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poconflicts.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2005-2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Conflict finder for Gettext PO localization files + +See: http://translate.sourceforge.net/wiki/toolkit/poconflicts for examples and +usage instructions +""" + +from translate.storage import factory +from translate.storage import po +from translate.misc import optrecurse +import sys +import os + +class ConflictOptionParser(optrecurse.RecursiveOptionParser): + """a specialized Option Parser for the conflict tool...""" + def parse_args(self, args=None, values=None): + """parses the command line options, handling implicit input/output args""" + (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values) + # some intelligence as to what reasonable people might give on the command line + if args and not options.input: + if not options.output: + options.input = args[:-1] + args = args[-1:] + else: + options.input = args + args = [] + if args and not options.output: + options.output = args[-1] + args = args[:-1] + if not options.output: + self.error("output file is required") + if args: + self.error("You have used an invalid combination of --input, --output and freestanding args") + if isinstance(options.input, list) and len(options.input) == 1: + options.input = options.input[0] + return (options, args) + + def set_usage(self, usage=None): + """sets the usage string - if usage not given, uses getusagestring for each option""" + if usage is None: + self.usage = "%prog " + " ".join([self.getusagestring(option) for option in self.option_list]) + \ + "\n input directory is searched for PO files, PO files with name of conflicting string are output in output directory" + else: + super(ConflictOptionParser, self).set_usage(usage) + + def run(self): + """parses the arguments, and runs recursiveprocess with the resulting options""" + (options, args) 
= self.parse_args() + options.inputformats = self.inputformats + options.outputoptions = self.outputoptions + self.usepsyco(options) + self.recursiveprocess(options) + + def recursiveprocess(self, options): + """recurse through directories and process files""" + if self.isrecursive(options.input, 'input') and getattr(options, "allowrecursiveinput", True): + if not self.isrecursive(options.output, 'output'): + try: + self.warning("Output directory does not exist. Attempting to create") + os.mkdir(options.output) + except: + self.error(optrecurse.optparse.OptionValueError("Output directory does not exist, attempt to create failed")) + if isinstance(options.input, list): + inputfiles = self.recurseinputfilelist(options) + else: + inputfiles = self.recurseinputfiles(options) + else: + if options.input: + inputfiles = [os.path.basename(options.input)] + options.input = os.path.dirname(options.input) + else: + inputfiles = [options.input] + self.textmap = {} + self.initprogressbar(inputfiles, options) + for inputpath in inputfiles: + fullinputpath = self.getfullinputpath(options, inputpath) + try: + success = self.processfile(None, options, fullinputpath) + except Exception, error: + if isinstance(error, KeyboardInterrupt): + raise + self.warning("Error processing: input %s" % (fullinputpath), options, sys.exc_info()) + success = False + self.reportprogress(inputpath, success) + del self.progressbar + self.buildconflictmap() + self.outputconflicts(options) + + def clean(self, string, options): + """returns the cleaned string that contains the text to be matched""" + if options.ignorecase: + string = string.lower() + for accelerator in options.accelchars: + string = string.replace(accelerator, "") + string = string.strip() + return string + + def processfile(self, fileprocessor, options, fullinputpath): + """process an individual file""" + inputfile = self.openinputfile(options, fullinputpath) + inputfile = factory.getobject(inputfile) + for unit in inputfile.units: + if 
unit.isheader() or not unit.istranslated(): + continue + if unit.hasplural(): + continue + if not options.invert: + source = self.clean(unit.source, options) + target = self.clean(unit.target, options) + else: + target = self.clean(unit.source, options) + source = self.clean(unit.target, options) + self.textmap.setdefault(source, []).append((target, unit, fullinputpath)) + + def flatten(self, text, joinchar): + """flattens text to just be words""" + flattext = "" + for c in text: + if c.isalnum(): + flattext += c + elif flattext[-1:].isalnum(): + flattext += joinchar + return flattext.rstrip(joinchar) + + def buildconflictmap(self): + """work out which strings are conflicting""" + self.conflictmap = {} + for source, translations in self.textmap.iteritems(): + source = self.flatten(source, " ") + if len(source) <= 1: + continue + if len(translations) > 1: + uniquetranslations = dict.fromkeys([target for target, unit, filename in translations]) + if len(uniquetranslations) > 1: + self.conflictmap[source] = translations + + def outputconflicts(self, options): + """saves the result of the conflict match""" + print "%d/%d different strings have conflicts" % (len(self.conflictmap), len(self.textmap)) + reducedmap = {} + for source, translations in self.conflictmap.iteritems(): + words = source.split() + words.sort(lambda x, y: cmp(len(x), len(y))) + source = words[-1] + reducedmap.setdefault(source, []).extend(translations) + # reduce plurals + plurals = {} + for word in reducedmap: + if word + "s" in reducedmap: + plurals[word] = word + "s" + for word, pluralword in plurals.iteritems(): + reducedmap[word].extend(reducedmap.pop(pluralword)) + for source, translations in reducedmap.iteritems(): + flatsource = self.flatten(source, "-") + fulloutputpath = os.path.join(options.output, flatsource + os.extsep + "po") + conflictfile = po.pofile() + for target, unit, filename in translations: + unit.othercomments.append("# (poconflicts) %s\n" % filename) + 
conflictfile.units.append(unit) + open(fulloutputpath, "w").write(str(conflictfile)) + +def main(): + formats = {"po":("po", None), None:("po", None)} + parser = ConflictOptionParser(formats) + parser.add_option("-I", "--ignore-case", dest="ignorecase", + action="store_true", default=False, help="ignore case distinctions") + parser.add_option("-v", "--invert", dest="invert", + action="store_true", default=False, help="invert the conflicts thus extracting conflicting destination words") + parser.add_option("", "--accelerator", dest="accelchars", default="", + metavar="ACCELERATORS", help="ignores the given accelerator characters when matching") + parser.set_usage() + parser.description = __doc__ + parser.run() + + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/pocount b/translate-toolkit-1.3.0/translate/tools/pocount new file mode 100755 index 0000000..cb90dcc --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pocount @@ -0,0 +1,27 @@ +#!/usr/bin/python +# +# Copyright 2004 Zuza Software Foundation +# +# This file is part of the translate-toolkit +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""takes a .po translation file and produces word counts and other statistics""" + +from translate.tools import pocount + +if __name__ == '__main__': + pocount.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/pocount.py b/translate-toolkit-1.3.0/translate/tools/pocount.py new file mode 100644 index 0000000..df992a9 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pocount.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2003-2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Create string and word counts for supported localization files including: +XLIFF, TMX, Gettex PO and MO, Qt .ts and .qm, Wordfast TM, etc + +See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and +usage instructions +""" + +from translate.storage import factory +from translate.storage import statsdb +import sys +import os + +def calcstats_old(filename): + """This is the previous implementation of calcstats() and is left for + comparison and debuging purposes.""" + # ignore totally blank or header units + try: + store = factory.getobject(filename) + except ValueError, e: + print str(e) + return {} + units = filter(lambda unit: not unit.isheader(), store.units) + translated = translatedmessages(units) + fuzzy = fuzzymessages(units) + review = filter(lambda unit: unit.isreview(), units) + untranslated = untranslatedmessages(units) + wordcounts = dict(map(lambda unit: (unit, statsdb.wordsinunit(unit)), units)) + sourcewords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][0], elementlist)) + targetwords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][1], elementlist)) + stats = {} + + #units + stats["translated"] = len(translated) + stats["fuzzy"] = len(fuzzy) + stats["untranslated"] = len(untranslated) + stats["review"] = len(review) + stats["total"] = stats["translated"] + stats["fuzzy"] + stats["untranslated"] + + #words + stats["translatedsourcewords"] = sourcewords(translated) + stats["translatedtargetwords"] = targetwords(translated) + stats["fuzzysourcewords"] = sourcewords(fuzzy) + stats["untranslatedsourcewords"] = sourcewords(untranslated) + stats["reviewsourcewords"] = sourcewords(review) + stats["totalsourcewords"] = stats["translatedsourcewords"] + \ + stats["fuzzysourcewords"] + \ + 
stats["untranslatedsourcewords"] + return stats + +def calcstats(filename): + statscache = statsdb.StatsCache() + return statscache.filetotals(filename) + +def summarize(title, stats, CSVstyle=False): + def percent(denominator, devisor): + if devisor == 0: + return 0 + else: + return denominator*100/devisor + + if CSVstyle: + print "%s, " % title, + print "%d, %d, %d," % (stats["translated"], stats["translatedsourcewords"], stats["translatedtargetwords"]), + print "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]), + print "%d, %d," % (stats["untranslated"], stats["untranslatedsourcewords"]), + print "%d, %d" % (stats["total"], stats["totalsourcewords"]), + if stats["review"] > 0: + print ", %d, %d" % (stats["review"], stats["reviewsourdcewords"]), + print + else: + print title + print "type strings words (source) words (translation)" + print "translated: %5d (%3d%%) %10d (%3d%%) %15d" % \ + (stats["translated"], \ + percent(stats["translated"], stats["total"]), \ + stats["translatedsourcewords"], \ + percent(stats["translatedsourcewords"], stats["totalsourcewords"]), \ + stats["translatedtargetwords"]) + print "fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % \ + (stats["fuzzy"], \ + percent(stats["fuzzy"], stats["total"]), \ + stats["fuzzysourcewords"], \ + percent(stats["fuzzysourcewords"], stats["totalsourcewords"])) + print "untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % \ + (stats["untranslated"], \ + percent(stats["untranslated"], stats["total"]), \ + stats["untranslatedsourcewords"], \ + percent(stats["untranslatedsourcewords"], stats["totalsourcewords"])) + print "Total: %5d %17d %22d" % \ + (stats["total"], \ + stats["totalsourcewords"], \ + stats["translatedtargetwords"]) + if stats["review"] > 0: + print "review: %5d %17d n/a" % \ + (stats["review"], stats["reviewsourcewords"]) + print + +def fuzzymessages(units): + return filter(lambda unit: unit.isfuzzy() and unit.target, units) + +def translatedmessages(units): + return filter(lambda unit: 
unit.istranslated(), units) + +def untranslatedmessages(units): + return filter(lambda unit: not (unit.istranslated() or unit.isfuzzy()) and unit.source, units) + +class summarizer: + def __init__(self, filenames, CSVstyle): + self.totals = {} + self.filecount = 0 + self.CSVstyle = CSVstyle + if self.CSVstyle: + print "Filename, Translated Messages, Translated Source Words, Translated \ +Target Words, Fuzzy Messages, Fuzzy Source Words, Untranslated Messages, \ +Untranslated Source Words, Total Message, Total Source Words, \ +Review Messages, Review Source Words" + for filename in filenames: + if not os.path.exists(filename): + print >> sys.stderr, "cannot process %s: does not exist" % filename + continue + elif os.path.isdir(filename): + self.handledir(filename) + else: + self.handlefile(filename) + if self.filecount > 1 and not self.CSVstyle: + summarize("TOTAL:", self.totals) + print "File count: %5d" % (self.filecount) + print + + def updatetotals(self, stats): + """Update self.totals with the statistics in stats.""" + for key in stats.keys(): + if not self.totals.has_key(key): + self.totals[key] = 0 + self.totals[key] += stats[key] + + def handlefile(self, filename): + try: + stats = calcstats(filename) + self.updatetotals(stats) + summarize(filename, stats, self.CSVstyle) + self.filecount += 1 + except: # This happens if we have a broken file. + print >> sys.stderr, sys.exc_info()[1] + + def handlefiles(self, dirname, filenames): + for filename in filenames: + pathname = os.path.join(dirname, filename) + if os.path.isdir(pathname): + self.handledir(pathname) + else: + self.handlefile(pathname) + + def handledir(self, dirname): + path, name = os.path.split(dirname) + if name in ["CVS", ".svn", "_darcs", ".git", ".hg", ".bzr"]: + return + entries = os.listdir(dirname) + self.handlefiles(dirname, entries) + +def main(): + # TODO: make this handle command line options using optparse... 
+ CSVstyle = False + if "--csv" in sys.argv: + sys.argv.remove("--csv") + CSVstyle = True + try: + import psyco + psyco.full() + except Exception: + pass + summarizer(sys.argv[1:], CSVstyle) + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/podebug b/translate-toolkit-1.3.0/translate/tools/podebug new file mode 100755 index 0000000..f7e9952 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/podebug @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2004 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""simple script to insert debug messages into po file translations""" + +from translate.tools import podebug + +if __name__ == '__main__': + podebug.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/podebug.py b/translate-toolkit-1.3.0/translate/tools/podebug.py new file mode 100644 index 0000000..59438de --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/podebug.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2004-2006,2008 Zuza Software Foundation +# +# This file is part of translate. 
+# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Insert debug messages into XLIFF and Gettext PO localization files + +See: http://translate.sourceforge.net/wiki/toolkit/podebug for examples and +usage instructions +""" + +from translate.storage import factory +from translate.misc.rich import map_rich, only_strings +import os +import re +from translate.misc import hash + +def add_prefix(prefix, strings): + for string in strings: + string.insert(0, prefix) + return strings + +class podebug: + def __init__(self, format=None, rewritestyle=None, hash=None, ignoreoption=None): + if format is None: + self.format = "" + else: + self.format = format + self.rewritefunc = getattr(self, "rewrite_%s" % rewritestyle, None) + self.ignorefunc = getattr(self, "ignore_%s" % ignoreoption, None) + self.hash = hash + + def rewritelist(cls): + return [rewrite.replace("rewrite_", "") for rewrite in dir(cls) if rewrite.startswith("rewrite_")] + rewritelist = classmethod(rewritelist) + + def rewrite_xxx(self, string): + if string.endswith("\n"): + return "xxx%sxxx\n" % string[:-1] + return "xxx%sxxx" % string + + def rewrite_en(self, string): + return string + + def rewrite_blank(self, string): + return "" + + def rewrite_chef(self, string): + """Rewrite using Mock Swedish as made famous by Monty Python""" + # From Dive into Python which 
itself got it elsewhere http://www.renderx.com/demos/examples/diveintopython.pdf + subs = ( + (r'a([nu])', r'u\1'), + (r'A([nu])', r'U\1'), + (r'a\B', r'e'), + (r'A\B', r'E'), + (r'en\b', r'ee'), + (r'\Bew', r'oo'), + (r'\Be\b', r'e-a'), + (r'\be', r'i'), + (r'\bE', r'I'), + (r'\Bf', r'ff'), + (r'\Bir', r'ur'), + (r'(\w*?)i(\w*?)$', r'\1ee\2'), + (r'\bow', r'oo'), + (r'\bo', r'oo'), + (r'\bO', r'Oo'), + (r'the', r'zee'), + (r'The', r'Zee'), + (r'th\b', r't'), + (r'\Btion', r'shun'), + (r'\Bu', r'oo'), + (r'\BU', r'Oo'), + (r'v', r'f'), + (r'V', r'F'), + (r'w', r'w'), + (r'W', r'W'), + (r'([a-z])[.]', r'\1. Bork Bork Bork!')) + for a, b in subs: + string = re.sub(a, b, string) + return string + + REWRITE_UNICODE_MAP = u"ȦƁƇḒḖƑƓĦĪĴĶĿḾȠǾƤɊŘŞŦŬṼẆẊẎẐ" + u"[\\]^_`" + u"ȧƀƈḓḗƒɠħīĵķŀḿƞǿƥɋřşŧŭṽẇẋẏẑ" + def rewrite_unicode(self, string): + """Convert to Unicode characters that look like the source string""" + def transpose(char): + loc = ord(char)-65 + if loc < 0 or loc > 56: + return char + return self.REWRITE_UNICODE_MAP[loc] + return "".join(map(transpose, string)) + + def ignorelist(cls): + return [ignore.replace("ignore_", "") for ignore in dir(cls) if ignore.startswith("ignore_")] + ignorelist = classmethod(ignorelist) + + def ignore_openoffice(self, unit): + for location in unit.getlocations(): + if location.startswith("Common.xcu#..Common.View.Localisation"): + return True + elif location.startswith("profile.lng#STR_DIR_MENU_NEW_"): + return True + elif location.startswith("profile.lng#STR_DIR_MENU_WIZARD_"): + return True + return False + + def ignore_mozilla(self, unit): + locations = unit.getlocations() + if len(locations) == 1 and locations[0].lower().endswith(".accesskey"): + return True + for location in locations: + if location.endswith(".height") or location.endswith(".width") or \ + location.endswith(".macWidth") or location.endswith(".unixWidth"): + return True + if location == "brandShortName" or location == "brandFullName" or location == "vendorShortName": 
+ return True + if location.lower().endswith(".commandkey") or location.endswith(".key"): + return True + return False + + def ignore_gtk(self, unit): + if unit.source == "default:LTR": + return True + return False + + def ignore_kde(self, unit): + if unit.source == "LTR": + return True + return False + + def convertunit(self, unit, prefix): + if self.ignorefunc: + if self.ignorefunc(unit): + return unit + if self.hash: + if unit.getlocations(): + hashable = unit.getlocations()[0] + else: + hashable = unit.source + prefix = hash.md5_f(hashable).hexdigest()[:self.hash] + " " + if self.rewritefunc: + unit.rich_target = map_rich(only_strings(self.rewritefunc), unit.rich_source) + elif not unit.istranslated(): + unit.rich_target = unit.rich_source + unit.rich_target = add_prefix(prefix, unit.rich_target) + return unit + + def convertstore(self, store): + filename = self.shrinkfilename(store.filename) + prefix = self.format + for formatstr in re.findall("%[0-9c]*[sfFbBd]", self.format): + if formatstr.endswith("s"): + formatted = self.shrinkfilename(store.filename) + elif formatstr.endswith("f"): + formatted = store.filename + formatted = os.path.splitext(formatted)[0] + elif formatstr.endswith("F"): + formatted = store.filename + elif formatstr.endswith("b"): + formatted = os.path.basename(store.filename) + formatted = os.path.splitext(formatted)[0] + elif formatstr.endswith("B"): + formatted = os.path.basename(store.filename) + elif formatstr.endswith("d"): + formatted = os.path.dirname(store.filename) + else: + continue + formatoptions = formatstr[1:-1] + if formatoptions: + if "c" in formatoptions and formatted: + formatted = formatted[0] + filter(lambda x: x.lower() not in "aeiou", formatted[1:]) + length = filter(str.isdigit, formatoptions) + if length: + formatted = formatted[:int(length)] + prefix = prefix.replace(formatstr, formatted) + for unit in store.units: + if unit.isheader() or unit.isblank(): + continue + unit = self.convertunit(unit, prefix) + return 
store + + def shrinkfilename(self, filename): + if filename.startswith("." + os.sep): + filename = filename.replace("." + os.sep, "", 1) + dirname = os.path.dirname(filename) + dirparts = dirname.split(os.sep) + if not dirparts: + dirshrunk = "" + else: + dirshrunk = dirparts[0][:4] + "-" + if len(dirparts) > 1: + dirshrunk += "".join([dirpart[0] for dirpart in dirparts[1:]]) + "-" + baseshrunk = os.path.basename(filename)[:4] + if "." in baseshrunk: + baseshrunk = baseshrunk[:baseshrunk.find(".")] + return dirshrunk + baseshrunk + +def convertpo(inputfile, outputfile, templatefile, format=None, rewritestyle=None, hash=None, ignoreoption=None): + """reads in inputfile using po, changes to have debug strings, writes to outputfile""" + # note that templatefile is not used, but it is required by the converter... + inputstore = factory.getobject(inputfile) + if inputstore.isempty(): + return 0 + convertor = podebug(format=format, rewritestyle=rewritestyle, hash=hash, ignoreoption=ignoreoption) + outputstore = convertor.convertstore(inputstore) + outputfile.write(str(outputstore)) + return 1 + +def main(): + from translate.convert import convert + formats = {"po":("po", convertpo), "pot":("po", convertpo), "xlf":("xlf", convertpo)} + parser = convert.ConvertOptionParser(formats, description=__doc__) + # TODO: add documentation on format strings... 
+ parser.add_option("-f", "--format", dest="format", default="", help="specify format string") + parser.add_option("", "--rewrite", dest="rewritestyle", + type="choice", choices=podebug.rewritelist(), metavar="STYLE", help="the translation rewrite style: %s" % ", ".join(podebug.rewritelist())) + parser.add_option("", "--ignore", dest="ignoreoption", + type="choice", choices=podebug.ignorelist(), metavar="APPLICATION", help="apply tagging ignore rules for the given application: %s" % ", ".join(podebug.ignorelist())) + parser.add_option("", "--hash", dest="hash", metavar="LENGTH", type="int", help="add an md5 hash to translations") + parser.passthrough.append("format") + parser.passthrough.append("rewritestyle") + parser.passthrough.append("ignoreoption") + parser.passthrough.append("hash") + parser.run() + + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/pogrep b/translate-toolkit-1.3.0/translate/tools/pogrep new file mode 100755 index 0000000..5d959f6 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pogrep @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2003, 2004 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""grep for localization files in various formats, eg. 
po and xliff""" + +from translate.tools import pogrep + +if __name__ == '__main__': + pogrep.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/pogrep.py b/translate-toolkit-1.3.0/translate/tools/pogrep.py new file mode 100644 index 0000000..f27ef11 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pogrep.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2002-2008 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Grep XLIFF, Gettext PO and TMX localization files + +Matches are output to snippet files of the same type which can then be reviewed +and later merged using pomerge + +See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and +usage instructions +""" + +from translate.storage import factory +from translate.misc import optrecurse +from translate.misc.multistring import multistring +from translate.lang import data +import re +import locale + + +class GrepMatch(object): + """Just a small data structure that represents a search match.""" + + # INITIALIZERS # + def __init__(self, unit, part='target', part_n=0, start=0, end=0): + self.unit = unit + self.part = part + self.part_n = part_n + self.start = start + self.end = end + + # ACCESSORS # + def get_getter(self): + if self.part 
== 'target': + if self.unit.hasplural(): + getter = lambda: self.unit.target.strings[self.part_n] + else: + getter = lambda: self.unit.target + return getter + elif self.part == 'source': + if self.unit.hasplural(): + getter = lambda: self.unit.source.strings[self.part_n] + else: + getter = lambda: self.unit.source + return getter + elif self.part == 'notes': + def getter(): + return self.unit.getnotes()[self.part_n] + return getter + elif self.part == 'locations': + def getter(): + return self.unit.getlocations()[self.part_n] + return getter + + def get_setter(self): + if self.part == 'target': + if self.unit.hasplural(): + def setter(value): + strings = self.unit.target.strings + strings[self.part_n] = value + self.unit.target = strings + else: + def setter(value): + self.unit.target = value + return setter + + # SPECIAL METHODS # + def __str__(self): + start, end = self.start, self.end + if start < 3: + start = 3 + if end > len(self.get_getter()()) - 3: + end = len(self.get_getter()()) - 3 + matchpart = self.get_getter()()[start-2:end+2] + return '<GrepMatch "%s" part=%s[%d] start=%d end=%d>' % (matchpart, self.part, self.part_n, self.start, self.end) + + def __repr__(self): + return str(self) + +def real_index(string, nfc_index): + """Calculate the real index in the unnormalized string that corresponds to + the index nfc_index in the normalized string.""" + length = nfc_index + max_length = len(string) + while len(data.normalize(string[:length])) <= nfc_index: + if length == max_length: + return length + length += 1 + return length - 1 + + +def find_matches(unit, part, strings, re_search): + """Return the GrepFilter objects where re_search matches in strings.""" + matches = [] + part_n = 0 + for string in strings: + normalized = data.normalize(string) + for matchobj in re_search.finditer(normalized): + start = real_index(string, matchobj.start()) + end = real_index(string, matchobj.end()) + matches.append(GrepMatch(unit, part=part, part_n=part_n, start=start, 
end=end)) + return matches + +class GrepFilter: + def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False, + invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False, + max_matches=0): + """builds a checkfilter using the given checker""" + if isinstance(searchstring, unicode): + self.searchstring = searchstring + else: + self.searchstring = searchstring.decode(encoding) + self.searchstring = data.normalize(self.searchstring) + if searchparts: + # For now we still support the old terminology, except for the old 'source' + # which has a new meaning now. + self.search_source = ('source' in searchparts) or ('msgid' in searchparts) + self.search_target = ('target' in searchparts) or ('msgstr' in searchparts) + self.search_notes = ('notes' in searchparts) or ('comment' in searchparts) + self.search_locations = 'locations' in searchparts + else: + self.search_source = True + self.search_target = True + self.search_notes = False + self.search_locations = False + self.ignorecase = ignorecase + if self.ignorecase: + self.searchstring = self.searchstring.lower() + self.useregexp = useregexp + if self.useregexp: + self.searchpattern = re.compile(self.searchstring) + self.invertmatch = invertmatch + self.accelchar = accelchar + self.includeheader = includeheader + self.max_matches = max_matches + + def matches(self, teststr): + if teststr is None: + return False + teststr = data.normalize(teststr) + if self.ignorecase: + teststr = teststr.lower() + if self.accelchar: + teststr = re.sub(self.accelchar + self.accelchar, "#", teststr) + teststr = re.sub(self.accelchar, "", teststr) + if self.useregexp: + found = self.searchpattern.search(teststr) + else: + found = teststr.find(self.searchstring) != -1 + if self.invertmatch: + found = not found + return found + + def filterunit(self, unit): + """runs filters on an element""" + if unit.isheader(): return [] + + if self.search_source: + if isinstance(unit.source, multistring): + strings = 
unit.source.strings + else: + strings = [unit.source] + for string in strings: + if self.matches(string): + return True + + if self.search_target: + if isinstance(unit.target, multistring): + strings = unit.target.strings + else: + strings = [unit.target] + for string in strings: + if self.matches(string): + return True + + if self.search_notes: + return self.matches(unit.getnotes()) + if self.search_locations: + return self.matches(u" ".join(unit.getlocations())) + return False + + def filterfile(self, thefile): + """runs filters on a translation file object""" + thenewfile = type(thefile)() + thenewfile.setsourcelanguage(thefile.sourcelanguage) + thenewfile.settargetlanguage(thefile.targetlanguage) + for unit in thefile.units: + if self.filterunit(unit): + thenewfile.addunit(unit) + if self.includeheader and thenewfile.units > 0: + if thefile.units[0].isheader(): + thenewfile.units.insert(0, thefile.units[0]) + else: + thenewfile.units.insert(0, thenewfile.makeheader()) + return thenewfile + + def getmatches(self, units): + if not self.searchstring: + return [], [] + + searchstring = self.searchstring + flags = re.LOCALE | re.MULTILINE | re.UNICODE + + if self.ignorecase: + flags |= re.IGNORECASE + if not self.useregexp: + searchstring = re.escape(searchstring) + self.re_search = re.compile(u'(%s)' % (searchstring), flags) + + matches = [] + indexes = [] + + for index, unit in enumerate(units): + old_length = len(matches) + + if self.search_target: + if unit.hasplural(): + targets = unit.target.strings + else: + targets = [unit.target] + matches.extend(find_matches(unit, 'target', targets, self.re_search)) + if self.search_source: + if unit.hasplural(): + sources = unit.source.strings + else: + sources = [unit.source] + matches.extend(find_matches(unit, 'source', sources, self.re_search)) + if self.search_notes: + matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search)) + + if self.search_locations: + matches.extend(find_matches(unit, 
'locations', unit.getlocations(), self.re_search)) + + # A search for a single letter or an all-inclusive regular + # expression could give enough results to cause performance + # problems. The answer is probably not very useful at this scale. + if self.max_matches and len(matches) > self.max_matches: + raise Exception("Too many matches found") + + if len(matches) > old_length: + old_length = len(matches) + indexes.append(index) + + return matches, indexes + +class GrepOptionParser(optrecurse.RecursiveOptionParser): + """a specialized Option Parser for the grep tool...""" + def parse_args(self, args=None, values=None): + """parses the command line options, handling implicit input/output args""" + (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values) + # some intelligence as to what reasonable people might give on the command line + if args: + options.searchstring = args[0] + args = args[1:] + else: + self.error("At least one argument must be given for the search string") + if args and not options.input: + if not options.output: + options.input = args[:-1] + args = args[-1:] + else: + options.input = args + args = [] + if args and not options.output: + options.output = args[-1] + args = args[:-1] + if args: + self.error("You have used an invalid combination of --input, --output and freestanding args") + if isinstance(options.input, list) and len(options.input) == 1: + options.input = options.input[0] + return (options, args) + + def set_usage(self, usage=None): + """sets the usage string - if usage not given, uses getusagestring for each option""" + if usage is None: + self.usage = "%prog searchstring " + " ".join([self.getusagestring(option) for option in self.option_list]) + else: + super(GrepOptionParser, self).set_usage(usage) + + def run(self): + """parses the arguments, and runs recursiveprocess with the resulting options""" + (options, args) = self.parse_args() + options.inputformats = self.inputformats + options.outputoptions = 
self.outputoptions + options.checkfilter = GrepFilter(options.searchstring, options.searchparts, options.ignorecase, options.useregexp, options.invertmatch, options.accelchar, locale.getpreferredencoding(), options.includeheader) + self.usepsyco(options) + self.recursiveprocess(options) + +def rungrep(inputfile, outputfile, templatefile, checkfilter): + """reads in inputfile, filters using checkfilter, writes to outputfile""" + fromfile = factory.getobject(inputfile) + tofile = checkfilter.filterfile(fromfile) + if tofile.isempty(): + return False + outputfile.write(str(tofile)) + return True + +def cmdlineparser(): + formats = {"po":("po", rungrep), "pot":("pot", rungrep), + "mo":("mo", rungrep), "gmo":("gmo", rungrep), + "tmx":("tmx", rungrep), + "xliff":("xliff", rungrep), "xlf":("xlf", rungrep), "xlff":("xlff", rungrep), + None:("po", rungrep)} + parser = GrepOptionParser(formats) + parser.add_option("", "--search", dest="searchparts", + action="append", type="choice", choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment" ], + metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)") + parser.add_option("-I", "--ignore-case", dest="ignorecase", + action="store_true", default=False, help="ignore case distinctions") + parser.add_option("-e", "--regexp", dest="useregexp", + action="store_true", default=False, help="use regular expression matching") + parser.add_option("-v", "--invert-match", dest="invertmatch", + action="store_true", default=False, help="select non-matching lines") + parser.add_option("", "--accelerator", dest="accelchar", + action="store", type="choice", choices=["&", "_", "~"], + metavar="ACCELERATOR", help="ignores the given accelerator when matching") + parser.add_option("", "--header", dest="includeheader", + action="store_true", default=False, + help="include a PO header in the output") + parser.set_usage() + parser.passthrough.append('checkfilter') + parser.description = __doc__ 
+ return parser + +def main(): + parser = cmdlineparser() + parser.run() + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/pomerge b/translate-toolkit-1.3.0/translate/tools/pomerge new file mode 100755 index 0000000..e3ce59e --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pomerge @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2004 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""script that merges .po files and overrides translations""" + +from translate.tools import pomerge + +if __name__ == '__main__': + pomerge.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/pomerge.py b/translate-toolkit-1.3.0/translate/tools/pomerge.py new file mode 100644 index 0000000..b4bcedc --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pomerge.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2002-2006 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Merges XLIFF and Gettext PO localization files + +Snippet file produced by pogrep or updated by a translator can be merged into +existing files + +See: http://translate.sourceforge.net/wiki/toolkit/pomerge for examples and +usage instructions +""" + +import sys +from translate.storage import factory +from translate.storage import po +from translate.storage import xliff +from translate.storage.poheader import poheader + +def mergestores(store1, store2, mergeblanks, mergecomments): + """Take any new translations in store2 and write them into store1.""" + + for unit2 in store2.units: + if unit2.isheader(): + if isinstance(store1, poheader): + store1.mergeheaders(store2) + # Skip header units + continue + # there may be more than one entity due to msguniq merge + entities = unit2.getlocations() + if len(entities) == 0: + source = unit2.source + unit1 = None + if source in store1.sourceindex: + unit1 = store1.sourceindex[source] + if unit1 is None: + sys.stderr.write(str(unit2) + "\n") + else: + # finally set the new definition in unit1 + unit1.merge(unit2, overwrite=True) + for entity in entities: + unit1 = None + if store1.locationindex.has_key(entity): + # now we need to replace the definition of entity with msgstr + unit1 = store1.locationindex[entity] # find the other po + # check if this is a duplicate in store2... 
+ if store2.locationindex.has_key(entity): + if store2.locationindex[entity] is None: + unit1 = None + # if locationindex was not unique, use the sourceindex + if unit1 is None: + source = unit2.source + if source in store1.sourceindex: + unit1 = store1.sourceindex[source] + # check if we found a matching po element + if unit1 is None: + print >> sys.stderr, "# the following po element was not found" + sys.stderr.write(str(unit2) + "\n") + else: + if not mergeblanks: + target = unit2.target + if len(target.strip()) == 0: continue + # finally set the new definition in unit1 + unit1.merge(unit2, overwrite=True, comments=mergecomments) + return store1 + +def str2bool(option): + """Convert a string value to boolean + + @param option: yes, true, 1, no, false, 0 + @type option: String + @rtype: Boolean + + """ + option = option.lower() + if option in ("yes", "true", "1"): + return True + elif option in ("no", "false", "0"): + return False + else: + raise ValueError("invalid boolean value: %r" % option) + +def mergestore(inputfile, outputfile, templatefile, mergeblanks="no", mergecomments="yes"): + try: + mergecomments = str2bool(mergecomments) + except ValueError: + raise ValueError("invalid mergecomments value: %r" % mergecomments) + try: + mergeblanks = str2bool(mergeblanks) + except ValueError: + raise ValueError("invalid mergeblanks value: %r" % mergeblanks) + inputstore = factory.getobject(inputfile) + if templatefile is None: + # just merge nothing + templatestore = type(inputstore)() + else: + templatestore = factory.getobject(templatefile) + templatestore.makeindex() + inputstore.makeindex() + outputstore = mergestores(templatestore, inputstore, mergeblanks, mergecomments) + if outputstore.isempty(): + return 0 + outputfile.write(str(outputstore)) + return 1 + +def main(): + from translate.convert import convert + pooutput = ("po", mergestore) + potoutput = ("pot", mergestore) + xliffoutput = ("xlf", mergestore) + formats = {("po", "po"): pooutput, ("po", "pot"): 
pooutput, ("pot", "po"): pooutput, ("pot", "pot"): potoutput, + "po": pooutput, "pot": pooutput, + ("xlf", "po"): pooutput, ("xlf", "pot"): pooutput, + ("xlf", "xlf"): xliffoutput, ("po", "xlf"): xliffoutput} + mergeblanksoption = convert.optparse.Option("", "--mergeblanks", dest="mergeblanks", + action="store", default="yes", help="whether to overwrite existing translations with blank translations (yes/no)") + mergecommentsoption = convert.optparse.Option("", "--mergecomments", dest="mergecomments", + action="store", default="yes", help="whether to merge comments as well as translations (yes/no)") + parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__) + parser.add_option(mergeblanksoption) + parser.passthrough.append("mergeblanks") + parser.add_option(mergecommentsoption) + parser.passthrough.append("mergecomments") + parser.run() + + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/porestructure b/translate-toolkit-1.3.0/translate/tools/porestructure new file mode 100755 index 0000000..aa70883 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/porestructure @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2005 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""tool to restructure po files according to poconflicts directives""" + +from translate.tools import porestructure + +if __name__ == '__main__': + porestructure.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/porestructure.py b/translate-toolkit-1.3.0/translate/tools/porestructure.py new file mode 100644 index 0000000..d4c55a3 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/porestructure.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2005, 2006 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Restructure Gettxt PO files produced by poconflicts into the original +directory tree for merging using pomerge + +See: http://translate.sourceforge.net/wiki/toolkit/porestructure for examples and +usage instructions +""" + +import sys, os +from translate.storage import po +from translate.misc import optrecurse + +class SplitOptionParser(optrecurse.RecursiveOptionParser): + """a specialized Option Parser for posplit""" + def parse_args(self, args=None, values=None): + """parses the command line options, handling implicit input/output args""" + (options, args) = optrecurse.RecursiveOptionParser.parse_args(self, args, values) + if not options.output: + self.error("Output file is rquired") + return (options, args) + + def set_usage(self, usage=None): + """sets the usage string - if usage not given, uses getusagestring for each option""" + if usage is None: + self.usage = "%prog " + " ".join([self.getusagestring(option) for option in self.option_list]) + \ + "\n input directory is searched for PO files with (poconflicts) comments, all entries are written to files in a directory structure for pomerge" + else: + super(SplitOptionParser, self).set_usage(usage) + + def recursiveprocess(self, options): + """recurse through directories and process files""" + if not self.isrecursive(options.output, 'output'): + try: + self.warning("Output directory does not exist. Attempting to create") + #TODO: maybe we should only allow it to be created, otherwise we mess up an existing tree... 
+ os.mkdir(options.output) + except: + self.error(optrecurse.optparse.OptionValueError("Output directory does not exist, attempt to create failed")) + if self.isrecursive(options.input, 'input') and getattr(options, "allowrecursiveinput", True): + if isinstance(options.input, list): + inputfiles = self.recurseinputfilelist(options) + else: + inputfiles = self.recurseinputfiles(options) + else: + if options.input: + inputfiles = [os.path.basename(options.input)] + options.input = os.path.dirname(options.input) + else: + inputfiles = [options.input] + self.textmap = {} + self.initprogressbar(inputfiles, options) + for inputpath in inputfiles: + fullinputpath = self.getfullinputpath(options, inputpath) + try: + success = self.processfile(options, fullinputpath) + except Exception, error: + if isinstance(error, KeyboardInterrupt): + raise self.warning("Error processing: input %s" % (fullinputpath), options, sys.exc_info()) + success = False + self.reportprogress(inputpath, success) + del self.progressbar + + def processfile(self, options, fullinputpath): + """process an individual file""" + inputfile = self.openinputfile(options, fullinputpath) + inputpofile = po.pofile(inputfile) + for pounit in inputpofile.units: + if not (pounit.isheader() or pounit.hasplural()): #XXX + if pounit.hasmarkedcomment("poconflicts"): + for comment in pounit.othercomments: + if comment.find("# (poconflicts)") == 0: + pounit.othercomments.remove(comment) + break + #TODO: refactor writing out + outputpath = comment[comment.find(")") + 2:].strip() + self.checkoutputsubdir(options, os.path.dirname(outputpath)) + fulloutputpath = os.path.join(options.output, outputpath) + if os.path.isfile(fulloutputpath): + outputfile = open(fulloutputpath, 'r') + outputpofile = po.pofile(outputfile) + else: + outputpofile = po.pofile() + outputpofile.units.append(pounit) #TODO:perhaps check to see if it's already there... 
+ outputfile = open(fulloutputpath, 'w') + outputfile.write(str(outputpofile)) + +def main(): + #outputfile extentions will actually be determined by the comments in the po files + pooutput = ("po", None) + formats = {(None, None): pooutput, ("po", "po"): pooutput, "po": pooutput} + parser = SplitOptionParser(formats, description=__doc__) + parser.set_usage() + parser.run() + + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/posegment b/translate-toolkit-1.3.0/translate/tools/posegment new file mode 100755 index 0000000..237f5eb --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/posegment @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# +# Copyright 2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Segment PO files at the sentence level""" + +from translate.tools import posegment + +if __name__ == '__main__': + posegment.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/posegment.py b/translate-toolkit-1.3.0/translate/tools/posegment.py new file mode 100644 index 0000000..a69bc10 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/posegment.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Segment Gettext PO, XLIFF and TMX localization files at the sentence level + +See: http://translate.sourceforge.net/wiki/toolkit/posegment for examples and +usage instructions +""" + +from translate.storage import factory +from translate.lang import factory as lang_factory +import os +import re + +class segment: + + def __init__(self, sourcelang, targetlang, stripspaces=True): + self.sourcelang = sourcelang + self.targetlang = targetlang + self.stripspaces = stripspaces + + def segmentunit(self, unit): + if unit.isheader() or unit.hasplural(): + return [unit] + sourcesegments = self.sourcelang.sentences(unit.source, strip=self.stripspaces) + targetsegments = self.targetlang.sentences(unit.target, strip=self.stripspaces) + if unit.istranslated() and (len(sourcesegments) != len(targetsegments)): + return [unit] + units = [] + for i in range(len(sourcesegments)): + newunit = unit.copy() + newunit.source = sourcesegments[i] + if not unit.istranslated(): + newunit.target = "" + else: + newunit.target = targetsegments[i] + units.append(newunit) + return units + + def convertstore(self, fromstore): + tostore = type(fromstore)() + for unit in fromstore.units: + newunits = self.segmentunit(unit) + for newunit in newunits: + tostore.addunit(newunit) + return tostore + +def segmentfile(inputfile, outputfile, templatefile, sourcelanguage="en", targetlanguage=None, stripspaces=True): + """reads in inputfile, segments it then, writes to outputfile""" + # note that templatefile is not used, but it is required by the converter... 
+ inputstore = factory.getobject(inputfile) + if inputstore.isempty(): + return 0 + sourcelang = lang_factory.getlanguage(sourcelanguage) + targetlang = lang_factory.getlanguage(targetlanguage) + convertor = segment(sourcelang, targetlang, stripspaces=stripspaces) + outputstore = convertor.convertstore(inputstore) + outputfile.write(str(outputstore)) + return 1 + +def main(): + from translate.convert import convert + formats = {"po":("po", segmentfile), "xlf":("xlf", segmentfile), "tmx": ("tmx", segmentfile)} + parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__) + parser.add_option("-l", "--language", dest="targetlanguage", default=None, + help="the target language code", metavar="LANG") + parser.add_option("", "--source-language", dest="sourcelanguage", default=None, + help="the source language code (default 'en')", metavar="LANG") + parser.passthrough.append("sourcelanguage") + parser.passthrough.append("targetlanguage") + parser.add_option("", "--keepspaces", dest="stripspaces", action="store_false", + default=True, help="Disable automatic stripping of whitespace") + parser.passthrough.append("stripspaces") + parser.run() + + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/poswap b/translate-toolkit-1.3.0/translate/tools/poswap new file mode 100755 index 0000000..2dcc52c --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poswap @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# +# Copyright 2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Builds a new translation file with the target of the input language as +source language.""" + +from translate.tools import poswap + +if __name__ == '__main__': + poswap.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/poswap.py b/translate-toolkit-1.3.0/translate/tools/poswap.py new file mode 100644 index 0000000..f242b71 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poswap.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2007 Zuza Software Foundation +# +# This file is part of translate. +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Builds a new translation file with the target of the input language as +source language. + +Ensure that the two po files correspond 100% to the same pot file before using +this. 
+ +To translate Kurdish (ku) through French:: + po2swap -i fr/ -t ku -o fr-ku + +To convert the fr-ku files back to en-ku:: + po2swap --reverse -i fr/ -t fr-ku -o en-ku + +See: http://translate.sourceforge.net/wiki/toolkit/poswap for further examples and +usage instructions +""" + +from translate.storage import po +from translate.convert import convert + +def swapdir(store): + """Swap the source and target of each unit.""" + for unit in store.units: + if unit.isheader(): + continue + if not unit.target or unit.isfuzzy(): + unit.target = unit.source + else: + unit.source, unit.target = unit.target, unit.source + +def convertpo(inputpofile, outputpotfile, template, reverse=False): + """reads in inputpofile, removes the header, writes to outputpotfile.""" + inputpo = po.pofile(inputpofile) + templatepo = po.pofile(template) + if reverse: + swapdir(inputpo) + templatepo.makeindex() + header = inputpo.header() + if header: + inputpo.units = inputpo.units[1:] + + for i, unit in enumerate(inputpo.units): + for location in unit.getlocations(): + templateunit = templatepo.locationindex.get(location, None) + if templateunit and templateunit.source == unit.source: + break + else: + templateunit = templatepo.findunit(unit.source) + + unit.othercomments = [] + if unit.target and not unit.isfuzzy(): + unit.source = unit.target + elif not reverse: + if inputpo.filename: + unit.addnote("No translation found in %s" % inputpo.filename, origin="programmer") + else: + unit.addnote("No translation found in the supplied source language", origin="programmer") + unit.target = "" + unit.markfuzzy(False) + if templateunit: + unit.addnote(templateunit.getnotes(origin="translator")) + unit.markfuzzy(templateunit.isfuzzy()) + unit.target = templateunit.target + if unit.isobsolete(): + del inputpo.units[i] + outputpotfile.write(str(inputpo)) + return 1 + +def main(argv=None): + formats = {("po", "po"): ("po", convertpo), ("po", "pot"): ("po", convertpo), "po": ("po", convertpo)} + parser = 
convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__) + parser.add_option("", "--reverse", dest="reverse", default=False, action="store_true", + help="reverse the process of intermediate language conversion") + parser.passthrough.append("reverse") + parser.run(argv) + +if __name__ == '__main__': + main() + diff --git a/translate-toolkit-1.3.0/translate/tools/poterminology b/translate-toolkit-1.3.0/translate/tools/poterminology new file mode 100755 index 0000000..38431c3 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poterminology @@ -0,0 +1,28 @@ +#!/usr/bin/python +# +# Copyright 2008 Zuza Software Foundation +# +# This file is part of the translate-toolkit +# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""reads a set of .po or .pot files to produce a pootle-terminology.pot""" + +from translate.tools import poterminology + +if __name__ == '__main__': + poterminology.main() + + diff --git a/translate-toolkit-1.3.0/translate/tools/poterminology.py b/translate-toolkit-1.3.0/translate/tools/poterminology.py new file mode 100644 index 0000000..d63387b --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/poterminology.py @@ -0,0 +1,458 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# This file is part of translate. 
+# +# translate is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# translate is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with translate; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""reads a set of .po or .pot files to produce a pootle-terminology.pot + +See: http://translate.sourceforge.net/wiki/toolkit/poterminology for examples and +usage instructions +""" + +from translate.lang import factory as lang_factory +from translate.misc import optrecurse +from translate.storage import po +from translate.storage import factory +from translate.misc import file_discovery +import os +import re +import sys + +class TerminologyOptionParser(optrecurse.RecursiveOptionParser): + """a specialized Option Parser for the terminology tool...""" + + # handles c-format and python-format + formatpat = re.compile(r"%(?:\([^)]+\)|[0-9]+\$)?[-+#0]*[0-9.*]*(?:[hlLzjt][hl])?[EFGXc-ginoprsux]") + # handles XML/HTML elements (<foo>text</foo> => text) + xmlelpat = re.compile(r"<(?:![[-]|[/?]?[A-Za-z_:])[^>]*>") + # handles XML/HTML entities (    & &my_entity;) + xmlentpat = re.compile(r"&(?:#(?:[0-9]+|x[0-9a-f]+)|[a-z_:][\w.-:]*);", + flags=re.UNICODE|re.IGNORECASE) + + sortorders = [ "frequency", "dictionary", "length" ] + + files = 0 + units = 0 + + def parse_args(self, args=None, values=None): + """parses the command line options, handling implicit input/output args""" + (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values) + # some intelligence 
as to what reasonable people might give on the command line + if args and not options.input: + if not options.output and not options.update and len(args) > 1: + options.input = args[:-1] + args = args[-1:] + else: + options.input = args + args = [] + # don't overwrite last freestanding argument file, to avoid accidents + # due to shell wildcard expansion + if args and not options.output and not options.update: + if os.path.lexists(args[-1]) and not os.path.isdir(args[-1]): + self.error("To overwrite %s, specify it with -o/--output or -u/--update" % (args[-1])) + options.output = args[-1] + args = args[:-1] + if options.output and options.update: + self.error("You cannot use both -u/--update and -o/--output") + if args: + self.error("You have used an invalid combination of -i/--input, -o/--output, -u/--update and freestanding args") + if isinstance(options.input, list) and len(options.input) == 1: + options.input = options.input[0] + if options.inputmin == None: + options.inputmin = 1 + elif not isinstance(options.input, list) and not os.path.isdir(options.input): + if options.inputmin == None: + options.inputmin = 1 + elif options.inputmin == None: + options.inputmin = 2 + if options.update: + options.output = options.update + if isinstance(options.input, list): + options.input.append(options.update) + elif options.input: + options.input = [options.input, options.update] + else: + options.input = options.update + if not options.output: + options.output = "pootle-terminology.pot" + return (options, args) + + def set_usage(self, usage=None): + """sets the usage string - if usage not given, uses getusagestring for each option""" + if usage is None: + self.usage = "%prog " + " ".join([self.getusagestring(option) for option in self.option_list]) + \ + "\n input directory is searched for PO files, terminology PO file is output file" + else: + super(TerminologyOptionParser, self).set_usage(usage) + + def run(self): + """parses the arguments, and runs recursiveprocess with 
the resulting options""" + (options, args) = self.parse_args() + options.inputformats = self.inputformats + options.outputoptions = self.outputoptions + self.usepsyco(options) + self.recursiveprocess(options) + + def recursiveprocess(self, options): + """recurse through directories and process files""" + if self.isrecursive(options.input, 'input') and getattr(options, "allowrecursiveinput", True): + if isinstance(options.input, list): + inputfiles = self.recurseinputfilelist(options) + else: + inputfiles = self.recurseinputfiles(options) + else: + if options.input: + inputfiles = [os.path.basename(options.input)] + options.input = os.path.dirname(options.input) + else: + inputfiles = [options.input] + if os.path.isdir(options.output): + options.output = os.path.join(options.output,"pootle-terminology.pot") + # load default stopfile if no -S options were given + if self.defaultstopfile: + parse_stopword_file(None, "-S", self.defaultstopfile, self) + self.glossary = {} + self.initprogressbar(inputfiles, options) + for inputpath in inputfiles: + self.files += 1 + fullinputpath = self.getfullinputpath(options, inputpath) + success = True + try: + self.processfile(None, options, fullinputpath) + except Exception, error: + if isinstance(error, KeyboardInterrupt): + raise + self.warning("Error processing: input %s" % (fullinputpath), options, sys.exc_info()) + success = False + self.reportprogress(inputpath, success) + del self.progressbar + self.outputterminology(options) + + def clean(self, string, options): + """returns the cleaned string that contains the text to be matched""" + for accelerator in options.accelchars: + string = string.replace(accelerator, "") + string = self.formatpat.sub(" ", string) + string = self.xmlelpat.sub(" ", string) + string = self.xmlentpat.sub(" ", string) + string = string.strip() + return string + + def stopmap(self, word): + """return case-mapped stopword for input word""" + if self.stopignorecase or (self.stopfoldtitle and 
word.istitle()): + word = word.lower() + return word + + def stopword(self, word, defaultset=frozenset()): + """return stoplist frozenset for input word""" + return self.stopwords.get(self.stopmap(word),defaultset) + + def addphrases(self, words, skips, translation, partials=True): + """adds (sub)phrases with non-skipwords and more than one word""" + if (len(words) > skips + 1 and + 'skip' not in self.stopword(words[0]) and + 'skip' not in self.stopword(words[-1])): + self.glossary.setdefault(' '.join(words), []).append(translation) + if partials: + part = list(words) + while len(part) > 2: + if 'skip' in self.stopword(part.pop()): + skips -= 1 + if (len(part) > skips + 1 and + 'skip' not in self.stopword(part[0]) and + 'skip' not in self.stopword(part[-1])): + self.glossary.setdefault(' '.join(part), []).append(translation) + + def processfile(self, fileprocessor, options, fullinputpath): + """process an individual file""" + inputfile = self.openinputfile(options, fullinputpath) + inputfile = factory.getobject(inputfile) + sourcelang = lang_factory.getlanguage(options.sourcelanguage) + rematchignore = frozenset(('word','phrase')) + defaultignore = frozenset() + for unit in inputfile.units: + self.units += 1 + if unit.isheader(): + continue + if unit.hasplural(): + continue + if not options.invert: + source = self.clean(unit.source, options) + target = self.clean(unit.target, options) + else: + target = self.clean(unit.source, options) + source = self.clean(unit.target, options) + if len(source) <= 1: + continue + for sentence in sourcelang.sentences(source): + words = [] + skips = 0 + for word in sourcelang.words(sentence): + stword = self.stopmap(word) + if options.ignorecase or (options.foldtitle and word.istitle()): + word = word.lower() + ignore = defaultignore + if stword in self.stopwords: + ignore = self.stopwords[stword] + else: + for stopre in self.stoprelist: + if stopre.match(stword) != None: + ignore = rematchignore + break + translation = (source, 
target, unit, fullinputpath) + if 'word' not in ignore: + # reduce plurals + root = word + if len(word) > 3 and word[-1] == 's' and word[0:-1] in self.glossary: + root = word[0:-1] + elif len(root) > 2 and root + 's' in self.glossary: + self.glossary[root] = self.glossary.pop(root + 's') + self.glossary.setdefault(root, []).append(translation) + if options.termlength > 1: + if 'phrase' in ignore: + # add trailing phrases in previous words + while len(words) > 2: + if 'skip' in self.stopword(words.pop(0)): + skips -= 1 + self.addphrases(words, skips, translation) + words = [] + skips = 0 + else: + words.append(word) + if 'skip' in ignore: + skips += 1 + if len(words) > options.termlength + skips: + while len(words) > options.termlength + skips: + if 'skip' in self.stopword(words.pop(0)): + skips -= 1 + self.addphrases(words, skips, translation) + else: + self.addphrases(words, skips, translation, partials=False) + if options.termlength > 1: + # add trailing phrases in sentence after reaching end + while options.termlength > 1 and len(words) > 2: + + if 'skip' in self.stopword(words.pop(0)): + skips -= 1 + self.addphrases(words, skips, translation) + + def outputterminology(self, options): + """saves the generated terminology glossary""" + termfile = po.pofile() + terms = {} + locre = re.compile(r":[0-9]+$") + print >> sys.stderr, ("%d terms from %d units in %d files" % + (len(self.glossary), self.units, self.files)) + for term, translations in self.glossary.iteritems(): + if len(translations) <= 1: + continue + filecounts = {} + sources = {} + termunit = po.pounit(term) + locations = {} + sourcenotes = {} + transnotes = {} + targets = {} + fullmsg = False + for source, target, unit, filename in translations: + sources[source] = 1 + filecounts[filename] = filecounts.setdefault(filename, 0) + 1 + if term.lower() == self.clean(unit.source, options).lower(): + fullmsg = True + target = self.clean(unit.target, options) + if options.ignorecase or (options.foldtitle and 
target.istitle()): + target = target.lower() + unit.settarget(target) + if target != "": + targets.setdefault(target, []).append(filename) + if term.lower() == unit.source.strip().lower(): + sourcenotes[unit.getnotes("source code")] = None + transnotes[unit.getnotes("translator")] = None + else: + unit.settarget("") + unit.setsource(term) + termunit.merge(unit, overwrite=False, comments=False) + for loc in unit.getlocations(): + locations.setdefault(locre.sub("", loc)) + numsources = len(sources) + numfiles = len(filecounts) + numlocs = len(locations) + if numfiles < options.inputmin or numlocs < options.locmin: + continue + if fullmsg: + if numsources < options.fullmsgmin: + continue + elif numsources < options.substrmin: + continue + if len(targets.keys()) > 1: + txt = '; '.join(["%s {%s}" % (target, ', '.join(files)) + for target, files in targets.iteritems()]) + if termunit.gettarget().find('};') < 0: + termunit.settarget(txt) + termunit.markfuzzy() + else: + # if annotated multiple terms already present, keep as-is + termunit.addnote(txt, "translator") + locmax = 2 * options.locmin + if numlocs > locmax: + for location in locations.keys()[0:locmax]: + termunit.addlocation(location) + termunit.addlocation("(poterminology) %d more locations" + % (numlocs - locmax)) + else: + for location in locations.keys(): + termunit.addlocation(location) + for sourcenote in sourcenotes.keys(): + termunit.addnote(sourcenote, "source code") + for transnote in transnotes.keys(): + termunit.addnote(transnote, "translator") + for filename, count in filecounts.iteritems(): + termunit.othercomments.append("# (poterminology) %s (%d)\n" % (filename, count)) + terms[term] = (((10 * numfiles) + numsources, termunit)) + # reduce subphrase + termlist = terms.keys() + print >> sys.stderr, "%d terms after thresholding" % len(termlist) + termlist.sort(lambda x, y: cmp(len(x), len(y))) + for term in termlist: + words = term.split() + if len(words) <= 2: + continue + while len(words) > 2: + 
words.pop() + if terms[term][0] == terms.get(' '.join(words), [0])[0]: + del terms[' '.join(words)] + words = term.split() + while len(words) > 2: + words.pop(0) + if terms[term][0] == terms.get(' '.join(words), [0])[0]: + del terms[' '.join(words)] + print >> sys.stderr, "%d terms after subphrase reduction" % len(terms.keys()) + termitems = terms.values() + if options.sortorders == None: + options.sortorders = self.sortorders + while len(options.sortorders) > 0: + order = options.sortorders.pop() + if order == "frequency": + termitems.sort(lambda x, y: cmp(y[0], x[0])) + elif order == "dictionary": + termitems.sort(lambda x, y: cmp(x[1].source.lower(), y[1].source.lower())) + elif order == "length": + termitems.sort(lambda x, y: cmp(len(x[1].source), len(y[1].source))) + else: + self.warning("unknown sort order %s" % order, options) + for count, unit in termitems: + termfile.units.append(unit) + open(options.output, "w").write(str(termfile)) + +def fold_case_option(option, opt_str, value, parser): + parser.values.ignorecase = False + parser.values.foldtitle = True + +def preserve_case_option(option, opt_str, value, parser): + parser.values.ignorecase = parser.values.foldtitle = False + +def parse_stopword_file(option, opt_str, value, parser): + + actions = { '+': frozenset(), ':': frozenset(['skip']), + '<': frozenset(['phrase']), '=': frozenset(['word']), + '>': frozenset(['word','skip']), + '@': frozenset(['word','phrase']) } + + stopfile = open(value, "r") + line = 0 + try: + for stopline in stopfile: + line += 1 + stoptype = stopline[0] + if stoptype == '#' or stoptype == "\n": + continue + elif stoptype == '!': + if stopline[1] == 'C': + parser.stopfoldtitle = False + parser.stopignorecase = False + elif stopline[1] == 'F': + parser.stopfoldtitle = True + parser.stopignorecase = False + elif stopline[1] == 'I': + parser.stopignorecase = True + else: + parser.warning("%s line %d - bad case mapping directive" % (value, line), parser.values, ("", stopline[:2])) 
+ elif stoptype == '/': + parser.stoprelist.append(re.compile(stopline[1:-1]+'$')) + else: + parser.stopwords[stopline[1:-1]] = actions[stoptype] + except KeyError, character: + parser.warning("%s line %d - bad stopword entry starts with" % (value, line), parser.values, sys.exc_info()) + parser.warning("%s line %d" % (value, line + 1), parser.values, ("", "all lines after error ignored" )) + stopfile.close() + parser.defaultstopfile = None + +def main(): + formats = {"po":("po", None), "pot": ("pot", None), None:("po", None)} + parser = TerminologyOptionParser(formats) + + parser.add_option("-u", "--update", type="string", dest="update", + metavar="UPDATEFILE", help="update terminology in UPDATEFILE") + + parser.stopwords = {} + parser.stoprelist = [] + parser.stopfoldtitle = True + parser.stopignorecase = False + parser.defaultstopfile = file_discovery.get_abs_data_filename('stoplist-en') + parser.add_option("-S", "--stopword-list", type="string", metavar="STOPFILE", + action="callback", callback=parse_stopword_file, + help="read stopword (term exclusion) list from STOPFILE (default %s)" % parser.defaultstopfile, + default=parser.defaultstopfile) + + parser.set_defaults(foldtitle = True, ignorecase = False) + parser.add_option("-F", "--fold-titlecase", callback=fold_case_option, + action="callback", help="fold \"Title Case\" to lowercase (default)") + parser.add_option("-C", "--preserve-case", callback=preserve_case_option, + action="callback", help="preserve all uppercase/lowercase") + parser.add_option("-I", "--ignore-case", dest="ignorecase", + action="store_true", help="make all terms lowercase") + + parser.add_option("", "--accelerator", dest="accelchars", default="", + metavar="ACCELERATORS", help="ignores the given accelerator characters when matching") + + parser.add_option("-t", "--term-words", type="int", dest="termlength", default="3", + help="generate terms of up to LENGTH words (default 3)", metavar="LENGTH") + parser.add_option("", "--inputs-needed", 
type="int", dest="inputmin", + help="omit terms appearing in less than MIN input files (default 2, or 1 if only one input file)", metavar="MIN") + parser.add_option("", "--fullmsg-needed", type="int", dest="fullmsgmin", default="1", + help="omit full message terms appearing in less than MIN different messages (default 1)", metavar="MIN") + parser.add_option("", "--substr-needed", type="int", dest="substrmin", default="2", + help="omit substring-only terms appearing in less than MIN different messages (default 2)", metavar="MIN") + parser.add_option("", "--locs-needed", type="int", dest="locmin", default="2", + help="omit terms appearing in less than MIN different original source files (default 2)", metavar="MIN") + + parser.add_option("", "--sort", dest="sortorders", action="append", + type="choice", choices=parser.sortorders, metavar="ORDER", + help="output sort order(s): %s (default is all orders in the above priority)" % ', '.join(parser.sortorders)) + + parser.add_option("", "--source-language", dest="sourcelanguage", default="en", + help="the source language code (default 'en')", metavar="LANG") + parser.add_option("-v", "--invert", dest="invert", + action="store_true", default=False, help="invert the source and target languages for terminology") + parser.set_usage() + parser.description = __doc__ + parser.run() + + +if __name__ == '__main__': + main() diff --git a/translate-toolkit-1.3.0/translate/tools/pretranslate b/translate-toolkit-1.3.0/translate/tools/pretranslate new file mode 100755 index 0000000..620762e --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pretranslate @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2008 Zuza Software Foundation +# +# This file is part of translate. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. + +"""fill localization files with suggested translations based on +translation memory and existing translations +""" + +from translate.tools import pretranslate + +if __name__ == '__main__': + pretranslate.main() + diff --git a/translate-toolkit-1.3.0/translate/tools/pretranslate.py b/translate-toolkit-1.3.0/translate/tools/pretranslate.py new file mode 100644 index 0000000..451b7df --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/pretranslate.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2008 Zuza Software Foundation +# +# This file is part of translate. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. 
"""fill localization files with suggested translations based on
translation memory and existing translations
"""

from translate.storage import factory
from translate.storage import xliff
from translate.search import match


# Module-level cache: we don't want to reinitialise the TM matcher on every
# call, so the first call to memory() stores the matcher here.
tmmatcher = None


def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Returns the TM store to use. Only initialises on first call.

    Note: subsequent calls return the cached matcher regardless of the
    arguments passed, so the parameters only take effect on the first call.
    """
    global tmmatcher
    # Only initialise first time
    if tmmatcher is None:
        if isinstance(tmfiles, list):
            tmstore = [factory.getobject(tmfile) for tmfile in tmfiles]
        else:
            tmstore = factory.getobject(tmfiles)
        tmmatcher = match.matcher(tmstore, max_candidates=max_candidates, min_similarity=min_similarity, max_length=max_length)
    return tmmatcher


def pretranslate_file(input_file, output_file, template_file, tm=None, min_similarity=75, fuzzymatching=True):
    """pretranslate any factory supported file with old translations and translation memory

    :param input_file: open file (or filename) with the new, untranslated store
    :param output_file: open file the pretranslated store is written to
    :param template_file: open file with previous translations, or None
    :param tm: translation memory file(s) passed through to memory()
    :return: 1 (success indicator expected by the convert framework)
    """
    input_store = factory.getobject(input_file)
    template_store = None
    if template_file is not None:
        template_store = factory.getobject(template_file)

    output = pretranslate_store(input_store, template_store, tm, min_similarity, fuzzymatching)
    output_file.write(str(output))
    return 1


def match_template_id(input_unit, template_store):
    """returns a matching unit from a template, or None if nothing matches

    Since oo2po and moz2po use location as a unique identifier for strings
    we match against location first, then check for matching source strings.
    """
    #FIXME: this makes absolutely no sense for other po files
    for location in input_unit.getlocations():
        matching_unit = template_store.locationindex.get(location, None)
        #do we really want to discard units with matching locations but no matching source?
        if matching_unit is not None and matching_unit.source == input_unit.source and len(matching_unit.target) > 0:
            return matching_unit
    # No match by location information: search for identical source strings.
    # (The original used a "for...else" here; with no "break" in the loop the
    # "else" always ran after the loop, so a plain post-loop return is identical.)
    #FIXME: need a better method for matching strings, we don't take context into account
    #FIXME: need a better test for when not to use location info for matching
    return template_store.findunit(input_unit.source)


def match_fuzzy(input_unit, matchers):
    """returns the first fuzzy match from a queue of matchers, or None"""
    for matcher in matchers:
        fuzzycandidates = matcher.matches(input_unit.source)
        if fuzzycandidates:
            return fuzzycandidates[0]


def pretranslate_unit(input_unit, template_store, matchers=None, mark_reused=False):
    """returns a pretranslated unit; if no translation was found the input unit is returned unchanged"""

    matching_unit = None
    #do template matching
    if template_store:
        matching_unit = match_template_id(input_unit, template_store)

    if matching_unit and len(matching_unit.target) > 0:
        input_unit.merge(matching_unit, authoritative=True)
    elif matchers:
        #do fuzzy matching
        matching_unit = match_fuzzy(input_unit, matchers)
        if matching_unit and len(matching_unit.target) > 0:
            #FIXME: should we dispatch here instead of this crude type check
            if isinstance(input_unit, xliff.xliffunit):
                #FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(matching_unit.target, origin="fish", sourcetxt=matching_unit.source)
            else:
                input_unit.merge(matching_unit, authoritative=True)

    #FIXME: ugly hack required by pot2po to mark old
    #translations reused for the new file
    if mark_reused and matching_unit and template_store:
        original_unit = template_store.findunit(matching_unit.source)
        if original_unit is not None:
            original_unit.reused = True

    return input_unit


def prepare_template_pofile(template_store):
    """po format specific template preparation logic

    Called by name from pretranslate_store() based on the template's class name.
    """
    #do we want to consider obsolete translations?
    for unit in template_store.units:
        if unit.isobsolete():
            unit.resurrect()


def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """does the actual pretranslation, returning the (mutated) input_store"""
    #preparation
    matchers = []
    #prepare template
    if template_store is not None:
        template_store.makeindex()
        #template preparation based on type; e.g. prepare_template_pofile
        prepare_template = "prepare_template_%s" % template_store.__class__.__name__
        # "in" replaces the deprecated dict.has_key()
        if prepare_template in globals():
            globals()[prepare_template](template_store)

        if fuzzymatching:
            #create template matcher
            #FIXME: max_length hardcoded
            matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            matcher.addpercentage = False
            matchers.append(matcher)

    #prepare tm: create tm matcher
    if tm and fuzzymatching:
        #FIXME: max_length hardcoded
        matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        matcher.addpercentage = False
        matchers.append(matcher)

    #main loop: pretranslate each translatable unit in place
    for input_unit in input_store.units:
        if input_unit.istranslatable():
            input_unit = pretranslate_unit(input_unit, template_store, matchers)

    return input_store


def main(argv=None):
    from translate.convert import convert
    formats = {"pot": ("po", pretranslate_file), ("pot", "po"): ("po", pretranslate_file),
               "po": ("po", pretranslate_file), ("po", "po"): ("po", pretranslate_file),
               "xlf": ("xlf", pretranslate_file), ("xlf", "xlf"): ("xlf", pretranslate_file),
              }
    parser = convert.ConvertOptionParser(formats, usetemplates=True,
                                         allowmissingtemplate=True, description=__doc__)
    parser.add_option("", "--tm", dest="tm", default=None,
                      help="The file to use as translation memory when fuzzy matching")
    parser.passthrough.append("tm")
    defaultsimilarity = 75
    parser.add_option("-s", "--similarity", dest="min_similarity", default=defaultsimilarity,
                      type="float", help="The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity)
    parser.passthrough.append("min_similarity")
    parser.add_option("--nofuzzymatching", dest="fuzzymatching", action="store_false",
                      default=True, help="Disable fuzzy matching")
    parser.passthrough.append("fuzzymatching")
    parser.run(argv)


if __name__ == '__main__':
    main()
"""diff tool like GNU diff, but lets you have special options that are useful in dealing with PO files"""

import difflib
import optparse
import time
import os
import sys
import fnmatch

# Line terminator used for all generated diff output.
lineterm = "\n"

def main():
    """main program for pydiff"""
    usage = "usage: %prog [options] fromfile tofile"
    parser = optparse.OptionParser(usage)
    # GNU diff like options
    parser.add_option("-i", "--ignore-case", default=False, action="store_true",
                      help='Ignore case differences in file contents.')
    parser.add_option("-U", "--unified", type="int", metavar="NUM", default=3, dest="unified_lines",
                      help='Output NUM (default 3) lines of unified context')
    parser.add_option("-r", "--recursive", default=False, action="store_true",
                      help='Recursively compare any subdirectories found.')
    parser.add_option("-N", "--new-file", default=False, action="store_true",
                      help='Treat absent files as empty.')
    parser.add_option("", "--unidirectional-new-file", default=False, action="store_true",
                      help='Treat absent first files as empty.')
    parser.add_option("-s", "--report-identical-files", default=False, action="store_true",
                      help='Report when two files are the same.')
    parser.add_option("-x", "--exclude", default=["CVS", "*.po~"], action="append", metavar="PAT",
                      help='Exclude files that match PAT.')
    # our own options
    parser.add_option("", "--fromcontains", type="string", default=None, metavar="TEXT",
                      help='Only show changes where fromfile contains TEXT')
    parser.add_option("", "--tocontains", type="string", default=None, metavar="TEXT",
                      help='Only show changes where tofile contains TEXT')
    parser.add_option("", "--contains", type="string", default=None, metavar="TEXT",
                      help='Only show changes where fromfile or tofile contains TEXT')
    parser.add_option("-I", "--ignore-case-contains", default=False, action="store_true",
                      help='Ignore case differences when matching any of the changes')
    parser.add_option("", "--accelerator", dest="accelchars", default="",
                      metavar="ACCELERATORS", help="ignores the given accelerator characters when matching")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("fromfile and tofile required")
    fromfile, tofile = args
    if fromfile == "-" and tofile == "-":
        parser.error("Only one of fromfile and tofile can be read from stdin")

    if os.path.isdir(fromfile):
        if os.path.isdir(tofile):
            differ = DirDiffer(fromfile, tofile, options)
        else:
            parser.error("File %s is a directory while file %s is a regular file" % (fromfile, tofile))
    else:
        if os.path.isdir(tofile):
            parser.error("File %s is a regular file while file %s is a directory" % (fromfile, tofile))
        else:
            differ = FileDiffer(fromfile, tofile, options)
    differ.writediff(sys.stdout)

class DirDiffer:
    """generates diffs between directories"""
    def __init__(self, fromdir, todir, options):
        """constructs a comparison between the two dirs using the given options"""
        self.fromdir = fromdir
        self.todir = todir
        self.options = options

    def isexcluded(self, difffile):
        """checks if the given filename has been excluded from the diff"""
        for exclude_pat in self.options.exclude:
            if fnmatch.fnmatch(difffile, exclude_pat):
                return True
        return False

    def writediff(self, outfile):
        """writes the actual diff to the given file"""
        fromfiles = os.listdir(self.fromdir)
        tofiles = os.listdir(self.todir)
        # union of both listings, in sorted order (replaces the old
        # dict.fromkeys(...).keys() + .sort() idiom)
        difffiles = sorted(set(fromfiles + tofiles))
        for difffile in difffiles:
            if self.isexcluded(difffile):
                continue
            # a missing side is still "ok" when the relevant new-file option allows it
            from_ok = (difffile in fromfiles or self.options.new_file or self.options.unidirectional_new_file)
            to_ok = (difffile in tofiles or self.options.new_file)
            if from_ok and to_ok:
                fromfile = os.path.join(self.fromdir, difffile)
                tofile = os.path.join(self.todir, difffile)
                if os.path.isdir(fromfile):
                    if os.path.isdir(tofile):
                        if self.options.recursive:
                            differ = DirDiffer(fromfile, tofile, self.options)
                            differ.writediff(outfile)
                        else:
                            outfile.write("Common subdirectories: %s and %s\n" % (fromfile, tofile))
                    else:
                        outfile.write("File %s is a directory while file %s is a regular file\n" % (fromfile, tofile))
                else:
                    if os.path.isdir(tofile):
                        # BUG FIX: this branch used to call parser.error(), but
                        # "parser" is a local of main() and is not in scope here,
                        # so it raised NameError.  Report on the diff output
                        # instead, mirroring the reverse case above.
                        outfile.write("File %s is a regular file while file %s is a directory\n" % (fromfile, tofile))
                    else:
                        filediffer = FileDiffer(fromfile, tofile, self.options)
                        filediffer.writediff(outfile)
            elif from_ok:
                outfile.write("Only in %s: %s\n" % (self.fromdir, difffile))
            elif to_ok:
                outfile.write("Only in %s: %s\n" % (self.todir, difffile))

class FileDiffer:
    """generates diffs between files"""
    def __init__(self, fromfile, tofile, options):
        """constructs a comparison between the two files using the given options"""
        self.fromfile = fromfile
        self.tofile = tofile
        self.options = options

    def writediff(self, outfile):
        """writes the actual diff to the given file"""
        validfiles = True
        if os.path.exists(self.fromfile):
            # 'U' = universal newline mode (Python 2 era; removed in Python 3.11)
            self.from_lines = open(self.fromfile, 'U').readlines()
            fromfiledate = os.stat(self.fromfile).st_mtime
        elif self.fromfile == "-":
            self.from_lines = sys.stdin.readlines()
            fromfiledate = time.time()
        elif self.options.new_file or self.options.unidirectional_new_file:
            self.from_lines = []
            fromfiledate = 0
        else:
            outfile.write("%s: No such file or directory\n" % self.fromfile)
            validfiles = False
        if os.path.exists(self.tofile):
            self.to_lines = open(self.tofile, 'U').readlines()
            tofiledate = os.stat(self.tofile).st_mtime
        elif self.tofile == "-":
            self.to_lines = sys.stdin.readlines()
            tofiledate = time.time()
        elif self.options.new_file:
            self.to_lines = []
            tofiledate = 0
        else:
            outfile.write("%s: No such file or directory\n" % self.tofile)
            validfiles = False
        if not validfiles:
            return
        fromfiledate = time.ctime(fromfiledate)
        tofiledate = time.ctime(tofiledate)
        compare_from_lines = self.from_lines
        compare_to_lines = self.to_lines
        if self.options.ignore_case:
            compare_from_lines = [line.lower() for line in compare_from_lines]
            compare_to_lines = [line.lower() for line in compare_to_lines]
        matcher = difflib.SequenceMatcher(None, compare_from_lines, compare_to_lines)
        groups = matcher.get_grouped_opcodes(self.options.unified_lines)
        started = False
        # the ---/+++ header is only emitted once the first hunk survives filtering
        fromstring = '--- %s\t%s%s' % (self.fromfile, fromfiledate, lineterm)
        tostring = '+++ %s\t%s%s' % (self.tofile, tofiledate, lineterm)

        for group in groups:
            hunk = "".join(self.unified_diff(group))
            if self.options.fromcontains:
                if self.options.ignore_case_contains:
                    hunk_from_lines = "".join([line.lower() for line in self.get_from_lines(group)])
                else:
                    hunk_from_lines = "".join(self.get_from_lines(group))
                # strip accelerator markers (e.g. &, ~) before matching
                for accelerator in self.options.accelchars:
                    hunk_from_lines = hunk_from_lines.replace(accelerator, "")
                if self.options.fromcontains not in hunk_from_lines:
                    continue
            if self.options.tocontains:
                if self.options.ignore_case_contains:
                    hunk_to_lines = "".join([line.lower() for line in self.get_to_lines(group)])
                else:
                    hunk_to_lines = "".join(self.get_to_lines(group))
                for accelerator in self.options.accelchars:
                    hunk_to_lines = hunk_to_lines.replace(accelerator, "")
                if self.options.tocontains not in hunk_to_lines:
                    continue
            if self.options.contains:
                if self.options.ignore_case_contains:
                    hunk_lines = "".join([line.lower() for line in self.get_from_lines(group) + self.get_to_lines(group)])
                else:
                    hunk_lines = "".join(self.get_from_lines(group) + self.get_to_lines(group))
                for accelerator in self.options.accelchars:
                    hunk_lines = hunk_lines.replace(accelerator, "")
                if self.options.contains not in hunk_lines:
                    continue
            if not started:
                outfile.write(fromstring)
                outfile.write(tostring)
                started = True
            outfile.write(hunk)
        if not started and self.options.report_identical_files:
            outfile.write("Files %s and %s are identical\n" % (self.fromfile, self.tofile))

    def get_from_lines(self, group):
        """returns the lines referred to by group, from the fromfile"""
        from_lines = []
        for tag, i1, i2, j1, j2 in group:
            from_lines.extend(self.from_lines[i1:i2])
        return from_lines

    def get_to_lines(self, group):
        """returns the lines referred to by group, from the tofile"""
        to_lines = []
        for tag, i1, i2, j1, j2 in group:
            to_lines.extend(self.to_lines[j1:j2])
        return to_lines

    def unified_diff(self, group):
        """takes the group of opcodes and generates a unified diff line by line"""
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in self.from_lines[i1:i2]:
                    yield ' ' + line
                continue
            if tag == 'replace' or tag == 'delete':
                for line in self.from_lines[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in self.to_lines[j1:j2]:
                    yield '+' + line

if __name__ == "__main__":
    main()
class TestPOCount:
    def count(self, source, expectedsource, target=None, expectedtarget=None):
        """simple helper to check the respective word counts

        Parses *source* (and optionally sets *target*) on a po unit, counts
        words with statsdb.wordsinunit, and asserts the expected counts.
        """
        poelement = po.pounit(source)
        if target is not None:
            poelement.target = target
        wordssource, wordstarget = statsdb.wordsinunit(poelement)
        print 'Source (expected=%d; actual=%d): "%s"' % (expectedsource, wordssource, source)
        assert wordssource == expectedsource
        if target is not None:
            print 'Target (expected=%d; actual=%d): "%s"' % (expectedtarget, wordstarget, target)
            assert wordstarget == expectedtarget

    def test_simple_count_zero(self):
        """no content"""
        self.count("", 0)

    def test_simple_count_one(self):
        """simplest one word count"""
        self.count("One", 1)

    def test_simple_count_two(self):
        """simplest two word count"""
        self.count("One two", 2)

    def test_punctuation_divides_words(self):
        """test that we break words when there is punctuation"""
        self.count("One. Two", 2)
        self.count("One.Two", 2)

    def test_xml_tags(self):
        """test that we do not count XML tags as words"""
        # <br> is a word break
        self.count("A word<br>Another word", 4)
        self.count("A word<br/>Another word", 4)
        self.count("A word<br />Another word", 4)
        # \n is a word break
        self.count("<p>A word</p>\n<p>Another word</p>", 4)

    def test_newlines(self):
        """test to see that newlines divide words"""
        # newlines break words
        self.count("A word.\nAnother word", 4)
        self.count(r"A word.\\n\nAnother word", 4)

    def test_variables_are_words(self):
        """test that we count variables as words"""
        self.count("%PROGRAMNAME %PROGRAM% %s $file $1", 5)

    def test_plurals(self):
        """test that we can handle plural PO elements"""
        # TODO: not yet implemented; the example below is the kind of entry
        # this should cover.
        # #: gdk-pixbuf/gdk-pixdata.c:430
        # #, c-format
        # msgid "failed to allocate image buffer of %u byte"
        # msgid_plural "failed to allocate image buffer of %u bytes"
        # msgstr[0] "e paletšwe go hwetša sešireletši sa seswantšho sa paete ya %u"
        # msgstr[1] "e paletšwe go hwetša sešireletši sa seswantšho sa dipaete tša %u"

    def test_plurals_kde(self):
        """test that we correctly count old style KDE plurals"""
        self.count("_n: Singular\\n\nPlural", 2, "Een\\n\ntwee\\n\ndrie", 3)

    def test_msgid_blank(self):
        """counts a message id"""
        self.count("   ", 0)

    # Counting strings
    # We need to check how we count strings, and whether we call it translated
    # or untranslated; i.e. an all-spaces msgid should be translated if there
    # are spaces in the msgstr

    # Make sure we don't count obsolete messages

    # Do we correctly identify a translated yet blank message?

    # Need to test that we can differentiate between fuzzy, translated and untranslated
class TestPODebug:

    # Shared podebug instance used by all the tests below.
    debug = podebug.podebug()

    def test_ignore_gtk(self):
        """Test operation of GTK message ignoring"""
        unit = base.TranslationUnit("default:LTR")
        assert self.debug.ignore_gtk(unit) == True

    def test_rewrite_blank(self):
        """Test the blank rewrite function"""
        assert self.debug.rewrite_blank("Test") == ""

    def test_rewrite_en(self):
        """Test the en rewrite function"""
        assert self.debug.rewrite_en("Test") == "Test"

    def test_rewrite_xxx(self):
        """Test the xxx rewrite function"""
        assert self.debug.rewrite_xxx("Test") == "xxxTestxxx"
        # a trailing newline must stay outside the xxx markers
        assert self.debug.rewrite_xxx("Newline\n") == "xxxNewlinexxx\n"

    def test_rewrite_unicode(self):
        """Test the unicode rewrite function"""
        assert self.debug.rewrite_unicode("Test") == u"Ŧḗşŧ"

    def test_rewrite_chef(self):
        """Test the chef rewrite function

        This is not really critical to test, but a simple test ensures
        that it stays working.
        """
        assert self.debug.rewrite_chef("Mock Swedish test you muppet") == "Mock Swedish test yooo mooppet"
class TestPOGrep:
    def poparse(self, posource):
        """helper that parses po source without requiring files"""
        dummyfile = wStringIO.StringIO(posource)
        pofile = po.pofile(dummyfile)
        return pofile

    def pogrep(self, posource, searchstring, cmdlineoptions=None):
        """helper that parses po source and passes it through a filter

        Returns the grep-filtered store as a string of po source.
        """
        if cmdlineoptions is None:
            cmdlineoptions = []
        options, args = pogrep.cmdlineparser().parse_args(["xxx.po"] + cmdlineoptions)
        grepfilter = pogrep.GrepFilter(searchstring, options.searchparts, options.ignorecase, options.useregexp, options.invertmatch, options.accelchar)
        tofile = grepfilter.filterfile(self.poparse(posource))
        print str(tofile)
        return str(tofile)

    def test_simplegrep_msgid(self):
        """grep for a string in the source"""
        posource = '#: test.c\nmsgid "test"\nmsgstr "rest"\n'
        poresult = self.pogrep(posource, "test", ["--search=msgid"])
        assert poresult == posource
        poresult = self.pogrep(posource, "rest", ["--search=msgid"])
        assert poresult == ""

    def test_simplegrep_msgstr(self):
        """grep for a string in the target"""
        posource = '#: test.c\nmsgid "test"\nmsgstr "rest"\n'
        poresult = self.pogrep(posource, "rest", ["--search=msgstr"])
        assert poresult == posource
        poresult = self.pogrep(posource, "test", ["--search=msgstr"])
        assert poresult == ""

    def test_simplegrep_locations(self):
        """grep for a string in the location comments"""
        posource = '#: test.c\nmsgid "test"\nmsgstr "rest"\n'
        poresult = self.pogrep(posource, "test.c", ["--search=locations"])
        assert poresult == posource
        poresult = self.pogrep(posource, "rest.c", ["--search=locations"])
        assert poresult == ""

    def test_simplegrep_comments(self):
        """grep for a string in the comments"""
        posource = '# (review) comment\n#: test.c\nmsgid "test"\nmsgstr "rest"\n'
        poresult = self.pogrep(posource, "review", ["--search=comment"])
        assert poresult == posource
        poresult = self.pogrep(posource, "test", ["--search=comment"])
        assert poresult == ""

    def test_unicode_message_searchstring(self):
        """check that we can grep unicode messages and use unicode search strings"""
        poascii = '# comment\n#: test.c\nmsgid "test"\nmsgstr "rest"\n'
        pounicode = '# comment\n#: test.c\nmsgid "test"\nmsgstr "rešṱ"\n'
        queryascii = 'rest'
        queryunicode = 'rešṱ'
        for source, search, expected in [(poascii, queryascii, poascii),
                                         (poascii, queryunicode, ''),
                                         (pounicode, queryascii, ''),
                                         (pounicode, queryunicode, pounicode)]:
            print "Source:\n%s\nSearch: %s\n" % (source, search)
            poresult = self.pogrep(source, search)
            assert poresult == expected

    def test_unicode_message_regex_searchstring(self):
        """check that we can grep unicode messages and use unicode regex search strings"""
        poascii = '# comment\n#: test.c\nmsgid "test"\nmsgstr "rest"\n'
        pounicode = '# comment\n#: test.c\nmsgid "test"\nmsgstr "rešṱ"\n'
        queryascii = 'rest'
        queryunicode = 'rešṱ'
        for source, search, expected in [(poascii, queryascii, poascii),
                                         (poascii, queryunicode, ''),
                                         (pounicode, queryascii, ''),
                                         (pounicode, queryunicode, pounicode)]:
            print "Source:\n%s\nSearch: %s\n" % (source, search)
            poresult = self.pogrep(source, search, ["--regexp"])
            assert poresult == expected

    def test_unicode_normalise(self):
        """check that we normalise unicode strings before comparing"""
        source_template = u'# comment\n#: test.c\nmsgid "test"\nmsgstr "t%sst"\n'
        # each group lists equivalent forms of one character:
        # é: precomposed vs. e + combining acute
        # Ḽ: precomposed vs. L + combining circumflex below
        # Ṏ: precomposed vs. two partially/fully decomposed forms
        groups = [(u"\u00e9", u"\u0065\u0301"), \
                  (u"\u1e3c", u"\u004c\u032d"), \
                  (u"\u1e4e", u"\u004f\u0303\u0308", u"\u00d5\u0308")]
        for letters in groups:
            for source_letter in letters:
                source = source_template % source_letter
                for search_letter in letters:
                    print search_letter.encode('utf-8')
                    poresult = self.pogrep(source, search_letter)
                    assert poresult == source.encode('utf-8')
"test"\nmsgstr "t%sst"\n' + # é, e + ' + # Ḽ, L + ^ + # Ṏ + groups = [(u"\u00e9", u"\u0065\u0301"), \ + (u"\u1e3c", u"\u004c\u032d"), \ + (u"\u1e4e", u"\u004f\u0303\u0308", u"\u00d5\u0308")] + for letters in groups: + for source_letter in letters: + source = source_template % source_letter + for search_letter in letters: + print search_letter.encode('utf-8') + poresult = self.pogrep(source, search_letter) + assert poresult == source.encode('utf-8') + +class TestXLiffGrep: + xliff_skeleton = '''<?xml version="1.0" ?> +<xliff version="1.1" xmlns="urn:oasis:names:tc:xliff:document:1.1"> + <file original="filename.po" source-language="en-US" datatype="po"> + <body> + %s + </body> + </file> +</xliff>''' + + xliff_text = xliff_skeleton % '''<trans-unit> + <source>rêd</source> + <target>rooi</target> +</trans-unit>''' + + def xliff_parse(self, xliff_text): + """helper that parses po source without requiring files""" + dummyfile = wStringIO.StringIO(xliff_text) + xliff_file = xliff.xlifffile(dummyfile) + return xliff_file + + def xliff_grep(self, xliff_text, searchstring, cmdlineoptions=None): + """helper that parses xliff text and passes it through a filter""" + if cmdlineoptions is None: + cmdlineoptions = [] + options, args = pogrep.cmdlineparser().parse_args(["xxx.xliff"] + cmdlineoptions) + grepfilter = pogrep.GrepFilter(searchstring, options.searchparts, options.ignorecase, options.useregexp, options.invertmatch, options.accelchar) + tofile = grepfilter.filterfile(self.xliff_parse(xliff_text)) + return str(tofile) + + def test_simplegrep(self): + """grep for a simple string.""" + xliff_text = self.xliff_text + xliff_file = self.xliff_parse(xliff_text) + xliff_result = self.xliff_parse(self.xliff_grep(xliff_text, "rêd")) + assert xliff_result.units[0].getsource() == u"rêd" + assert xliff_result.units[0].gettarget() == u"rooi" + + xliff_result = self.xliff_parse(self.xliff_grep(xliff_text, "unavailable string")) + assert xliff_result.isempty() + + diff --git 
a/translate-toolkit-1.3.0/translate/tools/test_pomerge.py b/translate-toolkit-1.3.0/translate/tools/test_pomerge.py new file mode 100644 index 0000000..0dbd452 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/test_pomerge.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from translate.tools import pomerge +from translate.storage import factory +from translate.storage import po +from translate.storage import xliff +from translate.misc import wStringIO + +class TestPOMerge: + xliffskeleton = '''<?xml version="1.0" ?> +<xliff version="1.1" xmlns="urn:oasis:names:tc:xliff:document:1.1"> + <file original="filename.po" source-language="en-US" datatype="po"> + <body> + %s + </body> + </file> +</xliff>''' + + def mergestore(self, templatesource, inputsource): + """merges the sources of the given files and returns a new pofile object""" + templatefile = wStringIO.StringIO(templatesource) + inputfile = wStringIO.StringIO(inputsource) + outputfile = wStringIO.StringIO() + assert pomerge.mergestore(inputfile, outputfile, templatefile) + outputpostring = outputfile.getvalue() + outputpofile = po.pofile(outputpostring) + return outputpofile + + def mergexliff(self, templatesource, inputsource): + """merges the sources of the given files and returns a new xlifffile object""" + templatefile = wStringIO.StringIO(templatesource) + inputfile = wStringIO.StringIO(inputsource) + outputfile = wStringIO.StringIO() + assert pomerge.mergestore(inputfile, outputfile, templatefile) + outputxliffstring = outputfile.getvalue() + print "Generated XML:" + print outputxliffstring + outputxlifffile = xliff.xlifffile(outputxliffstring) + return outputxlifffile + + def countunits(self, pofile): + """returns the number of non-header items""" + if pofile.units[0].isheader(): + return len(pofile.units) - 1 + else: + return len(pofile.units) + + def singleunit(self, pofile): + """checks that the pofile contains a single non-header unit, and returns it""" + assert 
self.countunits(pofile) == 1 + return pofile.units[-1] + + def test_simplemerge(self): + """checks that a simple po entry merges OK""" + templatepo = '''#: simple.test\nmsgid "Simple String"\nmsgstr ""\n''' + inputpo = '''#: simple.test\nmsgid "Simple String"\nmsgstr "Dimpled Ring"\n''' + pofile = self.mergestore(templatepo, inputpo) + pounit = self.singleunit(pofile) + assert pounit.source == "Simple String" + assert pounit.target == "Dimpled Ring" + + def test_replacemerge(self): + """checks that a simple po entry merges OK""" + templatepo = '''#: simple.test\nmsgid "Simple String"\nmsgstr "Dimpled Ring"\n''' + inputpo = '''#: simple.test\nmsgid "Simple String"\nmsgstr "Dimpled King"\n''' + pofile = self.mergestore(templatepo, inputpo) + pounit = self.singleunit(pofile) + assert pounit.source == "Simple String" + assert pounit.target == "Dimpled King" + + def test_merging_locations(self): + """check that locations on seperate lines are output in Gettext form of all on one line""" + templatepo = '''#: location.c:1\n#: location.c:2\nmsgid "Simple String"\nmsgstr ""\n''' + inputpo = '''#: location.c:1\n#: location.c:2\nmsgid "Simple String"\nmsgstr "Dimpled Ring"\n''' + expectedpo = '''#: location.c:1%slocation.c:2\nmsgid "Simple String"\nmsgstr "Dimpled Ring"\n''' % po.lsep + pofile = self.mergestore(templatepo, inputpo) + print pofile + assert str(pofile) == expectedpo + + def test_reflowed_source_comments(self): + """ensure that we don't duplicate source comments (locations) if they have been reflowed""" + templatepo = '''#: newMenu.label\n#: newMenu.accesskey\nmsgid "&New"\nmsgstr ""\n''' + newpo = '''#: newMenu.label newMenu.accesskey\nmsgid "&New"\nmsgstr "&Nuwe"\n''' + expectedpo = '''#: newMenu.label%snewMenu.accesskey\nmsgid "&New"\nmsgstr "&Nuwe"\n''' % po.lsep + pofile = self.mergestore(templatepo, newpo) + pounit = self.singleunit(pofile) + print pofile + assert str(pofile) == expectedpo + + def test_comments_with_blank_lines(self): + """ensure that we 
don't loose empty newlines in comments""" + templatepo = '''# # ***** BEGIN LICENSE BLOCK ***** +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# bla bla +msgid "bla" +msgstr "blabla" +''' + newpo = templatepo + expectedpo = templatepo + pofile = self.mergestore(templatepo, newpo) + pounit = self.singleunit(pofile) + print pofile + assert str(pofile) == expectedpo + + def test_merge_dont_delete_unassociated_comments(self): + """ensure that we do not delete comments in the PO file that are not assocaited with a message block""" + templatepo = '''# Lonely comment\n\n# Translation comment\nmsgid "Bob"\nmsgstr "Toolmaker"\n''' + mergepo = '''# Translation comment\nmsgid "Bob"\nmsgstr "Builder"\n''' + expectedpo = '''# Lonely comment\n# Translation comment\nmsgid "Bob"\nmsgstr "Builder"\n''' + pofile = self.mergestore(templatepo, mergepo) +# pounit = self.singleunit(pofile) + print pofile + assert str(pofile) == expectedpo + + def test_preserve_format_trailing_newlines(self): + """Test that we can merge messages correctly that end with a newline""" + templatepo = '''msgid "Simple string\\n"\nmsgstr ""\n''' + mergepo = '''msgid "Simple string\\n"\nmsgstr "Dimpled ring\\n"\n''' + expectedpo = '''msgid "Simple string\\n"\nmsgstr "Dimpled ring\\n"\n''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + templatepo = '''msgid ""\n"Simple string\\n"\nmsgstr ""\n''' + mergepo = '''msgid ""\n"Simple string\\n"\nmsgstr ""\n"Dimpled ring\\n"\n''' + expectedpo = '''msgid ""\n"Simple string\\n"\nmsgstr "Dimpled ring\\n"\n''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def test_preserve_format_minor_start_and_end_of_sentence_changes(self): + """Test that we are not too fussy about large diffs for simple changes at the start or end of a sentence""" + templatepo = '''msgid "Target 
type:"\nmsgstr "Doelsoort"\n\n''' + mergepo = '''msgid "Target type:"\nmsgstr "Doelsoort:"\n''' + expectedpo = mergepo + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + templatepo = '''msgid "&Select"\nmsgstr "Kies"\n\n''' + mergepo = '''msgid "&Select"\nmsgstr "&Kies"\n''' + expectedpo = mergepo + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + templatepo = '''msgid "en-us, en"\nmsgstr "en-us, en"\n''' + mergepo = '''msgid "en-us, en"\nmsgstr "af-za, af, en-za, en-gb, en-us, en"\n''' + expectedpo = mergepo + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def test_preserve_format_last_entry_in_a_file(self): + """The last entry in a PO file is usualy not followed by an empty line. 
Test that we preserve this""" + templatepo = '''msgid "First"\nmsgstr ""\n\nmsgid "Second"\nmsgstr ""\n''' + mergepo = '''msgid "First"\nmsgstr "Eerste"\n\nmsgid "Second"\nmsgstr "Tweede"\n''' + expectedpo = '''msgid "First"\nmsgstr "Eerste"\n\nmsgid "Second"\nmsgstr "Tweede"\n''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + templatepo = '''msgid "First"\nmsgstr ""\n\nmsgid "Second"\nmsgstr ""\n\n''' + mergepo = '''msgid "First"\nmsgstr "Eerste"\n\nmsgid "Second"\nmsgstr "Tweede"\n''' + expectedpo = '''msgid "First"\nmsgstr "Eerste"\n\nmsgid "Second"\nmsgstr "Tweede"\n''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def xtest_escape_tabs(self): + """Ensure that input tabs are escaped in the output, like gettext does.""" + + # The strings below contains the tab character, not spaces. 
+ templatepo = '''msgid "First Second"\nmsgstr ""\n\n''' + mergepo = '''msgid "First Second"\nmsgstr "Eerste Tweede"\n''' + expectedpo = r'''imsgid "First\tSecond" +msgstr "Eerste\tTweede" +''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def test_preserve_comments_layout(self): + """Ensure that when we merge with new '# (poconflict)' or other comments that we don't mess formating""" + templatepo = '''#: filename\nmsgid "Desktop Background.bmp"\nmsgstr "Desktop Background.bmp"\n\n''' + mergepo = '''# (pofilter) unchanged: please translate\n#: filename\nmsgid "Desktop Background.bmp"\nmsgstr "Desktop Background.bmp"\n''' + expectedpo = mergepo + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def test_merge_dos2unix(self): + """Test that merging a comment line with dos newlines doesn't add a new line""" + templatepo = '''# User comment\n# (pofilter) Translate Toolkit comment\n#. Automatic comment\n#: location_comment.c:110\nmsgid "File"\nmsgstr "File"\n\n''' + mergepo = '''# User comment\r\n# (pofilter) Translate Toolkit comment\r\n#. Automatic comment\r\n#: location_comment.c:110\r\nmsgid "File"\r\nmsgstr "Ifayile"\r\n\r\n''' + expectedpo = '''# User comment\n# (pofilter) Translate Toolkit comment\n#. 
Automatic comment\n#: location_comment.c:110\nmsgid "File"\nmsgstr "Ifayile"\n''' + pofile = self.mergestore(templatepo, mergepo) + assert str(pofile) == expectedpo + + # Unassociated comment + templatepo = '''# Lonely comment\n\n#: location_comment.c:110\nmsgid "Bob"\nmsgstr "Toolmaker"\n''' + mergepo = '''# Lonely comment\r\n\r\n#: location_comment.c:110\r\nmsgid "Bob"\r\nmsgstr "Builder"\r\n\r\n''' + expectedpo = '''# Lonely comment\n#: location_comment.c:110\nmsgid "Bob"\nmsgstr "Builder"\n''' + pofile = self.mergestore(templatepo, mergepo) + assert str(pofile) == expectedpo + + # New comment + templatepo = '''#: location_comment.c:110\nmsgid "File"\nmsgstr "File"\n\n''' + mergepo = '''# User comment\r\n# (pofilter) Translate Toolkit comment\r\n#: location_comment.c:110\r\nmsgid "File"\r\nmsgstr "Ifayile"\r\n\r\n''' + expectedpo = '''# User comment\n# (pofilter) Translate Toolkit comment\n#: location_comment.c:110\nmsgid "File"\nmsgstr "Ifayile"\n''' + pofile = self.mergestore(templatepo, mergepo) + assert str(pofile) == expectedpo + + def test_xliff_into_xliff(self): + templatexliff = self.xliffskeleton % '''<trans-unit> + <source>red</source> + <target></target> +</trans-unit>''' + mergexliff = self.xliffskeleton % '''<trans-unit> + <source>red</source> + <target>rooi</target> +</trans-unit>''' + xlifffile = self.mergexliff(templatexliff, mergexliff) + assert len(xlifffile.units) == 1 + unit = xlifffile.units[0] + assert unit.source == "red" + assert unit.target == "rooi" + + def test_po_into_xliff(self): + templatexliff = self.xliffskeleton % '''<trans-unit> + <source>red</source> + <target></target> +</trans-unit>''' + mergepo = 'msgid "red"\nmsgstr "rooi"' + xlifffile = self.mergexliff(templatexliff, mergepo) + assert len(xlifffile.units) == 1 + unit = xlifffile.units[0] + assert unit.source == "red" + assert unit.target == "rooi" + + def test_xliff_into_po(self): + templatepo = '# my comment\nmsgid "red"\nmsgstr ""' + mergexliff = self.xliffskeleton % 
'''<trans-unit> + <source>red</source> + <target>rooi</target> +</trans-unit>''' + expectedpo = '# my comment\nmsgid "red"\nmsgstr "rooi"\n' + pofile = self.mergestore(templatepo, mergexliff) + assert str(pofile) == expectedpo + + def test_merging_dont_merge_kde_comments_found_in_translation(self): + """If we find a KDE comment in the translation (target) then do not merge it.""" + + templatepo = '''msgid "_: KDE comment\\n"\n"File"\nmsgstr "File"\n\n''' + mergepo = '''msgid "_: KDE comment\\n"\n"File"\nmsgstr "_: KDE comment\\n"\n"Ifayile"\n\n''' + expectedpo = '''msgid ""\n"_: KDE comment\\n"\n"File"\nmsgstr "Ifayile"\n''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + # Translated kde comment. + mergepo = '''msgid "_: KDE comment\\n"\n"File"\nmsgstr "_: KDE kommentaar\\n"\n"Ifayile"\n\n''' + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + # multiline KDE comment + templatepo = '''msgid "_: KDE "\n"comment\\n"\n"File"\nmsgstr "File"\n\n''' + mergepo = '''msgid "_: KDE "\n"comment\\n"\n"File"\nmsgstr "_: KDE "\n"comment\\n"\n"Ifayile"\n\n''' + expectedpo = '''msgid ""\n"_: KDE comment\\n"\n"File"\nmsgstr "Ifayile"\n''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n\nMerged:\n%s" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def test_merging_untranslated_with_kde_disambiguation(self): + """test merging untranslated messages that are the same except for KDE disambiguation""" + templatepo = r'''#: sendMsgTitle +#: sendMsgTitle.accesskey +msgid "_: sendMsgTitle sendMsgTitle.accesskey\n" +"Send Message" +msgstr "" + +#: sendMessageCheckWindowTitle +#: sendMessageCheckWindowTitle.accesskey +msgid "_: sendMessageCheckWindowTitle sendMessageCheckWindowTitle.accesskey\n" +"Send Message" +msgstr "" +''' + mergepo = r'''#: sendMsgTitle%ssendMsgTitle.accesskey +msgid "" 
+"_: sendMsgTitle sendMsgTitle.accesskey\n" +"Send Message" +msgstr "Stuur" + +#: sendMessageCheckWindowTitle%ssendMessageCheckWindowTitle.accesskey +msgid "" +"_: sendMessageCheckWindowTitle sendMessageCheckWindowTitle.accesskey\n" +"Send Message" +msgstr "Stuur" +''' % (po.lsep, po.lsep) + expectedpo = mergepo + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n---\nMerged:\n%s\n---" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo + + def test_merging_header_entries(self): + """Check that we do the right thing if we have header entries in the input PO.""" + + templatepo = r'''#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: new@example.com\n" +"POT-Creation-Date: 2006-11-11 11:11+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" +"X-Generator: Translate Toolkit 0.10rc2\n" + +#: simple.test +msgid "Simple String" +msgstr "" +''' + mergepo = r'''msgid "" +msgstr "" +"Project-Id-Version: Pootle 0.10\n" +"Report-Msgid-Bugs-To: old@example.com\n" +"POT-Creation-Date: 2006-01-01 01:01+0100\n" +"PO-Revision-Date: 2006-09-09 09:09+0900\n" +"Last-Translator: Joe Translate <joe@example.com>\n" +"Language-Team: Pig Latin <piglatin@example.com>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"X-Generator: Translate Toolkit 0.9\n" + +#: simple.test +msgid "Simple String" +msgstr "Dimpled Ring" +''' + expectedpo = r'''msgid "" +msgstr "" +"Project-Id-Version: Pootle 0.10\n" +"Report-Msgid-Bugs-To: new@example.com\n" +"POT-Creation-Date: 2006-11-11 11:11+0000\n" +"PO-Revision-Date: 2006-09-09 09:09+0900\n" +"Last-Translator: Joe Translate 
<joe@example.com>\n" +"Language-Team: Pig Latin <piglatin@example.com>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"X-Generator: Translate Toolkit 0.10rc2\n" + +#: simple.test +msgid "Simple String" +msgstr "Dimpled Ring" +''' + pofile = self.mergestore(templatepo, mergepo) + print "Expected:\n%s\n---\nMerged:\n%s\n---" % (expectedpo, str(pofile)) + assert str(pofile) == expectedpo diff --git a/translate-toolkit-1.3.0/translate/tools/test_pretranslate.py b/translate-toolkit-1.3.0/translate/tools/test_pretranslate.py new file mode 100644 index 0000000..0a557e3 --- /dev/null +++ b/translate-toolkit-1.3.0/translate/tools/test_pretranslate.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python + +from translate.tools import pretranslate +from translate.convert import test_convert +from translate.misc import wStringIO +from translate.storage import po +import warnings + +class TestPretranslate: + def setup_method(self, method): + warnings.resetwarnings() + + def teardown_method(self, method): + warnings.resetwarnings() + + def pretranslatepo(self, input_source, template_source=None): + """helper that converts strings to po source without requiring files""" + input_file = wStringIO.StringIO(input_source) + if template_source: + template_file = wStringIO.StringIO(template_source) + else: + template_file = None + output_file = wStringIO.StringIO() + + pretranslate.pretranslate_file(input_file, output_file, template_file) + output_file.seek(0) + return po.pofile(output_file.read()) + + def singleunit(self, pofile): + """checks that the pofile contains a single non-header unit, and returns it""" + if len(pofile.units) == 2 and pofile.units[0].isheader(): + print pofile.units[1] + return pofile.units[1] + else: + print pofile.units[0] + return pofile.units[0] + + def test_pretranslatepo_blank(self): + """checks that the pretranslatepo function is working for a simple file 
initialisation""" + input_source = '''#: simple.label%ssimple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr ""\n''' % po.lsep + newpo = self.pretranslatepo(input_source) + assert str(self.singleunit(newpo)) == input_source + + def test_merging_simple(self): + """checks that the pretranslatepo function is working for a simple merge""" + input_source = '''#: simple.label%ssimple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr ""\n''' % po.lsep + template_source = '''#: simple.label%ssimple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr "&Hart gekoeerde nuwe lyne\\n"\n''' % po.lsep + newpo = self.pretranslatepo(input_source, template_source) + assert str(self.singleunit(newpo)) == template_source + + def test_merging_messages_marked_fuzzy(self): + """test that when we merge PO files with a fuzzy message that it remains fuzzy""" + input_source = '''#: simple.label%ssimple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr ""\n''' % po.lsep + template_source = '''#: simple.label%ssimple.accesskey\n#, fuzzy\nmsgid "A &hard coded newline.\\n"\nmsgstr "&Hart gekoeerde nuwe lyne\\n"\n''' % po.lsep + newpo = self.pretranslatepo(input_source, template_source) + assert str(self.singleunit(newpo)) == template_source + + def test_merging_plurals_with_fuzzy_matching(self): + """test that when we merge PO files with a fuzzy message that it remains fuzzy""" + input_source = r'''#: file.cpp:2 +msgid "%d manual" +msgid_plural "%d manuals" +msgstr[0] "" +msgstr[1] "" +''' + template_source = r'''#: file.cpp:3 +#, fuzzy +msgid "%d manual" +msgid_plural "%d manuals" +msgstr[0] "%d handleiding." +msgstr[1] "%d handleidings." +''' + # The #: comment and msgid's are different between the pot and the po + poexpected = r'''#: file.cpp:2 +#, fuzzy +msgid "%d manual" +msgid_plural "%d manuals" +msgstr[0] "%d handleiding." +msgstr[1] "%d handleidings." 
+''' + newpo = self.pretranslatepo(input_source, template_source) + assert str(self.singleunit(newpo)) == poexpected + + def xtest_merging_msgid_change(self): + """tests that if the msgid changes but the location stays the same that we merge""" + input_source = '''#: simple.label\n#: simple.accesskey\nmsgid "Its &hard coding a newline.\\n"\nmsgstr ""\n''' + template_source = '''#: simple.label\n#: simple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr "&Hart gekoeerde nuwe lyne\\n"\n''' + poexpected = '''#: simple.label\n#: simple.accesskey\n#, fuzzy\nmsgid "Its &hard coding a newline.\\n"\nmsgstr "&Hart gekoeerde nuwe lyne\\n"\n''' + newpo = self.pretranslatepo(input_source, template_source) + print newpo + assert str(newpo) == poexpected + + def test_merging_location_change(self): + """tests that if the location changes but the msgid stays the same that we merge""" + input_source = '''#: new_simple.label%snew_simple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr ""\n''' % po.lsep + template_source = '''#: simple.label%ssimple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr "&Hart gekoeerde nuwe lyne\\n"\n''' % po.lsep + poexpected = '''#: new_simple.label%snew_simple.accesskey\nmsgid "A &hard coded newline.\\n"\nmsgstr "&Hart gekoeerde nuwe lyne\\n"\n''' % po.lsep + newpo = self.pretranslatepo(input_source, template_source) + print newpo + assert str(newpo) == poexpected + + def test_merging_location_and_whitespace_change(self): + """test that even if the location changes that if the msgid only has whitespace changes we can still merge""" + input_source = '''#: singlespace.label%ssinglespace.accesskey\nmsgid "&We have spaces"\nmsgstr ""\n''' % po.lsep + template_source = '''#: doublespace.label%sdoublespace.accesskey\nmsgid "&We have spaces"\nmsgstr "&One het spasies"\n''' % po.lsep + poexpected = '''#: singlespace.label%ssinglespace.accesskey\n#, fuzzy\nmsgid "&We have spaces"\nmsgstr "&One het spasies"\n''' % po.lsep + newpo = 
self.pretranslatepo(input_source, template_source) + print newpo + assert str(newpo) == poexpected + + def wtest_merging_accelerator_changes(self): + """test that a change in the accelerator localtion still allows merging""" + input_source = '''#: someline.c\nmsgid "A&bout"\nmsgstr ""\n''' + template_source = '''#: someline.c\nmsgid "&About"\nmsgstr "&Info"\n''' + poexpected = '''#: someline.c\nmsgid "A&bout"\nmsgstr "&Info"\n''' + newpo = self.pretranslatepo(input_source, template_source) + print newpo + assert str(newpo) == poexpected + + def xtest_lines_cut_differently(self): + """Checks that the correct formatting is preserved when pot an po lines differ.""" + input_source = '''#: simple.label\nmsgid "Line split "\n"differently"\nmsgstr ""\n''' + template_source = '''#: simple.label\nmsgid "Line"\n" split differently"\nmsgstr "Lyne verskillend gesny"\n''' + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + assert str(newpounit) == template_source + + def test_merging_automatic_comments_dont_duplicate(self): + """ensure that we can merge #. comments correctly""" + input_source = '''#. Row 35\nmsgid "&About"\nmsgstr ""\n''' + template_source = '''#. Row 35\nmsgid "&About"\nmsgstr "&Info"\n''' + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + assert str(newpounit) == template_source + + def test_merging_automatic_comments_new_overides_old(self): + """ensure that new #. comments override the old comments""" + input_source = '''#. new comment\n#: someline.c\nmsgid "&About"\nmsgstr ""\n''' + template_source = '''#. old comment\n#: someline.c\nmsgid "&About"\nmsgstr "&Info"\n''' + poexpected = '''#. 
new comment\n#: someline.c\nmsgid "&About"\nmsgstr "&Info"\n''' + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + assert str(newpounit) == poexpected + + def test_merging_comments_with_blank_comment_lines(self): + """test that when we merge a comment that has a blank line we keep the blank line""" + input_source = '''#: someline.c\nmsgid "About"\nmsgstr ""\n''' + template_source = '''# comment1\n#\n# comment2\n#: someline.c\nmsgid "About"\nmsgstr "Omtrent"\n''' + poexpected = template_source + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + assert str(newpounit) == poexpected + + def test_empty_commentlines(self): + input_source = '''#: paneSecurity.title +msgid "Security" +msgstr "" +''' + template_source = '''# - Contributor(s): +# - +# - Alternatively, the +# - +#: paneSecurity.title +msgid "Security" +msgstr "Sekuriteit" +''' + poexpected = template_source + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + print "expected" + print poexpected + print "got:" + print str(newpounit) + assert str(newpounit) == poexpected + + def test_merging_msgidcomments(self): + """ensure that we can merge msgidcomments messages""" + input_source = r'''#: window.width +msgid "" +"_: Do not translate this.\n" +"36em" +msgstr "" +''' + template_source = r'''#: window.width +msgid "" +"_: Do not translate this.\n" +"36em" +msgstr "36em" +''' + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + assert str(newpounit) == template_source + + def test_merging_plurals(self): + """ensure that we can merge plural messages""" + input_source = '''msgid "One"\nmsgid_plural "Two"\nmsgstr[0] ""\nmsgstr[1] ""\n''' + template_source = '''msgid "One"\nmsgid_plural "Two"\nmsgstr[0] "Een"\nmsgstr[1] "Twee"\nmsgstr[2] "Drie"\n''' + newpo = self.pretranslatepo(input_source, template_source) + print newpo + 
newpounit = self.singleunit(newpo) + assert str(newpounit) == template_source + + def test_merging_resurect_obsolete_messages(self): + """check that we can reuse old obsolete messages if the message comes back""" + input_source = '''#: resurect.c\nmsgid "&About"\nmsgstr ""\n''' + template_source = '''#~ msgid "&About"\n#~ msgstr "&Omtrent"\n''' + expected = '''#: resurect.c\nmsgid "&About"\nmsgstr "&Omtrent"\n''' + newpo = self.pretranslatepo(input_source, template_source) + print newpo + assert str(newpo) == expected + + def test_merging_comments(self): + """Test that we can merge comments correctly""" + input_source = '''#. Don't do it!\n#: file.py:1\nmsgid "One"\nmsgstr ""\n''' + template_source = '''#. Don't do it!\n#: file.py:2\nmsgid "One"\nmsgstr "Een"\n''' + poexpected = '''#. Don't do it!\n#: file.py:1\nmsgid "One"\nmsgstr "Een"\n''' + newpo = self.pretranslatepo(input_source, template_source) + print newpo + newpounit = self.singleunit(newpo) + assert str(newpounit) == poexpected + + def test_merging_typecomments(self): + """Test that we can merge with typecomments""" + input_source = '''#: file.c:1\n#, c-format\nmsgid "%d pipes"\nmsgstr ""\n''' + template_source = '''#: file.c:2\nmsgid "%d pipes"\nmsgstr "%d pype"\n''' + poexpected = '''#: file.c:1\n#, c-format\nmsgid "%d pipes"\nmsgstr "%d pype"\n''' + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + print newpounit + assert str(newpounit) == poexpected + + input_source = '''#: file.c:1\n#, c-format\nmsgid "%d computers"\nmsgstr ""\n''' + template_source = '''#: file.c:2\n#, c-format\nmsgid "%s computers "\nmsgstr "%s-rekenaars"\n''' + poexpected = '''#: file.c:1\n#, fuzzy, c-format\nmsgid "%d computers"\nmsgstr "%s-rekenaars"\n''' + newpo = self.pretranslatepo(input_source, template_source) + newpounit = self.singleunit(newpo) + assert newpounit.isfuzzy() + assert newpounit.hastypecomment("c-format") + +class 
TestPretranslateCommand(test_convert.TestConvertCommand, TestPretranslate): + """Tests running actual pretranslate commands on files""" + convertmodule = pretranslate + + def test_help(self): + """tests getting help""" + options = test_convert.TestConvertCommand.test_help(self) + options = self.help_check(options, "-t TEMPLATE, --template=TEMPLATE") + options = self.help_check(options, "--tm") + options = self.help_check(options, "-s MIN_SIMILARITY, --similarity=MIN_SIMILARITY") + options = self.help_check(options, "--nofuzzymatching", last=True) + |