#!/usr/bin/env python # -*- coding: utf-8 -*- # # Copyright 2005-2009 Zuza Software Foundation # # This file is part of the Translate Toolkit. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . """module for parsing TMX translation memeory files""" from translate.storage import lisa from lxml import etree from translate import __version__ class tmxunit(lisa.LISAunit): """A single unit in the TMX file.""" rootNode = "tu" languageNode = "tuv" textNode = "seg" def createlanguageNode(self, lang, text, purpose): """returns a langset xml Element setup with given parameters""" if isinstance(text, str): text = text.decode("utf-8") langset = etree.Element(self.languageNode) lisa.setXMLlang(langset, lang) seg = etree.SubElement(langset, self.textNode) # implied by the standard: # lisa.setXMLspace(seg, "preserve") seg.text = text return langset def getid(self): """Returns the identifier for this unit. The optional tuid property is used if available, otherwise we inherit .getid(). Note that the tuid property is only mandated to be unique from TMX 2.0.""" id = self.xmlelement.get("tuid", "") return id or super(tmxunit, self).getid() def istranslatable(self): return bool(self.source) def addnote(self, text, origin=None): """Add a note specifically in a "note" tag. The origin parameter is ignored""" if isinstance(text, str): text = text.decode("utf-8") note = etree.SubElement(self.xmlelement, self.namespaced("note")) note.text = text.strip() def getnotelist(self, origin=None): """Private method that returns the text from notes. The origin parameter is ignored..""" note_nodes = self.xmlelement.iterdescendants(self.namespaced("note")) note_list = [lisa.getText(note) for note in note_nodes] return note_list def getnotes(self, origin=None): return '\n'.join(self.getnotelist(origin=origin)) def removenotes(self): """Remove all the translator notes.""" notes = self.xmlelement.iterdescendants(self.namespaced("note")) for note in notes: self.xmlelement.remove(note) def adderror(self, errorname, errortext): """Adds an error message to this unit.""" #TODO: consider factoring out: some duplication between XLIFF and TMX text = errorname + ': ' + errortext self.addnote(text, origin="pofilter") def geterrors(self): """Get all error messages.""" #TODO: consider factoring out: some duplication between XLIFF and TMX notelist = self.getnotelist(origin="pofilter") errordict = {} for note in notelist: errorname, errortext = note.split(': ') errordict[errorname] = errortext return errordict def copy(self): """Make a copy of the translation unit. We don't want to make a deep copy - this could duplicate the whole XML tree. For now we just serialise and reparse the unit's XML.""" #TODO: check performance new_unit = self.__class__(None, empty=True) new_unit.xmlelement = etree.fromstring(etree.tostring(self.xmlelement)) return new_unit class tmxfile(lisa.LISAfile): """Class representing a TMX file store.""" UnitClass = tmxunit Name = _("TMX Translation Memory") Mimetypes = ["application/x-tmx"] Extensions = ["tmx"] rootNode = "tmx" bodyNode = "body" XMLskeleton = '''
''' def addheader(self): headernode = self.document.getroot().iterchildren(self.namespaced("header")).next() headernode.set("creationtool", "Translate Toolkit - po2tmx") headernode.set("creationtoolversion", __version__.sver) headernode.set("segtype", "sentence") headernode.set("o-tmf", "UTF-8") headernode.set("adminlang", "en") #TODO: consider adminlang. Used for notes, etc. Possibly same as targetlanguage headernode.set("srclang", self.sourcelanguage) headernode.set("datatype", "PlainText") #headernode.set("creationdate", "YYYYMMDDTHHMMSSZ" #headernode.set("creationid", "CodeSyntax" def addtranslation(self, source, srclang, translation, translang): """addtranslation method for testing old unit tests""" unit = self.addsourceunit(source) unit.target = translation tuvs = unit.xmlelement.iterdescendants(self.namespaced('tuv')) lisa.setXMLlang(tuvs.next(), srclang) lisa.setXMLlang(tuvs.next(), translang) def translate(self, sourcetext, sourcelang=None, targetlang=None): """method to test old unit tests""" return getattr(self.findunit(sourcetext), "target", None)