Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/translate-toolkit-1.5.1/translate/storage/tmx.py
blob: 143550d8c360fbad9c6938d5f2618d6c54a2e077 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2005-2009 Zuza Software Foundation
#
# This file is part of the Translate Toolkit.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""module for parsing TMX translation memeory files"""

from translate.storage import lisa
from lxml import etree

from translate import __version__

class tmxunit(lisa.LISAunit):
    """A single unit in the TMX file."""
    # Element names for the unit itself, its per-language variants and the
    # text segment inside each variant.
    rootNode = "tu"
    languageNode = "tuv"
    textNode = "seg"

    def createlanguageNode(self, lang, text, purpose):
        """Returns a langset xml Element setup with given parameters.

        A new languageNode element is created, lang is applied to it with
        lisa.setXMLlang() and text becomes the content of a textNode child.
        Byte strings are decoded as UTF-8 first. The purpose parameter is not
        used by this implementation."""
        if isinstance(text, str):
            text = text.decode("utf-8")
        langset = etree.Element(self.languageNode)
        lisa.setXMLlang(langset, lang)
        seg = etree.SubElement(langset, self.textNode)
        # implied by the standard:
        # lisa.setXMLspace(seg, "preserve")
        seg.text = text
        return langset

    def getid(self):
        """Returns the identifier for this unit. The optional tuid property is
        used if available, otherwise we inherit .getid(). Note that the tuid
        property is only mandated to be unique from TMX 2.0."""
        tuid = self.xmlelement.get("tuid", "")
        return tuid or super(tmxunit, self).getid()

    def istranslatable(self):
        """Returns True if the unit has a non-empty source."""
        return bool(self.source)

    def addnote(self, text, origin=None):
        """Add a note specifically in a "note" tag.

        Byte strings are decoded as UTF-8 and surrounding whitespace is
        stripped before the text is stored.

        The origin parameter is ignored."""
        if isinstance(text, str):
            text = text.decode("utf-8")
        note = etree.SubElement(self.xmlelement, self.namespaced("note"))
        note.text = text.strip()

    def getnotelist(self, origin=None):
        """Returns a list with the text of each note found below this unit.

        The origin parameter is ignored."""
        note_nodes = self.xmlelement.iterdescendants(self.namespaced("note"))
        return [lisa.getText(note) for note in note_nodes]

    def getnotes(self, origin=None):
        """Returns the text of all notes, joined with newlines."""
        return '\n'.join(self.getnotelist(origin=origin))

    def removenotes(self):
        """Remove all the translator notes."""
        # Take a snapshot first: removing nodes while the descendant iterator
        # is still running can make it skip elements.
        notes = list(self.xmlelement.iterdescendants(self.namespaced("note")))
        for note in notes:
            # A note may sit deeper than a direct child of this unit, so it
            # has to be removed from its own parent.
            note.getparent().remove(note)

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit.

        The error is stored as a note of the form "errorname: errortext"."""
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        text = errorname + ': ' + errortext
        self.addnote(text, origin="pofilter")

    def geterrors(self):
        """Get all error messages.

        Returns a dictionary mapping each error name to its error text.
        Notes that are not of the form "errorname: errortext" are skipped."""
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        errordict = {}
        for note in self.getnotelist(origin="pofilter"):
            # Split on the first separator only, so that an error text which
            # itself contains ': ' is kept intact.
            parts = note.split(': ', 1)
            if len(parts) != 2:
                # getnotelist() returns every note regardless of origin, so
                # notes without the separator are expected here.
                continue
            errorname, errortext = parts
            errordict[errorname] = errortext
        return errordict

    def copy(self):
        """Make a copy of the translation unit.

        We don't want to make a deep copy - this could duplicate the whole XML
        tree. For now we just serialise and reparse the unit's XML."""
        #TODO: check performance
        new_unit = self.__class__(None, empty=True)
        new_unit.xmlelement = etree.fromstring(etree.tostring(self.xmlelement))
        return new_unit


class tmxfile(lisa.LISAfile):
    """A store holding the units of a TMX file."""
    UnitClass = tmxunit
    Name = _("TMX Translation Memory")
    Mimetypes = ["application/x-tmx"]
    Extensions = ["tmx"]
    rootNode = "tmx"
    bodyNode = "body"
    # Minimal TMX 1.4 document with an empty header and an empty body.
    XMLskeleton = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE tmx SYSTEM "tmx14.dtd">
<tmx version="1.4">
<header></header>
<body></body>
</tmx>'''

    def addheader(self):
        """Set the attributes on the first <header> child of the root."""
        root = self.document.getroot()
        header = root.iterchildren(self.namespaced("header")).next()
        header.set("creationtool", "Translate Toolkit - po2tmx")
        header.set("creationtoolversion", __version__.sver)
        fixed_attributes = (
            ("segtype", "sentence"),
            ("o-tmf", "UTF-8"),
            ("adminlang", "en"),
        )
        for attribute, value in fixed_attributes:
            header.set(attribute, value)
        #TODO: consider adminlang. Used for notes, etc. Possibly same as targetlanguage
        header.set("srclang", self.sourcelanguage)
        header.set("datatype", "PlainText")
        #TODO: the creationdate ("YYYYMMDDTHHMMSSZ") and creationid attributes
        # are not set yet

    def addtranslation(self, source, srclang, translation, translang):
        """addtranslation method for testing old unit tests

        Adds a unit for source with translation as its target, then applies
        srclang and translang to the unit's first two tuv elements."""
        new_unit = self.addsourceunit(source)
        new_unit.target = translation
        tuv_nodes = new_unit.xmlelement.iterdescendants(self.namespaced('tuv'))
        for language in (srclang, translang):
            lisa.setXMLlang(tuv_nodes.next(), language)

    def translate(self, sourcetext, sourcelang=None, targetlang=None):
        """method to test old unit tests

        Returns the target of the unit found for sourcetext, or None when the
        result of findunit() has no target attribute. The language parameters
        are not used."""
        match = self.findunit(sourcetext)
        return getattr(match, "target", None)