#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2008-2009 Zuza Software Foundation
#
# This file is part of the Translate Toolkit.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see .
"""Module for handling Qt linguist (.ts) files.
This will eventually replace the older ts.py which only supports the older
format. While converters haven't been updated to use this module, we retain
both.
U{TS file format 4.3},
U{http://doc.trolltech.com/4.5/linguist-ts-file-format.html>},
U{Example},
U{Plurals forms}
U{Specification of the valid variable entries },
U{2 }
"""
from translate.storage import base, lisa
from translate.storage.placeables import general, StringElem
from translate.misc.multistring import multistring
from translate.lang import data
from lxml import etree
# TODO: handle translation types
NPLURALS = {
'jp': 1,
'en': 2,
'fr': 2,
'lv': 3,
'ga': 3,
'cs': 3,
'sk': 3,
'mk': 3,
'lt': 3,
'ru': 3,
'pl': 3,
'ro': 3,
'sl': 4,
'mt': 4,
'cy': 5,
'ar': 6,
}
class tsunit(lisa.LISAunit):
"""A single term in the xliff file."""
rootNode = "message"
languageNode = "source"
textNode = ""
namespace = ''
rich_parsers = general.parsers
def createlanguageNode(self, lang, text, purpose):
"""Returns an xml Element setup with given parameters."""
assert purpose
if purpose == "target":
purpose = "translation"
langset = etree.Element(self.namespaced(purpose))
#TODO: check language
# lisa.setXMLlang(langset, lang)
langset.text = text
return langset
def _getsourcenode(self):
return self.xmlelement.find(self.namespaced(self.languageNode))
def _gettargetnode(self):
return self.xmlelement.find(self.namespaced("translation"))
def getlanguageNodes(self):
"""We override this to get source and target nodes."""
def not_none(node):
return not node is None
return filter(not_none, [self._getsourcenode(), self._gettargetnode()])
def getsource(self):
# TODO: support . See bug 528.
sourcenode = self._getsourcenode()
if self.hasplural():
return multistring([sourcenode.text])
else:
return data.forceunicode(sourcenode.text)
source = property(getsource, lisa.LISAunit.setsource)
rich_source = property(base.TranslationUnit._get_rich_source, base.TranslationUnit._set_rich_source)
def settarget(self, text):
# This is a fairly destructive implementation. Don't assume that this
# is necessarily correct in all regards, but it does deal with a lot of
# cases. It is hard to deal with plurals, since
#Firstly deal with reinitialising to None or setting to identical string
if self.gettarget() == text:
return
strings = []
if isinstance(text, multistring):
strings = text.strings
elif isinstance(text, list):
strings = text
else:
strings = [text]
targetnode = self._gettargetnode()
type = targetnode.get("type")
targetnode.clear()
if type:
targetnode.set("type", type)
if self.hasplural() or len(strings) > 1:
self.xmlelement.set("numerus", "yes")
for string in strings:
numerus = etree.SubElement(targetnode, self.namespaced("numerusform"))
numerus.text = data.forceunicode(string) or u""
else:
targetnode.text = data.forceunicode(text) or u""
def gettarget(self):
targetnode = self._gettargetnode()
if targetnode is None:
etree.SubElement(self.xmlelement, self.namespaced("translation"))
return None
if self.hasplural():
numerus_nodes = targetnode.findall(self.namespaced("numerusform"))
return multistring([node.text or u"" for node in numerus_nodes])
else:
return data.forceunicode(targetnode.text) or u""
target = property(gettarget, settarget)
rich_target = property(base.TranslationUnit._get_rich_target, base.TranslationUnit._set_rich_target)
def hasplural(self):
return self.xmlelement.get("numerus") == "yes"
def addnote(self, text, origin=None):
"""Add a note specifically in a "comment" tag"""
if isinstance(text, str):
text = text.decode("utf-8")
current_notes = self.getnotes(origin)
self.removenotes()
note = etree.SubElement(self.xmlelement, self.namespaced("comment"))
note.text = "\n".join(filter(None, [current_notes, text.strip()]))
def getnotes(self, origin=None):
#TODO: consider only responding when origin has certain values
notenode = self.xmlelement.find(self.namespaced("comment"))
comment = ''
if not notenode is None:
comment = notenode.text
return comment
def removenotes(self):
"""Remove all the translator notes."""
note = self.xmlelement.find(self.namespaced("comment"))
if not note is None:
self.xmlelement.remove(note)
def _gettype(self):
"""Returns the type of this translation."""
targetnode = self._gettargetnode()
if targetnode is not None:
return targetnode.get("type")
return None
def _settype(self, value=None):
"""Set the type of this translation."""
if value is None and self._gettype:
# lxml recommends against using .attrib, but there seems to be no
# other way
self._gettargetnode().attrib.pop("type")
else:
self._gettargetnode().set("type", value)
def isreview(self):
"""States whether this unit needs to be reviewed"""
return self._gettype() == "unfinished"
def isfuzzy(self):
return self._gettype() == "unfinished"
def markfuzzy(self, value=True):
if value:
self._settype("unfinished")
else:
self._settype(None)
def getid(self):
context_name = self.getcontext()
#XXX: context_name is not supposed to be able to be None (the
# tag is compulsary in the tag)
if context_name is not None:
return context_name + self.source
else:
return self.source
def getcontext(self):
return self.xmlelement.getparent().find("name").text
def addlocation(self, location):
if isinstance(location, str):
text = text.decode("utf-8")
location = etree.SubElement(self.xmlelement, self.namespaced("location"))
filename, line = location.split(':', 1)
location.set("filename", filename)
location.set("line", line or "")
def getlocations(self):
location = self.xmlelement.find(self.namespaced("location"))
if location is None:
return []
else:
return [':'.join([location.get("filename"), location.get("line")])]
def merge(self, otherunit, overwrite=False, comments=True):
super(tsunit, self).merge(otherunit, overwrite, comments)
#TODO: check if this is necessary:
if otherunit.isfuzzy():
self.markfuzzy()
def isobsolete(self):
return self._gettype() == "obsolete"
class tsfile(lisa.LISAfile):
"""Class representing a XLIFF file store."""
UnitClass = tsunit
Name = _("Qt Linguist Translation File")
Mimetypes = ["application/x-linguist"]
Extensions = ["ts"]
rootNode = "TS"
# We will switch out .body to fit with the context we are working on
bodyNode = "context"
XMLskeleton = '''
'''
namespace = ''
def __init__(self, *args, **kwargs):
self._contextname = None
lisa.LISAfile.__init__(self, *args, **kwargs)
def initbody(self):
"""Initialises self.body."""
self.namespace = self.document.getroot().nsmap.get(None, None)
if self._contextname:
self.body = self.getcontextnode(self._contextname)
else:
self.body = self.document.getroot()
def gettargetlanguage(self):
"""Get the target language for this .ts file.
@return: ISO code e.g. af, fr, pt_BR
@rtype: String
"""
return self.body.get('language')
def settargetlanguage(self, targetlanguage):
"""Set the target language for this .ts file to L{targetlanguage}.
@param targetlanguage: ISO code e.g. af, fr, pt_BR
@type targetlanguage: String
"""
if targetlanguage:
self.body.set('language', targetlanguage)
def _createcontext(self, contextname, comment=None):
"""Creates a context node with an optional comment"""
context = etree.SubElement(self.document.getroot(), self.namespaced(self.bodyNode))
name = etree.SubElement(context, self.namespaced("name"))
name.text = contextname
if comment:
comment_node = context.SubElement(context, "comment")
comment_node.text = comment
return context
def _getcontextname(self, contextnode):
"""Returns the name of the given context node."""
return filenode.find(self.namespaced("name")).text
def _getcontextnames(self):
"""Returns all contextnames in this TS file."""
contextnodes = self.document.findall(self.namespaced("context"))
contextnames = [self.getcontextname(contextnode) for contextnode in contextnodes]
return contextnames
def _getcontextnode(self, contextname):
"""Returns the context node with the given name."""
contextnodes = self.document.findall(self.namespaced("context"))
for contextnode in contextnodes:
if self.getcontextname(contextnode) == contextname:
return contextnode
return None
def addunit(self, unit, new=True, contextname=None, createifmissing=False):
"""Adds the given unit to the last used body node (current context).
If the contextname is specified, switch to that context (creating it
if allowed by createifmissing)."""
if self._contextname != contextname:
if not self._switchcontext(contextname, createifmissing):
return None
super(tsfile, self).addunit(unit, new)
# lisa.setXMLspace(unit.xmlelement, "preserve")
return unit
def _switchcontext(self, contextname, createifmissing=False):
"""Switch the current context to the one named contextname, optionally
creating it if it doesn't exist."""
self._contextname = contextname
contextnode = self._getcontextnode(contextname)
if contextnode is None:
if not createifmissing:
return False
contextnode = self._createcontext(contextname)
self.body = contextnode
if self.body is None:
return False
return True
def nplural(self):
lang = self.body.get("language")
if NPLURALS.has_key(lang):
return NPLURALS[lang]
else:
return 1
def __str__(self):
"""Converts to a string containing the file's XML.
We have to override this to ensure mimic the Qt convention:
- no XML decleration
- plain DOCTYPE that lxml seems to ignore
"""
# A bug in lxml means we have to output the doctype ourselves. For
# more information, see:
# http://codespeak.net/pipermail/lxml-dev/2008-October/004112.html
# The problem was fixed in lxml 2.1.3
output = etree.tostring(self.document, pretty_print=True,
xml_declaration=False, encoding='utf-8')
if not "" in output[:30]:
output = "" + output
return output