#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2008 Mozilla Corporation, Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Class that manages TikiWiki files for translation. Tiki files are ugly and
inconsistently formatted as a single large PHP array with several special
sections identified by comments. Example current as of 2008-12-01::

  <?php
    // Many comments at the top
    $lang=Array(
    // ### Start of unused words
    "aaa" => "zzz",
    // ### end of unused words

    // ### start of untranslated words
    // "bbb" => "yyy",
    // ### end of untranslated words

    // ### start of possibly untranslated words
    "ccc" => "xxx",
    // ### end of possibly untranslated words

    "ddd" => "www",
    "###end###"=>"###end###");
  ?>

In addition there are several auto-generated //-style comments scattered through the
page and array, some of which matter when being parsed.
This has all been gleaned from the
U{TikiWiki source}.
As far as I know no detailed documentation exists for the tiki language.php files.
"""
from translate.storage import base
from translate.misc import wStringIO
import re
import datetime
class TikiUnit(base.TranslationUnit):
    """One ``"source" => "target",`` entry of a tiki language.php file."""

    def __init__(self, source=None, encoding="UTF-8"):
        # ``encoding`` is accepted for interface compatibility; it is not
        # used anywhere in this class.
        self.location = []
        super(TikiUnit, self).__init__(source)

    def __unicode__(self):
        """Format the unit as a single line of a tiki language.php file.

        A unit whose location list is exactly ``["untranslated"]`` is emitted
        as a ``// `` comment line; every other unit is emitted as a live
        array entry. The line always ends with a newline.
        """
        entry = u'"%s" => "%s",' % (self.source, self.target)
        if self.location != ["untranslated"]:
            return entry + "\n"
        return u'// ' + entry + "\n"

    def addlocation(self, location):
        """Record the file section this unit was found in.

        Locations are defined by the section comments in the file. Anything
        other than one of the known section names is silently ignored.

        @param location: Where the string is located in the file. Must be one
        of 'unused', 'untranslated', 'possiblyuntranslated' or 'translated'.
        """
        known = ('unused', 'untranslated', 'possiblyuntranslated', 'translated')
        if location not in known:
            return
        self.location.append(location)

    def getlocations(self):
        """Return the list of location(s) recorded for this unit."""
        return self.location
class TikiStore(base.TranslationStore):
    """Represents a tiki language.php file."""

    # Section comments recognised while parsing, paired with the location
    # they switch the parser to. They are checked in this order and the first
    # one found on a line wins, mirroring an if/elif chain.
    _SECTION_MARKERS = (
        ("### Start of unused words", "unused"),
        ("### start of untranslated words", "untranslated"),
        ("### start of possibly untranslated words", "possiblyuntranslated"),
        ("### end of unused words", "translated"),
        ("### end of untranslated words", "translated"),
        ("### end of possibly untranslated words", "translated"),
    )

    def __init__(self, inputfile=None):
        """If an inputfile is specified it will be parsed.

        @param inputfile: Either a string or a filehandle of the source file
        """
        base.TranslationStore.__init__(self, TikiUnit)
        self.units = []
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            self.parse(inputfile)

    def __str__(self):
        """Will return a formatted tiki-style language.php file.

        Units are regrouped by location: the "unused", "untranslated" and
        "possibly untranslated" sections are written first, each wrapped in
        its start/end marker comments, followed by all remaining units.

        @return: The complete file content, encoded as UTF-8.
        """
        unused = []
        untranslated = []
        possiblyuntranslated = []
        translated = []

        # Reorder all the units into their groups. A unit only lands in a
        # special section when its location list is exactly that one section.
        for unit in self.units:
            locations = unit.getlocations()
            if locations == ["unused"]:
                unused.append(unit)
            elif locations == ["untranslated"]:
                untranslated.append(unit)
            elif locations == ["possiblyuntranslated"]:
                possiblyuntranslated.append(unit)
            else:
                translated.append(unit)

        sections = (
            (u"// ### Start of unused words\n", unused,
             u"// ### end of unused words\n\n"),
            (u"// ### start of untranslated words\n", untranslated,
             u"// ### end of untranslated words\n\n"),
            (u"// ### start of possibly untranslated words\n",
             possiblyuntranslated,
             u"// ### end of possibly untranslated words\n\n"),
        )

        # Collect the pieces and join once instead of growing a string.
        parts = [self._tiki_header()]
        for start_marker, section_units, end_marker in sections:
            parts.append(start_marker)
            parts.extend([unicode(unit) for unit in section_units])
            parts.append(end_marker)
        parts.extend([unicode(unit) for unit in translated])
        parts.append(self._tiki_footer())

        return u"".join(parts).encode('UTF-8')

    def _tiki_header(self):
        """Returns a tiki-file header string.

        The header opens the PHP block, notes when the file was generated and
        starts the ``$lang`` array that the unit lines are written into.
        """
        return u"<?php // -*- coding:utf-8 -*-\n// Generated from po2tiki on %s\n\n$lang=Array(\n" % datetime.datetime.now()

    def _tiki_footer(self):
        """Returns a tiki-file footer string.

        The footer is the ``###end###`` sentinel entry, which closes the
        array, followed by the closing PHP tag.
        """
        return u'"###end###"=>"###end###");\n?>'

    def parse(self, input):
        """Parse the given input into source units.

        Every line of the form ``"source" => "target",`` (optionally commented
        out with ``// ``) becomes one unit, tagged with the section it was
        found in. The input is always closed afterwards, including when
        parsing fails.

        @param input: the source, either a string or filehandle
        """
        if hasattr(input, "name"):
            self.filename = input.name

        if isinstance(input, str):
            input = wStringIO.StringIO(input)

        split_regex = re.compile(r"^(?:// )?\"(.*)\" => \"(.*)\",$", re.UNICODE)

        try:
            # The tiki file fails to identify each section so we have to look
            # for start and end points and if we're outside of them we assume
            # the string is translated.
            location = "translated"

            for line in input:
                for marker, marker_location in self._SECTION_MARKERS:
                    if marker in line:
                        location = marker_location
                        break

                match = split_regex.match(line)
                if match:
                    unit = self.addsourceunit(match.group(1))
                    # Untranslated words get an empty msgstr
                    if location != "untranslated":
                        unit.settarget(match.group(2))
                    unit.addlocation(location)
        finally:
            input.close()