Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/translate-toolkit-1.5.1/translate/storage/tiki.py
blob: 50884620afc20131c5f64f8b23c885686dce4d5c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Copyright 2008 Mozilla Corporation, Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Class that manages TikiWiki files for translation.  Tiki files are <strike>ugly and
inconsistent</strike> formatted as a single large PHP array with several special 
sections identified by comments.  Example current as of 2008-12-01::

  <?php
    // Many comments at the top
    $lang=Array(
    // ### Start of unused words
    "aaa" => "zzz",
    // ### end of unused words
    
    // ### start of untranslated words
    // "bbb" => "yyy",
    // ### end of untranslated words
    
    // ### start of possibly untranslated words
    "ccc" => "xxx",
    // ### end of possibly untranslated words
    
    "ddd" => "www",
    "###end###"=>"###end###");
  ?>

In addition there are several auto-generated //-style comments scattered through the 
page and array, some of which matter when being parsed.

This has all been gleaned from the 
U{TikiWiki source<http://tikiwiki.svn.sourceforge.net/viewvc/tikiwiki/trunk/get_strings.php?view=markup>}.
As far as I know no detailed documentation exists for the tiki language.php files.

"""

from translate.storage import base
from translate.misc import wStringIO
import re
import datetime

class TikiUnit(base.TranslationUnit):
    """A tiki unit entry: one C{"source" => "target",} line of a language.php file."""

    # The only section names addlocation() will record; anything else is ignored.
    _VALID_LOCATIONS = ('unused', 'untranslated', 'possiblyuntranslated', 'translated')

    def __init__(self, source=None, encoding="UTF-8"):
        """Creates a unit with no recorded location.

        @param source: The source string of the entry.
        @param encoding: Accepted for interface compatibility; it is not used by this class.
        """
        self.location = []
        super(TikiUnit, self).__init__(source)

    def __unicode__(self):
        """Returns a string formatted to be inserted into a tiki language.php file.

        Units whose only location is "untranslated" are written commented out
        with a leading "// ".  A target of None is written as an empty string.
        """
        target = self.target
        if target is None:
            # A unit on which settarget() was never called would otherwise be
            # written out as the literal text "None".
            # NOTE(review): assumes the base class leaves target as None until
            # it is set -- confirm against translate.storage.base.
            target = u""
        ret = u'"%s" => "%s",' % (self.source, target)
        if self.location == ["untranslated"]:
            ret = u'// ' + ret
        return ret + "\n"

    def addlocation(self, location):
        """Location is defined by the comments in the file. This function will only
        set valid locations; any other value is silently ignored.

        @param location: Where the string is located in the file.  Must be a valid location.
        """
        if location in self._VALID_LOCATIONS:
            self.location.append(location)

    def getlocations(self):
        """Returns a list of the location(s) of the string."""
        return self.location

class TikiStore(base.TranslationStore):
    """Represents a tiki language.php file."""

    # Matches one array entry, optionally commented out with "// ".
    # Group 1 is the source string, group 2 is the translation.
    # Compiled once here rather than on every parse() call.
    _ENTRY_REGEX = re.compile(r"^(?:// )?\"(.*)\" => \"(.*)\",$", re.UNICODE)

    # Marker comments that open or close a section, paired with the location
    # that applies to the entries following the marker.  Checked in this order;
    # the first marker found in a line wins.
    _SECTION_MARKERS = (
        ("### Start of unused words", "unused"),
        ("### start of untranslated words", "untranslated"),
        ("### start of possibly untranslated words", "possiblyuntranslated"),
        ("### end of unused words", "translated"),
        ("### end of untranslated words", "translated"),
        ("### end of possibly untranslated words", "translated"),
    )

    def __init__(self, inputfile=None):
        """If an inputfile is specified it will be parsed.

        @param inputfile: Either a string or a filehandle of the source file
        """
        base.TranslationStore.__init__(self, TikiUnit)
        self.units = []
        # Strings have no "name" attribute, so the filename falls back to ''.
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            self.parse(inputfile)

    def __str__(self):
        """Will return a formatted tiki-style language.php file.

        Units are grouped by location and written as: header, unused section,
        untranslated section, possibly-untranslated section, all remaining
        units, footer.  The result is encoded as UTF-8.
        """
        _unused = []
        _untranslated = []
        _possiblyuntranslated = []
        _translated = []

        # Reorder all the units into their groups.  A unit only lands in a
        # special section when that section is its single recorded location;
        # everything else is written as translated.
        for unit in self.units:
            locations = unit.getlocations()
            if locations == ["unused"]:
                _unused.append(unit)
            elif locations == ["untranslated"]:
                _untranslated.append(unit)
            elif locations == ["possiblyuntranslated"]:
                _possiblyuntranslated.append(unit)
            else:
                _translated.append(unit)

        # Collect the pieces and join once instead of growing a string with +=.
        parts = [self._tiki_header()]
        parts.append("// ### Start of unused words\n")
        parts.extend([unicode(unit) for unit in _unused])
        parts.append("// ### end of unused words\n\n")
        parts.append("// ### start of untranslated words\n")
        parts.extend([unicode(unit) for unit in _untranslated])
        parts.append("// ### end of untranslated words\n\n")
        parts.append("// ### start of possibly untranslated words\n")
        parts.extend([unicode(unit) for unit in _possiblyuntranslated])
        parts.append("// ### end of possibly untranslated words\n\n")
        parts.extend([unicode(unit) for unit in _translated])
        parts.append(self._tiki_footer())

        return u"".join(parts).encode('UTF-8')

    def _tiki_header(self):
        """Returns a tiki-file header string, stamped with the current local time."""
        return u"<?php // -*- coding:utf-8 -*-\n// Generated from po2tiki on %s\n\n$lang=Array(\n" % datetime.datetime.now()

    def _tiki_footer(self):
        """Returns a tiki-file footer string (the end sentinel entry and closing tag)."""
        return u'"###end###"=>"###end###");\n?>'

    def parse(self, input):
        """Parse the given input into source units.

        Each line is checked for a section marker and then for an array entry.
        Entries found while in the untranslated section are added without a
        target.  The input is always closed when parsing ends, including a
        filehandle passed in by the caller.

        @param input: the source, either a string or filehandle
        """
        if hasattr(input, "name"):
            self.filename = input.name

        # A plain string is wrapped so that it can be iterated line by line.
        if isinstance(input, str):
            input = wStringIO.StringIO(input)

        try:
            # The tiki file fails to identify each section so we have to look for start and end
            # points and if we're outside of them we assume the string is translated
            _location = "translated"

            for line in input:
                for marker, section in self._SECTION_MARKERS:
                    if marker in line:
                        _location = section
                        break

                # "$" only tolerates a bare "\n" after the trailing comma, so a
                # "\r\n" terminated line would never match.  Strip the line
                # terminator first so files with CRLF endings parse as well.
                # NOTE(review): lines are used exactly as read; no decoding is
                # done here.
                match = self._ENTRY_REGEX.match(line.rstrip("\r\n"))
                if match is None:
                    continue

                unit = self.addsourceunit(match.group(1))
                # Untranslated words get an empty msgstr
                if _location != "untranslated":
                    unit.settarget(match.group(2))
                unit.addlocation(_location)
        finally:
            input.close()