translate-toolkit-1.5.1/translate/storage/properties.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Copyright 2004-2006 Zuza Software Foundation
# 
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""classes that hold units of .properties files (propunit) or entire files
   (propfile) these files are used in translating Mozilla and other software
   
   The following U{.properties file
   description<http://java.sun.com/j2se/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)>}
   and U{example <http://www.exampledepot.com/egs/java.util/Props.html>} give some
   good references to the .properties specification.

   Properties files may also hold Java
   U{MessageFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>}
   messages.  No special handling is provided in this storage class for MessageFormat,
   but this may be implemented in the future.

   Implementation
   ==============
   A simple summary of what is permissible follows.
   
   Comments::
     # a comment
     ! a comment
       
   Name and Value pairs::
     # Note that the b and c are escaped for epydoc rendering
     a = a string
     d.e.f = another string
     b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \u0123
     c = a string with a continuation line \\
         continuation line
"""

from translate.storage import base
from translate.misc import quote
from translate.lang import data
import re

# the rstripeols convert dos <-> unix nicely as well
# output will be appropriate for the platform

# Line terminator constant; nothing in the visible part of this module reads it
eol = "\n"

def _is_escaped(line, pos):
    """Tell whether the character at L{pos} is escaped by a backslash.

    A character is escaped only when it is preceded by an odd number of
    consecutive backslashes; an even run is a sequence of escaped
    backslashes that leaves the character itself unescaped.

    @param line: A properties line
    @type line: str
    @param pos: Offset of the character to examine
    @type pos: int
    @return: Is the character at L{pos} escaped
    @rtype: Boolean
    """
    backslashes = 0
    pos -= 1
    # Stop at the start of the line so that we never wrap around to the end
    while pos >= 0 and line[pos] == "\\":
        backslashes += 1
        pos -= 1
    return backslashes % 2 == 1

def find_delimeter(line):
    """Find the type and position of the delimeter in a property line.

    Property files can be delimeted by "=", ":" or whitespace (space for now).
    We find the position of the first unescaped occurrence of each delimeter,
    then find the one that appears first.  Leading whitespace is never
    treated as a delimeter.

    @param line: A properties line
    @type line: str
    @return: Delimeter character and offset within L{line}, or (None, -1)
    if the line contains no delimeter
    @rtype: Tuple (Delimeter char, Offset Integer)
    """
    prewhitespace = len(line) - len(line.lstrip())
    # Find the position of the first unescaped delimeter of each type
    delimeters = {}
    for delimeter in ("=", ":", " "):
        pos = line.find(delimeter, prewhitespace)
        while pos != -1 and _is_escaped(line, pos):
            pos = line.find(delimeter, pos + 1)
        delimeters[delimeter] = pos
    # Find the first "=" or ":" delimeter
    mindelimeter = None
    minpos = -1
    for delimeter in ("=", ":"):
        pos = delimeters[delimeter]
        if pos != -1 and (minpos == -1 or pos < minpos):
            minpos = pos
            mindelimeter = delimeter
    spacepos = delimeters[" "]
    if spacepos == -1:
        # No space at all: whatever we found for "=" or ":" (possibly nothing)
        return (mindelimeter, minpos)
    if mindelimeter is None:
        # Use space delimeter if we found nothing else
        return (" ", spacepos)
    # If space delimeter occurs earlier then ":" or "=" then it is the
    # delimeter only if there are non-whitespace characters between it and
    # the other detected delimeter.
    if spacepos < minpos and line[spacepos:minpos].strip():
        return (" ", spacepos)
    return (mindelimeter, minpos)

def is_line_continuation(line):
    """Report whether L{line} finishes with a line continuation marker.

    A value in a .properties file carries on to the next line when the
    line ends in a backslash.  Only an odd number of trailing backslashes
    counts: an even run is just a series of escaped backslashes and does
    not continue the line.

    @param line: A properties line
    @type line: str
    @return: Does L{line} end with a line continuation
    @rtype: Boolean
    """
    # Whatever rstrip removes is exactly the run of trailing backslashes
    trailing = len(line) - len(line.rstrip("\\"))
    return trailing % 2 == 1

def key_strip(key):
    """Remove the surrounding whitespace from a key.

    When the key ends in a backslash after trimming, the first character
    that followed that backslash is restored, since it was escaped.

    @param key: A properties key
    @type key: str
    @return: Key without any uneeded whitespace
    @rtype: str
    """
    trimmed = key.rstrip()
    if trimmed.endswith("\\"):
        # Restore the single character that followed the backslash
        cut = len(trimmed)
        trimmed = trimmed + key[cut:cut + 1]
    return trimmed.lstrip()

# Character encoding associated with each supported personality;
# propunit.__str__ uses it to encode its unicode output into a byte string
default_encoding = {"java": "latin1", "mozilla": "utf-8"}

class propunit(base.TranslationUnit):
    """One entry of a properties file: a name, its value and the comments
    that belong to it"""

    def __init__(self, source="", personality="java"):
        """create an empty propunit, then assign it the given source"""
        # Set first: setsource() reads self.personality to pick the encoder
        self.personality = personality
        super(propunit, self).__init__(source)
        self.name = ""
        self.value = u""
        self.delimeter = u"="
        self.comments = []
        self.source = source

    def setsource(self, source):
        """Store source as the value, escaped for this unit's personality.

        Source and target share the same storage, so this sets both."""
        source = data.forceunicode(source)
        if self.personality == "mozilla":
            encode = quote.mozillapropertiesencode
        else:
            encode = quote.javapropertiesencode
        self.value = encode(source or u"")

    def getsource(self):
        """Return the decoded value with every backslash-space pair replaced
        by a plain space"""
        decoded = quote.propertiesdecode(self.value)
        return re.sub(u"\\\\ ", u" ", decoded)

    source = property(getsource, setsource)

    def settarget(self, target):
        """Note: this also sets the .source attribute!"""
        self.source = target

    def gettarget(self):
        """Return the source, which doubles as the target"""
        return self.source

    target = property(gettarget, settarget)

    def __str__(self):
        """Return getoutput() as a string, encoding unicode output with the
        default encoding of this unit's personality"""
        output = self.getoutput()
        if not isinstance(output, unicode):
            return output
        return output.encode(default_encoding[self.personality])

    def getoutput(self):
        """Build the lines that represent this unit in a .properties file"""
        notes = self.getnotes()
        if notes:
            notes = notes + u"\n"
        if self.isblank():
            # Nothing but comments to write out
            return notes
        if "\\u" in self.value and self.personality == "mozilla":
            # Re-encode from the decoded source with the mozilla encoder
            self.value = quote.mozillapropertiesencode(self.source)
        return u"%s%s%s%s\n" % (notes, self.name, self.delimeter, self.value)

    def getlocations(self):
        """Return a list holding this unit's name"""
        return [self.name]

    def addnote(self, note, origin=None):
        """Append a note, forced to unicode, to the comments (origin is
        not used)"""
        self.comments.append(data.forceunicode(note))

    def getnotes(self, origin=None):
        """Return all comments joined by newlines (origin is not used)"""
        return u'\n'.join(self.comments)

    def removenotes(self):
        """Discard every comment of this unit"""
        self.comments = []

    def isblank(self):
        """returns whether this is a blank element, i.e. one that has
        neither a name nor a value"""
        return not self.name and not self.value

class propfile(base.TranslationStore):
    """this class represents a .properties file, made up of propunits"""
    UnitClass = propunit

    def __init__(self, inputfile=None, personality="java"):
        """construct a propfile, optionally reading in from inputfile

        @param inputfile: Open file-like object to read and parse, or None
        to create an empty store.  It is closed once it has been read.
        @param personality: Dialect of the file, "java" or "mozilla"
        @type personality: str
        """
        super(propfile, self).__init__(unitclass = self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # Close the file even when reading it fails
            try:
                propsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(propsrc, personality)

    def parse(self, propsrc, personality="java"):
        """read the source of a properties file in and include them as units

        @param propsrc: Content of a properties file.  A byte string is
        decoded with the encoding of L{personality} (latin1 for anything
        other than "mozilla"); a unicode string is used as it is.
        @param personality: Dialect of the file, "java" or "mozilla"
        @type personality: str
        """
        newunit = propunit("", personality)
        inmultilinevalue = False
        # Decoding a string that is already unicode raises TypeError, so
        # only byte strings are decoded
        if not isinstance(propsrc, unicode):
            encoding = default_encoding.get(personality,
                                            default_encoding["java"])
            propsrc = unicode(propsrc, encoding)
        for line in propsrc.split(u"\n"):
            line = quote.rstripeol(line)
            # handle multiline value if we're in one
            if inmultilinevalue:
                newunit.value += line.lstrip()
                # see if there's more
                inmultilinevalue = is_line_continuation(newunit.value)
                if inmultilinevalue:
                    # we're still waiting for more: strip the backslash
                    newunit.value = newunit.value[:-1]
                else:
                    # we're finished, add it to the list...
                    self.addunit(newunit)
                    newunit = propunit("", personality)
            # otherwise, this could be a comment
            elif line.strip()[:1] in (u'#', u'!'):
                # add a comment
                newunit.comments.append(line)
            elif not line.strip():
                # this is a blank line: it ends a unit made up of comments only
                if str(newunit).strip():
                    self.addunit(newunit)
                    newunit = propunit("", personality)
            else:
                delimeter_char, delimeter_pos = find_delimeter(line)
                if delimeter_pos == -1:
                    # lines without any delimeter are skipped
                    continue
                # otherwise, this is a definition
                newunit.delimeter = delimeter_char
                newunit.name = key_strip(line[:delimeter_pos])
                newunit.value = line[delimeter_pos+1:].lstrip()
                # backslash at end means carry string on to next line
                if is_line_continuation(newunit.value):
                    inmultilinevalue = True
                    newunit.value = newunit.value[:-1]
                else:
                    self.addunit(newunit)
                    newunit = propunit("", personality)
        # see if there is a leftover one...
        if inmultilinevalue or newunit.comments:
            self.addunit(newunit)

    def __str__(self):
        """convert the units back to lines"""
        return "".join([str(unit) for unit in self.units])