#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2004-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Classes that hold units of .properties files (propunit) or entire files
(propfile).  These files are used in translating Mozilla and other software.

The Java documentation for the .properties file format (see
java.util.Properties.load) is a good reference for the .properties
specification.

Properties files may also hold Java MessageFormat messages.  No special
handling is provided in this storage class for MessageFormat, but this may
be implemented in future.

Implementation
==============
A simple summary of what is permissible follows.

Comments::
  # a comment
  ! a comment

Name and Value pairs::
  # Note that the b and c are escaped for epydoc rendering
  a = a string
  d.e.f = another string
  b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \\u0123
  c = a string with a continuation line \\
      continuation line
"""

from translate.storage import base
from translate.misc import quote
from translate.lang import data
import re

# the rstripeols convert dos <-> unix nicely as well
# output will be appropriate for the platform

eol = "\n"


def find_delimeter(line):
    """Find the type and position of the delimeter in a property line.

    Property files can be delimeted by "=", ":" or whitespace (space for now).
    We find the position of each delimeter, then find the one that appears
    first.

    @param line: A properties line
    @type line: str
    @return: Delimeter character and offset within L{line}; C{(None, -1)}
    when no delimeter is found
    @rtype: Tuple (Delimeter char, Offset Integer)
    """
    delimeters = {"=": -1, ":": -1, " ": -1}
    # Leading whitespace is never a delimeter, so start searching after it.
    prewhitespace = len(line) - len(line.lstrip())
    # Find the position of the first unescaped occurrence of each delimeter
    for delimeter in ("=", ":", " "):
        pos = line.find(delimeter, prewhitespace)
        while pos != -1:
            # A delimeter at offset 0 has no preceding character and so cannot
            # be escaped; without this guard line[pos-1] would wrap around and
            # inspect the last character of the line instead.
            if pos == 0 or line[pos - 1] != "\\":
                delimeters[delimeter] = pos
                break
            pos = line.find(delimeter, pos + 1)
    # Find the first "=" or ":" delimeter
    mindelimeter = None
    minpos = -1
    for delimeter in ("=", ":"):
        pos = delimeters[delimeter]
        if pos != -1 and (minpos == -1 or pos < minpos):
            minpos = pos
            mindelimeter = delimeter
    spacepos = delimeters[" "]
    if mindelimeter is None:
        if spacepos != -1:
            # Use space delimeter if we found nothing else
            return (" ", spacepos)
        return (None, -1)
    if spacepos != -1 and spacepos < minpos:
        # If space delimeter occurs earlier then ":" or "=" then it is the
        # delimeter only if there are non-whitespace characters between it and
        # the other detected delimeter.
        if line[spacepos:minpos].strip():
            return (" ", spacepos)
    return (mindelimeter, minpos)


def is_line_continuation(line):
    """Determine whether L{line} has a line continuation marker.

    .properties files can be terminated with a backslash (\\) indicating
    that the 'value' continues on the next line.  Continuation is only
    valid if there are an odd number of backslashses (an even number
    would result in a set of N/2 slashes not an escape)

    @param line: A properties line
    @type line: str
    @return: Does L{line} end with a line continuation
    @rtype: Boolean
    """
    # Number of consecutive backslashes at the very end of the line.
    count = len(line) - len(line.rstrip("\\"))
    # Odd is a line continuation, even is not
    return (count % 2) == 1


def key_strip(key):
    """Cleanup whitespace found around a key

    @param key: A properties key
    @type key: str
    @return: Key without any uneeded whitespace
    @rtype: str
    """
    newkey = key.rstrip()
    # If line now end in \ we put back the whitespace that was escaped
    if newkey[-1:] == "\\":
        newkey += key[len(newkey):len(newkey)+1]
    return newkey.lstrip()


# Encoding used for each personality when converting between bytes and text.
default_encoding = {"java": "latin1", "mozilla": "utf-8"}


class propunit(base.TranslationUnit):
    """an element of a properties file i.e. a name and value, and any comments
    associated"""

    def __init__(self, source="", personality="java"):
        """construct a blank propunit

        @param source: Initial source text for the unit
        @param personality: Key into L{default_encoding} ("java" or "mozilla");
        selects the value encoder and the output encoding
        """
        # personality must be set first: assigning source (here or in the base
        # class) goes through setsource(), which reads it.
        self.personality = personality
        super(propunit, self).__init__(source)
        self.name = ""
        self.value = u""
        self.delimeter = u"="
        self.comments = []
        self.source = source

    def setsource(self, source):
        """Sets the source AND the target to be equal"""
        source = data.forceunicode(source)
        # The encoded form is what is stored in .value and written to file.
        if self.personality == "mozilla":
            self.value = quote.mozillapropertiesencode(source or u"")
        else:
            self.value = quote.javapropertiesencode(source or u"")

    def getsource(self):
        """Return the decoded value, with escaped spaces turned into spaces"""
        value = quote.propertiesdecode(self.value)
        value = re.sub(u"\\\\ ", u" ", value)
        return value

    source = property(getsource, setsource)

    def settarget(self, target):
        """Note: this also sets the .source attribute!"""
        self.source = target

    def gettarget(self):
        """Return the target, which is the same text as the source"""
        return self.source

    target = property(gettarget, settarget)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow
        here"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(default_encoding[self.personality])
        return source

    def getoutput(self):
        """convert the element back into formatted lines for a .properties
        file"""
        notes = self.getnotes()
        if notes:
            notes += u"\n"
        if self.isblank():
            return notes
        # NOTE: this re-encodes and stores .value as a side effect when a
        # mozilla unit still contains \u escapes.
        if "\\u" in self.value and self.personality == "mozilla":
            self.value = quote.mozillapropertiesencode(self.source)
        return u"%s%s%s%s\n" % (notes, self.name, self.delimeter, self.value)

    def getlocations(self):
        """Return the unit's name as its only location"""
        return [self.name]

    def addnote(self, note, origin=None):
        """Append a comment line to the unit; L{origin} is ignored"""
        note = data.forceunicode(note)
        self.comments.append(note)

    def getnotes(self, origin=None):
        """Return all comments joined by newlines; L{origin} is ignored"""
        return u'\n'.join(self.comments)

    def removenotes(self):
        """Discard all comments held by the unit"""
        self.comments = []

    def isblank(self):
        """returns whether this is a blank element, containing only
        comments..."""
        return not (self.name or self.value)


class propfile(base.TranslationStore):
    """this class represents a .properties file, made up of propunits"""
    UnitClass = propunit

    def __init__(self, inputfile=None, personality="java"):
        """construct a propfile, optionally reading in from inputfile

        @param inputfile: Optional file-like object; it is read completely
        and then closed
        @param personality: Personality passed on to L{parse}
        """
        super(propfile, self).__init__(unitclass=self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # Close the file even when reading fails.
            try:
                propsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(propsrc, personality)

    def parse(self, propsrc, personality="java"):
        """read the source of a properties file in and include them as units

        @param propsrc: Contents of a properties file; byte strings are
        decoded with the personality's encoding, unicode is used as is
        @param personality: "mozilla" selects utf-8, anything else latin1
        """
        newunit = propunit("", personality)
        inmultilinevalue = False
        # Decode only when needed: unicode() with an encoding argument raises
        # TypeError if it is handed text that is already unicode.
        if not isinstance(propsrc, unicode):
            propsrc = unicode(propsrc,
                              default_encoding.get(personality, 'latin1'))
        for line in propsrc.split(u"\n"):
            line = quote.rstripeol(line)
            # handle multiline value if we're in one
            if inmultilinevalue:
                newunit.value += line.lstrip()
                # see if there's more
                inmultilinevalue = is_line_continuation(newunit.value)
                if inmultilinevalue:
                    # still waiting for more: strip the backslash
                    newunit.value = newunit.value[:-1]
                else:
                    # we're finished, add it to the list...
                    self.addunit(newunit)
                    newunit = propunit("", personality)
            # otherwise, this could be a comment
            elif line.strip()[:1] in (u'#', u'!'):
                # comments are collected on the unit that follows them
                newunit.comments.append(line)
            elif not line.strip():
                # this is a blank line: flush any comment-only unit
                if str(newunit).strip():
                    self.addunit(newunit)
                    newunit = propunit("", personality)
            else:
                delimeter_char, delimeter_pos = find_delimeter(line)
                if delimeter_pos == -1:
                    # No delimeter found: the line is skipped.  Comments
                    # collected so far stay with the unit being built.
                    continue
                # otherwise, this is a definition
                newunit.delimeter = delimeter_char
                newunit.name = key_strip(line[:delimeter_pos])
                newunit.value = line[delimeter_pos+1:].lstrip()
                # backslash at end means carry string on to next line
                if is_line_continuation(newunit.value):
                    inmultilinevalue = True
                    newunit.value = newunit.value[:-1]
                else:
                    self.addunit(newunit)
                    newunit = propunit("", personality)
        # see if there is a leftover one...
        if inmultilinevalue or len(newunit.comments) > 0:
            self.addunit(newunit)

    def __str__(self):
        """convert the units back to lines"""
        return "".join([str(unit) for unit in self.units])