#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2004-2008 Zuza Software Foundation
#
# This file is part of the Translate Toolkit.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Classes that hold units of PHP localisation files L{phpunit} or entire files
L{phpfile}. These files are used in translating many PHP based applications.

Only PHP files written with these conventions are supported::
    $lang['item'] = "value";   # Array of values
    $some_entity = "value";    # Named variables

The parser does not support other array conventions such as::
    $lang = array(
        'item1' => 'value1',
        'item2' => 'value2',
    );

The working of PHP strings and specifically the escaping conventions which
differ between single quote (') and double quote (") characters are outlined
in the PHP documentation for the
U{String type <http://www.php.net/manual/en/language.types.string.php>}
"""

from translate.storage import base
import re


def phpencode(text, quotechar="'"):
    """convert Python string to PHP escaping

    The encoding is implemented for 'single quote' and "double quote" syntax.

    heredoc and nowdoc are not implemented and it is not certain whether this
    would ever be needed for PHP localisation needs.

    @param text: the string to escape; empty or None is returned unchanged
    @param quotechar: the PHP quote character the value will be wrapped in.
    None is treated as a single quote, mirroring L{phpdecode}, which treats
    every quotechar other than '"' as single quote syntax.
    @return: the escaped string
    """
    if not text:
        return text
    if quotechar == '"':
        # \n may be converted to \\n but we don't.  This allows us to preserve
        # pretty layout that might have appeared in multiline entries.
        # We might lose some "blah\nblah" layouts but that's probably not the
        # most frequent use case. See bug 588
        #
        # The pairs are applied strictly in order; note that the final pair
        # turns every doubled backslash back into a single one.
        escapes = (("\\", "\\\\"), ("\r", "\\r"), ("\t", "\\t"), ("\v", "\\v"), ("\f", "\\f"), ("\\\\$", "\\$"), ('"', '\\"'), ("\\\\", "\\"))
        for a, b in escapes:
            text = text.replace(a, b)
        return text
    if quotechar is None:
        # "%s" % None would be the literal text "None": words containing
        # "None" would gain a backslash and quotes would never be escaped.
        quotechar = "'"
    return text.replace(quotechar, "\\" + quotechar)


def phpdecode(text, quotechar="'"):
    """convert PHP escaped string to a Python string

    @param text: the escaped PHP string; empty or None is returned unchanged
    @param quotechar: the quote character the value was wrapped in.  '"'
    selects double quote unescaping, anything else single quote unescaping.
    @return: the unescaped string
    """

    def decode_octal_hex(match):
        r"""decode Octal \NNN and Hex \xNN values"""
        groups = match.groupdict()
        if "octal" in groups:
            return groups['octal'].decode("string_escape")
        elif "hex" in groups:
            return groups['hex'].decode("string_escape")
        else:
            # Neither named group exists: leave the matched text untouched.
            return match.group(0)

    if not text:
        return text
    if quotechar == '"':
        # We do not escape \$ as it is used by variables and we can't
        # roundtrip that item.
        text = text.replace('\\"', '"').replace("\\\\", "\\")
        text = text.replace("\\n", "\n").replace("\\r", "\r").replace("\\t", "\t").replace("\\v", "\v").replace("\\f", "\f")
        # The group names select the branch taken in decode_octal_hex.
        text = re.sub(r"(?P<octal>\\[0-7]{1,3})", decode_octal_hex, text)
        text = re.sub(r"(?P<hex>\\x[0-9A-Fa-f]{1,2})", decode_octal_hex, text)
    else:
        text = text.replace("\\'", "'").replace("\\\\", "\\")
    return text


class phpunit(base.TranslationUnit):
    """a unit of a PHP file i.e. a name and value, and any comments
    associated"""

    def __init__(self, source=""):
        """construct a blank phpunit"""
        # Set before calling the base constructor because the source property
        # setter reads it.
        # NOTE(review): presumably the base constructor assigns self.source -
        # confirm against base.TranslationUnit.
        self.escape_type = None
        super(phpunit, self).__init__(source)
        self.name = ""
        self.value = ""
        self._comments = []
        self.source = source

    def setsource(self, source):
        """Sets the source AND the target to be equal"""
        # The value is stored PHP-escaped for the unit's quote style.
        self.value = phpencode(source, self.escape_type)

    def getsource(self):
        """Returns the stored value with the PHP escaping removed"""
        return phpdecode(self.value, self.escape_type)
    source = property(getsource, setsource)

    def settarget(self, target):
        """Note: this also sets the .source attribute!"""
        # Goes through the source property, so the target is escaped too.
        self.source = target

    def gettarget(self):
        """Returns the source; source and target share the same value"""
        return self.source
    target = property(gettarget, settarget)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow
        here"""
        source = self.getoutput()
        if isinstance(source, unicode):
            # Falls back to UTF-8 when the unit has no encoding attribute.
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """convert the unit back into formatted lines for a php file"""
        # The value is always written between single quotes, whatever
        # escape_type is.
        return "".join(self._comments + ["%s='%s';\n" % (self.name, self.value)])

    def addlocation(self, location):
        """Stores the location as the name of the unit"""
        self.name = location

    def getlocations(self):
        """Returns a one element list holding the name of the unit"""
        return [self.name]

    def addnote(self, note, origin=None):
        """Appends the note to the comments; origin is ignored"""
        self._comments.append(note)

    def getnotes(self, origin=None):
        """Returns all comments joined by newlines; origin is ignored"""
        return '\n'.join(self._comments)

    def removenotes(self):
        """Discards all comments"""
        self._comments = []

    def isblank(self):
        """Returns whether this is a blank element, containing only
        comments."""
        return not (self.name or self.value)


class phpfile(base.TranslationStore):
    """This class represents a PHP file, made up of phpunits"""
    UnitClass = phpunit

    def __init__(self, inputfile=None, encoding='utf-8'):
        """construct a phpfile, optionally reading in from inputfile

        @param inputfile: an open file-like object, or None for an empty
        store.  It is read completely and then closed.
        @param encoding: the encoding used to decode the file contents
        """
        super(phpfile, self).__init__(unitclass = self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        self._encoding = encoding
        if inputfile is not None:
            try:
                phpsrc = inputfile.read()
            finally:
                # Close the file even when reading fails.
                inputfile.close()
            self.parse(phpsrc)

    def parse(self, phpsrc):
        """Read the source of a PHP file in and include them as units

        @param phpsrc: the undecoded contents of the file; it is decoded with
        the encoding given to the constructor and handled line by line
        """
        newunit = phpunit()
        lastvalue = ""      # text collected from earlier lines of a multiline value
        value = ""
        invalue = False     # inside a value that has not been terminated yet
        incomment = False   # inside a /* */ comment that has not been closed yet
        valuequote = ""     # either ' or "
        for line in phpsrc.decode(self._encoding).split("\n"):
            commentstartpos = line.find("/*")
            commentendpos = line.rfind("*/")
            if commentstartpos != -1:
                incomment = True
                if commentendpos != -1:
                    # Opened and closed on this line; the note starts at the
                    # "/*" and stops just before the "*/".
                    newunit.addnote(line[commentstartpos:commentendpos].strip(), "developer")
                    incomment = False
                else:
                    newunit.addnote(line[commentstartpos:].strip(), "developer")
            if commentendpos != -1 and incomment:
                # A comment opened on an earlier line is closed here.
                newunit.addnote(line[:commentendpos+2].strip(), "developer")
                incomment = False
            if incomment and commentstartpos == -1:
                # A line inside a multiline comment.
                newunit.addnote(line.strip(), "developer")
                continue
            equalpos = line.find("=")
            if equalpos != -1 and not invalue:
                # Start of an assignment: the name is everything before the
                # first "=" with all spaces removed; the first character after
                # the "=" (ignoring whitespace) is taken as the quote character.
                newunit.addlocation(line[:equalpos].strip().replace(" ", ""))
                value = line[equalpos+1:].lstrip()[1:]
                valuequote = line[equalpos+1:].lstrip()[0]
                lastvalue = ""
                invalue = True
            else:
                if invalue:
                    # Continuation line of a multiline value.
                    value = line
            # Check the semicolons from right to left; one that directly
            # follows the quote character terminates the value.
            colonpos = value.rfind(";")
            while colonpos != -1:
                if value[colonpos-1] == valuequote:
                    newunit.value = lastvalue + value[:colonpos-1]
                    newunit.escape_type = valuequote
                    lastvalue = ""
                    invalue = False
                if not invalue and colonpos != len(value)-1:
                    # Text follows the semicolon: keep a trailing // comment.
                    commentinlinepos = value.find("//", colonpos)
                    if commentinlinepos != -1:
                        newunit.addnote(value[commentinlinepos+2:].strip(), "developer")
                if not invalue:
                    self.addunit(newunit)
                    # Emptying value makes the next rfind return -1, which
                    # ends the loop.
                    value = ""
                    newunit = phpunit()
                colonpos = value.rfind(";", 0, colonpos)
            if invalue:
                # The value is not terminated yet: keep the text and go on
                # with the next line.
                lastvalue = lastvalue + value + "\n"

    def __str__(self):
        """Convert the units back to lines."""
        return "".join([str(unit) for unit in self.units])