#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2004-2008 Zuza Software Foundation
#
# This file is part of the Translate Toolkit.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
"""Classes that hold units of PHP localisation files L{phpunit} or entire files
L{phpfile}. These files are used in translating many PHP based applications.
Only PHP files written with these conventions are supported::
$lang['item'] = "value"; # Array of values
$some_entity = "value"; # Named variables
The parser does not support other array conventions such as::
$lang = array(
'item1' => 'value1',
'item2' => 'value2',
);
The working of PHP strings and specifically the escaping conventions which
differ between single quote (') and double quote (") characters are outlined
in the PHP documentation for the U{String type<http://www.php.net/manual/en/language.types.string.php>}
"""
from translate.storage import base
import re
def phpencode(text, quotechar="'"):
    """Convert a Python string to PHP escaping.

    The encoding is implemented for
    U{'single quote'<http://www.php.net/manual/en/language.types.string.php#language.types.string.syntax.single>}
    and U{"double quote"<http://www.php.net/manual/en/language.types.string.php#language.types.string.syntax.double>}
    syntax.

    heredoc and nowdoc are not implemented and it is not certain whether this
    would ever be needed for PHP localisation needs.

    @param text: the unescaped string; falsy values (C{None}, C{""}) are
        returned unchanged
    @param quotechar: the quote character the PHP literal will be wrapped in.
        C{'"'} selects double-quote escaping; any other character is simply
        backslash-escaped wherever it occurs. A falsy value (C{None} or
        C{""}) is treated as a single quote.
    @return: the escaped string
    """
    if not text:
        return text
    if not quotechar:
        # Without this guard "%s" % None would escape the literal text "None"
        # and "" would insert a backslash between every character.
        quotechar = "'"
    if quotechar == '"':
        # \n may be converted to \\n but we don't. This allows us to preserve
        # pretty layout that might have appeared in multiline entries. We
        # might lose some "blah\nblah" layouts but that's probably not the
        # most frequent use case. See bug 588.
        #
        # The order matters: backslashes are doubled first and halved again
        # last, so a backslash already present in the text comes out
        # unchanged while the escapes added in between keep a single one.
        escapes = (
            ("\\", "\\\\"),
            ("\r", "\\r"),
            ("\t", "\\t"),
            ("\v", "\\v"),
            ("\f", "\\f"),
            ("\\\\$", "\\$"),
            ('"', '\\"'),
            ("\\\\", "\\"),
        )
        for plain, escaped in escapes:
            text = text.replace(plain, escaped)
        return text
    return text.replace(quotechar, "\\" + quotechar)
def phpdecode(text, quotechar="'"):
    """Convert a PHP escaped string to a Python string.

    @param text: the escaped text found between the quotes of a PHP string
        literal; falsy values (C{None}, C{""}) are returned unchanged
    @param quotechar: the quote character that delimited the literal.
        C{'"'} selects double-quote decoding; anything else (including
        C{None}) is decoded as a single-quoted string.
    @return: the unescaped string
    """
    if not text:
        return text

    if quotechar != '"':
        return text.replace("\\'", "'").replace("\\\\", "\\")

    # Build replacement characters of the same string type as the input so
    # that Python 2 unicode text is not mixed with byte strings. ``unichr`` is
    # only looked up for non-``str`` input, i.e. Python 2 unicode objects.
    to_char = chr if isinstance(text, str) else unichr  # noqa: F821

    def decode_octal_hex(match):
        """Decode one octal (\\NNN) or hex (\\xNN) escape to its character."""
        octal = match.group("octal")
        if octal is not None:
            # PHP silently overflows octal escapes to fit in one byte.
            return to_char(int(octal, 8) & 0xFF)
        return to_char(int(match.group("hex"), 16))

    # We do not unescape \$ as it is used by variables and we can't roundtrip
    # that item.
    text = text.replace('\\"', '"').replace("\\\\", "\\")
    simple_escapes = (
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\t", "\t"),
        ("\\v", "\v"),
        ("\\f", "\f"),
    )
    for escaped, plain in simple_escapes:
        text = text.replace(escaped, plain)
    # Octal and hex escapes are handled in a single pass so that the output
    # of one kind is never re-read as the start of the other.
    return re.sub(
        r"\\(?:(?P<octal>[0-7]{1,3})|x(?P<hex>[0-9A-Fa-f]{1,2}))",
        decode_octal_hex,
        text,
    )
class phpunit(base.TranslationUnit):
    """A unit of a PHP file, i.e. a name and value, and any comments
    associated.

    C{value} holds the text in its PHP-escaped form, C{escape_type} records
    the quote character the value is escaped for, and C{source}/C{target}
    expose the unescaped text. Source and target are the same string.
    """

    def __init__(self, source=""):
        """Construct a blank phpunit, optionally holding C{source}."""
        # Set before calling the base constructor: presumably it assigns
        # self.source, which goes through setsource() and reads escape_type.
        self.escape_type = None
        super(phpunit, self).__init__(source)
        self.name = ""
        self.value = ""
        self._comments = []
        self.source = source

    def _quotechar(self):
        """Return the quote character used to escape and output the value.

        Until a quote style has been recorded in C{escape_type}, and for
        anything that is not a double quote, the single quote is used.
        """
        if self.escape_type == '"':
            return '"'
        return "'"

    def setsource(self, source):
        """Sets the source AND the target to be equal"""
        self.value = phpencode(source, self._quotechar())

    def getsource(self):
        """Return the unescaped text of this unit."""
        return phpdecode(self.value, self._quotechar())
    source = property(getsource, setsource)

    def settarget(self, target):
        """Note: this also sets the .source attribute!"""
        # Escaping is applied by the source property setter.
        self.source = target

    def gettarget(self):
        """Return the target, which is always identical to the source."""
        return self.source
    target = property(gettarget, settarget)

    def __str__(self):
        """Convert to a byte string.

        Unicode output is encoded with the unit's C{encoding} attribute when
        it has one, otherwise with UTF-8.
        """
        output = self.getoutput()
        if isinstance(output, unicode):  # noqa: F821 (Python 2 module)
            return output.encode(getattr(self, "encoding", "UTF-8"))
        return output

    def getoutput(self):
        """Convert the unit back into formatted lines for a PHP file.

        The value is wrapped in the same quote character it is escaped for,
        so a double-quoted entry is not rewritten as a single-quoted one.
        """
        quote = self._quotechar()
        assignment = "%s=%s%s%s;\n" % (self.name, quote, self.value, quote)
        return "".join(self._comments + [assignment])

    def addlocation(self, location):
        """Store C{location} as the name of this unit."""
        self.name = location

    def getlocations(self):
        """Return the name of this unit as a one-element list."""
        return [self.name]

    def addnote(self, note, origin=None):
        """Append C{note} to the comments; C{origin} is ignored."""
        self._comments.append(note)

    def getnotes(self, origin=None):
        """Return all comments joined by newlines; C{origin} is ignored."""
        return '\n'.join(self._comments)

    def removenotes(self):
        """Discard all comments."""
        self._comments = []

    def isblank(self):
        """Returns whether this is a blank element, containing only comments."""
        return not (self.name or self.value)
class phpfile(base.TranslationStore):
    """This class represents a PHP file, made up of phpunits.

    Only simple assignments of a quoted string (C{name = 'value';} or
    C{name = "value";}), possibly spanning several lines, are recognised,
    together with C{/* ... */} block comments and a trailing C{//} comment
    after the closing semicolon.
    """
    UnitClass = phpunit

    def __init__(self, inputfile=None, encoding='utf-8'):
        """Construct a phpfile, optionally reading in from inputfile.

        @param inputfile: an open file-like object; it is read completely
            and then closed
        @param encoding: the encoding used to decode byte input in L{parse}
        """
        super(phpfile, self).__init__(unitclass=self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        self._encoding = encoding
        if inputfile is not None:
            # Close the file even when reading fails.
            try:
                phpsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(phpsrc)

    def parse(self, phpsrc):
        """Read the source of a PHP file in and include them as units.

        @param phpsrc: the file content, either as bytes (decoded with the
            store's encoding) or as already decoded text
        """
        if isinstance(phpsrc, bytes):
            phpsrc = phpsrc.decode(self._encoding)

        newunit = phpunit()
        lastvalue = ""      # earlier lines of a value spanning several lines
        value = ""          # the part of the current line belonging to a value
        invalue = False     # inside a string literal that is not closed yet
        incomment = False   # inside a /* ... */ comment that is not closed yet
        valuequote = ""     # either ' or "

        for line in phpsrc.split("\n"):
            commentstartpos = line.find("/*")
            commentendpos = line.rfind("*/")
            if commentstartpos != -1:
                incomment = True
                if commentendpos != -1:
                    # Comment opened and closed on this line; keep the
                    # closing */ so that the note is a complete comment.
                    newunit.addnote(
                        line[commentstartpos:commentendpos + 2].strip(),
                        "developer")
                    incomment = False
                else:
                    newunit.addnote(line[commentstartpos:].strip(),
                                    "developer")
            if commentendpos != -1 and incomment:
                # Last line of a comment that was opened on an earlier line.
                newunit.addnote(line[:commentendpos + 2].strip(), "developer")
                incomment = False
            if incomment and commentstartpos == -1:
                # Line in the middle of a multi-line comment.
                newunit.addnote(line.strip(), "developer")
                continue

            equalpos = line.find("=")
            righthand = ""
            if equalpos != -1:
                righthand = line[equalpos + 1:].lstrip()
            # A line with nothing after the "=" has no opening quote to
            # record, so it is not treated as the start of an assignment.
            if righthand and not invalue:
                newunit.addlocation(line[:equalpos].strip().replace(" ", ""))
                valuequote = righthand[0]
                value = righthand[1:]
                lastvalue = ""
                invalue = True
            elif invalue:
                value = line

            # Walk the semicolons from right to left looking for the one
            # that directly follows the closing quote.
            semicolonpos = value.rfind(";")
            while semicolonpos != -1:
                # semicolonpos > 0 stops the index from wrapping round to the
                # last character when the semicolon is the first character.
                if semicolonpos > 0 and value[semicolonpos - 1] == valuequote:
                    newunit.value = lastvalue + value[:semicolonpos - 1]
                    newunit.escape_type = valuequote
                    lastvalue = ""
                    invalue = False
                if not invalue and semicolonpos != len(value) - 1:
                    # Something follows the statement; keep a // comment.
                    commentinlinepos = value.find("//", semicolonpos)
                    if commentinlinepos != -1:
                        newunit.addnote(
                            value[commentinlinepos + 2:].strip(), "developer")
                if not invalue:
                    self.addunit(newunit)
                    value = ""
                    newunit = phpunit()
                semicolonpos = value.rfind(";", 0, semicolonpos)

            if invalue:
                # The literal continues on the next line.
                lastvalue = lastvalue + value + "\n"

    def __str__(self):
        """Convert the units back to lines."""
        return "".join(str(unit) for unit in self.units)