#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2002-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""classes that hold units of comma-separated values (.csv) files (csvunit)
or entire files (csvfile) for use with localisation"""

import csv

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from translate.misc import sparse
from translate.storage import base


class SimpleDictReader:
    """reads rows from tokenized csv content and yields each row as a dict
    keyed by the given fieldnames (a minimal stand-in for csv.DictReader)"""

    def __init__(self, fileobj, fieldnames):
        self.fieldnames = fieldnames
        self.contents = fileobj.read()
        self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
        self.parser.stringescaping = 0
        self.parser.quotechars = '"'
        self.tokens = self.parser.tokenize(self.contents)
        self.tokenpos = 0

    def __iter__(self):
        return self

    def getvalue(self, value):
        """returns a value, evaluating strings as necessary"""
        if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')):
            return sparse.stringeval(value)
        else:
            return value

    def next(self):
        lentokens = len(self.tokens)
        # skip any newlines left over from the previous row
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1
        if self.tokenpos >= lentokens:
            raise StopIteration()
        # collect the tokens that make up this row
        thistokens = []
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
            thistokens.append(self.tokens[self.tokenpos])
            self.tokenpos += 1
        # consume the newline(s) that terminate the row
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1
        fields = []
        # patch together fields since we can have quotes inside a field
        currentfield = ''
        fieldparts = 0
        for token in thistokens:
            if token == ',':
                # a field is only quoted if the whole thing is quoted
                if fieldparts == 1:
                    currentfield = self.getvalue(currentfield)
                fields.append(currentfield)
                currentfield = ''
                fieldparts = 0
            else:
                currentfield += token
                fieldparts += 1
        # things after the last comma...
        if fieldparts:
            if fieldparts == 1:
                currentfield = self.getvalue(currentfield)
            fields.append(currentfield)
        # map the parsed fields onto the fieldnames, padding short rows with ""
        values = {}
        for fieldnum in range(len(self.fieldnames)):
            if fieldnum >= len(fields):
                values[self.fieldnames[fieldnum]] = ""
            else:
                values[self.fieldnames[fieldnum]] = fields[fieldnum]
        return values


class csvunit(base.TranslationUnit):
    # characters that spreadsheet applications treat specially at the start of a cell
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]

    def __init__(self, source=None):
        super(csvunit, self).__init__(source)
        self.comment = ""
        self.source = source
        self.target = ""

    def add_spreadsheet_escapes(self, source, target):
        """add common spreadsheet escapes to two strings"""
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(unescaped):
                source = source.replace(unescaped, escaped, 1)
            if target.startswith(unescaped):
                target = target.replace(unescaped, escaped, 1)
        return source, target

    def remove_spreadsheet_escapes(self, source, target):
        """remove common spreadsheet escapes from two strings"""
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(escaped):
                source = source.replace(escaped, unescaped, 1)
            if target.startswith(escaped):
                target = target.replace(escaped, unescaped, 1)
        return source, target

    def fromdict(self, cedict):
        """fills in the unit from a dict with location, source and target keys"""
        self.comment = cedict.get('location', '').decode('utf-8')
        self.source = cedict.get('source', '').decode('utf-8')
        self.target = cedict.get('target', '').decode('utf-8')
        if self.comment is None:
            self.comment = ''
        if self.source is None:
            self.source = ''
        if self.target is None:
            self.target = ''
        self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)

    def todict(self, encoding='utf-8'):
        """returns the unit as a dict with location, source and target keys, encoded as bytestrings"""
        comment, source, target = self.comment, self.source, self.target
        source, target = self.add_spreadsheet_escapes(source, target)
        if isinstance(comment, unicode):
            comment = comment.encode(encoding)
        if isinstance(source, unicode):
            source = source.encode(encoding)
        if isinstance(target, unicode):
            target = target.encode(encoding)
        return {'location': comment, 'source': source, 'target': target}


class csvfile(base.TranslationStore):
    """This class represents a .csv file with various lines.
    The default format contains three columns: location, source, target"""
    UnitClass = csvunit
    Name = _("Comma Separated Value")
    Mimetypes = ['text/comma-separated-values', 'text/csv']
    Extensions = ["csv"]

    def __init__(self, inputfile=None, fieldnames=None):
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        if fieldnames is None:
            self.fieldnames = ['location', 'source', 'target']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            csvsrc = inputfile.read()
            inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        """parses the given csv source string into units"""
        csvsrcfile = StringIO(csvsrc)
        reader = SimpleDictReader(csvsrcfile, self.fieldnames)
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            self.addunit(newce)

    def __str__(self):
        """convert to a string, encoding any unicode output using self.encoding (default UTF-8)"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """returns the csv output for the units in this store"""
        outputfile = StringIO()
        writer = csv.DictWriter(outputfile, self.fieldnames)
        for ce in self.units:
            cedict = ce.todict()
            writer.writerow(cedict)
        return outputfile.getvalue()
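

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the API): build a
    # csvfile store from an in-memory CSV snippet using the default
    # location,source,target columns, dump the parsed units, then round-trip
    # the store back to csv text.  It assumes translate.misc.sparse and
    # translate.storage.base are importable and that a gettext-style _()
    # builtin has been installed before this module is loaded (the Name
    # attribute above requires it); the sample data below is made up.
    demo = "intl.c:42,Hello,Hallo\nintl.c:43,Goodbye,Tschuess\n"
    store = csvfile(StringIO(demo))
    for unit in store.units:
        print "%s: %s -> %s" % (unit.comment, unit.source, unit.target)
    # serialising the store writes the same three columns back out
    print str(store)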