Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
path: root/translate-toolkit-1.5.1/translate/storage/statsdb.py
diff options
Diffstat (limited to 'translate-toolkit-1.5.1/translate/storage/statsdb.py')
1 files changed, 654 insertions, 0 deletions
diff --git a/translate-toolkit-1.5.1/translate/storage/statsdb.py b/translate-toolkit-1.5.1/translate/storage/statsdb.py
new file mode 100644
index 0000000..cccbb5b
--- /dev/null
+++ b/translate-toolkit-1.5.1/translate/storage/statsdb.py
@@ -0,0 +1,654 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2007-2009 Zuza Software Foundation
+# This file is part of the Translate Toolkit.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+"""Module to provide a cache of statistics in a database.
+@organization: Zuza Software Foundation
+@copyright: 2007 Zuza Software Foundation
+@license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
+"""
import functools
import os.path
import re
import stat
import sys
import thread
from UserDict import UserDict

try:
    from sqlite3 import dbapi2
except ImportError:
    from pysqlite2 import dbapi2

from translate import __version__ as toolkitversion
from translate.lang.common import Common
from translate.misc.multistring import multistring
from translate.storage import factory
# Patterns used by wordcount() to strip markup before counting words.
# Raw strings keep the regex escapes (\s, \D) out of Python's own string
# escape processing.

# The "_n: " prefix found at the start of KDE style plural messages.
kdepluralre = re.compile(r"^_n: ")
# HTML line breaks: <br>, <br/>, <br />.
brtagre = re.compile(r"<br\s*?/?>")
# Any XML/HTML-like tag.
xmltagre = re.compile(r"<[^>]+>")
# A full stop with a non-digit on both sides (i.e. not a decimal point).
numberre = re.compile(r"\D\.\D")

# Human readable names of the numeric unit states stored in the database.
state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
def wordcount(string):
    """Return the number of words in ``string`` once markup is removed.

    The KDE plural prefix is dropped, ``<br>`` tags become newlines, all
    other tags are removed and a full stop between two non-digits is turned
    into a space before the remaining text is split by ``Common.words``.
    """
    # TODO: po class should understand KDE style plurals
    string = kdepluralre.sub("", string)
    string = brtagre.sub("\n", string)
    string = xmltagre.sub("", string)
    string = numberre.sub(" ", string)
    # TODO: This should still use the correct language to count in the
    # target language
    return len(Common.words(string))
def wordsinunit(unit):
    """Counts the words in the unit's source and target, taking plurals into
    account. The target words are only counted if the unit is translated.

    Returns a ``(sourcewords, targetwords)`` tuple; ``targetwords`` is 0 when
    ``unit.istranslated()`` is false.
    """
    # A multistring carries one string per plural form; count all of them.
    if isinstance(unit.source, multistring):
        sourcestrings = unit.source.strings
    else:
        sourcestrings = [unit.source or ""]
    sourcewords = sum(wordcount(s) for s in sourcestrings)

    if not unit.istranslated():
        return sourcewords, 0

    if isinstance(unit.target, multistring):
        targetstrings = unit.target.strings
    else:
        targetstrings = [unit.target or ""]
    targetwords = sum(wordcount(s) for s in targetstrings)
    return sourcewords, targetwords
class Record(UserDict):
    """Dictionary of numeric statistics with a fixed, ordered set of keys.

    ``record_keys`` names the stored values, in the order used by
    ``to_tuple()``. ``compute_derived_values`` is called with the record and
    may add further keys calculated from the stored ones.
    """

    def __init__(self, record_keys, record_values=None, compute_derived_values=lambda x: x):
        """Build a record from parallel keys and values.

        When ``record_values`` is None every key starts at 0.
        """
        if record_values is None:
            record_values = (0 for _i in record_keys)
        self.record_keys = record_keys
        self.data = dict(zip(record_keys, record_values))
        self._compute_derived_values = compute_derived_values
        self._compute_derived_values(self)

    def to_tuple(self):
        """Return the stored values as a tuple ordered like ``record_keys``."""
        return tuple(self[key] for key in self.record_keys)

    def _combine(self, other, operation):
        """Return a new record holding ``operation(self[k], other[k])`` for
        every stored key, with the derived values recalculated."""
        result = Record(self.record_keys,
                        compute_derived_values=self._compute_derived_values)
        # Only the stored keys are combined; the derived keys are then
        # recalculated on the result, so stale derived values in either
        # operand cannot leak into it.
        for key in self.record_keys:
            result[key] = operation(self[key], other[key])
        self._compute_derived_values(result)
        return result

    def __add__(self, other):
        """Return the key-wise sum of the two records as a new record."""
        return self._combine(other, lambda left, right: left + right)

    def __sub__(self, other):
        """Return the key-wise difference of the two records as a new record."""
        return self._combine(other, lambda left, right: left - right)

    def as_string_for_db(self):
        """Return the stored values as a comma separated string of reprs."""
        return ",".join([repr(x) for x in self.to_tuple()])
def transaction(f):
    """Modifies f to commit database changes if it executes without exceptions.
    Otherwise it rolls back the database.

    ALL publicly accessible methods in StatsCache MUST be decorated with this
    decorator.

    The wrapped function must be a method of an object with a ``con``
    attribute that provides ``commit()`` and ``rollback()``. The wrapper
    returns whatever ``f`` returns and re-raises whatever ``f`` raises.
    """
    @functools.wraps(f)  # keep the name and docstring of the wrapped method
    def decorated_f(self, *args, **kwargs):
        try:
            result = f(self, *args, **kwargs)
            self.con.commit()
            return result
        except:
            # If ANY exception is raised, we're left in an
            # uncertain state and we MUST roll back any changes to avoid
            # getting stuck in an inconsistent state. The bare except is
            # deliberate; the exception is always re-raised below.
            if self.con:
                self.con.rollback()
            raise
    return decorated_f
# Numeric unit states as stored in the database. The values are the keys of
# the module level ``state_strings`` mapping.
UNTRANSLATED, TRANSLATED, FUZZY = 0, 1, 2


def statefordb(unit):
    """Returns the numeric database state for the unit.

    A translated unit is TRANSLATED; otherwise a fuzzy unit that has a
    target is FUZZY; everything else is UNTRANSLATED.
    """
    if unit.istranslated():
        return TRANSLATED
    if unit.isfuzzy() and unit.target:
        return FUZZY
    return UNTRANSLATED
class FileTotals(object):
    """Dictionary-like access, keyed by fileid, to the ``filetotals`` table."""

    # Stored columns in table order. __setitem__ inserts values positionally
    # after the fileid, so this order must match the CREATE TABLE statement.
    keys = ['translatedsourcewords',
            'fuzzysourcewords',
            'untranslatedsourcewords',
            'translated',
            'fuzzy',
            'untranslated',
            'translatedtargetwords']

    def db_keys(self):
        """Return the stored column names as a comma separated string."""
        return ",".join(self.keys)

    def __init__(self, cur):
        """Remember the cursor and make sure the ``filetotals`` table exists."""
        self.cur = cur
        self.cur.execute("""
            CREATE TABLE IF NOT EXISTS filetotals(
                fileid                  INTEGER PRIMARY KEY AUTOINCREMENT,
                translatedsourcewords   INTEGER NOT NULL,
                fuzzysourcewords        INTEGER NOT NULL,
                untranslatedsourcewords INTEGER NOT NULL,
                translated              INTEGER NOT NULL,
                fuzzy                   INTEGER NOT NULL,
                untranslated            INTEGER NOT NULL,
                translatedtargetwords   INTEGER NOT NULL);""")

    @classmethod
    def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
        """Return a Record of totals, all zero unless a unit state is given.

        With ``state_for_db`` set, the record describes one unit in that
        state with the given word counts.
        """
        record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
        if state_for_db is not None:
            # Compare by value: the state may come straight from the database.
            if state_for_db == UNTRANSLATED:
                record['untranslated'] = 1
                record['untranslatedsourcewords'] = sourcewords
            elif state_for_db == TRANSLATED:
                record['translated'] = 1
                record['translatedsourcewords'] = sourcewords
                record['translatedtargetwords'] = targetwords
            elif state_for_db == FUZZY:
                record['fuzzy'] = 1
                record['fuzzysourcewords'] = sourcewords
            # The derived totals were computed while every count was still 0.
            cls._compute_derived_values(record)
        return record

    @classmethod
    def _compute_derived_values(cls, record):
        """Fill in the keys of ``record`` that are calculated, not stored."""
        record["total"] = record["untranslated"] + \
                          record["translated"] + \
                          record["fuzzy"]
        record["totalsourcewords"] = record["untranslatedsourcewords"] + \
                                     record["translatedsourcewords"] + \
                                     record["fuzzysourcewords"]
        record["review"] = 0

    def __getitem__(self, fileid):
        """Return the totals of ``fileid`` as a Record.

        ``fetchone()`` yields None when the file has no row, in which case
        Record falls back to all-zero values.
        """
        result = self.cur.execute("""
            SELECT %(keys)s
            FROM   filetotals
            WHERE  fileid=?;""" % {'keys': self.db_keys()}, (fileid,))
        return Record(FileTotals.keys, result.fetchone(), self._compute_derived_values)

    def __setitem__(self, fileid, record):
        """Store ``record`` as the totals of ``fileid``, replacing any old row."""
        values = (fileid,) + tuple(record.to_tuple())
        # Bind the values as parameters instead of pasting their repr() into
        # the statement.
        placeholders = ",".join("?" * len(values))
        self.cur.execute("""
            INSERT OR REPLACE into filetotals
            VALUES (%s);""" % placeholders, values)

    def __delitem__(self, fileid):
        """Delete the totals row of ``fileid``, if there is one."""
        self.cur.execute("""
            DELETE FROM filetotals
            WHERE fileid=?;
        """, (fileid,))
def emptyfiletotals():
    """Returns a file totals Record with all statistics initialised to 0."""
    return FileTotals.new_record()
def emptyfilechecks():
    """Returns a new, empty dictionary for collecting check failures."""
    return {}
def emptyfilestats():
    """Returns a new dictionary with an empty list for each unit state and
    for "total"."""
    return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
def emptyunitstats():
    """Returns a new dictionary with empty source and target word count
    lists."""
    return {"sourcewordcount": [], "targetwordcount": []}
# We allow the caller to specify which value to return when errors_return_empty
# is True. We do this, since Pootle wants None to be returned when it calls
# get_mod_info directly, whereas we want an integer to be returned for
# uses of get_mod_info within this module.
# TODO: Get rid of empty_return when Pootle code is improved to not require
# this.
# NOTE(review): the signature below takes neither errors_return_empty nor
# empty_return, so the comment above looks stale -- confirm and remove.
def get_mod_info(file_path):
    """Return the ``(st_mtime, st_size)`` pair of ``file_path``.

    Any error raised by ``os.stat`` propagates to the caller. An
    AssertionError is raised when ``file_path`` is a directory.
    """
    file_stat = os.stat(file_path)
    # Raised explicitly rather than with ``assert`` so that the check is not
    # stripped when Python runs with -O.
    if stat.S_ISDIR(file_stat.st_mode):
        raise AssertionError("%s is a directory" % (file_path,))
    return file_stat.st_mtime, file_stat.st_size
def suggestion_extension():
    """Return the file name extension, separator included, that marks a
    file of pending suggestions."""
    return os.path.extsep + 'pending'
def suggestion_filename(filename):
    """Return ``filename`` with the suggestion extension appended."""
    return filename + suggestion_extension()
class StatsCache(object):
    """An object instantiated as a singleton for each statsfile that provides
    access to the database cache from a pool of StatsCache objects."""

    # Pool of caches: {thread identifier: {real path of statsfile: cache}}
    _caches = {}
    # Path of the default database; worked out on first use.
    defaultfile = None
    # This cache's connection
    con = None
    # The current cursor
    cur = None

    def __new__(cls, statsfile=None):
        """Return the cache of the current thread for ``statsfile``,
        creating the cache (and the database) if necessary.

        Without ``statsfile`` a ``stats.db`` in a per-user directory is used.
        """
        current_thread = thread.get_ident()

        def make_database(statsfile):
            def connect(cache):
                cache.con = dbapi2.connect(statsfile)
                cache.cur = cache.con.cursor()

            def clear_old_data(cache):
                """Delete the database file if it is empty or was written by
                an older toolkit build. Returns True if it was deleted."""
                try:
                    cache.cur.execute("""SELECT toolkitbuild FROM files""")
                    val = cache.cur.fetchone()
                    # If the database is empty, we have no idea whether its
                    # layout is correct, so we might as well delete it.
                    if val is None or val[0] < toolkitversion.build:
                        cache.con.close()
                        os.unlink(statsfile)
                        return True
                    return False
                except dbapi2.OperationalError:
                    return False

            cache = object.__new__(cls)
            connect(cache)
            if clear_old_data(cache):
                connect(cache)
            cache.create()
            # Pool the cache only once it is fully set up, so that a failure
            # above does not leave a half-initialised object in the pool.
            cls._caches.setdefault(current_thread, {})[statsfile] = cache
            return cache

        if not statsfile:
            if not cls.defaultfile:
                userdir = os.path.expanduser("~")
                if os.name == "nt":
                    cachedir = os.path.join(userdir, "Translate Toolkit")
                else:
                    cachedir = os.path.join(userdir, ".translate_toolkit")
                if not os.path.exists(cachedir):
                    os.mkdir(cachedir)
                cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
            statsfile = cls.defaultfile
        else:
            statsfile = os.path.realpath(statsfile)
        # First see if a cache for this file already exists:
        if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
            return cls._caches[current_thread][statsfile]
        # No existing cache. Let's build a new one and keep a copy
        return make_database(statsfile)

    @transaction
    def create(self):
        """Create all tables and indexes."""
        self.file_totals = FileTotals(self.cur)

        self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            st_mtime INTEGER NOT NULL,
            st_size INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""")

        self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""")

    @transaction
    def _getfileid(self, filename, check_mod_info=True, store=None):
        """return fileid representing the given file in the statscache.

        if file not in cache or has been updated since last record
        update, recalculate stats.

        optional argument store can be used to avoid unnecessary
        reparsing of already loaded translation files.

        store can be a TranslationFile object or a callback that returns one.

        With check_mod_info false, the stored modification info of a known
        file is simply updated and its fileid returned.
        """
        realpath = os.path.realpath(filename)
        self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
            WHERE path=?;""", (realpath,))
        filerow = self.cur.fetchone()
        mod_info = get_mod_info(realpath)
        if filerow:
            fileid = filerow[0]
            if not check_mod_info:
                # Update the mod_info of the file
                self.cur.execute("""UPDATE files
                    SET st_mtime=?, st_size=?
                    WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
                return fileid
            if (filerow[1], filerow[2]) == mod_info:
                return fileid
        # The file is not in the db, or has changed: lets recache it
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(realpath)
        return self._cachestore(store, realpath, mod_info)

    def _getstoredcheckerconfig(self, checker):
        """See if this checker configuration has been used before.

        Returns the configid, or None if the configuration is unknown.
        """
        config = str(checker.config.__dict__)
        self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
            config=?;""", (config,))
        configrow = self.cur.fetchone()
        if not configrow or configrow[1] != config:
            return None
        return configrow[0]

    @transaction
    def _cacheunitstats(self, units, fileid, unitindex=None, file_totals_record=None):
        """Cache the statistics for the supplied unit(s).

        When ``unitindex`` is given, every translatable unit is stored under
        that index and the state name of the first unit is returned;
        otherwise units are stored under their position in ``units`` and ""
        is returned. The unit totals are added to ``file_totals_record``
        (an all-zero record when omitted) and stored for ``fileid``.
        """
        if file_totals_record is None:
            file_totals_record = FileTotals.new_record()
        unitvalues = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                sourcewords, targetwords = wordsinunit(unit)
                # Index 0 is a valid unit index, so compare against None.
                if unitindex is not None:
                    index = unitindex
                state = statefordb(unit)
                # what about plurals in .source and .target?
                unitvalues.append((unit.getid(), fileid, index,
                                   unit.source, unit.target,
                                   sourcewords, targetwords,
                                   state))
                file_totals_record = file_totals_record + \
                    FileTotals.new_record(state, sourcewords, targetwords)
        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO units
            (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
            values (?, ?, ?, ?, ?, ?, ?, ?);""",
            unitvalues)
        self.file_totals[fileid] = file_totals_record
        if unitindex is not None:
            return state_strings[statefordb(units[0])]
        return ""

    @transaction
    def _cachestore(self, store, realpath, mod_info):
        """Calculates and caches the statistics of the given store
        unconditionally. Returns the new fileid."""
        # The file gets a fresh fileid below, so rows filed under its old id
        # would become unreachable; remove them first.
        self.cur.execute("""SELECT fileid FROM files WHERE
            path=?;""", (realpath,))
        for (stale_fileid,) in self.cur.fetchall():
            for table in ("units", "uniterrors", "filetotals"):
                self.cur.execute("DELETE FROM %s WHERE fileid=?;" % table,
                                 (stale_fileid,))
        self.cur.execute("""DELETE FROM files WHERE
            path=?;""", (realpath,))
        self.cur.execute("""INSERT INTO files
            (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
            (realpath, mod_info[0], mod_info[1], toolkitversion.build))
        fileid = self.cur.lastrowid
        self.cur.execute("""DELETE FROM units WHERE
            fileid=?""", (fileid,))
        self._cacheunitstats(store.units, fileid)
        return fileid

    def filetotals(self, filename, store=None):
        """Retrieves the statistics for the given file if possible, otherwise
        delegates to cachestore()."""
        return self.file_totals[self._getfileid(filename, store=store)]

    @transaction
    def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
        """Helper method for cachestorechecks() and recacheunit()

        Runs the checker on every translatable unit, stores the failures and
        returns the list of "check-<name>" strings (plus "total" when a
        single unit is being updated through ``unitindex``).
        """
        # We always want to store one dummy error to know that we have
        # actually run the checks on this file with the current checker
        # configuration
        dummy = (-1, fileid, configid, "noerror", "")
        unitvalues = [dummy]
        # if we are doing a single unit, we want to return the checknames
        errornames = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                # Correctly assign the unitindex (0 is a valid index)
                if unitindex is not None:
                    index = unitindex
                failures = checker.run_filters(unit)
                for checkname, checkmessage in failures.items():
                    unitvalues.append((index, fileid, configid, checkname, checkmessage))
                    errornames.append("check-" + checkname)
        checker.setsuggestionstore(None)

        if unitindex is not None:
            # We are only updating a single unit, so we don't want to add an
            # extra noerror-entry
            unitvalues.remove(dummy)
            errornames.append("total")

        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO uniterrors
            (unitindex, fileid, configid, name, message)
            values (?, ?, ?, ?, ?);""",
            unitvalues)
        return errornames

    @transaction
    def _cachestorechecks(self, fileid, store, checker, configid):
        """Calculates and caches the error statistics of the given store
        unconditionally."""
        # Let's purge all previous failures because they will probably just
        # fill up the database without much use.
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=?;""", (fileid,))
        self._cacheunitschecks(store.units, fileid, configid, checker)
        return fileid

    def get_unit_stats(self, fileid, unitid):
        """Return the ``(state, sourcewords, targetwords)`` row of the unit,
        or an empty list (after a warning on stderr) if it has no row."""
        values = self.cur.execute("""
            SELECT   state, sourcewords, targetwords
            FROM     units
            WHERE    fileid=? AND unitid=?
        """, (fileid, unitid))
        result = values.fetchone()
        if result is not None:
            return result
        sys.stderr.write(
            "WARNING: Database in inconsistent state.\n"
            "fileid %d and unitid %s have no entries in the table units.\n"
            % (fileid, unitid))
        # If values.fetchone() is None, then we return an empty list,
        # to make FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
        # do the right thing.
        return []

    @transaction
    def recacheunit(self, filename, checker, unit):
        """Recalculate all information for a specific unit. This is necessary
        for updating all statistics when a translation of a unit took place,
        for example.

        This method assumes that everything was up to date before (file
        totals, checks, checker config, etc.

        Returns a list holding the state name of the unit followed by the
        names of its failing checks.
        """
        fileid = self._getfileid(filename, check_mod_info=False)
        configid = self._get_config_id(fileid, checker)
        unitid = unit.getid()
        # Take the old numbers of this unit out of the file totals
        totals_without_unit = self.file_totals[fileid] - \
            FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
        # get the unit index
        self.cur.execute("""SELECT unitindex FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        unitindex = self.cur.fetchone()[0]
        self.cur.execute("""DELETE FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
        # remove the current errors
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=? AND unitindex=?;""", (fileid, unitindex))
        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
        return state

    def _checkerrors(self, filename, fileid, configid, checker, store):
        """Return ``(first error row, cursor)`` for the file, running and
        caching the checks first if no rows are stored yet."""
        def geterrors():
            self.cur.execute("""SELECT
                name,
                unitindex
                FROM uniterrors WHERE fileid=? and configid=?
                ORDER BY unitindex;""", (fileid, configid))
            return self.cur.fetchone(), self.cur

        first, cur = geterrors()
        if first is not None:
            return first, cur

        # This could happen if we haven't done the checks before, or the
        # file changed, or we are using a different configuration
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(filename)

        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        self._cachestorechecks(fileid, store, checker, configid)
        return geterrors()

    def _geterrors(self, filename, fileid, configid, checker, store):
        """Return all ``(name, unitindex)`` error rows of the file as a list."""
        result = []
        first, cur = self._checkerrors(filename, fileid, configid, checker, store)
        result.append(first)
        result.extend(cur.fetchall())
        return result

    @transaction
    def _get_config_id(self, fileid, checker):
        """Return the configid of the checker's configuration, storing the
        configuration first if it has not been seen before."""
        configid = self._getstoredcheckerconfig(checker)
        if configid:
            return configid
        self.cur.execute("""INSERT INTO checkerconfigs
            (configid, config) values (NULL, ?);""",
            (str(checker.config.__dict__),))
        return self.cur.lastrowid

    def filechecks(self, filename, checker, store=None):
        """Retrieves the error statistics for the given file if possible,
        otherwise delegates to cachestorechecks()."""
        fileid = self._getfileid(filename, store=store)
        configid = self._get_config_id(fileid, checker)
        values = self._geterrors(filename, fileid, configid, checker, store)

        errors = emptyfilechecks()
        for value in values:
            # Skip the dummy "noerror" row, which is stored with index -1
            if value[1] == -1:
                continue
            checkkey = 'check-' + value[0]      # value[0] is the error name
            if checkkey not in errors:
                errors[checkkey] = []
            errors[checkkey].append(value[1])   # value[1] is the unitindex
        return errors

    def file_fails_test(self, filename, checker, name):
        """Return True if any unit of the file fails the check ``name``."""
        fileid = self._getfileid(filename)
        configid = self._get_config_id(fileid, checker)
        # Make sure the checks have been run and cached for this file
        self._checkerrors(filename, fileid, configid, checker, None)
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors
            WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
        return self.cur.fetchone() is not None

    def filestatestats(self, filename, store=None):
        """Return a dictionary of unit stats mapping sets of unit
        indices with those states"""
        stats = emptyfilestats()
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT
            state,
            unitindex
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))
        values = self.cur.fetchall()

        for value in values:
            stats[state_strings[value[0]]].append(value[1])
            stats["total"].append(value[1])
        return stats

    def filestats(self, filename, checker, store=None):
        """Return a dictionary of property names mapping sets of unit
        indices with those properties."""
        stats = emptyfilestats()
        stats.update(self.filechecks(filename, checker, store))
        stats.update(self.filestatestats(filename, store))
        return stats

    def unitstats(self, filename, _lang=None, store=None):
        # For now, lang and store are unused. lang will allow the user to
        # base stats information on the given language. See the commented
        # line containing stats.update below.
        """Return a dictionary of property names mapping to arrays which
        map unit indices to property values.

        Please note that this is different from filestats, since filestats
        supplies sets of unit indices with a given property, whereas this
        method supplies arrays which map unit indices to given values."""
        stats = emptyunitstats()

        #stats.update(self.unitchecks(filename, lang, store))
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT
            sourcewords, targetwords
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))

        for sourcecount, targetcount in self.cur.fetchall():
            stats["sourcewordcount"].append(sourcecount)
            stats["targetwordcount"].append(targetcount)
        return stats