path: root/translate-toolkit-1.3.0/translate/misc/sparse.py
Diffstat (limited to 'translate-toolkit-1.3.0/translate/misc/sparse.py')
-rw-r--r--  translate-toolkit-1.3.0/translate/misc/sparse.py  188
1 file changed, 188 insertions(+), 0 deletions(-)
diff --git a/translate-toolkit-1.3.0/translate/misc/sparse.py b/translate-toolkit-1.3.0/translate/misc/sparse.py
new file mode 100644
index 0000000..bf2ba04
--- /dev/null
+++ b/translate-toolkit-1.3.0/translate/misc/sparse.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""simple parser / string tokenizer
+rather than returning a list of token types etc, we simple return a list of tokens...
+each tokenizing function takes a string as input and returns a list of tokens
+"""
+
+# Copyright 2002, 2003 St James Software
+#
+# This file is part of translate.
+#
+# translate is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# translate is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with translate; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+def stringeval(text):
+    """takes away repeated quotes (escapes) and returns the string represented by the text"""
+    stringchar = text[0]
+    if text[-1] != stringchar or stringchar not in ("'", '"'):
+        # the text must be wrapped in matching single or double quotes
+        raise ValueError("error parsing escaped string: %r" % text)
+    return text[1:-1].replace(stringchar + stringchar, stringchar)
+
+def stringquote(text):
+    """escapes quotes as necessary and returns a string representing the text"""
+    if "'" in text:
+        if '"' in text:
+            return '"' + text.replace('"', '""') + '"'
+        else:
+            return '"' + text + '"'
+    else:
+        return "'" + text + "'"
+
+class ParserError(ValueError):
+    """Intelligent parser error"""
+    def __init__(self, parser, message, tokennum):
+        """takes a message and the number of the token that caused the error"""
+        tokenpos = parser.findtokenpos(tokennum)
+        line, charpos = parser.getlinepos(tokenpos)
+        ValueError.__init__(self, "%s at line %d, char %d (token %r)" %
+                            (message, line, charpos, parser.tokens[tokennum]))
+        self.parser = parser
+        self.tokennum = tokennum
+
+class SimpleParser:
+    """a simple configurable tokenizer that splits text into string, operator and word tokens"""
+    def __init__(self, defaulttokenlist=None, whitespacechars=" \t\r\n", includewhitespacetokens=0):
+        if defaulttokenlist is None:
+            self.defaulttokenlist = ['<=', '>=', '==', '!=', '+=', '-=', '*=', '/=', '<>']
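+            # extending with a string adds each character as a single-character token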
+            self.defaulttokenlist.extend('(),[]:=+-')
+        else:
+            self.defaulttokenlist = defaulttokenlist
+        self.whitespacechars = whitespacechars
+        self.includewhitespacetokens = includewhitespacetokens
+        self.standardtokenizers = [self.stringtokenize, self.removewhitespace, self.separatetokens]
+        self.quotechars = ('"', "'")
+        self.endquotechars = {'"': '"', "'": "'"}
+        # whether backslash escapes are honoured inside string literals
+        self.stringescaping = 1
+
+    def stringtokenize(self, text):
+        """makes quoted strings in text into single tokens..."""
+        tokens = []
+        laststart = 0
+        instring = 0
+        endstringchar, escapechar = '', '\\'
+        gotclose, gotescape = 0, 0
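+        # gotclose flags a candidate closing quote: a doubled quote character
+        # toggles it straight back off, so '' or "" stays inside the string;
+        # gotescape tracks a pending backslash escape in the same way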
+        for pos in range(len(text)):
+            char = text[pos]
+            if instring:
+                if self.stringescaping and (gotescape or char == escapechar) and not gotclose:
+                    gotescape = not gotescape
+                elif char == endstringchar:
+                    gotclose = not gotclose
+                elif gotclose:
+                    tokens.append(text[laststart:pos])
+                    instring, laststart, endstringchar = 0, pos, ''
+            if not instring:
+                if char in self.quotechars:
+                    if pos > laststart:
+                        tokens.append(text[laststart:pos])
+                    instring, laststart, endstringchar, gotclose = 1, pos, self.endquotechars[char], 0
+        if laststart < len(text):
+            tokens.append(text[laststart:])
+        return tokens
+
+    def keeptogether(self, text):
+        """checks whether a token should be kept together"""
+        return self.isstringtoken(text)
+
+    def isstringtoken(self, text):
+        """checks whether a token is a string token"""
+        return text[:1] in self.quotechars
+
+    def separatetokens(self, text, tokenlist=None):
+        """separates out the tokens in tokenlist from the surrounding text"""
+        if self.keeptogether(text):
+            return [text]
+        if tokenlist is None:
+            tokenlist = self.defaulttokenlist
+        # loop through and put tokens into a list
+        # note: multi-character tokens must come before their single-character
+        # prefixes in tokenlist, or the shorter token will match first
+        tokens = []
+        pos = 0
+        laststart = 0
+        lentext = len(text)
+        while pos < lentext:
+            foundtoken = 0
+            for token in tokenlist:
+                lentoken = len(token)
+                if text[pos:pos+lentoken] == token:
+                    if laststart < pos:
+                        tokens.append(text[laststart:pos])
+                    tokens.append(token)
+                    pos += lentoken
+                    foundtoken, laststart = 1, pos
+                    break
+            if not foundtoken:
+                pos += 1
+        if laststart < lentext:
+            tokens.append(text[laststart:])
+        return tokens
+
+    def removewhitespace(self, text):
+        """removes whitespace, using it to split the text into separate tokens"""
+        if self.keeptogether(text):
+            return [text]
+        # loop through and put tokens into a list
+        tokens = []
+        inwhitespace = 0
+        laststart = 0
+        for pos in range(len(text)):
+            char = text[pos]
+            if inwhitespace:
+                if char not in self.whitespacechars:
+                    if laststart < pos and self.includewhitespacetokens:
+                        tokens.append(text[laststart:pos])
+                    inwhitespace, laststart = 0, pos
+            else:
+                if char in self.whitespacechars:
+                    if laststart < pos:
+                        tokens.append(text[laststart:pos])
+                    inwhitespace, laststart = 1, pos
+        if laststart < len(text) and (not inwhitespace or self.includewhitespacetokens):
+            tokens.append(text[laststart:])
+        return tokens
+
+    def applytokenizer(self, inputlist, tokenizer):
+        """apply a tokenizer to a set of text, flattening the result"""
+        joined = []
+        for text in inputlist:
+            joined.extend(tokenizer(text))
+        return joined
+
+    def applytokenizers(self, inputlist, tokenizers):
+        """apply a set of tokenizers to a set of text, flattening each time"""
+        for tokenizer in tokenizers:
+            inputlist = self.applytokenizer(inputlist, tokenizer)
+        return inputlist
+
+    def tokenize(self, source, tokenizers=None):
+        """tokenize the text string with the standard tokenizers"""
+        self.source = source
+        if tokenizers is None:
+            tokenizers = self.standardtokenizers
+        self.tokens = self.applytokenizers([self.source], tokenizers)
+        return self.tokens
+
+    def findtokenpos(self, tokennum):
+        """finds the position of the given token in the text"""
+        currenttokenpos = 0
+        searchpos = 0
+        for currenttokennum in range(tokennum+1):
+            currenttokenpos = self.source.find(self.tokens[currenttokennum], searchpos)
+            searchpos = currenttokenpos + len(self.tokens[currenttokennum])
+        return currenttokenpos
+
+    def getlinepos(self, tokenpos):
+        """finds the line and character position of the given character"""
+        sourcecut = self.source[:tokenpos]
+        line = sourcecut.count("\n") + 1
+        charpos = tokenpos - sourcecut.rfind("\n")
+        return line, charpos
+
+    def raiseerror(self, message, tokennum):
+        """raises a ParserError"""
+        raise ParserError(self, message, tokennum)
+
+
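The commit itself contains no usage example; the interpreter session below is
an illustrative sketch of how the module fits together (the expected output
follows from the code above, but is not part of the original file):

    >>> from translate.misc import sparse
    >>> parser = sparse.SimpleParser()
    >>> parser.tokenize("name = 'O''Brien'")
    ['name', '=', "'O''Brien'"]
    >>> sparse.stringeval("'O''Brien'")
    "O'Brien"
    >>> sparse.stringquote("O'Brien")
    '"O\'Brien"'

A failed parse can then be reported through raiseerror, which uses
findtokenpos and getlinepos to attach a line and character position to the
offending token.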