# Provenance (recovered from the patch header this file was extracted from):
#   commit:  b2d38f17e56bf3b391fe20b77532df23e5721413
#   author:  Aleksey Lim
#   date:    Thu, 12 Aug 2010 16:20:23 +0000
#   subject: Add copyright notes to python files
#   path:    aiml/WordSub.py (new file)
"""This module implements the WordSub class, modelled after a recipe
in "Python Cookbook" (Recipe 3.14, "Replacing Multiple Patterns in a
Single Pass" by Xavier Defrang).

Usage:
Use this class like a dictionary to add before/after pairs:
    > subber = WordSub()
    > subber["before"] = "after"
    > subber["begin"] = "end"
Use the sub() method to perform the substitution:
    > print(subber.sub("before we begin"))
    after we end
All matching is intelligently case-insensitive:
    > print(subber.sub("Before we BEGIN"))
    After we END
The 'before' words must be complete words -- no prefixes.
The following example illustrates this point:
    > subber["he"] = "she"
    > print(subber.sub("he says he'd like to help her"))
    she says she'd like to help her
Note that "he" and "he'd" were replaced, but "help" and "her" were
not.
"""

# 'dict' objects weren't available to subclass from until version 2.2.
# Get around this by importing UserDict.UserDict if the built-in dict
# object isn't available.
try:
    dict
except NameError:
    from UserDict import UserDict as dict

# ConfigParser is not used in this module; the name is kept available
# for backward compatibility.  Python 3 renamed the module.
try:
    import ConfigParser
except ImportError:
    import configparser as ConfigParser
import re
import string


class WordSub(dict):
    """All-in-one multiple-string-substitution class.

    Behaves like a dictionary of before -> after pairs.  Every pair
    that is assigned is stored in three case variants (lower case,
    Capitalized Words, UPPER CASE); sub() replaces whole-word
    occurrences of any stored key with its value in a single pass.
    """

    def _wordToRegex(self, word):
        """Return a regex source string that matches `word` as a whole word.

        The word is escaped and wrapped in \\b anchors.  Note that \\b
        only matches next to a word character, so keys that begin or end
        with punctuation will not be matched at that edge.
        """
        return r"\b%s\b" % re.escape(word)

    def _update_regex(self):
        """Rebuild the compiled regex from the current dictionary keys.

        Sets self._regex to None when there are no keys, because an
        empty pattern would match at every position of the input.
        """
        # Regex alternation takes the first alternative that matches, so
        # try longer keys first; otherwise "new" would shadow "new york".
        words = sorted(self.keys(), key=len, reverse=True)
        if words:
            self._regex = re.compile("|".join(map(self._wordToRegex, words)))
        else:
            self._regex = None
        self._regexIsDirty = False

    def __init__(self, defaults=None):
        """Initialize the object, and populate it with the entries in
        the defaults dictionary.

        defaults -- optional mapping of before -> after pairs; each pair
                    is added through item assignment, so it receives the
                    same three case variants as any other entry.
        """
        super(WordSub, self).__init__()
        self._regex = None
        self._regexIsDirty = True
        if defaults is not None:
            for k, v in defaults.items():
                self[k] = v

    def __call__(self, match):
        """Handler invoked for each regex match; returns the replacement."""
        return self[match.group(0)]

    def __setitem__(self, i, y):
        """Store the pair i -> y in lower, Capitalized and UPPER case.

        Marks the compiled regex as stale so the next sub() rebuilds it.
        """
        self._regexIsDirty = True
        # Name the class explicitly: super(type(self), self) recurses
        # forever as soon as WordSub is subclassed.
        store = super(WordSub, self).__setitem__
        # for each entry the user adds, we actually add three entries:
        store(i.lower(), y.lower())                        # key = value
        store(string.capwords(i), string.capwords(y))      # Key = Value
        store(i.upper(), y.upper())                        # KEY = VALUE

    def sub(self, text):
        """Translate text, returns the modified text.

        When no substitutions have been defined the text is returned
        unchanged.
        """
        if self._regexIsDirty:
            self._update_regex()
        if self._regex is None:
            return text
        return self._regex.sub(self, text)


# self-test
if __name__ == "__main__":
    subber = WordSub()
    subber["apple"] = "banana"
    subber["orange"] = "pear"
    subber["banana"] = "apple"
    subber["he"] = "she"
    subber["I'd"] = "I would"

    # test case insensitivity
    inStr = "I'd like one apple, one Orange and one BANANA."
    outStr = "I Would like one banana, one Pear and one APPLE."
    if subber.sub(inStr) == outStr:
        print("Test #1 PASSED")
    else:
        print("Test #1 FAILED: '%s'" % subber.sub(inStr))

    inStr = "He said he'd like to go with me"
    outStr = "She said she'd like to go with me"
    if subber.sub(inStr) == outStr:
        print("Test #2 PASSED")
    else:
        print("Test #2 FAILED: '%s'" % subber.sub(inStr))