diff options
Diffstat (limited to 'Speak.activity/bot/aiml/PatternMgr.py')
-rw-r--r-- | Speak.activity/bot/aiml/PatternMgr.py | 329 |
1 files changed, 0 insertions, 329 deletions
diff --git a/Speak.activity/bot/aiml/PatternMgr.py b/Speak.activity/bot/aiml/PatternMgr.py deleted file mode 100644 index 21b73f1..0000000 --- a/Speak.activity/bot/aiml/PatternMgr.py +++ /dev/null @@ -1,329 +0,0 @@ -# This class implements the AIML pattern-matching algorithm described -# by Dr. Richard Wallace at the following site: -# http://www.alicebot.org/documentation/matching.html - -import marshal -import pprint -import re -import string -import sys - -class PatternMgr: - # special dictionary keys - _UNDERSCORE = 0 - _STAR = 1 - _TEMPLATE = 2 - _THAT = 3 - _TOPIC = 4 - _BOT_NAME = 5 - - def __init__(self): - self._root = {} - self._templateCount = 0 - self._botName = u"Nameless" - punctuation = "\"`~!@#$%^&*()-_=+[{]}\|;:',<.>/?" - self._puncStripRE = re.compile("[" + re.escape(punctuation) + "]") - self._whitespaceRE = re.compile("\s", re.LOCALE | re.UNICODE) - - def numTemplates(self): - """Return the number of templates currently stored.""" - return self._templateCount - - def setBotName(self, name): - """Set the name of the bot, used to match <bot name="name"> tags in - patterns. The name must be a single word! - - """ - # Collapse a multi-word name into a single word - self._botName = unicode(string.join(name.split())) - - def dump(self): - """Print all learned patterns, for debugging purposes.""" - pprint.pprint(self._root) - - def save(self, filename): - """Dump the current patterns to the file specified by filename. To - restore later, use restore(). - - """ - try: - outFile = open(filename, "wb") - marshal.dump(self._templateCount, outFile) - marshal.dump(self._botName, outFile) - marshal.dump(self._root, outFile) - outFile.close() - except Exception, e: - print "Error saving PatternMgr to file %s:" % filename - raise Exception, e - - def restore(self, filename): - """Restore a previously save()d collection of patterns.""" - try: - inFile = open(filename, "rb") - self._templateCount = marshal.load(inFile) - self._botName = marshal.load(inFile) - self._root = marshal.load(inFile) - inFile.close() - except Exception, e: - print "Error restoring PatternMgr from file %s:" % filename - raise Exception, e - - def add(self, (pattern,that,topic), template): - """Add a [pattern/that/topic] tuple and its corresponding template - to the node tree. - - """ - # TODO: make sure words contains only legal characters - # (alphanumerics,*,_) - - # Navigate through the node tree to the template's location, adding - # nodes if necessary. - node = self._root - for word in string.split(pattern): - key = word - if key == u"_": - key = self._UNDERSCORE - elif key == u"*": - key = self._STAR - elif key == u"BOT_NAME": - key = self._BOT_NAME - if not node.has_key(key): - node[key] = {} - node = node[key] - - # navigate further down, if a non-empty "that" pattern was included - if len(that) > 0: - if not node.has_key(self._THAT): - node[self._THAT] = {} - node = node[self._THAT] - for word in string.split(that): - key = word - if key == u"_": - key = self._UNDERSCORE - elif key == u"*": - key = self._STAR - if not node.has_key(key): - node[key] = {} - node = node[key] - - # navigate yet further down, if a non-empty "topic" string was included - if len(topic) > 0: - if not node.has_key(self._TOPIC): - node[self._TOPIC] = {} - node = node[self._TOPIC] - for word in string.split(topic): - key = word - if key == u"_": - key = self._UNDERSCORE - elif key == u"*": - key = self._STAR - if not node.has_key(key): - node[key] = {} - node = node[key] - - - # add the template. - if not node.has_key(self._TEMPLATE): - self._templateCount += 1 - node[self._TEMPLATE] = template - - def match(self, pattern, that, topic): - """Return the template which is the closest match to pattern. The - 'that' parameter contains the bot's previous response. The 'topic' - parameter contains the current topic of conversation. - - Returns None if no template is found. - - """ - if len(pattern) == 0: - return None - # Mutilate the input. Remove all punctuation and convert the - # text to all caps. - input = string.upper(pattern) - input = re.sub(self._puncStripRE, "", input) - if that.strip() == u"": that = u"ULTRABOGUSDUMMYTHAT" # 'that' must never be empty - thatInput = string.upper(that) - thatInput = re.sub(self._whitespaceRE, " ", thatInput) - thatInput = re.sub(self._puncStripRE, "", thatInput) - if topic.strip() == u"": topic = u"ULTRABOGUSDUMMYTOPIC" # 'topic' must never be empty - topicInput = string.upper(topic) - topicInput = re.sub(self._puncStripRE, "", topicInput) - - # Pass the input off to the recursive call - patMatch, template = self._match(input.split(), thatInput.split(), topicInput.split(), self._root) - return template - - def star(self, starType, pattern, that, topic, index): - """Returns a string, the portion of pattern that was matched by a *. - - The 'starType' parameter specifies which type of star to find. - Legal values are: - - 'star': matches a star in the main pattern. - - 'thatstar': matches a star in the that pattern. - - 'topicstar': matches a star in the topic pattern. - - """ - # Mutilate the input. Remove all punctuation and convert the - # text to all caps. - input = string.upper(pattern) - input = re.sub(self._puncStripRE, "", input) - if that.strip() == u"": that = u"ULTRABOGUSDUMMYTHAT" # 'that' must never be empty - thatInput = string.upper(that) - thatInput = re.sub(self._whitespaceRE, " ", thatInput) - thatInput = re.sub(self._puncStripRE, "", thatInput) - if topic.strip() == u"": topic = u"ULTRABOGUSDUMMYTOPIC" # 'topic' must never be empty - topicInput = string.upper(topic) - topicInput = re.sub(self._puncStripRE, "", topicInput) - - # Pass the input off to the recursive pattern-matcher - patMatch, template = self._match(input.split(), thatInput.split(), topicInput.split(), self._root) - if template == None: - return "" - - # Extract the appropriate portion of the pattern, based on the - # starType argument. - words = None - if starType == 'star': - patMatch = patMatch[:patMatch.index(self._THAT)] - words = input.split() - elif starType == 'thatstar': - patMatch = patMatch[patMatch.index(self._THAT)+1 : patMatch.index(self._TOPIC)] - words = thatInput.split() - elif starType == 'topicstar': - patMatch = patMatch[patMatch.index(self._TOPIC)+1 :] - words = topicInput.split() - else: - # unknown value - raise ValueError, "starType must be in ['star', 'thatstar', 'topicstar']" - - # compare the input string to the matched pattern, word by word. - # At the end of this loop, if foundTheRightStar is true, start and - # end will contain the start and end indices (in "words") of - # the substring that the desired star matched. - foundTheRightStar = False - start = end = j = numStars = k = 0 - for i in range(len(words)): - # This condition is true after processing a star - # that ISN'T the one we're looking for. - if i < k: - continue - # If we're reached the end of the pattern, we're done. - if j == len(patMatch): - break - if not foundTheRightStar: - if patMatch[j] in [self._STAR, self._UNDERSCORE]: #we got a star - numStars += 1 - if numStars == index: - # This is the star we care about. - foundTheRightStar = True - start = i - # Iterate through the rest of the string. - for k in range (i, len(words)): - # If the star is at the end of the pattern, - # we know exactly where it ends. - if j+1 == len (patMatch): - end = len (words) - break - # If the words have started matching the - # pattern again, the star has ended. - if patMatch[j+1] == words[k]: - end = k - 1 - i = k - break - # If we just finished processing the star we cared - # about, we exit the loop early. - if foundTheRightStar: - break - # Move to the next element of the pattern. - j += 1 - - # extract the star words from the original, unmutilated input. - if foundTheRightStar: - #print string.join(pattern.split()[start:end+1]) - if starType == 'star': return string.join(pattern.split()[start:end+1]) - elif starType == 'thatstar': return string.join(that.split()[start:end+1]) - elif starType == 'topicstar': return string.join(topic.split()[start:end+1]) - else: return "" - - def _match(self, words, thatWords, topicWords, root): - """Return a tuple (pat, tem) where pat is a list of nodes, starting - at the root and leading to the matching pattern, and tem is the - matched template. - - """ - # base-case: if the word list is empty, return the current node's - # template. - if len(words) == 0: - # we're out of words. - pattern = [] - template = None - if len(thatWords) > 0: - # If thatWords isn't empty, recursively - # pattern-match on the _THAT node with thatWords as words. - try: - pattern, template = self._match(thatWords, [], topicWords, root[self._THAT]) - if pattern != None: - pattern = [self._THAT] + pattern - except KeyError: - pattern = [] - template = None - elif len(topicWords) > 0: - # If thatWords is empty and topicWords isn't, recursively pattern - # on the _TOPIC node with topicWords as words. - try: - pattern, template = self._match(topicWords, [], [], root[self._TOPIC]) - if pattern != None: - pattern = [self._TOPIC] + pattern - except KeyError: - pattern = [] - template = None - if template == None: - # we're totally out of input. Grab the template at this node. - pattern = [] - try: template = root[self._TEMPLATE] - except KeyError: template = None - return (pattern, template) - - first = words[0] - suffix = words[1:] - - # Check underscore. - # Note: this is causing problems in the standard AIML set, and is - # currently disabled. - if root.has_key(self._UNDERSCORE): - # Must include the case where suf is [] in order to handle the case - # where a * or _ is at the end of the pattern. - for j in range(len(suffix)+1): - suf = suffix[j:] - pattern, template = self._match(suf, thatWords, topicWords, root[self._UNDERSCORE]) - if template is not None: - newPattern = [self._UNDERSCORE] + pattern - return (newPattern, template) - - # Check first - if root.has_key(first): - pattern, template = self._match(suffix, thatWords, topicWords, root[first]) - if template is not None: - newPattern = [first] + pattern - return (newPattern, template) - - # check bot name - if root.has_key(self._BOT_NAME) and first == self._botName: - pattern, template = self._match(suffix, thatWords, topicWords, root[self._BOT_NAME]) - if template is not None: - newPattern = [first] + pattern - return (newPattern, template) - - # check star - if root.has_key(self._STAR): - # Must include the case where suf is [] in order to handle the case - # where a * or _ is at the end of the pattern. - for j in range(len(suffix)+1): - suf = suffix[j:] - pattern, template = self._match(suf, thatWords, topicWords, root[self._STAR]) - if template is not None: - newPattern = [self._STAR] + pattern - return (newPattern, template) - - # No matches were found. - return (None, None)
\ No newline at end of file |