Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/bot/aiml/AimlParser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bot/aiml/AimlParser.py')
-rw-r--r--bot/aiml/AimlParser.py545
1 files changed, 0 insertions, 545 deletions
diff --git a/bot/aiml/AimlParser.py b/bot/aiml/AimlParser.py
deleted file mode 100644
index 75c2cf1..0000000
--- a/bot/aiml/AimlParser.py
+++ /dev/null
@@ -1,545 +0,0 @@
-from xml.sax.handler import ContentHandler
-from xml.sax.xmlreader import Locator
-import sys
-import xml.sax
-import xml.sax.handler
-
-class AimlParserError(Exception): pass
-
-class AimlHandler(ContentHandler):
- # The legal states of the AIML parser
- _STATE_OutsideAiml = 0
- _STATE_InsideAiml = 1
- _STATE_InsideCategory = 2
- _STATE_InsidePattern = 3
- _STATE_AfterPattern = 4
- _STATE_InsideThat = 5
- _STATE_AfterThat = 6
- _STATE_InsideTemplate = 7
- _STATE_AfterTemplate = 8
-
- def __init__(self, encoding = "UTF-8"):
- self.categories = {}
- self._encoding = encoding
- self._state = self._STATE_OutsideAiml
- self._version = ""
- self._namespace = ""
- self._forwardCompatibleMode = False
- self._currentPattern = ""
- self._currentThat = ""
- self._currentTopic = ""
- self._insideTopic = False
- self._currentUnknown = "" # the name of the current unknown element
-
- # This is set to true when a parse error occurs in a category.
- self._skipCurrentCategory = False
-
- # Counts the number of parse errors in a particular AIML document.
- # query with getNumErrors(). If 0, the document is AIML-compliant.
- self._numParseErrors = 0
-
- # TODO: select the proper validInfo table based on the version number.
- self._validInfo = self._validationInfo101
-
- # This stack of bools is used when parsing <li> elements inside
- # <condition> elements, to keep track of whether or not an
- # attribute-less "default" <li> element has been found yet. Only
- # one default <li> is allowed in each <condition> element. We need
- # a stack in order to correctly handle nested <condition> tags.
- self._foundDefaultLiStack = []
-
- # This stack of strings indicates what the current whitespace-handling
- # behavior should be. Each string in the stack is either "default" or
- # "preserve". When a new AIML element is encountered, a new string is
- # pushed onto the stack, based on the value of the element's "xml:space"
- # attribute (if absent, the top of the stack is pushed again). When
- # ending an element, pop an object off the stack.
- self._whitespaceBehaviorStack = ["default"]
-
- self._elemStack = []
- self._locator = Locator()
- self.setDocumentLocator(self._locator)
-
- def getNumErrors(self):
- "Return the number of errors found while parsing the current document."
- return self._numParseErrors
-
- def setEncoding(self, encoding):
- """Set the text encoding to use when encoding strings read from XML.
-
- Defaults to 'UTF-8'.
-
- """
- self._encoding = encoding
-
- def _location(self):
- "Return a string describing the current location in the source file."
- line = self._locator.getLineNumber()
- column = self._locator.getColumnNumber()
- return "(line %d, column %d)" % (line, column)
-
- def _pushWhitespaceBehavior(self, attr):
- """Push a new string onto the whitespaceBehaviorStack.
-
- The string's value is taken from the "xml:space" attribute, if it exists
- and has a legal value ("default" or "preserve"). Otherwise, the previous
- stack element is duplicated.
-
- """
- assert len(self._whitespaceBehaviorStack) > 0, "Whitespace behavior stack should never be empty!"
- try:
- if attr["xml:space"] == "default" or attr["xml:space"] == "preserve":
- self._whitespaceBehaviorStack.append(attr["xml:space"])
- else:
- raise AimlParserError, "Invalid value for xml:space attribute "+self._location()
- except KeyError:
- self._whitespaceBehaviorStack.append(self._whitespaceBehaviorStack[-1])
-
- def startElementNS(self, name, qname, attr):
- print "QNAME:", qname
- print "NAME:", name
- uri,elem = name
- if (elem == "bot"): print "name:", attr.getValueByQName("name"), "a'ite?"
- self.startElement(elem, attr)
- pass
-
- def startElement(self, name, attr):
- # Wrapper around _startElement, which catches errors in _startElement()
- # and keeps going.
-
- # If we're inside an unknown element, ignore everything until we're
- # out again.
- if self._currentUnknown != "":
- return
- # If we're skipping the current category, ignore everything until
- # it's finished.
- if self._skipCurrentCategory:
- return
-
- # process this start-element.
- try: self._startElement(name, attr)
- except AimlParserError, msg:
- # Print the error message
- sys.stderr.write("PARSE ERROR: %s\n" % msg)
-
- self._numParseErrors += 1 # increment error count
- # In case of a parse error, if we're inside a category, skip it.
- if self._state >= self._STATE_InsideCategory:
- self._skipCurrentCategory = True
-
- def _startElement(self, name, attr):
- if name == "aiml":
- # <aiml> tags are only legal in the OutsideAiml state
- if self._state != self._STATE_OutsideAiml:
- raise AimlParserError, "Unexpected <aiml> tag "+self._location()
- self._state = self._STATE_InsideAiml
- self._insideTopic = False
- self._currentTopic = u""
- try: self._version = attr["version"]
- except KeyError:
- # This SHOULD be a syntax error, but so many AIML sets out there are missing
- # "version" attributes that it just seems nicer to let it slide.
- #raise AimlParserError, "Missing 'version' attribute in <aiml> tag "+self._location()
- #print "WARNING: Missing 'version' attribute in <aiml> tag "+self._location()
- #print " Defaulting to version 1.0"
- self._version = "1.0"
- self._forwardCompatibleMode = (self._version != "1.0.1")
- self._pushWhitespaceBehavior(attr)
- # Not sure about this namespace business yet...
- #try:
- # self._namespace = attr["xmlns"]
- # if self._version == "1.0.1" and self._namespace != "http://alicebot.org/2001/AIML-1.0.1":
- # raise AimlParserError, "Incorrect namespace for AIML v1.0.1 "+self._location()
- #except KeyError:
- # if self._version != "1.0":
- # raise AimlParserError, "Missing 'version' attribute(s) in <aiml> tag "+self._location()
- elif self._state == self._STATE_OutsideAiml:
- # If we're outside of an AIML element, we ignore all tags.
- return
- elif name == "topic":
- # <topic> tags are only legal in the InsideAiml state, and only
- # if we're not already inside a topic.
- if (self._state != self._STATE_InsideAiml) or self._insideTopic:
- raise AimlParserError, "Unexpected <topic> tag", self._location()
- try: self._currentTopic = unicode(attr['name'])
- except KeyError:
- raise AimlParserError, "Required \"name\" attribute missing in <topic> element "+self._location()
- self._insideTopic = True
- elif name == "category":
- # <category> tags are only legal in the InsideAiml state
- if self._state != self._STATE_InsideAiml:
- raise AimlParserError, "Unexpected <category> tag "+self._location()
- self._state = self._STATE_InsideCategory
- self._currentPattern = u""
- self._currentThat = u""
- # If we're not inside a topic, the topic is implicitly set to *
- if not self._insideTopic: self._currentTopic = u"*"
- self._elemStack = []
- self._pushWhitespaceBehavior(attr)
- elif name == "pattern":
- # <pattern> tags are only legal in the InsideCategory state
- if self._state != self._STATE_InsideCategory:
- raise AimlParserError, "Unexpected <pattern> tag "+self._location()
- self._state = self._STATE_InsidePattern
- elif name == "that" and self._state == self._STATE_AfterPattern:
- # <that> are legal either inside a <template> element, or
- # inside a <category> element, between the <pattern> and the
- # <template> elements. This clause handles the latter case.
- self._state = self._STATE_InsideThat
- elif name == "template":
- # <template> tags are only legal in the AfterPattern and AfterThat
- # states
- if self._state not in [self._STATE_AfterPattern, self._STATE_AfterThat]:
- raise AimlParserError, "Unexpected <template> tag "+self._location()
- # if no <that> element was specified, it is implicitly set to *
- if self._state == self._STATE_AfterPattern:
- self._currentThat = u"*"
- self._state = self._STATE_InsideTemplate
- self._elemStack.append(['template',{}])
- self._pushWhitespaceBehavior(attr)
- elif self._state == self._STATE_InsidePattern:
- # Certain tags are allowed inside <pattern> elements.
- if name == "bot" and attr.has_key("name") and attr["name"] == u"name":
- # Insert a special character string that the PatternMgr will
- # replace with the bot's name.
- self._currentPattern += u" BOT_NAME "
- else:
- raise AimlParserError, ("Unexpected <%s> tag " % name)+self._location()
- elif self._state == self._STATE_InsideThat:
- # Certain tags are allowed inside <that> elements.
- if name == "bot" and attr.has_key("name") and attr["name"] == u"name":
- # Insert a special character string that the PatternMgr will
- # replace with the bot's name.
- self._currentThat += u" BOT_NAME "
- else:
- raise AimlParserError, ("Unexpected <%s> tag " % name)+self._location()
- elif self._state == self._STATE_InsideTemplate and self._validInfo.has_key(name):
- # Starting a new element inside the current pattern. First
- # we need to convert 'attr' into a native Python dictionary,
- # so it can later be marshaled.
- attrDict = {}
- for k,v in attr.items():
- #attrDict[k[1].encode(self._encoding)] = v.encode(self._encoding)
- attrDict[k.encode(self._encoding)] = unicode(v)
- self._validateElemStart(name, attrDict, self._version)
- # Push the current element onto the element stack.
- self._elemStack.append([name.encode(self._encoding),attrDict])
- self._pushWhitespaceBehavior(attr)
- # If this is a condition element, push a new entry onto the
- # foundDefaultLiStack
- if name == "condition":
- self._foundDefaultLiStack.append(False)
- else:
- # we're now inside an unknown element.
- if self._forwardCompatibleMode:
- # In Forward Compatibility Mode, we ignore the element and its
- # contents.
- self._currentUnknown = name
- else:
- # Otherwise, unknown elements are grounds for error!
- raise AimlParserError, ("Unexpected <%s> tag " % name)+self._location()
-
- def characters(self, ch):
- # Wrapper around _characters which catches errors in _characters()
- # and keeps going.
- if self._state == self._STATE_OutsideAiml:
- # If we're outside of an AIML element, we ignore all text
- return
- if self._currentUnknown != "":
- # If we're inside an unknown element, ignore all text
- return
- if self._skipCurrentCategory:
- # If we're skipping the current category, ignore all text.
- return
- try: self._characters(ch)
- except AimlParserError, msg:
- # Print the message
- sys.stderr.write("PARSE ERROR: %s\n" % msg)
- self._numParseErrors += 1 # increment error count
- # In case of a parse error, if we're inside a category, skip it.
- if self._state >= self._STATE_InsideCategory:
- self._skipCurrentCategory = True
-
- def _characters(self, ch):
- text = unicode(ch)
- if self._state == self._STATE_InsidePattern:
- self._currentPattern += text
- elif self._state == self._STATE_InsideThat:
- self._currentThat += text
- elif self._state == self._STATE_InsideTemplate:
- # First, see whether the element at the top of the element stack
- # is permitted to contain text.
- try:
- parent = self._elemStack[-1][0]
- parentAttr = self._elemStack[-1][1]
- required, optional, canBeParent = self._validInfo[parent]
- nonBlockStyleCondition = (parent == "condition" and not (parentAttr.has_key("name") and parentAttr.has_key("value")))
- if not canBeParent:
- raise AimlParserError, ("Unexpected text inside <%s> element "%parent)+self._location()
- elif parent == "random" or nonBlockStyleCondition:
- # <random> elements can only contain <li> subelements. However,
- # there's invariably some whitespace around the <li> that we need
- # to ignore. Same for non-block-style <condition> elements (i.e.
- # those which don't have both a "name" and a "value" attribute).
- if len(text.strip()) == 0:
- # ignore whitespace inside these elements.
- return
- else:
- # non-whitespace text inside these elements is a syntax error.
- raise AimlParserError, ("Unexpected text inside <%s> element "%parent)+self._location()
- except IndexError:
- # the element stack is empty. This should never happen.
- raise AimlParserError, "Element stack is empty while validating text "+self._location()
-
- # Add a new text element to the element at the top of the element
- # stack. If there's already a text element there, simply append the
- # new characters to its contents.
- try: textElemOnStack = (self._elemStack[-1][-1][0] == "text")
- except IndexError: textElemOnStack = False
- except KeyError: textElemOnStack = False
- if textElemOnStack:
- self._elemStack[-1][-1][2] += text
- else:
- self._elemStack[-1].append(["text", {"xml:space": self._whitespaceBehaviorStack[-1]}, text])
- else:
- # all other text is ignored
- pass
-
- def endElementNS(self, name, qname):
- uri, elem = name
- self.endElement(elem)
-
- def endElement(self, name):
- """Wrapper around _endElement which catches errors in _characters()
- and keeps going.
-
- """
- if self._state == self._STATE_OutsideAiml:
- # If we're outside of an AIML element, ignore all tags
- return
- if self._currentUnknown != "":
- # see if we're at the end of an unknown element. If so, we can
- # stop ignoring everything.
- if name == self._currentUnknown:
- self._currentUnknown = ""
- return
- if self._skipCurrentCategory:
- # If we're skipping the current category, see if it's ending. We
- # stop on ANY </category> tag, since we're not keeping track of
- # state in ignore-mode.
- if name == "category":
- self._skipCurrentCategory = False
- self._state = self._STATE_InsideAiml
- return
- try: self._endElement(name)
- except AimlParserError, msg:
- # Print the message
- sys.stderr.write("PARSE ERROR: %s\n" % msg)
- self._numParseErrors += 1 # increment error count
- # In case of a parse error, if we're inside a category, skip it.
- if self._state >= self._STATE_InsideCategory:
- self._skipCurrentCategory = True
-
- def _endElement(self, name):
- """Verify that an AIML end element is valid in the current
- context.
-
- Raises an AimlParserError if an illegal end element is encountered.
-
- """
- if name == "aiml":
- # </aiml> tags are only legal in the InsideAiml state
- if self._state != self._STATE_InsideAiml:
- raise AimlParserError, "Unexpected </aiml> tag "+self._location()
- self._state = self._STATE_OutsideAiml
- self._whitespaceBehaviorStack.pop()
- elif name == "topic":
- # </topic> tags are only legal in the InsideAiml state, and
- # only if _insideTopic is true.
- if self._state != self._STATE_InsideAiml or not self._insideTopic:
- raise AimlParserError, "Unexpected </topic> tag "+self._location()
- self._insideTopic = False
- self._currentTopic = u""
- elif name == "category":
- # </category> tags are only legal in the AfterTemplate state
- if self._state != self._STATE_AfterTemplate:
- raise AimlParserError, "Unexpected </category> tag "+self._location()
- self._state = self._STATE_InsideAiml
- # End the current category. Store the current pattern/that/topic and
- # element in the categories dictionary.
- key = (self._currentPattern.strip(), self._currentThat.strip(),self._currentTopic.strip())
- self.categories[key] = self._elemStack[-1]
- self._whitespaceBehaviorStack.pop()
- elif name == "pattern":
- # </pattern> tags are only legal in the InsidePattern state
- if self._state != self._STATE_InsidePattern:
- raise AimlParserError, "Unexpected </pattern> tag "+self._location()
- self._state = self._STATE_AfterPattern
- elif name == "that" and self._state == self._STATE_InsideThat:
- # </that> tags are only allowed inside <template> elements or in
- # the InsideThat state. This clause handles the latter case.
- self._state = self._STATE_AfterThat
- elif name == "template":
- # </template> tags are only allowed in the InsideTemplate state.
- if self._state != self._STATE_InsideTemplate:
- raise AimlParserError, "Unexpected </template> tag "+self._location()
- self._state = self._STATE_AfterTemplate
- self._whitespaceBehaviorStack.pop()
- elif self._state == self._STATE_InsidePattern:
- # Certain tags are allowed inside <pattern> elements.
- if name not in ["bot"]:
- raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
- elif self._state == self._STATE_InsideThat:
- # Certain tags are allowed inside <that> elements.
- if name not in ["bot"]:
- raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
- elif self._state == self._STATE_InsideTemplate:
- # End of an element inside the current template. Append the
- # element at the top of the stack onto the one beneath it.
- elem = self._elemStack.pop()
- self._elemStack[-1].append(elem)
- self._whitespaceBehaviorStack.pop()
- # If the element was a condition, pop an item off the
- # foundDefaultLiStack as well.
- if elem[0] == "condition": self._foundDefaultLiStack.pop()
- else:
- # Unexpected closing tag
- raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
-
- # A dictionary containing a validation information for each AIML
- # element. The keys are the names of the elements. The values are a
- # tuple of three items. The first is a list containing the names of
- # REQUIRED attributes, the second is a list of OPTIONAL attributes,
- # and the third is a boolean value indicating whether or not the
- # element can contain other elements and/or text (if False, the
- # element can only appear in an atomic context, such as <date/>).
- _validationInfo101 = {
- "bot": ( ["name"], [], False ),
- "condition": ( [], ["name", "value"], True ), # can only contain <li> elements
- "date": ( [], [], False ),
- "formal": ( [], [], True ),
- "gender": ( [], [], True ),
- "get": ( ["name"], [], False ),
- "gossip": ( [], [], True ),
- "id": ( [], [], False ),
- "input": ( [], ["index"], False ),
- "javascript": ( [], [], True ),
- "learn": ( [], [], True ),
- "li": ( [], ["name", "value"], True ),
- "lowercase": ( [], [], True ),
- "person": ( [], [], True ),
- "person2": ( [], [], True ),
- "random": ( [], [], True ), # can only contain <li> elements
- "sentence": ( [], [], True ),
- "set": ( ["name"], [], True),
- "size": ( [], [], False ),
- "sr": ( [], [], False ),
- "srai": ( [], [], True ),
- "star": ( [], ["index"], False ),
- "system": ( [], [], True ),
- "template": ( [], [], True ), # needs to be in the list because it can be a parent.
- "that": ( [], ["index"], False ),
- "thatstar": ( [], ["index"], False ),
- "think": ( [], [], True ),
- "topicstar": ( [], ["index"], False ),
- "uppercase": ( [], [], True ),
- "version": ( [], [], False ),
- }
-
- def _validateElemStart(self, name, attr, version):
- """Test the validity of an element starting inside a <template>
- element.
-
- This function raises an AimlParserError exception if it the tag is
- invalid. Otherwise, no news is good news.
-
- """
- # Check the element's attributes. Make sure that all required
- # attributes are present, and that any remaining attributes are
- # valid options.
- required, optional, canBeParent = self._validInfo[name]
- for a in required:
- if a not in attr and not self._forwardCompatibleMode:
- raise AimlParserError, ("Required \"%s\" attribute missing in <%s> element " % (a,name))+self._location()
- for a in attr:
- if a in required: continue
- if a[0:4] == "xml:": continue # attributes in the "xml" namespace can appear anywhere
- if a not in optional and not self._forwardCompatibleMode:
- raise AimlParserError, ("Unexpected \"%s\" attribute in <%s> element " % (a,name))+self._location()
-
- # special-case: several tags contain an optional "index" attribute.
- # This attribute's value must be a positive integer.
- if name in ["star", "thatstar", "topicstar"]:
- for k,v in attr.items():
- if k == "index":
- temp = 0
- try: temp = int(v)
- except:
- raise AimlParserError, ("Bad type for \"%s\" attribute (expected integer, found \"%s\") " % (k,v))+self._location()
- if temp < 1:
- raise AimlParserError, ("\"%s\" attribute must have non-negative value " % (k))+self._location()
-
- # See whether the containing element is permitted to contain
- # subelements. If not, this element is invalid no matter what it is.
- try:
- parent = self._elemStack[-1][0]
- parentAttr = self._elemStack[-1][1]
- except IndexError:
- # If the stack is empty, no parent is present. This should never
- # happen.
- raise AimlParserError, ("Element stack is empty while validating <%s> " % name)+self._location()
- required, optional, canBeParent = self._validInfo[parent]
- nonBlockStyleCondition = (parent == "condition" and not (parentAttr.has_key("name") and parentAttr.has_key("value")))
- if not canBeParent:
- raise AimlParserError, ("<%s> elements cannot have any contents "%parent)+self._location()
- # Special-case test if the parent element is <condition> (the
- # non-block-style variant) or <random>: these elements can only
- # contain <li> subelements.
- elif (parent == "random" or nonBlockStyleCondition) and name!="li":
- raise AimlParserError, ("<%s> elements can only contain <li> subelements "%parent)+self._location()
- # Special-case test for <li> elements, which can only be contained
- # by non-block-style <condition> and <random> elements, and whose
- # required attributes are dependent upon which attributes are
- # present in the <condition> parent.
- elif name=="li":
- if not (parent=="random" or nonBlockStyleCondition):
- raise AimlParserError, ("Unexpected <li> element contained by <%s> element "%parent)+self._location()
- if nonBlockStyleCondition:
- if parentAttr.has_key("name"):
- # Single-predicate condition. Each <li> element except the
- # last must have a "value" attribute.
- if len(attr) == 0:
- # This could be the default <li> element for this <condition>,
- # unless we've already found one.
- if self._foundDefaultLiStack[-1]:
- raise AimlParserError, "Unexpected default <li> element inside <condition> "+self._location()
- else:
- self._foundDefaultLiStack[-1] = True
- elif len(attr) == 1 and attr.has_key("value"):
- pass # this is the valid case
- else:
- raise AimlParserError, "Invalid <li> inside single-predicate <condition> "+self._location()
- elif len(parentAttr) == 0:
- # Multi-predicate condition. Each <li> element except the
- # last must have a "name" and a "value" attribute.
- if len(attr) == 0:
- # This could be the default <li> element for this <condition>,
- # unless we've already found one.
- if self._foundDefaultLiStack[-1]:
- raise AimlParserError, "Unexpected default <li> element inside <condition> "+self._location()
- else:
- self._foundDefaultLiStack[-1] = True
- elif len(attr) == 2 and attr.has_key("value") and attr.has_key("name"):
- pass # this is the valid case
- else:
- raise AimlParserError, "Invalid <li> inside multi-predicate <condition> "+self._location()
- # All is well!
- return True
-
-def create_parser():
- """Create and return an AIML parser object."""
- parser = xml.sax.make_parser()
- handler = AimlHandler("UTF-8")
- parser.setContentHandler(handler)
- #parser.setFeature(xml.sax.handler.feature_namespaces, True)
- return parser \ No newline at end of file