From b2d38f17e56bf3b391fe20b77532df23e5721413 Mon Sep 17 00:00:00 2001 From: Aleksey Lim Date: Thu, 12 Aug 2010 16:20:23 +0000 Subject: Add copyright notes to python files --- (limited to 'bot/aiml') diff --git a/bot/aiml/AimlParser.py b/bot/aiml/AimlParser.py deleted file mode 100644 index 75c2cf1..0000000 --- a/bot/aiml/AimlParser.py +++ /dev/null @@ -1,545 +0,0 @@ -from xml.sax.handler import ContentHandler -from xml.sax.xmlreader import Locator -import sys -import xml.sax -import xml.sax.handler - -class AimlParserError(Exception): pass - -class AimlHandler(ContentHandler): - # The legal states of the AIML parser - _STATE_OutsideAiml = 0 - _STATE_InsideAiml = 1 - _STATE_InsideCategory = 2 - _STATE_InsidePattern = 3 - _STATE_AfterPattern = 4 - _STATE_InsideThat = 5 - _STATE_AfterThat = 6 - _STATE_InsideTemplate = 7 - _STATE_AfterTemplate = 8 - - def __init__(self, encoding = "UTF-8"): - self.categories = {} - self._encoding = encoding - self._state = self._STATE_OutsideAiml - self._version = "" - self._namespace = "" - self._forwardCompatibleMode = False - self._currentPattern = "" - self._currentThat = "" - self._currentTopic = "" - self._insideTopic = False - self._currentUnknown = "" # the name of the current unknown element - - # This is set to true when a parse error occurs in a category. - self._skipCurrentCategory = False - - # Counts the number of parse errors in a particular AIML document. - # query with getNumErrors(). If 0, the document is AIML-compliant. - self._numParseErrors = 0 - - # TODO: select the proper validInfo table based on the version number. - self._validInfo = self._validationInfo101 - - # This stack of bools is used when parsing
<li> elements inside - # <condition> elements, to keep track of whether or not an - # attribute-less "default"
<li> element has been found yet. Only - # one default
<li> is allowed in each <condition> element. We need - # a stack in order to correctly handle nested <condition> tags. - self._foundDefaultLiStack = [] - - # This stack of strings indicates what the current whitespace-handling - # behavior should be. Each string in the stack is either "default" or - # "preserve". When a new AIML element is encountered, a new string is - # pushed onto the stack, based on the value of the element's "xml:space" - # attribute (if absent, the top of the stack is pushed again). When - # ending an element, pop an object off the stack. - self._whitespaceBehaviorStack = ["default"] - - self._elemStack = [] - self._locator = Locator() - self.setDocumentLocator(self._locator) - - def getNumErrors(self): - "Return the number of errors found while parsing the current document." - return self._numParseErrors - - def setEncoding(self, encoding): - """Set the text encoding to use when encoding strings read from XML. - - Defaults to 'UTF-8'. - - """ - self._encoding = encoding - - def _location(self): - "Return a string describing the current location in the source file." - line = self._locator.getLineNumber() - column = self._locator.getColumnNumber() - return "(line %d, column %d)" % (line, column) - - def _pushWhitespaceBehavior(self, attr): - """Push a new string onto the whitespaceBehaviorStack. - - The string's value is taken from the "xml:space" attribute, if it exists - and has a legal value ("default" or "preserve"). Otherwise, the previous - stack element is duplicated. - - """ - assert len(self._whitespaceBehaviorStack) > 0, "Whitespace behavior stack should never be empty!"
- try: - if attr["xml:space"] == "default" or attr["xml:space"] == "preserve": - self._whitespaceBehaviorStack.append(attr["xml:space"]) - else: - raise AimlParserError, "Invalid value for xml:space attribute "+self._location() - except KeyError: - self._whitespaceBehaviorStack.append(self._whitespaceBehaviorStack[-1]) - - def startElementNS(self, name, qname, attr): - print "QNAME:", qname - print "NAME:", name - uri,elem = name - if (elem == "bot"): print "name:", attr.getValueByQName("name"), "a'ite?" - self.startElement(elem, attr) - pass - - def startElement(self, name, attr): - # Wrapper around _startElement, which catches errors in _startElement() - # and keeps going. - - # If we're inside an unknown element, ignore everything until we're - # out again. - if self._currentUnknown != "": - return - # If we're skipping the current category, ignore everything until - # it's finished. - if self._skipCurrentCategory: - return - - # process this start-element. - try: self._startElement(name, attr) - except AimlParserError, msg: - # Print the error message - sys.stderr.write("PARSE ERROR: %s\n" % msg) - - self._numParseErrors += 1 # increment error count - # In case of a parse error, if we're inside a category, skip it. - if self._state >= self._STATE_InsideCategory: - self._skipCurrentCategory = True - - def _startElement(self, name, attr): - if name == "aiml": - # tags are only legal in the OutsideAiml state - if self._state != self._STATE_OutsideAiml: - raise AimlParserError, "Unexpected tag "+self._location() - self._state = self._STATE_InsideAiml - self._insideTopic = False - self._currentTopic = u"" - try: self._version = attr["version"] - except KeyError: - # This SHOULD be a syntax error, but so many AIML sets out there are missing - # "version" attributes that it just seems nicer to let it slide. 
- #raise AimlParserError, "Missing 'version' attribute in tag "+self._location() - #print "WARNING: Missing 'version' attribute in tag "+self._location() - #print " Defaulting to version 1.0" - self._version = "1.0" - self._forwardCompatibleMode = (self._version != "1.0.1") - self._pushWhitespaceBehavior(attr) - # Not sure about this namespace business yet... - #try: - # self._namespace = attr["xmlns"] - # if self._version == "1.0.1" and self._namespace != "http://alicebot.org/2001/AIML-1.0.1": - # raise AimlParserError, "Incorrect namespace for AIML v1.0.1 "+self._location() - #except KeyError: - # if self._version != "1.0": - # raise AimlParserError, "Missing 'version' attribute(s) in tag "+self._location() - elif self._state == self._STATE_OutsideAiml: - # If we're outside of an AIML element, we ignore all tags. - return - elif name == "topic": - # tags are only legal in the InsideAiml state, and only - # if we're not already inside a topic. - if (self._state != self._STATE_InsideAiml) or self._insideTopic: - raise AimlParserError, "Unexpected tag", self._location() - try: self._currentTopic = unicode(attr['name']) - except KeyError: - raise AimlParserError, "Required \"name\" attribute missing in element "+self._location() - self._insideTopic = True - elif name == "category": - # tags are only legal in the InsideAiml state - if self._state != self._STATE_InsideAiml: - raise AimlParserError, "Unexpected tag "+self._location() - self._state = self._STATE_InsideCategory - self._currentPattern = u"" - self._currentThat = u"" - # If we're not inside a topic, the topic is implicitly set to * - if not self._insideTopic: self._currentTopic = u"*" - self._elemStack = [] - self._pushWhitespaceBehavior(attr) - elif name == "pattern": - # tags are only legal in the InsideCategory state - if self._state != self._STATE_InsideCategory: - raise AimlParserError, "Unexpected tag "+self._location() - self._state = self._STATE_InsidePattern - elif name == "that" and self._state == 
self._STATE_AfterPattern: - # <that> are legal either inside a