More GIFT parsing work.

Basic functionality works. Multiple-question parsing is still broken, and whitespace handling is too picky.
author: Greg S <enimihil@gmail.com> 2009-04-01 21:05:25 (GMT)
committer: Greg S <enimihil@gmail.com> 2009-04-01 21:05:25 (GMT)
commit: b4ee4bdc3719f1c3f57cb00795291d2652ee4ad8 (patch)
tree: 4cca1927d4e83b347f12743ce03661b9c01728e8
parent: ec60486cafe24e828f52a84de2d6e04b39d97c6a (diff)
2 files changed, 118 insertions, 38 deletions
diff --git a/quizdata/_format_gift.py b/quizdata/_format_gift.py
index b88e51c..437bf16 100644
--- a/quizdata/_format_gift.py
+++ b/quizdata/_format_gift.py
@@ -2,42 +2,80 @@
     Parsing and support functions for the Moodle GIFT format.
 '''
 from pyparsing import (Word, Literal, Optional, Group, OneOrMore, 
-                       ParseException, Combine, restOfLine, printables,
-                       nums)
+                       ParseException, Combine, restOfLine,
+                       nums, StringEnd, ZeroOrMore, oneOf, 
+                       originalTextFor, CharsNotIn, NotAny,
+                       ParserElement, printables, White,
+                       FollowedBy)
 
-_special_chars = set(":/=~#{}->")
-_filt_printables = ''.join([ p for p in printables if p not in _special_chars ])
+__all__ = [ 'parse', 'question', 'questions' ]
 
-_text = OneOrMore(Word(_filt_printables))
-_number = Combine(Word(nums) + Optional("." + Optional(Word(nums))))
+ParserElement.enablePackrat()
+#ParserElement.setDefaultWhitespaceChars(" \t")
 
-_comment = Literal("//") + restOfLine
-_dcolon = Literal("::")
-_qtitle = _dcolon + _text + _dcolon
+comment = Literal("//") + restOfLine
+NL     = Literal("\n").suppress()
 
-_correct_ans = Literal("=") + _text
-_wrong_ans = Literal("~") + _text
-_ans_explain = Literal("#") + _text
-_ans_tf = Literal("TRUE") | Literal("FALSE") | Literal("T") | Literal("F")
-_ans_match = Literal("=") + _text + Literal("->") + _text
+def kill_whitespace(s, l, t):
+    #print t.asList()[1:-1]
+    return [ ' '.join(t.asList()[1:-1]).strip() ]
 
-_ans_numeric = (Optional(Literal("="))
-                + (_number + Optional( Literal(":") + _number) 
-                  |_number + Literal("..") + _number)
-                + Optional( _ans_explain ))
+def printables_except(chars):
+    l = list(printables)
+    for c in chars:
+        l.remove(c)
+    return ''.join(l)
 
-_answer = ( (( _correct_ans | _wrong_ans ) + Optional(_ans_explain)) 
-           | _ans_tf | _ans_match )
+def restricted_text(allowed_chars):
+    return originalTextFor(
+            Optional(NL) 
+            + OneOrMore((Word(allowed_chars)
+                + Optional(NL)))
+            ).setParseAction( kill_whitespace )
 
-question = (Optional(_qtitle) + _text 
-                    + ("{" + OneOrMore(_answer) + "}" |
-                       "{#" + OneOrMore(_ans_numeric) + "}"))
-question.ignore(_comment)
+text = restricted_text(printables)
+qtext = restricted_text(printables_except("{"))
+ans_text = restricted_text(printables_except("=~#}->"))
+after_text = text + FollowedBy(NL+NL)
+title_text = restricted_text(printables_except(":"))
 
-questions = OneOrMore(question + Literal("\n\n"))
-questions.ignore(_comment)
+number = Combine(Word(nums) + Optional("." + Optional(Word(nums))))
+dcolon = Literal("::")
+qtitle = dcolon.suppress() + title_text + dcolon.suppress()
 
-def _parse_only(text):
+correct_ans = (Literal("=") + ans_text)
+wrong_ans = Literal("~") + ans_text
+ans_explain = Literal("#") + ans_text
+ans_tf = oneOf("T F TRUE FALSE")
+ans_match = Literal("=") + ans_text + Literal("->") + ans_text
+
+ans_numeric = Group(Optional(Literal("="))
+                + (number + Optional( Literal(":") + number) 
+                  ^number + Literal("..") + number)
+                + Optional( ans_explain ))
+
+answer = Group(( ( correct_ans ^ wrong_ans ^ ans_tf) + Optional(ans_explain)) 
+               ^ ans_match )
+
+answer_list = ((Literal("{").suppress() 
+                  + OneOrMore(answer) 
+                  + Literal("}").suppress())
+              ^
+               (Literal("{#").suppress() 
+                  + OneOrMore(ans_numeric) 
+                  + Literal("}").suppress()))
+
+question = ZeroOrMore(NL) + Group( Optional(qtitle)("title") + 
+                  qtext("text") + 
+                  answer_list("answers") + 
+                  Optional(after_text)("text_additional") )
+
+question.ignore(comment)
+
+questions = (question + ZeroOrMore(NL + NL + question))("questions") + StringEnd()
+questions.ignore(comment)
+
+def parse_only(text):
     return questions.parseText(text)
 
 def parse(text):
diff --git a/tests/test_gift_parse.py b/tests/test_gift_parse.py
index f982384..4df3fd6 100755
--- a/tests/test_gift_parse.py
+++ b/tests/test_gift_parse.py
@@ -2,45 +2,87 @@
 
 import sys
 from os import path
+from pprint import pprint
 
 base_path = path.abspath(path.join(path.dirname(path.abspath(__file__)),'..'))
 sys.path.append(base_path)
 
 from quizdata import _format_gift
 
-#def test_example():
-#    full_example = open(path.join(base_path, 'tests', "examples.txt")).read()
-#
-#    print _format_gift.questions.parseString(full_example)
+def test_example():
+    full_example = open(path.join(base_path, 'tests', "examples.txt"))
+
+    print _format_gift.questions.parseFile(full_example)
 
 def test_text():
-    print _format_gift._text.parseString(
+    print(_format_gift.text.parseString(
     "this is ? some ! text that should count as ' a single \"bit\" \
-     of text to the GIFT parser.")
+     of text to the GIFT parser."))
 
 def test_simple_tf():
-    print _format_gift.question.parseString("4 is an even number{TRUE}\n")
+    print(_format_gift.question.parseString("4 is an even number{TRUE}\n"))
 
 def test_simple_multi():
-    print _format_gift.question.parseString("""
+    print(_format_gift.question.parseString("""
     What is the capital of France?{=Paris ~London ~Guam ~Tomato}
 
-    """)
+    """))
     
 def test_title():
-    print _format_gift.question.parseString("""
+    print(_format_gift.question.parseString("""
     ::Capital of France
     ::What is the capital of France? {
         =Paris
         ~London
+
         ~Guam
         ~Tomato
     }
 
-    """)
+    """))
+
+def test_explain():
+    print(_format_gift.question.parseString("""
+    What is an integer?{
+        ~A whole number     # Whole numbers are only positive
+        ~The natural numbers plus their negations   # Mostly true, may or may not include zero.
+        ~A type of fish     # Not even close...
+        =The set of real numbers that can be expressed as unit fractions. # There we go, ...ish.
+    }"""))
+
+def test_matching():
+    print(_format_gift.question.parseString("""
+    Match the countries with their capitals. {
+        = Italy -> Rome
+        = USA -> Washington D.C.
+        = France -> Paris
+        = U.K. -> London
+    }"""))
+
+
+def test_questions():
+    print(_format_gift.questions.parseString("""
+Matching Question. {
+    =subquestion1 -> subanswer1
+    =subquestion2 -> subanswer2
+    =subquestion3 -> subanswer3
+    }
+    
+Match the following countries with their corresponding capitals. {
+    =Canada -> Ottawa
+    =Italy  -> Rome
+    =Japan  -> Tokyo
+    =India  -> New Delhi
+    }
+"""))
+
 
 if __name__=='__main__':
     test_text()
     test_simple_tf()
     test_simple_multi()
     test_title()
+    test_explain()
+    test_matching()
+    test_questions()
+    test_example()
author	Greg S <enimihil@gmail.com>	2009-04-01 21:05:25 (GMT)
committer	Greg S <enimihil@gmail.com>	2009-04-01 21:05:25 (GMT)
commit	b4ee4bdc3719f1c3f57cb00795291d2652ee4ad8 (patch)
tree	4cca1927d4e83b347f12743ce03661b9c01728e8
parent	ec60486cafe24e828f52a84de2d6e04b39d97c6a (diff)