From f4475e3e0fc36542e70c16888e7d20a40e3680b1 Mon Sep 17 00:00:00 2001 From: Walter Bender Date: Fri, 19 Nov 2010 14:16:38 +0000 Subject: adding RTF support for show_description --- (limited to 'TurtleArt/RtfParser.py') diff --git a/TurtleArt/RtfParser.py b/TurtleArt/RtfParser.py new file mode 100644 index 0000000..4b1f1dc --- /dev/null +++ b/TurtleArt/RtfParser.py @@ -0,0 +1,156 @@ +#Copyright (c) 2010, Loic Fejoz +#Copyright (c) 2010, Walter Bender + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + + +import sys + +class RtfException(Exception): + pass + +plaintext = 1 +control = 2 +argument = 3 +backslash = 4 +escapedChar = 5 + +class RtfParser(object): + + def __init__(self, unicode=False): + self.state = plaintext + self.arg = '' + self.token = '' + self.unicode = unicode + self.par = False + self.output = '' + + def getChar(self, code): + """ called when an escaped char is found """ + return chr(code) + + def getNonBreakingSpace(self): + return ' ' + + def pushState(self): + pass + + def popState(self): + pass + + def putChar(self): + pass + + def doControl(self,token,arg): + pass + + def feed(self,txt): + for c in txt: + self.feedChar(c) + + def feedChar(self,char): + if self.state == plaintext: # this is just normal user content + if char == '\\': + self.state = backslash + elif char == '{': + self.pushState() + elif char == '}': + self.popState() + else: + self.putChar(char) + elif self.state == backslash: # a command or escape + if char == '\\' or char == '{' or char == '}': + self.putChar(char) + self.state = plaintext + else: + if char.isalpha() or char in ('*', '-', '|'): + self.state = control + self.token = char + elif char == "'": + self.state = escapedChar + self.escapedChar = '' + elif char in ['\\', '{','}']: + self.putChar(char) + self.state = plaintext + elif char == "~": # non breaking space + self.putChar(self.getNonBreakingSpace()) + self.state = plaintext + else: + raise RtfException,'unexpected %s after \\' % char + elif self.state == escapedChar: + self.escapedChar = self.escapedChar + char + if len(self.escapedChar) == 2: + char = self.getChar(int(self.escapedChar,16)) + self.putChar(char) + self.state = plaintext + elif self.state == control: # collecting the command token + if char.isalpha(): + self.token = self.token + char + elif char.isdigit() or char== '-': + self.state = argument + self.arg = char + else: + self.doControl(self.token,self.arg) + self.state = plaintext + if char == '\\': + self.state = backslash + elif char == '{': + self.pushState() + elif char == '}': + self.popState() + else: + if not char.isspace(): + self.putChar(char) + elif self.state == argument: # collecting the optional command argument + if char.isdigit(): + self.arg = self.arg + char + else: + self.state = plaintext + self.doControl(self.token,self.arg) + if char == '\\': + self.state = backslash + elif char == '{': + self.pushState() + elif char == '}': + self.popState() + else: + if not char.isspace(): + self.putChar(char) + + +class RtfTextOnly(RtfParser): + def __init__(self): + RtfParser.__init__(self) + self.level = 0 + + def pushState(self): + self.level = self.level + 1 + + def popState(self): + self.level = self.level - 1 + + def putChar(self,ch): + if self.par: + self.output += ch + + def doControl(self,token,arg): + if token[0:3] == 'par': + self.par = True + pass + +if __name__ == '__main__': + text_only = RtfTextOnly() + if len(sys.argv) != 2: + print 'Usage : %s file.rtf' % sys.argv[0] + else: + for line in open(sys.argv[1], 'r'): + text_only.feed(line) + print text_only.output -- cgit v0.9.1