Diffstat (limited to 'pyLogoCompiler')
-rw-r--r--  pyLogoCompiler/Communication.py    649
-rw-r--r--  pyLogoCompiler/Exceptions.py        35
-rw-r--r--  pyLogoCompiler/__init__.py           0
-rw-r--r--  pyLogoCompiler/__init__.pyc        bin  (0 -> 177 bytes)
-rw-r--r--  pyLogoCompiler/ply/__init__.py       4
-rw-r--r--  pyLogoCompiler/ply/__init__.pyc    bin  (0 -> 222 bytes)
-rw-r--r--  pyLogoCompiler/ply/cpp.py          898
-rw-r--r--  pyLogoCompiler/ply/ctokens.py      133
-rw-r--r--  pyLogoCompiler/ply/lex.py         1057
-rw-r--r--  pyLogoCompiler/ply/lex.pyc         bin  (0 -> 29222 bytes)
-rw-r--r--  pyLogoCompiler/ply/yacc.py        3279
-rw-r--r--  pyLogoCompiler/ply/yacc.pyc        bin  (0 -> 68704 bytes)
-rw-r--r--  pyLogoCompiler/pyLex.py            260
-rw-r--r--  pyLogoCompiler/pyLex.pyc           bin  (0 -> 7880 bytes)
-rw-r--r--  pyLogoCompiler/pyYacc.py          1086
-rw-r--r--  pyLogoCompiler/pyYacc.pyc          bin  (0 -> 28393 bytes)
16 files changed, 7401 insertions, 0 deletions
diff --git a/pyLogoCompiler/Communication.py b/pyLogoCompiler/Communication.py new file mode 100644 index 0000000..469b9ec --- /dev/null +++ b/pyLogoCompiler/Communication.py @@ -0,0 +1,649 @@ +# -*- coding: utf-8 -*- +#----------------------------BRGOGO---------------------------------------- +#Project site: http://br-gogo.sourceforge.net +# +# Name: pyLogo +# Origiginal from: Marcelo Barbosa +# About: Esta é uma versão da linguagem Logo em python para a placa Gogo board +# (This is a version of Logo in python for the card board Gogo) +# +# Modified by: Felipe Augusto Silva +# email: suportegogo@gmail.com +# compiler version: 0.23 +#obs.: version to work with pyGogoMonitor +# +# Modified by: Derek O'Connell +# email: doconnel@gmail.com +# +#----------------------------------------------------------------------------- + +import sys +#from time import sleep +import os +import serial +import glob + +from gettext import gettext as _ + +from pyLogoCompiler import Exceptions +from pyLogoCompiler import pyYacc + + + +# ============================================================ + +MODULE_DEBUG = False + +serialPort = None + + +# ============================================================ + +def printFunctionName(caller=""): + if caller <> "": + print ">>>", caller + print ">>> " + sys._getframe(1).f_code.co_name + "()" + +def callerName(): + return sys._getframe(2).f_code.co_name + "()" + + + +#================================================================================ +#================================================================================ +class SerialIF: + + # The coms port is set up when ComMgr is instantiated + def __init__(self, p=None, debug=False): + global serialPort + + serialPort = serial.Serial(p, 9600, bytesize = serial.EIGHTBITS, \ + parity=serial.PARITY_NONE, stopbits=serial.STOPBITS_ONE, \ + timeout=0.1, xonxoff=0, rtscts=0 ) + self.debug = debug + + def rxBytes(self, size=0): + s = serialPort.read(size) + r = [0] * len(s) + for i in range(len(s)): + r[i] = ord(s[i]) + return tuple(r) + + def rxOneByte(self): + x = self.rxBytes(1) + if len(x)>0: + return x[0] + return False + + def rx16BitValueLSBFirst(self): + lowByte = serialPortIF.rxOneByte() & 0xff + highByte = serialPortIF.rxOneByte() #& 0x3 + return (highByte << 8) + lowByte + + def txByteRxEcho(self, byte): + serialPort.write(chr(byte)) + self.rxOneByte() # Rx hw echo byte + # Should check same byte returned here + return True + + + def txBytesNoEcho(self, bytes): + s="" + for i in bytes: + s=s+chr(i) + serialPort.write(s) + + + def txWordMSBFirst(self, word): + self.txByteRxEcho(word / 256) + self.txByteRxEcho(word % 256) + return True + + + def flush(self): + self.rxBytes(10) # Flush does not work properly + serialPort.flushInput() + serialPort.flushOutput() + + + def openPort(self): + try: + print "opening port..." 
+ serialPort.open() + print "port opened" + return True + except serial.serialutil.SerialException: + print "serial.serialutil.SerialException" + return False + + def closePort(self): + if serialPort.isOpen(): + serialPort.read(100) # empty buffer serial + serialPort.close() + return True + + def checkConnection(self): + printFunctionName() + if not serialPort.isOpen(): + return False + self.flush() + serialPort.write(chr(135)) + i = serialPort.read(1); + if i == -1: + print "< Cannot find serial interface" + return False + serialPort.write(chr(0)) + j = serialPort.read(1); + k = serialPort.read(1) + if j == '' or k == '': + return False + if ord(i) == 135 and ord(j) == 0 and ord(k) == 55: + self.flush() + return True + return False + + def getPort(self): + return serialPort.port + + def scanUnix(self): + """scan for available ports (serial and USB). return a list""" + return glob.glob('/dev/ttyUSB*') + glob.glob('/dev/ttyACM*') + glob.glob('/dev/ttyS*') + + def scanNT(self): + """scan for available ports. return a list of tuples (num, name)""" + available = [] + for i in range(256): + try: + s = serial.Serial(i) + available.append(i) + s.close() + except serial.SerialException: + pass + return available + + + def autoConnect(self): + printFunctionName() + if os.name == 'nt': + available = self.scanNT() + else: + available = self.scanUnix() + available.remove('/dev/ttyS0') # Internal to XO + print "< Ports found:", available + for i in available: + print "< Trying port: ",i + serialPort.port = i + if self.openPort(): + if self.checkConnection(): + print "< Connected on port:", i + return True + else: + print "< Failed to connect to: ", i + + return False + + def isUSBVersion(self): + return str(self.getPort()).find("ACM") != -1 + + + +#================================================================================ +#================================================================================ +class CricketIF(): + + # Memory map: + ADDR_CODE_START = 0 # Code memory start (1280 bytes) + ADDR_RECDATA_START = 0x0500 # Recorded data start (2500 bytes) + ADDR_RECDATA_END = 0x0EC3 # Recorded data end + ADDR_CODE_END = 0x0FEF # Code memory end (4080 bytes. Note overlap!) + # Button Code Vector Addresses: Address of pointer to code to run when button pressed + ADDR_BUTTON1_VTR = 0x0FF0 # Address of pointer to code to run for button 1 + #ADDR_BUTTON2_VTR = 0x0FF2 # Address of pointer to code to run for button 2 + #ADDR_CRICKET_NAME = 0x0FF4 # 11 bytes (to 0xFFE) + #ADDR_AUTOSTART_FLAG = 0x0FFF # 1 byte + + # WIP: Preparation for preserving area of memory used for data recording + # (- check exactly what compiler spits out, ie, range of function addresses) + # (- using own variables for now...) 
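The constants that follow implement this split. As a standalone illustration (a sketch, not part of the original file), the two usable code regions around the preserved recorded-data area work out as:

    # Sketch only: mirrors the CricketIF memory-map constants above.
    ADDR_CODE_START    = 0x0000   # code memory start
    ADDR_RECDATA_START = 0x0500   # recorded data start
    ADDR_RECDATA_END   = 0x0EC3   # recorded data end
    ADDR_CODE_END      = 0x0FEF   # code memory end

    # Two code regions remain if the recorded-data area must survive:
    code1 = (ADDR_CODE_START, ADDR_RECDATA_START - ADDR_CODE_START)  # (0x0000, 1280 bytes)
    code2 = (ADDR_RECDATA_END, ADDR_CODE_END - ADDR_RECDATA_END)     # (0x0EC3, 300 bytes)
    print(code1, code2)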
+ ADDR_CODE1_START = ADDR_CODE_START + ADDR_CODE1_SIZE = ADDR_RECDATA_START - ADDR_CODE1_START + ADDR_DATA_START = ADDR_RECDATA_START + ADDR_DATA_SIZE = ADDR_RECDATA_END - ADDR_RECDATA_START + ADDR_CODE2_START = ADDR_RECDATA_END + ADDR_CODE2_SIZE = ADDR_CODE_END - ADDR_RECDATA_END + + + CMD_SET_PTR_HI_BYTE = 0x80 # 128 + CMD_SET_PTR_LOW_BYTE = 0x81 # 129 + + CMD_READ_BYTES_COUNT_HI = 0x82 # 130 + CMD_READ_BYTES_COUNT_LOW = 0x83 # 131 + + CMD_WRITE_BYTES_COUNT_HI = 0x84 # 132 + CMD_WRITE_BYTES_COUNT_LOW = 0x85 # 133 + CMD_WRITE_BYTES_SENDING = 0x86 # 134 + + + CMD_RUN = 0x86 # 134 + CMD_CHECK = 0x87 # 135 + CMD_NAME = 0x87 # 135 + + RES_FOUND = 0x37 # 55, Response to CMD_CHECK if Cricket found + + + def __init__(self, debug=False): + pass + + #def sendMemoryPointer(self, ptr): + # self.cmdReadByteCount.(ptr) + + def cmdReadByteCount(self, byteCount): + serialPortIF.txByteRxEcho(CricketIF.CMD_READ_BYTES_COUNT_LOW) + serialPortIF.txWordMSBFirst(byteCount) + return True + + def cmdWriteByteCount(self, byteCount): + serialPortIF.txByteRxEcho(CricketIF.CMD_WRITE_BYTES_COUNT_LOW) + serialPortIF.txWordMSBFirst(byteCount) + return True + + def cmdRun(self): + #serialPort.write(chr(RUN)) + serialPortIF.txByteRxEcho(CricketIF.CMD_RUN) + return True + + def cmdCheckAllCrickets(self): + serialPortIF.txByteRxEcho(CricketIF.CMD_CHECK) + serialPortIF.txByteRxEcho(0) + return serialPortIF.rxOneByte() == CricketIF.RES_FOUND + + +#==================================== + + def writeBytesToCricketMemory(self, ptr, bytes): + printFunctionName() + print "< Bytes: ", bytes + try: + # Shouldn't this be "(ptr + len(bytes) < ADDR_CODE_END" ??? + if len(bytes) < CricketIF.ADDR_CODE_END: + self.cmdReadByteCount(ptr) # To set memory ptr? + self.cmdWriteByteCount(len(bytes)) + #serialPortIF.writeBytes(bytes) + for i in range(len(bytes)): + serialPortIF.txByteRxEcho(bytes[i] % 256) + serialPortIF.rxOneByte() # Extra echo byte! + except: + print "< Problem sending data" + raise Exceptions.CommunicationProblem + + + def cmdSetButton1Pointer(self, vector): + self.writeBytesToCricketMemory(CricketIF.ADDR_BUTTON1_VTR, [vector/256, vector%256]) + +#======================================== + + def txCompiledCodeEnd(self): + start = [128,0,0] # 0xF00000 ??? 
+ end = [0] + + overhead = len(start) + len(end) + + if self.byteCount + overhead < CricketIF.ADDR_CODE_END: + self.writeBytesToCricketMemory(self.byteCount, start) + self.cmdSetButton1Pointer(self.byteCount) + self.byteCount = self.byteCount + overhead + else: + self.byteCount = self.byteCount + overhead + raise serial.serialutil.SerialException + return overhead + + + def txCompiledCode(self, byteCode, byteCodeCount): + printFunctionName() + #try: + self.byteCode = byteCode + self.byteCount = byteCodeCount + self.writeBytesToCricketMemory(CricketIF.ADDR_CODE_START, self.byteCode) + self.txCompiledCodeEnd() + #return "< Send success" + #except serial.serialutil.SerialException: + #return "< Problem sending, check communication" + + + def compile(self, logoCode, ErrMsgFunc=None): + printFunctionName() + self.byteCode = pyYacc.analisarCodigo(logoCode, ErrMsgFunc) + self.byteCodeCount = len(self.byteCode) + print "< code:", self.byteCode + + + def download(self): + printFunctionName() + if not serialPortIF.checkConnection(): + print "< Gogo disconnected" + #self.showInfo(_("Gogo disconnected")) + #return False + raise Exceptions.ConnectionProblem + + self.txCompiledCode(self.byteCode, self.byteCodeCount) + + + +#================================================================================ +#================================================================================ + +class GoGoIF: + + NUMBER_OF_SENSORS = 8 + + # COMMAND/RESPONSE HEADERS: + + HDR_SEND = 0x54,0xFE + HDR_ACK = 0X55,0xFF,0xAA + + + # SINGLE BYTE GOGO COMMANDS: + + CMD_PING = 0x00 # 00 Add Board ID + CMD_READ_SENSOR = 0x20 # 32 Add sensor number*4 + CMD_MOTOR_ON = 0x40, # 64, + CMD_MOTOR_OFF = 0x44, # 68, + CMD_MOTOR_REVERSE = 0x48, # 72, + CMD_MOTOR_THISWAY = 0x4C, # 78, !!! + CMD_MOTOR_THATWAY = 0x50, # 80, + CMD_MOTOR_COAST = 0x54, # 84, + CMD_SET_MOTOR_POWER = 0x60 # 96 Add motor power * 4 + + + # DOUBLE BYTE GOGO COMMANDS: + + BURST_MODE_NORMAL = 0 + BURST_MODE_SLOW = 1 + + CMD_TALK_TO_MOTOR = 0x80, # 128, # Byte 2: motors + CMD_SET_BURST = 0xA0 # 160 # Add burst mode + CMD_LED_ON = 0xC0,0 # 192,0 + CMD_LED_OFF = 0xC1,0 # 193,0 + CMD_BEEP = 0xC4,0 # 196,0 + CMD_SET_PWM_DUTY = 0xC8, # 200, # Byte 2: duty cycle + CMD_UPLOAD_EEPROM = 0xCC, # 204, # >>>>>>>> + # Add upper 2 bits of the number of bytes to upload. Byte 2: lower 8 bits + + + # UPLOADING RECORDED DATA + + CMD_AUTO_UPLOAD = 0xCC, 0x00 + HDR_AUTO_UPLOAD_LEN = 0xEE, 0x11 + + + + + def __init__(self, debug=False): + pass + + def sendCmd(self, code, flush=True): + printFunctionName(callerName()) + + #if self.checkConnection(): + # print "Gogo conectada" + #else: + # print "Gogo desconectada!" 
+ # self.showInfo("Gogo desconectada") + # return False + + serialPortIF.flush() + + command = GoGoIF.HDR_SEND + code + serialPortIF.txBytesNoEcho(command) + + cmd_response = serialPortIF.rxBytes(len(command)) + if cmd_response == '': + print "< No response to command" + return False + ack_response = serialPortIF.rxBytes(3) + + print cmd_response, command + print ack_response, GoGoIF.HDR_ACK + + if flush: + serialPortIF.flush() + + #print cmd_response,ack_response + if cmd_response == command and ack_response == GoGoIF.HDR_ACK: + print "< Command successfully sent" + return True + + print "< Error: cmd_response or ack_response" + return False + + + def readSensor(self, sensorNumber=0): + if sensorNumber >= GoGoIF.NUMBER_OF_SENSORS: + print "readSensor(): Sensor does not exist:",sensorNumber + return -1 + + command = GoGoIF.HDR_SEND + tuple([GoGoIF.CMD_READ_SENSOR + (sensorNumber << 2)]) + serialPortIF.txBytesNoEcho(command) + + cmd_response = serialPortIF.rxBytes(3) + ack = serialPortIF.rxBytes(2) + + if not (cmd_response == command and ack == tuple(GoGoIF.HDR_ACK[:2])): + print "readSensor(): Error reading sensor:",sensorNumber + #print command,"!=",cmd_response + #print ack,"!=",tuple(GoGoIF.HDR_ACK[:2]) + serialPortIF.flush() + if serialPortIF.checkConnection(): + return -1 + else: + raise Exceptions.ConnectionProblem + + highByte = serialPortIF.rxOneByte() & 0x3 + lowByte = serialPortIF.rxOneByte() & 0xff + value = (highByte << 8) + lowByte + return value + + + def motorOn(self): + return self.sendCmd(GoGoIF.CMD_MOTOR_ON) + def motorOff(self): + return self.sendCmd(GoGoIF.CMD_MOTOR_OFF) + + #TODO + def motorBreak(self): + self.motorCoast() + def motorReverse(self): + return self.sendCmd(GoGoIF.CMD_MOTOR_REVERSE) + def motorThisway(self): + return self.sendCmd(GoGoIF.CMD_MOTOR_THISWAY) + def motorThatway(self): + return self.sendCmd(GoGoIF.CMD_MOTOR_THATWAY) + def motorCoast(self): + return self.sendCmd(GoGoIF.CMD_MOTOR_COAST) + + + def talkToMotor(self, motors=""): + byte = 0 + if 'a' in motors.lower(): + byte = byte + 1 + if 'b' in motors.lower(): + byte = byte + (1 << 1) + if 'c' in motors.lower(): + byte = byte + (1 << 2) + if 'd' in motors.lower(): + byte = byte + (1 << 3) + return self.sendCmd(GoGoIF.CMD_TALK_TO_MOTOR + (byte,)) + + def setMotorPower(self, power=0): + if power < 8: + command = (tuple([GoGoIF.CMD_SET_MOTOR_POWER + (power << 2)])) + else: + command = (tuple([GoGoIF.CMD_SET_MOTOR_POWER + (7 << 2)])) + return self.sendCmd(command) + + #TODO + def setBurstMode(self, burstBits=0, burstMode=0): + if burstMode < 0 or burstMode > 1: + return False + + command = tuple([GoGoIF.CMD_SET_BURST + burstMode, burstBits]) + return self.sendCmd(command) + + def ledOn(self): + return self.sendCmd(GoGoIF.CMD_LED_ON) + + def ledOff(self): + return self.sendCmd(GoGoIF.CMD_LED_OFF) + + def beep(self): + return self.sendCmd(GoGoIF.CMD_BEEP) + + #TODO + def setPwmDuty(self, duty=0): + if duty >= 0 and duty < 255: + return self.sendCmd(GoGoIF.CMD_SET_PWM_DUTY + (duty,)) + return False + + + def autoUpload(self, progress_cb=None): + printFunctionName() + if not serialPortIF.checkConnection(): + print "< Gogo disconnected" + #self.showInfo(_("Gogo disconnected")) + #return False + raise Exceptions.ConnectionProblem + + recData = [] + + if not self.sendCmd(GoGoIF.CMD_AUTO_UPLOAD, flush=False): + print "autoUpload: 1" + return recData + + # 3. The Gogo will send four bytes. 0xEE, 0x11, uploadLen (low byte), + # uploadLen (high byte). The first two bytes are just headers. 
The + # latter two are in bytes. + + # Rx upload-length header bytes (2) + hdr = serialPortIF.rxBytes(2) + print "hdr",hdr + dataBytes = serialPortIF.rx16BitValueLSBFirst() + dataWords = dataBytes / 2 + if dataWords > 0: + for i in range(dataWords): + recData.append(serialPortIF.rx16BitValueLSBFirst()) + if progress_cb: + progress_cb(i, dataWords) + if progress_cb: + progress_cb(dataWords, dataWords) + return recData + + + # Here's an example session: + # + # Computer->GoGo : 0x54, 0xFE, 0xCC, 0x00 // command + # GoGo->Computer : 0x54, 0xFE, 0xCC, 0x00 // echo + # GoGo->Computer : 0x55, 0xFF, 0xAA // ack + # GoGo->Computer : 0xEE, 0x11, 0x06, 0x00 // upload header + # GoGo->Computer : 0x00, 0x00, 0x01, 0x00, 0x02, 0x00 // upload data + # + # In this example the gogoboard uploads 3 16-bit values: 0, 1, 2 respectively. + # + # Note. You may find that the gogo seems to be excessively sending stuff + # back to the computer with all the echos and acks. This was done to + # provide compatibility with the Cricket download protocol. In practice, + # once you send the command bytes to the gogo you can just wait for the + # upload headers (0xEE, 0x11) while ignoring the others. + + + +#================================================================================ +#================================================================================ + +class GoGoComms: + + def __init__(self, portNum=None, debug=False): + global serialPortIF + + serialPortIF = SerialIF(portNum, debug) + + self.cricketIF = CricketIF(debug) + self.gogoIF = GoGoIF(debug) + + # Public Serial Interface: + + def autoConnect(self): + return serialPortIF.autoConnect() + + def getPort(self): + return serialPortIF.getPort() + + def closePort(self): + return serialPortIF.closePort() + + def isUSBVersion(self): + return serialPortIF.isUSBVersion() + + # Public Cricket Interface: + + def compile(self, text, ErrMsgFunc=None): + return self.cricketIF.compile(text, ErrMsgFunc) + + def download(self): + return self.cricketIF.download() + + + def autoUpload(self, progress_cb=None): + return self.gogoIF.autoUpload(progress_cb) + + + # Public GoGo Interface: + + def beep(self): + return self.gogoIF.beep() + + def ledOn(self): + return self.gogoIF.ledOn() + + def ledOff(self): + return self.gogoIF.ledOff() + + def talkToMotor(self, m): + return self.gogoIF.talkToMotor(m) + + def motorOn(self): + return self.gogoIF.motorOn() + + def motorOff(self): + return self.gogoIF.motorOff() + + def motorBreak(self): + return self.gogoIF.motorBreak() + + def motorCoast(self): + return self.gogoIF.motorCoast() + + def setMotorPower(self, power): + return self.gogoIF.setMotorPower(power) + + def motorThisway(self): + return self.gogoIF.motorThisway() + + def motorThatway(self): + return self.gogoIF.motorThatway() + + def motorReverse(self): + return self.gogoIF.motorReverse() + + def setPwmDuty(self, duty): + return self.gogoIF.setPwmDuty(duty) + + def readSensor(self, sensorNumber): + return self.gogoIF.readSensor(sensorNumber) + + +#========================================================================================================# + + +if __name__=='__main__': + GoGoComms(0, False) + diff --git a/pyLogoCompiler/Exceptions.py b/pyLogoCompiler/Exceptions.py new file mode 100644 index 0000000..79b4b63 --- /dev/null +++ b/pyLogoCompiler/Exceptions.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +#----------------------------BRGOGO---------------------------------------- +#Project site: http://br-gogo.sourceforge.net +# +# Name: pyLogo +# Origiginal from: 
Marcelo Barbosa +# About: Esta é uma versão da linguagem Logo em python para a placa Gogo board +# +# Modified by: Felipe Augusto Silva +# email: suportegogo@gmail.com +# compiler version: 0.23 +#----------------------------------------------------------------------------- + +class UnknowSymbol(Exception): + pass +class BracketError(Exception): + pass +class BlockTooLong(Exception): + pass +class CodeTooLong(Exception): + pass +class TooManyGlobals(Exception): + pass +class DuplicatedSymbol(Exception): + pass +class ParentesisError(Exception): + pass + + + +class CommunicationProblem(Exception): + pass + +class ConnectionProblem(Exception): + pass diff --git a/pyLogoCompiler/__init__.py b/pyLogoCompiler/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pyLogoCompiler/__init__.py diff --git a/pyLogoCompiler/__init__.pyc b/pyLogoCompiler/__init__.pyc Binary files differnew file mode 100644 index 0000000..386b46d --- /dev/null +++ b/pyLogoCompiler/__init__.pyc diff --git a/pyLogoCompiler/ply/__init__.py b/pyLogoCompiler/ply/__init__.py new file mode 100644 index 0000000..853a985 --- /dev/null +++ b/pyLogoCompiler/ply/__init__.py @@ -0,0 +1,4 @@ +# PLY package +# Author: David Beazley (dave@dabeaz.com) + +__all__ = ['lex','yacc'] diff --git a/pyLogoCompiler/ply/__init__.pyc b/pyLogoCompiler/ply/__init__.pyc Binary files differnew file mode 100644 index 0000000..f1796e3 --- /dev/null +++ b/pyLogoCompiler/ply/__init__.pyc diff --git a/pyLogoCompiler/ply/cpp.py b/pyLogoCompiler/ply/cpp.py new file mode 100644 index 0000000..51cbe65 --- /dev/null +++ b/pyLogoCompiler/ply/cpp.py @@ -0,0 +1,898 @@ +# ----------------------------------------------------------------------------- +# cpp.py +# +# Author: David Beazley (http://www.dabeaz.com) +# Copyright (C) 2007 +# All rights reserved +# +# This module implements an ANSI-C style lexical preprocessor for PLY. +# ----------------------------------------------------------------------------- +from __future__ import generators + +# ----------------------------------------------------------------------------- +# Default preprocessor lexer definitions. These tokens are enough to get +# a basic preprocessor working. Other modules may import these if they want +# ----------------------------------------------------------------------------- + +tokens = ( + 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND' +) + +literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" + +# Whitespace +def t_CPP_WS(t): + r'\s+' + t.lexer.lineno += t.value.count("\n") + return t + +t_CPP_POUND = r'\#' +t_CPP_DPOUND = r'\#\#' + +# Identifier +t_CPP_ID = r'[A-Za-z_][\w_]*' + +# Integer literal +def CPP_INTEGER(t): + r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)' + return t + +t_CPP_INTEGER = CPP_INTEGER + +# Floating literal +t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 
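The float rule above contains literal spaces around its alternation, which would change its meaning under a plain re.compile; it works in PLY because lex builds its master regular expression with re.VERBOSE. A small standalone check of the pattern (a sketch using only the standard re module):

    import re

    # Pattern copied from t_CPP_FLOAT above; re.VERBOSE makes the embedded
    # spaces insignificant, as they are when PLY compiles the rule.
    FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
    pat = re.compile(FLOAT, re.VERBOSE)

    for text in ("3.14", "1e10", "2.5e-3f", "42"):
        m = pat.match(text)
        print(text, "->", m.group() if m else None)   # "42" prints None: not a float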
+ +# String literal +def t_CPP_STRING(t): + r'\"([^\\\n]|(\\(.|\n)))*?\"' + t.lexer.lineno += t.value.count("\n") + return t + +# Character constant 'c' or L'c' +def t_CPP_CHAR(t): + r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' + t.lexer.lineno += t.value.count("\n") + return t + +# Comment +def t_CPP_COMMENT(t): + r'(/\*(.|\n)*?\*/)|(//.*?\n)' + t.lexer.lineno += t.value.count("\n") + return t + +def t_error(t): + t.type = t.value[0] + t.value = t.value[0] + t.lexer.skip(1) + return t + +import re +import copy +import time +import os.path + +# ----------------------------------------------------------------------------- +# trigraph() +# +# Given an input string, this function replaces all trigraph sequences. +# The following mapping is used: +# +# ??= # +# ??/ \ +# ??' ^ +# ??( [ +# ??) ] +# ??! | +# ??< { +# ??> } +# ??- ~ +# ----------------------------------------------------------------------------- + +_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') +_trigraph_rep = { + '=':'#', + '/':'\\', + "'":'^', + '(':'[', + ')':']', + '!':'|', + '<':'{', + '>':'}', + '-':'~' +} + +def trigraph(input): + return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) + +# ------------------------------------------------------------------ +# Macro object +# +# This object holds information about preprocessor macros +# +# .name - Macro name (string) +# .value - Macro value (a list of tokens) +# .arglist - List of argument names +# .variadic - Boolean indicating whether or not variadic macro +# .vararg - Name of the variadic parameter +# +# When a macro is created, the macro replacement token sequence is +# pre-scanned and used to create patch lists that are later used +# during macro expansion +# ------------------------------------------------------------------ + +class Macro(object): + def __init__(self,name,value,arglist=None,variadic=False): + self.name = name + self.value = value + self.arglist = arglist + self.variadic = variadic + if variadic: + self.vararg = arglist[-1] + self.source = None + +# ------------------------------------------------------------------ +# Preprocessor object +# +# Object representing a preprocessor. Contains macro definitions, +# include directories, and other information +# ------------------------------------------------------------------ + +class Preprocessor(object): + def __init__(self,lexer=None): + if lexer is None: + lexer = lex.lexer + self.lexer = lexer + self.macros = { } + self.path = [] + self.temp_path = [] + + # Probe the lexer for selected tokens + self.lexprobe() + + tm = time.localtime() + self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) + self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) + self.parser = None + + # ----------------------------------------------------------------------------- + # tokenize() + # + # Utility function. 
Given a string of text, tokenize into a list of tokens + # ----------------------------------------------------------------------------- + + def tokenize(self,text): + tokens = [] + self.lexer.input(text) + while True: + tok = self.lexer.token() + if not tok: break + tokens.append(tok) + return tokens + + # --------------------------------------------------------------------- + # error() + # + # Report a preprocessor error/warning of some kind + # ---------------------------------------------------------------------- + + def error(self,file,line,msg): + print >>sys.stderr,"%s:%d %s" % (file,line,msg) + + # ---------------------------------------------------------------------- + # lexprobe() + # + # This method probes the preprocessor lexer object to discover + # the token types of symbols that are important to the preprocessor. + # If this works right, the preprocessor will simply "work" + # with any suitable lexer regardless of how tokens have been named. + # ---------------------------------------------------------------------- + + def lexprobe(self): + + # Determine the token type for identifiers + self.lexer.input("identifier") + tok = self.lexer.token() + if not tok or tok.value != "identifier": + print _("Couldn't determine identifier type") + else: + self.t_ID = tok.type + + # Determine the token type for integers + self.lexer.input("12345") + tok = self.lexer.token() + if not tok or int(tok.value) != 12345: + print _("Couldn't determine integer type") + else: + self.t_INTEGER = tok.type + self.t_INTEGER_TYPE = type(tok.value) + + # Determine the token type for strings enclosed in double quotes + self.lexer.input("\"filename\"") + tok = self.lexer.token() + if not tok or tok.value != "\"filename\"": + print _("Couldn't determine string type") + else: + self.t_STRING = tok.type + + # Determine the token type for whitespace--if any + self.lexer.input(" ") + tok = self.lexer.token() + if not tok or tok.value != " ": + self.t_SPACE = None + else: + self.t_SPACE = tok.type + + # Determine the token type for newlines + self.lexer.input("\n") + tok = self.lexer.token() + if not tok or tok.value != "\n": + self.t_NEWLINE = None + print _("Couldn't determine token for newlines") + else: + self.t_NEWLINE = tok.type + + self.t_WS = (self.t_SPACE, self.t_NEWLINE) + + # Check for other characters used by the preprocessor + chars = [ '<','>','#','##','\\','(',')',',','.'] + for c in chars: + self.lexer.input(c) + tok = self.lexer.token() + if not tok or tok.value != c: + print _("Unable to lex '%s' required for preprocessor") % c + + # ---------------------------------------------------------------------- + # add_path() + # + # Adds a search path to the preprocessor. + # ---------------------------------------------------------------------- + + def add_path(self,path): + self.path.append(path) + + # ---------------------------------------------------------------------- + # group_lines() + # + # Given an input string, this function splits it into lines. Trailing whitespace + # is removed. Any line ending with \ is grouped with the next line. This + # function forms the lowest level of the preprocessor---grouping into text into + # a line-by-line format. 
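Before any tokenizing, group_lines() (below) merges every line ending in a backslash with its successors. A standalone sketch of just that joining step (using range where the original uses xrange):

    def join_continuations(text):
        # Merge any line ending in '\' with the line(s) that follow,
        # exactly as group_lines() does before handing the text to the lexer.
        lines = [x.rstrip() for x in text.splitlines()]
        for i in range(len(lines)):
            j = i + 1
            while lines[i].endswith('\\') and j < len(lines):
                lines[i] = lines[i][:-1] + lines[j]
                lines[j] = ""
                j += 1
        return "\n".join(lines)

    print(join_continuations('#define MAX(a,b) \\\n    ((a) > (b) ? (a) : (b))'))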
+ # ---------------------------------------------------------------------- + + def group_lines(self,input): + lex = self.lexer.clone() + lines = [x.rstrip() for x in input.splitlines()] + for i in xrange(len(lines)): + j = i+1 + while lines[i].endswith('\\') and (j < len(lines)): + lines[i] = lines[i][:-1]+lines[j] + lines[j] = "" + j += 1 + + input = "\n".join(lines) + lex.input(input) + lex.lineno = 1 + + current_line = [] + while True: + tok = lex.token() + if not tok: + break + current_line.append(tok) + if tok.type in self.t_WS and '\n' in tok.value: + yield current_line + current_line = [] + + if current_line: + yield current_line + + # ---------------------------------------------------------------------- + # tokenstrip() + # + # Remove leading/trailing whitespace tokens from a token list + # ---------------------------------------------------------------------- + + def tokenstrip(self,tokens): + i = 0 + while i < len(tokens) and tokens[i].type in self.t_WS: + i += 1 + del tokens[:i] + i = len(tokens)-1 + while i >= 0 and tokens[i].type in self.t_WS: + i -= 1 + del tokens[i+1:] + return tokens + + + # ---------------------------------------------------------------------- + # collect_args() + # + # Collects comma separated arguments from a list of tokens. The arguments + # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) + # where tokencount is the number of tokens consumed, args is a list of arguments, + # and positions is a list of integers containing the starting index of each + # argument. Each argument is represented by a list of tokens. + # + # When collecting arguments, leading and trailing whitespace is removed + # from each argument. + # + # This function properly handles nested parenthesis and commas---these do not + # define new arguments. + # ---------------------------------------------------------------------- + + def collect_args(self,tokenlist): + args = [] + positions = [] + current_arg = [] + nesting = 1 + tokenlen = len(tokenlist) + + # Search for the opening '('. 
+ i = 0 + while (i < tokenlen) and (tokenlist[i].type in self.t_WS): + i += 1 + + if (i < tokenlen) and (tokenlist[i].value == '('): + positions.append(i+1) + else: + self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") + return 0, [], [] + + i += 1 + + while i < tokenlen: + t = tokenlist[i] + if t.value == '(': + current_arg.append(t) + nesting += 1 + elif t.value == ')': + nesting -= 1 + if nesting == 0: + if current_arg: + args.append(self.tokenstrip(current_arg)) + positions.append(i) + return i+1,args,positions + current_arg.append(t) + elif t.value == ',' and nesting == 1: + args.append(self.tokenstrip(current_arg)) + positions.append(i+1) + current_arg = [] + else: + current_arg.append(t) + i += 1 + + # Missing end argument + self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") + return 0, [],[] + + # ---------------------------------------------------------------------- + # macro_prescan() + # + # Examine the macro value (token sequence) and identify patch points + # This is used to speed up macro expansion later on---we'll know + # right away where to apply patches to the value to form the expansion + # ---------------------------------------------------------------------- + + def macro_prescan(self,macro): + macro.patch = [] # Standard macro arguments + macro.str_patch = [] # String conversion expansion + macro.var_comma_patch = [] # Variadic macro comma patch + i = 0 + while i < len(macro.value): + if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: + argnum = macro.arglist.index(macro.value[i].value) + # Conversion of argument to a string + if i > 0 and macro.value[i-1].value == '#': + macro.value[i] = copy.copy(macro.value[i]) + macro.value[i].type = self.t_STRING + del macro.value[i-1] + macro.str_patch.append((argnum,i-1)) + continue + # Concatenation + elif (i > 0 and macro.value[i-1].value == '##'): + macro.patch.append(('c',argnum,i-1)) + del macro.value[i-1] + continue + elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): + macro.patch.append(('c',argnum,i)) + i += 1 + continue + # Standard expansion + else: + macro.patch.append(('e',argnum,i)) + elif macro.value[i].value == '##': + if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ + ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ + (macro.value[i+1].value == macro.vararg): + macro.var_comma_patch.append(i-1) + i += 1 + macro.patch.sort(key=lambda x: x[2],reverse=True) + + # ---------------------------------------------------------------------- + # macro_expand_args() + # + # Given a Macro and list of arguments (each a token list), this method + # returns an expanded version of a macro. The return value is a token sequence + # representing the replacement macro tokens + # ---------------------------------------------------------------------- + + def macro_expand_args(self,macro,args): + # Make a copy of the macro token sequence + rep = [copy.copy(_x) for _x in macro.value] + + # Make string expansion patches. These do not alter the length of the replacement sequence + + str_expansion = {} + for argnum, i in macro.str_patch: + if argnum not in str_expansion: + str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") + rep[i] = copy.copy(rep[i]) + rep[i].value = str_expansion[argnum] + + # Make the variadic macro comma patch. 
If the variadic macro argument is empty, we get rid + comma_patch = False + if macro.variadic and not args[-1]: + for i in macro.var_comma_patch: + rep[i] = None + comma_patch = True + + # Make all other patches. The order of these matters. It is assumed that the patch list + # has been sorted in reverse order of patch location since replacements will cause the + # size of the replacement sequence to expand from the patch point. + + expanded = { } + for ptype, argnum, i in macro.patch: + # Concatenation. Argument is left unexpanded + if ptype == 'c': + rep[i:i+1] = args[argnum] + # Normal expansion. Argument is macro expanded first + elif ptype == 'e': + if argnum not in expanded: + expanded[argnum] = self.expand_macros(args[argnum]) + rep[i:i+1] = expanded[argnum] + + # Get rid of removed comma if necessary + if comma_patch: + rep = [_i for _i in rep if _i] + + return rep + + + # ---------------------------------------------------------------------- + # expand_macros() + # + # Given a list of tokens, this function performs macro expansion. + # The expanded argument is a dictionary that contains macros already + # expanded. This is used to prevent infinite recursion. + # ---------------------------------------------------------------------- + + def expand_macros(self,tokens,expanded=None): + if expanded is None: + expanded = {} + i = 0 + while i < len(tokens): + t = tokens[i] + if t.type == self.t_ID: + if t.value in self.macros and t.value not in expanded: + # Yes, we found a macro match + expanded[t.value] = True + + m = self.macros[t.value] + if not m.arglist: + # A simple macro + ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) + for e in ex: + e.lineno = t.lineno + tokens[i:i+1] = ex + i += len(ex) + else: + # A macro with arguments + j = i + 1 + while j < len(tokens) and tokens[j].type in self.t_WS: + j += 1 + if tokens[j].value == '(': + tokcount,args,positions = self.collect_args(tokens[j:]) + if not m.variadic and len(args) != len(m.arglist): + self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) + i = j + tokcount + elif m.variadic and len(args) < len(m.arglist)-1: + if len(m.arglist) > 2: + self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) + else: + self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) + i = j + tokcount + else: + if m.variadic: + if len(args) == len(m.arglist)-1: + args.append([]) + else: + args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] + del args[len(m.arglist):] + + # Get macro replacement text + rep = self.macro_expand_args(m,args) + rep = self.expand_macros(rep,expanded) + for r in rep: + r.lineno = t.lineno + tokens[i:j+tokcount] = rep + i += len(rep) + del expanded[t.value] + continue + elif t.value == '__LINE__': + t.type = self.t_INTEGER + t.value = self.t_INTEGER_TYPE(t.lineno) + + i += 1 + return tokens + + # ---------------------------------------------------------------------- + # evalexpr() + # + # Evaluate an expression token sequence for the purposes of evaluating + # integral expressions. 
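The evalexpr() method that follows rewrites C operators textually into Python and hands the result to eval(). A standalone sketch of that rewriting step (assuming defined() and macro references have already been substituted away, as the method does first); note that, as in the code below, the naive '!' replacement also rewrites '!=', so such expressions raise and fall back to 0:

    def eval_cpp_expr(expr):
        # Same rewrites evalexpr() applies before eval().
        expr = expr.replace("&&", " and ")
        expr = expr.replace("||", " or ")
        expr = expr.replace("!", " not ")   # caveat: also mangles '!='
        try:
            return eval(expr)
        except Exception:
            return 0   # the original reports an error and returns 0

    print(eval_cpp_expr("(1) && (0 || 1)"))   # -> 1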
+ # ---------------------------------------------------------------------- + + def evalexpr(self,tokens): + # tokens = tokenize(line) + # Search for defined macros + i = 0 + while i < len(tokens): + if tokens[i].type == self.t_ID and tokens[i].value == 'defined': + j = i + 1 + needparen = False + result = "0L" + while j < len(tokens): + if tokens[j].type in self.t_WS: + j += 1 + continue + elif tokens[j].type == self.t_ID: + if tokens[j].value in self.macros: + result = "1L" + else: + result = "0L" + if not needparen: break + elif tokens[j].value == '(': + needparen = True + elif tokens[j].value == ')': + break + else: + self.error(self.source,tokens[i].lineno,"Malformed defined()") + j += 1 + tokens[i].type = self.t_INTEGER + tokens[i].value = self.t_INTEGER_TYPE(result) + del tokens[i+1:j+1] + i += 1 + tokens = self.expand_macros(tokens) + for i,t in enumerate(tokens): + if t.type == self.t_ID: + tokens[i] = copy.copy(t) + tokens[i].type = self.t_INTEGER + tokens[i].value = self.t_INTEGER_TYPE("0L") + elif t.type == self.t_INTEGER: + tokens[i] = copy.copy(t) + # Strip off any trailing suffixes + tokens[i].value = str(tokens[i].value) + while tokens[i].value[-1] not in "0123456789abcdefABCDEF": + tokens[i].value = tokens[i].value[:-1] + + expr = "".join([str(x.value) for x in tokens]) + expr = expr.replace("&&"," and ") + expr = expr.replace("||"," or ") + expr = expr.replace("!"," not ") + try: + result = eval(expr) + except StandardError: + self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") + result = 0 + return result + + # ---------------------------------------------------------------------- + # parsegen() + # + # Parse an input string/ + # ---------------------------------------------------------------------- + def parsegen(self,input,source=None): + + # Replace trigraph sequences + t = trigraph(input) + lines = self.group_lines(t) + + if not source: + source = "" + + self.define("__FILE__ \"%s\"" % source) + + self.source = source + chunk = [] + enable = True + iftrigger = False + ifstack = [] + + for x in lines: + for i,tok in enumerate(x): + if tok.type not in self.t_WS: break + if tok.value == '#': + # Preprocessor directive + + for tok in x: + if tok in self.t_WS and '\n' in tok.value: + chunk.append(tok) + + dirtokens = self.tokenstrip(x[i+1:]) + if dirtokens: + name = dirtokens[0].value + args = self.tokenstrip(dirtokens[1:]) + else: + name = "" + args = [] + + if name == 'define': + if enable: + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + self.define(args) + elif name == 'include': + if enable: + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + oldfile = self.macros['__FILE__'] + for tok in self.include(args): + yield tok + self.macros['__FILE__'] = oldfile + self.source = source + elif name == 'undef': + if enable: + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + self.undef(args) + elif name == 'ifdef': + ifstack.append((enable,iftrigger)) + if enable: + if not args[0].value in self.macros: + enable = False + iftrigger = False + else: + iftrigger = True + elif name == 'ifndef': + ifstack.append((enable,iftrigger)) + if enable: + if args[0].value in self.macros: + enable = False + iftrigger = False + else: + iftrigger = True + elif name == 'if': + ifstack.append((enable,iftrigger)) + if enable: + result = self.evalexpr(args) + if not result: + enable = False + iftrigger = False + else: + iftrigger = True + elif name == 'elif': + if ifstack: + if ifstack[-1][0]: # We only pay attention if outer "if" 
allows this + if enable: # If already true, we flip enable False + enable = False + elif not iftrigger: # If False, but not triggered yet, we'll check expression + result = self.evalexpr(args) + if result: + enable = True + iftrigger = True + else: + self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") + + elif name == 'else': + if ifstack: + if ifstack[-1][0]: + if enable: + enable = False + elif not iftrigger: + enable = True + iftrigger = True + else: + self.error(self.source,dirtokens[0].lineno,"Misplaced #else") + + elif name == 'endif': + if ifstack: + enable,iftrigger = ifstack.pop() + else: + self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") + else: + # Unknown preprocessor directive + pass + + else: + # Normal text + if enable: + chunk.extend(x) + + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + + # ---------------------------------------------------------------------- + # include() + # + # Implementation of file-inclusion + # ---------------------------------------------------------------------- + + def include(self,tokens): + # Try to extract the filename and then process an include file + if not tokens: + return + if tokens: + if tokens[0].value != '<' and tokens[0].type != self.t_STRING: + tokens = self.expand_macros(tokens) + + if tokens[0].value == '<': + # Include <...> + i = 1 + while i < len(tokens): + if tokens[i].value == '>': + break + i += 1 + else: + print _("Malformed #include <...>") + return + filename = "".join([x.value for x in tokens[1:i]]) + path = self.path + [""] + self.temp_path + elif tokens[0].type == self.t_STRING: + filename = tokens[0].value[1:-1] + path = self.temp_path + [""] + self.path + else: + print _("Malformed #include statement") + return + for p in path: + iname = os.path.join(p,filename) + try: + data = open(iname,"r").read() + dname = os.path.dirname(iname) + if dname: + self.temp_path.insert(0,dname) + for tok in self.parsegen(data,filename): + yield tok + if dname: + del self.temp_path[0] + break + except IOError,e: + pass + else: + print _("Couldn't find '%s'") % filename + + # ---------------------------------------------------------------------- + # define() + # + # Define a new macro + # ---------------------------------------------------------------------- + + def define(self,tokens): + if isinstance(tokens,(str,unicode)): + tokens = self.tokenize(tokens) + + linetok = tokens + try: + name = linetok[0] + if len(linetok) > 1: + mtype = linetok[1] + else: + mtype = None + if not mtype: + m = Macro(name.value,[]) + self.macros[name.value] = m + elif mtype.type in self.t_WS: + # A normal macro + m = Macro(name.value,self.tokenstrip(linetok[2:])) + self.macros[name.value] = m + elif mtype.value == '(': + # A macro with arguments + tokcount, args, positions = self.collect_args(linetok[1:]) + variadic = False + for a in args: + if variadic: + print _("No more arguments may follow a variadic argument") + break + astr = "".join([str(_i.value) for _i in a]) + if astr == "...": + variadic = True + a[0].type = self.t_ID + a[0].value = '__VA_ARGS__' + variadic = True + del a[1:] + continue + elif astr[-3:] == "..." and a[0].type == self.t_ID: + variadic = True + del a[1:] + # If, for some reason, "." 
is part of the identifier, strip off the name for the purposes + # of macro expansion + if a[0].value[-3:] == '...': + a[0].value = a[0].value[:-3] + continue + if len(a) > 1 or a[0].type != self.t_ID: + print _("Invalid macro argument") + break + else: + mvalue = self.tokenstrip(linetok[1+tokcount:]) + i = 0 + while i < len(mvalue): + if i+1 < len(mvalue): + if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': + del mvalue[i] + continue + elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: + del mvalue[i+1] + i += 1 + m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) + self.macro_prescan(m) + self.macros[name.value] = m + else: + print _("Bad macro definition") + except LookupError: + print _("Bad macro definition") + + # ---------------------------------------------------------------------- + # undef() + # + # Undefine a macro + # ---------------------------------------------------------------------- + + def undef(self,tokens): + id = tokens[0].value + try: + del self.macros[id] + except LookupError: + pass + + # ---------------------------------------------------------------------- + # parse() + # + # Parse input text. + # ---------------------------------------------------------------------- + def parse(self,input,source=None,ignore={}): + self.ignore = ignore + self.parser = self.parsegen(input,source) + + # ---------------------------------------------------------------------- + # token() + # + # Method to return individual tokens + # ---------------------------------------------------------------------- + def token(self): + try: + while True: + tok = self.parser.next() + if tok.type not in self.ignore: return tok + except StopIteration: + self.parser = None + return None + +if __name__ == '__main__': + import ply.lex as lex + lexer = lex.lex() + + # Run a preprocessor + import sys + f = open(sys.argv[1]) + input = f.read() + + p = Preprocessor(lexer) + p.parse(input,sys.argv[1]) + while True: + tok = p.token() + if not tok: break + print p.source, tok + + + + + + + + + + + diff --git a/pyLogoCompiler/ply/ctokens.py b/pyLogoCompiler/ply/ctokens.py new file mode 100644 index 0000000..dd5f102 --- /dev/null +++ b/pyLogoCompiler/ply/ctokens.py @@ -0,0 +1,133 @@ +# ---------------------------------------------------------------------- +# ctokens.py +# +# Token specifications for symbols in ANSI C and C++. This file is +# meant to be used as a library in other tokenizers. +# ---------------------------------------------------------------------- + +# Reserved words + +tokens = [ + # Literals (identifier, integer constant, float constant, string constant, char const) + 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', + + # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', + 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', + 'LOR', 'LAND', 'LNOT', + 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', + + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) + 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', + 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', + + # Increment/decrement (++,--) + 'PLUSPLUS', 'MINUSMINUS', + + # Structure dereference (->) + 'ARROW', + + # Ternary operator (?) + 'TERNARY', + + # Delimeters ( ) [ ] { } , . ; : + 'LPAREN', 'RPAREN', + 'LBRACKET', 'RBRACKET', + 'LBRACE', 'RBRACE', + 'COMMA', 'PERIOD', 'SEMI', 'COLON', + + # Ellipsis (...) 
+ 'ELLIPSIS', +] + +# Operators +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_MODULO = r'%' +t_OR = r'\|' +t_AND = r'&' +t_NOT = r'~' +t_XOR = r'\^' +t_LSHIFT = r'<<' +t_RSHIFT = r'>>' +t_LOR = r'\|\|' +t_LAND = r'&&' +t_LNOT = r'!' +t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' + +# Assignment operators + +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'^=' + +# Increment/decrement +t_INCREMENT = r'\+\+' +t_DECREMENT = r'--' + +# -> +t_ARROW = r'->' + +# ? +t_TERNARY = r'\?' + +# Delimeters +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' + +# Identifiers +t_ID = r'[A-Za-z_][A-Za-z0-9_]*' + +# Integer literal +t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' + +# Floating literal +t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' + +# String literal +t_STRING = r'\"([^\\\n]|(\\.))*?\"' + +# Character constant 'c' or L'c' +t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' + +# Comment (C-Style) +def t_COMMENT(t): + r'/\*(.|\n)*?\*/' + t.lexer.lineno += t.value.count('\n') + return t + +# Comment (C++-Style) +def t_CPPCOMMENT(t): + r'//.*\n' + t.lexer.lineno += 1 + return t + + + + + + diff --git a/pyLogoCompiler/ply/lex.py b/pyLogoCompiler/ply/lex.py new file mode 100644 index 0000000..9a36490 --- /dev/null +++ b/pyLogoCompiler/ply/lex.py @@ -0,0 +1,1057 @@ +# ----------------------------------------------------------------------------- +# ply: lex.py +# +# Copyright (C) 2001-2009, +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
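ctokens.py above is a library of token specifications meant to be imported by other tokenizers, and lex.py below is the engine that compiles such rules. A minimal, self-contained example of the pattern (a sketch with its own small token set, not the full ctokens list):

    import ply.lex as lex

    tokens = ('ID', 'ICONST', 'PLUS')

    t_PLUS   = r'\+'
    t_ID     = r'[A-Za-z_][A-Za-z0-9_]*'
    t_ICONST = r'\d+'
    t_ignore = ' \t'

    def t_error(t):
        t.lexer.skip(1)

    lexer = lex.lex()
    lexer.input('x + 42')
    for tok in lexer:
        print(tok)   # LexToken(ID,'x',1,0), LexToken(PLUS,'+',1,2), LexToken(ICONST,'42',1,4)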
+# ----------------------------------------------------------------------------- + +__version__ = "3.2" +__tabversion__ = "3.2" # Version of table file used + +import re, sys, types, copy, os + +# This tuple contains known string types +try: + # Python 2.6 + StringTypes = (types.StringType, types.UnicodeType) +except AttributeError: + # Python 3.0 + StringTypes = (str, bytes) + +# Extract the code attribute of a function. Different implementations +# are for Python 2/3 compatibility. + +if sys.version_info[0] < 3: + def func_code(f): + return f.func_code +else: + def func_code(f): + return f.__code__ + +# This regular expression is used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Exception thrown when invalid token encountered and no default error +# handler is defined. + +class LexError(Exception): + def __init__(self,message,s): + self.args = (message,) + self.text = s + +# Token class. This class is used to represent the tokens produced. +class LexToken(object): + def __str__(self): + return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) + def __repr__(self): + return str(self) + +# This object is a stand-in for a logging object created by the +# logging module. + +class PlyLogger(object): + def __init__(self,f): + self.f = f + def critical(self,msg,*args,**kwargs): + self.f.write((msg % args) + "\n") + + def warning(self,msg,*args,**kwargs): + self.f.write("WARNING: "+ (msg % args) + "\n") + + def error(self,msg,*args,**kwargs): + self.f.write("ERROR: " + (msg % args) + "\n") + + info = critical + debug = critical + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self,name): + return self + def __call__(self,*args,**kwargs): + return self + +# ----------------------------------------------------------------------------- +# === Lexing Engine === +# +# The following Lexer class implements the lexer runtime. There are only +# a few public methods and attributes: +# +# input() - Store a new string in the lexer +# token() - Get the next token +# clone() - Clone the lexer +# +# lineno - Current line number +# lexpos - Current position in the input string +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression. 
This is a list of + # tuples (re,findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules + self.lexretext = None # Current regular expression strings + self.lexstatere = {} # Dictionary mapping lexer states to master regexs + self.lexstateretext = {} # Dictionary mapping lexer states to regex strings + self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names + self.lexstate = "INITIAL" # Current lexer state + self.lexstatestack = [] # Stack of lexer states + self.lexstateinfo = None # State information + self.lexstateignore = {} # Dictionary of ignored characters for each state + self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexreflags = 0 # Optional re compile flags + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexerrorf = None # Error rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = "" # Ignored characters + self.lexliterals = "" # Literal characters that can be passed through + self.lexmodule = None # Module + self.lineno = 1 # Current line number + self.lexoptimize = 0 # Optimized mode + + def clone(self,object=None): + c = copy.copy(self) + + # If the object parameter has been supplied, it means we are attaching the + # lexer to a new object. In this case, we have to rebind all methods in + # the lexstatere and lexstateerrorf tables. + + if object: + newtab = { } + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object,f[0].__name__),f[1])) + newre.append((cre,newfindex)) + newtab[key] = newre + c.lexstatere = newtab + c.lexstateerrorf = { } + for key, ef in self.lexstateerrorf.items(): + c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexmodule = object + return c + + # ------------------------------------------------------------ + # writetab() - Write lexer information to a table file + # ------------------------------------------------------------ + def writetab(self,tabfile,outputdir=""): + if isinstance(tabfile,types.ModuleType): + return + basetabfilename = tabfile.split(".")[-1] + filename = os.path.join(outputdir,basetabfilename)+".py" + tf = open(filename,"w") + tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) + tf.write("_tabversion = %s\n" % repr(__version__)) + tf.write("_lextokens = %s\n" % repr(self.lextokens)) + tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) + tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) + tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) + + tabre = { } + # Collect all functions in the initial state + initial = self.lexstatere["INITIAL"] + initialfuncs = [] + for part in initial: + for f in part[1]: + if f and f[0]: + initialfuncs.append(f) + + for key, lre in self.lexstatere.items(): + titem = [] + for i in range(len(lre)): + titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) + tabre[key] = titem + + tf.write("_lexstatere = %s\n" % repr(tabre)) + tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) + + taberr = { } + for key, ef in self.lexstateerrorf.items(): + if ef: + taberr[key] = ef.__name__ + else: + taberr[key] = None + tf.write("_lexstateerrorf = %s\n" % repr(taberr)) + tf.close() + + # ------------------------------------------------------------ + # readtab() - Read lexer information from a tab file + # ------------------------------------------------------------ + def readtab(self,tabfile,fdict): + if isinstance(tabfile,types.ModuleType): + lextab = tabfile + else: + if sys.version_info[0] < 3: + exec("import %s as lextab" % tabfile) + else: + env = { } + exec("import %s as lextab" % tabfile, env,env) + lextab = env['lextab'] + + if getattr(lextab,"_tabversion","0.0") != __version__: + raise ImportError("Inconsistent PLY version") + + self.lextokens = lextab._lextokens + self.lexreflags = lextab._lexreflags + self.lexliterals = lextab._lexliterals + self.lexstateinfo = lextab._lexstateinfo + self.lexstateignore = lextab._lexstateignore + self.lexstatere = { } + self.lexstateretext = { } + for key,lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for i in range(len(lre)): + titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) + txtitem.append(lre[i][0]) + self.lexstatere[key] = titem + self.lexstateretext[key] = txtitem + self.lexstateerrorf = { } + for key,ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[key] = fdict[ef] + self.begin('INITIAL') + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self,s): + # Pull off the first character to see if s looks like a string + c = s[:1] + if not isinstance(c,StringTypes): + raise ValueError("Expected a string") + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + + # ------------------------------------------------------------ + # begin() - Changes the lexing state + # ------------------------------------------------------------ + def begin(self,state): + if not state in self.lexstatere: + raise ValueError("Undefined state") + self.lexre = self.lexstatere[state] + self.lexretext = self.lexstateretext[state] + self.lexignore = self.lexstateignore.get(state,"") + self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexstate = state + + # ------------------------------------------------------------ + # push_state() - Changes the lexing state and saves old on stack + # ------------------------------------------------------------ + def push_state(self,state): + self.lexstatestack.append(self.lexstate) + self.begin(state) + + # 
------------------------------------------------------------ + # pop_state() - Restores the previous state + # ------------------------------------------------------------ + def pop_state(self): + self.begin(self.lexstatestack.pop()) + + # ------------------------------------------------------------ + # current_state() - Returns the current lexing state + # ------------------------------------------------------------ + def current_state(self): + return self.lexstate + + # ------------------------------------------------------------ + # skip() - Skip ahead n characters + # ------------------------------------------------------------ + def skip(self,n): + self.lexpos += n + + # ------------------------------------------------------------ + # opttoken() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def token(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + for lexre,lexindexfunc in self.lexre: + m = lexre.match(lexdata,lexpos) + if not m: continue + + # Create a token for return + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexpos = lexpos + + i = m.lastindex + func,tok.type = lexindexfunc[i] + + if not func: + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break + + lexpos = m.end() + + # If token is processed by a function, call it + + tok.lexer = self # Set additional attributes useful in token rules + self.lexmatch = m + self.lexpos = lexpos + + newtok = func(tok) + + # Every function must return a token, if nothing, we just move to next token + if not newtok: + lexpos = self.lexpos # This is here in case user has updated lexpos. + lexignore = self.lexignore # This is here in case there was a state change + break + + # Verify type of the token. If not in the token map, raise an error + if not self.lexoptimize: + if not newtok.type in self.lextokens: + raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func_code(func).co_filename, func_code(func).co_firstlineno, + func.__name__, newtok.type),lexdata[lexpos:]) + + return newtok + else: + # No match, see if in literals + if lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = lexdata[lexpos] + tok.lineno = self.lineno + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = "error" + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: continue + return newtok + + self.lexpos = lexpos + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError("No input string given with input()") + return None + + # Iterator interface + def __iter__(self): + return self + + def next(self): + t = self.token() + if t is None: + raise StopIteration + return t + + __next__ = next + +# ----------------------------------------------------------------------------- +# ==== Lex Builder === +# +# The functions and classes below are used to collect lexing information +# and build a Lexer object from it. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- + +def get_caller_module_dict(levels): + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + while levels > 0: + f = f.f_back + levels -= 1 + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + + return ldict + +# ----------------------------------------------------------------------------- +# _funcs_to_names() +# +# Given a list of regular expression functions, this converts it to a list +# suitable for output to a table file +# ----------------------------------------------------------------------------- + +def _funcs_to_names(funclist,namelist): + result = [] + for f,name in zip(funclist,namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) + return result + +# ----------------------------------------------------------------------------- +# _names_to_funcs() +# +# Given a list of regular expression function names, this converts it back to +# functions. +# ----------------------------------------------------------------------------- + +def _names_to_funcs(namelist,fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]],n[1])) + else: + result.append(n) + return result + +# ----------------------------------------------------------------------------- +# _form_master_re() +# +# This function takes a list of all of the regex components and attempts to +# form the master regular expression. Given limitations in the Python re +# module, it may be necessary to break the master regex into separate expressions. 
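For context, the `re` limitation alluded to above is CPython's cap on the number of groups a single pattern may contain (historically around 100), so the function below recursively halves the rule list until every piece compiles. A toy sketch of the master-regex idea itself, using illustrative rule names that do not appear in this file:

    import re

    # Two hypothetical token rules folded into one alternation of named groups,
    # which is essentially what _form_master_re() builds from the t_ rules.
    master = re.compile(r"(?P<NUMBER>\d+)|(?P<ID>[A-Za-z_]\w*)")

    m = master.match("42 plus x")
    print m.lastgroup   # 'NUMBER': lastindex/lastgroup identify the matching rule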
+# ----------------------------------------------------------------------------- + +def _form_master_re(relist,reflags,ldict,toknames): + if not relist: return [] + regex = "|".join(relist) + try: + lexre = re.compile(regex,re.VERBOSE | reflags) + + # Build the index to function map for the matching engine + lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexnames = lexindexfunc[:] + + for f,i in lexre.groupindex.items(): + handle = ldict.get(f,None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle,toknames[f]) + lexindexnames[i] = f + elif handle is not None: + lexindexnames[i] = f + if f.find("ignore_") > 0: + lexindexfunc[i] = (None,None) + else: + lexindexfunc[i] = (None, toknames[f]) + + return [(lexre,lexindexfunc)],[regex],[lexindexnames] + except Exception: + m = int(len(relist)/2) + if m == 0: m = 1 + llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) + rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) + return llist+rlist, lre+rre, lnames+rnames + +# ----------------------------------------------------------------------------- +# def _statetoken(s,names) +# +# Given a declaration name s of the form "t_" and a dictionary whose keys are +# state names, this function returns a tuple (states,tokenname) where states +# is a tuple of state names and tokenname is the name of the token. For example, +# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') +# ----------------------------------------------------------------------------- + +def _statetoken(s,names): + nonstate = 1 + parts = s.split("_") + for i in range(1,len(parts)): + if not parts[i] in names and parts[i] != 'ANY': break + if i > 1: + states = tuple(parts[1:i]) + else: + states = ('INITIAL',) + + if 'ANY' in states: + states = tuple(names) + + tokenname = "_".join(parts[i:]) + return (states,tokenname) + + +# ----------------------------------------------------------------------------- +# LexerReflect() +# +# This class represents information needed to build a lexer as extracted from a +# user's input file. 
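As a reminder of the shape of that input, here is a minimal, hypothetical token module of the kind this class harvests names from (a module-level tokens list plus t_* strings and functions):

    tokens = ('NUMBER', 'PLUS')     # required list of token names

    t_PLUS = r'\+'                  # simple rule defined as a string

    def t_NUMBER(t):                # rule defined as a function; regex in the docstring
        r'\d+'
        t.value = int(t.value)
        return t

    t_ignore = ' \t'                # characters skipped between tokens

    def t_error(t):                 # called on otherwise-illegal characters
        print "Illegal character %r" % t.value[0]
        t.lexer.skip(1)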
+# ----------------------------------------------------------------------------- +class LexerReflect(object): + def __init__(self,ldict,log=None,reflags=0): + self.ldict = ldict + self.error_func = None + self.tokens = [] + self.reflags = reflags + self.stateinfo = { 'INITIAL' : 'inclusive'} + self.files = {} + self.error = 0 + + if log is None: + self.log = PlyLogger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_tokens() + self.get_literals() + self.get_states() + self.get_rules() + + # Validate all of the information + def validate_all(self): + self.validate_tokens() + self.validate_literals() + self.validate_rules() + return self.error + + # Get the tokens map + def get_tokens(self): + tokens = self.ldict.get("tokens",None) + if not tokens: + self.log.error("No token list is defined") + self.error = 1 + return + + if not isinstance(tokens,(list, tuple)): + self.log.error("tokens must be a list or tuple") + self.error = 1 + return + + if not tokens: + self.log.error("tokens is empty") + self.error = 1 + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + terminals = {} + for n in self.tokens: + if not _is_identifier.match(n): + self.log.error("Bad token name '%s'",n) + self.error = 1 + if n in terminals: + self.log.warning("Token '%s' multiply defined", n) + terminals[n] = 1 + + # Get the literals specifier + def get_literals(self): + self.literals = self.ldict.get("literals","") + + # Validate literals + def validate_literals(self): + try: + for c in self.literals: + if not isinstance(c,StringTypes) or len(c) > 1: + self.log.error("Invalid literal %s. Must be a single character", repr(c)) + self.error = 1 + continue + + except TypeError: + self.log.error("Invalid literals specification. literals must be a sequence of characters") + self.error = 1 + + def get_states(self): + self.states = self.ldict.get("states",None) + # Build statemap + if self.states: + if not isinstance(self.states,(tuple,list)): + self.log.error("states must be defined as a tuple or list") + self.error = 1 + else: + for s in self.states: + if not isinstance(s,tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')",repr(s)) + self.error = 1 + continue + name, statetype = s + if not isinstance(name,StringTypes): + self.log.error("State name %s must be a string", repr(name)) + self.error = 1 + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) + self.error = 1 + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined",name) + self.error = 1 + continue + self.stateinfo[name] = statetype + + # Get all of the symbols with a t_ prefix and sort them into various + # categories (functions, strings, error functions, and ignore characters) + + def get_rules(self): + tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + + # Now build up a list of functions and a list of strings + + self.toknames = { } # Mapping of symbols to token names + self.funcsym = { } # Symbols defined as functions + self.strsym = { } # Symbols defined as strings + self.ignore = { } # Ignore strings by state + self.errorf = { } # Error functions by state + + for s in self.stateinfo: + self.funcsym[s] = [] + self.strsym[s] = [] + + if len(tsymbols) == 0: + self.log.error("No rules of the form t_rulename are defined") + self.error = 1 + return + + for f in tsymbols: + t = self.ldict[f] + states, tokname = _statetoken(f,self.stateinfo) + self.toknames[f] = tokname + + if hasattr(t,"__call__"): + if tokname == 'error': + for s in states: + self.errorf[s] = t + elif tokname == 'ignore': + line = func_code(t).co_firstlineno + file = func_code(t).co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) + self.error = 1 + else: + for s in states: + self.funcsym[s].append((f,t)) + elif isinstance(t, StringTypes): + if tokname == 'ignore': + for s in states: + self.ignore[s] = t + if "\\" in t: + self.log.warning("%s contains a literal backslash '\\'",f) + + elif tokname == 'error': + self.log.error("Rule '%s' must be defined as a function", f) + self.error = 1 + else: + for s in states: + self.strsym[s].append((f,t)) + else: + self.log.error("%s not defined as a function or string", f) + self.error = 1 + + # Sort the functions by line number + for f in self.funcsym.values(): + if sys.version_info[0] < 3: + f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) + else: + # Python 3.0 + f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + + # Sort the strings by regular expression length + for s in self.strsym.values(): + if sys.version_info[0] < 3: + s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) + else: + # Python 3.0 + s.sort(key=lambda x: len(x[1]),reverse=True) + + # Validate all of the t_rules collected + def validate_rules(self): + for state in self.stateinfo: + # Validate all rules defined by functions + + + + for fname, f in self.funcsym[state]: + line = func_code(f).co_firstlineno + file = func_code(f).co_filename + self.files[file] = 1 + + tokname = self.toknames[fname] + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = func_code(f).co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) + self.error = 1 + continue + + if nargs < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) + self.error = 1 + continue + + if not f.__doc__: + self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) + self.error 
= 1 + continue + + try: + c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) + if c.match(""): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) + self.error = 1 + except re.error: + _etype, e, _etrace = sys.exc_info() + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) + if '#' in f.__doc__: + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) + self.error = 1 + + # Validate all rules defined by strings + for name,r in self.strsym[state]: + tokname = self.toknames[name] + if tokname == 'error': + self.log.error("Rule '%s' must be defined as a function", name) + self.error = 1 + continue + + if not tokname in self.tokens and tokname.find("ignore_") < 0: + self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) + self.error = 1 + continue + + try: + c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) + if (c.match("")): + self.log.error("Regular expression for rule '%s' matches empty string",name) + self.error = 1 + except re.error: + _etype, e, _etrace = sys.exc_info() + self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + if '#' in r: + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) + self.error = 1 + + if not self.funcsym[state] and not self.strsym[state]: + self.log.error("No rules defined for state '%s'",state) + self.error = 1 + + # Validate the error function + efunc = self.errorf.get(state,None) + if efunc: + f = efunc + line = func_code(f).co_firstlineno + file = func_code(f).co_filename + self.files[file] = 1 + + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = func_code(f).co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) + self.error = 1 + + if nargs < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) + self.error = 1 + + for f in self.files: + self.validate_file(f) + + + # ----------------------------------------------------------------------------- + # validate_file() + # + # This checks to see if there are duplicated t_rulename() functions or strings + # in the parser input file. This is done using a simple regular expression + # match on each line in the given file. + # ----------------------------------------------------------------------------- + + def validate_file(self,filename): + import os.path + base,ext = os.path.splitext(filename) + if ext != '.py': return # No idea what the file is. Return OK + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return # Couldn't find the file. Don't worry about it + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + + counthash = { } + linen = 1 + for l in lines: + m = fre.match(l) + if not m: + m = sre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + self.log.error("%s:%d: Rule %s redefined. 
Previously defined on line %d",filename,linen,name,prev) + self.error = 1 + linen += 1 + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): + global lexer + ldict = None + stateinfo = { 'INITIAL' : 'inclusive'} + lexobj = Lexer() + lexobj.lexoptimize = optimize + global token,input + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + if debug: + if debuglog is None: + debuglog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the lexer + if object: module = object + + if module: + _items = [(k,getattr(module,k)) for k in dir(module)] + ldict = dict(_items) + else: + ldict = get_caller_module_dict(2) + + # Collect parser information from the dictionary + linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo.get_all() + if not optimize: + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") + + if optimize and lextab: + try: + lexobj.readtab(lextab,ldict) + token = lexobj.token + input = lexobj.input + lexer = lexobj + return lexobj + + except ImportError: + pass + + # Dump some basic debugging information + if debug: + debuglog.info("lex: tokens = %r", linfo.tokens) + debuglog.info("lex: literals = %r", linfo.literals) + debuglog.info("lex: states = %r", linfo.stateinfo) + + # Build a dictionary of valid token names + lexobj.lextokens = { } + for n in linfo.tokens: + lexobj.lextokens[n] = 1 + + # Get literals specification + if isinstance(linfo.literals,(list,tuple)): + lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) + else: + lexobj.lexliterals = linfo.literals + + # Get the stateinfo dictionary + stateinfo = linfo.stateinfo + + regexs = { } + # Build the master regular expressions + for state in stateinfo: + regex_list = [] + + # Add rules defined by functions first + for fname, f in linfo.funcsym[state]: + line = func_code(f).co_firstlineno + file = func_code(f).co_filename + regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + + # Now add all of the simple rules + for name,r in linfo.strsym[state]: + regex_list.append("(?P<%s>%s)" % (name,r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + + regexs[state] = regex_list + + # Build the master regular expressions + + if debug: + debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + + for state in regexs: + lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexobj.lexstatere[state] = lexre + lexobj.lexstateretext[state] = re_text + lexobj.lexstaterenames[state] = re_names + if debug: + for i in range(len(re_text)): + debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + + # For inclusive states, we need to add the regular expressions from the INITIAL state + for state,stype in stateinfo.items(): + if state != "INITIAL" and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + + lexobj.lexstateinfo = stateinfo + lexobj.lexre = lexobj.lexstatere["INITIAL"] + 
lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + + # Set up ignore variables + lexobj.lexstateignore = linfo.ignore + lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + + # Set up error functions + lexobj.lexstateerrorf = linfo.errorf + lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + if not lexobj.lexerrorf: + errorlog.warning("No t_error rule is defined") + + # Check state information for ignore and error rules + for s,stype in stateinfo.items(): + if stype == 'exclusive': + if not s in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if not s in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + elif stype == 'inclusive': + if not s in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get("INITIAL",None) + if not s in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get("INITIAL","") + + # Create global versions of the token() and input() functions + token = lexobj.token + input = lexobj.input + lexer = lexobj + + # If in optimize mode, we write the lextab + if lextab and optimize: + lexobj.writetab(lextab,outputdir) + + return lexobj + +# ----------------------------------------------------------------------------- +# runmain() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None,data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + sys.stdout.write("Reading from standard input (type EOF to end):\n") + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while 1: + tok = _token() + if not tok: break + sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + +# ----------------------------------------------------------------------------- +# @TOKEN(regex) +# +# This decorator function can be used to set the regex expression on a function +# when its docstring might need to be set in an alternative way +# ----------------------------------------------------------------------------- + +def TOKEN(r): + def set_doc(f): + if hasattr(r,"__call__"): + f.__doc__ = r.__doc__ + else: + f.__doc__ = r + return f + return set_doc + +# Alternative spelling of the TOKEN decorator +Token = TOKEN + diff --git a/pyLogoCompiler/ply/lex.pyc b/pyLogoCompiler/ply/lex.pyc Binary files differnew file mode 100644 index 0000000..68a93f2 --- /dev/null +++ b/pyLogoCompiler/ply/lex.pyc diff --git a/pyLogoCompiler/ply/yacc.py b/pyLogoCompiler/ply/yacc.py new file mode 100644 index 0000000..2a48c91 --- /dev/null +++ b/pyLogoCompiler/ply/yacc.py @@ -0,0 +1,3279 @@ +# ----------------------------------------------------------------------------- +# ply: yacc.py +# +# Copyright (C) 2001-2009, +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# * Neither the name of the David Beazley or Dabeaz LLC may be used to
+#   endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+#
+# This implements an LR parser that is constructed from grammar rules defined
+# as Python functions. The grammar is specified by supplying the BNF inside
+# Python documentation strings. The inspiration for this technique was borrowed
+# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
+# Spark and the GNU bison utility.
+#
+# The current implementation is only somewhat object-oriented. The
+# LR parser itself is defined in terms of an object (which allows multiple
+# parsers to co-exist). However, most of the variables used during table
+# construction are defined in terms of global variables. Users shouldn't
+# notice unless they are trying to define multiple parsers at the same
+# time using threads (in which case they should have their head examined).
+#
+# This implementation supports both SLR and LALR(1) parsing. LALR(1)
+# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
+# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
+# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
+# by the more efficient DeRemer and Pennello algorithm.
+#
+# :::::::: WARNING :::::::
+#
+# Construction of LR parsing tables is fairly complicated and expensive.
+# To make this module run fast, a *LOT* of work has been put into
+# optimization---often at the expense of readability and what some might
+# consider to be good Python "coding style." Modify the code at your
+# own risk!
+# ----------------------------------------------------------------------------
+
+__version__    = "3.2"
+__tabversion__ = "3.2"         # Table version
+
+#-----------------------------------------------------------------------------
+#                     === User configurable parameters ===
+#
+# Change these to modify the default behavior of yacc (if you wish)
+#-----------------------------------------------------------------------------
+
+yaccdebug   = 0                # Debugging mode. If set, yacc generates
+                               # a 'parser.out' file in the current directory
+
+debug_file  = 'parser.out'     # Default name of the debugging file
+tab_module  = 'parsetab'       # Default name of the table module
+default_lr  = 'LALR'           # Default LR table generation method
+
+error_count = 3                # Number of symbols that must be shifted to leave recovery mode
+
+yaccdevel   = 0                # Set to True if developing yacc. This turns off optimized
+                               # implementations of certain functions.
+
+resultlimit = 40               # Size limit of results when running in debug mode.
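Rather than editing these constants (their values are captured as default arguments when the module is imported), callers normally pass explicit options to yacc(); a hedged sketch, with the import path assumed from this repository's layout:

    from pyLogoCompiler.ply import yacc

    # Enable the 'parser.out' dump and name the table module explicitly,
    # overriding the yaccdebug and tab_module defaults above for this build.
    parser = yacc.yacc(debug=1, tabmodule='parsetab')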
+ +pickle_protocol = 0 # Protocol to use when writing pickle files + +import re, types, sys, os.path + +# Compatibility function for python 2.6/3.0 +if sys.version_info[0] < 3: + def func_code(f): + return f.func_code +else: + def func_code(f): + return f.__code__ + +# Compatibility +try: + MAXINT = sys.maxint +except AttributeError: + MAXINT = sys.maxsize + +# Python 2.x/3.0 compatibility. +def load_ply_lex(): + if sys.version_info[0] < 3: + import lex + else: + import ply.lex as lex + return lex + +# This object is a stand-in for a logging object created by the +# logging module. PLY will use this by default to create things +# such as the parser.out file. If a user wants more detailed +# information, they can create their own logging object and pass +# it into PLY. + +class PlyLogger(object): + def __init__(self,f): + self.f = f + def debug(self,msg,*args,**kwargs): + self.f.write((msg % args) + "\n") + info = debug + + def warning(self,msg,*args,**kwargs): + self.f.write("WARNING: "+ (msg % args) + "\n") + + def error(self,msg,*args,**kwargs): + self.f.write("ERROR: " + (msg % args) + "\n") + + critical = debug + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self,name): + return self + def __call__(self,*args,**kwargs): + return self + +# Exception raised for yacc-related errors +class YaccError(Exception): pass + +# Format the result message that the parser produces when running in debug mode. +def format_result(r): + repr_str = repr(r) + if '\n' in repr_str: repr_str = repr(repr_str) + if len(repr_str) > resultlimit: + repr_str = repr_str[:resultlimit]+" ..." + result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + return result + + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if '\n' in repr_str: repr_str = repr(repr_str) + if len(repr_str) < 16: + return repr_str + else: + return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) +# .lexpos = Starting lex position +# .endlexpos = Ending lex position (optional, set automatically) + +class YaccSymbol: + def __str__(self): return self.type + def __repr__(self): return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. 
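Concretely, this is the p object each grammar rule receives; a brief sketch with a hypothetical rule showing the accessors described above (end positions are only filled in when parsing with tracking enabled):

    def p_expr_plus(p):
        'expr : expr PLUS term'
        p[0] = p[1] + p[3]        # index assignment writes the underlying .value slot
        line = p.lineno(1)        # starting line of the left operand
        span = p.lexspan(1)       # (lexpos, endlexpos) of the left operand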
+
+class YaccProduction:
+    def __init__(self,s,stack=None):
+        self.slice = s
+        self.stack = stack
+        self.lexer = None
+        self.parser = None
+    def __getitem__(self,n):
+        if n >= 0: return self.slice[n].value
+        else: return self.stack[n].value
+
+    def __setitem__(self,n,v):
+        self.slice[n].value = v
+
+    def __getslice__(self,i,j):
+        return [s.value for s in self.slice[i:j]]
+
+    def __len__(self):
+        return len(self.slice)
+
+    def lineno(self,n):
+        return getattr(self.slice[n],"lineno",0)
+
+    def set_lineno(self,n,lineno):
+        self.slice[n].lineno = lineno   # Bug fix: was "= n", which stored the slice index instead of the line number
+
+    def linespan(self,n):
+        startline = getattr(self.slice[n],"lineno",0)
+        endline = getattr(self.slice[n],"endlineno",startline)
+        return startline,endline
+
+    def lexpos(self,n):
+        return getattr(self.slice[n],"lexpos",0)
+
+    def lexspan(self,n):
+        startpos = getattr(self.slice[n],"lexpos",0)
+        endpos = getattr(self.slice[n],"endlexpos",startpos)
+        return startpos,endpos
+
+    def error(self):
+        raise SyntaxError
+
+
+# -----------------------------------------------------------------------------
+#                               == LRParser ==
+#
+# The LR Parsing engine.
+# -----------------------------------------------------------------------------
+
+class LRParser:
+    def __init__(self,lrtab,errorf):
+        self.productions = lrtab.lr_productions
+        self.action = lrtab.lr_action
+        self.goto = lrtab.lr_goto
+        self.errorfunc = errorf
+
+    def errok(self):
+        self.errorok = 1
+
+    def restart(self):
+        del self.statestack[:]
+        del self.symstack[:]
+        sym = YaccSymbol()
+        sym.type = '$end'
+        self.symstack.append(sym)
+        self.statestack.append(0)
+
+    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
+        if lexer is not None:
+            lexer.lineno = 0   # Line-number reset specific to this copy of PLY; guarded so parse() still works when no lexer is supplied
+        if debug or yaccdevel:
+            if isinstance(debug,int) or not debug:
+                debug = PlyLogger(sys.stderr)
+            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
+        elif tracking:
+            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
+        else:
+            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
+
+
+    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    # parsedebug().
+    #
+    # This is the debugging-enabled version of parse(). All changes made to the
+    # parsing engine should be made here. For the non-debugging version,
+    # copy this code to a method parseopt() and delete all of the sections
+    # enclosed in:
+    #
+    #      #--! DEBUG
+    #      statements
+    #      #--! DEBUG
+    #
+    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
+        lookahead = None                 # Current lookahead symbol
+        lookaheadstack = [ ]             # Stack of lookahead symbols
+        actions = self.action            # Local reference to action table (to avoid lookup on self.)
+        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
+        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
+        pslice  = YaccProduction(None)   # Production object passed to grammar rules
+        errorcount = 0                   # Used during error recovery
+
+        # --! DEBUG
+        debug.info("PLY: PARSE DEBUG START")
+        # --!
DEBUG + + # If no lexer was given, we will try to use the lex module + if not lexer: + lex = load_ply_lex() + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set up the state and symbol stacks + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = "$end" + symstack.append(sym) + state = 0 + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + # --! DEBUG + debug.debug('') + debug.debug('State : %s', state) + # --! DEBUG + + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = "$end" + + # --! DEBUG + debug.debug('Stack : %s', + ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + # --! DEBUG + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + # --! DEBUG + debug.debug("Action : Shift and goto state %s", t) + # --! DEBUG + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: errorcount -=1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + # --! DEBUG + if plen: + debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) + else: + debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) + + # --! DEBUG + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + # --! TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1,"endlineno",t1.lineno) + sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) + + # --! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + del statestack[-plen:] + p.callable(pslice) + # --! DEBUG + debug.info("Result : %s", format_result(pslice[0])) + # --! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + # --! 
TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + # --! TRACKING + + targ = [ sym ] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + p.callable(pslice) + # --! DEBUG + debug.info("Result : %s", format_result(pslice[0])) + # --! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n,"value",None) + # --! DEBUG + debug.info("Done : Returning %s", format_result(result)) + debug.info("PLY: PARSE DEBUG END") + # --! DEBUG + return result + + if t == None: + + # --! DEBUG + debug.error('Error : %s', + ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + # --! DEBUG + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = 0 + errtoken = lookahead + if errtoken.type == "$end": + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + if errtoken and not hasattr(errtoken,'lexer'): + errtoken.lexer = lexer + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + else: + sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + else: + sys.stderr.write("yacc: Parse error in input. EOF\n") + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != "$end": + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == "$end": + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. 
Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + state = statestack[-1] # Potential bug fix + + continue + + # Call an error function here + raise RuntimeError("yacc: internal parser error!!!\n") + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parseopt(). + # + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. + # Edit the debug version above, then copy any modifications to the method + # below while removing #--! DEBUG sections. + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + # If no lexer was given, we will try to use the lex module + if not lexer: + lex = load_ply_lex() + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set up the state and symbol stacks + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: errorcount -=1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + # --! TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1,"endlineno",t1.lineno) + sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) + + # --! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + del statestack[-plen:] + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + # --! TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + # --! TRACKING + + targ = [ sym ] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + return getattr(n,"value",None) + + if t == None: + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = 0 + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + if errtoken and not hasattr(errtoken,'lexer'): + errtoken.lexer = lexer + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + else: + sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + else: + sys.stderr.write("yacc: Parse error in input. EOF\n") + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. 
+ + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + state = statestack[-1] # Potential bug fix + + continue + + # Call an error function here + raise RuntimeError("yacc: internal parser error!!!\n") + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parseopt_notrack(). + # + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove + # code in the #--! TRACKING sections + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + # If no lexer was given, we will try to use the lex module + if not lexer: + lex = load_ply_lex() + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set up the state and symbol stacks + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. 
Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: errorcount -=1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + del statestack[-plen:] + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + targ = [ sym ] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) + symstack.pop() + statestack.pop() + state = statestack[-1] + sym.type = 'error' + lookahead = sym + errorcount = error_count + self.errorok = 0 + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + return getattr(n,"value",None) + + if t == None: + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = 0 + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + if errtoken and not hasattr(errtoken,'lexer'): + errtoken.lexer = lexer + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. 
The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + else: + sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + else: + sys.stderr.write("yacc: Parse error in input. EOF\n") + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + state = statestack[-1] # Potential bug fix + + continue + + # Call an error function here + raise RuntimeError("yacc: internal parser error!!!\n") + +# ----------------------------------------------------------------------------- +# === Grammar Representation === +# +# The following functions, classes, and variables are used to represent and +# manipulate the rules that make up a grammar. +# ----------------------------------------------------------------------------- + +import re + +# regex matching identifiers +_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# A grammar rule refers to a specification such as this: +# +# expr : expr PLUS term +# +# Here are the basic attributes defined on all productions +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','PLUS','term'] +# prec - Production precedence level +# number - Production number. +# func - Function that executes on reduce +# file - File where production function is defined +# lineno - Line number where production function is defined +# +# The following attributes are defined or optional. 
+# +# len - Length of the production (number of symbols on right hand side) +# usyms - Set of unique symbols found in the production +# ----------------------------------------------------------------------------- + +class Production(object): + reduced = 0 + def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func + self.callable = None + self.file = file + self.line = line + self.prec = precedence + + # Internal settings used during table construction + + self.len = len(self.prod) # Length of the production + + # Create a list of unique production symbols used in the production + self.usyms = [ ] + for s in self.prod: + if s not in self.usyms: + self.usyms.append(s) + + # List of all LR items for the production + self.lr_items = [] + self.lr_next = None + + # Create a string representation + if self.prod: + self.str = "%s -> %s" % (self.name," ".join(self.prod)) + else: + self.str = "%s -> <empty>" % self.name + + def __str__(self): + return self.str + + def __repr__(self): + return "Production("+str(self)+")" + + def __len__(self): + return len(self.prod) + + def __nonzero__(self): + return 1 + + def __getitem__(self,index): + return self.prod[index] + + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self,n): + if n > len(self.prod): return None + p = LRItem(self,n) + + # Precompute the list of productions immediately following. Hack. Remove later + try: + p.lr_after = Prodnames[p.prod[n+1]] + except (IndexError,KeyError): + p.lr_after = [] + try: + p.lr_before = p.prod[n-1] + except IndexError: + p.lr_before = None + + return p + + # Bind the production function name to a callable + def bind(self,pdict): + if self.func: + self.callable = pdict[self.func] + +# This class serves as a minimal standin for Production objects when +# reading table data from files. It only contains information +# actually used by the LR parsing engine, plus some additional +# debugging information. +class MiniProduction(object): + def __init__(self,str,name,len,func,file,line): + self.name = name + self.len = len + self.func = func + self.callable = None + self.file = file + self.line = line + self.str = str + def __str__(self): + return self.str + def __repr__(self): + return "MiniProduction(%s)" % self.str + + # Bind the production function name to a callable + def bind(self,pdict): + if self.func: + self.callable = pdict[self.func] + + +# ----------------------------------------------------------------------------- +# class LRItem +# +# This class represents a specific stage of parsing a production rule. For +# example: +# +# expr : expr . PLUS term +# +# In the above, the "." represents the current location of the parse. Here +# basic attributes: +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] +# number - Production number. +# +# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' +# then lr_next refers to 'expr -> expr PLUS . term' +# lr_index - LR item index (location of the ".") in the prod list. 
+# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# lr_after - List of all productions that immediately follow +# lr_before - Grammar symbol immediately before +# ----------------------------------------------------------------------------- + +class LRItem(object): + def __init__(self,p,n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = { } + self.prod.insert(n,".") + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = "%s -> %s" % (self.name," ".join(self.prod)) + else: + s = "%s -> <empty>" % self.name + return s + + def __repr__(self): + return "LRItem("+str(self)+")" + +# ----------------------------------------------------------------------------- +# rightmost_terminal() +# +# Return the rightmost terminal from a list of symbols. Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): pass + +class Grammar(object): + def __init__(self,terminals): + self.Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.Prodmap = { } # A dictionary that is only used to detect duplicate + # productions. + + self.Terminals = { } # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.Terminals[term] = [] + + self.Terminals['error'] = [] + + self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + self.First = { } # A dictionary of precomputed FIRST(x) symbols + + self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + + self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + + self.Start = None # Starting symbol for the grammar + + + def __len__(self): + return len(self.Productions) + + def __getitem__(self,index): + return self.Productions[index] + + # ----------------------------------------------------------------------------- + # set_precedence() + # + # Sets the precedence for a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. 
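+ #
+ # For illustration (an editor's sketch, not part of the original source),
+ # a user grammar's precedence table and the equivalent direct calls that
+ # yacc() derives from it, assuming calculator-style tokens:
+ #
+ #     precedence = (
+ #         ('left',  'PLUS',  'MINUS'),
+ #         ('left',  'TIMES', 'DIVIDE'),
+ #         ('right', 'UMINUS'),
+ #     )
+ #
+ #     g.set_precedence('PLUS',   'left',  1)   # level 1 binds loosest
+ #     g.set_precedence('TIMES',  'left',  2)
+ #     g.set_precedence('UMINUS', 'right', 3)   # level 3 binds tightest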
+ # + # ----------------------------------------------------------------------------- + + def set_precedence(self,term,assoc,level): + assert self.Productions == [None],"Must call set_precedence() before add_production()" + if term in self.Precedence: + raise GrammarError("Precedence already specified for terminal '%s'" % term) + if assoc not in ['left','right','nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.Precedence[term] = (assoc,level) + + # ----------------------------------------------------------------------------- + # add_production() + # + # Given an action function, this function assembles a production rule and + # computes its precedence level. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # Precedence is determined by the precedence of the right-most non-terminal + # or the precedence of a terminal specified by %prec. + # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def add_production(self,prodname,syms,func=None,file='',line=0): + + if prodname in self.Terminals: + raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + if prodname == 'error': + raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) + if not _is_identifier.match(prodname): + raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + + # Look for literal tokens + for n,s in enumerate(syms): + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) + if not c in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) + + # Determine the precedence level + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + if syms[-2] != '%prec': + raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + precname = syms[-1] + prodprec = self.Precedence.get(precname,None) + if not prodprec: + raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + else: + self.UsedPrecedence[precname] = 1 + del syms[-2:] # Drop %prec from the rule + else: + # If no %prec, precedence is determined by the rightmost terminal symbol + precname = rightmost_terminal(syms,self.Terminals) + prodprec = self.Precedence.get(precname,('right',0)) + + # See if the rule is already in the rulemap + map = "%s -> %s" % (prodname,syms) + if map in self.Prodmap: + m = self.Prodmap[map] + raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + + "Previous definition at %s:%d" % (m.file, m.line)) + + # From this point on, everything is valid. 
Create a new Production instance + pnumber = len(self.Productions) + if not prodname in self.Nonterminals: + self.Nonterminals[prodname] = [ ] + + # Add the production number to Terminals and Nonterminals + for t in syms: + if t in self.Terminals: + self.Terminals[t].append(pnumber) + else: + if not t in self.Nonterminals: + self.Nonterminals[t] = [ ] + self.Nonterminals[t].append(pnumber) + + # Create a production and add it to the list of productions + p = Production(pnumber,prodname,syms,prodprec,func,file,line) + self.Productions.append(p) + self.Prodmap[map] = p + + # Add to the global productions list + try: + self.Prodnames[prodname].append(p) + except KeyError: + self.Prodnames[prodname] = [ p ] + return 0 + + # ----------------------------------------------------------------------------- + # set_start() + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. + # ----------------------------------------------------------------------------- + + def set_start(self,start=None): + if not start: + start = self.Productions[1].name + if start not in self.Nonterminals: + raise GrammarError("start symbol %s undefined" % start) + self.Productions[0] = Production(0,"S'",[start]) + self.Nonterminals[start].append(0) + self.Start = start + + # ----------------------------------------------------------------------------- + # find_unreachable() + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if reachable[s]: + # We've already reached symbol s. + return + reachable[s] = 1 + for p in self.Prodnames.get(s,[]): + for r in p.prod: + mark_reachable_from(r) + + reachable = { } + for s in list(self.Terminals) + list(self.Nonterminals): + reachable[s] = 0 + + mark_reachable_from( self.Productions[0].prod[0] ) + + return [s for s in list(self.Nonterminals) + if not reachable[s]] + + # ----------------------------------------------------------------------------- + # infinite_cycles() + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). + # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # Terminals: + for t in self.Terminals: + terminates[t] = 1 + + terminates['$end'] = 1 + + # Nonterminals: + + # Initialize to false: + for n in self.Nonterminals: + terminates[n] = 0 + + # Then propagate termination until no change: + while 1: + some_change = 0 + for (n,pl) in self.Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = 0 + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = 1 + + if p_terminates: + # symbol n terminates! + if not terminates[n]: + terminates[n] = 1 + some_change = 1 + # Don't need to consider any more productions for this n. 
+ break + + if not some_change: + break + + infinite = [] + for (s,term) in terminates.items(): + if not term: + if not s in self.Prodnames and not s in self.Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + infinite.append(s) + + return infinite + + + # ----------------------------------------------------------------------------- + # undefined_symbols() + # + # Find all symbols that were used the grammar, but not defined as tokens or + # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol + # and prod is the production where the symbol was used. + # ----------------------------------------------------------------------------- + def undefined_symbols(self): + result = [] + for p in self.Productions: + if not p: continue + + for s in p.prod: + if not s in self.Prodnames and not s in self.Terminals and s != 'error': + result.append((s,p)) + return result + + # ----------------------------------------------------------------------------- + # unused_terminals() + # + # Find all terminals that were defined, but not used by the grammar. Returns + # a list of all symbols. + # ----------------------------------------------------------------------------- + def unused_terminals(self): + unused_tok = [] + for s,v in self.Terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + # ------------------------------------------------------------------------------ + # unused_rules() + # + # Find all grammar rules that were defined, but not used (maybe not reachable) + # Returns a list of productions. + # ------------------------------------------------------------------------------ + + def unused_rules(self): + unused_prod = [] + for s,v in self.Nonterminals.items(): + if not v: + p = self.Prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # unused_precedence() + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. term is the name of the terminal + # on which precedence was applied and precedence is a string such as 'left' or + # 'right' corresponding to the type of precedence. + # ----------------------------------------------------------------------------- + + def unused_precedence(self): + unused = [] + for termname in self.Precedence: + if not (termname in self.Terminals or termname in self.UsedPrecedence): + unused.append((termname,self.Precedence[termname][0])) + + return unused + + # ------------------------------------------------------------------------- + # _first() + # + # Compute the value of FIRST1(beta) where beta is a tuple of symbols. + # + # During execution of compute_first1, the result may be incomplete. + # Afterward (e.g., when called from compute_follow()), it will be complete. + # ------------------------------------------------------------------------- + def _first(self,beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [ ] + for x in beta: + x_produces_empty = 0 + + # Add all the non-<empty> symbols of First[x] to the result. + for f in self.First[x]: + if f == '<empty>': + x_produces_empty = 1 + else: + if f not in result: result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. 
+ break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('<empty>') + + return result + + # ------------------------------------------------------------------------- + # compute_first() + # + # Compute the value of FIRST1(X) for all symbols + # ------------------------------------------------------------------------- + def compute_first(self): + if self.First: + return self.First + + # Terminals: + for t in self.Terminals: + self.First[t] = [t] + + self.First['$end'] = ['$end'] + + # Nonterminals: + + # Initialize to the empty set: + for n in self.Nonterminals: + self.First[n] = [] + + # Then propagate symbols until no change: + while 1: + some_change = 0 + for n in self.Nonterminals: + for p in self.Prodnames[n]: + for f in self._first(p.prod): + if f not in self.First[n]: + self.First[n].append( f ) + some_change = 1 + if not some_change: + break + + return self.First + + # --------------------------------------------------------------------- + # compute_follow() + # + # Computes all of the follow sets for every non-terminal symbol. The + # follow set is the set of all symbols that might follow a given + # non-terminal. See the Dragon book, 2nd Ed. p. 189. + # --------------------------------------------------------------------- + def compute_follow(self,start=None): + # If already computed, return the result + if self.Follow: + return self.Follow + + # If first sets not computed yet, do that first. + if not self.First: + self.compute_first() + + # Add '$end' to the follow list of the start symbol + for k in self.Nonterminals: + self.Follow[k] = [ ] + + if not start: + start = self.Productions[1].name + + self.Follow[start] = [ '$end' ] + + while 1: + didadd = 0 + for p in self.Productions[1:]: + # Here is the production set + for i in range(len(p.prod)): + B = p.prod[i] + if B in self.Nonterminals: + # Okay. We got a non-terminal in a production + fst = self._first(p.prod[i+1:]) + hasempty = 0 + for f in fst: + if f != '<empty>' and f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = 1 + if f == '<empty>': + hasempty = 1 + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in self.Follow[p.name]: + if f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = 1 + if not didadd: break + return self.Follow + + + # ----------------------------------------------------------------------------- + # build_lritems() + # + # This function walks the list of productions and builds a complete set of the + # LR items. The LR items are stored in two ways: First, they are uniquely + # numbered and placed in the list _lritems. Second, a linked list of LR items + # is built for each production. For example: + # + # E -> E PLUS E + # + # Creates the list + # + # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] + # ----------------------------------------------------------------------------- + + def build_lritems(self): + for p in self.Productions: + lastlri = p + i = 0 + lr_items = [] + while 1: + if i > len(p): + lri = None + else: + lri = LRItem(p,i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.Prodnames[lri.prod[i+1]] + except (IndexError,KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i-1] + except IndexError: + lri.lr_before = None + + lastlri.lr_next = lri + if not lri: break + lr_items.append(lri) + lastlri = lri + i += 1 + p.lr_items = lr_items + +# ----------------------------------------------------------------------------- +# == Class LRTable == +# +# This basic class represents a basic table of LR parsing information. +# Methods for generating the tables are not defined here. They are defined +# in the derived class LRGeneratedTable. +# ----------------------------------------------------------------------------- + +class VersionError(YaccError): pass + +class LRTable(object): + def __init__(self): + self.lr_action = None + self.lr_goto = None + self.lr_productions = None + self.lr_method = None + + def read_table(self,module): + if isinstance(module,types.ModuleType): + parsetab = module + else: + if sys.version_info[0] < 3: + exec("import %s as parsetab" % module) + else: + env = { } + exec("import %s as parsetab" % module, env, env) + parsetab = env['parsetab'] + + if parsetab._tabversion != __tabversion__: + raise VersionError("yacc table file version is out of date") + + self.lr_action = parsetab._lr_action + self.lr_goto = parsetab._lr_goto + + self.lr_productions = [] + for p in parsetab._lr_productions: + self.lr_productions.append(MiniProduction(*p)) + + self.lr_method = parsetab._lr_method + return parsetab._lr_signature + + def read_pickle(self,filename): + try: + import cPickle as pickle + except ImportError: + import pickle + + in_f = open(filename,"rb") + + tabversion = pickle.load(in_f) + if tabversion != __tabversion__: + raise VersionError("yacc table file version is out of date") + self.lr_method = pickle.load(in_f) + signature = pickle.load(in_f) + self.lr_action = pickle.load(in_f) + self.lr_goto = pickle.load(in_f) + productions = pickle.load(in_f) + + self.lr_productions = [] + for p in productions: + self.lr_productions.append(MiniProduction(*p)) + + in_f.close() + return signature + + # Bind all production function names to callable objects in pdict + def bind_callables(self,pdict): + for p in self.lr_productions: + p.bind(pdict) + +# ----------------------------------------------------------------------------- +# === LR Generator === +# +# The following classes and functions are used to generate LR parsing tables on +# a grammar. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# digraph() +# traverse() +# +# The following two functions are used to compute set valued functions +# of the form: +# +# F(x) = F'(x) U U{F(y) | x R y} +# +# This is used to compute the values of Read() sets as well as FOLLOW sets +# in LALR(1) generation. 
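+#
+# As an illustrative sketch (an editor's addition, not original PLY code),
+# the fixed point can be exercised directly. With X = ['a','b'], a relation
+# where only 'a' R 'b' holds, and FP returning singleton base sets:
+#
+#     X  = ['a', 'b']
+#     R  = lambda x: ['b'] if x == 'a' else []
+#     FP = lambda x: [x.upper()]
+#     digraph(X, R, FP)     # returns {'a': ['A', 'B'], 'b': ['B']}
+#
+# i.e. F('a') = F'('a') U F('b'), exactly the equation above.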
+# +# Inputs: X - An input set +# R - A relation +# FP - Set-valued function +# ------------------------------------------------------------------------------ + +def digraph(X,R,FP): + N = { } + for x in X: + N[x] = 0 + stack = [] + F = { } + for x in X: + if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + return F + +def traverse(x,N,stack,F,X,R,FP): + stack.append(x) + d = len(stack) + N[x] = d + F[x] = FP(x) # F(X) <- F'(x) + + rel = R(x) # Get y's related to x + for y in rel: + if N[y] == 0: + traverse(y,N,stack,F,X,R,FP) + N[x] = min(N[x],N[y]) + for a in F.get(y,[]): + if a not in F[x]: F[x].append(a) + if N[x] == d: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): pass + +# ----------------------------------------------------------------------------- +# == LRGeneratedTable == +# +# This class implements the LR table generation algorithm. There are no +# public methods except for write() +# ----------------------------------------------------------------------------- + +class LRGeneratedTable(LRTable): + def __init__(self,grammar,method='LALR',log=None): + if method not in ['SLR','LALR']: + raise LALRError("Unsupported method %s" % method) + + self.grammar = grammar + self.lr_method = method + + # Set up the logger + if not log: + log = NullLogger() + self.log = log + + # Internal attributes + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures + + self._add_count = 0 # Internal counter used to detect cycles + + # Diagonistic information filled in by the table generator + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts + + self.sr_conflicts = [] + self.rr_conflicts = [] + + # Build the tables + self.grammar.build_lritems() + self.grammar.compute_first() + self.grammar.compute_follow() + self.lr_parse_table() + + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. + + def lr0_closure(self,I): + self._add_count += 1 + + # Add everything in I to J + J = I[:] + didadd = 1 + while didadd: + didadd = 0 + for j in J: + for x in j.lr_after: + if getattr(x,"lr0_added",0) == self._add_count: continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = self._add_count + didadd = 1 + + return J + + # Compute the LR(0) goto function goto(I,X) where I is a set + # of LR(0) items and X is a grammar symbol. This function is written + # in a way that guarantees uniqueness of the generated goto sets + # (i.e. the same goto set will never be returned as two different Python + # objects). With uniqueness, we can later do fast set comparisons using + # id(obj) instead of element-wise comparison. 
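+ #
+ # Illustrative sketch (an editor's addition): because results are cached
+ # under (id(I),x), repeated goto computations on the same item set return
+ # the identical object, so later state comparisons can be by identity:
+ #
+ #     g1 = self.lr0_goto(I, 'PLUS')
+ #     g2 = self.lr0_goto(I, 'PLUS')
+ #     assert g1 is g2        # same object, not merely equal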
+
+ def lr0_goto(self,I,x):
+ # First we look for a previously cached entry
+ g = self.lr_goto_cache.get((id(I),x),None)
+ if g: return g
+
+ # Now we generate the goto set in a way that guarantees uniqueness
+ # of the result
+
+ s = self.lr_goto_cache.get(x,None)
+ if not s:
+ s = { }
+ self.lr_goto_cache[x] = s
+
+ gs = [ ]
+ for p in I:
+ n = p.lr_next
+ if n and n.lr_before == x:
+ s1 = s.get(id(n),None)
+ if not s1:
+ s1 = { }
+ s[id(n)] = s1
+ gs.append(n)
+ s = s1
+ g = s.get('$end',None)
+ if not g:
+ if gs:
+ g = self.lr0_closure(gs)
+ s['$end'] = g
+ else:
+ s['$end'] = gs
+ self.lr_goto_cache[(id(I),x)] = g
+ return g
+
+ # Compute the LR(0) sets-of-items function
+ def lr0_items(self):
+
+ C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
+ i = 0
+ for I in C:
+ self.lr0_cidhash[id(I)] = i
+ i += 1
+
+ # Loop over the items in C and each grammar symbol
+ i = 0
+ while i < len(C):
+ I = C[i]
+ i += 1
+
+ # Collect all of the symbols that could possibly be in the goto(I,X) sets
+ asyms = { }
+ for ii in I:
+ for s in ii.usyms:
+ asyms[s] = None
+
+ for x in asyms:
+ g = self.lr0_goto(I,x)
+ if not g: continue
+ if id(g) in self.lr0_cidhash: continue
+ self.lr0_cidhash[id(g)] = len(C)
+ C.append(g)
+
+ return C
+
+ # -----------------------------------------------------------------------------
+ # ==== LALR(1) Parsing ====
+ #
+ # LALR(1) parsing is almost exactly the same as SLR except that instead of
+ # relying upon Follow() sets when performing reductions, a more selective
+ # lookahead set that incorporates the state of the LR(0) machine is utilized.
+ # Thus, we mainly just have to focus on calculating the lookahead sets.
+ #
+ # The method used here is due to DeRemer and Pennello (1982).
+ #
+ # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
+ # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
+ # Vol. 4, No. 4, Oct. 1982, pp. 615-649
+ #
+ # Further details can also be found in:
+ #
+ # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
+ # McGraw-Hill Book Company, (1985).
+ #
+ # -----------------------------------------------------------------------------
+
+ # -----------------------------------------------------------------------------
+ # compute_nullable_nonterminals()
+ #
+ # Creates a dictionary containing all of the non-terminals that might produce
+ # an empty production.
+ # -----------------------------------------------------------------------------
+
+ def compute_nullable_nonterminals(self):
+ nullable = {}
+ num_nullable = 0
+ while 1:
+ for p in self.grammar.Productions[1:]:
+ if p.len == 0:
+ nullable[p.name] = 1
+ continue
+ for t in p.prod:
+ if not t in nullable: break
+ else:
+ nullable[p.name] = 1
+ if len(nullable) == num_nullable: break
+ num_nullable = len(nullable)
+ return nullable
+
+ # -----------------------------------------------------------------------------
+ # find_nonterminal_transitions(C)
+ #
+ # Given a set of LR(0) items, this function finds all of the non-terminal
+ # transitions. These are transitions in which a dot appears immediately before
+ # a non-terminal. Returns a list of tuples of the form (state,N) where state
+ # is the state number and N is the nonterminal symbol.
+ #
+ # The input C is the set of LR(0) items.
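+ #
+ # Sketch (an editor's addition): for a hypothetical grammar with the rules
+ #
+ #     S' -> S        S -> E        E -> E PLUS NUMBER
+ #
+ # state 0 holds the items "S' -> . S", "S -> . E" and "E -> . E PLUS NUMBER",
+ # so the dot precedes the nonterminals S and E there and the returned list
+ # would include the tuples (0,'S') and (0,'E').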
+ # ----------------------------------------------------------------------------- + + def find_nonterminal_transitions(self,C): + trans = [] + for state in range(len(C)): + for p in C[state]: + if p.lr_index < p.len - 1: + t = (state,p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: trans.append(t) + state = state + 1 + return trans + + # ----------------------------------------------------------------------------- + # dr_relation() + # + # Computes the DR(p,A) relationships for non-terminal transitions. The input + # is a tuple (state,N) where state is a number and N is a nonterminal symbol. + # + # Returns a list of terminals. + # ----------------------------------------------------------------------------- + + def dr_relation(self,C,trans,nullable): + dr_set = { } + state,N = trans + terms = [] + + g = self.lr0_goto(C[state],N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == self.grammar.Productions[0].prod[0]: + terms.append('$end') + + return terms + + # ----------------------------------------------------------------------------- + # reads_relation() + # + # Computes the READS() relation (p,A) READS (t,C). + # ----------------------------------------------------------------------------- + + def reads_relation(self,C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = self.lr0_goto(C[state],N) + j = self.lr0_cidhash.get(id(g),-1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j,a)) + + return rel + + # ----------------------------------------------------------------------------- + # compute_lookback_includes() + # + # Determines the lookback and includes relations + # + # LOOKBACK: + # + # This relation is determined by running the LR(0) state machine forward. + # For example, starting with a production "N : . A B C", we run it forward + # to obtain "N : A B C ." We then build a relationship between this final + # state and the starting state. These relationships are stored in a dictionary + # lookdict. + # + # INCLUDES: + # + # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). + # + # This relation is used to determine non-terminal transitions that occur + # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) + # if the following holds: + # + # B -> LAT, where T -> epsilon and p' -L-> p + # + # L is essentially a prefix (which may be empty), T is a suffix that must be + # able to derive an empty string. State p' must lead to state p with the string L. + # + # ----------------------------------------------------------------------------- + + def compute_lookback_includes(self,C,trans,nullable): + + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations + + # Make a dictionary of non-terminal transitions + dtrans = {} + for t in trans: + dtrans[t] = 1 + + # Loop over all transitions and compute lookbacks and includes + for state,N in trans: + lookb = [] + includes = [] + for p in C[state]: + if p.name != N: continue + + # Okay, we have a name match. We now follow the production all the way + # through the state machine until we get the . 
on the right hand side
+
+ lr_index = p.lr_index
+ j = state
+ while lr_index < p.len - 1:
+ lr_index = lr_index + 1
+ t = p.prod[lr_index]
+
+ # Check to see if this symbol and state are a non-terminal transition
+ if (j,t) in dtrans:
+ # Yes. Okay, there is some chance that this is an includes relation;
+ # the only way to know for certain is whether the rest of the
+ # production derives empty
+
+ li = lr_index + 1
+ while li < p.len:
+ if p.prod[li] in self.grammar.Terminals: break # No, forget it
+ if not p.prod[li] in nullable: break
+ li = li + 1
+ else:
+ # Appears to be a relation between (j,t) and (state,N)
+ includes.append((j,t))
+
+ g = self.lr0_goto(C[j],t) # Go to next set
+ j = self.lr0_cidhash.get(id(g),-1) # Go to next state
+
+ # When we get here, j is the final state; now we have to locate the production
+ for r in C[j]:
+ if r.name != p.name: continue
+ if r.len != p.len: continue
+ i = 0
+ # This loop is comparing a production ". A B C" with "A B C ."
+ while i < r.lr_index:
+ if r.prod[i] != p.prod[i+1]: break
+ i = i + 1
+ else:
+ lookb.append((j,r))
+ for i in includes:
+ if not i in includedict: includedict[i] = []
+ includedict[i].append((state,N))
+ lookdict[(state,N)] = lookb
+
+ return lookdict,includedict
+
+ # -----------------------------------------------------------------------------
+ # compute_read_sets()
+ #
+ # Given a set of LR(0) items, this function computes the read sets.
+ #
+ # Inputs: C = Set of LR(0) items
+ # ntrans = Set of nonterminal transitions
+ # nullable = Set of empty transitions
+ #
+ # Returns a set containing the read sets
+ # -----------------------------------------------------------------------------
+
+ def compute_read_sets(self,C, ntrans, nullable):
+ FP = lambda x: self.dr_relation(C,x,nullable)
+ R = lambda x: self.reads_relation(C,x,nullable)
+ F = digraph(ntrans,R,FP)
+ return F
+
+ # -----------------------------------------------------------------------------
+ # compute_follow_sets()
+ #
+ # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
+ # and an include set, this function computes the follow sets
+ #
+ # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
+ #
+ # Inputs:
+ # ntrans = Set of nonterminal transitions
+ # readsets = Readset (previously computed)
+ # inclsets = Include sets (previously computed)
+ #
+ # Returns a set containing the follow sets
+ # -----------------------------------------------------------------------------
+
+ def compute_follow_sets(self,ntrans,readsets,inclsets):
+ FP = lambda x: readsets[x]
+ R = lambda x: inclsets.get(x,[])
+ F = digraph(ntrans,R,FP)
+ return F
+
+ # -----------------------------------------------------------------------------
+ # add_lookaheads()
+ #
+ # Attaches the lookahead symbols to grammar rules.
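+ #
+ # Hypothetical shapes (an editor's sketch, not original PLY code) to make
+ # the wiring concrete:
+ #
+ #     lookbacks = { (0,'expr'): [(7, p_expr)] }
+ #     followset = { (0,'expr'): ['PLUS', '$end'] }
+ #
+ # would leave p_expr.lookaheads[7] == ['PLUS', '$end'], which is what
+ # lr_parse_table() later consults when deciding reductions in state 7.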
+ # + # Inputs: lookbacks - Set of lookback relations + # followset - Computed follow set + # + # This function directly attaches the lookaheads to productions contained + # in the lookbacks set + # ----------------------------------------------------------------------------- + + def add_lookaheads(self,lookbacks,followset): + for trans,lb in lookbacks.items(): + # Loop over productions in lookback + for state,p in lb: + if not state in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans,[]) + for a in f: + if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # add_lalr_lookaheads() + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self,C): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(C) + + # Compute read sets + readsets = self.compute_read_sets(C,trans,nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(C,trans,nullable) + + # Compute LALR FOLLOW sets + followsets = self.compute_follow_sets(trans,readsets,included) + + # Add all of the lookaheads + self.add_lookaheads(lookd,followsets) + + # ----------------------------------------------------------------------------- + # lr_parse_table() + # + # This function constructs the parse tables for SLR or LALR + # ----------------------------------------------------------------------------- + def lr_parse_table(self): + Productions = self.grammar.Productions + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output + + actionp = { } # Action production array (temporary) + + log.info("Parsing method: %s", self.lr_method) + + # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items + # This determines the number of states + + C = self.lr0_items() + + if self.lr_method == 'LALR': + self.add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [ ] # List of actions + st_action = { } + st_actionp = { } + st_goto = { } + log.info("") + log.info("state %d", st) + log.info("") + for p in I: + log.info(" (%d) %s", p.number, str(p)) + log.info("") + + for p in I: + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action["$end"] = 0 + st_actionp["$end"] = p + else: + # We are at the end of a production. Reduce! + if self.lr_method == 'LALR': + laheads = p.lookaheads[st] + else: + laheads = self.grammar.Follow[p.name] + for a in laheads: + actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) + r = st_action.get(a,None) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec,slevel = Productions[st_actionp[a].number].prec + rprec,rlevel = Precedence.get(a,('right',0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(" ! 
shift/reduce conflict for %s resolved as reduce",a) + self.sr_conflicts.append((st,a,'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(" ! shift/reduce conflict for %s resolved as shift",a) + self.sr_conflicts.append((st,a,'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp,rejectp = pp,oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 + else: + chosenp,rejectp = oldp,pp + self.rr_conflicts.append((st,chosenp,rejectp)) + log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + else: + raise LALRError("Unknown conflict in state %d" % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I,a) + j = self.lr0_cidhash.get(id(g),-1) + if j >= 0: + # We are in a shift state + actlist.append((a,p,"shift and go to state %d" % j)) + r = st_action.get(a,None) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError("Shift/shift conflict in state %d" % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec,rlevel = Productions[st_actionp[a].number].prec + sprec,slevel = Precedence.get(a,('right',0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(" ! shift/reduce conflict for %s resolved as shift",a) + self.sr_conflicts.append((st,a,'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(" ! shift/reduce conflict for %s resolved as reduce",a) + self.sr_conflicts.append((st,a,'reduce')) + + else: + raise LALRError("Unknown conflict in state %d" % st) + else: + st_action[a] = j + st_actionp[a] = p + + # Print the actions associated with each terminal + _actprint = { } + for a,p,m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(" %-15s %s",a,m) + _actprint[(a,m)] = 1 + log.info("") + # Print the actions that were not used. (debugging) + not_used = 0 + for a,p,m in actlist: + if a in st_action: + if p is not st_actionp[a]: + if not (a,m) in _actprint: + log.debug(" ! 
%-15s [ %s ]",a,m) + not_used = 1 + _actprint[(a,m)] = 1 + if not_used: + log.debug("") + + # Construct the goto table for this state + + nkeys = { } + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I,n) + j = self.lr0_cidhash.get(id(g),-1) + if j >= 0: + st_goto[n] = j + log.info(" %-30s shift and go to state %d",n,j) + + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + + + # ----------------------------------------------------------------------------- + # write() + # + # This function writes the LR parsing tables to a file + # ----------------------------------------------------------------------------- + + def write_table(self,modulename,outputdir='',signature=""): + basemodulename = modulename.split(".")[-1] + filename = os.path.join(outputdir,basemodulename) + ".py" + try: + f = open(filename,"w") + + f.write(""" +# %s +# This file is automatically generated. Do not edit. +_tabversion = %r + +_lr_method = %r + +_lr_signature = %r + """ % (filename, __tabversion__, self.lr_method, signature)) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = { } + + for s,nd in self.lr_action.items(): + for name,v in nd.items(): + i = items.get(name) + if not i: + i = ([],[]) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write("\n_lr_action_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = { } + _lr_action[_x][_k] = _y +del _lr_action_items +""") + + else: + f.write("\n_lr_action = { "); + for k,v in self.lr_action.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + if smaller: + # Factor out names to try and make smaller + items = { } + + for s,nd in self.lr_goto.items(): + for name,v in nd.items(): + i = items.get(name) + if not i: + i = ([],[]) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write("\n_lr_goto_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_goto: _lr_goto[_x] = { } + _lr_goto[_x][_k] = _y +del _lr_goto_items +""") + else: + f.write("\n_lr_goto = { "); + for k,v in self.lr_goto.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + # Write production table + f.write("_lr_productions = [\n") + for p in self.lr_productions: + if p.func: + f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) + else: + f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) + f.write("]\n") + f.close() + + except IOError: + e = sys.exc_info()[1] + sys.stderr.write("Unable to create '%s'\n" % filename) + sys.stderr.write(str(e)+"\n") + return + + + # ----------------------------------------------------------------------------- + # pickle_table() + # + # This function pickles the LR parsing tables to a supplied file object + # ----------------------------------------------------------------------------- + + def pickle_table(self,filename,signature=""): + try: + 
import cPickle as pickle + except ImportError: + import pickle + outf = open(filename,"wb") + pickle.dump(__tabversion__,outf,pickle_protocol) + pickle.dump(self.lr_method,outf,pickle_protocol) + pickle.dump(signature,outf,pickle_protocol) + pickle.dump(self.lr_action,outf,pickle_protocol) + pickle.dump(self.lr_goto,outf,pickle_protocol) + + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) + else: + outp.append((str(p),p.name,p.len,None,None,None)) + pickle.dump(outp,outf,pickle_protocol) + outf.close() + +# ----------------------------------------------------------------------------- +# === INTROSPECTION === +# +# The following functions and classes are used to implement the PLY +# introspection features followed by the yacc() function itself. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- + +def get_caller_module_dict(levels): + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + while levels > 0: + f = f.f_back + levels -= 1 + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + + return ldict + +# ----------------------------------------------------------------------------- +# parse_grammar() +# +# This takes a raw grammar rule string and parses it into production data +# ----------------------------------------------------------------------------- +def parse_grammar(doc,file,line): + grammar = [] + # Split the doc string into lines + pstrings = doc.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + + grammar.append((file,dline,prodname,syms)) + except SyntaxError: + raise + except Exception: + raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + + return grammar + +# ----------------------------------------------------------------------------- +# ParserReflect() +# +# This class represents information extracted for building a parser including +# start symbol, error function, tokens, precedence list, action functions, +# etc. 
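+#
+# As a hedged example (an editor's addition), the dictionary examined here is
+# simply the caller's namespace, so a minimal grammar module contributes
+# entries such as:
+#
+#     tokens = ('NUMBER', 'PLUS')
+#
+#     def p_expr_plus(p):
+#         'expr : expr PLUS NUMBER'
+#         p[0] = p[1] + p[3]
+#
+#     def p_error(p):
+#         print("Syntax error at %r" % (p,))
+#
+# get_all() collects these as self.tokens, self.pfuncs and self.error_func.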
+# ----------------------------------------------------------------------------- +class ParserReflect(object): + def __init__(self,pdict,log=None): + self.pdict = pdict + self.start = None + self.error_func = None + self.tokens = None + self.files = {} + self.grammar = [] + self.error = 0 + + if log is None: + self.log = PlyLogger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_start() + self.get_error_func() + self.get_tokens() + self.get_precedence() + self.get_pfunctions() + + # Validate all of the information + def validate_all(self): + self.validate_start() + self.validate_error_func() + self.validate_tokens() + self.validate_precedence() + self.validate_pfunctions() + self.validate_files() + return self.error + + # Compute a signature over the grammar + def signature(self): + try: + from hashlib import md5 + except ImportError: + from md5 import md5 + try: + sig = md5() + if self.start: + sig.update(self.start.encode('latin-1')) + if self.prec: + sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + if self.tokens: + sig.update(" ".join(self.tokens).encode('latin-1')) + for f in self.pfuncs: + if f[3]: + sig.update(f[3].encode('latin-1')) + except (TypeError,ValueError): + pass + return sig.digest() + + # ----------------------------------------------------------------------------- + # validate_file() + # + # This method checks to see if there are duplicated p_rulename() functions + # in the parser module file. Without this function, it is really easy for + # users to make mistakes by cutting and pasting code fragments (and it's a real + # bugger to try and figure out why the resulting parser doesn't work). Therefore, + # we just do a little regular expression pattern matching of def statements + # to try and detect duplicates. + # ----------------------------------------------------------------------------- + + def validate_files(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for filename in self.files.keys(): + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea. Assume it's okay. + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + continue + + counthash = { } + for linen,l in enumerate(lines): + linen += 1 + m = fre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + self.log.warning("%s:%d: Function %s redefined. 
Previously defined on line %d", filename,linen,name,prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start,str): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func,types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = 1 + return + + eline = func_code(self.error_func).co_firstlineno + efile = func_code(self.error_func).co_filename + self.files[efile] = 1 + + if (func_code(self.error_func).co_argcount != 1+ismethod): + self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) + self.error = 1 + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get("tokens",None) + if not tokens: + self.log.error("No token list is defined") + self.error = 1 + return + + if not isinstance(tokens,(list, tuple)): + self.log.error("tokens must be a list or tuple") + self.error = 1 + return + + if not tokens: + self.log.error("tokens is empty") + self.error = 1 + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = 1 + return + + terminals = {} + for n in self.tokens: + if n in terminals: + self.log.warning("Token '%s' multiply defined", n) + terminals[n] = 1 + + # Get the precedence map (if any) + def get_precedence(self): + self.prec = self.pdict.get("precedence",None) + + # Validate and parse the precedence map + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec,(list,tuple)): + self.log.error("precedence must be a list or tuple") + self.error = 1 + return + for level,p in enumerate(self.prec): + if not isinstance(p,(list,tuple)): + self.log.error("Bad precedence table") + self.error = 1 + return + + if len(p) < 2: + self.log.error("Malformed precedence entry %s. 
Must be (assoc, term, ..., term)",p) + self.error = 1 + return + assoc = p[0] + if not isinstance(assoc,str): + self.log.error("precedence associativity must be a string") + self.error = 1 + return + for term in p[1:]: + if not isinstance(term,str): + self.log.error("precedence items must be strings") + self.error = 1 + return + preclist.append((term,assoc,level+1)) + self.preclist = preclist + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if name[:2] != 'p_': continue + if name == 'p_error': continue + if isinstance(item,(types.FunctionType,types.MethodType)): + line = func_code(item).co_firstlineno + file = func_code(item).co_filename + p_functions.append((line,file,name,item.__doc__)) + + # Sort all of the actions by line number + p_functions.sort() + self.pfuncs = p_functions + + + # Validate all of the p_functions + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if len(self.pfuncs) == 0: + self.log.error("no rules of the form p_rulename are defined") + self.error = 1 + return + + for line, file, name, doc in self.pfuncs: + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func_code(func).co_argcount > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) + self.error = 1 + elif func_code(func).co_argcount < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) + self.error = 1 + elif not func.__doc__: + self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + else: + try: + parsed_g = parse_grammar(doc,file,line) + for g in parsed_g: + grammar.append((name, g)) + except SyntaxError: + e = sys.exc_info()[1] + self.log.error(str(e)) + self.error = 1 + + # Looks like a valid grammar rule + # Mark the file in which defined. + self.files[file] = 1 + + # Secondary validation step that looks for p_ definitions that are not functions + # or functions that look like they might be grammar rules. 
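+ #
+ # For example (an editor's illustration), either of these definitions in the
+ # user's module would be flagged by the loop below:
+ #
+ #     p_rule = "expr : expr PLUS term"     # 'p_' name that is not a function
+ #
+ #     def expr_plus(p):                    # looks like a rule, lacks p_ prefix
+ #         'expr : expr PLUS term'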
+ + for n,v in self.pdict.items(): + if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue + if n[0:2] == 't_': continue + if n[0:2] == 'p_' and n != 'p_error': + self.log.warning("'%s' not defined as a function", n) + if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or + (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): + try: + doc = v.__doc__.split(" ") + if doc[1] == ':': + self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", + func_code(v).co_filename, func_code(v).co_firstlineno,n) + except Exception: + pass + + self.grammar = grammar + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build a parser +# ----------------------------------------------------------------------------- + +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', + debuglog=None, errorlog = None, picklefile=None): + + global parse # Reference to the parsing method of the last built parser + + # If pickling is enabled, table files are not created + + if picklefile: + write_tables = 0 + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the parser + if module: + _items = [(k,getattr(module,k)) for k in dir(module)] + pdict = dict(_items) + else: + pdict = get_caller_module_dict(2) + + # Collect parser information from the dictionary + pinfo = ParserReflect(pdict,log=errorlog) + pinfo.get_all() + + if pinfo.error: + raise YaccError("Unable to build parser") + + # Check signature against table files (if any) + signature = pinfo.signature() + + # Read the tables + try: + lr = LRTable() + if picklefile: + read_signature = lr.read_pickle(picklefile) + else: + read_signature = lr.read_table(tabmodule) + if optimize or (read_signature == signature): + try: + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr,pinfo.error_func) + parse = parser.parse + #print "A" * 40, pinfo.error_func.__doc__ + return parser + except Exception: + e = sys.exc_info()[1] + errorlog.warning("There was a problem loading the table file: %s", repr(e)) + except VersionError: + e = sys.exc_info() + errorlog.warning(str(e)) + except Exception: + pass + + if debuglog is None: + if debug: + debuglog = PlyLogger(open(debugfile,"w")) + else: + debuglog = NullLogger() + + debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) + + + errors = 0 + + # Validate the parser information + if pinfo.validate_all(): + raise YaccError("Unable to build parser") + + if not pinfo.error_func: + errorlog.warning("no p_error() function is defined") + + # Create a grammar object + grammar = Grammar(pinfo.tokens) + + # Set precedence level for terminals + for term, assoc, level in pinfo.preclist: + try: + grammar.set_precedence(term,assoc,level) + except GrammarError: + e = sys.exc_info()[1] + errorlog.warning("%s",str(e)) + + # Add productions to the grammar + for funcname, gram in pinfo.grammar: + file, line, prodname, syms = gram + try: + grammar.add_production(prodname,syms,funcname,file,line) + except GrammarError: + e = sys.exc_info()[1] + errorlog.error("%s",str(e)) + errors = 1 + + # Set the grammar start symbols + try: + if start is None: + grammar.set_start(pinfo.start) + else: + grammar.set_start(start) + except GrammarError: + e = sys.exc_info()[1] + errorlog.error(str(e)) + errors = 1 + + if errors: + raise 
YaccError("Unable to build parser") + + # Verify the grammar structure + undefined_symbols = grammar.undefined_symbols() + for sym, prod in undefined_symbols: + errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) + errors = 1 + + unused_terminals = grammar.unused_terminals() + if unused_terminals: + debuglog.info("") + debuglog.info("Unused terminals:") + debuglog.info("") + for term in unused_terminals: + errorlog.warning("Token '%s' defined, but not used", term) + debuglog.info(" %s", term) + + # Print out all productions to the debug log + if debug: + debuglog.info("") + debuglog.info("Grammar") + debuglog.info("") + for n,p in enumerate(grammar.Productions): + debuglog.info("Rule %-5d %s", n, p) + + # Find unused non-terminals + unused_rules = grammar.unused_rules() + for prod in unused_rules: + errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + + if len(unused_terminals) == 1: + errorlog.warning("There is 1 unused token") + if len(unused_terminals) > 1: + errorlog.warning("There are %d unused tokens", len(unused_terminals)) + + if len(unused_rules) == 1: + errorlog.warning("There is 1 unused rule") + if len(unused_rules) > 1: + errorlog.warning("There are %d unused rules", len(unused_rules)) + + if debug: + debuglog.info("") + debuglog.info("Terminals, with rules where they appear") + debuglog.info("") + terms = list(grammar.Terminals) + terms.sort() + for term in terms: + debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info("") + debuglog.info("Nonterminals, with rules where they appear") + debuglog.info("") + nonterms = list(grammar.Nonterminals) + nonterms.sort() + for nonterm in nonterms: + debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info("") + + if check_recursion: + unreachable = grammar.find_unreachable() + for u in unreachable: + errorlog.warning("Symbol '%s' is unreachable",u) + + infinite = grammar.infinite_cycles() + for inf in infinite: + errorlog.error("Infinite recursion detected for symbol '%s'", inf) + errors = 1 + + unused_prec = grammar.unused_precedence() + for term, assoc in unused_prec: + errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) + errors = 1 + + if errors: + raise YaccError("Unable to build parser") + + # Run the LRGeneratedTable on the grammar + if debug: + errorlog.debug("Generating %s tables", method) + + lr = LRGeneratedTable(grammar,method,debuglog) + + if debug: + num_sr = len(lr.sr_conflicts) + + # Report shift/reduce and reduce/reduce conflicts + if num_sr == 1: + errorlog.warning("1 shift/reduce conflict") + elif num_sr > 1: + errorlog.warning("%d shift/reduce conflicts", num_sr) + + num_rr = len(lr.rr_conflicts) + if num_rr == 1: + errorlog.warning("1 reduce/reduce conflict") + elif num_rr > 1: + errorlog.warning("%d reduce/reduce conflicts", num_rr) + + # Write out conflicts to the output file + if debug and (lr.sr_conflicts or lr.rr_conflicts): + debuglog.warning("") + debuglog.warning("Conflicts:") + debuglog.warning("") + + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) + + already_reported = {} + for state, rule, rejected in lr.rr_conflicts: + if (state,id(rule),id(rejected)) in already_reported: + continue + debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 
+ debuglog.warning("rejected rule (%s) in state %d", rejected,state) + errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) + errorlog.warning("rejected rule (%s) in state %d", rejected, state) + already_reported[state,id(rule),id(rejected)] = 1 + + warned_never = [] + for state, rule, rejected in lr.rr_conflicts: + if not rejected.reduced and (rejected not in warned_never): + debuglog.warning("Rule (%s) is never reduced", rejected) + errorlog.warning("Rule (%s) is never reduced", rejected) + warned_never.append(rejected) + + # Write the table file if requested + if write_tables: + lr.write_table(tabmodule,outputdir,signature) + + # Write a pickled version of the tables + if picklefile: + lr.pickle_table(picklefile,signature) + + # Build the parser + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr,pinfo.error_func) + #print "B" * 40, pinfo.error_func.__doc__ + + parse = parser.parse + return parser diff --git a/pyLogoCompiler/ply/yacc.pyc b/pyLogoCompiler/ply/yacc.pyc Binary files differnew file mode 100644 index 0000000..7c1718c --- /dev/null +++ b/pyLogoCompiler/ply/yacc.pyc diff --git a/pyLogoCompiler/pyLex.py b/pyLogoCompiler/pyLex.py new file mode 100644 index 0000000..9b87593 --- /dev/null +++ b/pyLogoCompiler/pyLex.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +#----------------------------BRGOGO---------------------------------------- +#Project site: http://br-gogo.sourceforge.net +# +# Name: pyLogo +# Origiginal from: Marcelo Barbosa +# About: Esta é uma versão da linguagem Logo em python para a placa Gogo board +# +# Modified by: Felipe Augusto Silva +# email: suportegogo@gmail.com +# compiler version: 0.23 +#----------------------------------------------------------------------------- + +from gettext import gettext as _ + +import os +import ply.lex as lex +from ply.lex import TOKEN + +errMsgFunc = None + +# Lista dos nomes dos tokens. +#Palavras reservadas +reserved = { + 'to' : 'TO', + 'end' : 'END', + 'output' : 'OUTPUT', + 'repeat' : 'REPEAT', + 'if' : 'IF', + 'ifelse' : 'IFELSE', + 'beep' : 'BEEP', + 'waituntil' : 'WAITUNTIL', + 'loop' : 'LOOP', + 'forever' : 'FOREVER', + 'wait' : 'WAIT', + 'stop' : 'STOP', + 'reset' : 'RESET', + 'send' : 'SEND', + 'make' : 'MAKE', + 'resetdp' : 'RESETDP', + 'record' : 'RECORD', + 'erase' : 'ERASE', + 'on' : 'ON', + 'onfor' : 'ONFOR', + 'off' : 'OFF', + 'thisway' : 'THISWAY', + 'thatway' : 'THATWAY', + 'rd' : 'RD', + 'brake' : 'BRAKE', + 'setsvh' : 'SETSVH', + 'svr' : 'SVR', + 'svl' : 'SVL', + 'setpower' : 'SETPOWER', + 'ledon' : 'LEDON', + 'ledoff' : 'LEDOFF', + 'i2c_start' : 'I2C_START', + 'i2c_stop' : 'I2C_STOP', + 'i2c_read' : 'I2C_READ', + 'i2c_write' : 'I2C_WRITE', + 'show' : 'SHOW', + 'and' : 'AND', + 'or' : 'OR', + 'xor' : 'XOR', + 'not' : 'NOT', + 'timer' : 'TIMER', + 'serial' : 'SERIAL', + 'newir?' 
: 'NEWIRQ',
+    'random' : 'RANDOM',
+    'recall' : 'RECALL',
+    'sensor1' : 'SENSOR1',
+    'sensor2' : 'SENSOR2',
+    'sensor3' : 'SENSOR3',
+    'sensor4' : 'SENSOR4',
+    'sensor5' : 'SENSOR5',
+    'sensor6' : 'SENSOR6',
+    'sensor7' : 'SENSOR7',
+    'sensor8' : 'SENSOR8',
+    'switch1' : 'SWITCH1',
+    'switch2' : 'SWITCH2',
+    'switch3' : 'SWITCH3',
+    'switch4' : 'SWITCH4',
+    'switch5' : 'SWITCH5',
+    'switch6' : 'SWITCH6',
+    'switch7' : 'SWITCH7',
+    'switch8' : 'SWITCH8',
+    'highbyte' : 'HIGHBYTE',
+    'lowbyte' : 'LOWBYTE',
+    'bsend' : 'BSEND',
+    'bsr' : 'BSR',
+    'when' : 'WHEN',
+    'whenoff' : 'WHENOFF',
+    'setdp' : 'SETDP',
+    'fastsend' : 'FASTSEND',
+    }
+
+# Token list
+tokens = ['MINUS', 'PERCENT', 'LPAREN', 'RPAREN', 'TIMES', 'DIVIDE', 'BYTES',
+          'LBRACKET', 'RBRACKET', 'PLUS', 'LESSTHAN', 'EQUALS', 'GREATERTHAN', 'MOTORATTENTION',
+          'NUMBERLITERAL', 'PROCEDURENAME', 'RECEIVER', 'REPORTER'] + reserved.values()
+
+#------------------------------------ Token specification ---------------------------------
+
+##states = (
+##    ('procedure','exclusive'),
+##    ('global','inclusive'),
+##)
+
+# Regular expressions for simple tokens
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_PERCENT = r'\%'
+t_LBRACKET = r'\['
+t_RBRACKET = r'\]'
+t_LESSTHAN = r'\<'
+t_EQUALS = r'\='
+t_GREATERTHAN = r'\>'
+
+# Regular expressions with action code
+# Building blocks for the identifier patterns
+digit = r'([0-9])'
+letter = r'([a-zA-Z_])'
+alphanumeric = r'([a-zA-Z0-9_])'
+
+procname = r'(' + letter + r'(' + alphanumeric + r')*)'
+reporter = r':(' + letter + r'(' + alphanumeric + r')*)'
+receiver = r'"(' + letter + r'(' + alphanumeric + r')*)'
+
+bytes = r'0x(' + digit + r')+'
+nliteral = r'(' + digit + r')+'
+names = r'([a-d])+'
+motor = r'(' + names + r'),'
+
+# DMOC 101217: Following to avoid possible problem due to doc-strings being stripped for optimisation:
+
+id = r'[a-zA-Z_][a-zA-Z0-9_]*'
+comment = r'(\;*(.|\n)*?\*;)|(\;.*)'
+newline = r'\n+'
+
+
+@TOKEN(motor)
+def t_MOTORATTENTION(t):
+    print " # pyLex -> t_MOTORATTENTION '%s'" % t
+    t.type = reserved.get(t.value, 'MOTORATTENTION')
+    return t
+
+@TOKEN(procname)
+def t_PROCEDURENAME(t):
+    print " # pyLex -> t_PROCEDURENAME '%s'" % t
+    t.type = reserved.get(t.value, 'PROCEDURENAME')  # check whether it is a reserved word
+    return t
+
+@TOKEN(reporter)
+def t_REPORTER(t):
+    print " # pyLex -> t_REPORTER '%s'" % t
+    t.type = reserved.get(t.value, 'REPORTER')  # check whether it is a reserved word
+    return t
+
+@TOKEN(receiver)
+def t_RECEIVER(t):
+    print " # pyLex -> t_RECEIVER '%s'" % t
+    t.type = reserved.get(t.value, 'RECEIVER')  # check whether it is a reserved word
+    print "t.type= '%s'" % t.type
+    return t
+
+@TOKEN(bytes)
+def t_BYTES(t):
+    print " # pyLex -> t_BYTES '%s'" % t
+    t.type = reserved.get(t.value, 'BYTES')
+    return t
+
+@TOKEN(nliteral)
+def t_NUMBERLITERAL(t):
+    print " # pyLex -> t_NUMBERLITERAL '%s'" % t
+    try:
+        t.value = int(t.value)
+    except ValueError:
+        print _("%s is not a valid number!") % t.value
+        t.value = 0
+    return t
+
+@TOKEN(id)
+def t_ID(t):
+    r'[a-zA-Z_][a-zA-Z0-9_]*'
+    t.type = reserved.get(t.value, 'ID')  # check whether it is a reserved word
+    return t
+
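+# For reference, the reserved-word lookup in the t_* functions above is the
+# standard PLY idiom: match a generic identifier pattern, then reclassify it
+# through the reserved dict. An illustrative driver, assuming the lexer has
+# been built with build() below (not part of the original file):
+#
+#     lexer = build()
+#     lexer.input("repeat 4 [ beep wait 10 ]")
+#     for tok in lexer:
+#         print tok.type, tok.value   # REPEAT, NUMBERLITERAL 4, LBRACKET, ...
+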
+@TOKEN(comment)
+def t_COMMENT(t):
+    r'(\;*(.|\n)*?\*;)|(\;.*)'  # skips a whole block from ;* through *;
+    pass
+
+# Ignore single-line comments
+t_ignore_COMMENTLINE = r'\;.*'  # comments out the rest of the line only
+
+# Ignore spaces and tabs
+t_ignore = " \t"
+
+# Track the current line number
+@TOKEN(newline)
+def t_newline(t):
+    r'\n+'
+    t.lexer.lineno += t.value.count("\n")
+
+
+def t_error(t):
+    global errMsgFunc
+    m = msgErro(t.lineno, t.lexpos, t.value[0])
+    print m
+    t.lexer.skip(1)
+
+
+def msgErro(numLine, numPos, value, errMsg=_("Line %(line)d: Illegal Token '%(value)s'. References to this token cannot be found.")):
+    global errMsgFunc
+
+    # DMOC: Logo text is in a proportional font, so reporting the column is near impossible!
+    #TODO: display the chunk of text with the error highlighted
+    m = errMsg % {'line': numLine, 'value': value}  # key must match %(value)s in errMsg
+    if errMsgFunc:
+        errMsgFunc(m)
+    return m
+
+
+def setErrMsgFunc(f):
+    global errMsgFunc
+    errMsgFunc = f
+
+# Build the lexer. With optimize=1, PLY caches the generated regular
+# expression tables instead of recomputing them on every run.
+def build(optimize=0, debug=0, **kwargs):
+    if os.name == 'nt':
+        return lex.lex(optimize=optimize, debug=debug, outputdir="c://", **kwargs)  # Windows
+    else:
+        return lex.lex(optimize=optimize, debug=debug, outputdir="", **kwargs)
+
+if __name__ == '__main__':
+    lex.runmain()
diff --git a/pyLogoCompiler/pyLex.pyc b/pyLogoCompiler/pyLex.pyc
Binary files differ
new file mode 100644
index 0000000..6311e10
--- /dev/null
+++ b/pyLogoCompiler/pyLex.pyc
diff --git a/pyLogoCompiler/pyYacc.py b/pyLogoCompiler/pyYacc.py
new file mode 100644
index 0000000..791d666
--- /dev/null
+++ b/pyLogoCompiler/pyYacc.py
@@ -0,0 +1,1086 @@
+# -*- coding: utf-8 -*-
+#----------------------------BRGOGO----------------------------------------
+#Project site: http://br-gogo.sourceforge.net
+#
+# Name: pyLogo
+# Original from: Marcelo Barbosa
+# About: A Python version of the Logo language for the GoGo board
+#
+# Modified by: Felipe Augusto Silva
+# email: suportegogo@gmail.com
+# compiler version: 0.23
+#-----------------------------------------------------------------------------
+
+from gettext import gettext as _
+
+import ply.yacc as yacc
+
+# Import the tokens already checked by the lexer
+from pyLex import tokens
+import pyLex
+
+CODE_START = [0]
+CODE_STOP = [7]
+Erros = ""
+
+# Operator precedence in the language, lowest first
+precedence = (('left', 'AND', 'OR', 'XOR'),
+              ('left', 'LESSTHAN', 'GREATERTHAN'),
+              ('left', 'PLUS', 'MINUS'),
+              ('left', 'TIMES', 'DIVIDE', 'PERCENT'),
+              ('right', 'UMINUS'),  # unary MINUS operator
+              ('right', 'UNOT'),    # unary NOT operator
+              )
+
+globais = { }
+
+#saida = []
+
+src_code = ''
+errMsgFunc = None
+
+variaveis = []    # global variables
+vlocais = {}      # local variables, keyed by procedure name
+nvlocais = 0      # number of local variables
+nomeprocs = { }   # procedure name -> start address
+size = 0          # size of the code compiled so far
+procname = ''     # name of the procedure being compiled
+
+DEBUG = True
+RECOMPILE = False
+
+def p_procedures(p):
+    '''procedures : procedure procedures'''
+    print " # pyYacc -> p_procedures"
+    p[0] = p[1] + p[2]
+
+def p_procedures_procedure(p):
+    'procedures : procedure'
+    print " # pyYacc -> p_procedures_procedure p[0]=p[1]='%s'" % p[1]
+    p[0] = p[1]
+
+#TODO: remove tmp refs to RECOMPILE...
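+# For reference, each p_* action below builds a flat list of GoGo-board
+# opcodes, and the lists are concatenated bottom-up. A hand-worked sketch
+# (opcode values taken from the actions below, procedure name illustrative):
+#
+#     to blink
+#       beep wait 10
+#     end
+#
+# compiles, per p_procedure, to [nvlocais] + body + [7]:
+#
+#     [0, 12, 1, 10, 16, 7]
+#      0      -> number of local variables
+#      12     -> beep
+#      1, 10  -> NUM8 literal 10
+#      16     -> wait
+#      7      -> code_end (STOP)
+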
+def p_procedure(p): + '''procedure : TO PROCEDURENAME statements END''' + global RECOMPILE + try: + l = [] + global size + + print " # pyYacc -> p_procedure" + for i in range(4): + print "p[",i+1,"]", p[i+1] + + procname=p[2] + nomeprocs[procname]=size + if p[1] == 'to' and p[4] == 'end': + l.append(nvlocais) #code_start + l += p[3] + l.append(7) #code_end + p[0] = l + size += len(l) + else: + #raise Exception, _("Nome não determinado '%s'") % p[2] + raise Exception, _("Name not found '%s'") % p[2] + except AttributeError: + #raise Exception, _("Nome não determinado '%s'") % p[2] + raise Exception, _("Name not found '%s'") % p[2] + p[0] = 0 + + RECOMPILE = True + +def p_procedure_parametere(p): + '''procedure : TO PROCEDURENAME parameterDeclaration statements END''' + try: + l = [] + print " # pyYacc -> p_procedure_parametere" + global size + + procname=p[2] + nomeprocs[procname]=size + print 'nomeprocs: ',nomeprocs + if p[1] == 'to' and p[5] == 'end': + global nvlocais + global vlocais + l.append(nvlocais) + #global RECOMPILE + #RECOMPILE=True#para q 'statements' acesse os parametros da funcao + l += p[4] + l.append(7) #code_end + nvlocais=0 + vlocais={} + p[0] = l + size += len(l) + else: + #raise Exception, _("Nome não determinado '%s'") % p[2] + raise Exception, _("Name not found '%s'") % p[2] + except ValueError: + #print _("Nome não determinado '%s'") % p[2] + print _("Name not found '%s'") % p[2] + p[0] = 0 + +def p_parameterDeclaration(p): + '''parameterDeclaration : RECEIVER parameterDeclaration + | RECEIVER''' + print " # pyYacc -> p_parameterDeclaration" + print "p[1] = '%s'" % p[1],len(p) + + try: + global nvlocais + x=-1 + while not p[x]=='to': + x-=1 + + procname=p[x+1] + nvlocais+=1 + print procname + print vlocais + if not procname in vlocais: + vlocais[procname]=[] + vlocais[procname].insert(0,p[1]) + except: + #print _("Nome não determinado '%s'") % p[1] + print _("Name not found '%s'") % p[1] + p[0] = [] + +def p_statements(p): + '''statements : statement statements''' + print " # pyYacc -> p_statements" + p[0] = p[1] + p[2] + +def p_statements_statement(p): + 'statements : statement' + print " # pyYacc -> p_statements_statement" + p[0] = p[1] + +def p_statement_repeat(p): + '''statement : REPEAT expression LBRACKET statements RBRACKET''' + #representação do repeat: <expressao> <list> <lengthlist> <expressao> <eol> <repeat> + print " # pyYacc -> p_statement_repeat" + try: + l =[] + l += p[2] + l.append(3) + if p[3] == '[' and p[5] == ']': + l2 = p[4] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(4) + #if DEBUG: + # print l; + l.append(9) #repeat + p[0] = l + except LookupError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_statement_loop(p): + '''statement : LOOP LBRACKET statements RBRACKET''' + print " # pyYacc -> p_statement_loop" + try: + l =[] + l.append(3) + if p[2] == '[' and p[4] == ']': + l2 = p[3] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(4) + #if DEBUG: + # print l; + l.append(15) #loop + p[0] = l + except SyntaxError: + msgErro(p.lineno, p.lexpos, p[3]) + p[0] = [] + +def p_statement_forever(p): + '''statement : FOREVER LBRACKET statements RBRACKET''' + print " # pyYacc -> p_statement_forever" + try: + l =[] + l.append(3) + if p[2] == '[' and p[4] == ']': + l2 = p[3] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(4) + #if DEBUG: + # print l; + l.append(15) #loop + p[0] = l + except SyntaxError: + msgErro(p.lineno, p.lexpos, p[3]) + p[0] = [] + +def p_statement_if(p): + '''statement : IF expression 
LBRACKET statements RBRACKET''' + #representação do repeat: <expressao> <list> <lengthlist> <expressao> <eol> <if> + print " # pyYacc -> p_statement_if" + try: + l =[] + l += p[2] + l.append(3) + if p[3] == '[' and p[5] == ']': + l2 = p[4] + print l2 + l.append(len(l2)+1)#tamanho da lista + print len(l2)+1 + l += l2 + l.append(4) + #if DEBUG: + # print l; + l.append(10) #if + p[0] = l + except SyntaxError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_statement_ifelse(p): + '''statement : IFELSE expression LBRACKET statements RBRACKET LBRACKET statements RBRACKET''' + #representação do repeat: <expressao> <list> <lengthlist> <expressao> <eol> (2x) <ifelse> + print " # pyYacc -> p_statement_ifelse" + try: + l =[] + l += p[2] + l.append(3) + if p[3] == '[' and p[5] == ']': + l2 = p[4] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(4) + #if DEBUG: + # print l; + if p[6] == '[' and p[8] == ']': + l.append(3) + l2 = p[7] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(4) + #if DEBUG: + # print l; + l.append(11) #ifelse + p[0] = l + except LookupError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_statement_waituntil(p): + '''statement : WAITUNTIL LBRACKET expression RBRACKET''' + #representação do repeat: <list> <lengthlist> <expressao> <eolr> <waituntil> + print " # pyYacc -> p_statement_waituntil" + try: + l =[] + if p[2] == '[' and p[4] == ']': + l.append(3) + l2 = p[3] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(5) + #if DEBUG: + # print l; + l.append(14) #waituntil + p[0] = l + except LookupError: + msgErro(p.lineno, p.lexpos, p[3]) + p[0] = [] + +def p_statement_when(p): + '''statement : WHEN expression LBRACKET expression RBRACKET''' + #representação do repeat: <list> <lengthlist> <expressao> <eolr> <waituntil> + print " # pyYacc -> p_statement_when" + try: + l =[] + l += p[2] + if p[3] == '[' and p[5] == ']': + l.append(3) + l2 = p[3] + l.append(len(l2)+1)#tamanho da lista + l += l2 + l.append(4) #whenoff + #if DEBUG: + # print l; + l.append(44) + p[0] = l + except LookupError: + msgErro(p.lineno, p.lexpos, p[3]) + p[0] = [] + +#def p_statement_show(p): + #'''statement : SHOW REPORTER''' + #print " # pyYacc -> p_statement_SHOW p= '%s'" %p[2] + + #l=[] + #pos=0 + + #for i in variaveis: + #if i == p[2]: + #break + #pos+=1 + + #pos-=1 + + #if pos == -1: #nenhuma variavel encontrada + ##raise TypeError, "Nome não determinado '%s'" % p[2] + #raise Exception, _("Name not found '%s'") % p[2] + #p[0]=[] + + #else: + #l.append(91)#CL_I2C_START + + #l.append(1) + #l.append(176) + #l.append(93)#CL_I2C_WRITE + #l.append(1) + #l.append(2) + #l.append(93)#CL_I2C_WRITE + + #l.append(1) + #l.append(pos) + #l.append(36)#GETGLOBAL + #l.append(72)#HIGH_BYTE + #l.append(93)#CL_I2C_WRITE + + #l.append(1) + #l.append(pos) + #l.append(36)#GETGLOBAL + #l.append(71)#LOW_BYTE + #l.append(93)#CL_I2C_WRITE + + #l.append(92)#L_I2C_STOP + #p[0]=l + +def p_statement_show_expression(p): + '''statement : SHOW expression''' + print " # pyYacc -> p_statement_show_expression p= '%s'" %p[2] + l=[] + + l.append(91) + l.append(1) + l.append(176) + l.append(93)#CL_I2C_WRITE + l.append(1) + l.append(2) + l.append(93)#CL_I2C_WRITE + + print p[2] + if len(p[2])==2: #show numbr in display + if p[2][0]==1: + l.append(1) + l.append(p[2][1]) + l.append(72)#HIGH_BYTE + l.append(93)#CL_I2C_WRITE + l.append(1) + l.append(p[2][1]) + l.append(71)#LOW_BYTE + l.append(93)#CL_I2C_WRITE + if p[2][0]==2: + l.append(2) + l.append(p[2][1])#hi byte + l.append(72)#HIGH_BYTE + 
l.append(93)#CL_I2C_WRITE + l.append(2) + l.append(p[2][2])#low byte + l.append(71)#LOW_BYTE + l.append(93)#CL_I2C_WRITE + else: #show the result of the expression + l+=p[2] + l.append(72) + l.append(93) + l+=p[2] + l.append(71) + l.append(93) + l.append(92) + p[0]=l + + +def p_statement_show_disp(p): + '''statement : SHOW RECEIVER''' + print " # pyYacc -> p_statement_SHOW-disp p= '%s'" %p[2] + #print _('tamanho'), len(p[2]) + print _('size'), len(p[2]) + l=[] + + l.append(91)#CL_I2C_START + + l.append(1) + l.append(176) + l.append(93)#CL_I2C_WRITE + + l.append(1) + l.append(3) + l.append(93)#CL_I2C_WRITE + + l.append(1) + l.append(ord(p[2][1])) + l.append(93)#CL_I2C_WRITE + + l.append(1) + if (len(p[2]) > 2): + l.append(ord(p[2][2])) + else: + l.append(32) + l.append(93)#CL_I2C_WRITE + + l.append(1) + if (len(p[2]) > 3): + l.append(ord(p[2][3])) + else: + l.append(32) + l.append(93)#CL_I2C_WRITE + + l.append(1) + if (len(p[2]) > 4): + l.append(ord(p[2][4])) + else: + l.append(32) + l.append(93)#CL_I2C_WRITE + + l.append(92)#CL_I2C_STOP + + p[0]=l + +def p_statement_make(p): + '''statement : MAKE RECEIVER expression''' + print " # pyYacc -> p_statement_MAKE" + + if variaveis.count(p[2])==0: + pos=len(variaveis) + variaveis.append(p[2]) + else: + pos=0 + for i in variaveis: + if i == p[2]: + break + pos+=1 + + l=[] + l.append(1)#NUM8 stkPush(fetchNextOpcode()); + l.append(pos); + + l+=p[3]#add the expression to the list + l.append(35)#SETGLOBAL + p[0]=l + + +def p_statement_expression(p): + '''statement : WAIT expression + | SEND expression + | RECORD expression + | ERASE expression + | SETSVH expression + | SVR expression + | SVL expression + | SETPOWER expression + | I2C_WRITE expression + | OUTPUT expression + | ONFOR expression + | BSEND expression + | BSR expression + | SETDP expression + | FASTSEND expression''' + print " # pyYacc -> p_statement_expression" + try: + p.lineno(1) + p.lineno(2) + l =[] + l += p[2] + if p[1] == 'wait': + op = 16 + elif p[1] == 'send': + op = 19 + elif p[1] == 'record': + op = 39 + elif p[1] == 'erase': + op = 43 + elif p[1] == 'setsvh': + op = 87 + elif p[1] == 'svr': + op = 88 + elif p[1] == 'svl': + op = 89 + elif p[1] == 'setpower': + op = 59 + elif p[1] == 'i2c_write': + op = 93 + elif p[1] == 'output': + op = 8 + elif p[1] == 'onfor': + op = 50 + elif p[1] == 'bsend': + op = 61 + elif p[1] == 'bsr': + op = 62 + elif p[1] == 'setdp': + op = 42 + elif p[1] == 'fastsend': + op = 67 + else: + op = 0 + + l.append(op) + p[0] = l + except ValueError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_statement_value(p): + '''statement : BEEP + | STOP + | RESET + | RESETDP + | ON + | OFF + | THISWAY + | THATWAY + | RD + | BRAKE + | LEDON + | LEDOFF + | I2C_START + | I2C_STOP + | WHENOFF + | procedurecall + ''' + print " # pyYacc -> p_statement_value" + if p[1] == 'beep': + op = [12] + elif p[1] == 'stop': + op = [7] + elif p[1] == 'reset': + op = [18] + elif p[1] == 'resetdp': + op = [41] + elif p[1] == 'on': + op = [49] + elif p[1] == 'off': + op = [51] + elif p[1] == 'thisway': + op = [52] + elif p[1] == 'thatway': + op = [53] + elif p[1] == 'rd': + op = [54] + elif p[1] == 'brake': + op = [60] + elif p[1] == 'ledon': + op = [85] + elif p[1] == 'ledoff': + op = [86] + elif p[1] == 'i2c_start': + op = [91] + elif p[1] == 'i2c_stop': + op = [92] + elif p[1] == 'whenoff': + op = [45] + else: + op = p[1] + p[0] = op + +#def p_statement_motorAttention(p): + #'''statement : MOTORATTENTION''' + #it works too!!!! 
+ #print " # pyYacc -> p_statement_motorAttention" + #if p[1] == 'a,': + #op = [46] + #elif p[1] == 'b,': + #op = [47] + #elif p[1] == 'ab,': + #op = [48] + #elif p[1] == 'c,': + #op = [63] + #elif p[1] == 'd,': + #op = [64] + #elif p[1] == 'cd,': + #op = [65] + #elif p[1] == 'abcd,': + #op = [66] + #p[0] = op + +def p_statement_motorAttention(p): + '''statement : MOTORATTENTION''' + print " # pyYacc -> p_statement_motorAttention" + l=[] + l.append(1) + if p[1] == 'a,': + l.append(1) + elif p[1] == 'b,': + l.append(2) + elif p[1] == 'ab,': + l.append(3) + elif p[1] == 'c,': + l.append(4) + elif p[1] == 'ac,': + l.append(5) + elif p[1] == 'bc,': + l.append(6) + elif p[1] == 'abc,': + l.append(7) + elif p[1] == 'd,': + l.append(8) + elif p[1] == 'ad,': + l.append(9) + elif p[1] == 'bd,': + l.append(10) + elif p[1] == 'abd,': + l.append(11) + elif p[1] == 'cd,': + l.append(12) + elif p[1] == 'acd,': + l.append(13) + elif p[1] == 'bcd,': + l.append(14) + elif p[1] == 'abcd,': + l.append(15) + else: + raise TypeError, "Motor '%s' desconhecido\n Ex.:\n a, on\nab, onfor 3\nbc, setpower 2" % p[1] + + l.append(90) + p[0] = l + +def p_statement_bytes(p): + '''statement : BYTES''' + print " # pyYacc -> p_statement_bytes" + if (p[1] == '0x83') or \ + (p[1] == '0x84') or \ + (p[1] == '0x85') or \ + (p[1] == '0x86') or \ + (p[1] == '0x87') : + op = [1, int((p[1]),16)] + else: + op = [] + p[0] = op + +def p_expression_reporter(p): + '''expression : REPORTER''' + print " # pyYacc -> p_expression_reporter" + + l=[] + pos=0 + x=-1 + while not p[x]=='to': + x-=1 + + procname=p[x+1] + print 'procname: ',procname + print 'vlocais: ',vlocais + if (procname in vlocais) and (not vlocais[procname].count('"'+p[1][1:])==0): + for i in vlocais[procname]: + if i == '"'+p[1][1:]: + break + pos+=1 + + l.append(6)#LTHING(retrieve procedure input) + l.append(pos) + else: + if len(variaveis) == 0: + #raise TypeError, "Nome não determinado '%s'" % p[1] + raise TypeError, _("Name not found '%s'") % p[1] + + encontrou=0 + for i in variaveis: + if i[1:] == p[1][1:]: + encontrou=1 + break + pos+=1 + + if encontrou == 0: #nenhuma variavel encontrada + #raise TypeError, "Nome não determinado '%s'" % p[1] + raise TypeError, _("Name not found '%s'") % p[1] + p[0]=[] + + else: + l.append(1) + l.append(pos) + l.append(36)#GETGLOBAL + + p[0]=l + +def p_expression(p): + '''expression : expression AND expression + | expression OR expression + | expression XOR expression + | expression LESSTHAN expression + | expression GREATERTHAN expression + | expression EQUALS expression + | expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | expression PERCENT expression''' + print " # pyYacc -> p_expression" + try: + lexp =[] + op = 0 + if (len(p) == 4): #Verifico o tamanho do termo se for 4 então trabalho com os operadores + if p[2] == '+': + op = 23 + elif p[2] == '-': + op = 24 + elif p[2] == '*': + op = 25 + elif p[2] == '/': + op = 26 + elif p[2] == '%': + op = 27 + elif p[2] == '=': + op = 28 + elif p[2] == '>': + op = 29 + elif p[2] == '<': + op = 30 + elif p[2] == 'and': + op = 31 + elif p[2] == 'or': + op = 32 + elif p[2] == 'xor': + op = 33 + elif (len(p) == 3): + if p[1] == '-': + op = 24 + elif p[1] == 'not': + op = 34 + + lexp = [] + lexp += p[1] + p[3] + lexp.append(op) + p[0] = lexp + except TypeError: + print 'Tipos incompativeis! 
' + p[0] = [] + +def p_expression_uminus(p): + '''expression : MINUS expression %prec UMINUS''' + print " # pyYacc -> p_expression_uminus" + print p[2] + lexp = [] + lexp += p[2] + lexp.append(1) + lexp.append(0) + lexp.append(24) + p[0] = lexp + +def p_expression_unot(p): + 'expression : NOT expression %prec UNOT' + print " # pyYacc -> p_expression_unot" + lexp = [] + lexp = p[2] + lexp.append(34) + p[0] = lexp + +def p_expression_group(p): + '''expression : LPAREN expression RPAREN''' + print " # pyYacc -> p_expression_group" + try: + lexp = [] + if p[1] == '(' and p[3] == ')': + lexp = p[2] + p[0] = lexp + except LookupError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_expression_group_bracket(p): + '''expression : LBRACKET expression RBRACKET''' + print " # pyYacc -> p_expression_group_bracket" + try: + lexp = [] + if p[1] == '[' and p[3] == ']': + lexp = p[2] + p[0] = lexp + except LookupError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_expression_expression(p): + '''expression : I2C_READ expression + | HIGHBYTE expression + | LOWBYTE expression''' + print " # pyYacc -> p_expression_group" + try: + l =[] + l += p[2] + if p[1] == 'i2c_read': + op = 94 + elif p[1] == 'highbyte': + op = 72 + elif p[1] == 'lowbyte': + op = 71 + else: + op = 0 + + l.append(op) + p[0] = l + except LookupError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + + +def p_expression_value(p): + '''expression : NUMBERLITERAL + | TIMER + | RANDOM + | RECALL + | SENSOR1 + | SENSOR2 + | SENSOR3 + | SENSOR4 + | SENSOR5 + | SENSOR6 + | SENSOR7 + | SENSOR8 + | SWITCH1 + | SWITCH2 + | SWITCH3 + | SWITCH4 + | SWITCH5 + | SWITCH6 + | SWITCH7 + | SWITCH8 + | NEWIRQ + | SERIAL''' + print " # pyYacc -> p_expression_value" + try: + p.lineno(1) + l =[] + if isinstance(p[1],int): + print 'numero literal' + if p[1] < 256: + l.append(1) #é um byte + l.append(p[1]) + else: + l.append(2) #é um number + l.append(highByte(p[1])) + l.append(lowByte(p[1])) + elif p[1] == 'timer': + l.append(17) + elif p[1] == 'random': + l.append(22) + elif p[1] == 'recall': + l.append(40) + elif p[1] == 'sensor1': + l.append(55) + elif p[1] == 'sensor2': + l.append(56) + elif p[1] == 'sensor3': + l.append(73) + elif p[1] == 'sensor4': + l.append(74) + elif p[1] == 'sensor5': + l.append(75) + elif p[1] == 'sensor6': + l.append(76) + elif p[1] == 'sensor7': + l.append(77) + elif p[1] == 'sensor8': + l.append(78) + elif p[1] == 'switch1': + l.append(57) + elif p[1] == 'switch2': + l.append(58) + elif p[1] == 'switch3': + l.append(79) + elif p[1] == 'switch4': + l.append(80) + elif p[1] == 'switch5': + l.append(81) + elif p[1] == 'switch6': + l.append(82) + elif p[1] == 'switch7': + l.append(83) + elif p[1] == 'switch8': + l.append(84) + elif p[1] == 'newir?': + l.append(21) + elif p[1] == 'serial': + l.append(82) + elif p[1] in globais.keys(): + l += globais.get(p[1]) + else: + #raise SyntaxError, _('Valor não identificado: %s') % p[1] + raise SyntaxError, _('Not identified: %s') % p[1] + l = p[1] + p[0] = l + except ValueError: + msgErro(p.lineno, p.lexpos, p[2]) + p[0] = [] + +def p_parm_value(p): + '''parm : parm expression + | expression''' + print " # pyYacc -> p_parm_value %s" % p[1:] + l=[] + size=len(p[1:])#get the size of the list 'parm+expression' + for i in range(1,size+1): + l+=p[i]#add the last element of the list in parm or the element given + p[0] = l + +def p_procedurecall_parm(p): + '''procedurecall : PROCEDURENAME parm''' + print " # pyYacc -> p_procedurecall_parm" + global RECOMPILE + global errMsgFunc + + 
l = []
+    l.append(128)  #SET_PTR: jump to a procedure's start address
+    pos = nomeprocs.get(p[1])
+    print 'jump pos: ','(',pos,')'
+    print nomeprocs
+    if not p[1] in nomeprocs:
+        # Forward reference: the procedure is called before it is defined,
+        # so a second compile pass is needed to resolve the address.
+        print '**********NEED TO RECOMPILE************'
+        if errMsgFunc:
+            errMsgFunc(_("Line %(line)d: Unresolved symbol '%(msg)s'") % {'line': p.lineno(1), 'msg': p[1]})
+        RECOMPILE = True
+    l.append(pos)
+    p[0] = p[2] + l
+
+def p_procedurecall(p):
+    '''procedurecall : PROCEDURENAME'''
+    print " # pyYacc -> p_procedurecall ('%s')" % p[1]
+    global RECOMPILE
+    global errMsgFunc
+
+    l = []
+    l.append(128)  #SET_PTR
+    pos = nomeprocs.get(p[1])
+    print 'jump pos: ','(',pos,')'
+    if not p[1] in nomeprocs:
+        print '**********NEED TO RECOMPILE************'
+        if errMsgFunc:
+            errMsgFunc(_("Line %(line)d: Unresolved symbol '%(msg)s'") % {'line': p.lineno(1), 'msg': p[1]})
+        RECOMPILE = True
+    l.append(pos)
+    p[0] = l
+
+# Compute column.
+#     input is the input text string
+#     token is a token instance
+def find_column(token):
+    global src_code
+    last_cr = src_code.rfind('\n', 0, token.lexpos)
+    if last_cr < 0:
+        last_cr = 0
+    column = (token.lexpos - last_cr) + 1
+    return column
+
+def p_error(p):
+    global errMsgFunc
+
+    print "# pyYacc -> p_error ('%s')" % p
+    if p is not None:
+        m = msgErro(p.lineno, p.lexpos, p.value)
+        # Column calc near impossible for proportional text (esp with tabs)
+        return
+    # p is None: the error is at the end of the input
+    m = _("Error in last line")
+    if errMsgFunc:
+        errMsgFunc(m)
+    raise SyntaxError, m
+
+def highByte(number):
+    return ((number >> 8) & 0xff)
+
+def lowByte(number):
+    return (number & 0xff)
+
+def msgErro(numLine, numPos, value, errMsg=_("Line %(line)d: Syntax Error '%(msg)s'")):
+    global errMsgFunc
+
+    # DMOC: Logo text is in a proportional font, so reporting the column is near impossible!
+    #TODO: display the chunk of text with the error highlighted
+    m = errMsg % {'line': numLine, 'msg': value}
+    if errMsgFunc:
+        errMsgFunc(m)
+    return m
+
+def codigoIntermediario(lst):
+    print _("Intermediate Code: %s") % lst
+
+def codigoFinal(lst):
+    s = ""
+    for i in lst:
+        s = s + chr(i)
+
+    print _("Final Code: %s") % s
+    return s
+
+
+def analisarCodigo(codigo, errMsgFunction=None):
+    print " # pyYacc -> analisarCodigo"
+    # Clear the list of global variables
+    del variaveis[:]
+
+    global RECOMPILE
+    global size
+    global nomeprocs
+
+    global src_code
+    global errMsgFunc
+
+    errMsgFunc = errMsgFunction
+
+    src_code = codigo
+    print src_code
+
+    size = 0
+    nomeprocs = {}
+    # To use the two analyzers together, the lexer must be passed into the parser.
+    Lexer = pyLex.build(optimize=1, debug=0)
+    print " pyLex.build() ended"
+    parser = yacc.yacc(optimize=1, debug=0)
+    print " yacc.yacc() ended"
+
+    pyLex.setErrMsgFunc(errMsgFunc)
+
+    if errMsgFunc:
+        errMsgFunc('***BEGIN***')
+    result = parser.parse(src_code.lower(), lexer=Lexer, debug=0, tracking=True)
+    if RECOMPILE:
+        # Second pass: procedure addresses are now known, so forward
+        # references can be resolved.
+        print 'RECOMPILE'
+        size = 0
+        if errMsgFunc:
+            errMsgFunc('***BEGIN***')
+        result = parser.parse(src_code.lower(), lexer=Lexer, debug=0, tracking=True)
+        #RECOMPILE=False
+    if errMsgFunc:
+        errMsgFunc('***END***')
+
+    print " parser.parse() generated result '%s'" % result
+    return result
diff --git a/pyLogoCompiler/pyYacc.pyc b/pyLogoCompiler/pyYacc.pyc
Binary files differ
new file mode 100644
index 0000000..2ff7e22
--- /dev/null
+++ b/pyLogoCompiler/pyYacc.pyc
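
A minimal sketch of how the pieces above fit together, using only functions
defined in this patch (the error callback and the surrounding script are
illustrative assumptions, not part of the patch):

    # -*- coding: utf-8 -*-
    from pyLogoCompiler import pyYacc

    source = """
    to blink
      repeat 4 [ beep wait 10 ]
    end
    """

    def show_error(msg):
        print msg   # a GUI would route this to its message pane

    # Two-pass compile: analisarCodigo() parses a second time when a
    # procedure is called before it is defined (the RECOMPILE flag).
    opcodes = pyYacc.analisarCodigo(source, show_error)

    if opcodes:
        pyYacc.codigoIntermediario(opcodes)         # print the opcode list
        byte_string = pyYacc.codigoFinal(opcodes)   # chr()-packed for the board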