fx92-interpreter/fx92/parser.py

287 lines
8.1 KiB
Python

# fx-92 Scientifique Collège+ language interpreter: Syntactic analysis
import re
from fx92.lexer import T, Token, BitcodeLexer
from fx92.ast import N, Node
#---
# LL(1) parser
#---
class Parser:
    """
    fx-92 SC+ language parser. This is your everyday LL(1) top-down parser. It
    implements the following formal grammar:

      program -> stmt*
      stmt    -> stmt2 EOL?
      stmt2   -> FORWARD arg | ROTATE arg | ORIENT arg | GOTO arg arg |
                 PENDOWN | PENUP | SETVAR arg argvar | INPUT argvar |
                 MESSAGE arg | PRINT arg | STYLE style | WAIT |
                 REPEAT arg EOL? program REPEAT_END |
                 WHILE cond PARAM EOL? program WHILE_END |
                 IF cond PARAM EOL? program IF_END |
                 IFELSE cond PARAM EOL? program ELSE EOL? program IFELSE_END
      arg     -> expr PARAM
      argvar  -> var PARAM
      cond    -> expr REL expr
      expr    -> factor ((+|-) factor)*
      factor  -> atom ((*|/) atom)*                  (left-associative)
      atom    -> (+|-)? CONST? (var | "(" expr ")" | FUN expr ")")*
      const   -> (+|-)? CONST
      var     -> VAR

    In atom, the closing ")" may be omitted when the next token is PARAM.

    # Unused or TODO
      style   -> (TODO)
      setvar  -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY

    NOTE(review): the grammar previously listed `WAIT arg`, but stmt2() builds
    the WAIT node without reading any argument; the rule above documents what
    the code does. Confirm against the lexer which one is intended.

    The lexer passed to the constructor must provide rewind(), lex() and a
    `position` attribute; tokens must provide `type` and `args`.
    """

    def __init__(self, lexer):
        """
        Create a Parser from a chosen lexer.
        """
        # Store the lexer; the lookahead token is loaded by parse_program()
        self.lexer = lexer
        self.la = None

    def parse_program(self):
        """
        Rewind the lexer, load the first lookahead token and parse a whole
        program. Returns the N.PROGRAM node built by program().

        NOTE(review): nothing here checks that the lexer is exhausted once
        program() returns, so trailing tokens that do not start a statement
        are left unread.
        """
        self.lexer.rewind()
        self.la = None
        self.advance()
        return self.program()

    #---
    # LL parsing helpers
    #---

    def advance(self):
        """Return the next token and feed the lookahead."""
        current = self.la
        self.la = self.lexer.lex()
        return current

    def expect(self, *types, optional=False):
        """
        Read the next token, expecting a type specified in *types.

        Returns the consumed token when the lookahead has one of the given
        types. Otherwise returns None if optional is true (nothing is
        consumed), or prints and raises a syntax error.
        """
        if self.la.type in types:
            return self.advance()
        if optional:
            return None

        expected = [T(t).name for t in types]
        got = T(self.la.type).name
        pos = self.lexer.position
        err = "Expected one of {}, got {} (at token {})".format(
            expected, got, pos)
        print("[parser] " + err)
        raise Exception("Syntax error: " + err)

    def _body(self, terminator):
        """
        Parse `EOL? program <terminator>`, the body of a flow-control
        statement, and return the program node.
        """
        self.expect(T.EOL, optional=True)
        prg = self.program()
        self.expect(terminator)
        return prg

    #---
    # LL parsing rules
    #---

    # program -> stmt*
    def program(self):
        """Parse statements until stmt() yields None; return an N.PROGRAM."""
        stmts = []
        while True:
            stmt = self.stmt(optional=True)
            if stmt is None:
                break
            stmts.append(stmt)
        return Node(N.PROGRAM, *stmts)

    # stmt -> stmt2 EOL?
    def stmt(self, optional=False):
        """
        Parse one statement followed by an optional EOL. Returns None when
        optional is true and the lookahead does not start a statement.
        """
        st = self.stmt2(optional=optional)
        # The EOL is consumed even when no statement was read
        self.expect(T.EOL, optional=True)
        return st

    # stmt2 -> (lots of cases)
    def stmt2(self, optional=False):
        """
        Parse a single statement (without its trailing EOL) and return its
        node, or None when optional is true and no statement starts here.
        """
        valid = [
            T.FORWARD, T.ROTATE, T.ORIENT, T.GOTO, T.PENDOWN, T.PENUP,
            T.SETVAR, T.INPUT, T.MESSAGE, T.PRINT, T.STYLE, T.WAIT,
            T.REPEAT, T.WHILE, T.IF, T.IFELSE,
        ]
        op = self.expect(*valid, optional=optional)
        if op is None:
            return None
        kind = op.type

        # Basic statements
        if kind == T.FORWARD:
            return Node(N.FORWARD, self.arg())
        if kind == T.ROTATE:
            return Node(N.ROTATE, self.arg())
        if kind == T.ORIENT:
            return Node(N.ORIENT, self.arg())
        if kind == T.GOTO:
            # Read the two arguments in source order
            x = self.arg()
            y = self.arg()
            return Node(N.GOTO, x, y)
        if kind == T.PENDOWN:
            return Node(N.PENDOWN)
        if kind == T.PENUP:
            return Node(N.PENUP)
        if kind == T.SETVAR:
            # The value comes first, then the destination variable
            value = self.arg()
            name = self.argvar()
            return Node(N.ASSIGN, value, name)
        if kind == T.INPUT:
            return Node(N.INPUT, self.argvar())
        if kind == T.MESSAGE:
            return Node(N.MESSAGE, self.arg())
        if kind == T.PRINT:
            return Node(N.PRINT, self.arg())
        if kind == T.STYLE:
            return Node(N.STYLE, self.style())
        if kind == T.WAIT:
            return Node(N.WAIT)

        # Flow control
        if kind == T.REPEAT:
            count = self.arg()
            return Node(N.REPEAT, count, self._body(T.REPEAT_END))

        # WHILE, IF and IFELSE all start with `cond PARAM`
        cond = self.cond()
        self.expect(T.PARAM)

        if kind == T.WHILE:
            return Node(N.WHILE, cond, self._body(T.WHILE_END))
        if kind == T.IF:
            return Node(N.IF, cond, self._body(T.IF_END), None)

        # Only T.IFELSE remains. An EOL is accepted after ELSE just like
        # after the other block headers; otherwise program() would swallow
        # it and stop with an empty else-branch.
        p1 = self._body(T.ELSE)
        p2 = self._body(T.IFELSE_END)
        return Node(N.IF, cond, p1, p2)

    # arg -> expr PARAM
    def arg(self):
        """Parse an expression terminated by PARAM; return the expression."""
        e = self.expr()
        self.expect(T.PARAM)
        return e

    # cond -> expr REL expr
    def cond(self):
        """Parse a comparison; return Node(N.REL, left, relation, right)."""
        e1 = self.expr()
        rel = self.expect(T.REL).args[0]
        e2 = self.expr()
        return Node(N.REL, e1, rel, e2)

    # expr -> factor ((+|-) factor)*
    def expr(self):
        """
        Parse a sum. Returns an N.ADD node with one child per term; a
        subtracted term is wrapped in an N.MINUS node.
        """
        terms = [self.factor()]
        while True:
            t = self.expect(T.PLUS, T.MINUS, optional=True)
            if t is None:
                return Node(N.ADD, *terms)
            operand = self.factor()
            if t.type == T.MINUS:
                operand = Node(N.MINUS, operand)
            terms.append(operand)

    # factor -> atom ((*|/) atom)*
    def factor(self):
        """
        Parse a chain of explicit multiplications and divisions.

        Operators are grouped to the left, so `a / b * c` yields
        MUL(DIV(a, b), c) and `a / b / c` yields DIV(DIV(a, b), c). A
        right-recursive rule would divide by everything that follows.
        """
        node = self.atom()
        while True:
            t = self.expect(T.STAR, T.SLASH, optional=True)
            if t is None:
                return node
            rhs = self.atom()
            if t.type == T.STAR:
                node = Node(N.MUL, node, rhs)
            else:
                node = Node(N.DIV, node, rhs)

    # atom -> (+|-)? CONST? (VAR | "(" expr ")" | FUN expr ")")*
    def atom(self):
        """
        Parse a product of juxtaposed operands (implicit multiplication).

        Returns an N.MUL node holding, in order: a CONST -1 if a leading
        minus was read, the optional constant, then every variable,
        parenthesized expression and function call that follows.

        NOTE(review): when nothing matches, the returned N.MUL node has no
        children; no syntax error is raised here.
        """
        factors = []

        # Initial sign: '+' is dropped, '-' becomes a factor of -1
        if self.la.type == T.PLUS:
            self.expect(T.PLUS)
        elif self.la.type == T.MINUS:
            self.expect(T.MINUS)
            factors.append(Node(N.CONST, -1))

        # Optional constant
        if self.la.type == T.CONST:
            factors.append(self.const())

        while True:
            lat = self.la.type

            if lat == T.VAR:
                factors.append(self.var())

            elif lat == T.LPAR:
                self.expect(T.LPAR)
                factors.append(self.expr())
                # Allow a parenthesis to be removed at the end of a parameter
                optional = (self.la.type == T.PARAM)
                self.expect(T.RPAR, optional=optional)

            elif lat == T.FUN:
                name = self.expect(T.FUN).args[0]
                e = self.expr()
                # Allow a parenthesis to be removed at the end of a parameter
                optional = (self.la.type == T.PARAM)
                self.expect(T.RPAR, optional=optional)
                factors.append(Node(N.FUN, name, e))

            else:
                break

        return Node(N.MUL, *factors)

    # const -> (+|-)? CONST
    def const(self):
        """
        Parse an optionally signed constant. Returns an N.CONST node,
        wrapped in N.MINUS when a leading minus was read.
        """
        t = self.expect(T.PLUS, T.MINUS, optional=True)
        value = self.expect(T.CONST).args[0]
        node = Node(N.CONST, value)
        if t and t.type == T.MINUS:
            node = Node(N.MINUS, node)
        return node

    # argvar -> var PARAM
    def argvar(self):
        """
        Parse a variable terminated by PARAM. Returns the first argument of
        the variable node (what the VAR token carried), not the node itself.
        """
        n = self.var()
        self.expect(T.PARAM)
        return n.args[0]

    # var -> VAR
    def var(self):
        """Parse a VAR token; return an N.VAR node carrying its argument."""
        t = self.expect(T.VAR)
        return Node(N.VAR, t.args[0])

    # setvar -> SETM | SETA | ... | SETF | SETX | SETY
    def setvar(self):
        """Not implemented; always raises."""
        raise Exception("SetVar not supported yet x_x")

    # style -> (TODO)
    def style(self):
        """Not implemented; always raises."""
        raise Exception("Style not supported yet x_x")