# fx-92 Scientifique Collège+ language interpreter: Syntactic analysis

import re
from decimal import Decimal
from fx92.lexer import T, Token, BitcodeLexer
from fx92.ast import N, Node

#---
# LL(1) parser
#---

class Parser:
    """
    fx-92 SC+ language parser. This is a plain LL(1) top-down (recursive
    descent) parser: each grammar rule is a method, and a single lookahead
    token (self.la) decides which alternative to take.

    It implements the following formal grammar:

      program  -> stmt*
      stmt     -> stmt2 EOL?
      stmt2    -> FORWARD arg | ROTATE arg | ORIENT arg | GOTO arg arg |
                  PENDOWN | PENUP | SETVAR arg argvar | INPUT argvar |
                  MESSAGE arg | PRINT arg | STYLE style | WAIT arg |
                  REPEAT arg program REPEAT_END |
                  WHILE cond PARAM program WHILE_END |
                  IF cond PARAM program IF_END |
                  IFELSE cond PARAM program ELSE program IFELSE_END
      arg      -> expr PARAM
      argvar   -> var PARAM
      cond     -> expr REL expr
      expr     -> factor | factor + expr | factor - expr
      factor   -> chunk | chunk * factor | chunk / factor
      chunk    -> power+
      power    -> atom | atom "^(" expr ")"
      atom     -> const | var | "(" expr ")" | FUN( funargs ")"
      funargs  -> expr | expr ; funargs
      const    -> (+|-)? CONST
      var      -> VAR

    NOTE(review): the original grammar listing also carried a second,
    differently-shaped pair of rules for `factor` and `atom`, reproduced
    below. The methods of this class follow the chunk/power formulation
    above, not these:

      factor   -> atom | atom * factor | atom / factor
      atom     -> const (var | "(" expr ")" | FUN funargs ")")*
                | (var | "(" expr ")" | FUN funargs ")")+

    # Unused or TODO
      style    -> (TODO)
      setvar   -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY
    """

    def __init__(self, lexer):
        """
        Create a Parser reading its tokens from the given lexer.

        The lexer must provide rewind(), lex() and a position attribute;
        these are the only members this class uses.
        """

        self.lexer = lexer
        # Lookahead token; loaded by parse_program() before any rule runs.
        self.la = None

    def parse_program(self):
        """Rewind the lexer, parse a whole program and return its AST."""
        self.lexer.rewind()
        self.la = None
        # Prime the lookahead with the first token.
        self.advance()
        return self.program()

    #---
    # LL parsing helpers
    #---

    def advance(self):
        """Return the current lookahead token and load the next one."""
        current = self.la
        self.la = self.lexer.lex()
        return current

    def expect(self, *types, optional=False):
        """
        Consume the lookahead token if its type is one of *types.

        Returns the consumed token. When the lookahead does not match,
        returns None if optional is true (nothing is consumed); otherwise
        prints a diagnostic and raises Exception("Syntax error: ...").
        """

        if self.la.type in types:
            return self.advance()
        if optional:
            return None

        expected = [T(t).name for t in types]
        got = T(self.la.type).name
        pos = self.lexer.position
        err = "Expected one of {}, got {} (at token {})".format(expected, got, pos)
        print("[urlparser] " + err)
        raise Exception("Syntax error: " + err)

    def _close_paren(self):
        """
        Consume a closing parenthesis.

        The parenthesis may be omitted when it sits at the end of a
        parameter, i.e. when the lookahead is PARAM or SEMI.
        """

        omissible = self.la.type in [T.PARAM, T.SEMI]
        self.expect(T.RPAR, optional=omissible)

    def _body(self, terminator):
        """
        Parse a nested program, then its terminator token and an optional
        EOL. Returns the nested program node.
        """

        prg = self.program()
        self.expect(terminator)
        self.expect(T.EOL, optional=True)
        return prg

    #---
    # LL parsing rules
    #---

    # program -> stmt*
    def program(self):
        """Parse statements until none can be started; return a PROGRAM node."""
        stmts = []

        while 1:
            stmt = self.stmt(optional=True)
            if stmt is None:
                break
            stmts.append(stmt)

        return Node(N.PROGRAM, *stmts)

    # stmt -> stmt2 EOL?
    def stmt(self, optional=False):
        """
        Parse one statement followed by an optional EOL.

        Returns None when optional is true and no statement starts here;
        note that a following EOL is still consumed in that case.
        """

        st = self.stmt2(optional=optional)
        self.expect(T.EOL, optional=True)
        return st

    # stmt2 -> (lots of cases)
    def stmt2(self, optional=False):
        """
        Parse a single statement, without its trailing EOL.

        Returns the statement node, or None when optional is true and the
        lookahead does not start a statement. Raises Exception on a syntax
        error.
        """

        valid = [
            T.FORWARD, T.ROTATE, T.ORIENT, T.GOTO, T.PENDOWN, T.PENUP,
            T.SETVAR, T.INPUT, T.MESSAGE, T.PRINT, T.STYLE, T.WAIT,
            T.REPEAT, T.WHILE, T.IF, T.IFELSE,
        ]
        op = self.expect(*valid, optional=optional)

        if op is None:
            return None

        # Basic statements
        if op.type == T.FORWARD:
            return Node(N.FORWARD, self.arg())
        if op.type == T.ROTATE:
            return Node(N.ROTATE, self.arg())
        if op.type == T.ORIENT:
            return Node(N.ORIENT, self.arg())
        if op.type == T.GOTO:
            return Node(N.GOTO, self.arg(), self.arg())
        if op.type == T.PENDOWN:
            return Node(N.PENDOWN)
        if op.type == T.PENUP:
            return Node(N.PENUP)
        if op.type == T.SETVAR:
            return Node(N.ASSIGN, self.arg(), self.argvar())
        if op.type == T.INPUT:
            return Node(N.INPUT, self.argvar())
        if op.type == T.MESSAGE:
            return Node(N.MESSAGE, self.arg())
        if op.type == T.PRINT:
            return Node(N.PRINT, self.arg())
        if op.type == T.STYLE:
            return Node(N.STYLE, self.style())
        if op.type == T.WAIT:
            # The grammar is "WAIT arg": the argument must be consumed here,
            # otherwise its tokens are left in the stream and the enclosing
            # program() stops parsing at them.
            return Node(N.WAIT, self.arg())

        # Flow control

        if op.type == T.REPEAT:
            arg = self.arg()
            self.expect(T.EOL, optional=True)
            prg = self._body(T.REPEAT_END)
            return Node(N.REPEAT, arg, prg)

        if op.type == T.WHILE:
            cond = self.cond()
            self.expect(T.PARAM)
            self.expect(T.EOL, optional=True)
            prg = self._body(T.WHILE_END)
            return Node(N.WHILE, cond, prg)

        if op.type == T.IF:
            cond = self.cond()
            self.expect(T.PARAM)
            self.expect(T.EOL, optional=True)
            prg = self._body(T.IF_END)
            # An IF without ELSE is an IF node whose else-branch is None.
            return Node(N.IF, cond, prg, None)

        if op.type == T.IFELSE:
            cond = self.cond()
            self.expect(T.PARAM)
            self.expect(T.EOL, optional=True)
            p1 = self._body(T.ELSE)
            p2 = self._body(T.IFELSE_END)
            return Node(N.IF, cond, p1, p2)

    # arg -> expr PARAM
    def arg(self):
        """Parse an expression terminated by PARAM; return the expression."""
        e = self.expr()
        self.expect(T.PARAM)
        return e

    # cond -> expr REL expr
    def cond(self):
        """Parse a comparison; return a REL node (left, relation, right)."""
        e1 = self.expr()
        rel = self.expect(T.REL).args[0]
        e2 = self.expr()
        return Node(N.REL, e1, rel, e2)

    # expr -> factor | factor + expr | factor - expr
    def expr(self):
        """
        Parse a sum of factors; return a flat ADD node.

        Subtracted factors are stored wrapped in a MINUS node, so the ADD
        node is always a plain list of terms.
        """

        terms = [self.factor()]

        while 1:
            t = self.expect(T.PLUS, T.MINUS, optional=True)
            if t is None:
                return Node(N.ADD, *terms)

            if t.type == T.PLUS:
                terms.append(self.factor())
            elif t.type == T.MINUS:
                terms.append(Node(N.MINUS, self.factor()))

    # factor -> chunk | chunk * factor | chunk / factor
    def factor(self):
        """
        Parse chunks joined by * or /.

        The rule recurses on its right-hand side, so "a / b / c" yields
        DIV(a, DIV(b, c)).
        """

        chunk = self.chunk()

        t = self.expect(T.STAR, T.SLASH, optional=True)
        if t is None:
            return chunk

        if t.type == T.STAR:
            return Node(N.MUL, chunk, self.factor())
        if t.type == T.SLASH:
            return Node(N.DIV, chunk, self.factor())

    # chunk -> power+
    def chunk(self):
        """
        Parse an optionally signed run of juxtaposed powers (implicit
        multiplication); return a MUL node of the collected factors.

        A leading minus is represented as a constant factor of -1.
        """

        factors = []

        # Initial sign
        if self.la.type == T.PLUS:
            self.expect(T.PLUS)
        elif self.la.type == T.MINUS:
            self.expect(T.MINUS)
            factors.append(Node(N.CONST, Decimal(-1)))

        while 1:
            # Only the first power is mandatory; note that the -1 inserted
            # for a leading minus already counts as a factor here.
            f = self.power(optional=len(factors) > 0)
            if f is None:
                break
            factors.append(f)

        return Node(N.MUL, *factors)

    # power -> atom | atom "^(" expr ")"
    def power(self, optional=False):
        """
        Parse an atom with an optional exponent.

        Returns the atom, an EXP node (base, exponent), or None when
        optional is true and no atom starts here.
        """

        a = self.atom(optional=optional)
        if a is None:
            return None

        if self.expect(T.EXP, optional=True) is not None:
            e = self.expr()
            self._close_paren()
            return Node(N.EXP, a, e)

        return a

    # atom -> const | var | "(" expr ")" | FUN( funargs ")"
    def atom(self, optional=False):
        """
        Parse a constant, a variable, a parenthesised expression or a
        function call.

        Returns the parsed node, or None when optional is true and the
        lookahead starts none of these. Raises Exception("Empty atom")
        otherwise.
        """

        lat = self.la.type

        if lat == T.CONST:
            return self.const()
        if lat == T.VAR:
            return self.var()

        if lat == T.LPAR:
            self.expect(T.LPAR)
            e = self.expr()
            self._close_paren()
            return e

        if lat == T.FUN:
            name = self.expect(T.FUN).args[0]
            a = self.funargs()
            self._close_paren()
            return Node(N.FUN, name, a)

        if not optional:
            raise Exception("Empty atom")
        return None

    # funargs -> expr | expr ; funargs
    def funargs(self):
        """Parse one or more SEMI-separated expressions; return them as a list."""
        a = [self.expr()]
        while self.expect(T.SEMI, optional=True) is not None:
            a.append(self.expr())
        return a

    # const -> (+|-)? CONST
    def const(self):
        """
        Parse an optionally signed constant; return a CONST node, wrapped
        in a MINUS node when preceded by a minus sign.
        """

        t = self.expect(T.PLUS, T.MINUS, optional=True)
        const = self.expect(T.CONST).args[0]

        const = Node(N.CONST, const)
        if t and t.type == T.MINUS:
            const = Node(N.MINUS, const)

        return const

    # argvar -> var PARAM
    def argvar(self):
        """
        Parse a variable terminated by PARAM; return the first argument of
        the VAR node (the value carried by the VAR token), not the node.
        """

        n = self.var()
        self.expect(T.PARAM)
        return n.args[0]

    # var -> VAR
    def var(self):
        """Parse a variable token; return a VAR node."""
        t = self.expect(T.VAR)
        return Node(N.VAR, t.args[0])

    # setvar -> SETM | SETA | ... | SETF | SETX | SETY
    def setvar(self):
        """Not implemented; always raises Exception."""
        raise Exception("SetVar not supported yet x_x")

    # style -> (TODO)
    def style(self):
        """Not implemented; always raises Exception."""
        raise Exception("Style not supported yet x_x")