# fx92-interpreter/parser.py
# Lephe 6151144d0a
# initial commit: most of the lexing and parsing work
# The program is currently able to lex most useful tokens, and parse
# constructs associated with them on simple examples.
#
# Unit tests are still missing to formally ensure everything's right.
# 2019-09-30 11:29:05 +02:00
#
# 284 lines
# 8 KiB
# Python

# fx-92 Scientifique Collège+ language interpreter: Syntactic analysis
import re
from lexer import T, Token, ByteLexer
from ast import N, Node
#---
# LL(1) parser
#---
class UrlParser:
    """
    fx-92 SC+ language parser with a wes.casio.com URL or hexadecimal input.

    The URLs are typically in this form:

      http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000
      0000+S-000410110000100E0010B300D365+E-{code...}

    The program can also be provided in text hexadecimal form, which is
    everything following the "+E-" in the URL.

    This is your everyday LL(1) top-down parser. It implements the following
    formal grammar:

      program -> stmt*
      stmt    -> stmt2 EOL?
      stmt2   -> FORWARD arg | ROTATE arg | ORIENT arg | GOTO arg arg |
                 PENDOWN | PENUP | SETVAR arg argvar | INPUT argvar |
                 MESSAGE arg | PRINT arg | STYLE style | WAIT arg |
                 REPEAT arg program REPEAT_END |
                 WHILE arg program WHILE_END |
                 IF arg program IF_END |
                 IFELSE arg program ELSE program IFELSE_END
      arg     -> expr PARAM
      argvar  -> var PARAM
      expr    -> factor ((+ | -) factor)*
      factor  -> atom ((* | /) atom)*
      atom    -> const (var | "(" expr ")")* | (var | "(" expr ")")+
      const   -> (+|-)? CONST
      var     -> VAR

    The repetitions in expr and factor are folded to the left, so that
    "a - b - c" means "(a - b) - c" and "a / b / c" means "(a / b) / c".

    # Unused or TODO

      style   -> (TODO)
      setvar  -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY
    """

    def __init__(self, url):
        """
        Create a UrlParser from a wes.casio.com URL or hexadecimal code.

        Raises Exception if [url] looks like a URL but has no "+E-" marker,
        and ValueError if an odd number of hexadecimal digits is left once
        the noise has been removed.
        """

        # Create the lexer and initialize the lookahead token
        self.lexer = ByteLexer(self._decode_input(url))
        self.la = None

    @staticmethod
    def _decode_input(url):
        """Return the program bytes encoded in a URL or hexadecimal string."""

        if url.startswith(("http://", "https://", "wes.casio.com")):
            print("[urlparser] URL includes protocol, will start after '+E-'")
            offset = url.find("+E-")

            if offset < 0:
                print("[urlparser] '+E-' not found, cannot decode URL")
                raise Exception("Cannot decode URL")

            url = url[offset+3:]

        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
            print("[urlparser] URL is not strict hexa, noise will be skipped")
            # bytes.fromhex() only tolerates whitespace, so the noise has to
            # be removed by hand. This is best-effort: any stray character
            # that happens to be a hexadecimal digit is kept as data.
            url = re.sub(r'[^0-9a-fA-F]', '', url)

        return bytes.fromhex(url)

    def lex_program(self):
        """Rewind the lexer and print every token until the end of input."""

        self.lexer.rewind()

        while not self.lexer.at_end():
            x = self.lexer.lex()
            print(x)

    def parse_program(self):
        """Rewind the lexer, parse from the start and return the PROGRAM node."""

        self.lexer.rewind()
        self.la = None
        self.advance()

        # NOTE: program() stops at the first token that cannot start a
        # statement; nothing here checks that the input was fully consumed.
        return self.program()

    #---
    # LL parsing helpers
    #---

    def advance(self):
        """Return the next token and feed the lookahead."""

        current = self.la
        self.la = self.lexer.lex()
        return current

    def expect(self, *types, optional=False):
        """
        Read the next token, expecting a type specified in *types.

        Returns the consumed token when the lookahead matches. Otherwise,
        returns None without consuming anything if [optional] is set, and
        raises an Exception describing the syntax error if it is not.
        """

        if self.la.type in types:
            return self.advance()
        if optional:
            return None

        expected = [T(t).name for t in types]
        got = T(self.la.type).name
        err = f"Expected one of {expected}, got {got}"

        print("[urlparser] " + err)
        raise Exception("Syntax error: " + err)

    #---
    # LL parsing rules
    #---

    # program -> stmt*
    def program(self):
        """Parse statements until one cannot start; return a PROGRAM node."""

        stmts = []

        while 1:
            stmt = self.stmt(optional=True)
            if stmt is None:
                break
            stmts.append(stmt)

        return Node(N.PROGRAM, *stmts)

    # stmt -> stmt2 EOL?
    def stmt(self, optional=False):
        """Parse one statement and the EOL that may follow it."""

        st = self.stmt2(optional=optional)
        # The optional EOL is looked for even when no statement was parsed
        self.expect(T.EOL, optional=True)
        return st

    # stmt -> (lots of cases)
    def stmt2(self, optional=False):
        """
        Parse one statement without its trailing EOL.

        Returns the statement node, or None if [optional] is set and the
        lookahead does not start a statement.
        """

        valid = [
            T.FORWARD, T.ROTATE, T.ORIENT, T.GOTO, T.PENDOWN, T.PENUP,
            T.SETVAR, T.INPUT, T.MESSAGE, T.PRINT, T.STYLE, T.WAIT,
            T.REPEAT, T.WHILE, T.IF, T.IFELSE,
        ]
        op = self.expect(*valid, optional=optional)

        if op is None:
            return None

        # Basic statements

        if op.type == T.FORWARD:
            return Node(N.FORWARD, self.arg())
        if op.type == T.ROTATE:
            return Node(N.ROTATE, self.arg())
        if op.type == T.ORIENT:
            return Node(N.ORIENT, self.arg())
        if op.type == T.GOTO:
            return Node(N.GOTO, self.arg(), self.arg())
        if op.type == T.PENDOWN:
            return Node(N.PENDOWN)
        if op.type == T.PENUP:
            return Node(N.PENUP)
        if op.type == T.SETVAR:
            # The value comes first in the input, then the target variable
            return Node(N.ASSIGN, self.arg(), self.argvar())
        if op.type == T.INPUT:
            return Node(N.INPUT, self.argvar())
        if op.type == T.MESSAGE:
            return Node(N.MESSAGE, self.arg())
        if op.type == T.PRINT:
            return Node(N.PRINT, self.arg())
        if op.type == T.STYLE:
            return Node(N.STYLE, self.style())
        if op.type == T.WAIT:
            return Node(N.WAIT, self.arg())

        # Flow control

        if op.type == T.REPEAT:
            arg = self.arg()
            self.expect(T.EOL)
            prg = self.program()
            self.expect(T.REPEAT_END)
            return Node(N.REPEAT, arg, prg)

        if op.type == T.WHILE:
            arg = self.arg()
            self.expect(T.EOL)
            prg = self.program()
            self.expect(T.WHILE_END)
            return Node(N.WHILE, arg, prg)

        if op.type == T.IF:
            arg = self.arg()
            self.expect(T.EOL)
            prg = self.program()
            self.expect(T.IF_END)
            # Same node type as IFELSE, with no "else" program
            return Node(N.IF, arg, prg, None)

        if op.type == T.IFELSE:
            arg = self.arg()
            self.expect(T.EOL)
            p1 = self.program()
            self.expect(T.ELSE)
            p2 = self.program()
            self.expect(T.IFELSE_END)
            return Node(N.IF, arg, p1, p2)

    # arg -> expr PARAM
    def arg(self):
        """Parse an expression followed by PARAM; return the expression."""

        e = self.expr()
        self.expect(T.PARAM)
        return e

    # expr -> factor ((+ | -) factor)*
    def expr(self):
        """Parse a chain of additions and subtractions, left-associative."""

        node = self.factor()

        # Fold to the left: a right-recursive rule would read "a - b - c"
        # as "a - (b - c)".
        while 1:
            t = self.expect(T.PLUS, T.MINUS, optional=True)
            if t is None:
                return node

            kind = N.ADD if t.type == T.PLUS else N.SUB
            node = Node(kind, node, self.factor())

    # factor -> atom ((* | /) atom)*
    def factor(self):
        """Parse a chain of products and divisions, left-associative."""

        node = self.atom()

        # Fold to the left, for the same reason as in expr()
        while 1:
            t = self.expect(T.STAR, T.SLASH, optional=True)
            if t is None:
                return node

            kind = N.MUL if t.type == T.STAR else N.DIV
            node = Node(kind, node, self.atom())

    # atom -> const (VAR | "(" expr ")")* | (VAR | "(" expr ")")+
    def atom(self):
        """
        Parse an implicit product of a constant, variables and parenthesized
        expressions; return a MUL node holding every operand.

        Raises an Exception (syntax error) if there is no operand at all.
        """

        factors = []
        lat = self.la.type

        # Case of constants
        if lat == T.PLUS or lat == T.MINUS or lat == T.CONST:
            factors.append(self.const())

        while 1:
            lat = self.la.type

            if lat == T.VAR:
                factors.append(self.var())
            elif lat == T.LPAR:
                self.expect(T.LPAR)
                factors.append(self.expr())
                self.expect(T.RPAR)
            else:
                break

        if not factors:
            # The lookahead matches none of these types, otherwise an operand
            # would have been read above; expect() thus reports the error.
            self.expect(T.PLUS, T.MINUS, T.CONST, T.VAR, T.LPAR)

        return Node(N.MUL, *factors)

    # const -> (+|-)? CONST
    def const(self):
        """Parse a constant with an optional sign; return its node."""

        t = self.expect(T.PLUS, T.MINUS, optional=True)

        const = self.expect(T.CONST).args[0]
        const = Node(N.CONST, const)

        # A leading "+" changes nothing, only "-" wraps the constant
        if t and t.type == T.MINUS:
            const = Node(N.MINUS, const)

        return const

    # argvar -> var PARAM
    def argvar(self):
        """Parse a variable followed by PARAM; return the variable node."""

        n = self.var()
        self.expect(T.PARAM)
        return n

    # var -> VAR
    def var(self):
        """Parse a VAR token; return a VAR node holding its first argument."""

        t = self.expect(T.VAR)
        return Node(N.VAR, t.args[0])

    # setvar -> SETM | SETA | ... | SETF | SETX | SETY
    def setvar(self):
        """Not implemented yet; always raises an Exception."""

        raise Exception("SetVar not supported yet x_x")

    # style -> (TODO)
    def style(self):
        """Not implemented yet; always raises an Exception."""

        raise Exception("Style not supported yet x_x")