Lephe
6151144d0a
The program is currently able to lex most useful tokens, and parse constructs associated with them on simple examples. Unit tests are still missing to formally ensure everything's right.
284 lines
8 KiB
Python
284 lines
8 KiB
Python
# fx-92 Scientifique Collège+ language interpreter: Syntactic analysis
|
|
|
|
import re
|
|
from lexer import T, Token, ByteLexer
|
|
from ast import N, Node
|
|
|
|
#---
|
|
# LL(1) parser
|
|
#---
|
|
|
|
class UrlParser:
    """
    fx-92 SC+ language parser with a wes.casio.com URL or hexadecimal input.

    The URLs are typically in this form:

      http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000
      0000+S-000410110000100E0010B300D365+E-{code...}

    The program can also be provided in text hexadecimal form, which is
    everything following the "+E-" in the URL.

    This is your everyday LL(1) top-down parser. It implements the following
    formal grammar:

      program -> stmt*
      stmt    -> stmt2 EOL?
      stmt2   -> FORWARD arg | ROTATE arg | ORIENT arg | GOTO arg arg |
                 PENDOWN | PENUP | SETVAR arg argvar | INPUT argvar |
                 MESSAGE arg | PRINT arg | STYLE style | WAIT arg |
                 REPEAT arg EOL program REPEAT_END |
                 WHILE arg EOL program WHILE_END |
                 IF arg EOL program IF_END |
                 IFELSE arg EOL program ELSE program IFELSE_END
      arg     -> expr PARAM
      argvar  -> var PARAM

      expr    -> factor ((+|-) factor)*
      factor  -> atom ((*|/) atom)*
      atom    -> const (var | "(" expr ")")* | (var | "(" expr ")")+
      const   -> (+|-)? CONST
      var     -> VAR

    The repetitions in expr and factor are folded to the left, so that
    "a - b - c" means "(a - b) - c" and "a / b / c" means "(a / b) / c".

    # Unused or TODO
      style   -> (TODO)
      setvar  -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY
    """

    def __init__(self, url):
        """
        Create a UrlParser from a wes.casio.com URL or hexadecimal code.

        Raises an Exception if [url] looks like a URL but has no "+E-"
        section to decode.
        """

        # Create the lexer and initialize the lookahead token
        self.lexer = ByteLexer(bytes.fromhex(self._hex_payload(url)))
        self.la = None

    @staticmethod
    def _hex_payload(url):
        """
        Return the hexadecimal program text contained in [url].

        For a URL, this is what follows "+E-". If the text is not made only
        of hexadecimal byte pairs, a warning is printed and the noise is
        removed so that the result can always be given to bytes.fromhex().
        """

        if url.startswith(("http://", "https://", "wes.casio.com")):
            print("[urlparser] URL includes protocol, will start after '+E-'")
            offset = url.find("+E-")

            if offset < 0:
                print("[urlparser] '+E-' not found, cannot decode URL")
                raise Exception("Cannot decode URL")

            url = url[offset+3:]

        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
            print("[urlparser] URL is not strict hexa, noise will be skipped")
            # bytes.fromhex() only tolerates ASCII whitespace and raises on
            # anything else, so the noise must really be removed here
            url = re.sub(r'[^0-9a-fA-F]', '', url)
            # A dangling half-byte cannot be decoded either
            if len(url) % 2:
                url = url[:-1]

        return url

    def lex_program(self):
        """Print every token of the program, without parsing it."""
        self.lexer.rewind()

        while not self.lexer.at_end():
            print(self.lexer.lex())

    def parse_program(self):
        """Parse the program from its start and return its PROGRAM node."""
        self.lexer.rewind()
        self.la = None
        # The first advance() only loads the lookahead; it returns None
        self.advance()
        return self.program()

    #---
    # LL parsing helpers
    #---

    def advance(self):
        """Return the next token and feed the lookahead."""
        token = self.la
        self.la = self.lexer.lex()
        return token

    def expect(self, *types, optional=False):
        """
        Read the next token, expecting a type specified in *types.

        If the lookahead does not match, return None without consuming
        anything when [optional] is set, and raise a syntax error otherwise.
        """

        if self.la.type in types:
            return self.advance()
        if optional:
            return None

        expected = [T(t).name for t in types]
        got = T(self.la.type).name
        err = f"Expected one of {expected}, got {got}"
        print("[urlparser] " + err)
        raise Exception("Syntax error: " + err)

    #---
    # LL parsing rules
    #---

    # program -> stmt*
    def program(self):
        stmts = []

        while True:
            stmt = self.stmt(optional=True)
            # No statement starts here: this is the end of the sequence
            if stmt is None:
                break
            stmts.append(stmt)

        return Node(N.PROGRAM, *stmts)

    # stmt -> stmt2 EOL?
    def stmt(self, optional=False):
        st = self.stmt2(optional=optional)
        self.expect(T.EOL, optional=True)
        return st

    # stmt2 -> (lots of cases)
    def stmt2(self, optional=False):
        valid = [
            T.FORWARD, T.ROTATE, T.ORIENT, T.GOTO, T.PENDOWN, T.PENUP,
            T.SETVAR, T.INPUT, T.MESSAGE, T.PRINT, T.STYLE, T.WAIT,
            T.REPEAT, T.WHILE, T.IF, T.IFELSE,
        ]
        op = self.expect(*valid, optional=optional)

        if op is None:
            return None

        # Basic statements

        if op.type == T.FORWARD:
            return Node(N.FORWARD, self.arg())
        if op.type == T.ROTATE:
            return Node(N.ROTATE, self.arg())
        if op.type == T.ORIENT:
            return Node(N.ORIENT, self.arg())
        if op.type == T.GOTO:
            return Node(N.GOTO, self.arg(), self.arg())
        if op.type == T.PENDOWN:
            return Node(N.PENDOWN)
        if op.type == T.PENUP:
            return Node(N.PENUP)
        if op.type == T.SETVAR:
            # The value comes first, the target variable second
            return Node(N.ASSIGN, self.arg(), self.argvar())
        if op.type == T.INPUT:
            return Node(N.INPUT, self.argvar())
        if op.type == T.MESSAGE:
            return Node(N.MESSAGE, self.arg())
        if op.type == T.PRINT:
            return Node(N.PRINT, self.arg())
        if op.type == T.STYLE:
            return Node(N.STYLE, self.style())
        if op.type == T.WAIT:
            return Node(N.WAIT, self.arg())

        # Flow control

        if op.type == T.REPEAT:
            count = self._block_header()
            return Node(N.REPEAT, count, self._block_body(T.REPEAT_END))

        if op.type == T.WHILE:
            cond = self._block_header()
            return Node(N.WHILE, cond, self._block_body(T.WHILE_END))

        if op.type == T.IF:
            cond = self._block_header()
            # A plain IF has no "else" program
            return Node(N.IF, cond, self._block_body(T.IF_END), None)

        if op.type == T.IFELSE:
            cond = self._block_header()
            p1 = self._block_body(T.ELSE)
            p2 = self._block_body(T.IFELSE_END)
            return Node(N.IF, cond, p1, p2)

    def _block_header(self):
        """Parse "arg EOL", the start of every flow control statement."""
        arg = self.arg()
        self.expect(T.EOL)
        return arg

    def _block_body(self, end):
        """Parse a program followed by the mandatory token of type [end]."""
        prg = self.program()
        self.expect(end)
        return prg

    # arg -> expr PARAM
    def arg(self):
        e = self.expr()
        self.expect(T.PARAM)
        return e

    # expr -> factor ((+|-) factor)*
    def expr(self):
        node = self.factor()

        # Fold to the left: a right-recursive rule would turn "a - b - c"
        # into "a - (b - c)"
        while True:
            t = self.expect(T.PLUS, T.MINUS, optional=True)
            if t is None:
                return node

            kind = N.ADD if t.type == T.PLUS else N.SUB
            node = Node(kind, node, self.factor())

    # factor -> atom ((*|/) atom)*
    def factor(self):
        node = self.atom()

        # Fold to the left: a right-recursive rule would turn "a / b / c"
        # into "a / (b / c)"
        while True:
            t = self.expect(T.STAR, T.SLASH, optional=True)
            if t is None:
                return node

            kind = N.MUL if t.type == T.STAR else N.DIV
            node = Node(kind, node, self.atom())

    # atom -> const (VAR | "(" expr ")")* | (VAR | "(" expr ")")+
    def atom(self):
        factors = []
        lat = self.la.type

        # Case of constants
        if lat == T.PLUS or lat == T.MINUS or lat == T.CONST:
            factors.append(self.const())

        # Implicit products, such as "2A(B+1)"
        while True:
            lat = self.la.type

            if lat == T.VAR:
                factors.append(self.var())
            elif lat == T.LPAR:
                self.expect(T.LPAR)
                factors.append(self.expr())
                self.expect(T.RPAR)
            else:
                break

        # Both alternatives of the rule need at least one operand; do not
        # let a missing operand go through as an empty product
        if not factors:
            got = T(self.la.type).name
            err = f"Expected an expression, got {got}"
            print("[urlparser] " + err)
            raise Exception("Syntax error: " + err)

        return Node(N.MUL, *factors)

    # const -> (+|-)? CONST
    def const(self):
        sign = self.expect(T.PLUS, T.MINUS, optional=True)
        value = self.expect(T.CONST).args[0]

        const = Node(N.CONST, value)
        # A leading "+" changes nothing, only "-" wraps the constant
        if sign and sign.type == T.MINUS:
            const = Node(N.MINUS, const)

        return const

    # argvar -> var PARAM
    def argvar(self):
        n = self.var()
        self.expect(T.PARAM)
        return n

    # var -> VAR
    def var(self):
        t = self.expect(T.VAR)
        return Node(N.VAR, t.args[0])

    # setvar -> SETM | SETA | ... | SETF | SETX | SETY
    def setvar(self):
        raise Exception("SetVar not supported yet x_x")

    # style -> (TODO)
    def style(self):
        raise Exception("Style not supported yet x_x")
|
|
|