284 lines
8.0 KiB
Python
284 lines
8.0 KiB
Python
# fx-92 Scientifique Collège+ language interpreter: Syntactic analysis
|
|
|
|
import re
|
|
from lexer import T, Token, ByteLexer
|
|
from ast import N, Node
|
|
|
|
#---
|
|
# LL(1) parser
|
|
#---
|
|
|
|
class UrlParser:
    """
    fx-92 SC+ language parser with a wes.casio.com URL or hexadecimal input.

    The URLs are typically in this form:

      http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000
      0000+S-000410110000100E0010B300D365+E-{code...}

    The program can also be provided in text hexadecimal form, which is
    everything following the "+E-" in the URL.

    This is your everyday LL(1) top-down parser. It implements the following
    formal grammar:

      program -> stmt*
      stmt    -> stmt2 EOL?
      stmt2   -> FORWARD arg | ROTATE arg | ORIENT arg | GOTO arg arg |
                 PENDOWN | PENUP | SETVAR arg argvar | INPUT argvar |
                 MESSAGE arg | PRINT arg | STYLE style | WAIT arg |
                 REPEAT arg program REPEAT_END |
                 WHILE arg program WHILE_END |
                 IF arg program IF_END |
                 IFELSE arg program ELSE program IFELSE_END
      arg     -> expr PARAM
      argvar  -> var PARAM

      expr    -> factor ((+|-) factor)*
      factor  -> atom ((*|/) atom)*
      atom    -> const (var | "(" expr ")")* | (var | "(" expr ")")+
      const   -> (+|-)? CONST
      var     -> VAR

    Binary operators are left-associative: "a - b - c" is parsed as
    "(a - b) - c" and "a / b / c" as "(a / b) / c".

      # Unused or TODO
      style   -> (TODO)
      setvar  -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY
    """

    def __init__(self, url):
        """
        Create a UrlParser from a wes.casio.com URL or hexadecimal code.

        When the input starts with "http://", "https://" or "wes.casio.com",
        only the text following the first "+E-" is kept. Any character that
        is not a hexadecimal digit is then dropped before decoding.

        Raises Exception if a URL does not contain "+E-", and ValueError
        (from bytes.fromhex) if an odd number of hexadecimal digits remains.
        """

        if url.startswith(("http://", "https://", "wes.casio.com")):
            print("[urlparser] URL includes protocol, will start after '+E-'")
            offset = url.find("+E-")

            if offset < 0:
                print("[urlparser] '+E-' not found, cannot decode URL")
                raise Exception("Cannot decode URL")

            url = url[offset+3:]

        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
            print("[urlparser] URL is not strict hexa, noise will be skipped")
            # bytes.fromhex() only tolerates whitespace, so the noise has to
            # be removed here for the announcement above to hold
            url = re.sub(r'[^0-9a-fA-F]', '', url)

        # Create the lexer and initialize the lookahead token
        self.lexer = ByteLexer(bytes.fromhex(url))
        self.la = None

    def lex_program(self):
        """Rewind the lexer and print every token until the end of input."""
        self.lexer.rewind()

        while not self.lexer.at_end():
            print(self.lexer.lex())

    def parse_program(self):
        """Rewind the lexer, parse a whole program and return its node."""
        self.lexer.rewind()
        self.la = None
        # Load the first token into the lookahead
        self.advance()
        return self.program()

    #---
    # LL parsing helpers
    #---

    def advance(self):
        """Return the next token and feed the lookahead."""
        token = self.la
        self.la = self.lexer.lex()
        return token

    def expect(self, *types, optional=False):
        """
        Read the next token, expecting a type specified in *types.

        Returns the consumed token when the lookahead matches. Otherwise the
        lookahead is left untouched and the method returns None if optional
        is set, or prints the error and raises Exception if it is not.
        """

        if self.la.type in types:
            return self.advance()
        if optional:
            return None

        expected = [T(t).name for t in types]
        got = T(self.la.type).name
        err = f"Expected one of {expected}, got {got}"
        print("[urlparser] " + err)
        raise Exception("Syntax error: " + err)

    def _block(self, end_type):
        """Parse "arg EOL program <end_type>" and return (arg, program)."""
        arg = self.arg()
        self.expect(T.EOL)
        prg = self.program()
        self.expect(end_type)
        return arg, prg

    #---
    # LL parsing rules
    #---

    # program -> stmt*
    def program(self):
        """Parse statements until none can be read; return a PROGRAM node."""
        stmts = []

        while 1:
            stmt = self.stmt(optional=True)
            if stmt is None:
                break
            stmts.append(stmt)

        return Node(N.PROGRAM, *stmts)

    # stmt -> stmt2 EOL?
    def stmt(self, optional=False):
        """Parse one statement and the optional EOL that follows it."""
        st = self.stmt2(optional=optional)
        # The EOL is consumed even when no statement was read (st is None)
        self.expect(T.EOL, optional=True)
        return st

    # stmt2 -> (lots of cases)
    def stmt2(self, optional=False):
        """
        Parse one statement without its trailing EOL and return its node.

        Returns None when optional is set and the lookahead does not start a
        statement; raises a syntax error in that situation otherwise.
        """
        valid = [
            T.FORWARD, T.ROTATE, T.ORIENT, T.GOTO, T.PENDOWN, T.PENUP,
            T.SETVAR, T.INPUT, T.MESSAGE, T.PRINT, T.STYLE, T.WAIT,
            T.REPEAT, T.WHILE, T.IF, T.IFELSE,
        ]
        op = self.expect(*valid, optional=optional)

        if op is None:
            return None

        kind = op.type

        # Basic statements taking exactly one expression argument
        single_arg = {
            T.FORWARD: N.FORWARD,
            T.ROTATE: N.ROTATE,
            T.ORIENT: N.ORIENT,
            T.MESSAGE: N.MESSAGE,
            T.PRINT: N.PRINT,
            T.WAIT: N.WAIT,
        }
        if kind in single_arg:
            return Node(single_arg[kind], self.arg())

        # Other basic statements
        if kind == T.GOTO:
            first = self.arg()
            second = self.arg()
            return Node(N.GOTO, first, second)
        if kind == T.PENDOWN:
            return Node(N.PENDOWN)
        if kind == T.PENUP:
            return Node(N.PENUP)
        if kind == T.SETVAR:
            # The value comes first in the token stream, then the variable
            value = self.arg()
            target = self.argvar()
            return Node(N.ASSIGN, value, target)
        if kind == T.INPUT:
            return Node(N.INPUT, self.argvar())
        if kind == T.STYLE:
            return Node(N.STYLE, self.style())

        # Flow control
        if kind == T.REPEAT:
            return Node(N.REPEAT, *self._block(T.REPEAT_END))
        if kind == T.WHILE:
            return Node(N.WHILE, *self._block(T.WHILE_END))
        if kind == T.IF:
            # An IF without ELSE is an IF node whose second branch is None
            return Node(N.IF, *self._block(T.IF_END), None)
        if kind == T.IFELSE:
            arg, p1 = self._block(T.ELSE)
            p2 = self.program()
            self.expect(T.IFELSE_END)
            return Node(N.IF, arg, p1, p2)

    # arg -> expr PARAM
    def arg(self):
        """Parse an expression terminated by PARAM; return the expression."""
        e = self.expr()
        self.expect(T.PARAM)
        return e

    # expr -> factor ((+|-) factor)*
    def expr(self):
        """Parse a left-associative chain of additions and subtractions."""
        node = self.factor()

        while 1:
            op = self.expect(T.PLUS, T.MINUS, optional=True)
            if op is None:
                return node
            # Fold to the left so that "a - b - c" means "(a - b) - c"
            kind = N.ADD if op.type == T.PLUS else N.SUB
            node = Node(kind, node, self.factor())

    # factor -> atom ((*|/) atom)*
    def factor(self):
        """Parse a left-associative chain of multiplications and divisions."""
        node = self.atom()

        while 1:
            op = self.expect(T.STAR, T.SLASH, optional=True)
            if op is None:
                return node
            # Fold to the left so that "a / b / c" means "(a / b) / c"
            kind = N.MUL if op.type == T.STAR else N.DIV
            node = Node(kind, node, self.atom())

    # atom -> const (VAR | "(" expr ")")* | (VAR | "(" expr ")")+
    def atom(self):
        """
        Parse an implicit product such as "2A(B+1)" and return a MUL node
        holding every operand, even when there is only one.

        Raises a syntax error if no operand can be read at all.
        """
        factors = []
        lat = self.la.type

        # Case of constants
        if lat == T.PLUS or lat == T.MINUS or lat == T.CONST:
            factors.append(self.const())

        while 1:
            lat = self.la.type

            if lat == T.VAR:
                factors.append(self.var())
            elif lat == T.LPAR:
                self.expect(T.LPAR)
                factors.append(self.expr())
                self.expect(T.RPAR)
            else:
                break

        if not factors:
            # The grammar requires at least one operand; the lookahead is
            # none of these types here, so this reports the syntax error
            self.expect(T.CONST, T.VAR, T.LPAR)

        return Node(N.MUL, *factors)

    # const -> (+|-)? CONST
    def const(self):
        """Parse an optionally signed constant; "-" wraps it in a MINUS node."""
        t = self.expect(T.PLUS, T.MINUS, optional=True)
        const = self.expect(T.CONST).args[0]

        const = Node(N.CONST, const)
        if t and t.type == T.MINUS:
            const = Node(N.MINUS, const)

        return const

    # argvar -> var PARAM
    def argvar(self):
        """Parse a variable terminated by PARAM; return the VAR node's first argument."""
        n = self.var()
        self.expect(T.PARAM)
        return n.args[0]

    # var -> VAR
    def var(self):
        """Parse a VAR token and return the matching VAR node."""
        t = self.expect(T.VAR)
        return Node(N.VAR, t.args[0])

    # setvar -> SETM | SETA | ... | SETF | SETX | SETY
    def setvar(self):
        """Not implemented yet: always raises Exception."""
        raise Exception("SetVar not supported yet x_x")

    # style -> (TODO)
    def style(self):
        """Not implemented yet: always raises Exception."""
        raise Exception("Style not supported yet x_x")
|
|
|