From 3d8dcdd989ffc3823596bda5997d367fd6729afa Mon Sep 17 00:00:00 2001 From: Lephe Date: Wed, 2 Oct 2019 07:18:29 +0200 Subject: [PATCH] add command-line options and a text lexer This change lays the ground for automated unit tests. It adds command-line options to select the input language format between URL (hexa text) and plain text, change a few output settings, and redirect graphical output to an image. A text lexer has also been added so that unit tests and new programs can be written in an English-like syntax instead of raw hexadecimal. --- ast.py | 2 + drawing.py | 14 ++-- fx92.py | 84 ++++++++++++++++--- lexer.py | 173 +++++++++++++++++++++++++++++++++++++++- parser.py | 71 +++++------------ printer.py | 5 +- tests/line-patterns.txt | 24 ++++++ 7 files changed, 302 insertions(+), 71 deletions(-) create mode 100644 tests/line-patterns.txt diff --git a/ast.py b/ast.py index 7c013a9..94fbaaf 100644 --- a/ast.py +++ b/ast.py @@ -94,6 +94,8 @@ class Node: return self.args[0] if self.type == N.ADD and self.constchildren(): return Node(N.CONST, sum(c.value for c in self.args)) + if self.type == N.ADD and arity == 2 and self.args[1].type == N.MINUS: + return Node(N.SUB, self.args[0], self.args[1].args[0]) if self.type == N.MINUS and self.constchildren(): return Node(N.CONST, -self.args[0].value) diff --git a/drawing.py b/drawing.py index b7e887a..69dd34e 100644 --- a/drawing.py +++ b/drawing.py @@ -10,10 +10,11 @@ class Window: BLACK = (0, 0, 0, 255) WHITE = (255, 255, 255, 255) - def __init__(self, width, height, scale): + def __init__(self, width, height, scale, quiet=False): self.width = width self.height = height self.scale = scale + self.quiet = quiet def __enter__(self): """ @@ -27,9 +28,10 @@ class Window: raise Exception("Failed to initialize SDL") # Create the window + mode = SDL_WINDOW_HIDDEN if self.quiet else SDL_WINDOW_SHOWN self.w = SDL_CreateWindow("fx-92 Scientifique Collège+".encode(), SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, - 
self.width*self.scale, self.height*self.scale, SDL_WINDOW_SHOWN) + self.width*self.scale, self.height*self.scale, mode) if self.w is None: raise Exception("Failed to create window") @@ -114,6 +116,9 @@ class Window: def wait(self): """Wait for the window to be closed.""" + if self.quiet: + return + event = SDL_Event() while 1: SDL_WaitEvent(event) @@ -121,11 +126,6 @@ class Window: if event.type == SDL_QUIT: break - if event.type == SDL_KEYDOWN: - key = event.key.keysym.sym - if key == SDLK_s: - print("Going to save the picture, probably") - def update(self): """Push window contents on-screen.""" diff --git a/fx92.py b/fx92.py index c482e72..73c514a 100755 --- a/fx92.py +++ b/fx92.py @@ -1,8 +1,10 @@ #! /usr/bin/python3 import sys +import getopt -from parser import UrlParser +from parser import Parser +from lexer import UrlLexer, TextLexer from printer import print_ast from drawing import Window from interpreter import Context @@ -12,7 +14,18 @@ from interpreter import Context #--- usage_string = f""" -usage: {sys.argv[0]} +usage: {sys.argv[0]} [-s|-u] [options...] + +If "-" is specified as input, stdin is read. + +Input mode (default is -s): + -s Input file is an ASCII script ("GOTO 12, 35") + -u Input file is a wes.casio.com URL ("https://...F908313200333500") + +Output options: + --quiet Do not show the SDL window + --save= Save a copy of the screen output in a bitmap file + --scale= Window scale up (default 4, max 16) """.strip() def usage(exitcode=None): @@ -21,25 +34,74 @@ def usage(exitcode=None): if exitcode is not None: sys.exit(exitcode) -def main(argv): - args = argv[1:] +def main(): + # Read command-line arguments + try: + opts, args = getopt.gnu_getopt(sys.argv[1:], "hus", + ["help", "quiet", "save=", "scale=", "debug="]) + opts = dict(opts) - if not args or "-h" in args or "--help" in args or "-?" 
in args: - usage(0) - if len(args) != 1: - usage(1) + if len(sys.argv) == 1 or "-h" in opts or "--help" in opts: + usage(0) - parser = UrlParser(args[0]) + if "-u" in opts and "-s" in opts: + raise getopt.GetoptError("-s (script input) and -u (URL input) " + "are exclusive") + + if len(args) < 1: + usage(1) + if len(args) > 1: + raise getopt.GetoptError("only one input file can be specified") + + except getopt.GetoptError as e: + print("error:", e, file=sys.stderr) + print(f"Try '{sys.argv[0]} --help' for details.", file=sys.stderr) + sys.exit(1) + + # Other parameters + quiet = "--quiet" in opts + out = opts.get("--save", None) + debug = opts.get("--debug", None) + + scale = int(opts.get("--scale", "4")) + if scale < 1: + scale = 1 + if scale > 16: + scale = 16 + + # Read the input program + file = args[0] + if file == "-": + program = sys.stdin.read() + else: + with open(file, "r") as fp: + program = fp.read() + + # URL mode + if "-u" in opts: + lexer = UrlLexer(program) + # Default, script mode + else: + lexer = TextLexer(program) + + # Lexer debug mode, just print the token stream + if debug == "lexer": + lexer.dump() + return 0 + + parser = Parser(lexer) ast = parser.parse_program() ast = ast.simplify() print_ast(ast, lang="ast") - with Window(width=96, height=32, scale=8) as w: + with Window(width=128, height=48, scale=scale, quiet=quiet) as w: ctx = Context(w) ctx.run(ast) w.wait() + return 0 + if __name__ == "__main__": - main(sys.argv) + sys.exit(main()) diff --git a/lexer.py b/lexer.py index 8473341..fce551a 100644 --- a/lexer.py +++ b/lexer.py @@ -90,10 +90,27 @@ class Token: return base + args #--- -# Lexer +# Lexer base #--- -class ByteLexer: +class LexerBase: + """ + Lexer base class. This class only provides common methods and cannot be + used to analyse a program. 
+ """ + + def dump(self): + self.rewind() + + while not self.at_end(): + x = self.lex() + print(x) + +#--- +# Bitcode lexer +#--- + +class BitcodeLexer(LexerBase): """ fx-92 SC+ language lexer with bytes() bitcode input. """ @@ -198,3 +215,155 @@ class ByteLexer: """Check whether the whole input has been read.""" return self.pos >= len(self.hex) +#--- +# Url lexer +#--- + +class UrlLexer(BitcodeLexer): + """ + fx-92 SC+ language lexer with a wes.casio.com URL or hexadecimal input. + The URLs are typically in this form: + + http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000 + 0000+S-000410110000100E0010B300D365+E-{code...} + + The program can also be provided in text hexadecimal form, which is + everything following the "+E-" in the URL. + """ + + def __init__(self, url): + if url.startswith("http://") \ + or url.startswith("https://") \ + or url.startswith("wes.casio.com"): + print("[urlparser] URL includes protocol, will start after '+E-'") + offset = url.find("+E-") + + if offset < 0: + print("[urlparser] '+E-' not found, cannot decode URL") + raise Exception("Cannot decode URL") + + url = url[offset+3:] + + if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url): + print("[urlparser] URL is not strict hexa, noise will be skipped") + + super().__init__(bytes.fromhex(url)) + +#--- +# Plain text lexer +#--- + +class TextLexer(LexerBase): + """ + fx-92 SC+ language lexer with Basic-like input. + + This thing is very naive and extremely inefficient. 
+ """ + + RE_STMTS = re.compile( + r"NOP|FORWARD|ROTATE|ORIENT|GOTO|PENDOWN|PENUP|SETVAR|INPUT|MESSAGE|" + r"PRINT|STYLE|WAIT|REPEAT_END|REPEAT|WHILE_END|WHILE|IF_END|ELSE|" + r"IFELSE_END|IFELSE|IF", + re.IGNORECASE) + + RE_CONST = re.compile( + r"([0-9]+(?:\.[0-9]+)?(?:[eE][0-9]+)?)(%?)") + + def __init__(self, code): + """Initialize the lexer with text code.""" + + self.base_code = code.replace(";", "\n") + self.rewind() + + def rewind(self): + """Restart lexing the same input.""" + + self.code = self.base_code + self.errors = 0 + self.pending_param = False + + def lex(self): + """Return the next token in the stream.""" + + c = self.code.lstrip(" \t") + + # Special case of newlines. If a non-statement has been identified and + # no comma has followed, emit a PARAM token manually. + if (not c or c[0] == "\n") and self.pending_param: + self.pending_param = False + self.code = c.lstrip("\n") + return Token(T.PARAM) + + c = self.code.lstrip(" \t\n") + + # End of file + if not c: + self.code = "" + return Token(T.END) + + # Statements + m = re.match(self.RE_STMTS, c) + if m is not None: + t = Token(getattr(T, m[0].upper())) + self.code = c[len(m[0]):] + return t + + # Relations + rels = [ ">=", "<=", "!=", ">", "<" ] + for r in rels: + if c.startswith(r): + self.code = c[len(r):] + self.pending_param = True + return Token(T.REL, r) + + # Punctuation + punct = { + ",": T.PARAM, + ":": T.COLON, + "?": T.QUEST, + "(": T.LPAR, + ")": T.RPAR, + "=": T.EQUAL, + "+": T.PLUS, + "-": T.MINUS, + "*": T.STAR, + "/": T.SLASH, + "!": T.BANG, + } + if c[0] in punct: + self.code = c[1:] + self.pending_param = (c[0] != ",") + return Token(punct[c[0]]) + + # Constants + m = re.match(self.RE_CONST, c) + if m is not None: + f = float(m[1]) + if m[2] == "%": + f /= 100 + + self.code = c[len(m[0]):] + self.pending_param = True + return Token(T.CONST, f) + + # Variables + if c[0] in "MABCDEFxXyY": + var = c[0].lower() if c[0] in "xXyY" else c[0] + self.code = c[1:] + self.pending_param = 
True + return Token(T.VAR, c[0]) + + # If nothing can be found, raise an exception + s = c.split(maxsplit=1) + err = s[0] + self.code = s[1] if len(s) > 1 else "" + + raise Exception(f"Lexical error near '{err}'") + + def at_end(self): + """Check whether the whole input has been read.""" + return not self.code and not self.pending_param + +# + +__all__ = ["T", "Token", "BitcodeLexer", "UrlLexer", "TextLexer"] diff --git a/parser.py b/parser.py index a4ed165..cbe1308 100644 --- a/parser.py +++ b/parser.py @@ -1,26 +1,17 @@ # fx-92 Scientifique Collège+ language interpreter: Syntactic analysis import re -from lexer import T, Token, ByteLexer +from lexer import T, Token, BitcodeLexer from ast import N, Node #--- # LL(1) parser #--- -class UrlParser: +class Parser: """ - fx-92 SC+ language parser with a wes.casio.com URL or hexadecimal input. - The URLs are typically in this form: - - http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000 - 0000+S-000410110000100E0010B300D365+E-{code...} - - The program can also be provided in text hexadecimal form, which is - everything following the "+E-" in the URL. - - This is your everyday LL(1) top-down parser. It implements the following - formal grammar: + fx-92 SC+ language parser. This is your everyday LL(1) top-down parser. It + implements the following formal grammar: program -> stmt* stmt -> stmt2 EOL? @@ -45,37 +36,15 @@ class UrlParser: setvar -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY """ - def __init__(self, url): + def __init__(self, lexer): """ - Create a UrlParser from a wes.casio.com URL or hexadecimal code. + Create a Parser from a chosen lexer. 
""" - if url.startswith("http://") \ - or url.startswith("https://") \ - or url.startswith("wes.casio.com"): - print("[urlparser] URL includes protocol, will start after '+E-'") - offset = url.find("+E-") - - if offset < 0: - print("[urlparser] '+E-' not found, cannot decode URL") - raise Exception("Cannot decode URL") - - url = url[offset+3:] - - if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url): - print("[urlparser] URL is not strict hexa, noise will be skipped") - # Create the lexer and initialize the lookahead byte - self.lexer = ByteLexer(bytes.fromhex(url)) + self.lexer = lexer self.la = None - def lex_program(self): - self.lexer.rewind() - - while not self.lexer.at_end(): - x = self.lexer.lex() - print(x) - def parse_program(self): self.lexer.rewind() self.la = None @@ -170,28 +139,28 @@ class UrlParser: if op.type == T.REPEAT: arg = self.arg() - self.expect(T.EOL) + self.expect(T.EOL, optional=True) prg = self.program() self.expect(T.REPEAT_END) return Node(N.REPEAT, arg, prg) if op.type == T.WHILE: arg = self.arg() - self.expect(T.EOL) + self.expect(T.EOL, optional=True) prg = self.program() self.expect(T.WHILE_END) return Node(N.WHILE, arg, prg) if op.type == T.IF: arg = self.arg() - self.expect(T.EOL) + self.expect(T.EOL, optional=True) prg = self.program() self.expect(T.IF_END) return Node(N.IF, arg, prg, None) if op.type == T.IFELSE: arg = self.arg() - self.expect(T.EOL) + self.expect(T.EOL, optional=True) p1 = self.program() self.expect(T.ELSE) p2 = self.program() @@ -206,15 +175,17 @@ class UrlParser: # expr -> factor | factor + expr | factor - expr def expr(self): - factor = self.factor() - t = self.expect(T.PLUS, T.MINUS, optional=True) + f = [self.factor()] - if t is None: - return factor - if t.type == T.PLUS: - return Node(N.ADD, factor, self.expr()) - if t.type == T.MINUS: - return Node(N.SUB, factor, self.expr()) + while 1: + t = self.expect(T.PLUS, T.MINUS, optional=True) + + if t is None: + return Node(N.ADD, *f) + if t.type == T.PLUS: + 
f.append(self.factor()) + if t.type == T.MINUS: + f.append(Node(N.MINUS, self.factor())) # factor -> atom | atom * factor | atom / factor def factor(self): diff --git a/printer.py b/printer.py index 0e01c42..e7c75bc 100644 --- a/printer.py +++ b/printer.py @@ -1,7 +1,6 @@ # fx-92 Scientifique Collège+ language interpreter: AST printer from ast import N, Node -__all__ = ["print_ast"] #--- # Message definitions @@ -73,3 +72,7 @@ def print_ast(n, lang="en", indent=0): for arg in n.args: print_ast(arg, lang=lang, indent=indent+2) + +# + +__all__ = ["print_ast"] diff --git a/tests/line-patterns.txt b/tests/line-patterns.txt new file mode 100644 index 0000000..1bdee82 --- /dev/null +++ b/tests/line-patterns.txt @@ -0,0 +1,24 @@ +goto -60, 12 ; pendown ; goto x+13, y+15 ; penup +goto -60, 9 ; pendown ; goto x+13, y+13 ; penup +goto -60, 6 ; pendown ; goto x+13, y+8 ; penup +goto -60, 3 ; pendown ; goto x+13, y+5 ; penup +goto -60, 0 ; pendown ; goto x+13, y ; penup + +goto -47, -8 ; pendown ; goto x-13, y+5 ; penup +goto -47, -14 ; pendown ; goto x-13, y+8 ; penup +goto -47, -22 ; pendown ; goto x-13, y+13 ; penup +goto -47, -27 ; pendown ; goto x-13, y+15 ; penup + +setvar 0, A + +repeat 6 + goto -30,16-7A + pendown + goto x+3A+3,y+5 + penup + goto x+A+3,y + pendown + goto x-3A-3,y-5 + penup + setvar A+1, A +repeat_end