add command-line options and a text lexer

This change lays the groundwork for automated unit tests. It adds command-line options to select the input format, either a URL (hexadecimal text) or plain text, to change a few output settings, and to redirect graphical output to an image. A text lexer has also been added so that unit tests and new programs can be written in an English-like syntax instead of raw hexadecimal.
2019-10-02 07:18:29 +02:00 · 2019-10-02 07:18:29 +02:00 · 3d8dcdd989
parent 0189d7f372
commit 3d8dcdd989
7 changed files with 302 additions and 71 deletions
--- a/ast.py
+++ b/ast.py
@ -94,6 +94,8 @@ class Node:
            return self.args[0]
        if self.type == N.ADD and self.constchildren():
            return Node(N.CONST, sum(c.value for c in self.args))
+        if self.type == N.ADD and arity == 2 and self.args[1].type == N.MINUS:
+            return Node(N.SUB, self.args[0], self.args[1].args[0])

        if self.type == N.MINUS and self.constchildren():
            return Node(N.CONST, -self.args[0].value)
--- a/drawing.py
+++ b/drawing.py
@ -10,10 +10,11 @@ class Window:
    BLACK = (0, 0, 0, 255)
    WHITE = (255, 255, 255, 255)

-    def __init__(self, width, height, scale):
+    def __init__(self, width, height, scale, quiet=False):
+        """
+        Store the window geometry and display mode.
+
+        width, height -- window size, multiplied by scale when the SDL
+                         window is created
+        scale         -- multiplier applied to width and height
+        quiet         -- if true, the window is created hidden and wait()
+                         returns immediately
+        """
        self.width = width
        self.height = height
        self.scale = scale
+        self.quiet = quiet

    def __enter__(self):
        """
@ -27,9 +28,10 @@ class Window:
            raise Exception("Failed to initialize SDL")

        # Create the window
+        mode = SDL_WINDOW_HIDDEN if self.quiet else SDL_WINDOW_SHOWN
        self.w = SDL_CreateWindow("fx-92 Scientifique Collège+".encode(),
            SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
-            self.width*self.scale, self.height*self.scale, SDL_WINDOW_SHOWN)
+            self.width*self.scale, self.height*self.scale, mode)
        if self.w is None:
            raise Exception("Failed to create window")

@ -114,6 +116,9 @@ class Window:
    def wait(self):
        """Wait for the window to be closed."""

+        if self.quiet:
+            return
+
        event = SDL_Event()
        while 1:
            SDL_WaitEvent(event)
@ -121,11 +126,6 @@ class Window:
            if event.type == SDL_QUIT:
                break

-            if event.type == SDL_KEYDOWN:
-                key = event.key.keysym.sym
-                if key == SDLK_s:
-                    print("Going to save the picture, probably")
-
    def update(self):
        """Push window contents on-screen."""

--- a/fx92.py
+++ b/fx92.py
@ -1,8 +1,10 @@
 #! /usr/bin/python3

 import sys
+import getopt

-from parser import UrlParser
+from parser import Parser
+from lexer import UrlLexer, TextLexer
 from printer import print_ast
 from drawing import Window
 from interpreter import Context
@ -12,7 +14,18 @@ from interpreter import Context
 #---

 usage_string = f"""
-usage: {sys.argv[0]} <wes.casio.com URL>
+usage: {sys.argv[0]} [-s|-u] <file> [options...]
+
+If "-" is specified as input, stdin is read.
+
+Input mode (default is -s):
+  -s  Input file is an ASCII script ("GOTO 12, 35")
+  -u  Input file is a wes.casio.com URL ("https://...F908313200333500")
+
+Output options:
+  --quiet        Do not show the SDL window
+  --save=<file>  Save a copy of the screen output in a bitmap file
+  --scale=<n>    Window scale up (default 4, max 16)
 """.strip()

 def usage(exitcode=None):
@ -21,25 +34,74 @@ def usage(exitcode=None):
    if exitcode is not None:
        sys.exit(exitcode)

-def main(argv):
-    args = argv[1:]
+def main():
+    # Read command-line arguments
+    try:
+        opts, args = getopt.gnu_getopt(sys.argv[1:], "hus",
+            ["help", "quiet", "save=", "scale=", "debug="])
+        opts = dict(opts)

-    if not args or "-h" in args or "--help" in args or "-?" in args:
-        usage(0)
-    if len(args) != 1:
-        usage(1)
+        if len(sys.argv) == 1 or "-h" in opts or "--help" in opts:
+            usage(0)

-    parser = UrlParser(args[0])
+        if "-u" in opts and "-s" in opts:
+            raise getopt.GetoptError("-s (script input) and -u (URL input) "
+                "are exclusive")
+
+        if len(args) < 1:
+            usage(1)
+        if len(args) > 1:
+            raise getopt.GetoptError("only one input file can be specified")
+
+    except getopt.GetoptError as e:
+        print("error:", e, file=sys.stderr)
+        print(f"Try '{sys.argv[0]} --help' for details.", file=sys.stderr)
+        sys.exit(1)
+
+    # Other parameters
+    quiet = "--quiet" in opts
+    out = opts.get("--save", None)
+    debug = opts.get("--debug", None)
+
+    scale = int(opts.get("--scale", "4"))
+    if scale < 1:
+        scale = 1
+    if scale > 16:
+        scale = 16
+
+    # Read the input program
+    file = args[0]
+    if file == "-":
+        program = sys.stdin.read()
+    else:
+        with open(file, "r") as fp:
+            program = fp.read()
+
+    # URL mode
+    if "-u" in opts:
+        lexer = UrlLexer(program)
+    # Default, script mode
+    else:
+        lexer = TextLexer(program)
+
+    # Lexer debug mode, just print the token stream
+    if debug == "lexer":
+        lexer.dump()
+        return 0
+
+    parser = Parser(lexer)
    ast = parser.parse_program()
    ast = ast.simplify()

    print_ast(ast, lang="ast")

-    with Window(width=96, height=32, scale=8) as w:
+    with Window(width=128, height=48, scale=scale, quiet=quiet) as w:
        ctx = Context(w)
        ctx.run(ast)

        w.wait()

+    return 0
+
 if __name__ == "__main__":
-    main(sys.argv)
+    sys.exit(main())
--- a/lexer.py
+++ b/lexer.py
@ -90,10 +90,27 @@ class Token:
        return base + args

 #---
-# Lexer
+# Lexer base
 #---

-class ByteLexer:
+class LexerBase:
+    """
+    Lexer base class. This class only provides common methods and cannot be
+    used to analyse a program.
+    """
+
+    def dump(self):
+        self.rewind()
+
+        while not self.at_end():
+            x = self.lex()
+            print(x)
+
+#---
+# Bitcode lexer
+#---
+
+class BitcodeLexer(LexerBase):
    """
    fx-92 SC+ language lexer with bytes() bitcode input.
    """
@ -198,3 +215,155 @@ class ByteLexer:
        """Check whether the whole input has been read."""
        return self.pos >= len(self.hex)

+#---
+# Url lexer
+#---
+
+class UrlLexer(BitcodeLexer):
+    """
+    fx-92 SC+ language lexer with a wes.casio.com URL or hexadecimal input.
+    The URLs are typically in this form:
+
+        http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000
+        0000+S-000410110000100E0010B300D365+E-{code...}
+
+    The program can also be provided in text hexadecimal form, which is
+    everything following the "+E-" in the URL.
+    """
+
+    def __init__(self, url):
+        if url.startswith("http://") \
+        or url.startswith("https://") \
+        or url.startswith("wes.casio.com"):
+            print("[urlparser] URL includes protocol, will start after '+E-'")
+            offset = url.find("+E-")
+
+            if offset < 0:
+                print("[urlparser] '+E-' not found, cannot decode URL")
+                raise Exception("Cannot decode URL")
+
+            url = url[offset+3:]
+
+        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
+            print("[urlparser] URL is not strict hexa, noise will be skipped")
+
+        super().__init__(bytes.fromhex(url))
+
+#---
+# Plain text lexer
+#---
+
+class TextLexer(LexerBase):
+    """
+    fx-92 SC+ language lexer with Basic-like input.
+
+    This thing is very naive and extremely inefficient.
+    """
+
+    RE_STMTS = re.compile(
+        r"NOP|FORWARD|ROTATE|ORIENT|GOTO|PENDOWN|PENUP|SETVAR|INPUT|MESSAGE|"
+        r"PRINT|STYLE|WAIT|REPEAT_END|REPEAT|WHILE_END|WHILE|IF_END|ELSE|"
+        r"IFELSE_END|IFELSE|IF",
+        re.IGNORECASE)
+
+    RE_CONST = re.compile(
+        r"([0-9]+(?:\.[0-9]+)?(?:[eE][0-9]+)?)(%?)")
+
+    def __init__(self, code):
+        """Initialize the lexer with text code."""
+
+        self.base_code = code.replace(";", "\n")
+        self.rewind()
+
+    def rewind(self):
+        """Restart lexing the same input."""
+
+        self.code = self.base_code
+        self.errors = 0
+        self.pending_param = False
+
+    def lex(self):
+        """Return the next token in the stream."""
+
+        c = self.code.lstrip(" \t")
+
+        # Special case of newlines. If a non-statement has been identified and
+        # no comma has followed, emit a PARAM token manually.
+        if (not c or c[0] == "\n") and self.pending_param:
+            self.pending_param = False
+            self.code = c.lstrip("\n")
+            return Token(T.PARAM)
+
+        c = self.code.lstrip(" \t\n")
+
+        # End of file
+        if not c:
+            self.code = ""
+            return Token(T.END)
+
+        # Statements
+        m = re.match(self.RE_STMTS, c)
+        if m is not None:
+            t = Token(getattr(T, m[0].upper()))
+            self.code = c[len(m[0]):]
+            return t
+
+        # Relations
+        rels = [ ">=", "<=", "!=", ">", "<" ]
+        for r in rels:
+            if c.startswith(r):
+                self.code = c[len(r):]
+                self.pending_param = True
+                return Token(T.REL, r)
+
+        # Punctuation
+        punct = {
+            ",": T.PARAM,
+            ":": T.COLON,
+            "?": T.QUEST,
+            "(": T.LPAR,
+            ")": T.RPAR,
+            "=": T.EQUAL,
+            "+": T.PLUS,
+            "-": T.MINUS,
+            "*": T.STAR,
+            "/": T.SLASH,
+            "!": T.BANG,
+        }
+        if c[0] in punct:
+            self.code = c[1:]
+            self.pending_param = (c[0] != ",")
+            return Token(punct[c[0]])
+
+        # Constants
+        m = re.match(self.RE_CONST, c)
+        if m is not None:
+            f = float(m[1])
+            if m[2] == "%":
+                f /= 100
+
+            self.code = c[len(m[0]):]
+            self.pending_param = True
+            return Token(T.CONST, f)
+
+        # Variables
+        if c[0] in "MABCDEFxXyY":
+            var = c[0].lower() if c[0] in "xXyY" else c[0]
+            self.code = c[1:]
+            self.pending_param = True
+            return Token(T.VAR, c[0])
+
+        # If nothing can be found, raise an exception
+        s = c.split(maxsplit=1)
+        err = s[0]
+        self.code = s[1] if len(s) > 1 else ""
+
+        raise Exception(f"Lexical error near '{err}'")
+
+    def at_end(self):
+        """Check whether the whole input has been read."""
+        return not self.code and not self.pending_param
+
+#
+
+__all__ = ["T", "Token", "BitcodeLexer", "UrlLexer", "TextLexer"]
--- a/parser.py
+++ b/parser.py
@ -1,26 +1,17 @@
 # fx-92 Scientifique Collège+ language interpreter: Syntactic analysis

 import re
-from lexer import T, Token, ByteLexer
+from lexer import T, Token, BitcodeLexer
 from ast import N, Node

 #---
 # LL(1) parser
 #---

-class UrlParser:
+class Parser:
    """
-    fx-92 SC+ language parser with a wes.casio.com URL or hexadecimal input.
-    The URLs are typically in this form:
-
-        http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000
-        0000+S-000410110000100E0010B300D365+E-{code...}
-
-    The program can also be provided in text hexadecimal form, which is
-    everything following the "+E-" in the URL.
-
-    This is your everyday LL(1) top-down parser. It implements the following
-    formal grammar:
+    fx-92 SC+ language parser. This is your everyday LL(1) top-down parser. It
+    implements the following formal grammar:

    program -> stmt*
    stmt    -> stmt2 EOL?
@ -45,37 +36,15 @@ class UrlParser:
    setvar  -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY
    """

-    def __init__(self, url):
+    def __init__(self, lexer):
        """
-        Create a UrlParser from a wes.casio.com URL or hexadecimal code.
+        Create a Parser from a chosen lexer.
        """

-        if url.startswith("http://") \
-        or url.startswith("https://") \
-        or url.startswith("wes.casio.com"):
-            print("[urlparser] URL includes protocol, will start after '+E-'")
-            offset = url.find("+E-")
-
-            if offset < 0:
-                print("[urlparser] '+E-' not found, cannot decode URL")
-                raise Exception("Cannot decode URL")
-
-            url = url[offset+3:]
-
-        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
-            print("[urlparser] URL is not strict hexa, noise will be skipped")
-
        # Create the lexer and initialize the lookahead byte
-        self.lexer = ByteLexer(bytes.fromhex(url))
+        self.lexer = lexer
        self.la = None

-    def lex_program(self):
-        self.lexer.rewind()
-
-        while not self.lexer.at_end():
-            x = self.lexer.lex()
-            print(x)
-
    def parse_program(self):
        self.lexer.rewind()
        self.la = None
@ -170,28 +139,28 @@ class UrlParser:

        if op.type == T.REPEAT:
            arg = self.arg()
-            self.expect(T.EOL)
+            self.expect(T.EOL, optional=True)
            prg = self.program()
            self.expect(T.REPEAT_END)
            return Node(N.REPEAT, arg, prg)

        if op.type == T.WHILE:
            arg = self.arg()
-            self.expect(T.EOL)
+            self.expect(T.EOL, optional=True)
            prg = self.program()
            self.expect(T.WHILE_END)
            return Node(N.WHILE, arg, prg)

        if op.type == T.IF:
            arg = self.arg()
-            self.expect(T.EOL)
+            self.expect(T.EOL, optional=True)
            prg = self.program()
            self.expect(T.IF_END)
            return Node(N.IF, arg, prg, None)

        if op.type == T.IFELSE:
            arg = self.arg()
-            self.expect(T.EOL)
+            self.expect(T.EOL, optional=True)
            p1 = self.program()
            self.expect(T.ELSE)
            p2 = self.program()
@ -206,15 +175,17 @@ class UrlParser:

    # expr -> factor | factor + expr | factor - expr
    def expr(self):
-        factor = self.factor()
-        t = self.expect(T.PLUS, T.MINUS, optional=True)
+        # Collect every term in a flat list; a subtracted factor is stored
+        # as N.MINUS(factor) so the whole chain becomes a single N.ADD node
+        f = [self.factor()]

-        if t is None:
-            return factor
-        if t.type == T.PLUS:
-            return Node(N.ADD, factor, self.expr())
-        if t.type == T.MINUS:
-            return Node(N.SUB, factor, self.expr())
+        while 1:
+            t = self.expect(T.PLUS, T.MINUS, optional=True)
+
+            # No more + or -: wrap the collected terms in one N.ADD node
+            if t is None:
+                return Node(N.ADD, *f)
+            if t.type == T.PLUS:
+                f.append(self.factor())
+            if t.type == T.MINUS:
+                f.append(Node(N.MINUS, self.factor()))

    # factor -> atom | atom * factor | atom / factor
    def factor(self):
--- a/printer.py
+++ b/printer.py
@ -1,7 +1,6 @@
 # fx-92 Scientifique Collège+ language interpreter: AST printer

 from ast import N, Node
-__all__ = ["print_ast"]

 #---
 # Message definitions
@ -73,3 +72,7 @@ def print_ast(n, lang="en", indent=0):

    for arg in n.args:
        print_ast(arg, lang=lang, indent=indent+2)
+
+#
+
+__all__ = ["print_ast"]
--- a/tests/line-patterns.txt
+++ b/tests/line-patterns.txt
@ -0,0 +1,24 @@
+goto -60, 12 ; pendown ; goto x+13, y+15 ; penup
+goto -60,  9 ; pendown ; goto x+13, y+13 ; penup
+goto -60,  6 ; pendown ; goto x+13, y+8  ; penup
+goto -60,  3 ; pendown ; goto x+13, y+5  ; penup
+goto -60,  0 ; pendown ; goto x+13, y    ; penup
+
+goto -47,  -8 ; pendown ; goto x-13, y+5  ; penup
+goto -47, -14 ; pendown ; goto x-13, y+8  ; penup
+goto -47, -22 ; pendown ; goto x-13, y+13 ; penup
+goto -47, -27 ; pendown ; goto x-13, y+15 ; penup
+
+setvar 0, A
+
+repeat 6
+  goto -30,16-7A
+  pendown
+  goto x+3A+3,y+5
+  penup
+  goto x+A+3,y
+  pendown
+  goto x-3A-3,y-5
+  penup
+  setvar A+1, A
+repeat_end