# fx92-interpreter/parser.py

# fx-92 Scientifique Collège+ language interpreter: Syntactic analysis

import re
from lexer import T, Token, ByteLexer
from ast import N, Node

#---
# LL(1) parser
#---

class UrlParser:
    """
    fx-92 SC+ language parser with a wes.casio.com URL or hexadecimal input.
    The URLs are typically in this form:

        http://wes.casio.com/math/index.php?q=I-295A+U-000000000000+M-0E0000
        0000+S-000410110000100E0010B300D365+E-{code...}

    The program can also be provided in text hexadecimal form, which is
    everything following the "+E-" in the URL.

    This is your everyday LL(1) top-down parser. It implements the following
    formal grammar:

    program -> stmt*
    stmt    -> stmt2 EOL?
    stmt2   -> FORWARD arg | ROTATE arg | ORIENT arg | GOTO arg arg |
               PENDOWN | PENUP | SETVAR arg argvar | INPUT argvar |
               MESSAGE arg | PRINT arg | STYLE style | WAIT arg |
               REPEAT arg program REPEAT_END |
               WHILE arg program WHILE_END |
               IF arg program IF_END |
               IFELSE arg program ELSE program IFELSE_END
    arg     -> expr PARAM
    argvar  -> var PARAM

    expr    -> factor ((+|-) factor)*        (left-associative)
    factor  -> atom ((*|/) atom)*            (left-associative)
    atom    -> const (var | "(" expr ")")* | (var | "(" expr ")")+
    const   -> (+|-)? CONST
    var     -> VAR

    # Unused or TODO
    style   -> (TODO)
    setvar  -> SETM | SETA | SETB | SETC | SETD | SETE | SETF | SETX | SETY
    """

    def __init__(self, url):
        """
        Create a UrlParser from a wes.casio.com URL or hexadecimal code.

        Raises Exception if a URL does not contain the "+E-" marker, and
        ValueError (from bytes.fromhex) if the remaining hexadecimal text has
        an odd number of digits.
        """

        hexa = self._extract_hex(url)

        # Create the lexer and initialize the lookahead token
        self.lexer = ByteLexer(bytes.fromhex(hexa))
        self.la = None

    @staticmethod
    def _extract_hex(url):
        """
        Return the hexadecimal program text contained in *url*.

        If *url* looks like a wes.casio.com URL, only the part following
        "+E-" is kept. Any character that is not a hexadecimal digit is then
        dropped, so that the "noise will be skipped" diagnostic is truthful
        (bytes.fromhex alone only tolerates ASCII whitespace).
        """

        if url.startswith(("http://", "https://", "wes.casio.com")):
            print("[urlparser] URL includes protocol, will start after '+E-'")
            offset = url.find("+E-")

            if offset < 0:
                print("[urlparser] '+E-' not found, cannot decode URL")
                raise Exception("Cannot decode URL")

            url = url[offset + 3:]

        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
            print("[urlparser] URL is not strict hexa, noise will be skipped")
            url = re.sub(r'[^0-9a-fA-F]', '', url)

        return url

    def lex_program(self):
        """Rewind the lexer and print every token until the lexer is at end."""
        self.lexer.rewind()

        while not self.lexer.at_end():
            print(self.lexer.lex())

    def parse_program(self):
        """Rewind the lexer, parse the whole input and return its AST."""
        self.lexer.rewind()
        self.la = None
        self.advance()
        return self.program()

    #---
    # LL parsing helpers
    #---

    def advance(self):
        """Return the next token and feed the lookahead."""
        current = self.la
        self.la = self.lexer.lex()
        return current

    def expect(self, *types, optional=False):
        """
        Read the next token, expecting a type specified in *types.

        Returns the consumed token on a match. On a mismatch, returns None
        without consuming anything if optional is true; otherwise prints a
        diagnostic and raises Exception.
        """

        if self.la.type in types:
            return self.advance()
        if optional:
            return None

        expected = [T(t).name for t in types]
        got = T(self.la.type).name
        err = f"Expected one of {expected}, got {got}"
        print("[urlparser] " + err)
        raise Exception("Syntax error: " + err)

    #---
    # LL parsing rules
    #---

    # program -> stmt*
    def program(self):
        """Parse statements until one fails to start; return a PROGRAM node."""
        stmts = []

        while True:
            stmt = self.stmt(optional=True)
            if stmt is None:
                break
            stmts.append(stmt)

        return Node(N.PROGRAM, *stmts)

    # stmt -> stmt2 EOL?
    def stmt(self, optional=False):
        """Parse one statement and the optional EOL token that follows it."""
        st = self.stmt2(optional=optional)
        self.expect(T.EOL, optional=True)
        return st

    # stmt2 -> (lots of cases)
    def stmt2(self, optional=False):
        """
        Parse a single statement (without its trailing EOL).

        Returns the statement node, or None when optional is true and the
        lookahead does not start a statement.
        """

        # Statements made of the opcode followed by exactly one "arg"
        single_arg = {
            T.FORWARD: N.FORWARD, T.ROTATE: N.ROTATE, T.ORIENT: N.ORIENT,
            T.MESSAGE: N.MESSAGE, T.PRINT:  N.PRINT,  T.WAIT:   N.WAIT,
        }
        valid = [
            T.FORWARD, T.ROTATE, T.ORIENT,  T.GOTO,  T.PENDOWN, T.PENUP,
            T.SETVAR,  T.INPUT,  T.MESSAGE, T.PRINT, T.STYLE,   T.WAIT,
            T.REPEAT,  T.WHILE,  T.IF,      T.IFELSE,
        ]
        op = self.expect(*valid, optional=optional)

        if op is None:
            return None

        # Basic statements
        if op.type in single_arg:
            return Node(single_arg[op.type], self.arg())
        if op.type == T.GOTO:
            x = self.arg()
            y = self.arg()
            return Node(N.GOTO, x, y)
        if op.type == T.PENDOWN:
            return Node(N.PENDOWN)
        if op.type == T.PENUP:
            return Node(N.PENUP)
        if op.type == T.SETVAR:
            value = self.arg()
            target = self.argvar()
            return Node(N.ASSIGN, value, target)
        if op.type == T.INPUT:
            return Node(N.INPUT, self.argvar())
        if op.type == T.STYLE:
            return Node(N.STYLE, self.style())

        # Flow control

        if op.type == T.REPEAT:
            arg, prg = self._condition_and_body()
            self.expect(T.REPEAT_END)
            return Node(N.REPEAT, arg, prg)

        if op.type == T.WHILE:
            arg, prg = self._condition_and_body()
            self.expect(T.WHILE_END)
            return Node(N.WHILE, arg, prg)

        if op.type == T.IF:
            arg, prg = self._condition_and_body()
            self.expect(T.IF_END)
            return Node(N.IF, arg, prg, None)

        if op.type == T.IFELSE:
            arg, p1 = self._condition_and_body()
            self.expect(T.ELSE)
            p2 = self.program()
            self.expect(T.IFELSE_END)
            return Node(N.IF, arg, p1, p2)

    def _condition_and_body(self):
        """Parse "arg EOL program", the common head of control structures."""
        arg = self.arg()
        self.expect(T.EOL)
        return arg, self.program()

    # arg -> expr PARAM
    def arg(self):
        """Parse an expression terminated by a PARAM token."""
        e = self.expr()
        self.expect(T.PARAM)
        return e

    # expr -> factor ((+|-) factor)*
    def expr(self):
        """
        Parse a sum/difference of factors.

        Operators are folded left to right so that "a - b - c" yields
        SUB(SUB(a, b), c) rather than the right-nested SUB(a, SUB(b, c)).
        """
        node = self.factor()

        while True:
            t = self.expect(T.PLUS, T.MINUS, optional=True)
            if t is None:
                return node
            kind = N.ADD if t.type == T.PLUS else N.SUB
            node = Node(kind, node, self.factor())

    # factor -> atom ((*|/) atom)*
    def factor(self):
        """
        Parse a product/quotient of atoms.

        Operators are folded left to right so that "a / b * c" yields
        MUL(DIV(a, b), c) rather than the right-nested DIV(a, MUL(b, c)).
        """
        node = self.atom()

        while True:
            t = self.expect(T.STAR, T.SLASH, optional=True)
            if t is None:
                return node
            kind = N.MUL if t.type == T.STAR else N.DIV
            node = Node(kind, node, self.atom())

    # atom -> const (VAR | "(" expr ")")* | (VAR | "(" expr ")")+
    def atom(self):
        """
        Parse juxtaposed operands (implicit multiplication) into a MUL node.

        Raises Exception when no operand is present, since the grammar
        requires at least one.
        """
        factors = []

        # Case of constants
        if self.la.type in (T.PLUS, T.MINUS, T.CONST):
            factors.append(self.const())

        while True:
            lat = self.la.type

            if lat == T.VAR:
                factors.append(self.var())
            elif lat == T.LPAR:
                self.expect(T.LPAR)
                factors.append(self.expr())
                self.expect(T.RPAR)
            else:
                break

        if not factors:
            err = f"Expected an operand, got {T(self.la.type).name}"
            print("[urlparser] " + err)
            raise Exception("Syntax error: " + err)

        return Node(N.MUL, *factors)

    # const -> (+|-)? CONST
    def const(self):
        """Parse an optionally signed constant; a "-" sign wraps it in MINUS."""
        sign = self.expect(T.PLUS, T.MINUS, optional=True)
        value = self.expect(T.CONST).args[0]

        node = Node(N.CONST, value)
        if sign and sign.type == T.MINUS:
            node = Node(N.MINUS, node)

        return node

    # argvar -> var PARAM
    def argvar(self):
        """Parse a variable terminated by a PARAM token."""
        n = self.var()
        self.expect(T.PARAM)
        return n

    # var -> VAR
    def var(self):
        """Parse a VAR token into a VAR node carrying its first argument."""
        t = self.expect(T.VAR)
        return Node(N.VAR, t.args[0])

    # setvar -> SETM | SETA | ... | SETF | SETX | SETY
    def setvar(self):
        """Not implemented yet: always raises Exception."""
        raise Exception("SetVar not supported yet x_x")

    # style -> (TODO)
    def style(self):
        """Not implemented yet: always raises Exception."""
        raise Exception("Style not supported yet x_x")