add command-line options and a text lexer
This change lays the groundwork for automated unit tests. It adds
command-line options to select the input format (URL/hexadecimal text or
plain text), change a few output settings, and redirect graphical output
to an image.

A text lexer has also been added so that unit tests and new programs can
be written in an English-like syntax instead of raw hexadecimal.
2019-10-02 07:18:29 +02:00

# fx-92 Scientifique Collège+ language interpreter: Lexical analysis
import math
import re
import enum
# Token description
class T(enum.IntEnum):
    """
    Token types of the fx-92 SC+ language.

    Non-negative members hold the numeric code of a one- or two-byte opcode.
    Negative members are token kinds that are emitted together with arguments.
    """

    # Basic commands
    EOL = 0xF901
    END = 0xF902
    NOP = 0xF903

    # Basic statements
    FORWARD = 0xF905
    ROTATE = 0xF906
    ORIENT = 0xF907
    GOTO = 0xF908
    PENDOWN = 0xF909
    PENUP = 0xF90A
    INPUT = 0xF90C
    PRINT = 0xF90E
    STYLE = 0xF90F
    WAIT = 0xF910

    # Flow control
    REPEAT = 0xF911
    WHILE = 0xF913
    WHILE_END = 0xF914
    IF = 0xF915
    IF_END = 0xF916
    IFELSE = 0xF917
    ELSE = 0xF918

    # Variable assignment (one opcode per target variable)
    SETM = 0xFB10
    SETA = 0xFB12
    SETB = 0xFB13
    SETC = 0xFB14
    SETD = 0xFB15
    SETE = 0xFB16
    SETF = 0xFB17
    SETX = 0xFB18
    SETY = 0xFB19

    # Miscellaneous single-byte tokens (separators, operators, parentheses)
    PARAM = 0x00
    COLON = 0x23
    QUEST = 0x25
    LPAR = 0x60
    RPAR = 0xD0
    EQUAL = 0xA5
    PLUS = 0xA6
    MINUS = 0xA7
    STAR = 0xA8
    SLASH = 0xA9
    BANG = 0xD8

    # Tokens with parameters (no opcode of their own, hence negative values)
    CONST = -1
    VAR = -2
    REL = -3
class Token:
    """
    A lexed token: a token type plus the arguments attached to it, if any.
    """

    def __init__(self, type, *args):
        """Instantiate a new token.

        type is the token type (normally a member of T). Any further
        positional arguments are stored, in order, as the token's arguments.
        """
        self.type = type
        self.args = args

    def __repr__(self):
        """Unambiguous token representation."""
        try:
            base = T(self.type).name
        except ValueError:
            # Not a value known to T: show the raw numeric code instead
            base = f"<Token:{hex(self.type)}>"

        # Arguments are only shown when there are some, e.g. CONST(1.0)
        if self.args:
            args = "(" + ",".join(repr(arg) for arg in self.args) + ")"
        else:
            args = ""

        return base + args
# Lexer base
class LexerBase:
    """
    Lexer base class. This class only provides common methods and cannot be
    used to analyse a program.
    """

    def dump(self):
        """Lex the remaining input and print every token, one per line.

        Relies on the subclass to provide lex() and at_end(). Lexing starts
        from the current position; the lexer is not rewound first.
        """
        while not self.at_end():
            x = self.lex()
            # Previously the token was read and dropped, so nothing was dumped
            print(x)
# Bitcode lexer
class BitcodeLexer(LexerBase):
    """
    fx-92 SC+ language lexer with bytes() bitcode input.
    """

    # Numeric literal: digits, optional fraction after 0x2E, optional exponent
    # introduced by 0x2D, optional trailing 0x25. Compiled once so that lex()
    # can match in place instead of slicing the input.
    _RE_CONST = re.compile(rb'([0-9]+(?:\x2E[0-9]*)?(?:\x2D[0-9]+)?)(%?)')

    # Relational operators, keyed by the second byte of a 0xFB-prefixed opcode.
    # These are not members of T because they are emitted as T.REL tokens.
    _RELS = {0x01: "<", 0x02: ">", 0x03: "!=", 0x04: "<=", 0x05: ">="}

    def __init__(self, hex):
        """Initialize the lexer with input data."""
        self.hex = hex
        # Set the position and error count now, so that lex() and at_end()
        # work without an explicit call to rewind()
        self.rewind()

    def rewind(self):
        """Restart lexing the same input."""
        self.pos = 0
        self.errors = 0

    def lex(self):
        """Return the next token in the stream.

        Unknown opcodes are reported on stdout, counted in self.errors and
        skipped. Token(T.END) is returned once the input is exhausted, or
        when a two-byte prefix is the very last byte of the input.
        """
        # Loop instead of recursing on unknown opcodes, so that a long run of
        # noise cannot exhaust the recursion limit
        while True:
            token = self._lex_one()
            if token is not None:
                return token

    def _lex_one(self):
        """Consume one opcode; return its token, or None if it is unknown."""
        h, p = self.hex, self.pos

        if self.at_end():
            return Token(T.END)

        # 2-byte commands
        if h[p] in (0xF9, 0xFB):
            # Stop if there is no trailing byte
            if p >= len(h) - 1:
                print(f"[lexer] Invalid trailing byte {hex(h[p])}")
                # Move the real position (not just the local copy) to the end,
                # otherwise at_end() would stay false forever
                self.pos = len(h)
                return Token(T.END)

            code = (h[p] << 8) | h[p+1]
            self.pos += 2

            # Return any value that is defined in the T enumeration
            try:
                return Token(T(code))
            except ValueError:
                pass

            # Also a few more values not in the T enumeration
            if h[p] == 0xFB and h[p+1] in self._RELS:
                return Token(T.REL, self._RELS[h[p+1]])

            print(f"[lexer] Unknown opcode {hex(code)}")
            self.errors += 1
            return None

        # Single-byte characters
        self.pos += 1
        code = h[p]

        # Translate unary minus to normal minus
        if code == 0xC0:
            code = 0xA7

        try:
            return Token(T(code))
        except ValueError:
            pass

        if code == 0x21:
            return Token(T.CONST, math.e)
        if code == 0x22:
            return Token(T.CONST, math.pi)

        # Constants
        if code in range(0x30, 0x39+1):
            match = self._RE_CONST.match(h, p)
            if match is not None:
                text = match[1].replace(b'\x2E', b'.').replace(b'\x2D', b'e')
                # The first digit has already been consumed above
                self.pos += len(text) - 1
                f = float(text.decode('utf-8'))
                # NOTE(review): match[2] is bytes, so this comparison with a
                # str is never true, and a trailing 0x25 byte is not consumed
                # here (it is lexed next, as T.QUEST). Behaviour kept as-is;
                # confirm whether percent constants are meant to exist in
                # bitcode before changing it.
                if match[2] == "%":
                    f /= 100
                return Token(T.CONST, f)

        # Variables: 0x42..0x47 map to the letters A..F
        if code in range(0x42, 0x47+1):
            return Token(T.VAR, chr(h[p]-1))
        if code == 0x40:
            return Token(T.VAR, "M")
        if code == 0x48:
            return Token(T.VAR, "x")
        if code == 0x49:
            return Token(T.VAR, "y")

        print(f"[lexer] Unknown opcode {hex(code)}")
        self.errors += 1
        return None

    def at_end(self):
        """Check whether the whole input has been read."""
        return self.pos >= len(self.hex)
# Url lexer
class UrlLexer(BitcodeLexer):
    """
    fx-92 SC+ language lexer with a wes.casio.com URL or hexadecimal input.

    The program can be provided as a full URL, in which case it is the
    hexadecimal text following the "+E-" in the URL, or directly in that text
    hexadecimal form.
    """

    def __init__(self, url):
        """Extract the hexadecimal program from url and set up the lexer.

        Raises Exception("Cannot decode URL") if url starts like a URL but
        does not contain the "+E-" marker.
        """
        if url.startswith(("http://", "https://", "wes.casio.com")):
            print("[urlparser] URL includes protocol, will start after '+E-'")
            offset = url.find("+E-")

            if offset < 0:
                print("[urlparser] '+E-' not found, cannot decode URL")
                raise Exception("Cannot decode URL")

            url = url[offset+3:]

        if not re.fullmatch(r'(?:[0-9a-fA-F]{2})+', url):
            print("[urlparser] URL is not strict hexa, noise will be skipped")

        # Hand the decoded bytes to the bitcode lexer. Only well-formed pairs
        # of hexadecimal digits are kept, which is how noise gets skipped; for
        # strict input this is the whole text.
        pairs = re.findall(r'[0-9a-fA-F]{2}', url)
        super().__init__(bytes.fromhex("".join(pairs)))
# Plain text lexer
class TextLexer(LexerBase):
fx-92 SC+ language lexer with Basic-like input.
This thing is very naive and extremely inefficient.
RE_STMTS = re.compile(
RE_CONST = re.compile(
def __init__(self, code):
    """Initialize the lexer with text code.

    Semicolons in code are rewritten as newlines before lexing.
    """
    self.base_code = code.replace(";", "\n")
    # Set up the lexing state now, so that lex() and at_end() work without an
    # explicit call to rewind()
    self.rewind()
def rewind(self):
    """Reset the lexing state so that the same text is lexed from the start."""
    # Remaining text, error count, and "a PARAM token is still owed" flag
    self.code, self.errors, self.pending_param = self.base_code, 0, False
def lex(self):
    """Return the next token in the stream.

    Returns Token(T.END) once the text is exhausted. Raises Exception with a
    "Lexical error near ..." message when the input cannot be tokenised; the
    offending word is dropped from the input before raising.
    """
    c = self.code.lstrip(" \t")

    # Special case of newlines. If a non-statement has been identified and
    # no comma has followed, emit a PARAM token manually.
    if (not c or c[0] == "\n") and self.pending_param:
        self.pending_param = False
        self.code = c.lstrip("\n")
        return Token(T.PARAM)

    c = self.code.lstrip(" \t\n")

    # End of file
    if not c:
        self.code = ""
        return Token(T.END)

    # Statements: the matched keyword, upper-cased, names a member of T
    m = self.RE_STMTS.match(c)
    if m is not None:
        t = Token(getattr(T, m[0].upper()))
        self.code = c[len(m[0]):]
        return t

    # Relations. Two-character operators come first so that ">=" is not
    # read as ">" followed by "=".
    for r in (">=", "<=", "!=", ">", "<"):
        if c.startswith(r):
            self.code = c[len(r):]
            self.pending_param = True
            return Token(T.REL, r)

    # Punctuation
    punct = {
        ",": T.PARAM,
        ":": T.COLON,
        "?": T.QUEST,
        "(": T.LPAR,
        ")": T.RPAR,
        "=": T.EQUAL,
        "+": T.PLUS,
        "-": T.MINUS,
        "*": T.STAR,
        "/": T.SLASH,
        "!": T.BANG,
    }
    if c[0] in punct:
        self.code = c[1:]
        # An explicit comma is itself the PARAM token, so none is owed after
        self.pending_param = (c[0] != ",")
        return Token(punct[c[0]])

    # Constants: group 1 is the number, group 2 an optional "%" suffix
    m = self.RE_CONST.match(c)
    if m is not None:
        f = float(m[1])
        if m[2] == "%":
            f /= 100
        self.code = c[len(m[0]):]
        self.pending_param = True
        return Token(T.CONST, f)

    # Variables
    if c[0] in "MABCDEFxXyY":
        # x and y are lower-case in tokens, whatever the case in the text
        var = c[0].lower() if c[0] in "xXyY" else c[0]
        self.code = c[1:]
        self.pending_param = True
        return Token(T.VAR, var)

    # If nothing can be found, raise an exception. The offending word is
    # skipped first; split() may return nothing when only unusual whitespace
    # (such as "\r") is left.
    s = c.split(maxsplit=1)
    err = s[0] if s else c
    self.code = s[1] if len(s) > 1 else ""
    raise Exception(f"Lexical error near '{err}'")
def at_end(self):
    """Tell whether lexing is over: no text is left and no PARAM is pending."""
    return not (self.code or self.pending_param)
# Names exported by a star-import of this module
__all__ = [
    "T",
    "Token",
    "BitcodeLexer",
    "UrlLexer",
    "TextLexer",
]