201 lines
4.5 KiB
Python
201 lines
4.5 KiB
Python
# fx-92 Scientifique Collège+ language interpreter: Lexical analysis
|
|
|
|
import math
|
|
import re
|
|
import enum
|
|
|
|
#---
|
|
# Token description
|
|
#---
|
|
|
|
@enum.unique
class T(enum.IntEnum):
    """
    Token type codes.

    Non-negative members are the raw opcode values found in the byte stream:
    two-byte values for the 0xF9xx and 0xFBxx opcodes, single-byte values for
    punctuation and operators. Negative members are never read from the
    input; they tag tokens that carry a parameter (see "Tokens with
    parameters" below).
    """

    # --- Basic commands -------------------------------------------------
    EOL = 0xF901
    END = 0xF902
    NOP = 0xF903

    # --- Basic statements -----------------------------------------------
    FORWARD = 0xF905
    ROTATE = 0xF906
    ORIENT = 0xF907
    GOTO = 0xF908
    PENDOWN = 0xF909
    PENUP = 0xF90A
    SETVAR = 0xF90B
    INPUT = 0xF90C
    MESSAGE = 0xF90D
    PRINT = 0xF90E
    STYLE = 0xF90F
    WAIT = 0xF910

    # --- Flow control ---------------------------------------------------
    REPEAT = 0xF911
    REPEAT_END = 0xF912
    WHILE = 0xF913
    WHILE_END = 0xF914
    IF = 0xF915
    IF_END = 0xF916
    IFELSE = 0xF917
    ELSE = 0xF918
    IFELSE_END = 0xF919

    # --- Variable assignment --------------------------------------------
    SETM = 0xFB10
    SETA = 0xFB12
    SETB = 0xFB13
    SETC = 0xFB14
    SETD = 0xFB15
    SETE = 0xFB16
    SETF = 0xFB17
    SETX = 0xFB18
    SETY = 0xFB19

    # --- Miscellaneous (single-byte opcodes) ----------------------------
    PARAM = 0x00
    COLON = 0x23
    QUEST = 0x25
    LPAR = 0x60
    RPAR = 0xD0
    EQUAL = 0xA5
    PLUS = 0xA6
    MINUS = 0xA7
    STAR = 0xA8
    SLASH = 0xA9
    BANG = 0xD8

    # --- Tokens with parameters (synthetic, not opcode values) ----------
    CONST = -1
    VAR = -2
    REL = -3
|
|
|
|
class Token:
    """A lexed token: a type code plus any parameter values attached to it."""

    def __init__(self, type, *args):
        """Create a token of the given type carrying the given arguments."""
        self.args = args
        self.type = type

    def __repr__(self):
        """Unambiguous token representation.

        Uses the T member name when the type is a known code, and a
        hexadecimal placeholder otherwise. Arguments, if any, are appended
        in parentheses, separated by commas.
        """
        try:
            label = T(self.type).name
        except ValueError:
            label = f"<Token:{hex(self.type)}>"

        if not self.args:
            return label

        rendered = ",".join(map(repr, self.args))
        return f"{label}({rendered})"
|
|
|
|
#---
|
|
# Lexer
|
|
#---
|
|
|
|
class ByteLexer:
    """
    fx-92 SC+ language lexer with bytes() bitcode input.

    Tokens are produced one at a time by lex(). Bytes 0xF9 and 0xFB introduce
    two-byte opcodes; any other byte is a single-byte token, the start of a
    numeric constant, or a variable name. Unknown opcodes are reported on
    stdout, counted in the `errors` attribute, and skipped.
    """

    # Second byte of the 0xFB-prefixed relational operators.
    _RELATIONS = {0x01: "<", 0x02: ">", 0x03: "!=", 0x04: "<=", 0x05: ">="}

    # Numeric literal: digits, optional fraction introduced by byte 0x2E,
    # optional exponent introduced by byte 0x2D, optional trailing 0x25.
    _RE_CONST = re.compile(rb'([0-9]+(?:\x2E[0-9]*)?(?:\x2D[0-9]+)?)(%?)')

    def __init__(self, hex):
        """Initialize the lexer with input data (a bytes-like object)."""
        self.hex = hex
        self.rewind()

    def rewind(self):
        """Restart lexing the same input and reset the error counter."""
        self.pos = 0
        self.errors = 0

    def lex(self):
        """Return the next token in the stream.

        Unknown opcodes are reported, counted and skipped. Token(T.END) is
        returned once the input is exhausted, or when the input stops in the
        middle of a two-byte opcode.
        """
        # Loop instead of recursing so that a long run of unknown bytes
        # cannot exhaust the interpreter's recursion limit.
        while not self.at_end():
            if self.hex[self.pos] in (0xF9, 0xFB):
                token = self._lex_command()
            else:
                token = self._lex_single()

            if token is not None:
                return token

        return Token(T.END)

    def _lex_command(self):
        """Lex a two-byte opcode at the current position (None if unknown)."""
        h, p = self.hex, self.pos

        # Stop if there is no trailing byte. The position must be moved to
        # the end of the input, otherwise at_end() never becomes true and
        # callers looping on it would report this error forever.
        if p >= len(h) - 1:
            print(f"[lexer] Invalid trailing byte {hex(h[p])}")
            self.pos = len(h)
            return Token(T.END)

        code = (h[p] << 8) | h[p+1]
        self.pos += 2

        # Return any value that is defined in the T enumeration
        try:
            return Token(T(code))
        except ValueError:
            pass

        # Also a few more values not in the T enumeration
        if h[p] == 0xFB and h[p+1] in self._RELATIONS:
            return Token(T.REL, self._RELATIONS[h[p+1]])

        return self._unknown(code)

    def _lex_single(self):
        """Lex a single-byte token, constant or variable (None if unknown)."""
        h, p = self.hex, self.pos
        self.pos += 1
        code = h[p]

        # Translate unary minus to normal minus
        if code == 0xC0:
            code = 0xA7

        try:
            return Token(T(code))
        except ValueError:
            pass

        if code == 0x21:
            return Token(T.CONST, math.e)
        if code == 0x22:
            return Token(T.CONST, math.pi)

        # Numeric constants
        if 0x30 <= code <= 0x39:
            match = self._RE_CONST.match(h, p)

            if match is not None:
                # Byte 0x2D marks the exponent; turn it into Python's "e"
                text = match[1].replace(b'\x2D', b'e')
                # One byte has already been consumed above
                self.pos += len(text) - 1

                # NOTE(review): the original code tested `match[2] == "%"`
                # to divide the value by 100, but that compared bytes with
                # str and so never fired; the trailing 0x25 byte was (and
                # still is) left in the stream and lexed as T.QUEST. Whether
                # 0x25 after a number really means "percent" in this
                # encoding must be confirmed before enabling it.
                return Token(T.CONST, float(text.decode('utf-8')))

        # Variables: 0x42..0x47 map to "A".."F"
        if 0x42 <= code <= 0x47:
            return Token(T.VAR, chr(code - 1))
        if code == 0x40:
            return Token(T.VAR, "M")
        if code == 0x48:
            return Token(T.VAR, "x")
        if code == 0x49:
            return Token(T.VAR, "y")

        return self._unknown(code)

    def _unknown(self, code):
        """Report and count an unknown opcode; return None so lex() goes on."""
        print(f"[lexer] Unknown opcode {hex(code)}")
        self.errors += 1
        return None

    def at_end(self):
        """Check whether the whole input has been read."""
        return self.pos >= len(self.hex)
|
|
|