#-----------------------------------------------------------------------------# # ," /\ ", Azur: A game engine for CASIO fx-CG and PC # # | _/__\_ | Designed by Lephe' and the Planète Casio community. # # "._`\/'_." License: MIT # #-----------------------------------------------------------------------------# # isel.py: Instruction selection testing facility # # Many of the primitive numerical functions of libnum are written to compile # efficiently on SuperH, where "efficiently" mainly cares about using specific # instructions and generating short code. This utility tests these assumptions # by compiling C++ files that use the library, and matching the assembler code # against simple specifications. # # The assembler output from g++ is decoded by simple text analysis. Function # span from their `_SYMBOL:` to their `.size _SYMBOL, .-_SYMBOL`. Mnemonics are # split on whitespace and arguments on commas; directives and unused labels are # removed. # # The assembler output is matched using two types of expressions: # - "Program expressions", which run against an entire function and evaluate to # integers. These are used to count, mainly. # - "Instruction expressions", which run against single instructions and # evaluate to booleans. These are used to identify instructions. # # Instruction expressions are based on wildcard patterns matching mnemonics, # like `add*` or `b?.s`, combined with logical operators `!`, `&&` and `||`. # For instance, `add || addc || addv` identifies addition instructions, while # `!mov && !rts` eliminates the usual function boilerplate. # # Program expressions are based on the count-expression `[e]` where `e` is an # instruction expression. `[e]` evaluates to the number of instructions in the # function that match `e`. These are combined with integral constants and the # usual arithmetic, logical and comparison operators. C-style bool-as-int # semantics are used, so tests like `!= 0` or `> 0` can often be omitted. `%` # is a shortcut for the number of "non-trivial" instructions, and currently # expands to `[!mov && !rts]`. # # A test is a normal C++ source built using the library, which exposes # functions with C linkage (ie. no name mangling) and has specifications in # comments of the form: # # // FUNCTION_NAME: PROGRAM_EXPRESSION # # The comments should span entire lines and need not be placed near the # functions that they test. #--- from dataclasses import dataclass import subprocess import functools import fnmatch import typing import enum import sys import os import re #--- # Program representation and parsing #--- @dataclass class Insn: """A concrete instruction from a compiled assembly program.""" mnemonic: str args: list[str] #--- # Specification representation and parsing #--- @dataclass class InsnPattern: """An abstract pattern that can be matched against asm instructions.""" # Instruction mnemonic as a wildcard pattern (may use '?' and '*') mnemonicWildcard: str # Pattern for the arguments # args: list def evalAtInsn(self, program, i) -> bool: return fnmatch.fnmatchcase(program[i].mnemonic, self.mnemonicWildcard) def treeStr(self, indent): return (" "*indent) + self.mnemonicWildcard @dataclass class InsnExpr: """An expression built off `InsnPattern`, which runs on single instructions and evaluates to a boolean.""" # Node constructor and arguments ctor: str args: list[typing.Union[InsnPattern, "InsnExpr"]] def evalAtInsn(self, program, i) -> bool: args = [a.evalAtInsn(program, i) for a in self.args] match self.ctor, args: case "mnemonic", [x]: return x # Boolean operators case "!", [x]: return 1 if not x else 0 case "&&", [x,y]: return 1 if x and y else 0 case "||", [x,y]: return 1 if x or y else 0 def treeStr(self, indent): args = [a.treeStr(indent+2) for a in self.args] return (" "*indent) + self.ctor + ":\n" + "\n".join(args) @dataclass class ProgExpr: """An expression which runs on full programs and evaluates to counts of matched instructions.""" # Node constructor and arguments ctor: str args: list[typing.Union[InsnExpr, int, "Expr"]] def evalArg(self, program, arg): match arg: case int(i): return i case InsnExpr(_, _) as e: return sum(int(e.evalAtInsn(program, i)) for i in range(len(program))) case ProgExpr(_, _) as e: return e.evalAtProg(program) def evalAtProg(self, program) -> int: args = [self.evalArg(program, a) for a in self.args] match self.ctor, args: case "atom", [x]: return x # Comparisons case "<", [x,y]: return x < y case ">", [x,y]: return x > y case "<=", [x,y]: return x <= y case ">=", [x,y]: return x >= y case "=", [x,y]: return x == y case "!=", [x,y]: return x != y # Arithmetic case "+", [x,y]: return x + y case "-", [x,y]: return x - y case "-", [x]: return -x # Boolean operators case "!", [x]: return 1 if not x else 0 case "&&", [x,y]: return 1 if x and y else 0 case "||", [x,y]: return 1 if x or y else 0 def argStr(self, arg, indent): match arg: case int(i): return (" "*indent) + str(i) case InsnExpr(_, _) as e: return e.treeStr(indent) case ProgExpr(_, _) as e: return e.treeStr(indent) def treeStr(self, indent): args = [self.argStr(a, indent+2) for a in self.args] return (" "*indent) + self.ctor + ":\n" + "\n".join(args) T = enum.Enum("T", "NUM OP MNEMONIC LPAR RPAR LBRA RBRA PERCENT END".split()) @dataclass class Token: type: T value: typing.Any def __str__(self): return f"{self.type}({self.value})" class ExprLexer: """A lexer for `InsnPattern`, `InsnExpr` and `ProgExpr`.""" RE_NUM = re.compile(r"[0-9]+|0[xX][0-9a-fA-F]+") RE_OP = re.compile(r"!|&&|\|\||<=|>=|<>|<|>|=|!=|\+|-") RE_MNEMONIC = re.compile(r"[a-zA-Z.*?][a-zA-Z0-9.*?]*") PUNCT = { "(": T.LPAR, ")": T.RPAR, "[": T.LBRA, "]": T.RBRA, "%": T.PERCENT, } def __init__(self, code): """Initialize the lexer to start analyzing `code`.""" self.sourceCode = code self.rewind() def rewind(self): """Start or restart lexing the same input.""" self.code = self.sourceCode self.position = 0 self.errors = 0 def atEnd(self): """Check whether the end of the input has been reached.""" return len(self.code) == 0 def dump(self, fp=sys.stdout): """Exhaust lexer input and print the result to the specified stream.""" while not self.atEnd(): t = self.lex() print(f"{self.position:5d}: {t}", file=fp) def lex(self): """Return the next token in the stream.""" self.position += 1 c = self.code.lstrip(" \t") if len(c) == 0: return Token(T.END, None) if c[0] in ExprLexer.PUNCT: self.code = c[1:] return Token(ExprLexer.PUNCT[c[0]], None) if (m := ExprLexer.RE_NUM.match(c)) is not None: self.code = c[len(m[0]):] return Token(T.NUM, int(m[0], 0)) if (m := ExprLexer.RE_OP.match(c)) is not None: self.code = c[len(m[0]):] return Token(T.OP, m[0]) if (m := ExprLexer.RE_MNEMONIC.match(c)) is not None: self.code = c[len(m[0]):] return Token(T.MNEMONIC, m[0]) # Raise a lexing error s = c.split(maxsplit=1) err = s[0] self.code = s[1] if len(s) > 1 else "" raise Exception(f"Lexing error near '{err}'") class ExprParser: """An LL(1) recursive descent parser for program expressions.""" def __init__(self, lexer): """Parse the output of a given lexer.""" self.lexer = lexer self.la = None def parseProgExpr(self): """Parse the entire input as a ProgExpr.""" self.lexer.rewind() self.la = None self.advance() e = self.pexpr() if not self.lexer.atEnd(): print("Remaining input:") self.lexer.dump() raise Exception("Syntax error: expected end of input") return e def advance(self): """Return the next token and update the lookahead.""" next = self.la self.la = self.lexer.lex() return next def expect(self, types, pred=None, optional=False): """Read the next token, ensuring it is one of the specified types; if `pred` is specified, also tests the predicate. If `optional` is set, returns None in case of mismatch rather than raising an error.""" if isinstance(types, T): types = [types] if self.la.type in types and (pred is None or pred(self.la)): return self.advance() if optional: return None expected = ", ".join(str(t) for t in types) pos = self.lexer.position err = f"Expected one of {expected}, got {self.la} (at token {pos})" if pred is not None: err += " (with predicate)" raise Exception(f"Syntax error: {err}") # Rule combinators implementing unary and binary operators with precedence def binaryOpsLeft(ctor, ops): def decorate(f): def symbol(self): e = f(self) pred = lambda t: t.value in ops while (op := self.expect(T.OP, pred, True)) is not None: e = ctor(op.value, [e, f(self)]) return e return symbol return decorate def binaryOps(ctor, ops, *, rassoc=False): def decorate(f): def symbol(self): lhs = f(self) pred = lambda t: t.value in ops if (op := self.expect(T.OP, pred, True)) is not None: rhs = symbol(self) if rassoc else f(self) return ctor(op.value, [lhs, rhs]) else: return lhs return symbol return decorate def binaryOpsRight(ctor, ops): return binaryOpsRight(ctor, ops, rassoc=True) def unaryOps(ctor, ops, assoc=True): def decorate(f): def symbol(self): if (op := self.expect(T.OP, optional=True, pred=lambda t: t.value in ops)) is not None: arg = symbol(self) if assoc else f(self) return ctor(op.value, [arg]) else: return f(self) return symbol return decorate # Parsing rules @binaryOpsLeft(ProgExpr, ["||"]) @binaryOpsLeft(ProgExpr, ["&&"]) @binaryOps(ProgExpr, [">", ">=", "<", "<=", "=", "!="]) @binaryOpsLeft(ProgExpr, ["+", "-"]) @unaryOps(ProgExpr, ["!", "-"]) def pexpr(self): t = self.expect([T.LPAR, T.NUM, T.PERCENT, T.LBRA]) if t.type == T.LPAR: pe = self.pexpr() self.expect(T.RPAR) return pe elif t.type == T.NUM: return ProgExpr("atom", [t.value]) elif t.type == T.PERCENT: return parseProgExpr("[!rts && !mov]") elif t.type == T.LBRA: ie = self.iexpr() self.expect(T.RBRA) return ProgExpr("atom", [ie]) @binaryOpsLeft(InsnExpr, ["||"]) @binaryOpsLeft(InsnExpr, ["&&"]) @unaryOps(InsnExpr, ["!"]) def iexpr(self): if self.expect(T.LPAR, optional=True): ie = self.iexpr() self.expect(T.RPAR) return ie else: ip = self.ipat() return InsnExpr("mnemonic", [ip]) def ipat(self): t = self.expect(T.MNEMONIC) return InsnPattern(t.value) def parseProgExpr(string): l = ExprLexer(string) p = ExprParser(l) return p.parseProgExpr() #--- # Main logic #--- def runCompiler(input, flags): p = subprocess.run( ["sh-elf-g++", input, *flags, "-S", "-o", "-", "-std=c++20", "-O2"], stdout=subprocess.PIPE, check=True) return str(p.stdout, "utf8") def extractFunctions(asm): # Normalize spacing asm = asm.replace("\t", " ") # Split into lines and remove indentation lines = [l.strip() for l in asm.splitlines()] # Remove directives and local symbols lines = [l for l in lines if l and not l.startswith(".")] funcs = dict() currentFunc = None for l in lines: if l.endswith(":"): currentFunc = l[:-1] funcs[currentFunc] = [] elif currentFunc is None: raise Exception(f"instruction '{l}' before symbol name") else: mnemonic, *args = l.split(maxsplit=1) if args != []: args = [a.strip() for a in args[0].split(",")] funcs[currentFunc].append(Insn(mnemonic, args)) return funcs def printRawFunction(asm, sybl): # Find symbol definition start = asm.index(sybl + ":") if start < 0: print(f"") return False # Find function size end = asm.index(f".size\t{sybl}, .-{sybl}", start) if end < 0: print(f" ", file=sys.stderr) asm = runCompiler(sys.argv[1], sys.argv[2:]) funcs = extractFunctions(asm) tests = loadTests(sys.argv[1]) errors = False for sybl in tests: if sybl not in funcs: print(f"error: no function '{sybl}' found", file=sys.stderr) errors = True init = True for sybl in sorted(tests): if not init: print("") init = False print(f"\x1b[36m{40*'<>'}\x1b[0m") if not printRawFunction(asm, sybl): errors = True print("") for ref, expr in tests[sybl]: r = expr.evalAtProg(funcs[sybl]) if r != 0: print(f"\x1b[32mPASSED\x1b[0m {ref}") else: print(f"\x1b[31mFAILED\x1b[0m {ref}") errors = True sys.exit(1 if errors else 0)