libnum: add tests for how optimized compiled assembly code is
This change adds tests for libnum (run with `make -C build-x tests`) that compile example programs with g++ and evaluate how well optimized the generated assembly code is. This is done by checking the compiled assembly against user-provided specifications of which instructions should and shouldn't be used.
This commit is contained in:
parent
d77c7fef78
commit
e90abaaabc
|
@ -34,6 +34,7 @@
|
|||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
project(Azur VERSION 0.1 LANGUAGES C CXX ASM)
|
||||
include(CTest)
|
||||
|
||||
set(CMAKE_INSTALL_MESSAGE LAZY)
|
||||
|
||||
|
@ -150,3 +151,10 @@ add_subdirectory(azur)
|
|||
|
||||
# CMake module to find the library
|
||||
install(FILES FindAzur.cmake DESTINATION ${MODPATH})
|
||||
|
||||
#---
# Testing
#---

# `tests` target: runs every registered CTest test. CTest is forced to
# --verbose so that the output of each test is displayed. USES_TERMINAL gives
# CTest direct access to the terminal, and VERBATIM keeps argument escaping
# identical across platforms and generators.
add_custom_target(tests
  COMMAND "${CMAKE_CTEST_COMMAND}" --verbose
  USES_TERMINAL
  VERBATIM)
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#-----------------------------------------------------------------------------#
|
||||
# libnum build system
|
||||
|
||||
include(CTest)
|
||||
|
||||
add_library(num STATIC
|
||||
src/static_checks.cpp)
|
||||
|
||||
|
@ -14,3 +16,17 @@ target_include_directories(num PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
|
|||
install(TARGETS num DESTINATION ${LIBDIR})
|
||||
# Headers: azur/*.h
|
||||
install(DIRECTORY include/ DESTINATION ${INCDIR})
|
||||
|
||||
#---
# Testing
#---

# C++ sources checked by the instruction selection tests (test/isel.py)
set(TESTS
  test/isel_num8.cpp)

# test/isel.py uses `match` statements, so it needs Python 3.10 or later. The
# interpreter is located with FindPython3 instead of relying on whatever
# `python` happens to be in the PATH (which may be missing or too old).
if(BUILD_TESTING)
  find_package(Python3 3.10 COMPONENTS Interpreter)

  if(Python3_Interpreter_FOUND)
    foreach(testfile IN LISTS TESTS)
      add_test(NAME "${testfile}"
        COMMAND "${Python3_EXECUTABLE}"
          "${CMAKE_CURRENT_SOURCE_DIR}/test/isel.py"
          "${CMAKE_CURRENT_SOURCE_DIR}/${testfile}"
          -I "${CMAKE_CURRENT_SOURCE_DIR}/include")
    endforeach()
  else()
    message(WARNING
      "libnum: Python >= 3.10 not found, instruction selection tests disabled")
  endif()
endif()
|
||||
|
|
|
@ -0,0 +1,474 @@
|
|||
#-----------------------------------------------------------------------------#
|
||||
# ," /\ ", Azur: A game engine for CASIO fx-CG and PC #
|
||||
# | _/__\_ | Designed by Lephe' and the Planète Casio community. #
|
||||
# "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> #
|
||||
#-----------------------------------------------------------------------------#
|
||||
# isel.py: Instruction selection testing facility
|
||||
#
|
||||
# Many of the primitive numerical functions of libnum are written to compile
|
||||
# efficiently on SuperH, where "efficiently" mainly cares about using specific
|
||||
# instructions and generating short code. This utility tests these assumptions
|
||||
# by compiling C++ files that use the library, and matching the assembler code
|
||||
# against simple specifications.
|
||||
#
|
||||
# The assembler output from g++ is decoded by simple text analysis. Functions
|
||||
# span from their `_SYMBOL:` to their `.size _SYMBOL, .-_SYMBOL`. Mnemonics are
|
||||
# split on whitespace and arguments on commas; directives and unused labels are
|
||||
# removed.
|
||||
#
|
||||
# The assembler output is matched using two types of expressions:
|
||||
# - "Program expressions", which run against an entire function and evaluate to
|
||||
# integers. These are used to count, mainly.
|
||||
# - "Instruction expressions", which run against single instructions and
|
||||
# evaluate to booleans. These are used to identify instructions.
|
||||
#
|
||||
# Instruction expressions are based on wildcard patterns matching mnemonics,
|
||||
# like `add*` or `b?.s`, combined with logical operators `!`, `&&` and `||`.
|
||||
# For instance, `add || addc || addv` identifies addition instructions, while
|
||||
# `!mov && !rts` eliminates the usual function boilerplate.
|
||||
#
|
||||
# Program expressions are based on the count-expression `[e]` where `e` is an
|
||||
# instruction expression. `[e]` evaluates to the number of instructions in the
|
||||
# function that match `e`. These are combined with integral constants and the
|
||||
# usual arithmetic, logical and comparison operators. C-style bool-as-int
|
||||
# semantics are used, so tests like `!= 0` or `> 0` can often be omitted. `%`
|
||||
# is a shortcut for the number of "non-trivial" instructions, and currently
|
||||
# expands to `[!mov && !rts]`.
|
||||
#
|
||||
# A test is a normal C++ source built using the library, which exposes
|
||||
# functions with C linkage (ie. no name mangling) and has specifications in
|
||||
# comments of the form:
|
||||
#
|
||||
# // FUNCTION_NAME: PROGRAM_EXPRESSION
|
||||
#
|
||||
# The comments should span entire lines and need not be placed near the
|
||||
# functions that they test.
|
||||
#---
|
||||
|
||||
from dataclasses import dataclass
|
||||
import subprocess
|
||||
import functools
|
||||
import fnmatch
|
||||
import typing
|
||||
import enum
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
#---
|
||||
# Program representation and parsing
|
||||
#---
|
||||
|
||||
@dataclass
class Insn:
    """One decoded instruction of a compiled assembly program: a mnemonic
    together with its operands, both kept as plain text."""

    # Instruction name, e.g. "mov" or "rts"
    mnemonic: str
    # Operand strings, in the order they appear after the mnemonic
    args: list[str]
|
||||
|
||||
#---
|
||||
# Specification representation and parsing
|
||||
#---
|
||||
|
||||
@dataclass
class InsnPattern:
    """A pattern matched against a single assembly instruction. Only the
    mnemonic is inspected for now."""

    # Shell-style wildcard for the mnemonic ('?' and '*' are allowed); the
    # comparison is case-sensitive.
    mnemonicWildcard: str
    # Matching on instruction arguments is not implemented; a future `args`
    # field would hold the argument patterns.

    def evalAtInsn(self, program, i) -> bool:
        """Tell whether instruction number `i` of `program` has a mnemonic
        that matches the wildcard."""
        insn = program[i]
        return fnmatch.fnmatchcase(insn.mnemonic, self.mnemonicWildcard)

    def treeStr(self, indent):
        """Render the pattern as one line indented by `indent` spaces."""
        padding = " " * indent
        return padding + self.mnemonicWildcard
|
||||
|
||||
@dataclass
class InsnExpr:
    """A boolean expression over `InsnPattern` leaves, evaluated against one
    instruction at a time."""

    # Node kind: "mnemonic" (wraps a single pattern), "!", "&&" or "||"
    ctor: str
    # Operands of the node: one for "mnemonic" and "!", two for "&&" and "||"
    args: list[typing.Union["InsnPattern", "InsnExpr"]]

    def evalAtInsn(self, program, i) -> bool:
        """Evaluate the expression on instruction number `i` of `program`.

        All operands are evaluated before the operator is applied. The
        "mnemonic" node returns its operand's result unchanged; the logical
        operators return 1 or 0. A node with an unknown constructor or an
        unexpected number of operands yields None."""
        values = [operand.evalAtInsn(program, i) for operand in self.args]
        arity = len(values)

        if self.ctor == "mnemonic" and arity == 1:
            return values[0]
        if self.ctor == "!" and arity == 1:
            return 0 if values[0] else 1
        if self.ctor == "&&" and arity == 2:
            return 1 if (values[0] and values[1]) else 0
        if self.ctor == "||" and arity == 2:
            return 1 if (values[0] or values[1]) else 0
        return None

    def treeStr(self, indent):
        """Render the expression as an indented tree: the constructor on the
        first line, then each operand two columns deeper."""
        children = "\n".join(child.treeStr(indent + 2) for child in self.args)
        return (" " * indent) + self.ctor + ":\n" + children
|
||||
|
||||
@dataclass
|
||||
class ProgExpr:
|
||||
"""An expression which runs on full programs and evaluates to counts of
|
||||
matched instructions."""
|
||||
|
||||
# Node constructor and arguments
|
||||
ctor: str
|
||||
args: list[typing.Union[InsnExpr, int, "Expr"]]
|
||||
|
||||
def evalArg(self, program, arg):
|
||||
match arg:
|
||||
case int(i):
|
||||
return i
|
||||
case InsnExpr(_, _) as e:
|
||||
return sum(int(e.evalAtInsn(program, i))
|
||||
for i in range(len(program)))
|
||||
case ProgExpr(_, _) as e:
|
||||
return e.evalAtProg(program)
|
||||
|
||||
def evalAtProg(self, program) -> int:
|
||||
args = [self.evalArg(program, a) for a in self.args]
|
||||
|
||||
match self.ctor, args:
|
||||
case "atom", [x]: return x
|
||||
# Comparisons
|
||||
case "<", [x,y]: return x < y
|
||||
case ">", [x,y]: return x > y
|
||||
case "<=", [x,y]: return x <= y
|
||||
case ">=", [x,y]: return x >= y
|
||||
case "=", [x,y]: return x == y
|
||||
case "!=", [x,y]: return x != y
|
||||
# Arithmetic
|
||||
case "+", [x,y]: return x + y
|
||||
case "-", [x,y]: return x - y
|
||||
case "-", [x]: return -x
|
||||
# Boolean operators
|
||||
case "!", [x]: return 1 if not x else 0
|
||||
case "&&", [x,y]: return 1 if x and y else 0
|
||||
case "||", [x,y]: return 1 if x or y else 0
|
||||
|
||||
def argStr(self, arg, indent):
|
||||
match arg:
|
||||
case int(i):
|
||||
return (" "*indent) + str(i)
|
||||
case InsnExpr(_, _) as e:
|
||||
return e.treeStr(indent)
|
||||
case ProgExpr(_, _) as e:
|
||||
return e.treeStr(indent)
|
||||
|
||||
def treeStr(self, indent):
|
||||
args = [self.argStr(a, indent+2) for a in self.args]
|
||||
return (" "*indent) + self.ctor + ":\n" + "\n".join(args)
|
||||
|
||||
# Token types produced by `ExprLexer`
T = enum.Enum("T", "NUM OP MNEMONIC LPAR RPAR LBRA RBRA PERCENT END".split())

@dataclass
class Token:
    """A lexed token: its type, plus a value whose meaning depends on the
    type (an int for NUM, the matched text for OP and MNEMONIC, None for
    punctuation and END)."""
    type: T
    value: typing.Any

    def __str__(self):
        return f"{self.type}({self.value})"

class ExprLexer:
    """A lexer for `InsnPattern`, `InsnExpr` and `ProgExpr`."""

    # The hexadecimal alternative comes first; otherwise `[0-9]+` would match
    # the leading "0" of "0x..." and hexadecimal literals could never be lexed.
    RE_NUM = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
    # Two-character operators come before their one-character prefixes;
    # otherwise "!=" would always be lexed as "!" followed by "=".
    RE_OP = re.compile(r"&&|\|\||<=|>=|<>|!=|<|>|=|!|\+|-")
    RE_MNEMONIC = re.compile(r"[a-zA-Z.*?][a-zA-Z0-9.*?]*")
    PUNCT = {
        "(": T.LPAR, ")": T.RPAR,
        "[": T.LBRA, "]": T.RBRA,
        "%": T.PERCENT,
    }

    def __init__(self, code):
        """Initialize the lexer to start analyzing `code`."""
        self.sourceCode = code
        self.rewind()

    def rewind(self):
        """Start or restart lexing the same input."""
        self.code = self.sourceCode
        # Number of tokens requested so far (incremented by each `lex()`)
        self.position = 0
        self.errors = 0

    def atEnd(self):
        """Check whether the end of the input has been reached. Trailing
        whitespace does not count as remaining input."""
        return len(self.code.strip()) == 0

    def dump(self, fp=sys.stdout):
        """Exhaust lexer input and print the result to the specified stream."""
        while not self.atEnd():
            t = self.lex()
            print(f"{self.position:5d}: {t}", file=fp)

    def lex(self):
        """Return the next token in the stream.

        Returns a token of type `T.END` once the input is exhausted. Raises
        Exception on text that starts no valid token; the offending word is
        skipped first, so lexing can be resumed after the error."""
        self.position += 1
        # Skip all leading whitespace (including a stray '\r' left over from
        # CRLF line endings), so that `c` starts with a significant character.
        c = self.code.lstrip()

        if len(c) == 0:
            # Drop whitespace-only leftovers so that atEnd() holds from now on
            self.code = ""
            return Token(T.END, None)

        if c[0] in ExprLexer.PUNCT:
            self.code = c[1:]
            return Token(ExprLexer.PUNCT[c[0]], None)

        if (m := ExprLexer.RE_NUM.match(c)) is not None:
            self.code = c[len(m[0]):]
            # Pick the base explicitly: automatic detection (base 0) rejects
            # decimal literals with leading zeros such as "007".
            base = 16 if m[0][:2] in ("0x", "0X") else 10
            return Token(T.NUM, int(m[0], base))

        if (m := ExprLexer.RE_OP.match(c)) is not None:
            self.code = c[len(m[0]):]
            return Token(T.OP, m[0])

        if (m := ExprLexer.RE_MNEMONIC.match(c)) is not None:
            self.code = c[len(m[0]):]
            return Token(T.MNEMONIC, m[0])

        # Raise a lexing error, after skipping the word that caused it
        s = c.split(maxsplit=1)
        err = s[0]
        self.code = s[1] if len(s) > 1 else ""
        raise Exception(f"Lexing error near '{err}'")
|
||||
|
||||
class ExprParser:
    """An LL(1) recursive descent parser for program expressions."""

    def __init__(self, lexer):
        """Parse the output of a given lexer."""
        self.lexer = lexer
        # Lookahead: the next token, already read from the lexer but not yet
        # consumed by a rule
        self.la = None

    def parseProgExpr(self):
        """Parse the entire input as a ProgExpr.

        Raises Exception on syntax errors, including when tokens remain after
        a complete expression; in that case the remaining tokens are printed
        first."""
        self.lexer.rewind()
        self.la = None
        self.advance()
        e = self.pexpr()

        # The lookahead has already been pulled out of the lexer, so it is the
        # lookahead, not the lexer state, that tells whether unparsed input
        # remains ("1 2" leaves the lexer empty but NUM(2) as lookahead).
        if self.la.type != T.END:
            print("Remaining input:")
            tok = self.la
            while tok.type != T.END:
                print(f"{self.lexer.position:5d}: {tok}")
                tok = self.lexer.lex()
            raise Exception("Syntax error: expected end of input")
        return e

    def advance(self):
        """Return the next token and update the lookahead."""
        consumed = self.la
        self.la = self.lexer.lex()
        return consumed

    def expect(self, types, pred=None, optional=False):
        """Read the next token, ensuring it is one of the specified types; if
        `pred` is specified, also tests the predicate. If `optional` is set,
        returns None in case of mismatch rather than raising an error."""

        if isinstance(types, T):
            types = [types]
        if self.la.type in types and (pred is None or pred(self.la)):
            return self.advance()
        if optional:
            return None

        expected = ", ".join(str(t) for t in types)
        pos = self.lexer.position
        err = f"Expected one of {expected}, got {self.la} (at token {pos})"
        if pred is not None:
            err += " (with predicate)"
        raise Exception(f"Syntax error: {err}")

    # Rule combinators implementing unary and binary operators with precedence.
    # Each one returns a decorator turning the rule `f` for the operands into
    # a rule that also accepts the operators `ops`, building nodes with `ctor`.

    def binaryOpsLeft(ctor, ops):
        """Left-associative binary operators: `a op b op c` is parsed as
        `(a op b) op c`."""
        def decorate(f):
            def symbol(self):
                e = f(self)
                pred = lambda t: t.value in ops
                while (op := self.expect(T.OP, pred, True)) is not None:
                    e = ctor(op.value, [e, f(self)])
                return e
            return symbol
        return decorate

    def binaryOps(ctor, ops, *, rassoc=False):
        """Binary operators that do not chain (at most one operator per
        expression), or right-associative ones if `rassoc` is set."""
        def decorate(f):
            def symbol(self):
                lhs = f(self)
                pred = lambda t: t.value in ops
                if (op := self.expect(T.OP, pred, True)) is not None:
                    rhs = symbol(self) if rassoc else f(self)
                    return ctor(op.value, [lhs, rhs])
                else:
                    return lhs
            return symbol
        return decorate

    # Right-associative binary operators. This is bound here, while
    # `binaryOps` is a local name of the class body; a `def` referring to
    # `binaryOps` from its own body would fail, since function bodies do not
    # see class-level names.
    binaryOpsRight = functools.partial(binaryOps, rassoc=True)

    def unaryOps(ctor, ops, assoc=True):
        """Prefix unary operators; if `assoc` is set they can be stacked, as
        in `!!x`."""
        def decorate(f):
            def symbol(self):
                if (op := self.expect(T.OP, optional=True,
                        pred=lambda t: t.value in ops)) is not None:
                    arg = symbol(self) if assoc else f(self)
                    return ctor(op.value, [arg])
                else:
                    return f(self)
            return symbol
        return decorate

    # Parsing rules. Decorators are listed from the loosest-binding operators
    # (top) to the tightest-binding ones (bottom).

    @binaryOpsLeft(ProgExpr, ["||"])
    @binaryOpsLeft(ProgExpr, ["&&"])
    @binaryOps(ProgExpr, [">", ">=", "<", "<=", "=", "!="])
    @binaryOpsLeft(ProgExpr, ["+", "-"])
    @unaryOps(ProgExpr, ["!", "-"])
    def pexpr(self):
        """Program expression atom: a parenthesized program expression, an
        integer, `%`, or a count `[e]` of an instruction expression `e`."""
        t = self.expect([T.LPAR, T.NUM, T.PERCENT, T.LBRA])
        if t.type == T.LPAR:
            pe = self.pexpr()
            self.expect(T.RPAR)
            return pe
        elif t.type == T.NUM:
            return ProgExpr("atom", [t.value])
        elif t.type == T.PERCENT:
            # `%` is a shortcut, parsed separately by the module-level helper
            return parseProgExpr("[!rts && !mov]")
        elif t.type == T.LBRA:
            ie = self.iexpr()
            self.expect(T.RBRA)
            return ProgExpr("atom", [ie])

    @binaryOpsLeft(InsnExpr, ["||"])
    @binaryOpsLeft(InsnExpr, ["&&"])
    @unaryOps(InsnExpr, ["!"])
    def iexpr(self):
        """Instruction expression atom: a parenthesized instruction
        expression, or a mnemonic pattern."""
        if self.expect(T.LPAR, optional=True):
            ie = self.iexpr()
            self.expect(T.RPAR)
            return ie
        else:
            ip = self.ipat()
            return InsnExpr("mnemonic", [ip])

    def ipat(self):
        """Instruction pattern: a single mnemonic wildcard."""
        t = self.expect(T.MNEMONIC)
        return InsnPattern(t.value)
|
||||
|
||||
def parseProgExpr(string):
    """Parse `string` as a program expression and return the resulting tree;
    errors from the lexer and the parser are passed on to the caller."""
    return ExprParser(ExprLexer(string)).parseProgExpr()
|
||||
|
||||
#---
|
||||
# Main logic
|
||||
#---
|
||||
|
||||
def runCompiler(input, flags):
    """Compile the C++ source `input` to assembly with sh-elf-g++ (C++20, -O2)
    and return the assembler output as a string.

    `flags` are extra command-line arguments, inserted right after the input
    file. Raises subprocess.CalledProcessError if the compiler fails."""
    command = ["sh-elf-g++", input]
    command.extend(flags)
    # "-o -" sends the generated assembly to stdout
    command += ["-S", "-o", "-", "-std=c++20", "-O2"]

    result = subprocess.run(command, stdout=subprocess.PIPE, check=True)
    return result.stdout.decode("utf8")
|
||||
|
||||
def extractFunctions(asm):
    """Split assembler output into functions.

    Returns a dictionary mapping each symbol name (the text before the ':' of
    a label line) to the list of `Insn` found between that label and the next
    one. Lines starting with '.' (directives and local labels) and empty
    lines are ignored. Raises Exception if an instruction appears before any
    label."""
    funcs = dict()
    currentFunc = None

    # Tabs become spaces so that the lines quoted in errors use plain spacing
    for rawLine in asm.replace("\t", " ").splitlines():
        line = rawLine.strip()
        if not line or line.startswith("."):
            continue

        if line.endswith(":"):
            currentFunc = line[:-1]
            funcs[currentFunc] = []
            continue

        if currentFunc is None:
            raise Exception(f"instruction '{line}' before symbol name")

        # First word is the mnemonic; the rest, if any, is the operand list
        fields = line.split(maxsplit=1)
        operands = []
        if len(fields) > 1:
            operands = [a.strip() for a in fields[1].split(",")]
        funcs[currentFunc].append(Insn(fields[0], operands))

    return funcs
|
||||
|
||||
def printRawFunction(asm, sybl):
    """Print the assembler text of function `sybl`, as found in the raw
    compiler output `asm`, without its unused local labels.

    The function spans from its `SYMBOL:` label up to (excluding) its
    `.size SYMBOL, .-SYMBOL` directive. Returns True on success; if either
    marker is missing, prints a placeholder and returns False."""
    # Find symbol definition. str.find() is used rather than str.index(),
    # which raises ValueError instead of returning -1.
    start = asm.find(sybl + ":")
    if start < 0:
        print(f"<Unable to extract function {sybl}>")
        return False

    # Find function size
    end = asm.find(f".size\t{sybl}, .-{sybl}", start)
    if end < 0:
        print(f"<Unable to extract function {sybl}>")
        return False

    func = asm[start:end].strip() + "\n"

    # Eliminate labels that are defined but unused, ie. whose only occurrence
    # in the function is their definition. Occurrences are matched as whole
    # names so that `.L2` is not counted inside `.L20`.
    RE_LABEL = re.compile(r"^(\.[a-zA-Z_][a-zA-Z0-9_]*):$", re.MULTILINE)
    for label in RE_LABEL.findall(func):
        occurrences = re.findall(
            r"(?<![\w.])" + re.escape(label) + r"(?!\w)", func)
        if len(occurrences) == 1:
            func = func.replace(f"{label}:\n", "")

    print(func.strip())
    return True
|
||||
|
||||
def loadTests(input):
    """Collect the specifications written in the source file `input`.

    A specification is a line of the form `// FUNCTION_NAME: EXPRESSION`.
    Returns a dictionary mapping each symbol (the function name prefixed with
    '_') to a list of pairs (expression text, parsed expression), in file
    order."""
    RE_SPEC = re.compile(r'^//\s*([a-zA-Z_][a-zA-Z0-9_]*):\s*(.+)$')

    with open(input, "r") as fp:
        lines = fp.read().split("\n")

    tests = dict()
    for m in map(RE_SPEC.match, lines):
        if m is None:
            continue
        name, spec = m[1], m[2]
        tests.setdefault("_" + name, []).append((spec, parseProgExpr(spec)))

    return tests
|
||||
|
||||
# Command-line driver: compile the source file given as first argument (the
# remaining arguments are passed to the compiler), then check every
# specification found in that file against the compiled functions.

if len(sys.argv) < 2:
    print(f"usage: {sys.argv[0]} <C++ SOURCE> <CXXFLAGS...>", file=sys.stderr)
    # Stop here; there is no source file to work on
    sys.exit(1)

asm = runCompiler(sys.argv[1], sys.argv[2:])
funcs = extractFunctions(asm)
tests = loadTests(sys.argv[1])
errors = False

# Report every specification that targets a function absent from the output
for sybl in tests:
    if sybl not in funcs:
        print(f"error: no function '{sybl}' found", file=sys.stderr)
        errors = True

init = True
for sybl in sorted(tests):
    # Already reported above; there is nothing to print or evaluate
    if sybl not in funcs:
        continue

    # Blank line between functions, then a colored separator
    if not init:
        print("")
    init = False
    print(f"\x1b[36m{40*'<>'}\x1b[0m")

    if not printRawFunction(asm, sybl):
        errors = True
    print("")

    # A specification passes if it evaluates to a non-zero value
    for ref, expr in tests[sybl]:
        r = expr.evalAtProg(funcs[sybl])
        if r != 0:
            print(f"\x1b[32mPASSED\x1b[0m {ref}")
        else:
            print(f"\x1b[31mFAILED\x1b[0m {ref}")
            errors = True

sys.exit(1 if errors else 0)
|
|
@ -0,0 +1,19 @@
|
|||
#include <num/num.h>
|
||||
|
||||
using namespace num;
|
||||
|
||||
// C linkage keeps the function names unmangled, which the specification
// comments below rely on to designate functions by name.
extern "C" {
|
||||
|
||||
// Expectation for the num16 -> num8 conversion (checked by isel.py): no
// instruction other than mov and rts.
// num8_of_num16: %=0
|
||||
num8 num8_of_num16(num16 x)
|
||||
{
|
||||
return num8(x);
|
||||
}
|
||||
|
||||
// Expectation for the num32 -> num8 conversion (checked by isel.py): exactly
// one instruction other than mov and rts, and at least one shlr8.
// num8_of_num32: %=1 && [shlr8]
|
||||
num8 num8_of_num32(num32 x)
|
||||
{
|
||||
return num8(x);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue