libnum: add tests for how optimized compiled assembly code is

This change adds tests for libnum (run with `make -C build-x tests`)
that compile example programs with g++ and evaluate how optimized the
assembly code is. This is done by checking user-provided specifications
of what instructions should and shouldn't be used against the compiled
assembler.
This commit is contained in:
Lephenixnoir 2022-07-19 23:18:01 +01:00
parent d77c7fef78
commit e90abaaabc
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
4 changed files with 517 additions and 0 deletions

View File

@ -34,6 +34,7 @@
cmake_minimum_required(VERSION 3.15)
project(Azur VERSION 0.1 LANGUAGES C CXX ASM)
include(CTest)
set(CMAKE_INSTALL_MESSAGE LAZY)
@ -150,3 +151,10 @@ add_subdirectory(azur)
# CMake module to find the library
install(FILES FindAzur.cmake DESTINATION ${MODPATH})
#---
# Testing
#---

# `tests` target: runs every registered CTest test with --verbose forced on.
# USES_TERMINAL streams CTest's output directly instead of buffering it, and
# VERBATIM keeps argument escaping identical across generators.
add_custom_target(tests
  COMMAND "${CMAKE_CTEST_COMMAND}" --verbose
  COMMENT "Running tests with CTest"
  USES_TERMINAL
  VERBATIM)

View File

@ -5,6 +5,8 @@
#-----------------------------------------------------------------------------#
# libnum build system
include(CTest)
add_library(num STATIC
src/static_checks.cpp)
@ -14,3 +16,17 @@ target_include_directories(num PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
install(TARGETS num DESTINATION ${LIBDIR})
# Headers: azur/*.h
install(DIRECTORY include/ DESTINATION ${INCDIR})
#---
# Testing
#---

# Each entry of TESTS is a C++ source carrying `// FUNCTION: EXPRESSION`
# specifications; test/isel.py compiles it and checks the generated assembly.
# isel.py uses `match` statements, hence the Python >= 3.10 requirement; a
# bare `python` command may be missing or too old, so the interpreter is
# located explicitly.
if(BUILD_TESTING)
  find_package(Python3 3.10 COMPONENTS Interpreter)

  if(Python3_Interpreter_FOUND)
    set(TESTS
      test/isel_num8.cpp)

    foreach(testfile IN LISTS TESTS)
      add_test(NAME "${testfile}"
        COMMAND "${Python3_EXECUTABLE}"
          "${CMAKE_CURRENT_SOURCE_DIR}/test/isel.py"
          "${CMAKE_CURRENT_SOURCE_DIR}/${testfile}"
          -I "${CMAKE_CURRENT_SOURCE_DIR}/include")
    endforeach()
  else()
    message(WARNING
      "libnum: Python >= 3.10 not found, instruction selection tests disabled")
  endif()
endif()

474
libnum/test/isel.py Normal file
View File

@ -0,0 +1,474 @@
#-----------------------------------------------------------------------------#
# ," /\ ", Azur: A game engine for CASIO fx-CG and PC #
# | _/__\_ | Designed by Lephe' and the Planète Casio community. #
# "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> #
#-----------------------------------------------------------------------------#
# isel.py: Instruction selection testing facility
#
# Many of the primitive numerical functions of libnum are written to compile
# efficiently on SuperH, where "efficiently" mainly cares about using specific
# instructions and generating short code. This utility tests these assumptions
# by compiling C++ files that use the library, and matching the assembler code
# against simple specifications.
#
# The assembler output from g++ is decoded by simple text analysis. Functions
# span from their `_SYMBOL:` to their `.size _SYMBOL, .-_SYMBOL`. Mnemonics are
# split on whitespace and arguments on commas; directives and unused labels are
# removed.
#
# The assembler output is matched using two types of expressions:
# - "Program expressions", which run against an entire function and evaluate to
# integers. These are used to count, mainly.
# - "Instruction expressions", which run against single instructions and
# evaluate to booleans. These are used to identify instructions.
#
# Instruction expressions are based on wildcard patterns matching mnemonics,
# like `add*` or `b?.s`, combined with logical operators `!`, `&&` and `||`.
# For instance, `add || addc || addv` identifies addition instructions, while
# `!mov && !rts` eliminates the usual function boilerplate.
#
# Program expressions are based on the count-expression `[e]` where `e` is an
# instruction expression. `[e]` evaluates to the number of instructions in the
# function that match `e`. These are combined with integral constants and the
# usual arithmetic, logical and comparison operators. C-style bool-as-int
# semantics are used, so tests like `!= 0` or `> 0` can often be omitted. `%`
# is a shortcut for the number of "non-trivial" instructions, and currently
# expands to `[!mov && !rts]`.
#
# A test is a normal C++ source built using the library, which exposes
# functions with C linkage (ie. no name mangling) and has specifications in
# comments of the form:
#
# // FUNCTION_NAME: PROGRAM_EXPRESSION
#
# The comments should span entire lines and need not be placed near the
# functions that they test.
#---
from dataclasses import dataclass
import subprocess
import functools
import fnmatch
import typing
import enum
import sys
import os
import re
#---
# Program representation and parsing
#---
@dataclass
class Insn:
    """One decoded instruction taken from a compiled assembly listing."""

    # Operation name (first whitespace-delimited word of the source line)
    mnemonic: str
    # Operand strings, in source order
    args: list[str]
#---
# Specification representation and parsing
#---
@dataclass
class InsnPattern:
    """Leaf of an instruction expression: a wildcard over mnemonics."""

    # Case-sensitive shell-style wildcard ('?' and '*' are supported) that is
    # compared against an instruction's mnemonic.
    mnemonicWildcard: str
    # Operand patterns are not implemented; only the mnemonic is inspected.

    def evalAtInsn(self, program, i) -> bool:
        """Tell whether instruction number `i` of `program` matches."""
        insn = program[i]
        return fnmatch.fnmatchcase(insn.mnemonic, self.mnemonicWildcard)

    def treeStr(self, indent):
        """Render the pattern preceded by `indent` spaces."""
        padding = " " * indent
        return padding + self.mnemonicWildcard
@dataclass
class InsnExpr:
"""An expression built off `InsnPattern`, which runs on single instructions
and evaluates to a boolean."""
# Node constructor and arguments
ctor: str
args: list[typing.Union[InsnPattern, "InsnExpr"]]
def evalAtInsn(self, program, i) -> bool:
args = [a.evalAtInsn(program, i) for a in self.args]
match self.ctor, args:
case "mnemonic", [x]: return x
# Boolean operators
case "!", [x]: return 1 if not x else 0
case "&&", [x,y]: return 1 if x and y else 0
case "||", [x,y]: return 1 if x or y else 0
def treeStr(self, indent):
args = [a.treeStr(indent+2) for a in self.args]
return (" "*indent) + self.ctor + ":\n" + "\n".join(args)
@dataclass
class ProgExpr:
"""An expression which runs on full programs and evaluates to counts of
matched instructions."""
# Node constructor and arguments
ctor: str
args: list[typing.Union[InsnExpr, int, "Expr"]]
def evalArg(self, program, arg):
match arg:
case int(i):
return i
case InsnExpr(_, _) as e:
return sum(int(e.evalAtInsn(program, i))
for i in range(len(program)))
case ProgExpr(_, _) as e:
return e.evalAtProg(program)
def evalAtProg(self, program) -> int:
args = [self.evalArg(program, a) for a in self.args]
match self.ctor, args:
case "atom", [x]: return x
# Comparisons
case "<", [x,y]: return x < y
case ">", [x,y]: return x > y
case "<=", [x,y]: return x <= y
case ">=", [x,y]: return x >= y
case "=", [x,y]: return x == y
case "!=", [x,y]: return x != y
# Arithmetic
case "+", [x,y]: return x + y
case "-", [x,y]: return x - y
case "-", [x]: return -x
# Boolean operators
case "!", [x]: return 1 if not x else 0
case "&&", [x,y]: return 1 if x and y else 0
case "||", [x,y]: return 1 if x or y else 0
def argStr(self, arg, indent):
match arg:
case int(i):
return (" "*indent) + str(i)
case InsnExpr(_, _) as e:
return e.treeStr(indent)
case ProgExpr(_, _) as e:
return e.treeStr(indent)
def treeStr(self, indent):
args = [self.argStr(a, indent+2) for a in self.args]
return (" "*indent) + self.ctor + ":\n" + "\n".join(args)
# Token types produced by `ExprLexer`
T = enum.Enum("T", "NUM OP MNEMONIC LPAR RPAR LBRA RBRA PERCENT END".split())


@dataclass
class Token:
    """A lexical token: a type from `T` and an optional payload."""

    type: T
    # int for NUM, matched text for OP and MNEMONIC, None for everything else
    value: typing.Any

    def __str__(self):
        return f"{self.type}({self.value})"


class ExprLexer:
    """A lexer for `InsnPattern`, `InsnExpr` and `ProgExpr`."""

    # Regex alternation picks the first alternative that matches, not the
    # longest one, so longer forms must come first: hexadecimal before decimal
    # (otherwise "0x1F" lexes as "0"), and "!=" before "!" (otherwise "!=" can
    # never be produced).
    RE_NUM = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
    RE_OP = re.compile(r"!=|<=|>=|<>|&&|\|\||!|<|>|=|\+|-")
    RE_MNEMONIC = re.compile(r"[a-zA-Z.*?][a-zA-Z0-9.*?]*")
    PUNCT = {
        "(": T.LPAR, ")": T.RPAR,
        "[": T.LBRA, "]": T.RBRA,
        "%": T.PERCENT,
    }

    def __init__(self, code):
        """Initialize the lexer to start analyzing `code`."""
        self.sourceCode = code
        self.rewind()

    def rewind(self):
        """Start or restart lexing the same input."""
        self.code = self.sourceCode
        self.position = 0
        self.errors = 0

    def atEnd(self):
        """Check whether the end of the input has been reached.

        Input consisting only of blanks counts as finished, since `lex()`
        skips blanks before every token.
        """
        return len(self.code.lstrip(" \t")) == 0

    def dump(self, fp=sys.stdout):
        """Exhaust lexer input and print the result to the specified stream."""
        while not self.atEnd():
            t = self.lex()
            print(f"{self.position:5d}: {t}", file=fp)

    def lex(self):
        """Return the next token in the stream.

        Returns a `T.END` token once the input is exhausted. Raises Exception
        on text that starts no token; the offending word is skipped first so
        that lexing may be resumed.
        """
        self.position += 1
        c = self.code.lstrip(" \t")

        if len(c) == 0:
            # Consume trailing blanks so that the lexer state reflects the end
            self.code = c
            return Token(T.END, None)

        if c[0] in ExprLexer.PUNCT:
            self.code = c[1:]
            return Token(ExprLexer.PUNCT[c[0]], None)

        if (m := ExprLexer.RE_NUM.match(c)) is not None:
            self.code = c[len(m[0]):]
            # Explicit base: int(text, 0) rejects decimals with leading zeros
            base = 16 if m[0][:2].lower() == "0x" else 10
            return Token(T.NUM, int(m[0], base))

        if (m := ExprLexer.RE_OP.match(c)) is not None:
            self.code = c[len(m[0]):]
            return Token(T.OP, m[0])

        if (m := ExprLexer.RE_MNEMONIC.match(c)) is not None:
            self.code = c[len(m[0]):]
            return Token(T.MNEMONIC, m[0])

        # Raise a lexing error
        s = c.split(maxsplit=1)
        err = s[0]
        self.code = s[1] if len(s) > 1 else ""
        raise Exception(f"Lexing error near '{err}'")
class ExprParser:
    """An LL(1) recursive descent parser for program expressions."""

    def __init__(self, lexer):
        """Parse the output of a given lexer."""
        self.lexer = lexer
        # One-token lookahead; None until parsing starts
        self.la = None

    def parseProgExpr(self):
        """Parse the entire input as a ProgExpr.

        Raises Exception on a lexing or syntax error, including when tokens
        remain after a complete expression.
        """
        self.lexer.rewind()
        self.la = None

        self.advance()
        e = self.pexpr()

        # The lexer always runs one token ahead of the parser, so the end of
        # input must be checked on the lookahead: the lexer's own position
        # ignores a pending token and does not account for trailing blanks.
        if self.la.type != T.END:
            print("Remaining input:")
            print(f"{self.lexer.position:5d}: {self.la}")
            self.lexer.dump()
            raise Exception("Syntax error: expected end of input")

        return e

    def advance(self):
        """Return the next token and update the lookahead."""
        next = self.la
        self.la = self.lexer.lex()
        return next

    def expect(self, types, pred=None, optional=False):
        """Read the next token, ensuring it is one of the specified types; if
        `pred` is specified, also tests the predicate. If `optional` is set,
        returns None in case of mismatch rather than raising an error."""

        if isinstance(types, T):
            types = [types]
        if self.la.type in types and (pred is None or pred(self.la)):
            return self.advance()
        if optional:
            return None

        expected = ", ".join(str(t) for t in types)
        pos = self.lexer.position
        err = f"Expected one of {expected}, got {self.la} (at token {pos})"
        if pred is not None:
            err += " (with predicate)"
        raise Exception(f"Syntax error: {err}")

    # Rule combinators implementing unary and binary operators with precedence.
    # Each one wraps a rule `f` parsing the next-higher precedence level and
    # builds nodes with `ctor(operator, arguments)`.

    def binaryOpsLeft(ctor, ops):
        """Left-associative chain of binary operators from `ops`."""
        def decorate(f):
            def symbol(self):
                e = f(self)
                pred = lambda t: t.value in ops
                while (op := self.expect(T.OP, pred, True)) is not None:
                    e = ctor(op.value, [e, f(self)])
                return e
            return symbol
        return decorate

    def binaryOps(ctor, ops, *, rassoc=False):
        """At most one binary operator from `ops` (non-associative), or a
        right-associative chain if `rassoc` is set."""
        def decorate(f):
            def symbol(self):
                lhs = f(self)
                pred = lambda t: t.value in ops
                if (op := self.expect(T.OP, pred, True)) is not None:
                    rhs = symbol(self) if rassoc else f(self)
                    return ctor(op.value, [lhs, rhs])
                else:
                    return lhs
            return symbol
        return decorate

    def binaryOpsRight(ctor, ops):
        """Right-associative chain of binary operators from `ops`."""
        # Delegates to binaryOps(); calling itself here would never terminate
        return binaryOps(ctor, ops, rassoc=True)

    def unaryOps(ctor, ops, assoc=True):
        """Prefix operators from `ops`; stackable when `assoc` is set."""
        def decorate(f):
            def symbol(self):
                if (op := self.expect(T.OP, optional=True,
                        pred=lambda t: t.value in ops)) is not None:
                    arg = symbol(self) if assoc else f(self)
                    return ctor(op.value, [arg])
                else:
                    return f(self)
            return symbol
        return decorate

    # Parsing rules. Decorators are listed from lowest precedence (outermost)
    # to highest precedence (innermost).

    @binaryOpsLeft(ProgExpr, ["||"])
    @binaryOpsLeft(ProgExpr, ["&&"])
    @binaryOps(ProgExpr, [">", ">=", "<", "<=", "=", "!="])
    @binaryOpsLeft(ProgExpr, ["+", "-"])
    @unaryOps(ProgExpr, ["!", "-"])
    def pexpr(self):
        """Program expression: parenthesized expression, integer constant,
        `%` shortcut or `[instruction expression]` count."""
        t = self.expect([T.LPAR, T.NUM, T.PERCENT, T.LBRA])
        if t.type == T.LPAR:
            pe = self.pexpr()
            self.expect(T.RPAR)
            return pe
        elif t.type == T.NUM:
            return ProgExpr("atom", [t.value])
        elif t.type == T.PERCENT:
            # Expanded with the module-level parseProgExpr() function (not the
            # method of the same name), which uses a fresh lexer and parser
            return parseProgExpr("[!rts && !mov]")
        elif t.type == T.LBRA:
            ie = self.iexpr()
            self.expect(T.RBRA)
            return ProgExpr("atom", [ie])

    @binaryOpsLeft(InsnExpr, ["||"])
    @binaryOpsLeft(InsnExpr, ["&&"])
    @unaryOps(InsnExpr, ["!"])
    def iexpr(self):
        """Instruction expression: parenthesized expression or pattern."""
        if self.expect(T.LPAR, optional=True):
            ie = self.iexpr()
            self.expect(T.RPAR)
            return ie
        else:
            ip = self.ipat()
            return InsnExpr("mnemonic", [ip])

    def ipat(self):
        """Instruction pattern: a single mnemonic wildcard."""
        t = self.expect(T.MNEMONIC)
        return InsnPattern(t.value)
def parseProgExpr(string):
    """Build the ProgExpr tree described by `string`, using a fresh lexer and
    parser; errors raised by either propagate to the caller."""
    return ExprParser(ExprLexer(string)).parseProgExpr()
#---
# Main logic
#---
def runCompiler(input, flags):
    """Compile `input` to assembly with sh-elf-g++ and return the listing.

    `flags` are extra command-line arguments placed right after the input
    file. The assembly is read from the compiler's standard output and decoded
    as UTF-8; a non-zero exit status raises subprocess.CalledProcessError.
    """
    command = ["sh-elf-g++", input]
    command.extend(flags)
    command += ["-S", "-o", "-", "-std=c++20", "-O2"]

    result = subprocess.run(command, stdout=subprocess.PIPE, check=True)
    return result.stdout.decode("utf8")
def extractFunctions(asm):
    """Split an assembly listing into a dict mapping each symbol name to the
    list of `Insn` that follow its label.

    Blank lines and lines starting with '.' (directives and local labels) are
    dropped. Raises Exception if an instruction appears before any label.
    """
    funcs = {}
    current = None

    # Tabs become spaces so that the original text quoted in error messages
    # is uniformly spaced
    for raw in asm.replace("\t", " ").splitlines():
        line = raw.strip()
        if not line or line.startswith("."):
            continue

        if line.endswith(":"):
            current = line[:-1]
            funcs[current] = []
            continue

        if current is None:
            raise Exception(f"instruction '{line}' before symbol name")

        # First word is the mnemonic; the rest, if any, is a comma-separated
        # operand list
        fields = line.split(maxsplit=1)
        if len(fields) == 1:
            operands = []
        else:
            operands = [a.strip() for a in fields[1].split(",")]
        funcs[current].append(Insn(fields[0], operands))

    return funcs
def printRawFunction(asm, sybl):
    """Print the raw assembly of function `sybl` as found in listing `asm`.

    The function spans from its `SYMBOL:` label up to its
    `.size SYMBOL, .-SYMBOL` directive. Local labels that are defined but
    never referenced inside the function are left out of the output.
    Returns True on success; if the function cannot be located, prints a
    placeholder message and returns False.
    """
    # str.find() reports a miss with -1, whereas str.index() would raise

    # Find symbol definition
    start = asm.find(sybl + ":")
    if start < 0:
        print(f"<Unable to extract function {sybl}>")
        return False

    # Find function size
    end = asm.find(f".size\t{sybl}, .-{sybl}", start)
    if end < 0:
        print(f"<Unable to extract function {sybl}>")
        return False

    func = asm[start:end].strip() + "\n"

    # Eliminate labels that are defined but unused: a label is unused when its
    # only occurrence is its own definition. The lookahead prevents `.L2` from
    # being counted inside `.L20`.
    RE_LABEL = re.compile(r"^(\.[a-zA-Z_][a-zA-Z0-9_]*):$", re.MULTILINE)
    for label in RE_LABEL.findall(func):
        uses = re.findall(re.escape(label) + r"(?![a-zA-Z0-9_])", func)
        if len(uses) == 1:
            func = re.sub(r"^" + re.escape(label) + r":\n", "", func,
                          flags=re.MULTILINE)

    print(func.strip())
    return True
def loadTests(input):
    """Collect the specifications written in the comments of file `input`.

    A specification is a line of the form `// NAME: EXPRESSION` starting at
    the first column. Returns a dict mapping "_" + NAME to the list of
    (expression text, parsed expression) pairs, in file order.
    """
    spec_re = re.compile(r'^//\s*([a-zA-Z_][a-zA-Z0-9_]*):\s*(.+)$')

    with open(input, "r") as fp:
        lines = fp.read().split("\n")

    tests = {}
    for line in lines:
        found = spec_re.match(line)
        if found is None:
            continue
        name, text = found[1], found[2]
        # Symbols get a leading underscore in the assembly output
        tests.setdefault("_" + name, []).append((text, parseProgExpr(text)))

    return tests
# Script entry point: compile the test source, evaluate every specification
# found in it, and exit with status 1 if anything went wrong.

if len(sys.argv) < 2:
    print(f"usage: {sys.argv[0]} <C++ SOURCE> <CXXFLAGS...>", file=sys.stderr)
    # Nothing can be done without a source file
    sys.exit(1)

asm = runCompiler(sys.argv[1], sys.argv[2:])
funcs = extractFunctions(asm)
tests = loadTests(sys.argv[1])
errors = False

# Specifications that name a function absent from the assembly are reported
# up front and then skipped, as there is nothing to evaluate them against
for sybl in tests:
    if sybl not in funcs:
        print(f"error: no function '{sybl}' found", file=sys.stderr)
        errors = True

init = True

for sybl in sorted(tests):
    if sybl not in funcs:
        continue

    # Blank line between consecutive functions
    if not init:
        print("")
    init = False

    print(f"\x1b[36m{40*'<>'}\x1b[0m")
    if not printRawFunction(asm, sybl):
        errors = True
    print("")

    for ref, expr in tests[sybl]:
        r = expr.evalAtProg(funcs[sybl])
        # C-style truth value: any non-zero result is a pass
        if r != 0:
            print(f"\x1b[32mPASSED\x1b[0m {ref}")
        else:
            print(f"\x1b[31mFAILED\x1b[0m {ref}")
            errors = True

sys.exit(1 if errors else 0)

19
libnum/test/isel_num8.cpp Normal file
View File

@ -0,0 +1,19 @@
/* Instruction selection test for conversions to num8. The line comments of
   the form "NAME: EXPRESSION" below are specifications read by isel.py, not
   plain remarks: editing them changes what is tested. */
#include <num/num.h>
using namespace num;
/* C linkage keeps the function names unmangled, so that isel.py can find the
   functions by name in the generated assembly. */
extern "C" {
/* Expectation: no instruction other than mov and rts. */
// num8_of_num16: %=0
num8 num8_of_num16(num16 x)
{
return num8(x);
}
/* Expectation: exactly one instruction other than mov and rts, and at least
   one shlr8 instruction. */
// num8_of_num32: %=1 && [shlr8]
num8 num8_of_num32(num32 x)
{
return num8(x);
}
}