diff --git a/CMakeLists.txt b/CMakeLists.txt index 60506ce..926ce6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,7 @@ if(AZUR_PLATFORM STREQUAL gint) set(AZUR_PLATFORM_GENERIC TRUE) set(AZUR_TOOLKIT_GINT TRUE) set(AZUR_GRAPHICS_GINT_CG TRUE) + set(AZUR_ARCH_SH4ALDSP TRUE) set(AZUR_TERMINAL_NONE TRUE) # Link with gint and libprof (we only do this to get include directories) @@ -84,6 +85,7 @@ if(AZUR_PLATFORM STREQUAL linux) set(AZUR_PLATFORM_GENERIC TRUE) set(AZUR_TOOLKIT_SDL TRUE) set(AZUR_GRAPHICS_OPENGL_3_3 TRUE) + set(AZUR_ARCH_GENERIC TRUE) set(AZUR_TERMINAL_ANSI TRUE) # Once again, we link only for the include paths @@ -99,6 +101,7 @@ if(AZUR_PLATFORM STREQUAL emscripten) set(AZUR_PLATFORM_EMSCRIPTEN TRUE) set(AZUR_TOOLKIT_SDL TRUE) set(AZUR_GRAPHICS_OPENGL_ES_2_0 TRUE) + set(AZUR_ARCH_GENERIC TRUE) set(AZUR_TERMINAL_PLAIN TRUE) set(PORTS -sUSE_SDL=2 -sUSE_SDL_IMAGE=2 -sSDL2_IMAGE_FORMATS=["png"]) diff --git a/azur/include/azur/config.h.in b/azur/include/azur/config.h.in index 76b2731..ade0e19 100644 --- a/azur/include/azur/config.h.in +++ b/azur/include/azur/config.h.in @@ -44,6 +44,14 @@ #cmakedefine AZUR_GRAPHICS_GINT_CG +/* CPU architecture (mostly useful for optimized SuperH stuff). */ + +/* Generic/unknown CPU architecture; only C code is used. */ +#cmakedefine AZUR_ARCH_GENERIC +/* SH4AL-DSP for the fx-CG. */ +#cmakedefine AZUR_ARCH_SH4ALDSP + + /* Input/output settings on terminal. */ /* No terminal output entirely. 
*/ diff --git a/libnum/CMakeLists.txt b/libnum/CMakeLists.txt index ec0fcaf..e9b211e 100644 --- a/libnum/CMakeLists.txt +++ b/libnum/CMakeLists.txt @@ -7,16 +7,31 @@ include(CTest) -add_library(num STATIC +set(SOURCES src/num.cpp src/str.cpp) -target_include_directories(num PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +if(DEFINED AZUR_ARCH_GENERIC) + set(LIBNUM_ARCH_GENERIC TRUE) +elseif(DEFINED AZUR_ARCH_SH4ALDSP) + set(LIBNUM_ARCH_SH4ALDSP TRUE) + list(APPEND SOURCES + src/sh4aldsp/div_i32_i16_i16.s) +endif() + +configure_file(include/num/config.h.in num/config.h) + +add_library(num STATIC ${SOURCES}) +target_include_directories(num PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}") # Library file: libnum.a install(TARGETS num DESTINATION ${LIBDIR}) -# Headers: azur/*.h +# Headers: num/config.h and num/*.h install(DIRECTORY include/ DESTINATION ${INCDIR}) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/num/config.h" + DESTINATION ${INCDIR}/num) #--- # Unit tests diff --git a/libnum/include/num/config.h.in b/libnum/include/num/config.h.in new file mode 100644 index 0000000..2009077 --- /dev/null +++ b/libnum/include/num/config.h.in @@ -0,0 +1,15 @@ +//---------------------------------------------------------------------------// +// ," /\ ", Azur: A game engine for CASIO fx-CG and PC // +// | _/__\_ | Designed by Lephe' and the Planète Casio community. // +// "._`\/'_." License: MIT // +//---------------------------------------------------------------------------// +// num.config: Compile-time configuration options + +#pragma once + +/* CPU architecture. */ + +/* Generic/unknown CPU architecture; only C code is used. */ +#cmakedefine LIBNUM_ARCH_GENERIC +/* SH4AL-DSP for the fx-CG. 
*/ +#cmakedefine LIBNUM_ARCH_SH4ALDSP diff --git a/libnum/include/num/num.h b/libnum/include/num/num.h index 9f08091..c2851f7 100644 --- a/libnum/include/num/num.h +++ b/libnum/include/num/num.h @@ -48,9 +48,8 @@ #pragma once -#include +#include #include - #include #include @@ -219,8 +218,8 @@ struct num16 v = (v * other.v) / 256; return *this; } - inline constexpr num16 &operator/=(num16 const &other) { - v = (v * 256) / other.v; + inline num16 &operator/=(num16 const &other) { + v = prim::div_i32_i16_i16(v * 256, other.v); return *this; } inline constexpr num16 &operator%=(num16 const &other) { diff --git a/libnum/include/num/primitives.h b/libnum/include/num/primitives.h new file mode 100644 index 0000000..a241f4f --- /dev/null +++ b/libnum/include/num/primitives.h @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// ," /\ ", Azur: A game engine for CASIO fx-CG and PC // +// | _/__\_ | Designed by Lephe' and the Planète Casio community. // +// "._`\/'_." License: MIT // +//---------------------------------------------------------------------------// +// num.primitives: Platform-specific optimized primitives +// +// This header provides a generic interface to optimized primitives such as +// unusual-size divisions. These are used in libnum functions for a performance +// boost. 
+// +// The following functions are defined: +// - div_X_Y_Z: Division of an X by a Y with a result of type Z (where X,Y,Z is +// a signed integer type iN or an unsigned integer type uN) +//--- + +#pragma once + +#include +#include + +namespace libnum::prim { + +#ifdef LIBNUM_ARCH_SH4ALDSP + +extern int div_i32_i16_i16(int32_t num, int16_t denum); + +#else + +static inline constexpr int div_i32_i16_i16(int32_t num, int16_t denum) +{ + return num / denum; +} + +#endif + +} /* namespace libnum::prim */ diff --git a/libnum/src/sh4aldsp/div_i32_i16_i16.s b/libnum/src/sh4aldsp/div_i32_i16_i16.s new file mode 100644 index 0000000..48a74dc --- /dev/null +++ b/libnum/src/sh4aldsp/div_i32_i16_i16.s @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------------- # +# ," /\ ", Azur: A game engine for CASIO fx-CG and PC # +# | _/__\_ | Designed by Lephe' and the Planète Casio community. # +# "._`\/'_." License: MIT # +# --------------------------------------------------------------------------- # +# SH4AL-DSP optimized i32 / i16 -> i16 division. +# +# This simply uses the CPU's ability to divide without rotation for 16 bit +# divisors without all the boilerplate that libgcc's __sdivsi3 requires since +# it assumes 32-bit inputs. Used for num16 division. +# --- + +.global __ZN6libnum4prim15div_i32_i16_i16Els + +# libnum::prim::div_i32_i16_i16(long, short): r4 = dividend, r5 = divisor, result in r0 +__ZN6libnum4prim15div_i32_i16_i16Els: + shll16 r5 + mov #0, r2 + + mov r4, r3 + rotcl r3 + + subc r2, r4 + + div0s r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + exts.w r4, r4 + + rotcl r4 + + addc r2, r4 + + rts + exts.w r4, r0