From 406cc3739e1013769d87b8c21808e564696a9f6e Mon Sep 17 00:00:00 2001 From: SlyVTT Date: Sat, 13 May 2023 11:28:27 +0200 Subject: [PATCH] Resync with last Azur update --- .gitignore | 2 + azur/CMakeLists.txt | 5 ++ azur/include/azur/config.h.in | 8 +++ azur/include/azur/gint/render.h | 11 ++- azur/src/gint/init.cpp | 98 +++++++++++++++++++++++++ azur/src/gint/shaders/circle.c | 1 - azur/src/gint/shaders/line.c | 4 +- azur/src/gl/init.cpp | 3 +- libnum/CMakeLists.txt | 22 +++++- libnum/include/num/config.h.in | 15 ++++ libnum/include/num/num.h | 100 +++++++++++++++++++++----- libnum/include/num/primitives.h | 37 ++++++++++ libnum/include/num/vec.h | 4 ++ libnum/src/num.cpp | 21 ++++++ libnum/src/sh4aldsp/div_i32_i16_i16.s | 54 ++++++++++++++ libnum/test/isel.py | 2 +- 16 files changed, 353 insertions(+), 34 deletions(-) create mode 100644 azur/src/gint/init.cpp create mode 100644 libnum/include/num/config.h.in create mode 100644 libnum/include/num/primitives.h create mode 100644 libnum/src/num.cpp create mode 100644 libnum/src/sh4aldsp/div_i32_i16_i16.s diff --git a/.gitignore b/.gitignore index 44a9310..ffea1cd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ # Editor files *.sublime-* +.vscode + diff --git a/azur/CMakeLists.txt b/azur/CMakeLists.txt index 2b9ae22..d869bb3 100644 --- a/azur/CMakeLists.txt +++ b/azur/CMakeLists.txt @@ -35,6 +35,11 @@ if(AZUR_TOOLKIT_SDL AND AZUR_GRAPHICS_OPENGL) DEPENDS gen_glsl.py ${ASSETS}) endif() +if(AZUR_TOOLKIT_GINT) + list(APPEND SOURCES + src/gint/init.cpp) +endif() + # gint rendering if(AZUR_GRAPHICS_GINT_CG) list(APPEND SOURCES diff --git a/azur/include/azur/config.h.in b/azur/include/azur/config.h.in index 76b2731..ade0e19 100644 --- a/azur/include/azur/config.h.in +++ b/azur/include/azur/config.h.in @@ -44,6 +44,14 @@ #cmakedefine AZUR_GRAPHICS_GINT_CG +/* CPU architecture (mostly useful for optimized SuperH stuff). */ + +/* Generic/unknown CPU architecture; only C code is used. 
*/ +#cmakedefine AZUR_ARCH_GENERIC +/* SH4AL-DSP for the fx-CG. */ +#cmakedefine AZUR_ARCH_SH4ALDSP + + /* Input/output settings on terminal. */ /* No terminal output entirely. */ diff --git a/azur/include/azur/gint/render.h b/azur/include/azur/gint/render.h index 02dabd2..c825dfe 100644 --- a/azur/include/azur/gint/render.h +++ b/azur/include/azur/gint/render.h @@ -203,14 +203,13 @@ void azrp_image(int x, int y, bopti_image_t const *image); void azrp_subimage(int x, int y, bopti_image_t const *image, int left, int top, int width, int height, int flags); - void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color); -/* Draw a line between point (x1, y1) and (x2,y2) with clipping */ -void azrp_line( int x1, int y1, int x2, int y2, int color); +/* azrp_line(): Draw a line between points (x1,y1) and (x2,y2), with clipping to the screen resolution */ +void azrp_line( int x1, int y1, int x2, int y2, uint16_t color ); -/* Draw a circle of radius rad and center (xC, yC) */ -void azrp_circle(int xc, int yc, uint16_t rad, uint16_t color); +/* azrp_circle(): Draw a circle of center (xc,yc) and radius rad, with clipping to the screen resolution */ +void azrp_circle( int xc, int yc, uint16_t rad, uint16_t color ); /* See below for more detailed image functions. 
Dynamic effects are provided @@ -224,8 +223,6 @@ void azrp_shader_image_rgb16_configure(void); void azrp_shader_image_p8_configure(void); void azrp_shader_image_p4_configure(void); void azrp_shader_triangle_configure(void); -void azrp_shader_line_configure(void); -void azrp_shader_circle_configure(void); //--- // Performance indicators diff --git a/azur/src/gint/init.cpp b/azur/src/gint/init.cpp new file mode 100644 index 0000000..2a327c1 --- /dev/null +++ b/azur/src/gint/init.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include + +int azur_init(char const *title, int window_width, int window_height) +{ + (void)title; + (void)window_width; + (void)window_height; + return 0; +} + +__attribute__((destructor)) +void azur_quit(void) +{ +} + +//--- +// Main loop setup +//--- + +/* Time spent in the main loop (seconds) + TODO: Handle ml_time (also in the SDL backend) */ +static double ml_time = 0.0; +/* Timers for render and updates (-1 when no timer is held) */ +static int ml_timer_render = -1; +static int ml_timer_update = -1; + +static int set_flag(volatile int *flag) +{ + *flag = 1; + return TIMER_CONTINUE; +} + +int azur_main_loop( + void (*render)(void), int render_fps, + int (*update)(void), int update_ups, + int flags) +{ + volatile int render_tick = 1; + volatile int update_tick = 0; + bool started = false; + + ml_timer_render = timer_configure(TIMER_ANY, 1000000 / render_fps, + GINT_CALL(set_flag, &render_tick)); + if(ml_timer_render < 0) { + azlog(ERROR, "failed to create render timer\n"); + return 1; + } + else { + timer_start(ml_timer_render); + } + + if(!(flags & AZUR_MAIN_LOOP_TIED)) { + ml_timer_update = timer_configure(TIMER_ANY, 1000000 / update_ups, + GINT_CALL(set_flag, &update_tick)); + if(ml_timer_update < 0) { + timer_stop(ml_timer_render); + azlog(ERROR, "failed to create update timer\n"); + return 1; + } + else { + timer_start(ml_timer_update); + } + } + + while(1) { + if(update_tick && !(flags & AZUR_MAIN_LOOP_TIED)) { + update_tick = 0; + if(update && update()) 
break; + } + + if(render_tick) { + render_tick = 0; + + /* Tied renders and updates */ + if(started && (flags & AZUR_MAIN_LOOP_TIED)) { + if(update && update()) break; + } + if(render) render(); + started = true; + } + + sleep(); + } + + if(ml_timer_render >= 0) { + timer_stop(ml_timer_render); + ml_timer_render = -1; + } + if(ml_timer_update >= 0) { + timer_stop(ml_timer_update); + ml_timer_update = -1; + } + + return 0; +} diff --git a/azur/src/gint/shaders/circle.c b/azur/src/gint/shaders/circle.c index 7a08a1d..181aac2 100644 --- a/azur/src/gint/shaders/circle.c +++ b/azur/src/gint/shaders/circle.c @@ -145,5 +145,4 @@ void azrp_shader_circle( void *uniforms, void *comnd, void *fragment ) } cmd->curr_frag++; - } \ No newline at end of file diff --git a/azur/src/gint/shaders/line.c b/azur/src/gint/shaders/line.c index 8aeccd8..3101e1b 100644 --- a/azur/src/gint/shaders/line.c +++ b/azur/src/gint/shaders/line.c @@ -40,7 +40,7 @@ int SGN( int x1 ) else return -1; } -void azrp_line(int xA, int yA, int xB, int yB, int color) +void azrp_line(int xA, int yA, int xB, int yB, uint16_t color) { prof_enter(azrp_perf_cmdgen); @@ -199,4 +199,4 @@ void azrp_shader_line( void *uniforms, void *comnd, void *fragment ) cmd->curr_y = cmd->curr_y & 15; cmd->i = i+1; -} \ No newline at end of file +} diff --git a/azur/src/gl/init.cpp b/azur/src/gl/init.cpp index 10d187d..5e5d4e8 100644 --- a/azur/src/gl/init.cpp +++ b/azur/src/gl/init.cpp @@ -134,7 +134,8 @@ SDL_Window *azur_sdl_window(void) // Main loop setup //--- -/* Time spent in the main loop (seconds) */ +/* Time spent in the main loop (seconds) + TODO: Handle ml_time (also in the gint backend) */ static double ml_time = 0.0; /* In emscripten, callbacks are void/void, vsync is always ON, and the diff --git a/libnum/CMakeLists.txt b/libnum/CMakeLists.txt index a98d896..e9b211e 100644 --- a/libnum/CMakeLists.txt +++ b/libnum/CMakeLists.txt @@ -7,15 +7,31 @@ include(CTest) -add_library(num STATIC +set(SOURCES + src/num.cpp 
src/str.cpp) -target_include_directories(num PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") +if(DEFINED AZUR_ARCH_GENERIC) + set(LIBNUM_ARCH_GENERIC TRUE) +elseif(DEFINED AZUR_ARCH_SH4ALDSP) + set(LIBNUM_ARCH_SH4ALDSP TRUE) + list(APPEND SOURCES + src/sh4aldsp/div_i32_i16_i16.s) +endif() + +configure_file(include/num/config.h.in num/config.h) + +add_library(num STATIC ${SOURCES}) +target_include_directories(num PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}") # Library file: libnum.a install(TARGETS num DESTINATION ${LIBDIR}) -# Headers: azur/*.h +# Headers: num/config.h and num/*.h install(DIRECTORY include/ DESTINATION ${INCDIR}) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/num/config.h" + DESTINATION ${INCDIR}/num) #--- # Unit tests diff --git a/libnum/include/num/config.h.in b/libnum/include/num/config.h.in new file mode 100644 index 0000000..2009077 --- /dev/null +++ b/libnum/include/num/config.h.in @@ -0,0 +1,15 @@ +//---------------------------------------------------------------------------// +// ," /\ ", Azur: A game engine for CASIO fx-CG and PC // +// | _/__\_ | Designed by Lephe' and the Planète Casio community. // +// "._`\/'_." License: MIT // +//---------------------------------------------------------------------------// +// num.config: Compile-time configuration options + +#pragma once + +/* CPU architecture. */ + +/* Generic/unknown CPU architecture; only C code is used. */ +#cmakedefine LIBNUM_ARCH_GENERIC +/* SH4AL-DSP for the fx-CG. 
*/ +#cmakedefine LIBNUM_ARCH_SH4ALDSP diff --git a/libnum/include/num/num.h b/libnum/include/num/num.h index dac83b6..c2851f7 100644 --- a/libnum/include/num/num.h +++ b/libnum/include/num/num.h @@ -48,9 +48,8 @@ #pragma once -#include +#include #include - #include #include @@ -117,9 +116,15 @@ struct num8 v %= other.v; return *this; } + inline constexpr int ifloor() { + return 0; + } inline constexpr num8 floor() { return num8(0); } + inline constexpr int iceil() { + return v != 0; + } /* Warning: num8::ceil() always overflows! */ inline constexpr num8 ceil() { return num8(0); @@ -132,19 +137,19 @@ struct num8 /* Comparisons with int */ - inline constexpr bool operator==(int const &i) { + inline constexpr bool operator==(int const &i) const { return (v | i) == 0; } - inline constexpr bool operator<(int const &i) { + inline constexpr bool operator<(int const &i) const { return i >= 1; } - inline constexpr bool operator>=(int const &i) { + inline constexpr bool operator>=(int const &i) const { return i <= 0; } - inline constexpr bool operator<=(int const &i) { + inline constexpr bool operator<=(int const &i) const { return i + !v > 0; } - inline constexpr bool operator>(int const &i) { + inline constexpr bool operator>(int const &i) const { return i + !v <= 0; } @@ -195,6 +200,9 @@ struct num16 /* num16 x num16 -> num32 multiplication This is efficiently implemented with a muls.w instruction. */ static constexpr num32 dmul(num16 const &x, num16 const &y); + /* num16 / num16 -> num16 division for positive numbers + This bypasses some sign tests, which saves a bit of time. 
*/ + static constexpr num16 div_positive(num16 const &x, num16 const &y); /* Basic arithmetic */ @@ -210,19 +218,25 @@ struct num16 v = (v * other.v) / 256; return *this; } - inline constexpr num16 &operator/=(num16 const &other) { - v = (v * 256) / other.v; + inline num16 &operator/=(num16 const &other) { + v = prim::div_i32_i16_i16(v * 256, other.v); return *this; } inline constexpr num16 &operator%=(num16 const &other) { v %= other.v; return *this; } + inline constexpr int ifloor() { + return v >> 8; + } inline constexpr num16 floor() { num16 x; x.v = v & 0xff00; return x; } + inline constexpr int iceil() { + return (v + 0xff) >> 8; + } inline constexpr num16 ceil() { num16 x; x.v = ((v - 1) | 0xff) + 1; @@ -236,22 +250,22 @@ struct num16 /* Comparisons with int */ - inline constexpr bool operator==(int const &i) { + inline constexpr bool operator==(int const &i) const { return (int16_t)i == i && (i << 8) == v; } - inline constexpr bool operator<(int const &i) { + inline constexpr bool operator<(int const &i) const { return (v >> 8) < i; } - inline constexpr bool operator>=(int const &i) { + inline constexpr bool operator>=(int const &i) const { return (v >> 8) >= i; } /* Unfortunately the branchless version for this test is expressed in terms of `v`, not `i`, so it does not simplify well when `i` is known. In that case, writing eg. `x > num16(0)` is faster than `x > 0`. 
*/ - inline constexpr bool operator<=(int const &i) { + inline constexpr bool operator<=(int const &i) const { return (v >> 8) + ((v & 0xff) != 0) <= i; } - inline constexpr bool operator>(int const &i) { + inline constexpr bool operator>(int const &i) const { return (v >> 8) + ((v & 0xff) != 0) > i; } @@ -330,11 +344,17 @@ struct num32 v %= other.v; return *this; } + inline constexpr int ifloor() { + return v >> 16; + } inline constexpr num32 floor() { num32 x; x.v = v & 0xffff0000; return x; } + inline constexpr int iceil() { + return (v >> 16) + ((v & 0xffff) != 0); + } inline constexpr num32 ceil() { num32 x; x.v = ((v - 1) | 0xffff) + 1; @@ -345,22 +365,23 @@ struct num32 x.v = v & 0xffff; return x; } + num32 sqrt() const; /* Comparisons with int */ - inline constexpr bool operator==(int const &i) { + inline constexpr bool operator==(int const &i) const { return (int16_t)i == i && (i << 16) == v; } - inline constexpr bool operator<(int const &i) { + inline constexpr bool operator<(int const &i) const { return (v >> 16) < i; } - inline constexpr bool operator>=(int const &i) { + inline constexpr bool operator>=(int const &i) const { return (v >> 16) >= i; } - inline constexpr bool operator<=(int const &i) { + inline constexpr bool operator<=(int const &i) const { return (v >> 16) + ((v & 0xffff) != 0) <= i; } - inline constexpr bool operator>(int const &i) { + inline constexpr bool operator>(int const &i) const { return (v >> 16) + ((v & 0xffff) != 0) > i; } @@ -447,11 +468,17 @@ struct num64 v %= other.v; return *this; } + inline constexpr int ifloor() { + return v >> 32; + } inline constexpr num64 floor() { num64 x; x.v = v & 0xffffffff00000000ull; return x; } + inline constexpr int iceil() { + return (v >> 32) + ((uint32_t)v != 0); + } inline constexpr num64 ceil() { num64 x; x.v = ((v - 1) | 0xffffffffull) + 1; @@ -569,6 +596,26 @@ inline constexpr T operator-(T const &op) { return T(0) - op; } +/* Internal minima, maxima and clamp */ + +template requires(is_num) 
+inline constexpr T min(T const &left, T const &right) +{ + return (left < right) ? left : right; +} + +template requires(is_num) +inline constexpr T max(T const &left, T const &right) +{ + return (left > right) ? left : right; +} + +template requires(is_num) +inline constexpr T clamp(T const &val, T const &lower, T const &upper) +{ + return max(lower, min(val, upper)); +} + /* Other specific operations */ inline constexpr num32 num16::dmul(num16 const &x, num16 const &y) @@ -578,6 +625,13 @@ inline constexpr num32 num16::dmul(num16 const &x, num16 const &y) return n; } +inline constexpr num16 num16::div_positive(num16 const &x, num16 const &y) +{ + num16 r; + r.v = ((uint32_t)(uint16_t)x.v << 8) / (uint16_t)y.v; + return r; +} + inline constexpr num64 num32::dmul(num32 const &x, num32 const &y) { num64 n; @@ -585,4 +639,12 @@ inline constexpr num64 num32::dmul(num32 const &x, num32 const &y) return n; } +/* Floor modulo: a non-zero remainder whose sign differs from y's is shifted by y. We provide an optimized version for constants, which optimizes + away the main condition. */ +template requires(is_num) +inline constexpr T modf(T const &x, T const &y) { + T r = x % y; + return (r.v && (r.v ^ y.v) < 0) ? r + y : r; +} + } /* namespace libnum */ diff --git a/libnum/include/num/primitives.h b/libnum/include/num/primitives.h new file mode 100644 index 0000000..a241f4f --- /dev/null +++ b/libnum/include/num/primitives.h @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// ," /\ ", Azur: A game engine for CASIO fx-CG and PC // +// | _/__\_ | Designed by Lephe' and the Planète Casio community. // +// "._`\/'_." License: MIT // +//---------------------------------------------------------------------------// +// num.primitives: Platform-specific optimized primitives +// +// This header provides a generic interface to optimized primitives such as +// unusual-size divisions. These are used in libnum functions for a performance +// boost. 
+// +// The following functions are defined: +// - div_X_Y_Z: Division of an X by a Y with a result of type Z (where each of X, Y, Z is +// a signed integer type iN or an unsigned integer type uN) +//--- + +#pragma once + +#include +#include + +namespace libnum::prim { + +#ifdef LIBNUM_ARCH_SH4ALDSP + +extern int div_i32_i16_i16(int32_t num, int16_t denum); + +#else + +static inline constexpr int div_i32_i16_i16(int32_t num, int16_t denum) +{ + return num / denum; +} + +#endif + +} /* namespace libnum::prim */ diff --git a/libnum/include/num/vec.h b/libnum/include/num/vec.h index ffd375e..b95afe5 100644 --- a/libnum/include/num/vec.h +++ b/libnum/include/num/vec.h @@ -189,6 +189,10 @@ inline constexpr vec operator*(T const &lhs, vec rhs) { rhs[i] *= lhs; return rhs; } +template +inline constexpr vec operator/(vec lhs, T const &rhs) { + return lhs /= rhs; +} /* Comparisons */ diff --git a/libnum/src/num.cpp b/libnum/src/num.cpp new file mode 100644 index 0000000..db86219 --- /dev/null +++ b/libnum/src/num.cpp @@ -0,0 +1,21 @@ +#include +using namespace libnum; + +/* Integer square root, rounded down; returns 0 for n <= 0 (rather slow: recurses on n / 4) */ +static int64_t sqrtll(int64_t n) +{ + if(n < 4) + return (n > 0); + + int64_t low_bound = sqrtll(n / 4) * 2; + int64_t high_bound = low_bound + 1; + + return (high_bound * high_bound <= n) ? high_bound : low_bound; +} + +num32 num32::sqrt() const +{ + num32 r; + r.v = sqrtll((int64_t)v << 16); + return r; +} diff --git a/libnum/src/sh4aldsp/div_i32_i16_i16.s b/libnum/src/sh4aldsp/div_i32_i16_i16.s new file mode 100644 index 0000000..48a74dc --- /dev/null +++ b/libnum/src/sh4aldsp/div_i32_i16_i16.s @@ -0,0 +1,54 @@ +# --------------------------------------------------------------------------- # +# ," /\ ", Azur: A game engine for CASIO fx-CG and PC # +# | _/__\_ | Designed by Lephe' and the Planète Casio community. # +# "._`\/'_." License: MIT # +# --------------------------------------------------------------------------- # +# SH4AL-DSP optimized i32 / i16 -> i16 division. 
+# +# This simply uses the CPU's ability to divide without rotation for 16-bit +# divisors without all the boilerplate that libgcc's __sdivsi3 requires since +# it assumes 32-bit inputs. Used for num16 division. +# --- + +.global __ZN6libnum4prim15div_i32_i16_i16Els + +# libnum::prim::div_i32_i16_i16(long, short) +__ZN6libnum4prim15div_i32_i16_i16Els: + shll16 r5 + mov #0, r2 + + mov r4, r3 + rotcl r3 + + subc r2, r4 + + div0s r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + div1 r5, r4 + + exts.w r4, r4 + + rotcl r4 + + addc r2, r4 + + rts + exts.w r4, r0 diff --git a/libnum/test/isel.py b/libnum/test/isel.py index 9ece295..ad406f8 100644 --- a/libnum/test/isel.py +++ b/libnum/test/isel.py @@ -306,7 +306,7 @@ class ExprParser: return decorate def binaryOpsRight(ctor, ops): - return binaryOpsRight(ctor, ops, rassoc=True) + return binaryOps(ctor, ops, rassoc=True) def unaryOps(ctor, ops, assoc=True): def decorate(f):