Resync with last Azur update

2023-05-13 11:28:27 +02:00 · 2023-05-13 11:28:27 +02:00 · 406cc3739e
parent 139cd45159
commit 406cc3739e
16 changed files with 353 additions and 34 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,5 @@

 # Editor files
 *.sublime-*
+.vscode
+
--- a/azur/CMakeLists.txt
+++ b/azur/CMakeLists.txt
@ -35,6 +35,11 @@ if(AZUR_TOOLKIT_SDL AND AZUR_GRAPHICS_OPENGL)
    DEPENDS gen_glsl.py ${ASSETS})
 endif()

+if(AZUR_TOOLKIT_GINT)
+  list(APPEND SOURCES
+    src/gint/init.cpp)
+endif()
+
 # gint rendering
 if(AZUR_GRAPHICS_GINT_CG)
  list(APPEND SOURCES
--- a/azur/include/azur/config.h.in
+++ b/azur/include/azur/config.h.in
@ -44,6 +44,14 @@
 #cmakedefine AZUR_GRAPHICS_GINT_CG


+/* CPU architecture (mostly useful for optimized SuperH stuff). */
+
+/* Generic/unknown CPU architecture; only C code is used. */
+#cmakedefine AZUR_ARCH_GENERIC
+/* SH4AL-DSP for the fx-CG. */
+#cmakedefine AZUR_ARCH_SH4ALDSP
+
+
 /* Input/output settings on terminal. */

 /* No terminal output entirely. */
--- a/azur/include/azur/gint/render.h
+++ b/azur/include/azur/gint/render.h
@ -203,14 +203,13 @@ void azrp_image(int x, int y, bopti_image_t const *image);
 void azrp_subimage(int x, int y, bopti_image_t const *image,
   int left, int top, int width, int height, int flags);

-
 void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color);

-/* Draw a line between point (x1, y1) and (x2,y2) with clipping */
-void azrp_line( int x1, int y1, int x2, int y2, int color);
+/* azrp_line(): Draw a line with clipping to the screen resolution between point (x1,y1) and (x2,y2) */
+void azrp_line( int x1, int y1, int x2, int y2, uint16_t color );

-/* Draw a circle of radius rad and center (xC, yC) */
-void azrp_circle(int xc, int yc, uint16_t rad, uint16_t color);
+/* azrp_circle(): Draw a circle with clipping to the screen resolution, with center (xc,yc) and radius rad */
+void azrp_circle( int xc, int yc, uint16_t rad, uint16_t color );


 /* See below for more detailed image functions. Dynamic effects are provided
@ -224,8 +223,6 @@ void azrp_shader_image_rgb16_configure(void);
 void azrp_shader_image_p8_configure(void);
 void azrp_shader_image_p4_configure(void);
 void azrp_shader_triangle_configure(void);
-void azrp_shader_line_configure(void);
-void azrp_shader_circle_configure(void);

 //---
 // Performance indicators
--- a/azur/src/gint/init.cpp
+++ b/azur/src/gint/init.cpp
@ -0,0 +1,98 @@
+#include <azur/azur.h>
+#include <azur/log.h>
+#include <gint/timer.h>
+#include <gint/cpu.h>
+
+/* azur_init(): Initialize Azur (gint toolkit implementation)
+   The title and window dimensions are accepted but ignored here: the function
+   performs no work and always returns 0. */
+int azur_init(char const *title, int window_width, int window_height)
+{
+    (void)title;
+    (void)window_width;
+    (void)window_height;
+    return 0;
+}
+
+/* azur_quit(): Shut down Azur (gint toolkit implementation)
+   The body is empty, so nothing is released here. The destructor attribute
+   asks the toolchain to call the function automatically at program exit. */
+__attribute__((destructor))
+void azur_quit(void)
+{
+}
+
+//---
+// Main loop setup
+//---
+
+/* Time spent in the main loop (seconds)
+   TODO: Handle ml_time (also in the SDL backend) */
+static double ml_time = 0.0;
+/* Timers for render and updates */
+static int ml_timer_render = -1;
+static int ml_timer_update = -1;
+
+/* Timer callback: raise the flag pointed to by `flag` by storing 1 in it.
+   Returns TIMER_CONTINUE (presumably so that the timer keeps firing; confirm
+   against the gint timer documentation). */
+static int set_flag(volatile int *flag)
+{
+    *flag = 1;
+    return TIMER_CONTINUE;
+}
+
+/* azur_main_loop(): Run render/update cycles until update() asks to stop
+
+   A timer configured with a delay of 1000000 / render_fps (presumably in
+   microseconds; confirm against the gint timer API) raises the render tick.
+   Unless AZUR_MAIN_LOOP_TIED is set in `flags`, a second timer with a delay
+   of 1000000 / update_ups raises the update tick. In tied mode, update() is
+   instead called right before every render() except the very first one.
+
+   The loop ends as soon as update() returns non-zero. Either callback may be
+   NULL, in which case it is skipped.
+
+   Returns 0 after a normal exit, and 1 if a required frequency is not
+   positive or if a timer could not be configured. */
+int azur_main_loop(
+    void (*render)(void), int render_fps,
+    int (*update)(void), int update_ups,
+    int flags)
+{
+    /* Render once right away; updates wait for their first tick */
+    volatile int render_tick = 1;
+    volatile int update_tick = 0;
+    bool started = false;
+    bool const tied = (flags & AZUR_MAIN_LOOP_TIED) != 0;
+
+    /* The timer delays below divide by these frequencies */
+    if(render_fps <= 0 || (!tied && update_ups <= 0)) {
+        azlog(ERROR, "invalid render or update frequency\n");
+        return 1;
+    }
+
+    ml_timer_render = timer_configure(TIMER_ANY, 1000000 / render_fps,
+        GINT_CALL(set_flag, &render_tick));
+    if(ml_timer_render < 0) {
+        ml_timer_render = -1;
+        azlog(ERROR, "failed to create render timer\n");
+        return 1;
+    }
+    timer_start(ml_timer_render);
+
+    if(!tied) {
+        ml_timer_update = timer_configure(TIMER_ANY, 1000000 / update_ups,
+            GINT_CALL(set_flag, &update_tick));
+        if(ml_timer_update < 0) {
+            /* Give the render timer back and forget both IDs */
+            timer_stop(ml_timer_render);
+            ml_timer_render = -1;
+            ml_timer_update = -1;
+            azlog(ERROR, "failed to create update timer\n");
+            return 1;
+        }
+        timer_start(ml_timer_update);
+    }
+
+    while(1) {
+        if(update_tick && !tied) {
+            update_tick = 0;
+            if(update && update()) break;
+        }
+
+        if(render_tick) {
+            render_tick = 0;
+
+            /* Tied renders and updates */
+            if(started && tied) {
+                if(update && update()) break;
+            }
+            if(render) render();
+            started = true;
+        }
+
+        sleep();
+    }
+
+    /* Reset the IDs to -1 ("no timer"). Non-negative values, including 0, are
+       treated as live timers by the checks below, so leaving 0 behind would
+       make a later call stop a timer that this loop does not own. */
+    if(ml_timer_render >= 0) {
+        timer_stop(ml_timer_render);
+        ml_timer_render = -1;
+    }
+    if(ml_timer_update >= 0) {
+        timer_stop(ml_timer_update);
+        ml_timer_update = -1;
+    }
+
+    return 0;
+}
--- a/azur/src/gint/shaders/circle.c
+++ b/azur/src/gint/shaders/circle.c
@ -145,5 +145,4 @@ void azrp_shader_circle( void *uniforms, void *comnd, void *fragment )
    }

    cmd->curr_frag++;
-
 }
--- a/azur/src/gint/shaders/line.c
+++ b/azur/src/gint/shaders/line.c
@ -40,7 +40,7 @@ int SGN( int x1 )
    else return -1;
 }

-void azrp_line(int xA, int yA, int xB, int yB, int color)
+void azrp_line(int xA, int yA, int xB, int yB, uint16_t color)
 {
    prof_enter(azrp_perf_cmdgen);

@ -199,4 +199,4 @@ void azrp_shader_line( void *uniforms, void *comnd, void *fragment )
    cmd->curr_y = cmd->curr_y & 15;

    cmd->i = i+1;
-}
+}
--- a/azur/src/gl/init.cpp
+++ b/azur/src/gl/init.cpp
@ -134,7 +134,8 @@ SDL_Window *azur_sdl_window(void)
 // Main loop setup
 //---

-/* Time spent in the main loop (seconds) */
+/* Time spent in the main loop (seconds)
+   TODO: Handle ml_time (also in the gint backend) */
 static double ml_time = 0.0;

 /* In emscripten, callbacks are void/void, vsync is always ON, and the
--- a/libnum/CMakeLists.txt
+++ b/libnum/CMakeLists.txt
@ -7,15 +7,31 @@

 include(CTest)

-add_library(num STATIC
+set(SOURCES
+  src/num.cpp
  src/str.cpp)

-target_include_directories(num PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
+if(DEFINED AZUR_ARCH_GENERIC)
+  set(LIBNUM_ARCH_GENERIC TRUE)
+elseif(DEFINED AZUR_ARCH_SH4ALDSP)
+  set(LIBNUM_ARCH_SH4ALDSP TRUE)
+  list(APPEND SOURCES
+    src/sh4aldsp/div_i32_i16_i16.s)
+endif()
+
+configure_file(include/num/config.h.in num/config.h)
+
+add_library(num STATIC ${SOURCES})
+target_include_directories(num PUBLIC
+  "${CMAKE_CURRENT_SOURCE_DIR}/include"
+  "${CMAKE_CURRENT_BINARY_DIR}")

 # Library file: libnum.a
 install(TARGETS num DESTINATION ${LIBDIR})
-# Headers: azur/*.h
+# Headers: num/config.h and num/*.h
 install(DIRECTORY include/ DESTINATION ${INCDIR})
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/num/config.h"
+        DESTINATION ${INCDIR}/num)

 #---
 # Unit tests
--- a/libnum/include/num/config.h.in
+++ b/libnum/include/num/config.h.in
@ -0,0 +1,15 @@
+//---------------------------------------------------------------------------//
+//  ,"  /\  ",    Azur: A game engine for CASIO fx-CG and PC                 //
+// |  _/__\_  |   Designed by Lephe' and the Planète Casio community.        //
+//  "._`\/'_."    License: MIT <https://opensource.org/licenses/MIT>         //
+//---------------------------------------------------------------------------//
+// num.config: Compile-time configuration options
+
+#pragma once
+
+/* CPU architecture. */
+
+/* Generic/unknown CPU architecture; only C code is used. */
+#cmakedefine LIBNUM_ARCH_GENERIC
+/* SH4AL-DSP for the fx-CG. */
+#cmakedefine LIBNUM_ARCH_SH4ALDSP
--- a/libnum/include/num/num.h
+++ b/libnum/include/num/num.h
@ -48,9 +48,8 @@

 #pragma once

-#include <cstdint>
+#include <num/primitives.h>
 #include <cstddef>
-
 #include <type_traits>
 #include <concepts>

@ -117,9 +116,15 @@ struct num8
        v %= other.v;
        return *this;
    }
+    /* Integer floor. The comparison operators below treat every num8 as
+       smaller than 1 and at least 0, so the result is always 0. Returned as
+       an int, like ifloor() in the other num types. */
+    inline constexpr int ifloor() const {
+        return 0;
+    }
    inline constexpr num8 floor() {
        return num8(0);
    }
+    /* Integer ceiling: 0 for the value zero, 1 for anything else. Returned as
+       an int because 1 is not representable as a num8 (see the overflow
+       warning on ceil()). */
+    inline constexpr int iceil() const {
+        return v != 0;
+    }
    /* Warning: num8::ceil() always overflows! */
    inline constexpr num8 ceil() {
        return num8(0);
@ -132,19 +137,19 @@ struct num8

    /* Comparisons with int */

-    inline constexpr bool operator==(int const &i) {
+    inline constexpr bool operator==(int const &i) const {
        return (v | i) == 0;
    }
-    inline constexpr bool operator<(int const &i) {
+    inline constexpr bool operator<(int const &i) const {
        return i >= 1;
    }
-    inline constexpr bool operator>=(int const &i) {
+    inline constexpr bool operator>=(int const &i) const {
        return i <= 0;
    }
-    inline constexpr bool operator<=(int const &i) {
+    inline constexpr bool operator<=(int const &i) const {
        return i + !v > 0;
    }
-    inline constexpr bool operator>(int const &i) {
+    inline constexpr bool operator>(int const &i) const {
        return i + !v <= 0;
    }

@ -195,6 +200,9 @@ struct num16
    /* num16 x num16 -> num32 multiplication
       This is efficiently implemented with a muls.w instruction. */
    static constexpr num32 dmul(num16 const &x, num16 const &y);
+    /* num16 / num16 -> num16 division for positive numbers
+       This bypasses some sign tests, which saves a bit of time. */
+    static constexpr num16 div_positive(num16 const &x, num16 const &y);

    /* Basic arithmetic */

@ -210,19 +218,25 @@ struct num16
        v = (v * other.v) / 256;
        return *this;
    }
-    inline constexpr num16 &operator/=(num16 const &other) {
-        v = (v * 256) / other.v;
+    inline num16 &operator/=(num16 const &other) {
+        v = prim::div_i32_i16_i16(v * 256, other.v);
        return *this;
    }
    inline constexpr num16 &operator%=(num16 const &other) {
        v %= other.v;
        return *this;
    }
+    /* Integer floor: drop the 8 low (fractional) bits of the raw value.
+       Const-qualified so it can be used on const values, like the
+       comparison operators. */
+    inline constexpr int ifloor() const {
+        return v >> 8;
+    }
    inline constexpr num16 floor() {
        num16 x;
        x.v = v & 0xff00;
        return x;
    }
+    /* Integer ceiling: adding 0xff before shifting rounds any non-zero
+       fractional part up to the next integer. Const-qualified so it can be
+       used on const values, like the comparison operators. */
+    inline constexpr int iceil() const {
+        return (v + 0xff) >> 8;
+    }
    inline constexpr num16 ceil() {
        num16 x;
        x.v = ((v - 1) | 0xff) + 1;
@ -236,22 +250,22 @@ struct num16

    /* Comparisons with int */

-    inline constexpr bool operator==(int const &i) {
+    inline constexpr bool operator==(int const &i) const {
        return (int16_t)i == i && (i << 8) == v;
    }
-    inline constexpr bool operator<(int const &i) {
+    inline constexpr bool operator<(int const &i) const {
        return (v >> 8) < i;
    }
-    inline constexpr bool operator>=(int const &i) {
+    inline constexpr bool operator>=(int const &i) const {
        return (v >> 8) >= i;
    }
    /* Unfortunately the branchless version for this test is expressed in terms
       of `v`, not `i`, so it does not simplify well when `i` is known. In that
       case, writing eg. `x > num16(0)` is faster than `x > 0`. */
-    inline constexpr bool operator<=(int const &i) {
+    inline constexpr bool operator<=(int const &i) const {
        return (v >> 8) + ((v & 0xff) != 0) <= i;
    }
-    inline constexpr bool operator>(int const &i) {
+    inline constexpr bool operator>(int const &i) const {
        return (v >> 8) + ((v & 0xff) != 0) > i;
    }

@ -330,11 +344,17 @@ struct num32
        v %= other.v;
        return *this;
    }
+    /* Integer floor: drop the 16 low (fractional) bits of the raw value.
+       Const-qualified so it can be used on const values, like the
+       comparison operators. */
+    inline constexpr int ifloor() const {
+        return v >> 16;
+    }
    inline constexpr num32 floor() {
        num32 x;
        x.v = v & 0xffff0000;
        return x;
    }
+    /* Integer ceiling: integer part plus one when the fractional bits are not
+       all zero (same form as operator<=). This avoids computing v + 0xffff,
+       which overflows for large positive values (assuming v is a 32-bit
+       signed integer; confirm against the field declaration). */
+    inline constexpr int iceil() const {
+        return (v >> 16) + ((v & 0xffff) != 0);
+    }
    inline constexpr num32 ceil() {
        num32 x;
        x.v = ((v - 1) | 0xffff) + 1;
@ -345,22 +365,23 @@ struct num32
        x.v = v & 0xffff;
        return x;
    }
+    num32 sqrt() const;

    /* Comparisons with int */

-    inline constexpr bool operator==(int const &i) {
+    inline constexpr bool operator==(int const &i) const {
        return (int16_t)i == i && (i << 16) == v;
    }
-    inline constexpr bool operator<(int const &i) {
+    inline constexpr bool operator<(int const &i) const {
        return (v >> 16) < i;
    }
-    inline constexpr bool operator>=(int const &i) {
+    inline constexpr bool operator>=(int const &i) const {
        return (v >> 16) >= i;
    }
-    inline constexpr bool operator<=(int const &i) {
+    inline constexpr bool operator<=(int const &i) const {
        return (v >> 16) + ((v & 0xffff) != 0) <= i;
    }
-    inline constexpr bool operator>(int const &i) {
+    inline constexpr bool operator>(int const &i) const {
        return (v >> 16) + ((v & 0xffff) != 0) > i;
    }

@ -447,11 +468,17 @@ struct num64
        v %= other.v;
        return *this;
    }
+    /* Integer floor: drop the 32 low (fractional) bits of the raw value; the
+       result is then converted to int. Const-qualified so it can be used on
+       const values. */
+    inline constexpr int ifloor() const {
+        return v >> 32;
+    }
    inline constexpr num64 floor() {
        num64 x;
        x.v = v & 0xffffffff00000000ull;
        return x;
    }
+    /* Integer ceiling: integer part plus one when the 32 low (fractional)
+       bits are not all zero. Const-qualified so it can be used on const
+       values. */
+    inline constexpr int iceil() const {
+        return (v >> 32) + ((uint32_t)v != 0);
+    }
    inline constexpr num64 ceil() {
        num64 x;
        x.v = ((v - 1) | 0xffffffffull) + 1;
@ -569,6 +596,26 @@ inline constexpr T operator-(T const &op) {
    return T(0) - op;
 }

+/* Internal minima, maxima and clamp */
+
+/* Smaller of two values; `right` is returned when neither compares less. */
+template<typename T> requires(is_num<T>)
+inline constexpr T min(T const &left, T const &right)
+{
+    if(left < right)
+        return left;
+    return right;
+}
+
+/* Larger of two values; `right` is returned when neither compares greater. */
+template<typename T> requires(is_num<T>)
+inline constexpr T max(T const &left, T const &right)
+{
+    if(left > right)
+        return left;
+    return right;
+}
+
+/* Restrict `val` to [lower, upper]: cap it at `upper` first, then raise the
+   result to at least `lower`. */
+template<typename T> requires(is_num<T>)
+inline constexpr T clamp(T const &val, T const &lower, T const &upper)
+{
+    T const capped = min(val, upper);
+    return max(lower, capped);
+}
+
 /* Other specific operations */

 inline constexpr num32 num16::dmul(num16 const &x, num16 const &y)
@ -578,6 +625,13 @@ inline constexpr num32 num16::dmul(num16 const &x, num16 const &y)
    return n;
 }

+/* num16 / num16 division on the raw values reinterpreted as unsigned: the
+   dividend is widened to 32 bits and scaled by 256 before dividing. */
+inline constexpr num16 num16::div_positive(num16 const &x, num16 const &y)
+{
+    uint32_t const dividend = (uint32_t)(uint16_t)x.v << 8;
+    uint16_t const divisor = (uint16_t)y.v;
+
+    num16 quotient;
+    quotient.v = dividend / divisor;
+    return quotient;
+}
+
 inline constexpr num64 num32::dmul(num32 const &x, num32 const &y)
 {
    num64 n;
@ -585,4 +639,12 @@ inline constexpr num64 num32::dmul(num32 const &x, num32 const &y)
    return n;
 }

+/* Floor modulo. We provide an optimized version for constants, which optimizes
+   away the main condition. */
+template<typename T> requires(is_num<T>)
+inline constexpr T modf(T const &x, T const &y) {
+    T rem = x % y;
+
+    /* A zero remainder needs no adjustment */
+    if(rem.v == 0)
+        return rem;
+    /* When the raw remainder and divisor XOR to a negative value (for signed
+       raw values: their signs differ), shift the remainder by one divisor */
+    if((rem.v ^ y.v) < 0)
+        return rem + y;
+    return rem;
+}
+
 } /* namespace libnum */
--- a/libnum/include/num/primitives.h
+++ b/libnum/include/num/primitives.h
@ -0,0 +1,37 @@
+//---------------------------------------------------------------------------//
+//  ,"  /\  ",    Azur: A game engine for CASIO fx-CG and PC                 //
+// |  _/__\_  |   Designed by Lephe' and the Planète Casio community.        //
+//  "._`\/'_."    License: MIT <https://opensource.org/licenses/MIT>         //
+//---------------------------------------------------------------------------//
+// num.primitives: Platform-specific optimized primitives
+//
+// This header provides a generic interface to optimized primitives such as
+// unusual-size divisions. These are used in libnum functions for a performance
+// boost.
+//
+// The following functions are defined:
+// - div_X_Y_Z: Division of an X by a Y with a result of type Z (where X,Y,Z is
+//              a signed integer type iN or an unsigned integer type uN)
+//---
+
+#pragma once
+
+#include <num/config.h>
+#include <cstdint>
+
+namespace libnum::prim {
+
+#ifdef LIBNUM_ARCH_SH4ALDSP
+
+extern int div_i32_i16_i16(int32_t num, int16_t denum);
+
+#else
+
+/* Generic fallback: plain C++ division of `num` by `denum`. The quotient is
+   returned as an int; a zero divisor is not checked here. */
+static inline constexpr int div_i32_i16_i16(int32_t num, int16_t denum)
+{
+    return num / denum;
+}
+
+#endif
+
+} /* namespace libnum::prim */
--- a/libnum/include/num/vec.h
+++ b/libnum/include/num/vec.h
@ -189,6 +189,10 @@ inline constexpr vec<T,N> operator*(T const &lhs, vec<T,N> rhs) {
        rhs[i] *= lhs;
    return rhs;
 }
+/* vec / scalar: divide the by-value copy of the vector in place with
+   operator/= and return it. */
+template<typename T, int N>
+inline constexpr vec<T,N> operator/(vec<T,N> lhs, T const &rhs) {
+    lhs /= rhs;
+    return lhs;
+}

 /* Comparisons */

--- a/libnum/src/num.cpp
+++ b/libnum/src/num.cpp
@ -0,0 +1,21 @@
+#include <num/num.h>
+using namespace libnum;
+
+/* Integer square root (rather slow): largest r with r * r <= n for n > 0,
+   and 0 for n <= 0. The root is built from the top bits of n downwards, two
+   bits of n (one bit of root) at a time. */
+static int64_t sqrtll(int64_t n)
+{
+    if(n <= 0)
+        return 0;
+
+    /* Smallest even shift that leaves 1, 2 or 3, whose root is 1 */
+    int shift = 0;
+    while((n >> shift) >= 4)
+        shift += 2;
+
+    int64_t root = 1;
+    while(shift > 0) {
+        shift -= 2;
+        /* The root of (n >> shift) is either 2*root or 2*root + 1 */
+        int64_t const candidate = 2 * root + 1;
+        root = (candidate * candidate <= (n >> shift)) ? candidate : 2 * root;
+    }
+    return root;
+}
+
+/* Square root of a num32: the raw value is widened to 64 bits and shifted
+   left by 16 before taking the integer root, and the root is stored as the
+   raw value of the result. */
+num32 num32::sqrt() const
+{
+    int64_t const widened = (int64_t)v << 16;
+
+    num32 root;
+    root.v = sqrtll(widened);
+    return root;
+}
--- a/libnum/src/sh4aldsp/div_i32_i16_i16.s
+++ b/libnum/src/sh4aldsp/div_i32_i16_i16.s
@ -0,0 +1,54 @@
+# --------------------------------------------------------------------------- #
+#  ,"  /\  ",    Azur: A game engine for CASIO fx-CG and PC                   #
+# |  _/__\_  |   Designed by Lephe' and the Planète Casio community.          #
+#  "._`\/'_."    License: MIT <https://opensource.org/licenses/MIT>           #
+# --------------------------------------------------------------------------- #
+# SH4AL-DSP optimized i32 / i16 -> i16 division.
+#
+# This simply uses the CPU's ability to divide without rotation for 16 bit
+# divisors without all the boilerplate that libgcc's __sdivsi3 requires since
+# it assumes 32-bit inputs. Used for num16 division.
+# ---
+
+.global __ZN6libnum4prim15div_i32_i16_i16Els
+
+# libnum::prim::div_i32_i16_i16(long, short)
+# Presumably r4 = num and r5 = denum on entry, with the quotient returned in
+# r0 (usual SuperH C calling convention; confirm against the ABI).
+__ZN6libnum4prim15div_i32_i16_i16Els:
+# Move the divisor to the upper 16 bits of r5; r2 = 0 is used by subc/addc
+	shll16	r5
+	mov	#0, r2
+
+# Rotate the sign bit of the dividend (copied to r3) into T
+	mov	r4, r3
+	rotcl	r3
+
+# r4 = r4 - 0 - T: decrement the dividend when it is negative
+	subc	r2, r4
+
+# Initialize the division flags from the signs of divisor and dividend
+	div0s	r5, r4
+
+# 16 single-step divisions
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+	div1	r5, r4
+
+# Sign-extend the low 16 bits, rotate T in as the lowest bit, then add the
+# bit rotated out (r4 = r4 + 0 + T)
+	exts.w	r4, r4
+
+	rotcl	r4
+
+	addc	r2, r4
+
+# Return the low 16 bits of r4, sign-extended, in r0 (exts.w sits right after
+# rts, i.e. in its delay slot)
+	rts
+	exts.w	r4, r0
--- a/libnum/test/isel.py
+++ b/libnum/test/isel.py
@ -306,7 +306,7 @@ class ExprParser:
        return decorate

    def binaryOpsRight(ctor, ops):
-        return binaryOpsRight(ctor, ops, rassoc=True)
+        return binaryOps(ctor, ops, rassoc=True)

    def unaryOps(ctor, ops, assoc=True):
        def decorate(f):