libnum: add optimized num16 division for SH4AL-DSP

This commit is contained in:
Lephenixnoir 2023-04-29 12:14:09 +02:00
parent a7529430dd
commit 6eeef0fce9
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
7 changed files with 138 additions and 7 deletions

View File

@ -67,6 +67,7 @@ if(AZUR_PLATFORM STREQUAL gint)
set(AZUR_PLATFORM_GENERIC TRUE)
set(AZUR_TOOLKIT_GINT TRUE)
set(AZUR_GRAPHICS_GINT_CG TRUE)
set(AZUR_ARCH_SH4ALDSP TRUE)
set(AZUR_TERMINAL_NONE TRUE)
# Link with gint and libprof (we only do this to get include directories)
@ -84,6 +85,7 @@ if(AZUR_PLATFORM STREQUAL linux)
set(AZUR_PLATFORM_GENERIC TRUE)
set(AZUR_TOOLKIT_SDL TRUE)
set(AZUR_GRAPHICS_OPENGL_3_3 TRUE)
set(AZUR_ARCH_GENERIC TRUE)
set(AZUR_TERMINAL_ANSI TRUE)
# Once again, we link only for the include paths
@ -99,6 +101,7 @@ if(AZUR_PLATFORM STREQUAL emscripten)
set(AZUR_PLATFORM_EMSCRIPTEN TRUE)
set(AZUR_TOOLKIT_SDL TRUE)
set(AZUR_GRAPHICS_OPENGL_ES_2_0 TRUE)
set(AZUR_ARCH_GENERIC TRUE)
set(AZUR_TERMINAL_PLAIN TRUE)
set(PORTS -sUSE_SDL=2 -sUSE_SDL_IMAGE=2 -sSDL2_IMAGE_FORMATS=["png"])

View File

@ -44,6 +44,14 @@
#cmakedefine AZUR_GRAPHICS_GINT_CG
/* CPU architecture (mostly useful for optimized SuperH stuff). */
/* Generic/unknown CPU architecture; only C code is used. */
#cmakedefine AZUR_ARCH_GENERIC
/* SH4AL-DSP for the fx-CG. */
#cmakedefine AZUR_ARCH_SH4ALDSP
/* Input/output settings on terminal. */
/* No terminal output entirely. */

View File

@ -7,16 +7,31 @@
include(CTest)
add_library(num STATIC
set(SOURCES
src/num.cpp
src/str.cpp)
target_include_directories(num PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
# Select libnum's CPU architecture from the Azur-wide AZUR_ARCH_* flags.
# The flags are tested for truthiness rather than with DEFINED, so a flag that
# is explicitly set to FALSE/OFF does not select its branch. This matches
# #cmakedefine in config.h.in, which only emits a macro for a true variable.
# When neither flag is set, no LIBNUM_ARCH_* macro is defined and the headers
# fall back to their portable code path.
if(AZUR_ARCH_GENERIC)
  set(LIBNUM_ARCH_GENERIC TRUE)
elseif(AZUR_ARCH_SH4ALDSP)
  set(LIBNUM_ARCH_SH4ALDSP TRUE)
  # Hand-written assembly primitives for the SH4AL-DSP.
  # NOTE(review): a .s source is only assembled when the ASM language is
  # enabled for the project (project(... LANGUAGES ... ASM) or
  # enable_language(ASM) at top level); that call is not visible here - confirm.
  list(APPEND SOURCES
    src/sh4aldsp/div_i32_i16_i16.s)
endif()
# Generate <num/config.h> in the build tree from its template; the
# LIBNUM_ARCH_* variables set above decide which macros end up defined.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/include/num/config.h.in"
  "${CMAKE_CURRENT_BINARY_DIR}/num/config.h")

# The static library itself. Headers are found both in the source tree
# (hand-written headers) and in the build tree (generated num/config.h).
add_library(num STATIC ${SOURCES})
target_include_directories(num
  PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
    "${CMAKE_CURRENT_BINARY_DIR}")
# Library file: libnum.a
install(TARGETS num DESTINATION ${LIBDIR})
# Headers: azur/*.h
# Headers: num/config.h and num/*.h
# Hand-written headers from the source tree. The configure_file() template
# (include/num/config.h.in) lives in the same directory but is not a usable
# header, so *.in files are kept out of the installed include directory.
install(DIRECTORY include/ DESTINATION ${INCDIR}
  PATTERN "*.in" EXCLUDE)
# Generated configuration header from the build tree.
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/num/config.h"
  DESTINATION ${INCDIR}/num)
#---
# Unit tests

View File

@ -0,0 +1,15 @@
//---------------------------------------------------------------------------//
// ," /\ ", Azur: A game engine for CASIO fx-CG and PC //
// | _/__\_ | Designed by Lephe' and the Planète Casio community. //
// "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> //
//---------------------------------------------------------------------------//
// num.config: Compile-time configuration options
#pragma once
/* CPU architecture. The libnum CMakeLists.txt selects at most one of the
   macros below from the Azur-wide AZUR_ARCH_* setting. When neither is
   defined, <num/primitives.h> uses its portable C++ implementation. */
/* Generic/unknown CPU architecture; only C code is used. */
#cmakedefine LIBNUM_ARCH_GENERIC
/* SH4AL-DSP for the fx-CG. Selects the assembly primitives that the build
   adds from src/sh4aldsp/. */
#cmakedefine LIBNUM_ARCH_SH4ALDSP

View File

@ -48,9 +48,8 @@
#pragma once
#include <cstdint>
#include <num/primitives.h>
#include <cstddef>
#include <type_traits>
#include <concepts>
@ -219,8 +218,8 @@ struct num16
v = (v * other.v) / 256;
return *this;
}
inline constexpr num16 &operator/=(num16 const &other) {
v = (v * 256) / other.v;
inline num16 &operator/=(num16 const &other) {
v = prim::div_i32_i16_i16(v * 256, other.v);
return *this;
}
inline constexpr num16 &operator%=(num16 const &other) {

View File

@ -0,0 +1,37 @@
//---------------------------------------------------------------------------//
// ," /\ ", Azur: A game engine for CASIO fx-CG and PC //
// | _/__\_ | Designed by Lephe' and the Planète Casio community. //
// "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> //
//---------------------------------------------------------------------------//
// num.primitives: Platform-specific optimized primitives
//
// This header provides a generic interface to optimized primitives such as
// unusual-size divisions. These are used in libnum functions for a performance
// boost.
//
// The following functions are defined:
// - div_X_Y_Z: Division of an X by a Y with a result of type Z (where each of
//   X, Y, Z is a signed integer type iN or an unsigned integer type uN)
//---
#pragma once
#include <num/config.h>
#include <cstdint>
namespace libnum::prim {

#ifndef LIBNUM_ARCH_SH4ALDSP

/* Portable fallback: plain C++ division of a 32-bit numerator by a 16-bit
   denominator. Usable in constant expressions. The quotient is returned as a
   full int; it is not truncated to 16 bits here. Division by zero is not
   checked. */
static inline constexpr int div_i32_i16_i16(int32_t num, int16_t denum)
{
    int32_t const quotient = num / denum;
    return quotient;
}

#else

/* SH4AL-DSP version, implemented in src/sh4aldsp/div_i32_i16_i16.s. It is not
   constexpr, and the assembly returns the quotient sign-extended from its low
   16 bits.
   NOTE(review): the assembly exports the symbol mangled for (long, short), so
   this declaration only links if int32_t is `long` on the target toolchain -
   confirm. */
extern int div_i32_i16_i16(int32_t num, int16_t denum);

#endif

} /* namespace libnum::prim */

View File

@ -0,0 +1,54 @@
# --------------------------------------------------------------------------- #
# ," /\ ", Azur: A game engine for CASIO fx-CG and PC #
# | _/__\_ | Designed by Lephe' and the Planète Casio community. #
# "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> #
# --------------------------------------------------------------------------- #
# SH4AL-DSP optimized i32 / i16 -> i16 division.
#
# This simply uses the CPU's ability to divide without rotation for 16 bit
# divisors without all the boilerplate that libgcc's __sdivsi3 requires since
# it assumes 32-bit inputs. Used for num16 division.
# ---
.global __ZN6libnum4prim15div_i32_i16_i16Els
# libnum::prim::div_i32_i16_i16(long, short)
#
# Signed division of a 32-bit dividend by a 16-bit divisor, built from one
# div0s followed by sixteen div1 steps.
#   r4      dividend (num) on entry; overwritten while the quotient is built
#   r5      divisor (denum) on entry; overwritten (shifted left by 16)
#   r2, r3  scratch (r2 holds 0, r3 a copy of the dividend)
#   r0      result: low 16 bits of the quotient, sign-extended
# NOTE(review): no check for a zero divisor or for a quotient that does not
# fit in 16 bits is visible here; presumably callers guarantee both - confirm.
__ZN6libnum4prim15div_i32_i16_i16Els:
# Move the divisor into the upper 16 bits of r5 (lower 16 bits become 0).
shll16 r5
# r2 = 0: zero operand for subc/addc below, so that only the T bit is applied.
mov #0, r2
# Copy the dividend and rotate the copy left, which puts its sign bit in T.
mov r4, r3
rotcl r3
# r4 = r4 - 0 - T: subtract 1 from the dividend when it is negative.
subc r2, r4
# Initialize the division flags (Q, M, T) from the signs of r4 and r5.
div0s r5, r4
# Sixteen single-step divisions; each div1 shifts one more quotient bit into
# r4 through T.
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
div1 r5, r4
# Keep only the low 16 bits of r4, sign-extended.
exts.w r4, r4
# Rotate left through T: the T bit left by the last div1 enters bit 0 and the
# top bit of r4 moves into T.
rotcl r4
# r4 = r4 + 0 + T: add 1 when the bit rotated out was set (the counterpart of
# the subc adjustment made before the division).
addc r2, r4
# Return. The instruction after rts is in its delay slot and runs before the
# caller resumes: it writes the sign-extended low 16 bits of r4 to r0.
rts
exts.w r4, r0