libnum: add optimized num16 division for SH4AL-DSP
This commit is contained in:
parent
a7529430dd
commit
6eeef0fce9
|
@ -67,6 +67,7 @@ if(AZUR_PLATFORM STREQUAL gint)
|
|||
set(AZUR_PLATFORM_GENERIC TRUE)
|
||||
set(AZUR_TOOLKIT_GINT TRUE)
|
||||
set(AZUR_GRAPHICS_GINT_CG TRUE)
|
||||
set(AZUR_ARCH_SH4ALDSP TRUE)
|
||||
set(AZUR_TERMINAL_NONE TRUE)
|
||||
|
||||
# Link with gint and libprof (we only do this to get include directories)
|
||||
|
@ -84,6 +85,7 @@ if(AZUR_PLATFORM STREQUAL linux)
|
|||
set(AZUR_PLATFORM_GENERIC TRUE)
|
||||
set(AZUR_TOOLKIT_SDL TRUE)
|
||||
set(AZUR_GRAPHICS_OPENGL_3_3 TRUE)
|
||||
set(AZUR_ARCH_GENERIC TRUE)
|
||||
set(AZUR_TERMINAL_ANSI TRUE)
|
||||
|
||||
# Once again, we link only for the include paths
|
||||
|
@ -99,6 +101,7 @@ if(AZUR_PLATFORM STREQUAL emscripten)
|
|||
set(AZUR_PLATFORM_EMSCRIPTEN TRUE)
|
||||
set(AZUR_TOOLKIT_SDL TRUE)
|
||||
set(AZUR_GRAPHICS_OPENGL_ES_2_0 TRUE)
|
||||
set(AZUR_ARCH_GENERIC TRUE)
|
||||
set(AZUR_TERMINAL_PLAIN TRUE)
|
||||
|
||||
set(PORTS -sUSE_SDL=2 -sUSE_SDL_IMAGE=2 -sSDL2_IMAGE_FORMATS=["png"])
|
||||
|
|
|
@ -44,6 +44,14 @@
|
|||
#cmakedefine AZUR_GRAPHICS_GINT_CG
|
||||
|
||||
|
||||
/* CPU architecture (mostly useful for optimized SuperH stuff). */
|
||||
|
||||
/* Generic/unknown CPU architecture; only C code is used. */
|
||||
#cmakedefine AZUR_ARCH_GENERIC
|
||||
/* SH4AL-DSP for the fx-CG. */
|
||||
#cmakedefine AZUR_ARCH_SH4ALDSP
|
||||
|
||||
|
||||
/* Input/output settings on terminal. */
|
||||
|
||||
/* No terminal output entirely. */
|
||||
|
|
|
@ -7,16 +7,31 @@
|
|||
|
||||
include(CTest)
|
||||
|
||||
add_library(num STATIC
|
||||
set(SOURCES
|
||||
src/num.cpp
|
||||
src/str.cpp)
|
||||
|
||||
target_include_directories(num PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
|
||||
if(DEFINED AZUR_ARCH_GENERIC)
|
||||
set(LIBNUM_ARCH_GENERIC TRUE)
|
||||
elseif(DEFINED AZUR_ARCH_SH4ALDSP)
|
||||
set(LIBNUM_ARCH_SH4ALDSP TRUE)
|
||||
list(APPEND SOURCES
|
||||
src/sh4aldsp/div_i32_i16_i16.s)
|
||||
endif()
|
||||
|
||||
configure_file(include/num/config.h.in num/config.h)
|
||||
|
||||
add_library(num STATIC ${SOURCES})
|
||||
target_include_directories(num PUBLIC
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/include"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
# Library file: libnum.a
|
||||
install(TARGETS num DESTINATION ${LIBDIR})
|
||||
# Headers: azur/*.h
|
||||
# Headers: num/config.h and num/*.h
|
||||
install(DIRECTORY include/ DESTINATION ${INCDIR})
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/num/config.h"
|
||||
DESTINATION ${INCDIR}/num)
|
||||
|
||||
#---
|
||||
# Unit tests
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// ," /\ ", Azur: A game engine for CASIO fx-CG and PC //
|
||||
// | _/__\_ | Designed by Lephe' and the Planète Casio community. //
|
||||
// "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> //
|
||||
//---------------------------------------------------------------------------//
|
||||
// num.config: Compile-time configuration options
|
||||
|
||||
#pragma once
|
||||
|
||||
/* CPU architecture. */
|
||||
|
||||
/* Generic/unknown CPU architecture; only C code is used. */
|
||||
#cmakedefine LIBNUM_ARCH_GENERIC
|
||||
/* SH4AL-DSP for the fx-CG. */
|
||||
#cmakedefine LIBNUM_ARCH_SH4ALDSP
|
|
@ -48,9 +48,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <num/primitives.h>
|
||||
#include <cstddef>
|
||||
|
||||
#include <type_traits>
|
||||
#include <concepts>
|
||||
|
||||
|
@ -219,8 +218,8 @@ struct num16
|
|||
v = (v * other.v) / 256;
|
||||
return *this;
|
||||
}
|
||||
inline constexpr num16 &operator/=(num16 const &other) {
|
||||
v = (v * 256) / other.v;
|
||||
inline num16 &operator/=(num16 const &other) {
|
||||
v = prim::div_i32_i16_i16(v * 256, other.v);
|
||||
return *this;
|
||||
}
|
||||
inline constexpr num16 &operator%=(num16 const &other) {
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// ," /\ ", Azur: A game engine for CASIO fx-CG and PC //
|
||||
// | _/__\_ | Designed by Lephe' and the Planète Casio community. //
|
||||
// "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> //
|
||||
//---------------------------------------------------------------------------//
|
||||
// num.primitives: Platform-specific optimized primitives
|
||||
//
|
||||
// This header provides a generic interface to optimized primitives such as
|
||||
// unusual-size divisions. These are used in libnum functions for a performance
|
||||
// boost.
|
||||
//
|
||||
// The following functions are defined:
|
||||
// - div_X_Y_Z: Division of an X by a Y with a result of type Z (where each of X, Y, Z is
|
||||
// a signed integer type iN or an unsigned integer type uN)
|
||||
//---
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <num/config.h>
|
||||
#include <cstdint>
|
||||
|
||||
namespace libnum::prim {
|
||||
|
||||
#ifdef LIBNUM_ARCH_SH4ALDSP
|
||||
|
||||
extern int div_i32_i16_i16(int32_t num, int16_t denum);
|
||||
|
||||
#else
|
||||
|
||||
static inline constexpr int div_i32_i16_i16(int32_t num, int16_t denum)
|
||||
{
|
||||
return num / denum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} /* namespace libnum::prim */
|
|
@ -0,0 +1,54 @@
|
|||
# --------------------------------------------------------------------------- #
|
||||
# ," /\ ", Azur: A game engine for CASIO fx-CG and PC #
|
||||
# | _/__\_ | Designed by Lephe' and the Planète Casio community. #
|
||||
# "._`\/'_." License: MIT <https://opensource.org/licenses/MIT> #
|
||||
# --------------------------------------------------------------------------- #
|
||||
# SH4AL-DSP optimized i32 / i16 -> i16 division.
|
||||
#
|
||||
# This simply uses the CPU's ability to divide without rotation for 16-bit
|
||||
# divisors without all the boilerplate that libgcc's __sdivsi3 requires since
|
||||
# it assumes 32-bit inputs. Used for num16 division.
|
||||
# ---
|
||||
|
||||
.global __ZN6libnum4prim15div_i32_i16_i16Els
|
||||
|
||||
# libnum::prim::div_i32_i16_i16(long, short)
|
||||
__ZN6libnum4prim15div_i32_i16_i16Els:
|
||||
shll16 r5
|
||||
mov #0, r2
|
||||
|
||||
mov r4, r3
|
||||
rotcl r3
|
||||
|
||||
subc r2, r4
|
||||
|
||||
div0s r5, r4
|
||||
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
div1 r5, r4
|
||||
|
||||
exts.w r4, r4
|
||||
|
||||
rotcl r4
|
||||
|
||||
addc r2, r4
|
||||
|
||||
rts
|
||||
exts.w r4, r0
|
Loading…
Reference in New Issue