libnum: initial commit

2022-06-02 21:18:28 +01:00 · 2022-06-02 21:18:28 +01:00 · bf5db2a0f0
parent b0ca2a87a5
commit bf5db2a0f0
4 changed files with 472 additions and 0 deletions
--- a/libnum/CMakeLists.txt
+++ b/libnum/CMakeLists.txt
@ -0,0 +1,18 @@
+cmake_minimum_required(VERSION 3.15)
+project(libnum VERSION 0.1)
+
+# Most of the code is in the headers. NOTE(review): test.cpp is not one of the files added by this commit; confirm it exists
+add_library(num STATIC src/static_checks.cpp test.cpp)
+
+target_include_directories(num
+  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
+target_compile_options(num PRIVATE -std=c++17)
+
+#---
+# Install (LIBDIR and INCDIR are not set in this file; presumably supplied by the caller - TODO confirm)
+#---
+
+# Library file: libnum.a
+install(TARGETS num DESTINATION ${LIBDIR})
+# Headers: num/*.h
+install(DIRECTORY include/ DESTINATION ${INCDIR})
--- a/libnum/include/num/num.h
+++ b/libnum/include/num/num.h
@ -0,0 +1,420 @@
+//---
+// num.num: Fixed-point numerical types
+//
+// This header provides numerical types of various fixed-point sizes. The base
+// type num is num32, and other data structures outside of this header
+// (vectors, matrices, etc.) normally only use num. Other types are useful for
+// storage and sometimes intermediate computation steps.
+//---
+
+/* TODO: Conversion with float/double: use the binary format efficiently
+   General idea for a num -> fp conversion:
+   1. Start with mantissa=num_value, exponent=num_fixed_position
+   2. Decrease exponent and shift mantissa until top bit is 1, then shift again
+   3. Generate the floating-point value
+   General idea for an fp -> num conversion:
+   1. Literally just shift mantissa by exponent - num_fixed_position */
+
+#pragma once
+
+#include <cstdint>
+#include <cstddef>
+
+#include <type_traits>
+
+namespace libnum {
+
+struct num8; /* unsigned 0:8 */
+struct num16; /* signed 8:8 */
+struct num32; /* signed 16:16 */
+struct num64; /* signed 32:32 */
+
+using num = num32; /* base type of the library */
+
+/* num8: unsigned 0:8 fixed-point type
+   * Size:        8 bits (1 byte)
+   * Range:       0.0 (0x00) ... 0.996094 (0xff)
+   * Precision:   0.0039 (1/256)
+   * Represents:  <integer value> / 256
+
+   This type is useful to store values of low precision in the 0..1 range. The
+   value 1 cannot be represented, but it can sometimes be handled as a special
+   case (interpolation curves) or omitted entirely (restricting the range). */
+struct num8
+{
+    uint8_t v; /* raw value; the represented number is v / 256 */
+
+    inline constexpr num8(): v(0) {}
+    /* Convert from int; the argument is ignored and the result is always 0. */
+    inline constexpr num8(int): v(0) {}
+    /* Convert from float */
+    inline constexpr num8(float f): v(f * 256) {}
+    /* Convert from double */
+    inline constexpr num8(double d): v(d * 256) {}
+    /* Convert from other num types (defined once those types are complete) */
+    inline constexpr explicit num8(num16 n);
+    inline constexpr explicit num8(num32 n);
+    inline constexpr explicit num8(num64 n);
+
+    /* Convert to int; always 0 since every representable value is below 1. */
+    inline constexpr explicit operator int() const { return 0; }
+    /* Convert to float */
+    inline constexpr explicit operator float() const { return (float)v / 256; }
+    /* Convert to double */
+    inline constexpr explicit operator double() const { return (double)v / 256; }
+
+    /* Basic arithmetic; results are stored back into 8 bits, so they wrap. */
+
+    inline constexpr num8 &operator+=(num8 const &other) {
+        v += other.v;
+        return *this;
+    }
+    inline constexpr num8 &operator-=(num8 const &other) {
+        v -= other.v;
+        return *this;
+    }
+    inline constexpr num8 &operator*=(num8 const &other) {
+        v = (v * other.v) >> 8; /* product has 16 fractional bits; keep top 8 */
+        return *this;
+    }
+    inline constexpr num8 &operator/=(num8 const &other) {
+        v = (v * 256) / other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+    inline constexpr num8 &operator%=(num8 const &other) {
+        v %= other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+};
+
+/* num16: Signed 8:8 fixed-point type
+   * Size:        16 bits (2 bytes)
+   * Range:       -128.0 (0x8000) ... 127.996094 (0x7fff)
+   * Precision:   0.0039 (1/256)
+   * Represents:  <integer value> / 256
+
+   This type is useful to store numeric parameters that have a limited range.
+   Using it in actual computations requires sign-extensions, but it is useful
+   in multiplications because the 16-bit multiplication (muls.w) takes only 1
+   cycle, and the num16 x num16 -> num32 result is immediately available. */
+struct num16
+{
+    int16_t v; /* raw value; the represented number is v / 256 */
+
+    inline constexpr num16(): v(0) {}
+    /* Convert from int */
+    inline constexpr num16(int i): v(i * 256) {}
+    /* Convert from float */
+    inline constexpr num16(float f): v(f * 256) {}
+    /* Convert from double */
+    inline constexpr num16(double d): v(d * 256) {}
+    /* Convert from other num types (defined once those types are complete) */
+    inline constexpr explicit num16(num8 n);
+    inline constexpr explicit num16(num32 n);
+    inline constexpr explicit num16(num64 n);
+
+    /* Convert to int (drops the 8 fractional bits) */
+    inline constexpr explicit operator int() const { return v >> 8; }
+    /* Convert to float */
+    inline constexpr explicit operator float() const { return (float)v / 256; }
+    /* Convert to double */
+    inline constexpr explicit operator double() const { return (double)v / 256; }
+
+    /* num16 x num16 -> num32 multiplication
+       This is efficiently implemented with a muls.w instruction. */
+    static constexpr num32 dmul(num16 const &x, num16 const &y);
+
+    /* Basic arithmetic; results are stored back into 16 bits. */
+
+    inline constexpr num16 &operator+=(num16 const &other) {
+        v += other.v;
+        return *this;
+    }
+    inline constexpr num16 &operator-=(num16 const &other) {
+        v -= other.v;
+        return *this;
+    }
+    inline constexpr num16 &operator*=(num16 const &other) {
+        v = (v * other.v) >> 8; /* product has 16 fractional bits; keep top 8 */
+        return *this;
+    }
+    inline constexpr num16 &operator/=(num16 const &other) {
+        v = (v * 256) / other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+    inline constexpr num16 &operator%=(num16 const &other) {
+        v %= other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+};
+
+/* num32: Signed 16:16 fixed-point type
+   * Size:        32 bits (4 bytes)
+   * Range:       -32768.0 (0x80000000) ... 32767.999985 (0x7fffffff)
+   * Precision:   0.000015 (1/65536)
+   * Represents:  <integer value> / 65536
+
+   This is the ubiquitous fixed-point type in this library, most functions and
+   types use it. It can be used pretty freely in ways similar to a float, with
+   the important drawback that overflows are very possible. */
+struct num32
+{
+    int32_t v; /* raw value; the represented number is v / 65536 */
+
+    inline constexpr num32(): v(0) {}
+    /* Convert from int (i must lie within the -32768..32767 range) */
+    inline constexpr num32(int i): v(i * 65536) {}
+    /* Convert from float */
+    inline constexpr num32(float f): v(f * 65536) {}
+    /* Convert from double */
+    inline constexpr num32(double d): v(d * 65536) {}
+    /* Convert from other num types (defined once those types are complete) */
+    inline constexpr explicit num32(num8 n);
+    inline constexpr explicit num32(num16 n);
+    inline constexpr explicit num32(num64 n);
+
+    /* Convert to int (drops the 16 fractional bits) */
+    inline constexpr explicit operator int() const {
+        return v >> 16;
+    }
+    /* Convert to float */
+    inline constexpr explicit operator float() const {
+        return (float)v / 65536;
+    }
+    /* Convert to double */
+    inline constexpr explicit operator double() const {
+        return (double)v / 65536;
+    }
+
+    /* num32 x num32 -> num64 multiplication
+       This is efficiently implemented with a dmuls.l instruction. */
+    static constexpr num64 dmul(num32 const &x, num32 const &y);
+
+    /* Basic arithmetic; results are stored back into 32 bits. */
+
+    inline constexpr num32 &operator+=(num32 const &other) {
+        v += other.v;
+        return *this;
+    }
+    inline constexpr num32 &operator-=(num32 const &other) {
+        v -= other.v;
+        return *this;
+    }
+    inline constexpr num32 &operator*=(num32 const &other) {
+        v = ((int64_t)v * (int64_t)other.v) >> 16; /* 64-bit intermediate */
+        return *this;
+    }
+    inline constexpr num32 &operator/=(num32 const &other) {
+        v = ((int64_t)v * 65536) / other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+    inline constexpr num32 &operator%=(num32 const &other) {
+        v %= other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+};
+
+/* Arithmetic with integers: the raw value is scaled directly by n. */
+
+inline constexpr num32 operator*(int n, num32 x) {
+    num32 r;
+    r.v = n * x.v;
+    return r;
+}
+inline constexpr num32 operator*(num32 x, int n) {
+    num32 r;
+    r.v = n * x.v;
+    return r;
+}
+inline constexpr num32 operator/(num32 x, int n) {
+    num32 r;
+    r.v = x.v / n; /* caller must ensure n != 0 */
+    return r;
+}
+
+/* num64: Signed 32:32 fixed-point type
+   * Size:        64 bits (8 bytes)
+   * Range:       -2147483648.0 ... 2147483647.999999998
+   * Precision:   2.33e-10 (1/4294967296)
+   * Represents:  <integer value> / 4294967296
+
+   This fixed-point type with extra precision can be used for intermediate
+   computations when num32 would overflow. */
+struct num64
+{
+    int64_t v; /* raw value; the represented number is v / 4294967296 */
+
+    inline constexpr num64(): v(0) {}
+    /* Convert from int (widened to 64 bits before scaling) */
+    inline constexpr num64(int i): v((int64_t)i * 4294967296) {}
+    /* Convert from float */
+    inline constexpr num64(float f): v(f * 4294967296) {}
+    /* Convert from double */
+    inline constexpr num64(double d): v(d * 4294967296) {}
+    /* Convert from other num types (defined right after this struct) */
+    inline constexpr explicit num64(num8 n);
+    inline constexpr explicit num64(num16 n);
+    inline constexpr explicit num64(num32 n);
+
+    /* Convert to int (drops the 32 fractional bits) */
+    inline constexpr explicit operator int() const { return v >> 32; }
+    /* Convert to float */
+    inline constexpr explicit operator float() const { return (float)v/4294967296; }
+    /* Convert to double */
+    inline constexpr explicit operator double() const {return (double)v/4294967296;}
+
+    /* Basic arithmetic */
+
+    inline constexpr num64 &operator+=(num64 const &other) {
+        v += other.v;
+        return *this;
+    }
+    inline constexpr num64 &operator-=(num64 const &other) {
+        v -= other.v;
+        return *this;
+    }
+    /* TODO: Multiplication and division of num64
+    inline constexpr num64 &operator*=(num64 const &other) {
+        v = ...;
+        return *this;
+    }
+    inline constexpr num64 &operator/=(num64 const &other) {
+        v = ...;
+        return *this;
+    } */
+    inline constexpr num64 &operator%=(num64 const &other) {
+        v %= other.v; /* caller must ensure other.v != 0 */
+        return *this;
+    }
+};
+
+/* Converting constructors (defined here, once all four types are complete). */
+
+inline constexpr num8::num8(num16 n): v(n.v) {} /* keeps the low 8 bits */
+/* Casting to unsigned allows the use of shlr instead of shad */
+inline constexpr num8::num8(num32 n): v((uint32_t)n.v >> 8) {}
+/* Slightly inefficient; accesses both longwords of n.v, only one is needed */
+inline constexpr num8::num8(num64 n): v(n.v >> 24) {}
+
+inline constexpr num16::num16(num8 n): v(n.v) {} /* same 1/256 scale */
+/* Casting to unsigned allows the use of shlr instead of shad */
+inline constexpr num16::num16(num32 n): v((uint32_t)n.v >> 8) {}
+inline constexpr num16::num16(num64 n): v(n.v >> 24) {}
+
+inline constexpr num32::num32(num8 n): v(n.v * 256) {} /* 1/256 -> 1/65536 */
+inline constexpr num32::num32(num16 n): v(n.v * 256) {}
+inline constexpr num32::num32(num64 n): v(n.v >> 16) {}
+
+inline constexpr num64::num64(num8 n): v((uint64_t)n.v * 16777216) {}
+/* Pretty slow (~10 cycles) because of sign-extension across registers */
+inline constexpr num64::num64(num16 n): v((int64_t)n.v * 16777216) {}
+inline constexpr num64::num64(num32 n): v((int64_t)n.v * 65536) {}
+
+/* The following type trait has value=true for exactly the four num types. */
+template<typename T>
+struct is_num {
+    static constexpr bool value =
+        std::is_same<T, num8>::value ||
+        std::is_same<T, num16>::value ||
+        std::is_same<T, num32>::value ||
+        std::is_same<T, num64>::value;
+};
+
+template<typename T>
+constexpr bool is_num_v = is_num<T>::value; /* shorthand for is_num<T>::value */
+
+/* Comparisons of raw values, identical for all num types (same type both sides). */
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, bool>::type
+inline constexpr operator==(T const &left, T const &right) {
+    return left.v == right.v;
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, bool>::type
+inline constexpr operator!=(T const &left, T const &right) {
+    return left.v !=right.v;
+}
+template<typename T>
+typename std::enable_if<is_num_v<T>, bool>::type
+inline constexpr operator<(T const &left, T const &right) {
+    return left.v < right.v;
+}
+template<typename T>
+typename std::enable_if<is_num_v<T>, bool>::type
+inline constexpr operator<=(T const &left, T const &right) {
+    return left.v <= right.v;
+}
+template<typename T>
+typename std::enable_if<is_num_v<T>, bool>::type
+inline constexpr operator>(T const &left, T const &right) {
+    return left.v > right.v;
+}
+template<typename T>
+typename std::enable_if<is_num_v<T>, bool>::type
+inline constexpr operator>=(T const &left, T const &right) {
+    return left.v >= right.v;
+}
+
+/* Pure arithmetic operators, built on each type's compound assignments. */
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator+(T left, T const &right) {
+    return (left += right); /* left is a by-value copy, so this is safe */
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator-(T left, T const &right) {
+    return (left -= right);
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator*(T left, T const &right) {
+    return (left *= right);
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator/(T left, T const &right) {
+    return (left /= right);
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator%(T left, T const &right) {
+    return (left %= right);
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator+(T const &op) {
+    return op; /* unary plus: returns a copy */
+}
+
+template<typename T>
+typename std::enable_if<is_num_v<T>, T>::type
+inline constexpr operator-(T const &op) {
+    return T(0) - op; /* unary minus, computed as zero minus op */
+}
+
+/* Widening multiplications: the product of two raw values, in the wider type. */
+
+inline constexpr num32 num16::dmul(num16 const &x, num16 const &y)
+{
+    num32 n;
+    n.v = x.v * y.v; /* 8:8 x 8:8 -> 16:16, no shift needed */
+    return n;
+}
+
+inline constexpr num64 num32::dmul(num32 const &x, num32 const &y)
+{
+    num64 n;
+    n.v = (int64_t)x.v * (int64_t)y.v; /* 16:16 x 16:16 -> 32:32 */
+    return n;
+}
+
+} /* namespace libnum */
--- a/libnum/src/static_checks.cpp
+++ b/libnum/src/static_checks.cpp
@ -0,0 +1,26 @@
+#include <num/num.h>
+
+using namespace libnum;
+
+static_assert(sizeof(num8) == 1);
+static_assert(num8(1).v == 0x00); /* int conversion always yields 0 */
+static_assert(num8(0.5).v == 0x80);
+static_assert(num8(0.0625f).v == 0x10);
+static_assert((float)num8(0.25) == 0.25f);
+static_assert(num8(0.625) + num8(0.125) == num8(0.75));
+static_assert(num8(0.25) < num8(0.75));
+static_assert(num8(0.5) >= num8(0.5));
+
+static_assert(sizeof(num16) == 2);
+static_assert((uint16_t)num16(-1).v == 0xff00); /* -1.0 in 8:8 */
+static_assert(num16(num8(0.25)).v == num16(0.25).v);
+
+static_assert(sizeof(num32) == 4);
+// static_assert(num32(num16(-15)) == num32(-15)); (disabled; reason not recorded here)
+
+static_assert(sizeof(num64) == 8);
+static_assert(num64(num16(1)) == num64(1));
+static_assert(num64(num16(-1)) == num64(-1)); /* sign must survive widening */
+
+static_assert(libnum::is_num_v<num8> == true);
+static_assert(libnum::is_num_v<int> == false);
--- a/libnum/src/str.cpp
+++ b/libnum/src/str.cpp
@ -0,0 +1,8 @@
+#include <num/num.h>
+
+/* Digits of the decimal part, from most to least significant. Returns the
+   number of digits (which is 0 when x=0). Not implemented yet: returns 0. */
+static int decimal_digits(char *str, libnum::num64 x)
+{
+	return 0; // TODO: x = mod_64(x, num64_const(1));
+}