Add arbitrary precision integer support.

Some functionality is still missing (e.g. and, or, bit shift), and some things are buggy (e.g. subtract).
2014-02-22 19:25:23 +00:00 · 2014-02-22 19:25:23 +00:00 · 438c88dd2f
parent 2077397118
commit 438c88dd2f
12 changed files with 1267 additions and 6 deletions
--- a/py/binary.c
+++ b/py/binary.c
@ -6,7 +6,6 @@
 #include "mpconfig.h"
 #include "qstr.h"
 #include "obj.h"
-#include "objint.h"
 #include "binary.h"

 // Helpers to work with binary-encoded data
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@ -68,6 +68,7 @@
 // Long int implementation
 #define MICROPY_LONGINT_IMPL_NONE (0)
 #define MICROPY_LONGINT_IMPL_LONGLONG (1)
+#define MICROPY_LONGINT_IMPL_MPZ (2)

 #ifndef MICROPY_LONGINT_IMPL
 #define MICROPY_LONGINT_IMPL (MICROPY_LONGINT_IMPL_NONE)
--- a/py/mpz.c
+++ b/py/mpz.c
--- a/py/mpz.h
+++ b/py/mpz.h
@ -0,0 +1,65 @@
+// Digit types used by the arbitrary-precision integer.  A single digit is a
+// 16-bit unsigned value; the "dbl" types are 32 bits wide (unsigned and
+// signed), i.e. twice the width of one digit.
+// NOTE(review): presumably the 32-bit types hold intermediate results of
+// digit arithmetic -- confirm against mpz.c, which is not shown here.
+typedef uint16_t mpz_dig_t;
+typedef uint32_t mpz_dbl_dig_t;
+typedef int32_t mpz_dbl_dig_signed_t;
+
+// Arbitrary-precision integer.
+// NOTE(review): field meanings below are inferred from their names and
+// types; verify against mpz.c.
+typedef struct _mpz_t {
+    struct {
+        machine_uint_t neg : 1;     // 1-bit flag; presumably set when the value is negative
+        machine_uint_t alloc : 31;  // 31-bit count; presumably the number of digits allocated in dig
+    };
+    machine_uint_t len;             // presumably the number of digits currently in use
+    mpz_dig_t *dig;                 // pointer to the digit storage
+} mpz_t;
+
+// NOTE(review): the group comments below are derived from the declared
+// signatures only; the definitions live in mpz.c -- confirm details there.
+
+bool mpz_int_is_sml_int(int i);
+
+// Set up / tear down an mpz_t whose struct is supplied by the caller.
+void mpz_init_zero(mpz_t *z);
+void mpz_init_from_int(mpz_t *z, machine_int_t val);
+void mpz_deinit(mpz_t *z);
+
+// Constructors that hand back a pointer to an mpz_t, and the function that
+// takes such a pointer for release.  mpz_zero is declared with an explicit
+// (void) so that calls with arguments are rejected by the compiler.
+mpz_t *mpz_zero(void);
+mpz_t *mpz_from_int(machine_int_t i);
+mpz_t *mpz_from_str(const char *str, uint len, bool neg, uint base);
+void mpz_free(mpz_t *z);
+
+mpz_t *mpz_clone(const mpz_t *src);
+
+// Assign a new value to an existing mpz_t.
+void mpz_set(mpz_t *dest, const mpz_t *src);
+void mpz_set_from_int(mpz_t *z, machine_int_t src);
+uint mpz_set_from_str(mpz_t *z, const char *str, uint len, bool neg, uint base);
+
+// Predicates; none of them modify their argument (const pointer).
+bool mpz_is_zero(const mpz_t *z);
+bool mpz_is_pos(const mpz_t *z);
+bool mpz_is_neg(const mpz_t *z);
+bool mpz_is_odd(const mpz_t *z);
+bool mpz_is_even(const mpz_t *z);
+
+// Comparison; callers in this code base test the result against zero.
+int mpz_cmp(const mpz_t *lhs, const mpz_t *rhs);
+int mpz_cmp_sml_int(const mpz_t *lhs, int sml_int);
+
+// Arithmetic returning a pointer to the result.
+mpz_t *mpz_abs(const mpz_t *z);
+mpz_t *mpz_neg(const mpz_t *z);
+mpz_t *mpz_add(const mpz_t *lhs, const mpz_t *rhs);
+mpz_t *mpz_sub(const mpz_t *lhs, const mpz_t *rhs);
+mpz_t *mpz_mul(const mpz_t *lhs, const mpz_t *rhs);
+mpz_t *mpz_pow(const mpz_t *lhs, const mpz_t *rhs);
+
+// Arithmetic writing the result into a caller-supplied dest.
+void mpz_abs_inpl(mpz_t *dest, const mpz_t *z);
+void mpz_neg_inpl(mpz_t *dest, const mpz_t *z);
+void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
+void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
+void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
+void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs);
+
+// gcd / lcm and division; mpz_divmod returns quotient and remainder through
+// its pointer-to-pointer arguments, mpz_divmod_inpl through dest_quo/dest_rem.
+mpz_t *mpz_gcd(const mpz_t *z1, const mpz_t *z2);
+mpz_t *mpz_lcm(const mpz_t *z1, const mpz_t *z2);
+void mpz_divmod(const mpz_t *lhs, const mpz_t *rhs, mpz_t **quo, mpz_t **rem);
+void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const mpz_t *rhs);
+mpz_t *mpz_div(const mpz_t *lhs, const mpz_t *rhs);
+mpz_t *mpz_mod(const mpz_t *lhs, const mpz_t *rhs);
+
+// Conversions to native types and to strings in a given base.
+int mpz_as_int(const mpz_t *z);
+machine_float_t mpz_as_float(const mpz_t *z);
+uint mpz_as_str_size(const mpz_t *z, uint base);
+char *mpz_as_str(const mpz_t *z, uint base);
+uint mpz_as_str_inpl(const mpz_t *z, uint base, char *str);
--- a/py/obj.h
+++ b/py/obj.h
@ -225,6 +225,9 @@ mp_obj_t mp_obj_new_cell(mp_obj_t obj);
 mp_obj_t mp_obj_new_int(machine_int_t value);
 mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value);
 mp_obj_t mp_obj_new_int_from_long_str(const char *s);
+#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+mp_obj_t mp_obj_new_int_from_ll(long long val);
+#endif
 mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already);
 mp_obj_t mp_obj_new_bytes(const byte* data, uint len);
 #if MICROPY_ENABLE_FLOAT
--- a/py/objint.c
+++ b/py/objint.c
@ -9,6 +9,7 @@
 #include "qstr.h"
 #include "obj.h"
 #include "parsenum.h"
+#include "mpz.h"
 #include "objint.h"

 // This dispatcher function is expected to be independent of the implementation
--- a/py/objint.h
+++ b/py/objint.h
@ -2,13 +2,11 @@ typedef struct _mp_obj_int_t {
    mp_obj_base_t base;
 #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
    mp_longint_impl_t val;
+#elif MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+    mpz_t mpz;
 #endif
 } mp_obj_int_t;

 void int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind);
 mp_obj_t int_unary_op(int op, mp_obj_t o_in);
 mp_obj_t int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in);
-
-#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-mp_obj_t mp_obj_new_int_from_ll(long long val);
-#endif
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@ -8,6 +8,7 @@
 #include "mpconfig.h"
 #include "qstr.h"
 #include "obj.h"
+#include "mpz.h"
 #include "objint.h"
 #include "runtime0.h"

--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@ -0,0 +1,181 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include "nlr.h"
+#include "misc.h"
+#include "mpconfig.h"
+#include "qstr.h"
+#include "obj.h"
+#include "mpz.h"
+#include "objint.h"
+#include "runtime0.h"
+
+#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+
+// Allocate a new int object, tag it with int_type and initialise its mpz
+// payload through mpz_init_zero.
+STATIC mp_obj_int_t *mp_obj_int_new_mpz(void) {
+    mp_obj_int_t *self = m_new_obj(mp_obj_int_t);
+
+    self->base.type = &int_type;
+    mpz_init_zero(&self->mpz);
+
+    return self;
+}
+
+// Print an int through the supplied print callback.
+// Small ints are formatted directly with INT_FMT.  mpz-backed ints are first
+// converted to a base-10 string by mpz_as_str; that string is printed with
+// "%s" and then handed to m_free.  The kind argument is not used.
+void int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
+    if (MP_OBJ_IS_SMALL_INT(self_in)) {
+        print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
+        return;
+    }
+
+    // TODO would rather not allocate memory to print...
+    mp_obj_int_t *big = self_in;
+    char *digits = mpz_as_str(&big->mpz, 10);
+    print(env, "%s", digits);
+    m_free(digits, 0);
+}
+
+// Unary operator dispatch for an mpz-backed int.
+// o_in is used as an mp_obj_int_t without any check.  Supported ops are
+// truth value, unary plus (returns the operand itself) and negation (returns
+// a new int object); every other op returns NULL.
+mp_obj_t int_unary_op(int op, mp_obj_t o_in) {
+    mp_obj_int_t *self = o_in;
+
+    if (op == RT_UNARY_OP_BOOL) {
+        return MP_BOOL(!mpz_is_zero(&self->mpz));
+    }
+
+    if (op == RT_UNARY_OP_POSITIVE) {
+        return o_in;
+    }
+
+    if (op == RT_UNARY_OP_NEGATIVE) {
+        mp_obj_int_t *negated = mp_obj_int_new_mpz();
+        mpz_neg_inpl(&negated->mpz, &self->mpz);
+        return negated;
+    }
+
+    // RT_UNARY_OP_INVERT (~) is not implemented for mpz
+    return NULL; // op not supported
+}
+
+// Binary operator dispatch for an int whose left operand is mpz-backed.
+//
+// lhs_in is used as an mp_obj_int_t without any check, so the caller must
+// pass an mpz-backed int there.  rhs_in may be a small int or an int_type
+// object; for any other rhs, and for any op not handled below, the function
+// returns MP_OBJ_NULL.
+//
+// Arithmetic ops return a new int object, true division returns a float and
+// comparisons return a bool object.
+//
+// Dispatch is one switch on op, so it does not depend on the relative order
+// of the RT_BINARY_OP_* values (the in-place cases are reachable wherever
+// they sit in the enum), and a result object is only allocated for ops that
+// are actually supported.
+//
+// Not yet implemented: MODULO, AND, OR, XOR, LSHIFT, RSHIFT and their
+// in-place forms.
+mp_obj_t int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
+    mpz_t *zlhs = &((mp_obj_int_t*)lhs_in)->mpz;
+    mpz_t *zrhs;
+    mpz_t *ztmp = NULL; // temporary created for a small-int rhs; released before returning
+
+    if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
+        ztmp = mpz_from_int(MP_OBJ_SMALL_INT_VALUE(rhs_in));
+        zrhs = ztmp;
+    } else if (MP_OBJ_IS_TYPE(rhs_in, &int_type)) {
+        zrhs = &((mp_obj_int_t*)rhs_in)->mpz;
+    } else {
+        return MP_OBJ_NULL;
+    }
+
+    mp_obj_t result = MP_OBJ_NULL;
+
+    switch (op) {
+        case RT_BINARY_OP_TRUE_DIVIDE:
+        case RT_BINARY_OP_INPLACE_TRUE_DIVIDE: {
+            // NOTE(review): a zero divisor is not checked here (nor in the
+            // floor-divide case below) -- confirm the intended behaviour.
+            machine_float_t flhs = mpz_as_float(zlhs);
+            machine_float_t frhs = mpz_as_float(zrhs);
+            result = mp_obj_new_float(flhs / frhs);
+            break;
+        }
+
+        case RT_BINARY_OP_ADD:
+        case RT_BINARY_OP_INPLACE_ADD: {
+            mp_obj_int_t *res = mp_obj_int_new_mpz();
+            mpz_add_inpl(&res->mpz, zlhs, zrhs);
+            result = res;
+            break;
+        }
+
+        case RT_BINARY_OP_SUBTRACT:
+        case RT_BINARY_OP_INPLACE_SUBTRACT: {
+            mp_obj_int_t *res = mp_obj_int_new_mpz();
+            mpz_sub_inpl(&res->mpz, zlhs, zrhs);
+            result = res;
+            break;
+        }
+
+        case RT_BINARY_OP_MULTIPLY:
+        case RT_BINARY_OP_INPLACE_MULTIPLY: {
+            mp_obj_int_t *res = mp_obj_int_new_mpz();
+            mpz_mul_inpl(&res->mpz, zlhs, zrhs);
+            result = res;
+            break;
+        }
+
+        case RT_BINARY_OP_FLOOR_DIVIDE:
+        case RT_BINARY_OP_INPLACE_FLOOR_DIVIDE: {
+            mp_obj_int_t *res = mp_obj_int_new_mpz();
+            // the remainder is computed alongside the quotient and then discarded
+            mpz_t rem; mpz_init_zero(&rem);
+            mpz_divmod_inpl(&res->mpz, &rem, zlhs, zrhs);
+            mpz_deinit(&rem);
+            result = res;
+            break;
+        }
+
+        case RT_BINARY_OP_POWER:
+        case RT_BINARY_OP_INPLACE_POWER: {
+            mp_obj_int_t *res = mp_obj_int_new_mpz();
+            mpz_pow_inpl(&res->mpz, zlhs, zrhs);
+            result = res;
+            break;
+        }
+
+        case RT_BINARY_OP_LESS:
+            result = MP_BOOL(mpz_cmp(zlhs, zrhs) < 0);
+            break;
+        case RT_BINARY_OP_MORE:
+            result = MP_BOOL(mpz_cmp(zlhs, zrhs) > 0);
+            break;
+        case RT_BINARY_OP_LESS_EQUAL:
+            result = MP_BOOL(mpz_cmp(zlhs, zrhs) <= 0);
+            break;
+        case RT_BINARY_OP_MORE_EQUAL:
+            result = MP_BOOL(mpz_cmp(zlhs, zrhs) >= 0);
+            break;
+        case RT_BINARY_OP_EQUAL:
+            result = MP_BOOL(mpz_cmp(zlhs, zrhs) == 0);
+            break;
+        case RT_BINARY_OP_NOT_EQUAL:
+            result = MP_BOOL(mpz_cmp(zlhs, zrhs) != 0);
+            break;
+
+        default:
+            // op not supported: result stays MP_OBJ_NULL
+            break;
+    }
+
+    // Release the temporary that wrapped a small-int rhs; an int_type rhs is
+    // owned by its object and must not be freed here.
+    if (ztmp != NULL) {
+        mpz_free(ztmp);
+    }
+
+    return result;
+}
+
+// Create an int object from a machine integer: a small int when the value
+// fits (per MP_OBJ_FITS_SMALL_INT), otherwise an mpz-backed int created via
+// mp_obj_new_int_from_ll.
+mp_obj_t mp_obj_new_int(machine_int_t value) {
+    mp_obj_t result;
+
+    if (MP_OBJ_FITS_SMALL_INT(value)) {
+        result = MP_OBJ_NEW_SMALL_INT(value);
+    } else {
+        result = mp_obj_new_int_from_ll(value);
+    }
+
+    return result;
+}
+
+// Create an mpz-backed int object holding val.
+// NOTE(review): mpz_set_from_int is declared in mpz.h as taking a
+// machine_int_t, so val is converted to that type at the call; if
+// machine_int_t is narrower than long long the upper bits are lost -- confirm.
+mp_obj_t mp_obj_new_int_from_ll(long long val) {
+    mp_obj_int_t *self = mp_obj_int_new_mpz();
+
+    mpz_set_from_int(&self->mpz, val);
+
+    return self;
+}
+
+// Create an int object from an unsigned machine integer.
+// NOTE(review): the non-small path goes through mp_obj_new_int_from_ll, i.e.
+// the unsigned value is converted to long long first -- confirm that large
+// values survive that conversion on all targets.
+mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value) {
+    mp_obj_t result;
+
+    // SMALL_INT accepts only signed numbers, of one bit less size
+    // than word size, which totals 2 bits less for unsigned numbers.
+    // So the value is small only when its top two bits are both clear.
+    if ((value & (WORD_MSBIT_HIGH | (WORD_MSBIT_HIGH >> 1))) == 0) {
+        result = MP_OBJ_NEW_SMALL_INT(value);
+    } else {
+        result = mp_obj_new_int_from_ll(value);
+    }
+
+    return result;
+}
+
+// Create an mpz-backed int object from a NUL-terminated base-10 string.
+// The string is passed to mpz_set_from_str with neg=false; if the count it
+// returns differs from the string length, a SyntaxError is raised through
+// nlr_jump.
+mp_obj_t mp_obj_new_int_from_long_str(const char *str) {
+    uint len = strlen(str);
+    mp_obj_int_t *self = mp_obj_int_new_mpz();
+
+    if (mpz_set_from_str(&self->mpz, str, len, false, 10) != len) {
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_SyntaxError, "invalid syntax for number"));
+    }
+
+    return self;
+}
+
+// Return the value of an int object as a machine integer: the embedded
+// value for a small int, otherwise whatever mpz_as_int yields for the mpz
+// payload (self_in is then used as an mp_obj_int_t without further checks).
+machine_int_t mp_obj_int_get(mp_obj_t self_in) {
+    machine_int_t value;
+
+    if (MP_OBJ_IS_SMALL_INT(self_in)) {
+        value = MP_OBJ_SMALL_INT_VALUE(self_in);
+    } else {
+        mp_obj_int_t *big = self_in;
+        value = mpz_as_int(&big->mpz);
+    }
+
+    return value;
+}
+
+// "Checked" variant of mp_obj_int_get.  At present it performs no overflow
+// check and simply forwards to mp_obj_int_get.
+machine_int_t mp_obj_int_get_checked(mp_obj_t self_in) {
+    // TODO: Check overflow
+    machine_int_t value = mp_obj_int_get(self_in);
+    return value;
+}
+
+#endif
--- a/py/py.mk
+++ b/py/py.mk
@ -15,6 +15,7 @@ PY_O_BASENAME = \
 	qstr.o \
 	vstr.o \
 	unicode.o \
+	mpz.o \
 	lexer.o \
 	lexerstr.o \
 	lexerunix.o \
@ -51,6 +52,7 @@ PY_O_BASENAME = \
 	objgetitemiter.o \
 	objint.o \
 	objint_longlong.o \
+	objint_mpz.o \
 	objlist.o \
 	objmap.o \
 	objmodule.o \
--- a/stm/mpconfigport.h
+++ b/stm/mpconfigport.h
@ -7,6 +7,7 @@
 #define MICROPY_ENABLE_GC           (1)
 #define MICROPY_ENABLE_REPL_HELPERS (1)
 #define MICROPY_ENABLE_FLOAT        (1)
+#define MICROPY_LONGINT_IMPL        (MICROPY_LONGINT_IMPL_MPZ)
 #define MICROPY_PATH_MAX            (128)

 // type definitions for the specific machine
--- a/unix/mpconfigport.h
+++ b/unix/mpconfigport.h
@ -14,7 +14,7 @@
 #define MICROPY_ENABLE_LEXER_UNIX   (1)
 #define MICROPY_ENABLE_SOURCE_LINE  (1)
 #define MICROPY_ENABLE_FLOAT        (1)
-#define MICROPY_LONGINT_IMPL        (MICROPY_LONGINT_IMPL_LONGLONG)
+#define MICROPY_LONGINT_IMPL        (MICROPY_LONGINT_IMPL_MPZ)
 #define MICROPY_PATH_MAX            (PATH_MAX)

 // type definitions for the specific machine