diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 71f529fb6..2350fe04d 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -52,7 +52,8 @@ codepoint2name[ord("^")] = "caret" codepoint2name[ord("|")] = "pipe" codepoint2name[ord("~")] = "tilde" -# static qstrs, should be sorted +# static qstrs, these must maintain a specific order for .mpy compatibility +# See QSTR_LAST_STATIC at the top of py/persistentcode.c static_qstr_list = [ "", @@ -222,6 +223,71 @@ static_qstr_list = [ "zip", ] +# Additional QSTRs that must have index <255 because they are stored in +# `mp_binary_op_method_name` and `mp_unary_op_method_name` (see py/objtype.c). +# These are not part of the .mpy compatibility list, but we place them in the +# fixed unsorted pool (i.e. QDEF0) to ensure their indices are small. +operator_qstr_list = { + "__bool__", + "__pos__", + "__neg__", + "__invert__", + "__abs__", + "__float__", + "__complex__", + "__sizeof__", + "__lt__", + "__gt__", + "__eq__", + "__le__", + "__ge__", + "__ne__", + "__contains__", + "__iadd__", + "__isub__", + "__imul__", + "__imatmul__", + "__ifloordiv__", + "__itruediv__", + "__imod__", + "__ipow__", + "__ior__", + "__ixor__", + "__iand__", + "__ilshift__", + "__irshift__", + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__floordiv__", + "__truediv__", + "__mod__", + "__divmod__", + "__pow__", + "__or__", + "__xor__", + "__and__", + "__lshift__", + "__rshift__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rfloordiv__", + "__rtruediv__", + "__rmod__", + "__rpow__", + "__ror__", + "__rxor__", + "__rand__", + "__rlshift__", + "__rrshift__", + "__get__", + "__set__", + "__delete__", +} + # this must match the equivalent function in qstr.c def compute_hash(qstr, bytes_hash): @@ -244,22 +310,13 @@ def qstr_escape(qst): return re.sub(r"[^A-Za-z0-9_]", esc_char, qst) +static_qstr_list_ident = list(map(qstr_escape, static_qstr_list)) + + def parse_input_headers(infiles): qcfgs = {} qstrs = {} - # add static qstrs - for qstr in static_qstr_list: - # work out the corresponding qstr name - ident = qstr_escape(qstr) - - # don't add duplicates - assert ident not in qstrs - - # add the qstr to the list, with order number to retain original order in file - order = len(qstrs) - 300000 - qstrs[ident] = (order, ident, qstr) - # read the qstrs in from the input files for infile in infiles: with open(infile, "rt") as f: @@ -294,22 +351,12 @@ def parse_input_headers(infiles): ident = qstr_escape(qstr) # don't add duplicates + if ident in static_qstr_list_ident: + continue if ident in qstrs: continue - # add the qstr to the list, with order number to retain original order in file - order = len(qstrs) - # but put special method names like __add__ at the top of list, so - # that their id's fit into a byte - if ident == "": - # Sort empty qstr above all still - order = -200000 - elif ident == "__dir__": - # Put __dir__ after empty qstr for builtin dir() to work - order = -190000 - elif ident.startswith("__"): - order -= 100000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = (ident, qstr) if not qcfgs: sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") @@ -348,12 +395,19 @@ def print_qstr_data(qcfgs, qstrs): print("") # add NULL qstr with no hash or data - print('QDEF(MP_QSTRnull, 0, 0, "")') + print('QDEF0(MP_QSTRnull, 0, 0, "")') - # go through each qstr and print it out - for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): + # add static qstrs to the first unsorted pool + for qstr in static_qstr_list: qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) - print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes)) + print("QDEF0(MP_QSTR_%s, %s)" % (qstr_escape(qstr), qbytes)) + + # add remaining qstrs to the sorted (by value) pool (unless they're in + # operator_qstr_list, in which case add them to the unsorted pool) + for ident, qstr in sorted(qstrs.values(), key=lambda x: x[1]): + qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) + pool = 0 if qstr in operator_qstr_list else 1 + print("QDEF%d(MP_QSTR_%s, %s)" % (pool, ident, qbytes)) def do_work(infiles): diff --git a/py/persistentcode.c b/py/persistentcode.c index 0feb93bdb..df1003bab 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -40,6 +40,11 @@ #include "py/smallint.h" +// makeqstrdata.py has a fixed list of qstrs at the start that we can assume +// are available with know indices on all MicroPython implementations, and +// avoid needing to duplicate the string data in the .mpy file. This is the +// last one in that list (anything with a qstr less than or equal to this is +// assumed to be in the list). #define QSTR_LAST_STATIC MP_QSTR_zip #if MICROPY_DYNAMIC_COMPILER diff --git a/py/qstr.c b/py/qstr.c index ea700566f..0ae08354a 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -33,9 +33,6 @@ #include "py/gc.h" #include "py/runtime.h" -// NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) -// ultimately we will replace this with a static hash table of some kind - #if MICROPY_DEBUG_VERBOSE // print debugging info #define DEBUG_printf DEBUG_printf #else // don't print debugging info @@ -74,38 +71,94 @@ size_t qstr_compute_hash(const byte *data, size_t len) { return hash; } +// The first pool is the static qstr table. The contents must remain stable as +// it is part of the .mpy ABI. See the top of py/persistentcode.c and +// static_qstr_list in makeqstrdata.py. This pool is unsorted (although in a +// future .mpy version we could re-order them and make it sorted). It also +// contains additional qstrs that must have IDs <256, see operator_qstr_list +// in makeqstrdata.py. +const qstr_hash_t mp_qstr_const_hashes_static[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) hash, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_len_t mp_qstr_const_lengths_static[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) len, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_pool_t mp_qstr_const_pool_static = { + NULL, // no previous pool + 0, // no previous pool + false, // is_sorted + MICROPY_ALLOC_QSTR_ENTRIES_INIT, + MP_QSTRnumber_of_static, // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes_static, + (qstr_len_t *)mp_qstr_const_lengths_static, + { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) str, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif + }, +}; + +// The next pool is the remainder of the qstrs defined in the firmware. This +// is sorted. const qstr_hash_t mp_qstr_const_hashes[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) hash, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) hash, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; const qstr_len_t mp_qstr_const_lengths[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) len, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) len, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; const qstr_pool_t mp_qstr_const_pool = { - NULL, // no previous pool - 0, // no previous pool + &mp_qstr_const_pool_static, + MP_QSTRnumber_of_static, + true, // is_sorted MICROPY_ALLOC_QSTR_ENTRIES_INIT, - MP_QSTRnumber_of, // corresponds to number of strings in array just below + MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below (qstr_hash_t *)mp_qstr_const_hashes, (qstr_len_t *)mp_qstr_const_lengths, { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) str, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }, }; +// If frozen code is enabled, then there is an additional, sorted, ROM pool +// containing additional qstrs required by the frozen code. #ifdef MICROPY_QSTR_EXTRA_POOL extern const qstr_pool_t MICROPY_QSTR_EXTRA_POOL; #define CONST_POOL MICROPY_QSTR_EXTRA_POOL @@ -185,7 +238,24 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + size_t low = 0; + size_t high = pool->len - 1; + + // binary search inside the pool + if (pool->is_sorted) { + while (high - low > 1) { + size_t mid = (low + high) / 2; + int cmp = strncmp(str, pool->qstrs[mid], str_len); + if (cmp <= 0) { + high = mid; + } else { + low = mid; + } + } + } + + // sequential search for the remaining strings + for (mp_uint_t at = low; at < high + 1; at++) { if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) { return pool->total_prev_len + at; @@ -194,7 +264,7 @@ qstr qstr_find_strn(const char *str, size_t str_len) { } // not found; return null qstr - return 0; + return MP_QSTRnull; } qstr qstr_from_str(const char *str) { diff --git a/py/qstr.h b/py/qstr.h index 0ef861f33..0c4fc4632 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -38,9 +38,21 @@ // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr enum { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) id, +#define QDEF0(id, hash, len, str) id, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 + #endif + MP_QSTRnumber_of_static, + MP_QSTRstart_of_main = MP_QSTRnumber_of_static - 1, // unused but shifts the enum counter back one + + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) id, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 #endif MP_QSTRnumber_of, // no underscore so it can't clash with any of the above }; @@ -66,7 +78,8 @@ typedef uint16_t qstr_len_t; typedef struct _qstr_pool_t { const struct _qstr_pool_t *prev; - size_t total_prev_len; + size_t total_prev_len : (8 * sizeof(size_t) - 1); + size_t is_sorted : 1; size_t alloc; size_t len; qstr_hash_t *hashes; diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index fc3518d35..95b2c492d 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -1396,15 +1396,16 @@ def disassemble_mpy(compiled_modules): cm.disassemble() -def freeze_mpy(base_qstrs, compiled_modules): +def freeze_mpy(firmware_qstr_idents, compiled_modules): # add to qstrs new = {} for q in global_qstrs.qstrs: - # don't add duplicates - if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: + # don't add duplicates that are already in the firmware + if q is None or q.qstr_esc in firmware_qstr_idents or q.qstr_esc in new: continue new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) - new = sorted(new.values(), key=lambda x: x[0]) + # Sort by string value (because this is a sorted pool). + new = sorted(new.values(), key=lambda x: x[2]) print('#include "py/mpconfig.h"') print('#include "py/objint.h"') @@ -1485,6 +1486,7 @@ def freeze_mpy(base_qstrs, compiled_modules): print("const qstr_pool_t mp_qstr_frozen_const_pool = {") print(" &mp_qstr_const_pool, // previous pool") print(" MP_QSTRnumber_of, // previous pool size") + print(" true, // is_sorted") print(" %u, // allocated entries" % qstr_pool_alloc) print(" %u, // used entries" % len(new)) print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") @@ -1779,14 +1781,16 @@ def main(): config.native_arch = MP_NATIVE_ARCH_NONE # set config values for qstrs, and get the existing base set of qstrs + # already in the firmware if args.qstr_header: - qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header]) + qcfgs, extra_qstrs = qstrutil.parse_input_headers([args.qstr_header]) + firmware_qstr_idents = set(qstrutil.static_qstr_list_ident) | set(extra_qstrs.keys()) config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"]) config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"]) else: config.MICROPY_QSTR_BYTES_IN_LEN = 1 config.MICROPY_QSTR_BYTES_IN_HASH = 1 - base_qstrs = list(qstrutil.static_qstr_list) + firmware_qstr_idents = set(qstrutil.static_qstr_list) # Create initial list of global qstrs. global_qstrs = GlobalQStrList() @@ -1808,7 +1812,7 @@ def main(): if args.freeze: try: - freeze_mpy(base_qstrs, compiled_modules) + freeze_mpy(firmware_qstr_idents, compiled_modules) except FreezeError as er: print(er, file=sys.stderr) sys.exit(1)