py/persistentcode: Define static qstr set to reduce size of mpy files.

When encoded in the mpy file, if qstr <= QSTR_LAST_STATIC then store two
bytes: 0, static_qstr_id.  Otherwise encode the qstr as usual (either with
string data or a reference into the qstr window).

Reduces mpy file size by about 5%.
This commit is contained in:
Damien George 2019-03-01 14:33:03 +11:00
parent 992a6e1dea
commit 4f0931b21f
3 changed files with 213 additions and 6 deletions

View File

@ -51,6 +51,176 @@ codepoint2name[ord('^')] = 'caret'
codepoint2name[ord('|')] = 'pipe'
codepoint2name[ord('~')] = 'tilde'
# static qstrs, should be sorted
static_qstr_list = [
"",
"__dir__", # Put __dir__ after empty qstr for builtin dir() to work
"\n",
" ",
"*",
"/",
"<module>",
"_",
"__call__",
"__class__",
"__delitem__",
"__enter__",
"__exit__",
"__getattr__",
"__getitem__",
"__hash__",
"__init__",
"__int__",
"__iter__",
"__len__",
"__main__",
"__module__",
"__name__",
"__new__",
"__next__",
"__qualname__",
"__repr__",
"__setitem__",
"__str__",
"ArithmeticError",
"AssertionError",
"AttributeError",
"BaseException",
"EOFError",
"Ellipsis",
"Exception",
"GeneratorExit",
"ImportError",
"IndentationError",
"IndexError",
"KeyError",
"KeyboardInterrupt",
"LookupError",
"MemoryError",
"NameError",
"NoneType",
"NotImplementedError",
"OSError",
"OverflowError",
"RuntimeError",
"StopIteration",
"SyntaxError",
"SystemExit",
"TypeError",
"ValueError",
"ZeroDivisionError",
"abs",
"all",
"any",
"append",
"args",
"bool",
"builtins",
"bytearray",
"bytecode",
"bytes",
"callable",
"chr",
"classmethod",
"clear",
"close",
"const",
"copy",
"count",
"dict",
"dir",
"divmod",
"end",
"endswith",
"eval",
"exec",
"extend",
"find",
"format",
"from_bytes",
"get",
"getattr",
"globals",
"hasattr",
"hash",
"id",
"index",
"insert",
"int",
"isalpha",
"isdigit",
"isinstance",
"islower",
"isspace",
"issubclass",
"isupper",
"items",
"iter",
"join",
"key",
"keys",
"len",
"list",
"little",
"locals",
"lower",
"lstrip",
"main",
"map",
"micropython",
"next",
"object",
"open",
"ord",
"pop",
"popitem",
"pow",
"print",
"range",
"read",
"readinto",
"readline",
"remove",
"replace",
"repr",
"reverse",
"rfind",
"rindex",
"round",
"rsplit",
"rstrip",
"self",
"send",
"sep",
"set",
"setattr",
"setdefault",
"sort",
"sorted",
"split",
"start",
"startswith",
"staticmethod",
"step",
"stop",
"str",
"strip",
"sum",
"super",
"throw",
"to_bytes",
"tuple",
"type",
"update",
"upper",
"utf-8",
"value",
"values",
"write",
"zip",
]
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
hash = 5381
@ -70,9 +240,22 @@ def qstr_escape(qst):
return re.sub(r'[^A-Za-z0-9_]', esc_char, qst)
def parse_input_headers(infiles):
# read the qstrs in from the input files
qcfgs = {}
qstrs = {}
# add static qstrs
for qstr in static_qstr_list:
# work out the corresponding qstr name
ident = qstr_escape(qstr)
# don't add duplicates
assert ident not in qstrs
# add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr)
# read the qstrs in from the input files
for infile in infiles:
with open(infile, 'rt') as f:
for line in f:

View File

@ -38,6 +38,8 @@
#include "py/smallint.h"
#define QSTR_LAST_STATIC MP_QSTR_zip
// The current version of .mpy files
#define MPY_VERSION (3)
@ -187,6 +189,10 @@ STATIC size_t read_uint(mp_reader_t *reader, byte **out) {
STATIC qstr load_qstr(mp_reader_t *reader, qstr_window_t *qw) {
size_t len = read_uint(reader, NULL);
if (len == 0) {
// static qstr
return read_byte(reader);
}
if (len & 1) {
// qstr in window
return qstr_window_access(qw, len >> 1);
@ -362,6 +368,12 @@ STATIC void mp_print_uint(mp_print_t *print, size_t n) {
}
STATIC void save_qstr(mp_print_t *print, qstr_window_t *qw, qstr qst) {
if (qst <= QSTR_LAST_STATIC) {
// encode static qstr
byte buf[2] = {0, qst & 0xff};
mp_print_bytes(print, buf, 2);
return;
}
size_t idx = qstr_window_insert(qw, qst);
if (idx < QSTR_WINDOW_SIZE) {
// qstr found in window, encode index to it

View File

@ -63,6 +63,17 @@ class Config:
MICROPY_LONGINT_IMPL_MPZ = 2
config = Config()
class QStrType:
def __init__(self, str):
self.str = str
self.qstr_esc = qstrutil.qstr_escape(self.str)
self.qstr_id = 'MP_QSTR_' + self.qstr_esc
# Initialise global list of qstrs with static qstrs
global_qstrs = [None] # MP_QSTR_NULL should never be referenced
for n in qstrutil.static_qstr_list:
global_qstrs.append(QStrType(n))
class QStrWindow:
def __init__(self, size_log2):
self.window = []
@ -421,17 +432,17 @@ def read_uint(f, out=None):
break
return i
global_qstrs = []
qstr_type = namedtuple('qstr', ('str', 'qstr_esc', 'qstr_id'))
def read_qstr(f, qstr_win):
ln = read_uint(f)
if ln == 0:
# static qstr
return bytes_cons(f.read(1))[0]
if ln & 1:
# qstr in table
return qstr_win.access(ln >> 1)
ln >>= 1
data = str_cons(f.read(ln), 'utf8')
qstr_esc = qstrutil.qstr_escape(data)
global_qstrs.append(qstr_type(data, qstr_esc, 'MP_QSTR_' + qstr_esc))
global_qstrs.append(QStrType(data))
qstr_win.push(len(global_qstrs) - 1)
return len(global_qstrs) - 1
@ -476,6 +487,7 @@ def read_qstr_and_pack(f, bytecode, qstr_win):
bytecode.append(qst >> 8)
def read_bytecode(file, bytecode, qstr_win):
QSTR_LAST_STATIC = len(qstrutil.static_qstr_list)
while not bytecode.is_full():
op = read_byte(file, bytecode)
f, sz = mp_opcode_format(bytecode.buf, bytecode.idx - 1, False)
@ -528,7 +540,7 @@ def freeze_mpy(base_qstrs, raw_codes):
new = {}
for q in global_qstrs:
# don't add duplicates
if q.qstr_esc in base_qstrs or q.qstr_esc in new:
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
continue
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str)
new = sorted(new.values(), key=lambda x: x[0])