py: Provide mp_decode_uint_skip() to help reduce stack usage.

Taking the address of a local variable leads to increased stack usage, so
the mp_decode_uint_skip() function is added to reduce the need for taking
addresses.  The changes in this patch reduce stack usage of a Python call
by 8 bytes on ARM Thumb, by 16 bytes on non-windowing Xtensa archs, and by
16 bytes on x86-64.  Code size is also slightly reduced on most archs by
around 32 bytes.
This commit is contained in:
Damien George 2017-06-09 13:31:57 +10:00
parent 4352b944d2
commit a8a5d1e8c8
5 changed files with 28 additions and 25 deletions

10
py/bc.c
View File

@ -64,6 +64,14 @@ mp_uint_t mp_decode_uint_value(const byte *ptr) {
return mp_decode_uint(&ptr);
}
// Step over one variable-length encoded uint without decoding its value.  Every
// byte with bit 7 set is a continuation byte; the first byte with bit 7 clear ends
// the encoded value.  Returns a pointer to the byte immediately after that final
// byte.  The pointer is taken and returned by value so that a caller which only
// needs to skip the entry does not have to take the address of a local variable,
// which helps reduce stack usage at the caller.
const byte *mp_decode_uint_skip(const byte *ptr) {
    for (;;) {
        byte cur = *ptr++;
        if ((cur & 0x80) == 0) {
            // terminating byte consumed; ptr already points past it
            return ptr;
        }
    }
}
STATIC NORETURN void fun_pos_args_mismatch(mp_obj_fun_bc_t *f, size_t expected, size_t given) {
#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE
// generic message, used also for other argument issues
@ -115,7 +123,7 @@ void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw
// get params
size_t n_state = mp_decode_uint(&code_state->ip);
mp_decode_uint(&code_state->ip); // skip n_exc_stack
code_state->ip = mp_decode_uint_skip(code_state->ip); // skip n_exc_stack
size_t scope_flags = *code_state->ip++;
size_t n_pos_args = *code_state->ip++;
size_t n_kwonly_args = *code_state->ip++;

View File

@ -92,6 +92,7 @@ typedef struct _mp_code_state_t {
mp_uint_t mp_decode_uint(const byte **ptr);
mp_uint_t mp_decode_uint_value(const byte *ptr);
const byte *mp_decode_uint_skip(const byte *ptr);
mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc);
mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, size_t n_args, size_t n_kw, const mp_obj_t *args);

View File

@ -141,11 +141,11 @@ const mp_obj_type_t mp_type_fun_builtin_var = {
/* byte code functions */
qstr mp_obj_code_get_name(const byte *code_info) {
mp_decode_uint(&code_info); // skip code_info_size entry
code_info = mp_decode_uint_skip(code_info); // skip code_info_size entry
#if MICROPY_PERSISTENT_CODE
return code_info[0] | (code_info[1] << 8);
#else
return mp_decode_uint(&code_info);
return mp_decode_uint_value(code_info);
#endif
}
@ -163,8 +163,8 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) {
#endif
const byte *bc = fun->bytecode;
mp_decode_uint(&bc); // skip n_state
mp_decode_uint(&bc); // skip n_exc_stack
bc = mp_decode_uint_skip(bc); // skip n_state
bc = mp_decode_uint_skip(bc); // skip n_exc_stack
bc++; // skip scope_params
bc++; // skip n_pos_args
bc++; // skip n_kwonly_args
@ -205,12 +205,9 @@ mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, size_t n_args
MP_STACK_CHECK();
mp_obj_fun_bc_t *self = MP_OBJ_TO_PTR(self_in);
// get start of bytecode
const byte *ip = self->bytecode;
// bytecode prelude: state size and exception stack size
size_t n_state = mp_decode_uint(&ip);
size_t n_exc_stack = mp_decode_uint(&ip);
size_t n_state = mp_decode_uint_value(self->bytecode);
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self->bytecode));
// allocate state for locals and stack
size_t state_size = n_state * sizeof(mp_obj_t) + n_exc_stack * sizeof(mp_exc_stack_t);
@ -243,12 +240,9 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
mp_obj_fun_bc_t *self = MP_OBJ_TO_PTR(self_in);
DEBUG_printf("Func n_def_args: %d\n", self->n_def_args);
// get start of bytecode
const byte *ip = self->bytecode;
// bytecode prelude: state size and exception stack size
size_t n_state = mp_decode_uint(&ip);
size_t n_exc_stack = mp_decode_uint(&ip);
size_t n_state = mp_decode_uint_value(self->bytecode);
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self->bytecode));
#if VM_DETECT_STACK_OVERFLOW
n_state += 1;

View File

@ -54,12 +54,9 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun;
assert(self_fun->base.type == &mp_type_fun_bc);
// get start of bytecode
const byte *ip = self_fun->bytecode;
// bytecode prelude: get state size and exception stack size
mp_uint_t n_state = mp_decode_uint(&ip);
mp_uint_t n_exc_stack = mp_decode_uint(&ip);
size_t n_state = mp_decode_uint_value(self_fun->bytecode);
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self_fun->bytecode));
// allocate the generator object, with room for local stack and exception stack
mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte,

13
py/vm.c
View File

@ -1363,22 +1363,25 @@ unwind_loop:
// TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj)
if (nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
const byte *ip = code_state->fun_bc->bytecode;
mp_decode_uint(&ip); // skip n_state
mp_decode_uint(&ip); // skip n_exc_stack
ip = mp_decode_uint_skip(ip); // skip n_state
ip = mp_decode_uint_skip(ip); // skip n_exc_stack
ip++; // skip scope_params
ip++; // skip n_pos_args
ip++; // skip n_kwonly_args
ip++; // skip n_def_pos_args
size_t bc = code_state->ip - ip;
size_t code_info_size = mp_decode_uint(&ip);
size_t code_info_size = mp_decode_uint_value(ip);
ip = mp_decode_uint_skip(ip); // skip code_info_size
bc -= code_info_size;
#if MICROPY_PERSISTENT_CODE
qstr block_name = ip[0] | (ip[1] << 8);
qstr source_file = ip[2] | (ip[3] << 8);
ip += 4;
#else
qstr block_name = mp_decode_uint(&ip);
qstr source_file = mp_decode_uint(&ip);
qstr block_name = mp_decode_uint_value(ip);
ip = mp_decode_uint_skip(ip);
qstr source_file = mp_decode_uint_value(ip);
ip = mp_decode_uint_skip(ip);
#endif
size_t source_line = 1;
size_t c;