From 749b16174b5292b2680ab027a6a1b3874e22ea43 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 12 May 2018 22:09:34 +1000 Subject: [PATCH] py/mpstate.h: Adjust start of root pointer section to exclude non-ptrs. This patch moves the start of the root pointer section in mp_state_ctx_t so that it skips entries that are not pointers and don't need scanning. Previously, the start of the root pointer section was at the very beginning of the mp_state_ctx_t struct (which is the beginning of mp_state_thread_t). This was the original assembler version of the NLR code was hard-coded to have the nlr_top pointer at the start of this state structure. But now that the NLR code is partially written in C there is no longer this restriction on the location of nlr_top (and a comment to this effect has been removed in this patch). So now the root pointer section starts part way through the mp_state_thread_t structure, after the entries which are not root pointers. This patch also moves the non-pointer entries for MICROPY_ENABLE_SCHEDULER outside the root pointer section. Moving non-pointer entries out of the root pointer section helps to make the GC more precise and should help to prevent some cases of collectable garbage being kept. This patch also has a measurable improvement in performance of the pystone.py benchmark: on unix x86-64 and stm32 there was an improvement of roughly 0.6% (tested with both gcc 7.3 and gcc 8.1). --- py/gc.c | 4 +++- py/mpstate.h | 34 +++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/py/gc.c b/py/gc.c index 84c9918fd..38de51399 100644 --- a/py/gc.c +++ b/py/gc.c @@ -328,7 +328,9 @@ void gc_collect_start(void) { // correctly in the mp_state_ctx structure. We scan nlr_top, dict_locals, // dict_globals, then the root pointer section of mp_state_vm. void **ptrs = (void**)(void*)&mp_state_ctx; - gc_collect_root(ptrs, offsetof(mp_state_ctx_t, vm.qstr_last_chunk) / sizeof(void*)); + size_t root_start = offsetof(mp_state_ctx_t, thread.dict_locals); + size_t root_end = offsetof(mp_state_ctx_t, vm.qstr_last_chunk); + gc_collect_root(ptrs + root_start / sizeof(void*), (root_end - root_start) / sizeof(void*)); #if MICROPY_ENABLE_PYSTACK // Trace root pointers from the Python stack. diff --git a/py/mpstate.h b/py/mpstate.h index 16c4bf6c5..199fd2cb6 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -105,10 +105,11 @@ typedef struct _mp_state_mem_t { // This structure hold runtime and VM information. It includes a section // which contains root pointers that must be scanned by the GC. typedef struct _mp_state_vm_t { - //////////////////////////////////////////////////////////// - // START ROOT POINTER SECTION - // everything that needs GC scanning must go here - // this must start at the start of this structure + // + // CONTINUE ROOT POINTER SECTION + // This must start at the start of this structure and follows + // the state in the mp_state_thread_t structure, continuing + // the root pointer section from there. // qstr_pool_t *last_pool; @@ -139,8 +140,6 @@ typedef struct _mp_state_vm_t { volatile mp_obj_t mp_pending_exception; #if MICROPY_ENABLE_SCHEDULER - volatile int16_t sched_state; - uint16_t sched_sp; mp_sched_item_t sched_stack[MICROPY_SCHEDULER_DEPTH]; #endif @@ -208,6 +207,11 @@ typedef struct _mp_state_vm_t { mp_int_t mp_emergency_exception_buf_size; #endif + #if MICROPY_ENABLE_SCHEDULER + volatile int16_t sched_state; + uint16_t sched_sp; + #endif + #if MICROPY_PY_THREAD_GIL // This is a global mutex used to make the VM/runtime thread-safe. mp_thread_mutex_t gil_mutex; @@ -217,11 +221,6 @@ typedef struct _mp_state_vm_t { // This structure holds state that is specific to a given thread. // Everything in this structure is scanned for root pointers. typedef struct _mp_state_thread_t { - mp_obj_dict_t *dict_locals; - mp_obj_dict_t *dict_globals; - - nlr_buf_t *nlr_top; // ROOT POINTER - // Stack top at the start of program char *stack_top; @@ -234,12 +233,21 @@ typedef struct _mp_state_thread_t { uint8_t *pystack_end; uint8_t *pystack_cur; #endif + + //////////////////////////////////////////////////////////// + // START ROOT POINTER SECTION + // Everything that needs GC scanning must start here, and + // is followed by state in the mp_state_vm_t structure. + // + + mp_obj_dict_t *dict_locals; + mp_obj_dict_t *dict_globals; + + nlr_buf_t *nlr_top; } mp_state_thread_t; // This structure combines the above 3 structures. // The order of the entries are important for root pointer scanning in the GC to work. -// Note: if this structure changes then revisit all nlr asm code since they -// have the offset of nlr_top hard-coded. typedef struct _mp_state_ctx_t { mp_state_thread_t thread; mp_state_vm_t vm;