py/mpstate.h: Adjust start of root pointer section to exclude non-ptrs.

This patch moves the start of the root pointer section in mp_state_ctx_t
so that it skips entries that are not pointers and don't need scanning.

Previously, the start of the root pointer section was at the very beginning
of the mp_state_ctx_t struct (which is the beginning of mp_state_thread_t).
This was because the original assembler version of the NLR code was hard-coded to
have the nlr_top pointer at the start of this state structure.  But now
that the NLR code is partially written in C there is no longer this
restriction on the location of nlr_top (and a comment to this effect has
been removed in this patch).

So now the root pointer section starts part way through the
mp_state_thread_t structure, after the entries which are not root pointers.

This patch also moves the non-pointer entries for MICROPY_ENABLE_SCHEDULER
outside the root pointer section.

Moving non-pointer entries out of the root pointer section helps to make
the GC more precise and should help to prevent some cases of collectable
garbage being kept.

This patch also has a measurable improvement in performance of the
pystone.py benchmark: on unix x86-64 and stm32 there was an improvement of
roughly 0.6% (tested with both gcc 7.3 and gcc 8.1).
This commit is contained in:
Damien George 2018-05-12 22:09:34 +10:00
parent aeaace0737
commit 749b16174b
2 changed files with 24 additions and 14 deletions

View File

@ -328,7 +328,9 @@ void gc_collect_start(void) {
// correctly in the mp_state_ctx structure. We scan nlr_top, dict_locals,
// dict_globals, then the root pointer section of mp_state_vm.
void **ptrs = (void**)(void*)&mp_state_ctx;
gc_collect_root(ptrs, offsetof(mp_state_ctx_t, vm.qstr_last_chunk) / sizeof(void*));
size_t root_start = offsetof(mp_state_ctx_t, thread.dict_locals);
size_t root_end = offsetof(mp_state_ctx_t, vm.qstr_last_chunk);
gc_collect_root(ptrs + root_start / sizeof(void*), (root_end - root_start) / sizeof(void*));
#if MICROPY_ENABLE_PYSTACK
// Trace root pointers from the Python stack.

View File

@ -105,10 +105,11 @@ typedef struct _mp_state_mem_t {
// This structure holds runtime and VM information. It includes a section
// which contains root pointers that must be scanned by the GC.
typedef struct _mp_state_vm_t {
////////////////////////////////////////////////////////////
// START ROOT POINTER SECTION
// everything that needs GC scanning must go here
// this must start at the start of this structure
//
// CONTINUE ROOT POINTER SECTION
// This must start at the start of this structure and follows
// the state in the mp_state_thread_t structure, continuing
// the root pointer section from there.
//
qstr_pool_t *last_pool;
@ -139,8 +140,6 @@ typedef struct _mp_state_vm_t {
volatile mp_obj_t mp_pending_exception;
#if MICROPY_ENABLE_SCHEDULER
volatile int16_t sched_state;
uint16_t sched_sp;
mp_sched_item_t sched_stack[MICROPY_SCHEDULER_DEPTH];
#endif
@ -208,6 +207,11 @@ typedef struct _mp_state_vm_t {
mp_int_t mp_emergency_exception_buf_size;
#endif
#if MICROPY_ENABLE_SCHEDULER
volatile int16_t sched_state;
uint16_t sched_sp;
#endif
#if MICROPY_PY_THREAD_GIL
// This is a global mutex used to make the VM/runtime thread-safe.
mp_thread_mutex_t gil_mutex;
@ -217,11 +221,6 @@ typedef struct _mp_state_vm_t {
// This structure holds state that is specific to a given thread.
// Everything in this structure is scanned for root pointers.
typedef struct _mp_state_thread_t {
mp_obj_dict_t *dict_locals;
mp_obj_dict_t *dict_globals;
nlr_buf_t *nlr_top; // ROOT POINTER
// Stack top at the start of program
char *stack_top;
@ -234,12 +233,21 @@ typedef struct _mp_state_thread_t {
uint8_t *pystack_end;
uint8_t *pystack_cur;
#endif
////////////////////////////////////////////////////////////
// START ROOT POINTER SECTION
// Everything that needs GC scanning must start here, and
// is followed by state in the mp_state_vm_t structure.
//
mp_obj_dict_t *dict_locals;
mp_obj_dict_t *dict_globals;
nlr_buf_t *nlr_top;
} mp_state_thread_t;
// This structure combines the above 3 structures.
// The order of the entries is important for root pointer scanning in the GC to work.
// Note: if this structure changes then revisit all nlr asm code since they
// have the offset of nlr_top hard-coded.
typedef struct _mp_state_ctx_t {
mp_state_thread_t thread;
mp_state_vm_t vm;