From 006671056da6627073f041b4d451cab9db031ff0 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 13 Oct 2018 14:53:35 +1100 Subject: [PATCH] py/emitnative: Load native fun table ptr from const table for all archs. All architectures now have a dedicated register to hold the pointer to the native function table mp_fun_table, and so they all need to load this register at the start of the native function. This commit makes the loading of this register uniform across architectures by passing the pointer in the constant table for the native function, and then loading the register from the constant table. Doing it this way means that the pointer is not stored in the assembly code, helping to make the code more portable. --- py/asmarm.h | 6 ++++++ py/asmthumb.c | 2 +- py/asmthumb.h | 5 +++++ py/asmx64.c | 2 +- py/asmx64.h | 6 ++++++ py/asmx86.c | 2 +- py/asmx86.h | 6 ++++++ py/asmxtensa.c | 4 ++-- py/asmxtensa.h | 5 +++++ py/emitnative.c | 52 ++++++++++++++++++++----------------------------- 10 files changed, 54 insertions(+), 36 deletions(-) diff --git a/py/asmarm.h b/py/asmarm.h index f72a7f732..3ee633c22 100644 --- a/py/asmarm.h +++ b/py/asmarm.h @@ -124,6 +124,9 @@ void asm_arm_b_label(asm_arm_t *as, uint label); void asm_arm_bl_ind(asm_arm_t *as, void *fun_ptr, uint fun_id, uint reg_temp); void asm_arm_bx_reg(asm_arm_t *as, uint reg_src); +// Holds a pointer to mp_fun_table +#define ASM_ARM_REG_FUN_TABLE ASM_ARM_REG_R7 + #if GENERIC_ASM_API // The following macros provide a (mostly) arch-independent API to @@ -146,6 +149,9 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src); #define REG_LOCAL_3 ASM_ARM_REG_R6 #define REG_LOCAL_NUM (3) +// Holds a pointer to mp_fun_table +#define REG_FUN_TABLE ASM_ARM_REG_FUN_TABLE + #define ASM_T asm_arm_t #define ASM_END_PASS asm_arm_end_pass #define ASM_ENTRY asm_arm_entry diff --git a/py/asmthumb.c b/py/asmthumb.c index 54b539a8d..1ef09c78e 100644 --- a/py/asmthumb.c +++ b/py/asmthumb.c @@ -383,7 +383,7 @@ void asm_thumb_bcc_label(asm_thumb_t *as, int cond, uint label) { void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) { // Load ptr to function from table, indexed by fun_id, then call it - asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_R7, fun_id); + asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id); asm_thumb_op16(as, OP_BLX(reg_temp)); } diff --git a/py/asmthumb.h b/py/asmthumb.h index 83aec0287..0fd39120e 100644 --- a/py/asmthumb.h +++ b/py/asmthumb.h @@ -261,6 +261,9 @@ void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narro void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp); // convenience +// Holds a pointer to mp_fun_table +#define ASM_THUMB_REG_FUN_TABLE ASM_THUMB_REG_R7 + #if GENERIC_ASM_API // The following macros provide a (mostly) arch-independent API to @@ -284,6 +287,8 @@ void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp #define REG_LOCAL_3 ASM_THUMB_REG_R6 #define REG_LOCAL_NUM (3) +#define REG_FUN_TABLE ASM_THUMB_REG_FUN_TABLE + #define ASM_T asm_thumb_t #define ASM_END_PASS asm_thumb_end_pass #define ASM_ENTRY asm_thumb_entry diff --git a/py/asmx64.c b/py/asmx64.c index 9cd2fc64c..3609f49d3 100644 --- a/py/asmx64.c +++ b/py/asmx64.c @@ -623,7 +623,7 @@ void asm_x64_call_i1(asm_x64_t *as, void* func, int i1) { void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r64) { assert(temp_r64 < 8); - asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_RBP, fun_id * WORD_SIZE, temp_r64); + asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_FUN_TABLE, fun_id * WORD_SIZE, temp_r64); asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64)); } diff --git a/py/asmx64.h b/py/asmx64.h index f40b127e5..76e3ad556 100644 --- a/py/asmx64.h +++ b/py/asmx64.h @@ -116,6 +116,9 @@ void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64); void asm_x64_mov_reg_pcrel(asm_x64_t *as, int dest_r64, mp_uint_t label); void asm_x64_call_ind(asm_x64_t* as, size_t fun_id, int temp_r32); +// Holds a pointer to mp_fun_table +#define ASM_X64_REG_FUN_TABLE ASM_X64_REG_RBP + #if GENERIC_ASM_API // The following macros provide a (mostly) arch-independent API to @@ -141,6 +144,9 @@ void asm_x64_call_ind(asm_x64_t* as, size_t fun_id, int temp_r32); #define REG_LOCAL_3 ASM_X64_REG_R13 #define REG_LOCAL_NUM (3) +// Holds a pointer to mp_fun_table +#define REG_FUN_TABLE ASM_X64_REG_FUN_TABLE + #define ASM_T asm_x64_t #define ASM_END_PASS asm_x64_end_pass #define ASM_ENTRY asm_x64_entry diff --git a/py/asmx86.c b/py/asmx86.c index 60917fdeb..8ce576ac8 100644 --- a/py/asmx86.c +++ b/py/asmx86.c @@ -514,7 +514,7 @@ void asm_x86_call_ind(asm_x86_t *as, size_t fun_id, mp_uint_t n_args, int temp_r } // Load the pointer to the function and make the call - asm_x86_mov_mem32_to_r32(as, ASM_X86_REG_EBP, fun_id * WORD_SIZE, temp_r32); + asm_x86_mov_mem32_to_r32(as, ASM_X86_REG_FUN_TABLE, fun_id * WORD_SIZE, temp_r32); asm_x86_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R32(2) | MODRM_RM_REG | MODRM_RM_R32(temp_r32)); // the caller must clean up the stack diff --git a/py/asmx86.h b/py/asmx86.h index a5535b548..1e3d3170a 100644 --- a/py/asmx86.h +++ b/py/asmx86.h @@ -114,6 +114,9 @@ void asm_x86_mov_local_addr_to_r32(asm_x86_t* as, int local_num, int dest_r32); void asm_x86_mov_reg_pcrel(asm_x86_t *as, int dest_r64, mp_uint_t label); void asm_x86_call_ind(asm_x86_t* as, size_t fun_id, mp_uint_t n_args, int temp_r32); +// Holds a pointer to mp_fun_table +#define ASM_X86_REG_FUN_TABLE ASM_X86_REG_EBP + #if GENERIC_ASM_API // The following macros provide a (mostly) arch-independent API to @@ -139,6 +142,9 @@ void asm_x86_call_ind(asm_x86_t* as, size_t fun_id, mp_uint_t n_args, int temp_r #define REG_LOCAL_3 ASM_X86_REG_EDI #define REG_LOCAL_NUM (3) +// Holds a pointer to mp_fun_table +#define REG_FUN_TABLE ASM_X86_REG_FUN_TABLE + #define ASM_T asm_x86_t #define ASM_END_PASS asm_x86_end_pass #define ASM_ENTRY asm_x86_entry diff --git a/py/asmxtensa.c b/py/asmxtensa.c index 6a3a874f1..8da56ffe3 100644 --- a/py/asmxtensa.c +++ b/py/asmxtensa.c @@ -213,9 +213,9 @@ void asm_xtensa_mov_reg_pcrel(asm_xtensa_t *as, uint reg_dest, uint label) { void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx) { if (idx < 16) { - asm_xtensa_op_l32i_n(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_A15, idx); + asm_xtensa_op_l32i_n(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_FUN_TABLE, idx); } else { - asm_xtensa_op_l32i(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_A15, idx); + asm_xtensa_op_l32i(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_FUN_TABLE, idx); } asm_xtensa_op_callx0(as, ASM_XTENSA_REG_A0); } diff --git a/py/asmxtensa.h b/py/asmxtensa.h index 07c3aa819..c348b854b 100644 --- a/py/asmxtensa.h +++ b/py/asmxtensa.h @@ -245,6 +245,9 @@ void asm_xtensa_mov_reg_local_addr(asm_xtensa_t *as, uint reg_dest, int local_nu void asm_xtensa_mov_reg_pcrel(asm_xtensa_t *as, uint reg_dest, uint label); void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx); +// Holds a pointer to mp_fun_table +#define ASM_XTENSA_REG_FUN_TABLE ASM_XTENSA_REG_A15 + #if GENERIC_ASM_API // The following macros provide a (mostly) arch-independent API to @@ -268,6 +271,8 @@ void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx); #define REG_LOCAL_3 ASM_XTENSA_REG_A14 #define REG_LOCAL_NUM (3) +#define REG_FUN_TABLE ASM_XTENSA_REG_FUN_TABLE + #define ASM_T asm_xtensa_t #define ASM_END_PASS asm_xtensa_end_pass #define ASM_ENTRY asm_xtensa_entry diff --git a/py/emitnative.c b/py/emitnative.c index 4d6c3445f..26af7f947 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -287,7 +287,7 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop emit->pass = pass; emit->do_viper_types = scope->emit_options == MP_EMIT_OPT_VIPER; emit->stack_size = 0; - emit->const_table_cur_obj = 0; + emit->const_table_cur_obj = 1; // first entry is for mp_fun_table emit->const_table_cur_raw_code = 0; emit->last_emit_was_return_value = false; emit->scope = scope; @@ -372,24 +372,16 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop // Entry to function ASM_ENTRY(emit->as, emit->stack_start + emit->n_state - num_locals_in_regs); - // TODO don't load r7 if we don't need it - #if N_THUMB - asm_thumb_mov_reg_i32(emit->as, ASM_THUMB_REG_R7, (mp_uint_t)mp_fun_table); - #elif N_ARM - asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table); - #elif N_XTENSA - ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table); - #elif N_X86 - asm_x86_mov_i32_to_r32(emit->as, (intptr_t)mp_fun_table, ASM_X86_REG_EBP); - #elif N_X64 - asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP); + #if N_X86 + asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1); #endif + // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_LOCAL_3, REG_ARG_1, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t)); + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_LOCAL_3, 0); + // Store function object (passed as first arg) to stack if needed if (NEED_FUN_OBJ(emit)) { - #if N_X86 - asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1); - #endif ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_FUN_OBJ(emit), REG_ARG_1); } @@ -458,28 +450,18 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2); #endif ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_ARG_2); + + // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_GENERATOR_STATE, LOCAL_IDX_FUN_OBJ(emit)); + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_TEMP0, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t)); + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_TEMP0, emit->scope->num_pos_args + emit->scope->num_kwonly_args); } else { // The locals and stack start after the code_state structure emit->stack_start = emit->code_state_start + sizeof(mp_code_state_t) / sizeof(mp_uint_t); // Allocate space on C-stack for code_state structure, which includes state ASM_ENTRY(emit->as, emit->stack_start + emit->n_state); - } - // TODO don't load r7 if we don't need it - #if N_THUMB - asm_thumb_mov_reg_i32(emit->as, ASM_THUMB_REG_R7, (mp_uint_t)mp_fun_table); - #elif N_ARM - asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table); - #elif N_XTENSA - ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table); - #elif N_X86 - asm_x86_mov_i32_to_r32(emit->as, (intptr_t)mp_fun_table, ASM_X86_REG_EBP); - #elif N_X64 - asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP); - #endif - - if (!(emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR)) { // Prepare incoming arguments for call to mp_setup_code_state #if N_X86 @@ -489,6 +471,10 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4); #endif + // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_LOCAL_3, REG_ARG_1, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t)); + ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_LOCAL_3, emit->scope->num_pos_args + emit->scope->num_kwonly_args); + // Set code_state.fun_bc ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_FUN_OBJ(emit), REG_ARG_1); @@ -591,11 +577,15 @@ STATIC void emit_native_end_pass(emit_t *emit) { emit->const_table_num_obj = emit->const_table_cur_obj; if (emit->pass == MP_PASS_CODE_SIZE) { size_t const_table_alloc = emit->const_table_num_obj + emit->const_table_cur_raw_code; + size_t nqstr = 0; if (!emit->do_viper_types) { // Add room for qstr names of arguments - const_table_alloc += emit->scope->num_pos_args + emit->scope->num_kwonly_args; + nqstr = emit->scope->num_pos_args + emit->scope->num_kwonly_args; + const_table_alloc += nqstr; } emit->const_table = m_new(mp_uint_t, const_table_alloc); + // Store mp_fun_table pointer just after qstrs + emit->const_table[nqstr] = (mp_uint_t)(uintptr_t)mp_fun_table; } if (emit->pass == MP_PASS_EMIT) {