py/asmthumb: Add support for ARMv6M in native emitter.

Adds a new compile-time option MICROPY_EMIT_THUMB_ARMV7M which is enabled
by default (to get existing behaviour) and which should be disabled (set to
0) when building native emitter support (@micropython.native) on ARMv6M
targets.
This commit is contained in:
graham sanderson 2020-12-12 13:04:10 -06:00 committed by Damien George
parent fe16e785fe
commit 40d2010882
4 changed files with 225 additions and 8 deletions

View File

@ -47,6 +47,7 @@
#define SIGNED_FIT12(x) (((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800)
#define SIGNED_FIT23(x) (((x) & 0xffc00000) == 0) || (((x) & 0xffc00000) == 0xffc00000)
#if MICROPY_EMIT_THUMB_ARMV7M
// Note: these actually take an imm12 but the high-bit is not encoded here
#define OP_ADD_W_RRI_HI(reg_src) (0xf200 | (reg_src))
#define OP_ADD_W_RRI_LO(reg_dest, imm11) ((imm11 << 4 & 0x7000) | reg_dest << 8 | (imm11 & 0xff))
@ -55,6 +56,7 @@
#define OP_LDR_W_HI(reg_base) (0xf8d0 | (reg_base))
#define OP_LDR_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))
#endif
static inline byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int n) {
return mp_asm_base_get_cur_to_write_bytes(&as->base, n);
@ -122,7 +124,7 @@ void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
// If this Thumb machine code is run from ARM state then add a prelude
// to switch to Thumb state for the duration of the function.
#if MICROPY_DYNAMIC_COMPILER || MICROPY_EMIT_ARM || (defined(__arm__) && !defined(__thumb2__))
#if MICROPY_DYNAMIC_COMPILER || MICROPY_EMIT_ARM || (defined(__arm__) && !defined(__thumb2__) && !defined(__thumb__))
#if MICROPY_DYNAMIC_COMPILER
if (mp_dynamic_compiler.native_arch == MP_NATIVE_ARCH_ARMV6)
#endif
@ -171,11 +173,21 @@ void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
}
asm_thumb_op16(as, OP_PUSH_RLIST_LR(reglist));
if (stack_adjust > 0) {
#if MICROPY_EMIT_THUMB_ARMV7M
if (UNSIGNED_FIT7(stack_adjust)) {
asm_thumb_op16(as, OP_SUB_SP(stack_adjust));
} else {
asm_thumb_op32(as, OP_SUB_W_RRI_HI(ASM_THUMB_REG_SP), OP_SUB_W_RRI_LO(ASM_THUMB_REG_SP, stack_adjust * 4));
}
#else
int adj = stack_adjust;
// we don't expect the stack_adjust to be massive
while (!UNSIGNED_FIT7(adj)) {
asm_thumb_op16(as, OP_SUB_SP(127));
adj -= 127;
}
asm_thumb_op16(as, OP_SUB_SP(adj));
#endif
}
as->push_reglist = reglist;
as->stack_adjust = stack_adjust;
@ -183,11 +195,21 @@ void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
void asm_thumb_exit(asm_thumb_t *as) {
if (as->stack_adjust > 0) {
#if MICROPY_EMIT_THUMB_ARMV7M
if (UNSIGNED_FIT7(as->stack_adjust)) {
asm_thumb_op16(as, OP_ADD_SP(as->stack_adjust));
} else {
asm_thumb_op32(as, OP_ADD_W_RRI_HI(ASM_THUMB_REG_SP), OP_ADD_W_RRI_LO(ASM_THUMB_REG_SP, as->stack_adjust * 4));
}
#else
int adj = as->stack_adjust;
// we don't expect the stack_adjust to be massive
while (!UNSIGNED_FIT7(adj)) {
asm_thumb_op16(as, OP_ADD_SP(127));
adj -= 127;
}
asm_thumb_op16(as, OP_ADD_SP(adj));
#endif
}
asm_thumb_op16(as, OP_POP_RLIST_PC(as->push_reglist));
}
@ -241,6 +263,8 @@ void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
asm_thumb_op16(as, 0x4600 | op_lo);
}
#if MICROPY_EMIT_THUMB_ARMV7M
// if loading lo half with movw, the i16 value will be zero extended into the r32 register!
size_t asm_thumb_mov_reg_i16(asm_thumb_t *as, uint mov_op, uint reg_dest, int i16_src) {
assert(reg_dest < ASM_THUMB_REG_R15);
@ -250,6 +274,16 @@ size_t asm_thumb_mov_reg_i16(asm_thumb_t *as, uint mov_op, uint reg_dest, int i1
return loc;
}
#else
void asm_thumb_mov_rlo_i16(asm_thumb_t *as, uint rlo_dest, int i16_src) {
asm_thumb_mov_rlo_i8(as, rlo_dest, (i16_src >> 8) & 0xff);
asm_thumb_lsl_rlo_rlo_i5(as, rlo_dest, rlo_dest, 8);
asm_thumb_add_rlo_i8(as, rlo_dest, i16_src & 0xff);
}
#endif
#define OP_B_N(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff))
bool asm_thumb_b_n_label(asm_thumb_t *as, uint label) {
@ -274,8 +308,13 @@ bool asm_thumb_bcc_nw_label(asm_thumb_t *as, int cond, uint label, bool wide) {
asm_thumb_op16(as, OP_BCC_N(cond, rel));
return as->base.pass != MP_ASM_PASS_EMIT || SIGNED_FIT9(rel);
} else {
#if MICROPY_EMIT_THUMB_ARMV7M
asm_thumb_op32(as, OP_BCC_W_HI(cond, rel), OP_BCC_W_LO(rel));
return true;
#else
// this method should not be called for ARMV6M
return false;
#endif
}
}
@ -296,8 +335,30 @@ size_t asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, mp_uint_t i32) {
size_t loc = mp_asm_base_get_code_pos(&as->base);
#if MICROPY_EMIT_THUMB_ARMV7M
asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, reg_dest, i32);
asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVT, reg_dest, i32 >> 16);
#else
// should only be called with lo reg for ARMV6M
assert(reg_dest < ASM_THUMB_REG_R8);
// sanity check that generated code is aligned
assert(!as->base.code_base || !(3u & (uintptr_t)as->base.code_base));
// basically:
// (nop)
// ldr reg_dest, _data
// b 1f
// _data: .word i32
// 1:
if (as->base.code_offset & 2u) {
asm_thumb_op16(as, ASM_THUMB_OP_NOP);
}
asm_thumb_ldr_rlo_pcrel_i8(as, reg_dest, 0);
asm_thumb_op16(as, OP_B_N(2));
asm_thumb_op16(as, i32 & 0xffff);
asm_thumb_op16(as, i32 >> 16);
#endif
return loc;
}
@ -305,27 +366,68 @@ size_t asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, mp_uint_t i32) {
void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
if (reg_dest < 8 && UNSIGNED_FIT8(i32)) {
asm_thumb_mov_rlo_i8(as, reg_dest, i32);
} else if (UNSIGNED_FIT16(i32)) {
asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, reg_dest, i32);
} else {
asm_thumb_mov_reg_i32(as, reg_dest, i32);
#if MICROPY_EMIT_THUMB_ARMV7M
if (UNSIGNED_FIT16(i32)) {
asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, reg_dest, i32);
} else {
asm_thumb_mov_reg_i32(as, reg_dest, i32);
}
#else
uint rlo_dest = reg_dest;
assert(rlo_dest < ASM_THUMB_REG_R8); // should never be called for ARMV6M
bool negate = i32 < 0 && ((i32 + i32) & 0xffffffffu); // don't negate 0x80000000
if (negate) {
i32 = -i32;
}
uint clz = __builtin_clz(i32);
uint ctz = i32 ? __builtin_ctz(i32) : 0;
assert(clz + ctz <= 32);
if (clz + ctz >= 24) {
asm_thumb_mov_rlo_i8(as, rlo_dest, (i32 >> ctz) & 0xff);
asm_thumb_lsl_rlo_rlo_i5(as, rlo_dest, rlo_dest, ctz);
} else if (UNSIGNED_FIT16(i32)) {
asm_thumb_mov_rlo_i16(as, rlo_dest, i32);
} else {
if (negate) {
// no point in negating if we're storing in 32 bit anyway
negate = false;
i32 = -i32;
}
asm_thumb_mov_reg_i32(as, rlo_dest, i32);
}
if (negate) {
asm_thumb_neg_rlo_rlo(as, rlo_dest, rlo_dest);
}
#endif
}
}
#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
static void asm_thumb_mov_local_check(asm_thumb_t *as, int word_offset) {
if (as->base.pass >= MP_ASM_PASS_EMIT) {
assert(word_offset >= 0);
if (!UNSIGNED_FIT8(word_offset)) {
mp_raise_NotImplementedError(MP_ERROR_TEXT("too many locals for native method"));
}
}
}
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
assert(rlo_src < ASM_THUMB_REG_R8);
int word_offset = local_num;
assert(as->base.pass < MP_ASM_PASS_EMIT || word_offset >= 0);
asm_thumb_mov_local_check(as, word_offset);
asm_thumb_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, word_offset));
}
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
assert(rlo_dest < ASM_THUMB_REG_R8);
int word_offset = local_num;
assert(as->base.pass < MP_ASM_PASS_EMIT || word_offset >= 0);
asm_thumb_mov_local_check(as, word_offset);
asm_thumb_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset));
}
@ -341,21 +443,63 @@ void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num)
void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label) {
mp_uint_t dest = get_label_dest(as, label);
mp_int_t rel = dest - as->base.code_offset;
rel -= 4 + 4; // adjust for mov_reg_i16 and then PC+4 prefetch of add_reg_reg
rel |= 1; // to stay in Thumb state when jumping to this address
#if MICROPY_EMIT_THUMB_ARMV7M
rel -= 4 + 4; // adjust for mov_reg_i16 and then PC+4 prefetch of add_reg_reg
asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, rlo_dest, rel); // 4 bytes
#else
rel -= 8 + 4; // adjust for four instructions and then PC+4 prefetch of add_reg_reg
// 6 bytes
asm_thumb_mov_rlo_i16(as, rlo_dest, rel);
// 2 bytes - not always needed, but we want to keep the size the same
asm_thumb_sxth_rlo_rlo(as, rlo_dest, rlo_dest);
#endif
asm_thumb_add_reg_reg(as, rlo_dest, ASM_THUMB_REG_R15); // 2 bytes
}
#if MICROPY_EMIT_THUMB_ARMV7M
static inline void asm_thumb_ldr_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
asm_thumb_op32(as, OP_LDR_W_HI(reg_base), OP_LDR_W_LO(reg_dest, word_offset * 4));
}
#endif
void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(word_offset)) {
asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_base, word_offset);
} else {
#if MICROPY_EMIT_THUMB_ARMV7M
asm_thumb_ldr_reg_reg_i12(as, reg_dest, reg_base, word_offset);
#else
word_offset -= 31;
if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8) {
if (UNSIGNED_FIT8(word_offset) && (word_offset < 64 || reg_dest != reg_base)) {
if (word_offset < 64) {
if (reg_dest != reg_base) {
asm_thumb_mov_reg_reg(as, reg_dest, reg_base);
}
asm_thumb_add_rlo_i8(as, reg_dest, word_offset * 4);
} else {
asm_thumb_mov_rlo_i8(as, reg_dest, word_offset);
asm_thumb_lsl_rlo_rlo_i5(as, reg_dest, reg_dest, 2);
asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_base);
}
} else {
if (reg_dest != reg_base) {
asm_thumb_mov_rlo_i16(as, reg_dest, word_offset * 4);
asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_dest);
} else {
uint reg_other = reg_dest ^ 7;
asm_thumb_op16(as, OP_PUSH_RLIST((1 << reg_other)));
asm_thumb_mov_rlo_i16(as, reg_other, word_offset * 4);
asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_other);
asm_thumb_op16(as, OP_POP_RLIST((1 << reg_other)));
}
}
} else {
assert(0); // should never be called for ARMV6M
}
asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_dest, 31);
#endif
}
}
@ -378,7 +522,20 @@ void asm_thumb_b_label(asm_thumb_t *as, uint label) {
} else {
// is a forwards jump, so need to assume it's large
large_jump:
#if MICROPY_EMIT_THUMB_ARMV7M
asm_thumb_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
#else
if (SIGNED_FIT12(rel)) {
// this code path has to be the same number of instructions irrespective of rel
asm_thumb_op16(as, OP_B_N(rel));
} else {
asm_thumb_op16(as, ASM_THUMB_OP_NOP);
if (dest != (mp_uint_t)-1) {
// we have an actual branch > 12 bits; this is not handled yet
mp_raise_NotImplementedError(MP_ERROR_TEXT("native method too big"));
}
}
#endif
}
}
@ -397,7 +554,13 @@ void asm_thumb_bcc_label(asm_thumb_t *as, int cond, uint label) {
} else {
// is a forwards jump, so need to assume it's large
large_jump:
#if MICROPY_EMIT_THUMB_ARMV7M
asm_thumb_op32(as, OP_BCC_W_HI(cond, rel), OP_BCC_W_LO(rel));
#else
// reverse the sense of the branch to jump over a longer branch
asm_thumb_op16(as, OP_BCC_N(cond ^ 1, 0));
asm_thumb_b_label(as, label);
#endif
}
}

View File

@ -157,6 +157,7 @@ static inline void asm_thumb_sub_rlo_rlo_i3(asm_thumb_t *as, uint rlo_dest, uint
#define ASM_THUMB_FORMAT_3_CMP (0x2800)
#define ASM_THUMB_FORMAT_3_ADD (0x3000)
#define ASM_THUMB_FORMAT_3_SUB (0x3800)
#define ASM_THUMB_FORMAT_3_LDR (0x4800)
#define ASM_THUMB_FORMAT_3_ENCODE(op, rlo, i8) ((op) | ((rlo) << 8) | (i8))
@ -177,6 +178,9 @@ static inline void asm_thumb_add_rlo_i8(asm_thumb_t *as, uint rlo, int i8) {
static inline void asm_thumb_sub_rlo_i8(asm_thumb_t *as, uint rlo, int i8) {
asm_thumb_format_3(as, ASM_THUMB_FORMAT_3_SUB, rlo, i8);
}
static inline void asm_thumb_ldr_rlo_pcrel_i8(asm_thumb_t *as, uint rlo, uint i8) {
asm_thumb_format_3(as, ASM_THUMB_FORMAT_3_LDR, rlo, i8);
}
// FORMAT 4: ALU operations
@ -202,6 +206,12 @@ void asm_thumb_format_4(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src);
static inline void asm_thumb_cmp_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src) {
asm_thumb_format_4(as, ASM_THUMB_FORMAT_4_CMP, rlo_dest, rlo_src);
}
static inline void asm_thumb_mvn_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src) {
asm_thumb_format_4(as, ASM_THUMB_FORMAT_4_MVN, rlo_dest, rlo_src);
}
static inline void asm_thumb_neg_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src) {
asm_thumb_format_4(as, ASM_THUMB_FORMAT_4_NEG, rlo_dest, rlo_src);
}
// FORMAT 5: hi register operations (add, cmp, mov, bx)
// For add/cmp/mov, at least one of the args must be a high register
@ -263,6 +273,32 @@ static inline void asm_thumb_ldrb_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uin
static inline void asm_thumb_ldrh_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint byte_offset) {
asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_10_LDRH, rlo_dest, rlo_base, byte_offset);
}
static inline void asm_thumb_lsl_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_src, uint shift) {
asm_thumb_format_1(as, ASM_THUMB_FORMAT_1_LSL, rlo_dest, rlo_src, shift);
}
static inline void asm_thumb_asr_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_src, uint shift) {
asm_thumb_format_1(as, ASM_THUMB_FORMAT_1_ASR, rlo_dest, rlo_src, shift);
}
// FORMAT 11: sign/zero extend
#define ASM_THUMB_FORMAT_11_ENCODE(op, rlo_dest, rlo_src) \
((op) | ((rlo_src) << 3) | (rlo_dest))
#define ASM_THUMB_FORMAT_11_SXTH (0xb200)
#define ASM_THUMB_FORMAT_11_SXTB (0xb240)
#define ASM_THUMB_FORMAT_11_UXTH (0xb280)
#define ASM_THUMB_FORMAT_11_UXTB (0xb2c0)
static inline void asm_thumb_format_11(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src) {
assert(rlo_dest < ASM_THUMB_REG_R8);
assert(rlo_src < ASM_THUMB_REG_R8);
asm_thumb_op16(as, ASM_THUMB_FORMAT_11_ENCODE(op, rlo_dest, rlo_src));
}
static inline void asm_thumb_sxth_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src) {
asm_thumb_format_11(as, ASM_THUMB_FORMAT_11_SXTH, rlo_dest, rlo_src);
}
// TODO convert these to above format style
@ -270,7 +306,12 @@ static inline void asm_thumb_ldrh_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uin
#define ASM_THUMB_OP_MOVT (0xf2c0)
void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src);
#if MICROPY_EMIT_THUMB_ARMV7M
size_t asm_thumb_mov_reg_i16(asm_thumb_t *as, uint mov_op, uint reg_dest, int i16_src);
#else
void asm_thumb_mov_rlo_i16(asm_thumb_t *as, uint rlo_dest, int i16_src);
#endif
// these return true if the destination is in range, false otherwise
bool asm_thumb_b_n_label(asm_thumb_t *as, uint label);
@ -344,7 +385,11 @@ void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp); // convenien
#define ASM_MOV_LOCAL_REG(as, local_num, reg) asm_thumb_mov_local_reg((as), (local_num), (reg))
#define ASM_MOV_REG_IMM(as, reg_dest, imm) asm_thumb_mov_reg_i32_optimised((as), (reg_dest), (imm))
#if MICROPY_EMIT_THUMB_ARMV7M
#define ASM_MOV_REG_IMM_FIX_U16(as, reg_dest, imm) asm_thumb_mov_reg_i16((as), ASM_THUMB_OP_MOVW, (reg_dest), (imm))
#else
#define ASM_MOV_REG_IMM_FIX_U16(as, reg_dest, imm) asm_thumb_mov_rlo_i16((as), (reg_dest), (imm))
#endif
#define ASM_MOV_REG_IMM_FIX_WORD(as, reg_dest, imm) asm_thumb_mov_reg_i32((as), (reg_dest), (imm))
#define ASM_MOV_REG_LOCAL(as, reg_dest, local_num) asm_thumb_mov_reg_local((as), (reg_dest), (local_num))
#define ASM_MOV_REG_REG(as, reg_dest, reg_src) asm_thumb_mov_reg_reg((as), (reg_dest), (reg_src))

View File

@ -573,7 +573,11 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_a
goto unknown_op;
}
int label_num = get_arg_label(emit, op_str, pn_args[0]);
if (!asm_thumb_bcc_nw_label(&emit->as, cc, label_num, op_len == 5 && op_str[4] == 'w')) {
bool wide = op_len == 5 && op_str[4] == 'w';
if (wide && !ARMV7M) {
goto unknown_op;
}
if (!asm_thumb_bcc_nw_label(&emit->as, cc, label_num, wide)) {
goto branch_not_in_range;
}
} else if (ARMV7M && op_str[0] == 'i' && op_str[1] == 't') {

View File

@ -304,6 +304,11 @@
#define MICROPY_EMIT_THUMB (0)
#endif
// Whether to emit ARMv7-M instruction support in thumb native code
#ifndef MICROPY_EMIT_THUMB_ARMV7M
#define MICROPY_EMIT_THUMB_ARMV7M (1)
#endif
// Whether to enable the thumb inline assembler
#ifndef MICROPY_EMIT_INLINE_THUMB
#define MICROPY_EMIT_INLINE_THUMB (0)