py/emit: Suppress unreachable bytecode/native code that follows jump.

This new logic tracks when an unconditional jump/raise occurs in the
emitted code stream (bytecode or native machine code) and suppresses all
subsequent code, until a label is assigned.  This eliminates a lot of
cases of dead code, with relatively simple logic.

This commit combined with the previous one (that removed the existing
dead-code finding logic) has the following code size change:

       bare-arm:   -16 -0.028%
    minimal x86:   -60 -0.036%
       unix x64:  -368 -0.070%
    unix nanbox:   -80 -0.017%
          stm32:  -204 -0.052% PYBV10
         cc3200:    +0 +0.000%
        esp8266:  -232 -0.033% GENERIC
          esp32:  -224 -0.015% GENERIC[incl -40(data)]
         mimxrt:  -192 -0.054% TEENSY40
     renesas-ra:  -200 -0.032% RA6M2_EK
            nrf:   +28 +0.015% pca10040
            rp2:  -256 -0.050% PICO
           samd:   -12 -0.009% ADAFRUIT_ITSYBITSY_M4_EXPRESS

Signed-off-by: Damien George <damien@micropython.org>
This commit is contained in:
Damien George 2022-06-17 23:14:32 +10:00
parent e85a096302
commit a506335524
8 changed files with 268 additions and 66 deletions

View File

@ -55,15 +55,20 @@ void mp_asm_base_start_pass(mp_asm_base_t *as, int pass) {
assert(as->code_base != NULL);
}
as->pass = pass;
as->suppress = false;
as->code_offset = 0;
}
// all functions must go through this one to emit bytes
// if as->pass < MP_ASM_PASS_EMIT, then this function just counts the number
// of bytes needed and returns NULL, and callers should not store any data
// It also returns NULL if generated code should be suppressed at this point.
uint8_t *mp_asm_base_get_cur_to_write_bytes(void *as_in, size_t num_bytes_to_write) {
mp_asm_base_t *as = as_in;
uint8_t *c = NULL;
if (as->suppress) {
return c;
}
if (as->pass == MP_ASM_PASS_EMIT) {
assert(as->code_offset + num_bytes_to_write <= as->code_size);
c = as->code_base + as->code_offset;
@ -74,6 +79,11 @@ uint8_t *mp_asm_base_get_cur_to_write_bytes(void *as_in, size_t num_bytes_to_wri
void mp_asm_base_label_assign(mp_asm_base_t *as, size_t label) {
assert(label < as->max_num_labels);
// Assigning a label ends any dead-code region, and all following machine
// code should be emitted (until another mp_asm_base_suppress_code() call).
as->suppress = false;
if (as->pass < MP_ASM_PASS_EMIT) {
// assign label offset
assert(as->label_offsets[label] == (size_t)-1);

View File

@ -33,7 +33,12 @@
#define MP_ASM_PASS_EMIT (2)
typedef struct _mp_asm_base_t {
int pass;
uint8_t pass;
// Set to true using mp_asm_base_suppress_code() if the code generator
// should suppress emitted code due to it being dead code.
bool suppress;
size_t code_offset;
size_t code_size;
uint8_t *code_base;
@ -50,6 +55,10 @@ void mp_asm_base_label_assign(mp_asm_base_t *as, size_t label);
void mp_asm_base_align(mp_asm_base_t *as, unsigned int align);
void mp_asm_base_data(mp_asm_base_t *as, unsigned int bytesize, uintptr_t val);
// Begin a dead-code region: set as->suppress so that subsequently emitted
// machine code is dropped.  While the flag is set,
// mp_asm_base_get_cur_to_write_bytes() returns NULL; the flag is cleared again
// by mp_asm_base_label_assign() and at the start of each pass.
static inline void mp_asm_base_suppress_code(mp_asm_base_t *as) {
as->suppress = true;
}
// Return the assembler's current code offset (as->code_offset), which is
// reset to 0 at the start of each pass and is used as the offset from
// as->code_base when bytes are written.
static inline size_t mp_asm_base_get_code_pos(mp_asm_base_t *as) {
return as->code_offset;
}

View File

@ -48,6 +48,11 @@ struct _emit_t {
pass_kind_t pass : 8;
// Set to true if the code generator should suppress emitted code due to it
// being dead code. This can happen when opcodes immediately follow an
// unconditional flow control (eg jump or raise).
bool suppress;
int stack_size;
mp_emit_common_t *emit_common;
@ -140,6 +145,9 @@ STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_sk
// all functions must go through this one to emit byte code
STATIC uint8_t *emit_get_cur_to_write_bytecode(void *emit_in, size_t num_bytes_to_write) {
emit_t *emit = emit_in;
if (emit->suppress) {
return emit->dummy_data;
}
if (emit->pass < MP_PASS_EMIT) {
emit->bytecode_offset += num_bytes_to_write;
return emit->dummy_data;
@ -222,6 +230,10 @@ STATIC void emit_write_bytecode_byte_child(emit_t *emit, int stack_adj, byte b,
STATIC void emit_write_bytecode_byte_label(emit_t *emit, int stack_adj, byte b1, mp_uint_t label) {
mp_emit_bc_adjust_stack_size(emit, stack_adj);
if (emit->suppress) {
return;
}
// Determine if the jump offset is signed or unsigned, based on the opcode.
const bool is_signed = b1 <= MP_BC_POP_JUMP_IF_FALSE;
@ -271,6 +283,7 @@ STATIC void emit_write_bytecode_byte_label(emit_t *emit, int stack_adj, byte b1,
void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
emit->pass = pass;
emit->stack_size = 0;
emit->suppress = false;
emit->scope = scope;
emit->last_source_line_offset = 0;
emit->last_source_line = 1;
@ -426,6 +439,10 @@ void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t source_line) {
}
void mp_emit_bc_label_assign(emit_t *emit, mp_uint_t l) {
// Assigning a label ends any dead-code region, and all following opcodes
// should be emitted (until another unconditional flow control).
emit->suppress = false;
mp_emit_bc_adjust_stack_size(emit, 0);
if (emit->pass == MP_PASS_SCOPE) {
return;
@ -589,6 +606,7 @@ void mp_emit_bc_rot_three(emit_t *emit) {
// Emit an unconditional MP_BC_JUMP to `label` (stack adjustment of 0).
void mp_emit_bc_jump(emit_t *emit, mp_uint_t label) {
emit_write_bytecode_byte_label(emit, 0, MP_BC_JUMP, label);
// Anything emitted after an unconditional jump is unreachable, so start
// suppressing opcodes.  This must come after the jump itself is written;
// suppression ends when mp_emit_bc_label_assign() is next called.
emit->suppress = true;
}
void mp_emit_bc_pop_jump_if(emit_t *emit, bool cond, mp_uint_t label) {
@ -622,6 +640,7 @@ void mp_emit_bc_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_dept
emit_write_bytecode_byte_label(emit, 0, MP_BC_UNWIND_JUMP, label & ~MP_EMIT_BREAK_FROM_FOR);
emit_write_bytecode_raw_byte(emit, ((label & MP_EMIT_BREAK_FROM_FOR) ? 0x80 : 0) | except_depth);
}
emit->suppress = true;
}
void mp_emit_bc_setup_block(emit_t *emit, mp_uint_t label, int kind) {
@ -663,6 +682,7 @@ void mp_emit_bc_for_iter_end(emit_t *emit) {
// Emit MP_BC_POP_EXCEPT_JUMP targeting `label` (stack adjustment of 0).
// `within_exc_handler` is not used by the bytecode emitter.
void mp_emit_bc_pop_except_jump(emit_t *emit, mp_uint_t label, bool within_exc_handler) {
(void)within_exc_handler;
emit_write_bytecode_byte_label(emit, 0, MP_BC_POP_EXCEPT_JUMP, label);
// This opcode is an unconditional jump, so the opcodes that follow are dead
// code; suppress them until the next label is assigned.
emit->suppress = true;
}
void mp_emit_bc_unary_op(emit_t *emit, mp_unary_op_t op) {
@ -766,6 +786,7 @@ void mp_emit_bc_call_method(emit_t *emit, mp_uint_t n_positional, mp_uint_t n_ke
// Emit MP_BC_RETURN_VALUE.
// NOTE(review): the -1 argument is presumably the stack-size adjustment (the
// returned value being popped) -- confirm against emit_write_bytecode_byte().
void mp_emit_bc_return_value(emit_t *emit) {
emit_write_bytecode_byte(emit, -1, MP_BC_RETURN_VALUE);
// Control never continues past a return, so suppress the following opcodes
// until the next label is assigned.
emit->suppress = true;
}
void mp_emit_bc_raise_varargs(emit_t *emit, mp_uint_t n_args) {
@ -773,6 +794,7 @@ void mp_emit_bc_raise_varargs(emit_t *emit, mp_uint_t n_args) {
MP_STATIC_ASSERT(MP_BC_RAISE_LAST + 2 == MP_BC_RAISE_FROM);
assert(n_args <= 2);
emit_write_bytecode_byte(emit, -n_args, MP_BC_RAISE_LAST + n_args);
emit->suppress = true;
}
void mp_emit_bc_yield(emit_t *emit, int kind) {

View File

@ -1966,6 +1966,7 @@ STATIC void emit_native_jump(emit_t *emit, mp_uint_t label) {
need_stack_settled(emit);
ASM_JUMP(emit->as, label);
emit_post(emit);
mp_asm_base_suppress_code(&emit->as->base);
}
STATIC void emit_native_jump_helper(emit_t *emit, bool cond, mp_uint_t label, bool pop) {
@ -2810,6 +2811,7 @@ STATIC void emit_native_raise_varargs(emit_t *emit, mp_uint_t n_args) {
}
// TODO probably make this 1 call to the runtime (which could even call convert, native_raise(obj, type))
emit_call(emit, MP_F_NATIVE_RAISE);
mp_asm_base_suppress_code(&emit->as->base);
}
STATIC void emit_native_yield(emit_t *emit, int kind) {

View File

@ -119,12 +119,12 @@ def f():
#from sys import * # tested at module scope
# raise
raise
raise 1
if a: raise
if a: raise 1
# return
return
return 1
if a: return
if a: return 1
# function with lots of locals
def f():

View File

@ -47,8 +47,8 @@ arg names:
42 IMPORT_STAR
43 LOAD_CONST_NONE
44 RETURN_VALUE
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 45\[46\] bytes)
Raw bytecode (code_info_size=8\[46\], bytecode_size=372):
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 46\[68\] bytes)
Raw bytecode (code_info_size=8\[46\], bytecode_size=382):
a8 12 9\[bf\] 03 05 60 60 26 22 24 64 22 24 25 25 24
26 23 63 22 22 25 23 23 2f 6c 25 65 25 25 69 68
26 65 27 6a 62 20 23 62 2a 29 69 24 25 28 67 26
@ -126,19 +126,18 @@ arg names:
bc=313 line=100
bc=315 line=101
########
bc=321 line=103
bc=327 line=106
bc=331 line=107
bc=337 line=110
bc=340 line=111
bc=346 line=114
bc=346 line=117
bc=351 line=118
bc=363 line=121
bc=363 line=122
bc=364 line=123
bc=366 line=126
bc=368 line=127
bc=325 line=106
bc=329 line=107
bc=335 line=110
bc=338 line=111
bc=344 line=114
bc=344 line=117
bc=349 line=118
bc=361 line=121
bc=361 line=122
bc=365 line=123
bc=370 line=126
bc=375 line=127
00 LOAD_CONST_NONE
01 LOAD_CONST_FALSE
02 BINARY_OP 27 __add__
@ -363,48 +362,55 @@ arg names:
308 LOAD_FAST 1
309 POP_TOP
310 END_FINALLY
311 JUMP 324
313 SETUP_EXCEPT 320
315 UNWIND_JUMP 327 1
318 POP_EXCEPT_JUMP 324
320 POP_TOP
321 POP_EXCEPT_JUMP 324
323 END_FINALLY
324 LOAD_FAST 0
325 POP_JUMP_IF_TRUE 313
327 LOAD_FAST 0
328 SETUP_WITH 335
330 POP_TOP
331 LOAD_DEREF 14
333 POP_TOP
334 LOAD_CONST_NONE
335 WITH_CLEANUP
336 END_FINALLY
337 LOAD_CONST_SMALL_INT 1
338 STORE_DEREF 16
340 LOAD_FAST_N 16
342 MAKE_CLOSURE \.\+ 1
345 STORE_FAST 13
346 LOAD_CONST_SMALL_INT 0
347 LOAD_CONST_NONE
348 IMPORT_NAME 'a'
350 STORE_FAST 0
351 LOAD_CONST_SMALL_INT 0
352 LOAD_CONST_STRING 'b'
354 BUILD_TUPLE 1
356 IMPORT_NAME 'a'
358 IMPORT_FROM 'b'
360 STORE_DEREF 14
362 POP_TOP
363 RAISE_LAST
364 LOAD_CONST_SMALL_INT 1
365 RAISE_OBJ
366 LOAD_CONST_NONE
367 RETURN_VALUE
311 JUMP 322
313 SETUP_EXCEPT 318
315 UNWIND_JUMP 325 1
318 POP_TOP
319 POP_EXCEPT_JUMP 322
321 END_FINALLY
322 LOAD_FAST 0
323 POP_JUMP_IF_TRUE 313
325 LOAD_FAST 0
326 SETUP_WITH 333
328 POP_TOP
329 LOAD_DEREF 14
331 POP_TOP
332 LOAD_CONST_NONE
333 WITH_CLEANUP
334 END_FINALLY
335 LOAD_CONST_SMALL_INT 1
336 STORE_DEREF 16
338 LOAD_FAST_N 16
340 MAKE_CLOSURE \.\+ 1
343 STORE_FAST 13
344 LOAD_CONST_SMALL_INT 0
345 LOAD_CONST_NONE
346 IMPORT_NAME 'a'
348 STORE_FAST 0
349 LOAD_CONST_SMALL_INT 0
350 LOAD_CONST_STRING 'b'
352 BUILD_TUPLE 1
354 IMPORT_NAME 'a'
356 IMPORT_FROM 'b'
358 STORE_DEREF 14
360 POP_TOP
361 LOAD_FAST 0
362 POP_JUMP_IF_FALSE 365
364 RAISE_LAST
365 LOAD_FAST 0
366 POP_JUMP_IF_FALSE 370
368 LOAD_CONST_SMALL_INT 1
369 RETURN_VALUE
370 LOAD_CONST_NONE
371 RETURN_VALUE
369 RAISE_OBJ
370 LOAD_FAST 0
371 POP_JUMP_IF_FALSE 375
373 LOAD_CONST_NONE
374 RETURN_VALUE
375 LOAD_FAST 0
376 POP_JUMP_IF_FALSE 380
378 LOAD_CONST_SMALL_INT 1
379 RETURN_VALUE
380 LOAD_CONST_NONE
381 RETURN_VALUE
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 59 bytes)
Raw bytecode (code_info_size=8, bytecode_size=51):
a8 10 0a 05 80 82 34 38 81 57 c0 57 c1 57 c2 57
@ -623,9 +629,9 @@ arg names: *
08 DELETE_DEREF 0
10 LOAD_CONST_NONE
11 RETURN_VALUE
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 15 bytes)
Raw bytecode (code_info_size=8, bytecode_size=7):
9a 01 0a 05 03 08 80 8b b1 25 00 f2 63 51 63
File cmdline/cmd_showbc.py, code block 'f' (descriptor: \.\+, bytecode @\.\+ 13 bytes)
Raw bytecode (code_info_size=8, bytecode_size=5):
9a 01 0a 05 03 08 80 8b b1 25 00 f2 63
arg names: * b
(N_STATE 4)
(N_EXC_STACK 0)
@ -635,8 +641,6 @@ arg names: * b
01 LOAD_DEREF 0
03 BINARY_OP 27 __add__
04 RETURN_VALUE
05 LOAD_CONST_NONE
06 RETURN_VALUE
mem: total=\\d\+, current=\\d\+, peak=\\d\+
stack: \\d\+ out of \\d\+
GC: total: \\d\+, used: \\d\+, free: \\d\+

View File

@ -0,0 +1,33 @@
# cmdline: -v -v
# test printing of bytecode when it's optimised away
def f0():
return 0
print(1)
def f1(x):
if x:
return
print(1)
print(2)
def f2(x):
raise Exception
print(1)
def f3(x):
while x:
break
print(1)
print(2)
def f4(x):
while x:
continue
print(1)
print(2)

View File

@ -0,0 +1,122 @@
File cmdline/cmd_showbc_opt.py, code block '<module>' (descriptor: \.\+, bytecode @\.\+ 35 bytes)
Raw bytecode (code_info_size=13, bytecode_size=22):
00 16 01 60 20 64 40 84 07 64 40 84 07 32 00 16
02 32 01 16 03 32 02 16 04 32 03 16 05 32 04 16
06 51 63
arg names:
(N_STATE 1)
(N_EXC_STACK 0)
bc=0 line=1
bc=0 line=4
bc=0 line=5
bc=4 line=8
bc=4 line=10
bc=8 line=17
bc=12 line=20
bc=12 line=22
bc=16 line=29
00 MAKE_FUNCTION \.\+
02 STORE_NAME f0
04 MAKE_FUNCTION \.\+
06 STORE_NAME f1
08 MAKE_FUNCTION \.\+
10 STORE_NAME f2
12 MAKE_FUNCTION \.\+
14 STORE_NAME f3
16 MAKE_FUNCTION \.\+
18 STORE_NAME f4
20 LOAD_CONST_NONE
21 RETURN_VALUE
File cmdline/cmd_showbc_opt.py, code block 'f0' (descriptor: \.\+, bytecode @\.\+ 8 bytes)
Raw bytecode (code_info_size=6, bytecode_size=2):
08 08 02 60 40 22 80 63
arg names:
(N_STATE 2)
(N_EXC_STACK 0)
bc=0 line=1
bc=0 line=4
bc=0 line=6
bc=2 line=7
00 LOAD_CONST_SMALL_INT 0
01 RETURN_VALUE
File cmdline/cmd_showbc_opt.py, code block 'f1' (descriptor: \.\+, bytecode @\.\+ 22 bytes)
Raw bytecode (code_info_size=9, bytecode_size=13):
11 0e 03 08 80 0a 23 22 20 b0 44 42 51 63 12 07
82 34 01 59 51 63
arg names: x
(N_STATE 3)
(N_EXC_STACK 0)
bc=0 line=1
bc=0 line=11
bc=3 line=12
bc=5 line=13
bc=5 line=14
00 LOAD_FAST 0
01 POP_JUMP_IF_FALSE 5
03 LOAD_CONST_NONE
04 RETURN_VALUE
05 LOAD_GLOBAL print
07 LOAD_CONST_SMALL_INT 2
08 CALL_FUNCTION n=1 nkw=0
10 POP_TOP
11 LOAD_CONST_NONE
12 RETURN_VALUE
File cmdline/cmd_showbc_opt.py, code block 'f2' (descriptor: \.\+, bytecode @\.\+ 10 bytes)
Raw bytecode (code_info_size=7, bytecode_size=3):
11 0a 04 08 80 11 23 12 09 65
arg names: x
(N_STATE 3)
(N_EXC_STACK 0)
bc=0 line=1
bc=0 line=18
bc=3 line=19
00 LOAD_GLOBAL Exception
02 RAISE_OBJ
File cmdline/cmd_showbc_opt.py, code block 'f3' (descriptor: \.\+, bytecode @\.\+ 24 bytes)
Raw bytecode (code_info_size=9, bytecode_size=15):
11 0e 05 08 80 16 22 22 23 42 42 42 43 b0 43 3b
12 07 82 34 01 59 51 63
arg names: x
(N_STATE 3)
(N_EXC_STACK 0)
bc=0 line=1
bc=0 line=23
bc=2 line=24
bc=4 line=25
bc=7 line=26
00 JUMP 4
02 JUMP 7
04 LOAD_FAST 0
05 POP_JUMP_IF_TRUE 2
07 LOAD_GLOBAL print
09 LOAD_CONST_SMALL_INT 2
10 CALL_FUNCTION n=1 nkw=0
12 POP_TOP
13 LOAD_CONST_NONE
14 RETURN_VALUE
File cmdline/cmd_showbc_opt.py, code block 'f4' (descriptor: \.\+, bytecode @\.\+ 24 bytes)
Raw bytecode (code_info_size=9, bytecode_size=15):
11 0e 06 08 80 1d 22 22 23 42 42 42 40 b0 43 3b
12 07 82 34 01 59 51 63
arg names: x
(N_STATE 3)
(N_EXC_STACK 0)
bc=0 line=1
bc=0 line=30
bc=2 line=31
bc=4 line=32
bc=7 line=33
00 JUMP 4
02 JUMP 4
04 LOAD_FAST 0
05 POP_JUMP_IF_TRUE 2
07 LOAD_GLOBAL print
09 LOAD_CONST_SMALL_INT 2
10 CALL_FUNCTION n=1 nkw=0
12 POP_TOP
13 LOAD_CONST_NONE
14 RETURN_VALUE
mem: total=\\d\+, current=\\d\+, peak=\\d\+
stack: \\d\+ out of \\d\+
GC: total: \\d\+, used: \\d\+, free: \\d\+
No. of 1-blocks: \\d\+, 2-blocks: \\d\+, max blk sz: \\d\+, max free sz: \\d\+