py: Improve encoding scheme for line-number to bytecode map.

Reduces the amount of RAM needed to store the line-number to bytecode
map in the bytecode prelude by about a factor of 10 on average.

Using the CPython 3.4 stdlib for statistics: previously, an average of
13 bytes were used per (bytecode offset, line-number offset) pair; with
this improvement, that is down to 1.3 bytes on average.

The large RAM usage before was due to some very large steps in line
numbers, both from the jump to the first line of a function located far
down in the file, and from functions that contain big comments and/or
big strings (both cases were significant).

Although the savings are large on average for the CPython stdlib, it
won't have such a big effect for small scripts used in embedded
programming.

Addresses issue #648.
This commit is contained in:
Damien George 2014-07-31 16:12:01 +00:00
parent 8cc2018d47
commit 4747becc64
3 changed files with 48 additions and 10 deletions

View File

@ -115,12 +115,24 @@ STATIC void emit_write_code_info_qstr(emit_t* emit, qstr qstr) {
#if MICROPY_ENABLE_SOURCE_LINE
// Append entries to the code-info line-number map recording that the next
// bytes_to_skip bytes of bytecode belong to the current source line, after
// which the source line advances by lines_to_skip.
//
// Two entry encodings are used:
//   0b0LLBBBBB            - up to 31 bytes and up to 3 lines
//   0b1LLLBBBB 0bLLLLLLLL - up to 15 bytes and up to 2047 lines
//                           (low 8 bits of the line count in the second byte)
//
// At least one of bytes_to_skip / lines_to_skip must be non-zero.  The first
// byte of every entry written here is non-zero (the loop only runs while
// something remains to skip, and the long form always sets bit 7), which
// matters because the decoders stop at a zero first byte.
STATIC void emit_write_code_info_bytes_lines(emit_t* emit, uint bytes_to_skip, uint lines_to_skip) {
    assert(bytes_to_skip > 0 || lines_to_skip > 0);
    while (bytes_to_skip > 0 || lines_to_skip > 0) {
        mp_uint_t b, l;
        if (lines_to_skip <= 6 || bytes_to_skip > 0xf) {
            // use 0b0LLBBBBB encoding
            b = MIN(bytes_to_skip, 0x1f);
            if (b < bytes_to_skip) {
                // The decoders add an entry's line delta as soon as its byte
                // count has been consumed, so no lines may be skipped until
                // the whole byte range has been emitted; otherwise the rest
                // of the range would be attributed to the wrong line.
                l = 0;
            } else {
                l = MIN(lines_to_skip, 0x3);
            }
            *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5);
        } else {
            // use 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
            // bytes_to_skip <= 0xf here, so this entry completes the byte skip
            b = bytes_to_skip;
            l = MIN(lines_to_skip, 0x7ff);
            byte *ci = emit_get_cur_to_write_code_info(emit, 2);
            ci[0] = 0x80 | b | ((l >> 4) & 0x70);
            ci[1] = l & 0xff;
        }
        bytes_to_skip -= b;
        lines_to_skip -= l;
    }
}
#endif
@ -363,7 +375,6 @@ STATIC void emit_bc_set_source_line(emit_t *emit, int source_line) {
uint bytes_to_skip = emit->bytecode_offset - emit->last_source_line_offset;
uint lines_to_skip = source_line - emit->last_source_line;
emit_write_code_info_bytes_lines(emit, bytes_to_skip, lines_to_skip);
//printf(" %d %d\n", bytes_to_skip, lines_to_skip);
emit->last_source_line_offset = emit->bytecode_offset;
emit->last_source_line = source_line;
}

View File

@ -95,9 +95,18 @@ void mp_bytecode_print(const void *descr, const byte *ip, int len) {
mp_int_t bc = (code_info + code_info_size) - ip;
mp_uint_t source_line = 1;
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
for (const byte* ci = code_info + 12; *ci; ci++) {
bc += *ci & 31;
source_line += *ci >> 5;
for (const byte* ci = code_info + 12; *ci;) {
if ((ci[0] & 0x80) == 0) {
// 0b0LLBBBBB encoding
bc += ci[0] & 0x1f;
source_line += ci[0] >> 5;
ci += 1;
} else {
// 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
bc += ci[0] & 0xf;
source_line += ((ci[0] << 4) & 0x700) | ci[1];
ci += 2;
}
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
}
}

24
py/vm.c
View File

@ -931,9 +931,27 @@ exception_handler:
const byte* ci = code_info + 12;
if (*ci) {
source_line = 1;
for (; *ci && bc >= ((*ci) & 31); ci++) {
bc -= *ci & 31;
source_line += *ci >> 5;
mp_uint_t c;
while ((c = *ci)) {
mp_uint_t b, l;
if ((c & 0x80) == 0) {
// 0b0LLBBBBB encoding
b = c & 0x1f;
l = c >> 5;
ci += 1;
} else {
// 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
b = c & 0xf;
l = ((c << 4) & 0x700) | ci[1];
ci += 2;
}
if (bc >= b) {
bc -= b;
source_line += l;
} else {
// found source line corresponding to bytecode offset
break;
}
}
}
mp_obj_exception_add_traceback(nlr.ret_val, source_file, source_line, block_name);