py: Optimise lexer by exposing lexer type.

mp_lexer_t type is exposed, mp_token_t type is removed, and simple lexer
functions (like checking current token kind) are now inlined.

This saves 784 bytes ROM on 32-bit unix, 348 bytes on stmhal, and 460
bytes on bare-arm.  It also saves a tiny bit of RAM since mp_lexer_t
is a bit smaller.  The code will also run a bit more efficiently.
This commit is contained in:
Damien George 2014-12-05 19:35:18 +00:00
parent 41c07d5b80
commit a4c52c5a3d
11 changed files with 123 additions and 153 deletions

View File

@ -32,7 +32,7 @@ void do_str(const char *src) {
}
// parse okay
qstr source_name = mp_lexer_source_name(lex);
qstr source_name = lex->source_name;
mp_lexer_free(lex);
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true);

View File

@ -127,7 +127,7 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
}
#if MICROPY_PY___FILE__
qstr source_name = mp_lexer_source_name(lex);
qstr source_name = lex->source_name;
mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));
#endif

View File

@ -42,32 +42,10 @@
// TODO seems that CPython allows NULL byte in the input stream
// don't know if that's intentional or not, but we don't allow it
struct _mp_lexer_t {
qstr source_name; // name of source
void *stream_data; // data for stream
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
mp_lexer_stream_close_t stream_close; // stream callback to free
unichar chr0, chr1, chr2; // current cached characters from source
mp_uint_t line; // source line
mp_uint_t column; // source column
mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
mp_uint_t alloc_indent_level;
mp_uint_t num_indent_level;
uint16_t *indent_level;
vstr_t vstr;
mp_token_t tok_cur;
};
mp_uint_t mp_optimise_value;
// TODO replace with a call to a standard function
bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
mp_uint_t i = 0;
while (i < len && *str == *strn) {
@ -79,27 +57,6 @@ bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
return i == len && *str == 0;
}
#ifdef MICROPY_DEBUG_PRINTERS
void mp_token_show(const mp_token_t *tok) {
printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:" UINT_FMT, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len);
if (tok->str != NULL && tok->len > 0) {
const byte *i = (const byte *)tok->str;
const byte *j = (const byte *)i + tok->len;
printf(" ");
while (i < j) {
unichar c = utf8_get_char(i);
i = utf8_next_char(i);
if (unichar_isprint(c)) {
printf("%c", c);
} else {
printf("?");
}
}
}
printf("\n");
}
#endif
#define CUR_CHAR(lex) ((lex)->chr0)
STATIC bool is_end(mp_lexer_t *lex) {
@ -210,7 +167,7 @@ STATIC void next_char(mp_lexer_t *lex) {
}
}
void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
STATIC void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
if (lex->num_indent_level >= lex->alloc_indent_level) {
// TODO use m_renew_maybe and somehow indicate an error if it fails... probably by using MP_TOKEN_MEMORY_ERROR
lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC);
@ -219,11 +176,11 @@ void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
lex->indent_level[lex->num_indent_level++] = indent;
}
mp_uint_t indent_top(mp_lexer_t *lex) {
STATIC mp_uint_t indent_top(mp_lexer_t *lex) {
return lex->indent_level[lex->num_indent_level - 1];
}
void indent_pop(mp_lexer_t *lex) {
STATIC void indent_pop(mp_lexer_t *lex) {
lex->num_indent_level -= 1;
}
@ -335,7 +292,10 @@ STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) {
return true;
}
STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) {
STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
// start new token text
vstr_reset(&lex->vstr);
// skip white space and comments
bool had_physical_newline = false;
while (!is_end(lex)) {
@ -355,12 +315,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
next_char(lex);
if (!is_physical_newline(lex)) {
// SyntaxError: unexpected character after line continuation character
tok->src_line = lex->line;
tok->src_column = lex->column;
tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION;
vstr_reset(&lex->vstr);
tok->str = vstr_str(&lex->vstr);
tok->len = 0;
lex->tok_line = lex->line;
lex->tok_column = lex->column;
lex->tok_kind = MP_TOKEN_BAD_LINE_CONTINUATION;
return;
} else {
next_char(lex);
@ -371,29 +328,26 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
// set token source information
tok->src_line = lex->line;
tok->src_column = lex->column;
// start new token text
vstr_reset(&lex->vstr);
lex->tok_line = lex->line;
lex->tok_column = lex->column;
if (first_token && lex->line == 1 && lex->column != 1) {
// check that the first token is in the first column
// if first token is not on first line, we get a physical newline and
// this check is done as part of normal indent/dedent checking below
// (done to get equivalence with CPython)
tok->kind = MP_TOKEN_INDENT;
lex->tok_kind = MP_TOKEN_INDENT;
} else if (lex->emit_dent < 0) {
tok->kind = MP_TOKEN_DEDENT;
lex->tok_kind = MP_TOKEN_DEDENT;
lex->emit_dent += 1;
} else if (lex->emit_dent > 0) {
tok->kind = MP_TOKEN_INDENT;
lex->tok_kind = MP_TOKEN_INDENT;
lex->emit_dent -= 1;
} else if (had_physical_newline && lex->nested_bracket_level == 0) {
tok->kind = MP_TOKEN_NEWLINE;
lex->tok_kind = MP_TOKEN_NEWLINE;
mp_uint_t num_spaces = lex->column - 1;
lex->emit_dent = 0;
@ -407,20 +361,20 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
lex->emit_dent -= 1;
}
if (num_spaces != indent_top(lex)) {
tok->kind = MP_TOKEN_DEDENT_MISMATCH;
lex->tok_kind = MP_TOKEN_DEDENT_MISMATCH;
}
}
} else if (is_end(lex)) {
if (indent_top(lex) > 0) {
tok->kind = MP_TOKEN_NEWLINE;
lex->tok_kind = MP_TOKEN_NEWLINE;
lex->emit_dent = 0;
while (indent_top(lex) > 0) {
indent_pop(lex);
lex->emit_dent -= 1;
}
} else {
tok->kind = MP_TOKEN_END;
lex->tok_kind = MP_TOKEN_END;
}
} else if (is_char_or(lex, '\'', '\"')
@ -451,9 +405,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
// set token kind
if (is_bytes) {
tok->kind = MP_TOKEN_BYTES;
lex->tok_kind = MP_TOKEN_BYTES;
} else {
tok->kind = MP_TOKEN_STRING;
lex->tok_kind = MP_TOKEN_STRING;
}
// get first quoting character
@ -566,14 +520,14 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
// check we got the required end quotes
if (n_closing < num_quotes) {
tok->kind = MP_TOKEN_LONELY_STRING_OPEN;
lex->tok_kind = MP_TOKEN_LONELY_STRING_OPEN;
}
// cut off the end quotes from the token text
vstr_cut_tail_bytes(&lex->vstr, n_closing);
} else if (is_head_of_identifier(lex)) {
tok->kind = MP_TOKEN_NAME;
lex->tok_kind = MP_TOKEN_NAME;
// get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
@ -586,7 +540,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
} else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
tok->kind = MP_TOKEN_NUMBER;
lex->tok_kind = MP_TOKEN_NUMBER;
// get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
@ -621,9 +575,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
vstr_add_char(&lex->vstr, '.');
next_char(lex);
next_char(lex);
tok->kind = MP_TOKEN_ELLIPSIS;
lex->tok_kind = MP_TOKEN_ELLIPSIS;
} else {
tok->kind = MP_TOKEN_DEL_PERIOD;
lex->tok_kind = MP_TOKEN_DEL_PERIOD;
}
} else {
@ -645,7 +599,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
if (*t == 0) {
// didn't match any delimiter or operator characters
tok->kind = MP_TOKEN_INVALID;
lex->tok_kind = MP_TOKEN_INVALID;
} else {
// matched a delimiter or operator character
@ -670,7 +624,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
next_char(lex);
tok_enc_index = t_index;
} else {
tok->kind = MP_TOKEN_INVALID;
lex->tok_kind = MP_TOKEN_INVALID;
goto tok_enc_no_match;
}
break;
@ -692,37 +646,33 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
// set token kind
tok->kind = tok_enc_kind[tok_enc_index];
lex->tok_kind = tok_enc_kind[tok_enc_index];
tok_enc_no_match:
// compute bracket level for implicit line joining
if (tok->kind == MP_TOKEN_DEL_PAREN_OPEN || tok->kind == MP_TOKEN_DEL_BRACKET_OPEN || tok->kind == MP_TOKEN_DEL_BRACE_OPEN) {
if (lex->tok_kind == MP_TOKEN_DEL_PAREN_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACKET_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACE_OPEN) {
lex->nested_bracket_level += 1;
} else if (tok->kind == MP_TOKEN_DEL_PAREN_CLOSE || tok->kind == MP_TOKEN_DEL_BRACKET_CLOSE || tok->kind == MP_TOKEN_DEL_BRACE_CLOSE) {
} else if (lex->tok_kind == MP_TOKEN_DEL_PAREN_CLOSE || lex->tok_kind == MP_TOKEN_DEL_BRACKET_CLOSE || lex->tok_kind == MP_TOKEN_DEL_BRACE_CLOSE) {
lex->nested_bracket_level -= 1;
}
}
}
// point token text to vstr buffer
tok->str = vstr_str(&lex->vstr);
tok->len = vstr_len(&lex->vstr);
// check for keywords
if (tok->kind == MP_TOKEN_NAME) {
if (lex->tok_kind == MP_TOKEN_NAME) {
// We check for __debug__ here and convert it to its value. This is so
// the parser gives a syntax error on, eg, x.__debug__. Otherwise, we
// need to check for this special token in many places in the compiler.
// TODO improve speed of these string comparisons
//for (mp_int_t i = 0; tok_kw[i] != NULL; i++) {
for (mp_int_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
if (str_strn_equal(tok_kw[i], tok->str, tok->len)) {
if (str_strn_equal(tok_kw[i], lex->vstr.buf, lex->vstr.len)) {
if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
// tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
tok->kind = (mp_optimise_value == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
lex->tok_kind = (mp_optimise_value == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
} else {
tok->kind = MP_TOKEN_KW_FALSE + i;
lex->tok_kind = MP_TOKEN_KW_FALSE + i;
}
break;
}
@ -782,7 +732,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
}
// preload first token
mp_lexer_next_token_into(lex, &lex->tok_cur, true);
mp_lexer_next_token_into(lex, true);
return lex;
}
@ -798,18 +748,27 @@ void mp_lexer_free(mp_lexer_t *lex) {
}
}
qstr mp_lexer_source_name(mp_lexer_t *lex) {
return lex->source_name;
}
void mp_lexer_to_next(mp_lexer_t *lex) {
mp_lexer_next_token_into(lex, &lex->tok_cur, false);
mp_lexer_next_token_into(lex, false);
}
const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex) {
return &lex->tok_cur;
}
bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind) {
return lex->tok_cur.kind == kind;
#if MICROPY_DEBUG_PRINTERS
// Debug helper: print the lexer's current token on a single line.
// Output is "(line:column) kind:K str:PTR len:N", followed (when the token
// text is non-empty) by a space and the token text, then a newline.
// Reads tok_line, tok_column, tok_kind and vstr from lex; does not modify lex.
void mp_lexer_show_token(const mp_lexer_t *lex) {
// NOTE(review): vstr.len is printed with %u -- confirm this matches the declared type of vstr_t.len
printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:%u", lex->tok_line, lex->tok_column, lex->tok_kind, lex->vstr.buf, lex->vstr.len);
if (lex->vstr.len > 0) {
// i walks the token bytes; j is one past the last byte of the token text
const byte *i = (const byte *)lex->vstr.buf;
const byte *j = (const byte *)i + lex->vstr.len;
printf(" ");
while (i < j) {
// fetch one character with utf8_get_char and step past it with utf8_next_char
unichar c = utf8_get_char(i);
i = utf8_next_char(i);
if (unichar_isprint(c)) {
printf("%c", c);
} else {
// characters that unichar_isprint rejects are shown as '?'
printf("?");
}
}
}
printf("\n");
}
#endif

View File

@ -130,15 +130,6 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;
typedef struct _mp_token_t {
mp_uint_t src_line; // source line
mp_uint_t src_column; // source column
mp_token_kind_t kind; // kind of token
const char *str; // string of token (valid only while this token is current token)
mp_uint_t len; // (byte) length of string of token
} mp_token_t;
// the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
@ -146,21 +137,38 @@ typedef struct _mp_token_t {
typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
typedef void (*mp_lexer_stream_close_t)(void*);
typedef struct _mp_lexer_t mp_lexer_t;
// this data structure is exposed for efficiency
// public members are: source_name, tok_line, tok_column, tok_kind, vstr
typedef struct _mp_lexer_t {
qstr source_name; // name of source
void *stream_data; // data for stream
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
mp_lexer_stream_close_t stream_close; // stream callback to free
void mp_token_show(const mp_token_t *tok);
unichar chr0, chr1, chr2; // current cached characters from source
mp_uint_t line; // current source line
mp_uint_t column; // current source column
mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
mp_uint_t alloc_indent_level;
mp_uint_t num_indent_level;
uint16_t *indent_level;
mp_uint_t tok_line; // token source line
mp_uint_t tok_column; // token source column
mp_token_kind_t tok_kind; // token kind
vstr_t vstr; // token data
} mp_lexer_t;
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
void mp_lexer_free(mp_lexer_t *lex);
qstr mp_lexer_source_name(mp_lexer_t *lex);
void mp_lexer_to_next(mp_lexer_t *lex);
const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex);
bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);
void mp_lexer_show_token(const mp_lexer_t *lex);
/******************************************************************/
// platform specific import function; must be implemented for a specific port

View File

@ -24,6 +24,8 @@
* THE SOFTWARE.
*/
#include <stdint.h>
#include "mpconfig.h"
#include "misc.h"
#include "qstr.h"

View File

@ -30,6 +30,7 @@
#if MICROPY_HELPER_LEXER_UNIX
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

View File

@ -155,7 +155,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK;
assert(rule_id < RULE_maximum_number_of);
push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0);
push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
}
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
@ -298,17 +298,17 @@ STATIC void push_result_string(parser_t *parser, mp_uint_t src_line, const char
push_result_node(parser, (mp_parse_node_t)pn);
}
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
const mp_token_t *tok = mp_lexer_cur(lex);
STATIC void push_result_token(parser_t *parser) {
mp_parse_node_t pn;
if (tok->kind == MP_TOKEN_NAME) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
} else if (tok->kind == MP_TOKEN_NUMBER) {
mp_lexer_t *lex = parser->lexer;
if (lex->tok_kind == MP_TOKEN_NAME) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
} else if (lex->tok_kind == MP_TOKEN_NUMBER) {
bool dec = false;
bool small_int = true;
mp_int_t int_val = 0;
mp_uint_t len = tok->len;
const char *str = tok->str;
mp_uint_t len = lex->vstr.len;
const char *str = lex->vstr.buf;
mp_uint_t base = 0;
mp_uint_t i = mp_parse_num_base(str, len, &base);
bool overflow = false;
@ -343,29 +343,29 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
} else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
}
} else if (tok->kind == MP_TOKEN_STRING) {
} else if (lex->tok_kind == MP_TOKEN_STRING) {
// Don't automatically intern all strings. doc strings (which are usually large)
// will be discarded by the compiler, and so we shouldn't intern them.
qstr qst = MP_QSTR_NULL;
if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
// intern short strings
qst = qstr_from_strn(tok->str, tok->len);
qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
} else {
// check if this string is already interned
qst = qstr_find_strn(tok->str, tok->len);
qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
}
if (qst != MP_QSTR_NULL) {
// qstr exists, make a leaf node
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
} else {
// not interned, make a node holding a pointer to the string data
push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
push_result_string(parser, lex->tok_line, lex->vstr.buf, lex->vstr.len);
return;
}
} else if (tok->kind == MP_TOKEN_BYTES) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
} else if (lex->tok_kind == MP_TOKEN_BYTES) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
} else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
}
push_result_node(parser, pn);
}
@ -414,7 +414,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
default: top_level_rule = RULE_file_input;
}
push_rule(&parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0);
push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
// parse!
@ -454,8 +454,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
for (; i < n - 1; ++i) {
switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
case RULE_ARG_TOK:
if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
push_result_token(&parser, lex);
if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
push_result_token(&parser);
mp_lexer_to_next(lex);
goto next_rule;
}
@ -469,8 +469,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
}
}
if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
push_result_token(&parser, lex);
if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
push_result_token(&parser);
mp_lexer_to_next(lex);
} else {
backtrack = true;
@ -507,10 +507,10 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
case RULE_ARG_TOK:
// need to match a token
tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
if (mp_lexer_is_kind(lex, tok_kind)) {
if (lex->tok_kind == tok_kind) {
// matched token
if (tok_kind == MP_TOKEN_NAME) {
push_result_token(&parser, lex);
push_result_token(&parser);
}
mp_lexer_to_next(lex);
} else {
@ -657,11 +657,11 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
mp_uint_t arg = rule->arg[i & 1 & n];
switch (arg & RULE_ARG_KIND_MASK) {
case RULE_ARG_TOK:
if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
if (i & 1 & n) {
// separators which are tokens are not pushed to result stack
} else {
push_result_token(&parser, lex);
push_result_token(&parser);
}
mp_lexer_to_next(lex);
// got element of list, so continue parsing list
@ -722,7 +722,7 @@ memory_error:
}
// check we are at the end of the token stream
if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
if (lex->tok_kind != MP_TOKEN_END) {
goto syntax_error;
}
@ -745,9 +745,9 @@ finished:
return result;
syntax_error:
if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
if (lex->tok_kind == MP_TOKEN_INDENT) {
*parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
} else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
} else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
*parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
} else {
*parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
@ -755,7 +755,7 @@ syntax_error:
// debugging: print the rule name that failed and the token
printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
mp_token_show(mp_lexer_cur(lex));
mp_lexer_show_token(lex);
#endif
#endif
}

View File

@ -43,7 +43,7 @@
#define STR_INVALID_SYNTAX "invalid syntax"
void mp_parse_show_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_error_kind) {
printf(" File \"%s\", line " UINT_FMT ", column " UINT_FMT "\n", qstr_str(mp_lexer_source_name(lex)), mp_lexer_cur(lex)->src_line, mp_lexer_cur(lex)->src_column);
printf(" File \"%s\", line " UINT_FMT ", column " UINT_FMT "\n", qstr_str(lex->source_name), lex->tok_line, lex->tok_column);
switch (parse_error_kind) {
case MP_PARSE_ERROR_MEMORY:
printf("MemoryError: %s\n", STR_MEMORY);
@ -88,7 +88,7 @@ mp_obj_t mp_parse_make_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_er
// add traceback to give info about file name and location
// we don't have a 'block' name, so just pass the NULL qstr to indicate this
mp_obj_exception_add_traceback(exc, mp_lexer_source_name(lex), mp_lexer_cur(lex)->src_line, MP_QSTR_NULL);
mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
return exc;
}

View File

@ -1258,7 +1258,7 @@ mp_obj_t mp_parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t parse_i
nlr_raise(exc);
}
qstr source_name = mp_lexer_source_name(lex);
qstr source_name = lex->source_name;
mp_lexer_free(lex);
// save context and set new context

View File

@ -66,7 +66,7 @@ STATIC int parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t input_ki
mp_parse_error_kind_t parse_error_kind;
mp_parse_node_t pn = mp_parse(lex, input_kind, &parse_error_kind);
qstr source_name = mp_lexer_source_name(lex);
qstr source_name = lex->source_name;
// check for parse error
if (pn == MP_PARSE_NODE_NULL) {

View File

@ -114,8 +114,8 @@ STATIC int execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind,
if (0) {
// just tokenise
while (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
mp_token_show(mp_lexer_cur(lex));
while (lex->tok_kind != MP_TOKEN_END) {
mp_lexer_show_token(lex);
mp_lexer_to_next(lex);
}
mp_lexer_free(lex);
@ -132,7 +132,7 @@ STATIC int execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind,
return 1;
}
qstr source_name = mp_lexer_source_name(lex);
qstr source_name = lex->source_name;
#if MICROPY_PY___FILE__
if (input_kind == MP_PARSE_FILE_INPUT) {
mp_store_global(MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));