From a4c52c5a3d19b5527023fedfaae96cb717d03802 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 5 Dec 2014 19:35:18 +0000 Subject: [PATCH] py: Optimise lexer by exposing lexer type. mp_lexer_t type is exposed, mp_token_t type is removed, and simple lexer functions (like checking current token kind) are now inlined. This saves 784 bytes ROM on 32-bit unix, 348 bytes on stmhal, and 460 bytes on bare-arm. It also saves a tiny bit of RAM since mp_lexer_t is a bit smaller. Also will run a bit more efficiently. --- bare-arm/main.c | 2 +- py/builtinimport.c | 2 +- py/lexer.c | 155 +++++++++++++++++---------------------------- py/lexer.h | 42 +++++++----- py/lexerstr.c | 2 + py/lexerunix.c | 1 + py/parse.c | 58 ++++++++--------- py/parsehelper.c | 4 +- py/runtime.c | 2 +- stmhal/pyexec.c | 2 +- unix/main.c | 6 +- 11 files changed, 123 insertions(+), 153 deletions(-) diff --git a/bare-arm/main.c b/bare-arm/main.c index 476a08ba0..3c187e5fb 100644 --- a/bare-arm/main.c +++ b/bare-arm/main.c @@ -32,7 +32,7 @@ void do_str(const char *src) { } // parse okay - qstr source_name = mp_lexer_source_name(lex); + qstr source_name = lex->source_name; mp_lexer_free(lex); mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true); diff --git a/py/builtinimport.c b/py/builtinimport.c index c96a7d4ae..2910f8d97 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -127,7 +127,7 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) { } #if MICROPY_PY___FILE__ - qstr source_name = mp_lexer_source_name(lex); + qstr source_name = lex->source_name; mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name)); #endif diff --git a/py/lexer.c b/py/lexer.c index a9444645a..a93d8ad0d 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -42,32 +42,10 @@ // TODO seems that CPython allows NULL byte in the input stream // don't know if that's intentional or not, but we don't allow it -struct _mp_lexer_t { - qstr source_name; // name of source - void *stream_data; // 
data for stream - mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte - mp_lexer_stream_close_t stream_close; // stream callback to free - - unichar chr0, chr1, chr2; // current cached characters from source - - mp_uint_t line; // source line - mp_uint_t column; // source column - - mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit - mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines - - mp_uint_t alloc_indent_level; - mp_uint_t num_indent_level; - uint16_t *indent_level; - - vstr_t vstr; - mp_token_t tok_cur; -}; - mp_uint_t mp_optimise_value; // TODO replace with a call to a standard function -bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { +STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { mp_uint_t i = 0; while (i < len && *str == *strn) { @@ -79,27 +57,6 @@ bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { return i == len && *str == 0; } -#ifdef MICROPY_DEBUG_PRINTERS -void mp_token_show(const mp_token_t *tok) { - printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:" UINT_FMT, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len); - if (tok->str != NULL && tok->len > 0) { - const byte *i = (const byte *)tok->str; - const byte *j = (const byte *)i + tok->len; - printf(" "); - while (i < j) { - unichar c = utf8_get_char(i); - i = utf8_next_char(i); - if (unichar_isprint(c)) { - printf("%c", c); - } else { - printf("?"); - } - } - } - printf("\n"); -} -#endif - #define CUR_CHAR(lex) ((lex)->chr0) STATIC bool is_end(mp_lexer_t *lex) { @@ -210,7 +167,7 @@ STATIC void next_char(mp_lexer_t *lex) { } } -void indent_push(mp_lexer_t *lex, mp_uint_t indent) { +STATIC void indent_push(mp_lexer_t *lex, mp_uint_t indent) { if (lex->num_indent_level >= lex->alloc_indent_level) { // TODO use m_renew_maybe and somehow indicate an error if it fails... 
probably by using MP_TOKEN_MEMORY_ERROR lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC); @@ -219,11 +176,11 @@ void indent_push(mp_lexer_t *lex, mp_uint_t indent) { lex->indent_level[lex->num_indent_level++] = indent; } -mp_uint_t indent_top(mp_lexer_t *lex) { +STATIC mp_uint_t indent_top(mp_lexer_t *lex) { return lex->indent_level[lex->num_indent_level - 1]; } -void indent_pop(mp_lexer_t *lex) { +STATIC void indent_pop(mp_lexer_t *lex) { lex->num_indent_level -= 1; } @@ -335,7 +292,10 @@ STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) { return true; } -STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) { +STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) { + // start new token text + vstr_reset(&lex->vstr); + // skip white space and comments bool had_physical_newline = false; while (!is_end(lex)) { @@ -355,12 +315,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs next_char(lex); if (!is_physical_newline(lex)) { // SyntaxError: unexpected character after line continuation character - tok->src_line = lex->line; - tok->src_column = lex->column; - tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION; - vstr_reset(&lex->vstr); - tok->str = vstr_str(&lex->vstr); - tok->len = 0; + lex->tok_line = lex->line; + lex->tok_column = lex->column; + lex->tok_kind = MP_TOKEN_BAD_LINE_CONTINUATION; return; } else { next_char(lex); @@ -371,29 +328,26 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } // set token source information - tok->src_line = lex->line; - tok->src_column = lex->column; - - // start new token text - vstr_reset(&lex->vstr); + lex->tok_line = lex->line; + lex->tok_column = lex->column; if (first_token && lex->line == 1 && lex->column != 1) { // check that the first token is in the first column // if first token is not on 
first line, we get a physical newline and // this check is done as part of normal indent/dedent checking below // (done to get equivalence with CPython) - tok->kind = MP_TOKEN_INDENT; + lex->tok_kind = MP_TOKEN_INDENT; } else if (lex->emit_dent < 0) { - tok->kind = MP_TOKEN_DEDENT; + lex->tok_kind = MP_TOKEN_DEDENT; lex->emit_dent += 1; } else if (lex->emit_dent > 0) { - tok->kind = MP_TOKEN_INDENT; + lex->tok_kind = MP_TOKEN_INDENT; lex->emit_dent -= 1; } else if (had_physical_newline && lex->nested_bracket_level == 0) { - tok->kind = MP_TOKEN_NEWLINE; + lex->tok_kind = MP_TOKEN_NEWLINE; mp_uint_t num_spaces = lex->column - 1; lex->emit_dent = 0; @@ -407,20 +361,20 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs lex->emit_dent -= 1; } if (num_spaces != indent_top(lex)) { - tok->kind = MP_TOKEN_DEDENT_MISMATCH; + lex->tok_kind = MP_TOKEN_DEDENT_MISMATCH; } } } else if (is_end(lex)) { if (indent_top(lex) > 0) { - tok->kind = MP_TOKEN_NEWLINE; + lex->tok_kind = MP_TOKEN_NEWLINE; lex->emit_dent = 0; while (indent_top(lex) > 0) { indent_pop(lex); lex->emit_dent -= 1; } } else { - tok->kind = MP_TOKEN_END; + lex->tok_kind = MP_TOKEN_END; } } else if (is_char_or(lex, '\'', '\"') @@ -451,9 +405,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs // set token kind if (is_bytes) { - tok->kind = MP_TOKEN_BYTES; + lex->tok_kind = MP_TOKEN_BYTES; } else { - tok->kind = MP_TOKEN_STRING; + lex->tok_kind = MP_TOKEN_STRING; } // get first quoting character @@ -566,14 +520,14 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs // check we got the required end quotes if (n_closing < num_quotes) { - tok->kind = MP_TOKEN_LONELY_STRING_OPEN; + lex->tok_kind = MP_TOKEN_LONELY_STRING_OPEN; } // cut off the end quotes from the token text vstr_cut_tail_bytes(&lex->vstr, n_closing); } else if (is_head_of_identifier(lex)) { - tok->kind = MP_TOKEN_NAME; + lex->tok_kind = MP_TOKEN_NAME; // get 
first char vstr_add_char(&lex->vstr, CUR_CHAR(lex)); @@ -586,7 +540,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) { - tok->kind = MP_TOKEN_NUMBER; + lex->tok_kind = MP_TOKEN_NUMBER; // get first char vstr_add_char(&lex->vstr, CUR_CHAR(lex)); @@ -621,9 +575,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs vstr_add_char(&lex->vstr, '.'); next_char(lex); next_char(lex); - tok->kind = MP_TOKEN_ELLIPSIS; + lex->tok_kind = MP_TOKEN_ELLIPSIS; } else { - tok->kind = MP_TOKEN_DEL_PERIOD; + lex->tok_kind = MP_TOKEN_DEL_PERIOD; } } else { @@ -645,7 +599,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs if (*t == 0) { // didn't match any delimiter or operator characters - tok->kind = MP_TOKEN_INVALID; + lex->tok_kind = MP_TOKEN_INVALID; } else { // matched a delimiter or operator character @@ -670,7 +624,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs next_char(lex); tok_enc_index = t_index; } else { - tok->kind = MP_TOKEN_INVALID; + lex->tok_kind = MP_TOKEN_INVALID; goto tok_enc_no_match; } break; @@ -692,37 +646,33 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } // set token kind - tok->kind = tok_enc_kind[tok_enc_index]; + lex->tok_kind = tok_enc_kind[tok_enc_index]; tok_enc_no_match: // compute bracket level for implicit line joining - if (tok->kind == MP_TOKEN_DEL_PAREN_OPEN || tok->kind == MP_TOKEN_DEL_BRACKET_OPEN || tok->kind == MP_TOKEN_DEL_BRACE_OPEN) { + if (lex->tok_kind == MP_TOKEN_DEL_PAREN_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACKET_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACE_OPEN) { lex->nested_bracket_level += 1; - } else if (tok->kind == MP_TOKEN_DEL_PAREN_CLOSE || tok->kind == MP_TOKEN_DEL_BRACKET_CLOSE || tok->kind == MP_TOKEN_DEL_BRACE_CLOSE) { + } else if (lex->tok_kind == 
MP_TOKEN_DEL_PAREN_CLOSE || lex->tok_kind == MP_TOKEN_DEL_BRACKET_CLOSE || lex->tok_kind == MP_TOKEN_DEL_BRACE_CLOSE) { lex->nested_bracket_level -= 1; } } } - // point token text to vstr buffer - tok->str = vstr_str(&lex->vstr); - tok->len = vstr_len(&lex->vstr); - // check for keywords - if (tok->kind == MP_TOKEN_NAME) { + if (lex->tok_kind == MP_TOKEN_NAME) { // We check for __debug__ here and convert it to its value. This is so // the parser gives a syntax error on, eg, x.__debug__. Otherwise, we // need to check for this special token in many places in the compiler. // TODO improve speed of these string comparisons //for (mp_int_t i = 0; tok_kw[i] != NULL; i++) { for (mp_int_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) { - if (str_strn_equal(tok_kw[i], tok->str, tok->len)) { + if (str_strn_equal(tok_kw[i], lex->vstr.buf, lex->vstr.len)) { if (i == MP_ARRAY_SIZE(tok_kw) - 1) { // tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__" - tok->kind = (mp_optimise_value == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE); + lex->tok_kind = (mp_optimise_value == 0 ? 
MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE); } else { - tok->kind = MP_TOKEN_KW_FALSE + i; + lex->tok_kind = MP_TOKEN_KW_FALSE + i; } break; } @@ -782,7 +732,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_ } // preload first token - mp_lexer_next_token_into(lex, &lex->tok_cur, true); + mp_lexer_next_token_into(lex, true); return lex; } @@ -798,18 +748,27 @@ void mp_lexer_free(mp_lexer_t *lex) { } } -qstr mp_lexer_source_name(mp_lexer_t *lex) { - return lex->source_name; -} - void mp_lexer_to_next(mp_lexer_t *lex) { - mp_lexer_next_token_into(lex, &lex->tok_cur, false); + mp_lexer_next_token_into(lex, false); } -const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex) { - return &lex->tok_cur; -} - -bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind) { - return lex->tok_cur.kind == kind; +#if MICROPY_DEBUG_PRINTERS +void mp_lexer_show_token(const mp_lexer_t *lex) { + printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:%u", lex->tok_line, lex->tok_column, lex->tok_kind, lex->vstr.buf, lex->vstr.len); + if (lex->vstr.len > 0) { + const byte *i = (const byte *)lex->vstr.buf; + const byte *j = (const byte *)i + lex->vstr.len; + printf(" "); + while (i < j) { + unichar c = utf8_get_char(i); + i = utf8_next_char(i); + if (unichar_isprint(c)) { + printf("%c", c); + } else { + printf("?"); + } + } + } + printf("\n"); } +#endif diff --git a/py/lexer.h b/py/lexer.h index d70735f6d..c2f621d4c 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -130,15 +130,6 @@ typedef enum _mp_token_kind_t { MP_TOKEN_DEL_MINUS_MORE, } mp_token_kind_t; -typedef struct _mp_token_t { - mp_uint_t src_line; // source line - mp_uint_t src_column; // source column - - mp_token_kind_t kind; // kind of token - const char *str; // string of token (valid only while this token is current token) - mp_uint_t len; // (byte) length of string of token -} mp_token_t; - // the next-byte function must return the next byte in the stream // it must return MP_LEXER_EOF if end of 
stream // it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF @@ -146,21 +137,38 @@ typedef struct _mp_token_t { typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*); typedef void (*mp_lexer_stream_close_t)(void*); -typedef struct _mp_lexer_t mp_lexer_t; +// this data structure is exposed for efficiency +// public members are: source_name, tok_line, tok_column, tok_kind, vstr +typedef struct _mp_lexer_t { + qstr source_name; // name of source + void *stream_data; // data for stream + mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte + mp_lexer_stream_close_t stream_close; // stream callback to free -void mp_token_show(const mp_token_t *tok); + unichar chr0, chr1, chr2; // current cached characters from source + + mp_uint_t line; // current source line + mp_uint_t column; // current source column + + mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit + mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines + + mp_uint_t alloc_indent_level; + mp_uint_t num_indent_level; + uint16_t *indent_level; + + mp_uint_t tok_line; // token source line + mp_uint_t tok_column; // token source column + mp_token_kind_t tok_kind; // token kind + vstr_t vstr; // token data +} mp_lexer_t; mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close); mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len); void mp_lexer_free(mp_lexer_t *lex); -qstr mp_lexer_source_name(mp_lexer_t *lex); void mp_lexer_to_next(mp_lexer_t *lex); -const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex); -bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind); - -bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex); -bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg); +void mp_lexer_show_token(const mp_lexer_t *lex); 
/******************************************************************/ // platform specific import function; must be implemented for a specific port diff --git a/py/lexerstr.c b/py/lexerstr.c index a1f7ce41d..c3456b9bd 100644 --- a/py/lexerstr.c +++ b/py/lexerstr.c @@ -24,6 +24,8 @@ * THE SOFTWARE. */ +#include + #include "mpconfig.h" #include "misc.h" #include "qstr.h" diff --git a/py/lexerunix.c b/py/lexerunix.c index 52eac9eda..8e3241ad0 100644 --- a/py/lexerunix.c +++ b/py/lexerunix.c @@ -30,6 +30,7 @@ #if MICROPY_HELPER_LEXER_UNIX #include +#include #include #include #include diff --git a/py/parse.c b/py/parse.c index 092f6e65f..6912920e3 100644 --- a/py/parse.c +++ b/py/parse.c @@ -155,7 +155,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) { assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE); mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK; assert(rule_id < RULE_maximum_number_of); - push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0); + push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0); } STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) { @@ -298,17 +298,17 @@ STATIC void push_result_string(parser_t *parser, mp_uint_t src_line, const char push_result_node(parser, (mp_parse_node_t)pn); } -STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) { - const mp_token_t *tok = mp_lexer_cur(lex); +STATIC void push_result_token(parser_t *parser) { mp_parse_node_t pn; - if (tok->kind == MP_TOKEN_NAME) { - pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len)); - } else if (tok->kind == MP_TOKEN_NUMBER) { + mp_lexer_t *lex = parser->lexer; + if (lex->tok_kind == MP_TOKEN_NAME) { + pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len)); + } else if (lex->tok_kind == MP_TOKEN_NUMBER) { bool dec = false; bool small_int = true; mp_int_t int_val 
= 0; - mp_uint_t len = tok->len; - const char *str = tok->str; + mp_uint_t len = lex->vstr.len; + const char *str = lex->vstr.buf; mp_uint_t base = 0; mp_uint_t i = mp_parse_num_base(str, len, &base); bool overflow = false; @@ -343,29 +343,29 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) { } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len)); } - } else if (tok->kind == MP_TOKEN_STRING) { + } else if (lex->tok_kind == MP_TOKEN_STRING) { // Don't automatically intern all strings. doc strings (which are usually large) // will be discarded by the compiler, and so we shouldn't intern them. qstr qst = MP_QSTR_NULL; - if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) { + if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) { // intern short strings - qst = qstr_from_strn(tok->str, tok->len); + qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len); } else { // check if this string is already interned - qst = qstr_find_strn(tok->str, tok->len); + qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len); } if (qst != MP_QSTR_NULL) { // qstr exists, make a leaf node pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst); } else { // not interned, make a node holding a pointer to the string data - push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len); + push_result_string(parser, lex->tok_line, lex->vstr.buf, lex->vstr.len); return; } - } else if (tok->kind == MP_TOKEN_BYTES) { - pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len)); + } else if (lex->tok_kind == MP_TOKEN_BYTES) { + pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(lex->vstr.buf, lex->vstr.len)); } else { - pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind); + pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind); } push_result_node(parser, pn); } @@ -414,7 +414,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p case 
MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break; default: top_level_rule = RULE_file_input; } - push_rule(&parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0); + push_rule(&parser, lex->tok_line, rules[top_level_rule], 0); // parse! @@ -454,8 +454,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p for (; i < n - 1; ++i) { switch (rule->arg[i] & RULE_ARG_KIND_MASK) { case RULE_ARG_TOK: - if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) { - push_result_token(&parser, lex); + if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) { + push_result_token(&parser); mp_lexer_to_next(lex); goto next_rule; } @@ -469,8 +469,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p } } if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { - if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) { - push_result_token(&parser, lex); + if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) { + push_result_token(&parser); mp_lexer_to_next(lex); } else { backtrack = true; @@ -507,10 +507,10 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p case RULE_ARG_TOK: // need to match a token tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK; - if (mp_lexer_is_kind(lex, tok_kind)) { + if (lex->tok_kind == tok_kind) { // matched token if (tok_kind == MP_TOKEN_NAME) { - push_result_token(&parser, lex); + push_result_token(&parser); } mp_lexer_to_next(lex); } else { @@ -657,11 +657,11 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p mp_uint_t arg = rule->arg[i & 1 & n]; switch (arg & RULE_ARG_KIND_MASK) { case RULE_ARG_TOK: - if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) { + if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) { if (i & 1 & n) { // separators which are tokens are not pushed to result stack } else { - push_result_token(&parser, lex); + push_result_token(&parser); } mp_lexer_to_next(lex); // got element 
of list, so continue parsing list @@ -722,7 +722,7 @@ memory_error: } // check we are at the end of the token stream - if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) { + if (lex->tok_kind != MP_TOKEN_END) { goto syntax_error; } @@ -745,9 +745,9 @@ finished: return result; syntax_error: - if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) { + if (lex->tok_kind == MP_TOKEN_INDENT) { *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT; - } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) { + } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) { *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT; } else { *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX; @@ -755,7 +755,7 @@ syntax_error: // debugging: print the rule name that failed and the token printf("rule: %s\n", rule->rule_name); #if MICROPY_DEBUG_PRINTERS - mp_token_show(mp_lexer_cur(lex)); + mp_lexer_show_token(lex); #endif #endif } diff --git a/py/parsehelper.c b/py/parsehelper.c index a6c54e8fc..f30471067 100644 --- a/py/parsehelper.c +++ b/py/parsehelper.c @@ -43,7 +43,7 @@ #define STR_INVALID_SYNTAX "invalid syntax" void mp_parse_show_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_error_kind) { - printf(" File \"%s\", line " UINT_FMT ", column " UINT_FMT "\n", qstr_str(mp_lexer_source_name(lex)), mp_lexer_cur(lex)->src_line, mp_lexer_cur(lex)->src_column); + printf(" File \"%s\", line " UINT_FMT ", column " UINT_FMT "\n", qstr_str(lex->source_name), lex->tok_line, lex->tok_column); switch (parse_error_kind) { case MP_PARSE_ERROR_MEMORY: printf("MemoryError: %s\n", STR_MEMORY); @@ -88,7 +88,7 @@ mp_obj_t mp_parse_make_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_er // add traceback to give info about file name and location // we don't have a 'block' name, so just pass the NULL qstr to indicate this - mp_obj_exception_add_traceback(exc, mp_lexer_source_name(lex), mp_lexer_cur(lex)->src_line, MP_QSTR_NULL); + mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, 
MP_QSTR_NULL); return exc; } diff --git a/py/runtime.c b/py/runtime.c index c0ae4726f..463e325d2 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -1258,7 +1258,7 @@ mp_obj_t mp_parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t parse_i nlr_raise(exc); } - qstr source_name = mp_lexer_source_name(lex); + qstr source_name = lex->source_name; mp_lexer_free(lex); // save context and set new context diff --git a/stmhal/pyexec.c b/stmhal/pyexec.c index 6bf8009a1..36a496aa1 100644 --- a/stmhal/pyexec.c +++ b/stmhal/pyexec.c @@ -66,7 +66,7 @@ STATIC int parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t input_ki mp_parse_error_kind_t parse_error_kind; mp_parse_node_t pn = mp_parse(lex, input_kind, &parse_error_kind); - qstr source_name = mp_lexer_source_name(lex); + qstr source_name = lex->source_name; // check for parse error if (pn == MP_PARSE_NODE_NULL) { diff --git a/unix/main.c b/unix/main.c index 2ade40b18..6733bbdda 100644 --- a/unix/main.c +++ b/unix/main.c @@ -114,8 +114,8 @@ STATIC int execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, if (0) { // just tokenise - while (!mp_lexer_is_kind(lex, MP_TOKEN_END)) { - mp_token_show(mp_lexer_cur(lex)); + while (lex->tok_kind != MP_TOKEN_END) { + mp_lexer_show_token(lex); mp_lexer_to_next(lex); } mp_lexer_free(lex); @@ -132,7 +132,7 @@ STATIC int execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, return 1; } - qstr source_name = mp_lexer_source_name(lex); + qstr source_name = lex->source_name; #if MICROPY_PY___FILE__ if (input_kind == MP_PARSE_FILE_INPUT) { mp_store_global(MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));