py/lexer: Make lexer use an mp_reader as its source.

This commit is contained in:
Damien George 2016-11-16 18:27:20 +11:00
parent 66d955c218
commit 5bdf1650de
3 changed files with 23 additions and 34 deletions

View File

@ -35,10 +35,11 @@ typedef struct _mp_lexer_str32_buf_t {
uint8_t byte_off;
} mp_lexer_str32_buf_t;
STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
STATIC mp_uint_t str32_buf_next_byte(void *sb_in) {
mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
byte c = sb->val & 0xff;
if (c == 0) {
return MP_LEXER_EOF;
return MP_READER_EOF;
}
if (++sb->byte_off > 3) {
@ -51,7 +52,8 @@ STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
return c;
}
STATIC void str32_buf_free(mp_lexer_str32_buf_t *sb) {
// Releases the state object of the str32 byte source.
// sb_in is the opaque pointer that mp_lexer_new_from_str32 places as the first
// member of its mp_reader_t; taking it as void* lets this function be listed in
// that initialiser directly, without a function-pointer cast.
// NOTE(review): presumably this is the reader's close callback (it is the third
// initialiser member) -- confirm against the mp_reader_t definition.
STATIC void str32_buf_free(void *sb_in) {
// recover the concrete buffer-state type from the opaque pointer
mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
m_del_obj(mp_lexer_str32_buf_t, sb);
}
@ -63,7 +65,8 @@ mp_lexer_t *mp_lexer_new_from_str32(qstr src_name, const char *str, mp_uint_t le
sb->byte_off = (uint32_t)str & 3;
sb->src_cur = (uint32_t*)(str - sb->byte_off);
sb->val = *sb->src_cur++ >> sb->byte_off * 8;
return mp_lexer_new(src_name, sb, (mp_lexer_stream_next_byte_t)str32_buf_next_byte, (mp_lexer_stream_close_t)str32_buf_free);
mp_reader_t reader = {sb, str32_buf_next_byte, str32_buf_free};
return mp_lexer_new(src_name, reader);
}
#endif // MICROPY_ENABLE_COMPILER

View File

@ -52,6 +52,7 @@ STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
return i == len && *str == 0;
}
// end-of-input marker as seen by the lexer: the reader's MP_READER_EOF value cast
// to unichar, the type of the cached characters (chr0/chr1/chr2) it is compared with
#define MP_LEXER_EOF ((unichar)MP_READER_EOF)
// the current character is the first of the lexer's cached characters
#define CUR_CHAR(lex) ((lex)->chr0)
STATIC bool is_end(mp_lexer_t *lex) {
@ -145,7 +146,7 @@ STATIC void next_char(mp_lexer_t *lex) {
lex->chr0 = lex->chr1;
lex->chr1 = lex->chr2;
lex->chr2 = lex->stream_next_byte(lex->stream_data);
lex->chr2 = lex->reader.readbyte(lex->reader.data);
if (lex->chr0 == '\r') {
// CR is a new line, converted to LF
@ -153,7 +154,7 @@ STATIC void next_char(mp_lexer_t *lex) {
if (lex->chr1 == '\n') {
// CR LF is a single new line
lex->chr1 = lex->chr2;
lex->chr2 = lex->stream_next_byte(lex->stream_data);
lex->chr2 = lex->reader.readbyte(lex->reader.data);
}
}
@ -689,21 +690,17 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
}
}
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close) {
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
mp_lexer_t *lex = m_new_obj_maybe(mp_lexer_t);
// check for memory allocation error
if (lex == NULL) {
if (stream_close) {
stream_close(stream_data);
}
reader.close(reader.data);
return NULL;
}
lex->source_name = src_name;
lex->stream_data = stream_data;
lex->stream_next_byte = stream_next_byte;
lex->stream_close = stream_close;
lex->reader = reader;
lex->line = 1;
lex->column = 1;
lex->emit_dent = 0;
@ -724,9 +721,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
lex->indent_level[0] = 0;
// preload characters
lex->chr0 = stream_next_byte(stream_data);
lex->chr1 = stream_next_byte(stream_data);
lex->chr2 = stream_next_byte(stream_data);
lex->chr0 = reader.readbyte(reader.data);
lex->chr1 = reader.readbyte(reader.data);
lex->chr2 = reader.readbyte(reader.data);
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
if (lex->chr0 == MP_LEXER_EOF) {
@ -756,7 +753,7 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t
if (!mp_reader_new_mem(&reader, (const byte*)str, len, free_len)) {
return NULL;
}
return mp_lexer_new(src_name, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
return mp_lexer_new(src_name, reader);
}
#if MICROPY_READER_POSIX || MICROPY_READER_FATFS
@ -767,7 +764,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
if (ret != 0) {
return NULL;
}
return mp_lexer_new(qstr_from_str(filename), reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
return mp_lexer_new(qstr_from_str(filename), reader);
}
#if MICROPY_HELPER_LEXER_UNIX
@ -778,7 +775,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
if (ret != 0) {
return NULL;
}
return mp_lexer_new(filename, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
return mp_lexer_new(filename, reader);
}
#endif
@ -787,9 +784,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
void mp_lexer_free(mp_lexer_t *lex) {
if (lex) {
if (lex->stream_close) {
lex->stream_close(lex->stream_data);
}
lex->reader.close(lex->reader.data);
vstr_clear(&lex->vstr);
m_del(uint16_t, lex->indent_level, lex->alloc_indent_level);
m_del_obj(mp_lexer_t, lex);

View File

@ -30,6 +30,7 @@
#include "py/mpconfig.h"
#include "py/qstr.h"
#include "py/reader.h"
/* lexer.h -- simple tokeniser for Micro Python
*
@ -142,21 +143,11 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;
// the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
#define MP_LEXER_EOF ((unichar)(-1))
typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
typedef void (*mp_lexer_stream_close_t)(void*);
// this data structure is exposed for efficiency
// public members are: source_name, tok_line, tok_column, tok_kind, vstr
typedef struct _mp_lexer_t {
qstr source_name; // name of source
void *stream_data; // data for stream
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
mp_lexer_stream_close_t stream_close; // stream callback to free
mp_reader_t reader; // stream source
unichar chr0, chr1, chr2; // current cached characters from source
@ -176,7 +167,7 @@ typedef struct _mp_lexer_t {
vstr_t vstr; // token data
} mp_lexer_t;
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
// Create a lexer that takes its bytes from the given reader; the reader is passed
// by value and copied into the lexer.  If the lexer object cannot be allocated,
// the reader is closed and NULL is returned.
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
// Create a lexer over the bytes at str by setting up a reader with
// mp_reader_new_mem; returns NULL if mp_reader_new_mem returns false.
// NOTE(review): free_len is forwarded unchanged to mp_reader_new_mem; its exact
// meaning is not visible here -- confirm against py/reader.
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
// Close the lexer's reader and release the lexer's storage; a NULL lex is ignored.
void mp_lexer_free(mp_lexer_t *lex);