%{

#include "parser.h"
#include "errors.h"
/* NOTE(review): the <...> header names were missing from the reviewed copy of
   this file. The list below is reconstructed from the names this scanner
   uses; in particular the header providing FxOS_log is a guess -- confirm
   against the build before merging. */
#include <algorithm>
#include <cctype>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <stack>
#include <string>
#include <vector>
#include <fxos/util/log.h>

/* Values to attach to the token */
typedef Token::LexAttribute YYSTYPE;
YYSTYPE yylval;

/* Buffer for literal strings; holds at most LEX_STR_MAX characters plus the
   terminating NUL */
#define LEX_STR_MAX 1023
static char STR_buffer[LEX_STR_MAX + 1];
/* Number of characters written to string in STR mode so far */
static int STR_len = 0;

/* Input details for a single file being read */
struct Input
{
    /* File path or a dummy string at top-level */
    std::string filename;
    /* Current line */
    int line;
    /* Whether the file is from REPL */
    bool repl;
    /* Current parenthesis depth */
    int expr_depth;
    /* Flex buffer (this is a pointer) */
    YY_BUFFER_STATE buffer;
};

/* Stack of queues of files being lexed. A new entry on the stack is added
   whenever the include (.) command is used. A queue is created with its
   arguments, which are waiting to be read in sequence. */
static std::stack<std::deque<Input>> lex_inputs;

/* Input used in the last token, in case it is needed after the last command
   has been read but is still being executed */
static Input lex_last_used_input {};

/* Runs before every matched rule's action: remember which input the token
   came from, as long as there is still an input to speak of. */
#define YY_USER_ACTION \
    if(!lex_idle()) lex_last_used_input = lex_current_input();

/* Whether the lexer has no input left to read. */
bool lex_idle()
{
    return lex_inputs.size() == 0;
}

/* Current input: front of the queue at the top of the stack. Must not be
   called when lex_idle() is true. */
static Input &lex_current_input()
{
    return lex_inputs.top().front();
}

/* Name of the file the last token was read from; empty for REPL input. */
std::string lex_last_used_file()
{
    Input const &in = lex_last_used_input;
    return (in.repl ? "" : in.filename);
}

/* Push a new queue of inputs and start reading from its first element.
   An empty queue is ignored. */
static void lex_push(std::deque<Input> queue)
{
    if(queue.empty())
        return;

    lex_inputs.push(queue);
    yy_switch_to_buffer(lex_current_input().buffer);
}

/* Pop a single input when end-of-file is reached. Return whether to stop,
   i.e. whether there is no input left at all. */
static bool lex_pop()
{
    if(lex_inputs.empty())
        return true;

    /* NOTE(review): only the flex buffer is released here; nothing visible in
       this file closes the FILE opened by lex_include() -- confirm whether
       that is intended. */
    auto &queue = lex_inputs.top();
    yy_delete_buffer(queue.front().buffer);
    queue.pop_front();

    /* Queue exhausted: go back to the including level */
    if(queue.empty())
        lex_inputs.pop();

    if(lex_inputs.empty())
        return true;

    yy_switch_to_buffer(lex_current_input().buffer);
    return false;
}

/* Error messages and exceptions: format a printf-style message and throw it
   as a SyntaxError located at the current input's file and line. */
static void err(char const *format, ...)
{
    static char buf[1024];
    va_list args;

    va_start(args, format);
    vsnprintf(buf, sizeof buf, format, args);
    va_end(args);

    Input const &in = lex_current_input();
    throw SyntaxError(in.filename.c_str(), in.line, buf);
}

/* Parse numerical values as matched by {num}: an optional base prefix
   (0x/0X hexadecimal, 0b/0B binary, 0d decimal), the digits, and an optional
   k/M/G suffix which shifts the value left by 10/20/30 bits. */
long parse_num(char const *text)
{
    /* Determine base. The prefix must be skipped in every case, otherwise
       strtoul() stops right after the leading '0' and the value reads as 0;
       this includes "0d", which {num_dec} accepts. */
    int base = 10;
    if(text[0] == '0') {
        int const marker = tolower(static_cast<unsigned char>(text[1]));
        if(marker == 'x')
            base = 16, text += 2;
        else if(marker == 'b')
            base = 2, text += 2;
        else if(text[1] == 'd')
            text += 2;
    }

    char *end;
    long val = strtoul(text, &end, base);

    /* Size suffix, if any */
    switch(*end) {
    case 'k': val <<= 10; break;
    case 'M': val <<= 20; break;
    case 'G': val <<= 30; break;
    default: break;
    }
    return val;
}

%}

%option prefix="shell"
%option noyywrap
%option nounput

%x EXPR
%x STR

/* Used in error rules for word boundary violations */
letter [a-zA-Z0-9_.%]

num_hex 0[xX][a-zA-Z0-9]+
num_dec (0d)?[0-9]+
num_bin 0[bB][0-1]+
num_suffix [kMG]
num ({num_hex}|{num_dec}|{num_bin}){num_suffix}?
syscall [%][a-fA-F0-9]+
option [a-zA-Z]+=[^ ]*
symbol \.|\.?[a-zA-Z_][a-zA-Z0-9_.]*

space [ \t]+

%%

    /* NOTE(review): the <EXPR>/<STR> start-condition prefixes and <<EOF>>
       were missing from the reviewed copy of this file; they are restored
       below where the rule's own logic requires them (EXPR and STR are
       exclusive). Confirm against version control. */

<*>"#"[^\n]*            ;
<*>{space}              { return T::SPC; }

    /* Command separators: both reset the expression state */
<*>[\n]                 {
                            Input &in = lex_current_input();
                            in.line++;
                            in.expr_depth = 0;
                            BEGIN(INITIAL);
                            return T::SEPARATOR;
                        }
<*>[;]                  {
                            lex_current_input().expr_depth = 0;
                            BEGIN(INITIAL);
                            return T::SEPARATOR;
                        }

    /* Operators, only inside parenthesized expressions */
<EXPR>"+"               { return '+'; }
<EXPR>"-"               { return '-'; }
<EXPR>"*"               { return '*'; }
<EXPR>"/"               { return '/'; }
<EXPR>"%"               { return '%'; }
<EXPR>")"               {
                            /* Leave EXPR mode once the outermost parenthesis
                               is closed; the depth never goes negative */
                            int d = std::max(
                                lex_current_input().expr_depth - 1, 0);
                            lex_current_input().expr_depth = d;
                            if(d == 0)
                                BEGIN(INITIAL);
                            return ')';
                        }
<EXPR>">>"              { return '>'; }
<EXPR>"<<"              { return '<'; }

<*>"$"                  { return '$'; }
<*>"("                  {
                            lex_current_input().expr_depth++;
                            BEGIN(EXPR);
                            return '(';
                        }
":"                     { return ':'; }
".."                    { return '.'; }

    /* Literal strings: accumulated in STR_buffer, silently truncated at
       LEX_STR_MAX characters */
["]                     { BEGIN(STR); STR_len = 0; }
<STR>\"                 {
                            BEGIN(INITIAL);
                            STR_buffer[STR_len] = 0;
                            yylval.STRING = STR_buffer;
                            return T::STRING;
                        }
<STR>[^\\\n"]+          {
                            int length = std::min(yyleng,
                                LEX_STR_MAX - STR_len);
                            memcpy(STR_buffer + STR_len, yytext, length);
                            STR_len += length;
                        }
<STR>\\n                {
                            if(STR_len < LEX_STR_MAX)
                                STR_buffer[STR_len++] = '\n';
                        }
<STR>\\t                {
                            if(STR_len < LEX_STR_MAX)
                                STR_buffer[STR_len++] = '\t';
                        }

    /* yytext is only valid until the next token; lex_read() copies it */
{option}                { yylval.STRING = yytext; return T::OPTION; }
<*>{syscall}            {
                            yylval.NUM = strtoul(yytext+1, NULL, 16);
                            return T::SYSCALL;
                        }
<*>{num}                { yylval.NUM = parse_num(yytext); return T::NUM; }
<*>{symbol}             { yylval.STRING = yytext; return T::SYMBOL; }
<*>"?"                  { return '?'; }

    /* Generic error and word boundaries violations */
<*>{syscall}{letter}    { err("invalid syscall number '%s'", yytext); }
<*>{num}{letter}        { err("invalid numerical value '%s'", yytext); }
<*>.                    { err("invalid token near '%s'", yytext); }

    /* End of one input: continue with the next one unless none is left */
<<EOF>>                 { if(lex_pop()) return T::END; }

%%

/* Start lexing a string typed at the REPL. */
void lex_repl(std::string input)
{
    /* yy_scan_bytes() switches buffer, but lex_push() will fix that */
    Input in = {
        .filename = "",
        .line = 1,
        .repl = true,
        .expr_depth = 0,
        .buffer = yy_scan_bytes(input.c_str(), input.size()),
    };
    lex_push({ in });
}

/* Start lexing a sequence of files, read one after the other in the order
   given. If any of them cannot be opened, an error is logged and none of
   them is read. */
void lex_include(std::vector<std::string> paths)
{
    /* Open everything first, through a local handle. Going through the
       global yyin would leave it NULL after a failed fopen() even though the
       buffer that is currently being read may still need it, and bailing out
       halfway would leak whatever had been set up for the earlier paths. */
    std::vector<FILE *> files;
    for(auto const &path: paths) {
        FILE *fp = fopen(path.c_str(), "r");
        if(!fp) {
            FxOS_log(ERR, "cannot read '%s'", path.c_str());
            for(FILE *opened: files)
                fclose(opened);
            return;
        }
        files.push_back(fp);
    }

    std::deque<Input> ins;
    for(size_t i = 0; i < paths.size(); i++) {
        ins.push_back({
            .filename = paths[i],
            .line = 1,
            .repl = false,
            .expr_depth = 0,
            .buffer = yy_create_buffer(files[i], YY_BUF_SIZE),
        });
    }
    lex_push(ins);
}

/* Read the next token. The value attached by the matching rule is copied
   into the token: NUM for numbers and syscalls, STRING (as an owned
   std::string) for symbols, options and literal strings. Other token types
   carry a zero NUM and an empty STRING. */
Token lex_read()
{
    Token t;
    t.type = yylex();
    t.value.NUM = 0;
    t.value.STRING = "";

    switch(t.type) {
    case T::SYSCALL:
    case T::NUM:
        t.value.NUM = yylval.NUM;
        break;
    case T::SYMBOL:
    case T::OPTION:
    case T::STRING:
        t.value.STRING = std::string(yylval.STRING);
        break;
    default:
        break;
    }
    return t;
}