fxos/shell/lexer.l

262 lines
6.1 KiB
Plaintext

%{
#include "parser.h"
#include "errors.h"
#include <string>
#include <cstdarg>
#include <cctype>
#include <stack>
#include <deque>
#include <stdexcept>
#include <fmt/core.h>
#include <fxos/util/log.h>
/* Semantic value attached to a token. The rules store into yylval (NUM or
   STRING member) and lex_read() copies the relevant member into the Token it
   returns. */
typedef Token::LexAttribute YYSTYPE;
YYSTYPE yylval;
/* Buffer for literal strings. LEX_STR_MAX is the maximum number of characters
   kept from a literal; the extra byte holds the NUL terminator written when
   the closing quote is reached. Longer literals are truncated. */
#define LEX_STR_MAX 1023
static char STR_buffer[LEX_STR_MAX + 1];
/* Number of characters written to STR_buffer in STR mode so far; reset to 0
   when an opening quote is read */
static int STR_len = 0;
/* Input details for a single source being read: either a file queued by
   lex_include() or a piece of text queued by lex_repl(). */
struct Input {
/* File path, or a dummy string ("<command-line>") for REPL text */
std::string filename;
/* Current line; starts at 1 and is incremented by the newline rule */
int line;
/* Whether the input is REPL text rather than a file; when set,
   lex_last_used_file() reports an empty name */
bool repl;
/* Current parenthesis depth: incremented on "(", decremented (never below
   zero) on ")" and reset to 0 by every command separator */
int expr_depth;
/* Flex buffer (this is a pointer); deleted by lex_pop() once the input has
   been read entirely */
YY_BUFFER_STATE buffer;
};
/* Stack of queues of files being lexed. A new entry on the stack is added
whenever the include (.) command is used. A queue is created with its
arguments, which are waiting to be read in sequence. The current input is
always the front of the queue at the top of the stack. */
static std::stack<std::deque<Input>> lex_inputs;
/* Input used in the last token, in case it is needed after the last command
has been read but is still being executed. This is a copy, so it remains
usable after the original Input has been popped. */
static Input lex_last_used_input {};
/* Hook that flex runs before the action of every matched rule: snapshot the
current input, unless the lexer is idle and there is nothing to snapshot. */
#define YY_USER_ACTION \
if(!lex_idle()) lex_last_used_input = lex_current_input();
/* Whether the lexer has run out of inputs, i.e. the input stack is empty. */
bool lex_idle()
{
    return lex_inputs.empty();
}
/* Current input, i.e. the front of the queue at the top of the input stack.
   Throws std::logic_error when there is no current input; callers that may
   run while the lexer is idle should check lex_idle() first. */
static Input &lex_current_input()
{
    /* std::stack::top() on an empty stack is undefined behavior, so the
       documented "throws" contract has to be enforced explicitly. The queue
       itself is never empty: lex_push() rejects empty queues and lex_pop()
       removes a queue as soon as its last input is consumed. */
    if(lex_inputs.empty())
        throw std::logic_error("lexer has no current input");

    return lex_inputs.top().front();
}
std::string lex_last_used_file()
{
Input const &in = lex_last_used_input;
return (in.repl ? "" : in.filename);
}
/* Push a new queue of inputs. */
static void lex_push(std::deque<Input> queue)
{
if(!queue.size()) return;
lex_inputs.push(queue);
yy_switch_to_buffer(lex_current_input().buffer);
}
/* Pop a single input when end-of-file is reached. Return whether to stop. */
static bool lex_pop()
{
if(!lex_inputs.size()) return true;
auto &q = lex_inputs.top();
auto &front = q.front();
yy_delete_buffer(front.buffer);
q.pop_front();
if(q.empty()) lex_inputs.pop();
if(!lex_inputs.size()) {
return true;
}
else {
yy_switch_to_buffer(lex_current_input().buffer);
return false;
}
}
/* Error messages and exceptions */
static void err(char const *format, ...)
{
static char buf[1024];
va_list args;
va_start(args, format);
vsnprintf(buf, 1024, format, args);
va_end(args);
Input const &in = lex_current_input();
throw SyntaxError(in.filename.c_str(), in.line, buf);
}
/* Parse numerical values as matched by the {num} pattern.
   Accepted forms: plain decimal, or a two-character prefix selecting the
   base ("0x"/"0X" hexadecimal, "0b"/"0B" binary, "0d" decimal), optionally
   followed by a size suffix: k (<< 10), M (<< 20) or G (<< 30).
   Returns the parsed value; conversion stops at the first character that is
   not a digit of the selected base. */
long parse_num(char const *text)
{
    /* Determine base and skip the prefix. The "0d" prefix must be skipped
       too, otherwise strtoul() stops at the 'd' and the value reads as 0. */
    int base = 10;
    if(text[0] == '0') {
        switch(tolower(static_cast<unsigned char>(text[1]))) {
        case 'x':
            base = 16;
            text += 2;
            break;
        case 'b':
            base = 2;
            text += 2;
            break;
        case 'd':
            text += 2;
            break;
        default:
            break;
        }
    }

    char *end;
    unsigned long val = strtoul(text, &end, base);

    /* Apply the size suffix on the unsigned value so that large inputs do
       not left-shift a negative signed number */
    switch(*end) {
    case 'k':
        val <<= 10;
        break;
    case 'M':
        val <<= 20;
        break;
    case 'G':
        val <<= 30;
        break;
    default:
        break;
    }

    return static_cast<long>(val);
}
%}
%option prefix="shell"
%option noyywrap
%option nounput
/* Exclusive start conditions: EXPR is entered on "(" and STR on an opening
   double quote */
%x EXPR
%x STR
/* Used in error rules for word boundary violations */
letter [a-zA-Z0-9_.%]
/* NOTE(review): num_hex accepts any alphanumeric character after the prefix,
   not only hexadecimal digits -- confirm this is intended */
num_hex 0[xX][a-zA-Z0-9]+
num_dec (0d)?[0-9]+
num_bin 0[bB][0-1]+
num_suffix [kMG]
num ({num_hex}|{num_dec}|{num_bin}){num_suffix}?
syscall [%][a-fA-F0-9]+
/* name=value option. The value stops at the first blank, tab or newline: a
   negated character class matches newlines unless they are listed, so a bare
   [^ ] would swallow the command separator and the following line. */
option [a-zA-Z]+=[^ \t\n]*
symbol \.|\.?[a-zA-Z_][a-zA-Z0-9_.]*
space [ \t]+
%%
    /* STR is an exclusive start condition, so a rule tagged with the
       catch-all start condition is also active inside string literals. The
       token rules are therefore limited to INITIAL and EXPR; otherwise a
       '#' in a string is consumed as a comment (longest match) and a lone
       blank, ';', '$' or '(' wins the tie against the string-chunk rule. */
<INITIAL,EXPR>"#"[^\n]* ;
<INITIAL,EXPR>{space} { return T::SPC; }
    /* A newline ends the command in every state, including STR */
<*>[\n] { lex_current_input().line++;
          lex_current_input().expr_depth = 0;
          BEGIN(INITIAL); return T::SEPARATOR; }
<INITIAL,EXPR>[;] { lex_current_input().expr_depth = 0;
                    BEGIN(INITIAL); return T::SEPARATOR; }
    /* Operators are only recognized inside parentheses */
<EXPR>"+" { return '+'; }
<EXPR>"-" { return '-'; }
<EXPR>"*" { return '*'; }
<EXPR>"/" { return '/'; }
<EXPR>"%" { return '%'; }
    /* Leave EXPR once the outermost parenthesis is closed; the depth never
       goes below zero */
<EXPR>")" { int d = std::max(lex_current_input().expr_depth - 1, 0);
            lex_current_input().expr_depth = d;
            if(d == 0) BEGIN(INITIAL); return ')'; }
<EXPR>">>" { return '>'; }
<EXPR>"<<" { return '<'; }
<INITIAL,EXPR>"$" { return '$'; }
<INITIAL,EXPR>"(" { lex_current_input().expr_depth++;
                    BEGIN(EXPR); return '('; }
":" { return ':'; }
".." { return '.'; }
    /* String literals: accumulate into STR_buffer until the closing quote.
       Text beyond LEX_STR_MAX characters is dropped. */
["] { BEGIN(STR); STR_len = 0; }
<STR>\" { BEGIN(INITIAL); STR_buffer[STR_len] = 0;
          yylval.STRING = STR_buffer; return T::STRING; }
<STR>[^\\\n"]+ {
    int length = std::min(yyleng, LEX_STR_MAX - STR_len);
    memcpy(STR_buffer + STR_len, yytext, length);
    STR_len += length; }
<STR>\\n { if(STR_len < LEX_STR_MAX) STR_buffer[STR_len++] = '\n'; }
<STR>\\t { if(STR_len < LEX_STR_MAX) STR_buffer[STR_len++] = '\t'; }
{option} { yylval.STRING = yytext; return T::OPTION; }
<INITIAL,EXPR>{syscall} { yylval.NUM = strtoul(yytext+1, NULL, 16); return T::SYSCALL; }
<INITIAL,EXPR>{num} { yylval.NUM = parse_num(yytext); return T::NUM; }
<INITIAL,EXPR>{symbol} { yylval.STRING = yytext; return T::SYMBOL; }
<INITIAL,EXPR>"?" { return '?'; }
    /* Generic error and word boundaries violations */
<INITIAL,EXPR>{syscall}{letter} { err("invalid syscall number '%s'", yytext); }
<INITIAL,EXPR>{num}{letter} { err("invalid numerical value '%s'", yytext); }
<*>. { err("invalid token near '%s'", yytext); }
    /* Reset the start condition at end of input so that an unterminated
       string or parenthesis does not leave the next input in STR or EXPR */
<<EOF>> { BEGIN(INITIAL); if(lex_pop()) return T::END; }
%%
void lex_repl(std::string input)
{
/* yy_scan_bytes() switches buffer, but lex_push() will fix that */
Input in = {
.filename = "<command-line>",
.line = 1,
.repl = true,
.expr_depth = 0,
.buffer = yy_scan_bytes(input.c_str(), input.size()),
};
lex_push({ in });
}
void lex_include(std::vector<std::string> paths)
{
std::deque<Input> ins;
for(auto const &path: paths) {
yyin = fopen(path.c_str(), "r");
if(!yyin) {
FxOS_log(ERR, "cannot read '%s'", path);
return;
}
ins.push_back({
.filename = path,
.line = 1,
.repl = false,
.expr_depth = 0,
.buffer = yy_create_buffer(yyin, YY_BUF_SIZE),
});
}
lex_push(ins);
}
/* Read the next token from the current input. The token's value is zero /
   empty unless its type carries an attribute: NUM for SYSCALL and NUM
   tokens, STRING for SYMBOL, OPTION and STRING tokens. The string is copied
   out of the scanner's storage. */
Token lex_read()
{
    Token tok;
    tok.type = yylex();
    tok.value.NUM = 0;
    tok.value.STRING = "";

    bool const numeric = (tok.type == T::SYSCALL || tok.type == T::NUM);
    bool const textual = (tok.type == T::SYMBOL || tok.type == T::OPTION
        || tok.type == T::STRING);

    if(numeric)
        tok.value.NUM = yylval.NUM;
    else if(textual)
        tok.value.STRING = std::string(yylval.STRING);

    return tok;
}