#include "parser.h"
#include "errors.h"
/* NOTE(review): the names of the two system headers were missing from the
   copy of the file under review; <stdexcept> and <fmt/core.h> are
   reconstructed from usage (std::invalid_argument, fmt::format/fmt::print).
   Confirm against the repository. */
#include <stdexcept>
#include <fmt/core.h>
#include <cstdio>
#include <string>
#include <utility>

//---
// Lexing tools
//---

/* Human-readable name of a token type, as used in "expected ..." messages.
   The '.', '<' and '>' types are displayed as '..', '<<' and '>>'; any other
   type is displayed as the quoted character it is cast to. */
std::string T::str() const
{
    switch(static_cast<int>(m_name)) {
    case T::END:
        return "end of file";
    case T::SPC:
        return "whitespace";
    case T::SEPARATOR:
        return "end of command";
    case T::NUM:
        return "number";
    case T::SYMBOL:
        return "symbol";
    case T::SYSCALL:
        return "syscall number";
    case T::OPTION:
        return "command option";
    case T::STRING:
        return "string";
    case '.':
        return "'..'";
    case '<':
        return "'<<'";
    case '>':
        return "'>>'";
    default:
        return fmt::format("'{}'", static_cast<char>(m_name));
    }
}

/* Human-readable description of a token; same as T::str() but also shows the
   value carried by numbers, symbols, syscalls, options and strings. */
std::string Token::str() const
{
    switch(static_cast<int>(this->type)) {
    case T::END:
        return "end of file";
    case T::SPC:
        return "whitespace";
    case T::SEPARATOR:
        return "end of command";
    case T::NUM:
        return fmt::format("number {}", this->value.NUM);
    case T::SYMBOL:
        return fmt::format("symbol '{}'", this->value.STRING);
    case T::SYSCALL:
        return fmt::format("syscall number %{:03x}", this->value.NUM);
    case T::OPTION:
        return fmt::format("command option '{}'", this->value.STRING);
    case T::STRING:
        return fmt::format("string '{}'", this->value.STRING);
    case '.':
        return "'..'";
    case '<':
        return "'<<'";
    case '>':
        return "'>>'";
    default:
        return fmt::format("'{}'", static_cast<char>(this->type));
    }
}

//---
// Parser
//---

/* Create a parser. When (complete) is set the parser runs in completion
   mode: symbol(), atom() and at_end() behave differently (see each). */
Parser::Parser(bool complete):
    m_complete {complete}, m_la {}, m_expr_space {nullptr}
{
}

/* Load the first token into the lookahead. */
void Parser::start()
{
    feed();
}

/* Return the current lookahead and read the next token into it. When
   (ignore_spaces) is set, T::SPC tokens are skipped while reading. */
Token Parser::feed(bool ignore_spaces)
{
    Token previous = m_la;

    do m_la = lex_read();
    while(ignore_spaces && m_la.type == T::SPC);

    return previous;
}

/* Current lookahead token, without consuming it. */
Token Parser::lookahead() const
{
    return m_la;
}

/* Whether the lookahead ends the current command. */
bool Parser::at_end() const
{
    /* When parsing to complete we try to go infinitely far, so we ignore
       T::END. We supply T::SEPARATOR to complete the compound commands */
    if(m_complete)
        return (m_la.type == T::SEPARATOR);

    return (m_la.type == T::SEPARATOR || m_la.type == T::END);
}

/* Finish a command: forget the registered options, then require that the
   command is over. Throws SyntaxError if tokens remain. */
void Parser::end()
{
    m_options.clear();

    if(!at_end())
        throw SyntaxError("expected end of command");
}

/* Consume every consecutive T::SEPARATOR token. */
void Parser::skip_separators()
{
    while(m_la.type == T::SEPARATOR)
        feed();
}

/* Discard tokens until the end of the current command. Syntax errors raised
   while reading are deliberately ignored: the point is to skip the rest of
   the command. */
void Parser::exhaust_until_separator()
{
    while(!at_end()) {
        try {
            feed();
        }
        catch(SyntaxError const &) {
        }
    }
}

/* Print every remaining token of the current command, one per line. */
void Parser::dump_command()
{
    while(!at_end()) {
        Token t = m_la;

        if(t.type == T::NUM)
            fmt::print("NUM {:#x}\n", t.value.NUM);
        else if(t.type == T::SYSCALL)
            fmt::print("SYSCALL %{:03x}\n", t.value.NUM);
        else if(t.type == T::SYMBOL)
            fmt::print("SYMBOL '{}'\n", t.value.STRING);
        else if(t.type == T::OPTION)
            fmt::print("OPTION '{}'\n", t.value.STRING);
        else if(t.type == T::STRING)
            fmt::print("STRING '{}'\n", t.value.STRING);
        else if(t.type == '>')
            fmt::print(">>\n");
        else if(t.type == '<')
            fmt::print("<<\n");
        else
            fmt::print("{}\n", static_cast<char>(t.type));

        feed();
    }
}

//---
// Main parsing rules
//---

/* Register (callback) as the handler for option (name); used later by
   accept_options(). Registrations are dropped by end(). */
void Parser::option(std::string name, OptionHandler callback)
{
    m_options.emplace(name, callback);
}

/* Consume the lookahead if its type is one of (types) and return it.
   Otherwise throw a SyntaxError of the form
     "expected A, B, or C; instead found <token>". */
Token Parser::expect(std::initializer_list<T> types, bool ignore_spaces)
{
    bool correct_type = false;
    for(T type: types) {
        if(m_la.type == type) {
            correct_type = true;
            break;
        }
    }

    if(!correct_type) {
        std::string msg = "expected ";

        for(auto it = types.begin(); it != types.end(); ++it) {
            bool const first = (it == types.begin());
            bool const last = (it + 1 == types.end());

            if(!first && last)
                msg += "or ";
            msg += it->str();
            msg += (last ? "; " : ", ");
        }

        msg += "instead found ";
        msg += m_la.str();

        /* The text is handed over through static storage, as before, so that
           it stays valid after this frame is unwound even if SyntaxError
           only keeps the pointer (NOTE(review): confirm against
           SyntaxError's definition). The copy is bounded: token text such as
           a long symbol or string is truncated instead of overrunning the
           buffer. */
        static char err[256];
        std::snprintf(err, sizeof err, "%s", msg.c_str());
        throw SyntaxError(err);
    }

    return feed(ignore_spaces);
}

/* Single-type shorthand for expect({type}, ignore_spaces). */
Token Parser::expect(T type, bool ignore_spaces)
{
    return expect({type}, ignore_spaces);
}

/* Parse a symbol and return its name. In completion mode, throws a
   CompletionRequest for (category) when the input ends before or right after
   the symbol. */
std::string Parser::symbol(std::string category)
{
    /* Auto-complete a symbol which has not been typed yet */
    if(m_complete && m_la.type == T::END)
        throw CompletionRequest(category, "");

    if(!m_complete)
        return expect(T::SYMBOL).value.STRING;

    /* When completing, we have to know whether the symbol is finished (ie.
       there is another token after, including a space) or not */
    Token t = expect(T::SYMBOL, false);
    std::string sym = t.value.STRING;

    /* This will throw only if there is no token after, not even spaces */
    if(m_complete && m_la.type == T::END)
        throw CompletionRequest(category, sym);

    /* If a space is found, get rid of it */
    if(m_la.type == T::SPC)
        feed();

    return sym;
}

/* Parse a string token and return its contents. */
std::string Parser::str()
{
    Token t = expect(T::STRING);
    return t.value.STRING;
}

/* Parse a number token and return its value. */
long Parser::num()
{
    return expect(T::NUM).value.NUM;
}

/* Parse "<expr>:<length>" or "<expr>.<end>" (the '.' token is displayed as
   '..') into a range with start <= end. If neither separator follows the
   first expression and both (before) and (after) are non-negative, the
   result is [start - before, start + after] instead. */
Range Parser::range(VirtualSpace *space, long before, long after)
{
    long start = expr(space);

    /* Accept non-ranges if (before) and (after) are provided */
    if(m_la.type != ':' && m_la.type != '.' && before >= 0 && after >= 0)
        return {start - before, start + after};

    Token t = expect({':', '.'});
    long other = expr(space);

    Range r = {start, (t.type == ':' ? start + other : other)};
    if(r.start > r.end)
        std::swap(r.start, r.end);
    return r;
}

/* Parse a memory region, either as a range (when the lookahead can start an
   expression other than a symbol) or as a named region. */
FxOS::MemoryRegion Parser::region(VirtualSpace *space, long before, long after)
{
    if(m_la.type == '$' || m_la.type == '(' || m_la.type == '-'
       || m_la.type == T::NUM || m_la.type == T::SYSCALL) {
        Range r = range(space, before, after);
        return FxOS::MemoryRegion("", r.start, r.end - 1, false);
    }

    /* Return symbol by default so that an empty input autocompletes to a
       memory region name */
    try {
        return FxOS::MemoryRegion(symbol("memory_region"));
    }
    catch(std::invalid_argument const &) {
        /* Ignore nonexisting regions when autocompleting */
        if(m_complete)
            return FxOS::MemoryRegion("", 0, 1, false);

        /* Rethrow the original object; `throw e;` would copy it and slice
           any derived exception down to std::invalid_argument. */
        throw;
    }
}

/* Consume every leading option token and call the handler registered for
   it with the text following '='. Throws CommandError for an option with no
   registered handler. */
void Parser::accept_options()
{
    while(m_la.type == T::OPTION) {
        Token t = expect(T::OPTION);

        std::string opt = t.value.STRING;
        auto const eq = opt.find('=');
        std::string name = opt.substr(0, eq);

        if(!m_options.count(name)) {
            throw CommandError("unrecognized option {}", name);
        }

        /* If there is no '=', eq is npos and eq + 1 wraps around to 0, so the
           handler receives the whole option text (unchanged behavior). */
        std::string value = opt.substr(eq + 1);
        m_options[name](value);
    }
}

//---
// Parsing rules for expressions
//---

/* Parse one atom: '$', a parenthesized term, a negated atom, a symbol, a
   number or a syscall number. Symbols and syscalls are resolved through
   m_expr_space. Throws CommandError for symbols that cannot be resolved and,
   in completion mode, CompletionRequest for a symbol at the end of input. */
long Parser::atom()
{
    Token t = expect({'$', '(', '-', T::SYMBOL, T::NUM, T::SYSCALL});

    if(t.type == T::SYMBOL) {
        /* TODO: Specify the space that symbols are taken from */
        if(m_complete && m_la.type == T::END)
            throw CompletionRequest("symbol", t.value.STRING, m_expr_space);

        if(!m_expr_space)
            throw CommandError("cannot query symbol '{}', no virtual space",
                t.value.STRING);

        long val = 0;
        auto const &opt = m_expr_space->symbols.lookup(t.value.STRING);

        if(opt && opt->type == FxOS::Symbol::Address) {
            val = opt->value;
        }
        else if(opt && opt->type == FxOS::Symbol::Syscall) {
            /* Evaluates to 0 when there is no OS analysis or the syscall
               number is out of range */
            OS *os = m_expr_space->os_analysis();
            if(os && (int)opt->value < os->syscall_count())
                val = os->syscall(opt->value);
        }
        else {
            throw CommandError("symbol '{}' is undefined", t.value.STRING);
        }

        return val;
    }
    else if(t.type == T::SYSCALL) {
        if(!m_expr_space)
            return 0;

        /* Valid syscall numbers are 0 .. syscall_count() - 1, as in the
           symbol case above; anything else evaluates to 0. */
        OS *os = m_expr_space->os_analysis();
        if(!os || t.value.NUM < 0 || t.value.NUM >= os->syscall_count())
            return 0;

        return os->syscall(t.value.NUM);
    }
    else if(t.type == '$') {
        return (m_expr_space ? m_expr_space->cursor : 0);
    }
    else if(t.type == '-') {
        return -atom();
    }
    else if(t.type == T::NUM) {
        return t.value.NUM;
    }
    else {
        /* Opening parenthesis */
        long v = term();
        expect(')');
        return v;
    }
}

/* Parse a left-associative chain of atoms joined by '*', '/' or '%'.
   Throws CommandError when the right operand of '/' or '%' is zero. */
long Parser::factor()
{
    long v = atom();

    while(m_la.type == '*' || m_la.type == '/' || m_la.type == '%') {
        int op = expect({'*', '/', '%'}).type;
        long rhs = atom();

        if(op == '*') {
            v *= rhs;
            continue;
        }

        /* Integer division or remainder by zero is undefined behavior;
           report it as a command error instead */
        if(rhs == 0)
            throw CommandError("cannot evaluate '{}' with a zero divisor",
                static_cast<char>(op));

        if(op == '/')
            v /= rhs;
        else
            v %= rhs;
    }

    return v;
}

/* Parse a left-associative chain of factors joined by '+' or '-'. */
long Parser::term()
{
    long v = factor();

    while(m_la.type == '+' || m_la.type == '-') {
        int op = expect({'+', '-'}).type;

        if(op == '+')
            v += factor();
        else if(op == '-')
            v -= factor();
    }

    return v;
}

/* Evaluate an expression, resolving symbols, syscalls and '$' in (space).
   Only a single atom is parsed at this level, so compound expressions have
   to be written in parentheses. */
long Parser::expr(VirtualSpace *space)
{
    m_expr_space = space;

    long val;
    try {
        val = atom();
    }
    catch(...) {
        /* Do not keep pointing at the caller's space after a failure */
        m_expr_space = nullptr;
        throw;
    }

    m_expr_space = nullptr;
    return val;
}