#include "parser.h"
#include "errors.h"
/* NOTE(review): the two system headers below were reconstructed from usage
   (std::invalid_argument, fmt::format/fmt::print) -- confirm. */
#include <stdexcept>
#include <fmt/core.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <memory>
#include <string>
#include <utility>

//---
// Lexing tools
//---

/* Human-readable name of a token type, used in "expected ..." messages.
   Multi-character operators are reported in full ('..', '<<', '>>') even
   though their type is stored as a single character. */
std::string T::str() const
{
    switch((int)m_name) {
    case T::END:
        return "end of file";
    case T::SPC:
        return "whitespace";
    case T::SEPARATOR:
        return "end of command";
    case T::NUM:
        return "number";
    case T::SYMBOL:
        return "symbol";
    case T::SYSCALL:
        return "syscall number";
    case T::OPTION:
        return "command option";
    case T::STRING:
        return "string";
    case '.':
        return "'..'";
    case '<':
        return "'<<'";
    case '>':
        return "'>>'";
    default:
        return fmt::format("'{}'", (char)m_name);
    }
}

/* Human-readable description of a token, including its value when the token
   type carries one. Used in "instead found ..." messages. */
std::string Token::str() const
{
    switch((int)this->type) {
    case T::END:
        return "end of file";
    case T::SPC:
        return "whitespace";
    case T::SEPARATOR:
        return "end of command";
    case T::NUM:
        return fmt::format("number {}", this->value.NUM);
    case T::SYMBOL:
        return fmt::format("symbol '{}'", this->value.SYMBOL);
    case T::SYSCALL:
        return fmt::format("syscall number %{:03x}", this->value.NUM);
    case T::OPTION:
        return fmt::format("command option '{}'", this->value.OPTION);
    case T::STRING:
        return fmt::format("string '{}'", this->value.STRING);
    case '.':
        return "'..'";
    case '<':
        return "'<<'";
    case '>':
        return "'>>'";
    default:
        return fmt::format("'{}'", (char)this->type);
    }
}

//---
// Parser
//---

/* Build a parser. When (complete) is set the parser runs in auto-completion
   mode: see at_end(), symbol() and atom() for the behaviors that change. */
Parser::Parser(bool complete):
    m_complete {complete}, m_la {-1,0}, m_expr_space {nullptr}
{
}

/* Load the first lookahead token. */
void Parser::start()
{
    feed();
}

/* Consume the current lookahead token and read the next one.
   Returns the token that was the lookahead before the call. When
   (ignore_spaces) is set, T::SPC tokens are skipped while reading. */
Token Parser::feed(bool ignore_spaces)
{
    Token t = m_la;

    do m_la = lex_read();
    while(ignore_spaces && m_la.type == T::SPC);

    return t;
}

/* Current lookahead token (not consumed). */
Token Parser::lookahead() const
{
    return m_la;
}

/* Whether the lookahead marks the end of the current command. */
bool Parser::at_end() const
{
    /* When parsing to complete we try to go infinitely far, so we ignore
       T::END. We supply T::SEPARATOR to complete the compound commands */
    if(m_complete)
        return (m_la.type == T::SEPARATOR);

    return (m_la.type == T::SEPARATOR || m_la.type == T::END);
}

/* Finish parsing a command: forget the registered options and require that
   nothing is left before the end of the command.
   Throws SyntaxError if the lookahead is not at the end of the command. */
void Parser::end()
{
    m_options.clear();

    if(!at_end())
        throw SyntaxError("expected end of command");
}

/* Skip any run of T::SEPARATOR tokens. */
void Parser::skip_separators()
{
    while(m_la.type == T::SEPARATOR)
        feed();
}

/* Discard tokens until the end of the current command. Syntax errors raised
   while reading tokens are deliberately ignored (best-effort recovery). */
void Parser::exhaust_until_separator()
{
    while(!at_end()) {
        try {
            feed();
        }
        catch(FxOS::SyntaxError const &) {
        }
    }
}

/* Debugging helper: print every remaining token of the current command, one
   per line, consuming them in the process. */
void Parser::dump_command()
{
    while(!at_end()) {
        Token t = m_la;

        if(t.type == T::NUM)
            fmt::print("NUM {:#x}\n", t.value.NUM);
        else if(t.type == T::SYSCALL)
            fmt::print("SYSCALL %{:03x}\n", t.value.NUM);
        else if(t.type == T::SYMBOL)
            fmt::print("SYMBOL '{}'\n", t.value.SYMBOL);
        else if(t.type == T::OPTION)
            fmt::print("OPTION '{}'\n", t.value.OPTION);
        else if(t.type == T::STRING)
            fmt::print("STRING '{}'\n", t.value.STRING);
        else if(t.type == '>')
            fmt::print(">>\n");
        else if(t.type == '<')
            fmt::print("<<\n");
        else
            fmt::print("{}\n", (char)t.type);

        feed();
    }
}

//---
// Main parsing rules
//---

/* Register (callback) as the handler for the option named (name). The
   handler is invoked by accept_options(); registrations are dropped by
   end(). An existing registration for the same name is kept. */
void Parser::option(char name, OptionHandler callback)
{
    m_options.emplace(name, callback);
}

/* Consume the lookahead if its type is one of (types) and return it.
   Throws SyntaxError("expected A, B, or C; instead found X") otherwise.
   (ignore_spaces) is forwarded to feed(). */
Token Parser::expect(std::initializer_list<T> types, bool ignore_spaces)
{
    bool correct_type = false;
    for(T type: types) {
        if(m_la.type == type)
            correct_type = true;
    }

    if(!correct_type) {
        /* The buffer stays static because the pointer is handed directly to
           SyntaxError. NOTE(review): presumably SyntaxError keeps the
           pointer instead of copying the text -- confirm before making this
           a local. */
        static char err[128];

        std::string msg = "expected ";
        for(auto it = types.begin(); it != types.end(); ++it) {
            bool const last = (it + 1 == types.end());

            if(it != types.begin() && last)
                msg += "or ";
            msg += it->str();
            msg += (last ? "; " : ", ");
        }
        msg += "instead found ";
        msg += m_la.str();

        /* Token descriptions embed user text (symbols, strings) of arbitrary
           length: truncate instead of overflowing the fixed buffer. */
        std::snprintf(err, sizeof(err), "%s", msg.c_str());
        throw SyntaxError(err);
    }

    return feed(ignore_spaces);
}

/* Single-type convenience overload of expect(). */
Token Parser::expect(T type, bool ignore_spaces)
{
    return expect({ type }, ignore_spaces);
}

/* Parse a symbol and return its text.
   In completion mode, throws CompleteCategory(category, prefix) when the
   symbol is missing or is the very last thing on the input, so that the
   caller can offer completions in (category).
   Throws SyntaxError if the lookahead is not a symbol. */
std::string Parser::symbol(std::string category)
{
    /* Auto-complete a symbol which has not been typed yet */
    if(m_complete && m_la.type == T::END)
        throw CompleteCategory(category, "");

    if(!m_complete) {
        Token t = expect(T::SYMBOL);
        std::string sym = t.value.SYMBOL;
        /* The token text is released here, like in the completion path
           below and in str() */
        std::free(t.value.SYMBOL);
        return sym;
    }

    /* When completing, we have to know whether the symbol is finished (ie.
       there is another token after, including a space) or not */
    Token t = expect(T::SYMBOL, false);
    std::string sym = t.value.SYMBOL;
    std::free(t.value.SYMBOL);

    /* This will throw only if there is no token after, not even spaces */
    if(m_la.type == T::END)
        throw CompleteCategory(category, sym);

    /* If a space is found, get rid of it */
    if(m_la.type == T::SPC)
        feed();

    return sym;
}

/* Parse a string literal and return its contents.
   Throws SyntaxError if the lookahead is not a string. */
std::string Parser::str()
{
    Token t = expect(T::STRING);
    std::string str = t.value.STRING;
    std::free(t.value.STRING);
    return str;
}

/* Parse a number. Throws SyntaxError if the lookahead is not a number. */
long Parser::num()
{
    return expect(T::NUM).value.NUM;
}

/* Parse a range, either "<start>:<length>" or "<start>..<end>"; the bounds
   are swapped if needed so that start <= end. If both (before) and (after)
   are non-negative, a lone expression is also accepted and yields
   [value - before, value + after]. */
Range Parser::range(VirtualSpace *space, long before, long after)
{
    long start = expr(space);

    /* Accept non-ranges if (before) and (after) are provided */
    if(m_la.type != ':' && m_la.type != '.' && before >= 0 && after >= 0)
        return { start - before, start + after };

    Token t = expect({ ':', '.' });
    long other = expr(space);

    Range r = { start, (t.type == ':' ? start + other : other) };
    if(r.start > r.end)
        std::swap(r.start, r.end);
    return r;
}

/* Parse a memory region, given either as a range (see range()) or as the
   name of a region. Rethrows std::invalid_argument raised while building a
   region from a name, except in completion mode where a placeholder region
   is returned instead. */
FxOS::MemoryRegion Parser::region(VirtualSpace *space, long before,
    long after)
{
    if(m_la.type == '$' || m_la.type == '(' || m_la.type == '-'
       || m_la.type == T::NUM || m_la.type == T::SYSCALL) {
        Range r = range(space, before, after);
        return FxOS::MemoryRegion("", r.start, r.end-1, false);
    }

    /* Return symbol by default so that an empty input autocompletes to a
       memory region name */
    try {
        return FxOS::MemoryRegion(symbol("memory_region"));
    }
    catch(std::invalid_argument const &) {
        /* Ignore nonexisting regions when autocompleting */
        if(m_complete)
            return FxOS::MemoryRegion("", 0, 1, false);
        /* Rethrow the original object; "throw e" would copy and slice it */
        throw;
    }
}

/* Consume every option at the lookahead and run the handler registered for
   it with option(). The option name is the character at index 1 of the
   token text and the value is whatever starts at index 3 (empty if the text
   is shorter).
   Throws CommandError if an option has no registered handler. */
void Parser::accept_options()
{
    while(m_la.type == T::OPTION) {
        Token t = expect(T::OPTION);

        /* NOTE(review): the option text is never released here, unlike
           SYMBOL and STRING tokens -- confirm who owns it. */
        char *text = t.value.OPTION;
        char name = text[1];

        auto handler = m_options.find(name);
        if(handler == m_options.end()) {
            throw CommandError("unrecognized option -{}", name);
        }

        std::string value = "";
        if(strnlen(text, 3) >= 3)
            value = text + 3;

        handler->second(value);
    }
}

//---
// Parsing rules for expressions
//---

/* atom ::= '$' | '(' term ')' | '-' atom | SYMBOL | NUM | SYSCALL
   Symbols, syscall numbers and '$' are resolved against the virtual space
   set by expr(). Throws CommandError for symbols that cannot be resolved
   and SyntaxError on unexpected tokens. */
long Parser::atom()
{
    Token t = expect({ '$', '(', '-', T::SYMBOL, T::NUM, T::SYSCALL });

    if(t.type == T::SYMBOL) {
        /* Own the token text so that it is released on every exit path,
           including the exceptions thrown below */
        std::unique_ptr<char, decltype(&std::free)> name {
            t.value.SYMBOL, &std::free };
        long val = 0;

        if(!m_expr_space)
            throw CommandError("cannot query symbol '{}', no virtual space",
                name.get());

        auto const &opt = m_expr_space->symbols.lookup(name.get());
        if(opt && opt->type == FxOS::Symbol::Address) {
            val = opt->value;
        }
        else if(opt && opt->type == FxOS::Symbol::Syscall) {
            /* Evaluates to 0 if there is no OS analysis or if the syscall
               number is out of range */
            OS *os = m_expr_space->os_analysis();
            if(os && (int)opt->value < os->syscall_count())
                val = os->syscall(opt->value);
        }
        else {
            throw CommandError("symbol '{}' is undefined", name.get());
        }

        if(m_complete && m_la.type == T::END)
            throw CompleteCategory("expression", name.get());

        return val;
    }
    else if(t.type == T::SYSCALL) {
        if(!m_expr_space)
            return 0;

        OS *os = m_expr_space->os_analysis();
        /* Valid syscall numbers are 0 .. syscall_count()-1, the same bound
           as for syscall symbols above */
        if(!os || t.value.NUM < 0 || t.value.NUM >= os->syscall_count())
            return 0;

        return os->syscall(t.value.NUM);
    }
    else if(t.type == '$') {
        return (m_expr_space ? m_expr_space->cursor : 0);
    }
    else if(t.type == '-') {
        return -atom();
    }
    else if(t.type == T::NUM) {
        return t.value.NUM;
    }
    else {
        /* Only '(' is left */
        long v = term();
        expect(')');
        return v;
    }
}

/* factor ::= atom (('*' | '/' | '%') atom)*
   Throws CommandError on division or modulo by zero. */
long Parser::factor()
{
    long v = atom();

    while(m_la.type == '*' || m_la.type == '/' || m_la.type == '%') {
        int op = expect({ '*', '/', '%' }).type;
        long rhs = atom();

        if(op == '*') {
            v *= rhs;
            continue;
        }

        /* Integer division by zero is undefined behavior; report it as a
           command error instead */
        if(rhs == 0)
            throw CommandError("division by zero in operator '{}'",
                (char)op);

        if(op == '/')
            v /= rhs;
        else if(op == '%')
            v %= rhs;
    }

    return v;
}

/* term ::= factor (('+' | '-') factor)* */
long Parser::term()
{
    long v = factor();

    while(m_la.type == '+' || m_la.type == '-') {
        int op = expect({ '+', '-' }).type;

        if(op == '+')
            v += factor();
        else if(op == '-')
            v -= factor();
    }

    return v;
}

/* Parse a single atom as an expression, resolving symbols, syscalls and '$'
   against (space), which may be null. Operators are only reachable inside
   parentheses, through atom(). */
long Parser::expr(VirtualSpace *space)
{
    m_expr_space = space;

    try {
        long val = atom();
        m_expr_space = nullptr;
        return val;
    }
    catch(...) {
        /* Do not keep a stale space pointer around when parsing fails */
        m_expr_space = nullptr;
        throw;
    }
}