From 8a2c67d83f9d1da91a942dae78beacd789852653 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Tue, 5 Apr 2022 20:49:04 +0100 Subject: [PATCH] fix token memory leaks in lexer --- shell/lexer.l | 28 +++++++++++++++++++++++----- shell/parser.cpp | 28 +++++++++++++--------------- shell/parser.h | 15 ++++++++------- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/shell/lexer.l b/shell/lexer.l index b6cd7ae..13cd7f0 100644 --- a/shell/lexer.l +++ b/shell/lexer.l @@ -12,7 +12,7 @@ #include /* Values to attach to the token */ -typedef Token::Attribute YYSTYPE; +typedef Token::LexAttribute YYSTYPE; YYSTYPE yylval; /* Buffer for literal strings */ @@ -79,6 +79,8 @@ static bool lex_pop() if(!lex_inputs.size()) return true; auto &q = lex_inputs.top(); + auto &front = q.front(); + yy_delete_buffer(front.buffer); q.pop_front(); if(q.empty()) lex_inputs.pop(); @@ -176,7 +178,7 @@ space [ \t]+ ["] { BEGIN(STR); STR_len = 0; } \" { BEGIN(INITIAL); STR_buffer[STR_len] = 0; - yylval.STRING = strdup(STR_buffer); return T::STRING; } + yylval.STRING = STR_buffer; return T::STRING; } [^\\\n"]+ { int length = std::min(yyleng, LEX_STR_MAX - STR_len); memcpy(STR_buffer + STR_len, yytext, length); @@ -184,11 +186,11 @@ space [ \t]+ \\n { if(STR_len < LEX_STR_MAX) STR_buffer[STR_len++] = '\n'; } \\t { if(STR_len < LEX_STR_MAX) STR_buffer[STR_len++] = '\t'; } -{option} { yylval.OPTION = strdup(yytext); return T::OPTION; } +{option} { yylval.STRING = yytext; return T::OPTION; } <*>{syscall} { yylval.NUM = strtoul(yytext+1, NULL, 16); return T::SYSCALL; } <*>{num} { yylval.NUM = parse_num(yytext); return T::NUM; } -<*>{symbol} { yylval.SYMBOL = strdup(yytext); return T::SYMBOL; } +<*>{symbol} { yylval.STRING = yytext; return T::SYMBOL; } <*>"?" { return '?'; } /* Generic error and word boundaries violations */ @@ -238,6 +240,22 @@ Token lex_read() { Token t; t.type = yylex(); - t.value = yylval; + t.value.NUM = 0; + t.value.STRING = ""; + + switch(t.type) { + case T::SYSCALL: + case T::NUM: + t.value.NUM = yylval.NUM; + break; + case T::SYMBOL: + case T::OPTION: + case T::STRING: + t.value.STRING = std::string(yylval.STRING); + break; + default: + break; + } + return t; } diff --git a/shell/parser.cpp b/shell/parser.cpp index 07de872..2af6318 100644 --- a/shell/parser.cpp +++ b/shell/parser.cpp @@ -49,11 +49,11 @@ std::string Token::str() const case T::NUM: return fmt::format("number {}", this->value.NUM); case T::SYMBOL: - return fmt::format("symbol '{}'", this->value.SYMBOL); + return fmt::format("symbol '{}'", this->value.STRING); case T::SYSCALL: return fmt::format("syscall number %{:03x}", this->value.NUM); case T::OPTION: - return fmt::format("command option '{}'", this->value.OPTION); + return fmt::format("command option '{}'", this->value.STRING); case T::STRING: return fmt::format("string '{}'", this->value.STRING); case '.': @@ -72,7 +72,7 @@ std::string Token::str() const //--- Parser::Parser(bool complete): - m_complete {complete}, m_la {-1,0}, m_expr_space {nullptr} + m_complete {complete}, m_la {}, m_expr_space {nullptr} { } @@ -120,7 +120,7 @@ void Parser::exhaust_until_separator() { while(!at_end()) { try { - feed(); + Token t = feed(); } catch(SyntaxError const &e) {} } @@ -136,9 +136,9 @@ void Parser::dump_command() else if(t.type == T::SYSCALL) fmt::print("SYSCALL %{:03x}\n", t.value.NUM); else if(t.type == T::SYMBOL) - fmt::print("SYMBOL '{}'\n", t.value.SYMBOL); + fmt::print("SYMBOL '{}'\n", t.value.STRING); else if(t.type == T::OPTION) - fmt::print("OPTION '{}'\n", t.value.OPTION); + fmt::print("OPTION '{}'\n", t.value.STRING); else if(t.type == T::STRING) fmt::print("STRING '{}'\n", t.value.STRING); else if(t.type == '>') @@ -200,13 +200,12 @@ std::string Parser::symbol(std::string category) throw CompletionRequest(category, ""); if(!m_complete) - return expect(T::SYMBOL).value.SYMBOL; + return expect(T::SYMBOL).value.STRING; /* When completing, we have to know whether the symbol is finished (ie. there is another token after, including a space) or not */ Token t = expect(T::SYMBOL, false); - std::string sym = t.value.SYMBOL; - free(t.value.SYMBOL); + std::string sym = t.value.STRING; /* This will throw only if there is no token after, not even spaces */ if(m_complete && m_la.type == T::END) @@ -222,7 +221,6 @@ std::string Parser::str() { Token t = expect(T::STRING); std::string str = t.value.STRING; - free(t.value.STRING); return str; } @@ -272,7 +270,7 @@ void Parser::accept_options() while(m_la.type == T::OPTION) { Token t = expect(T::OPTION); - std::string opt = t.value.OPTION; + std::string opt = t.value.STRING; std::string name = opt.substr(0, opt.find('=')); if(!m_options.count(name)) { @@ -295,11 +293,11 @@ long Parser::atom() if(t.type == T::SYMBOL) { /* TODO: Specify the space that symbols are taken from */ if(m_complete && m_la.type == T::END) - throw CompletionRequest("symbol", t.value.SYMBOL, m_expr_space); + throw CompletionRequest("symbol", t.value.STRING, m_expr_space); long val = 0; if(m_expr_space) { - auto const &opt = m_expr_space->symbols.lookup(t.value.SYMBOL); + auto const &opt = m_expr_space->symbols.lookup(t.value.STRING); if(opt && opt->type == FxOS::Symbol::Address) { val = opt->value; } @@ -309,11 +307,11 @@ long Parser::atom() val = os->syscall(opt->value); } else { - throw CommandError("symbol '{}' is undefined", t.value.SYMBOL); + throw CommandError("symbol '{}' is undefined", t.value.STRING); } } else throw CommandError("cannot query symbol '{}', no virtual space", - t.value.SYMBOL); + t.value.STRING); return val; } else if(t.type == T::SYSCALL) { diff --git a/shell/parser.h b/shell/parser.h index c7fc3f7..4f5a630 100644 --- a/shell/parser.h +++ b/shell/parser.h @@ -61,17 +61,18 @@ struct Range /* Token with their data */ struct Token { - union Attribute { - /* Value of a numerical constant or syscall number */ + /* Lexer-style attribute */ + struct LexAttribute { long NUM; - /* Name of a symbol (to free(3) after use) */ - char *SYMBOL; - /* Text of an option (to free(3) after use) */ - char *OPTION; - /* Text of a literal string */ char *STRING; }; + /* Attribute once in the parser */ + struct Attribute { + long NUM; + std::string STRING; /* SYMBOL, OPTION, STRING */ + }; + /* Token type and value */ T type; Attribute value;