Blang/test/lexer.l

72 lines
2.1 KiB
Plaintext

%{
#include <iostream>
#include <string>
using namespace std;
/*
 * Kinds of token handed back by yylex().
 *
 * yylex() returns 0 once the input is exhausted, and main() stops its
 * loop as soon as it sees a zero token.  The value 0 is therefore reserved
 * for Token_EndOfFile; without it the first real token kind
 * (Token_StringLiteral) would be 0 and every string literal would be
 * mistaken for end of input.
 */
enum TokenKind{
    Token_EndOfFile = 0,   /* never returned by a rule; matches yylex()'s end-of-input value */
    Token_StringLiteral,
    Token_NumberLiteral,
    Token_Operator,
    Token_Keyword,
    Token_Identifier,
    Token_EndOfStatement,
    Token_Unknown,
    Token_Whitespace,
    Token_LeftDelimiter,
    Token_RightDelimiter
};
/* Text of the most recently scanned token; filled in by the scanner actions. */
std::string val;
/* Number of newline characters consumed so far. */
int nbLines{0};
/* Character counter; initialised to zero and not updated by any visible rule. */
int nbChars{0};
%}
%option noyywrap
/* Exclusive start condition, active while scanning the inside of a string literal. */
%x strEnv
/* Numeric literals: digits only, or digits with a decimal point ("1.", "1.5", ".5"). */
integer [0-9]+
real [0-9]+\.[0-9]*|\.[0-9]+
value {integer}|{real}
/*
 * Operators.
 * NOTE(review): bare "<" and ">" are not listed, so they reach the catch-all
 * rule and come out as Token_Unknown -- confirm whether that is intended.
 */
operator "+"|"-"|"/"|"*"|"=>"|"<="|">="|"="|"!="|"->"
/*
 * Reserved words.  "IfEnd" was previously misspelled "IdEnd", which made the
 * real "IfEnd" keyword scan as an identifier (longest match beats "If").
 */
keyword "If"|"Then"|"Else"|"IfEnd"|"While"|"WhileEnd"|"Do"|"LpWhile"|"For"|"To"|"Step"|"Next"|"Goto"|"Lbl"
/* Identifiers: a letter or underscore followed by letters, digits or underscores. */
ident [a-zA-Z_][0-9a-zA-Z_]*
%%
 /*
  * Scanner rules.  flex picks the longest match and, on a tie, the rule
  * listed first; that is why {keyword} precedes {ident} and the catch-all
  * "." rule comes last.  Each returning action leaves the token text in val.
  */
{value} { val=yytext; return(Token_NumberLiteral); }
 /* Opening quote: start collecting a string literal in val. */
"\"" { val.clear(); BEGIN(strEnv); }
 /* Closing quote: the literal is complete (quotes are not part of val). */
<strEnv>"\"" { BEGIN(INITIAL); return(Token_StringLiteral); }
 /* Newline inside a string: report it, count the line, keep scanning the string. */
<strEnv>"\n" { cerr << "multi-line strings not allowed" << endl; ++nbLines; }
 /* Input ended before the closing quote: report it and return what was collected. */
<strEnv><<EOF>> { cerr << "unterminated string at end of input" << endl; BEGIN(INITIAL); return(Token_StringLiteral); }
 /* String content: consume a whole run of ordinary characters in one match. */
<strEnv>[^"\n]+ { val+=yytext; }
{operator} { val=yytext; return(Token_Operator); }
{keyword} { val=yytext; return(Token_Keyword); }
{ident} { val=yytext; return(Token_Identifier); }
[ \t]+ { /* nothing to be done */ }
"\n" { ++nbLines; }
 /* Anything else is handed back one character at a time as an unknown token. */
. { val=yytext; return(Token_Unknown); }
%%
/*
 * Test driver for the scanner.
 *
 * Reads from the file named by argv[1] when one is given, otherwise from
 * the scanner's default input.  Every token returned by yylex() is printed
 * to stderr as Kind[text]; the loop ends when yylex() returns 0.  The
 * number of lines counted by the scanner is printed last.
 *
 * Returns 0 on success, 1 when the input file cannot be opened.
 */
int main(int argc,char ** argv)
{
    const bool fromFile = argc>1;
    if(fromFile) {
        yyin=fopen(argv[1],"r");
        if(!yyin) {
            /* Without this check the scanner would be handed a null stream. */
            cerr << "cannot open input file '" << argv[1] << "'" << endl;
            return(1);
        }
    }

    int token;
    do {
        token=yylex();
        switch(token) {
        case Token_StringLiteral: cerr << "Token_StringLiteral[" << val << "]" << endl; break;
        case Token_Keyword: cerr << "Token_Keyword[" << val << "]" << endl; break;
        case Token_Operator: cerr << "Token_Operator[" << val << "]" << endl; break;
        case Token_Identifier: cerr << "Token_Identifier[" << val << "]" << endl; break;
        case Token_NumberLiteral: cerr << "Token_NumberLiteral[" << val << "]" << endl; break;
        case Token_Unknown: cerr << "Token_Unknown[" << val << "]" << endl; break;
        }
    } while(token);

    cerr << nbLines << " lines" << endl;

    /* Only close what this function opened. */
    if(fromFile)
        fclose(yyin);
    return(0);
}