ebc/lexer.py

92 lines
1.7 KiB
Python

import ply.lex as lex
# Keywords of the source language, mapped to the token type reported for each.
# t_ID looks identifiers up in this table so a keyword is never returned as ID.
reserved = {
    # Conditionals
    'If': 'IF',
    'Then': 'THEN',
    'Else': 'ELSE',
    'IfEnd': 'IFEND',

    # Loops
    'While': 'WHILE',
    'WhileEnd': 'WHILEEND',
    'Do': 'DO',
    'LpWhile': 'LPWHILE',
    'For': 'FOR',
    'To': 'TO',
    'Step': 'STEP',
    'Next': 'NEXT',

    # Other commands (presumably screen / keyboard built-ins)
    'Locate': 'LOCATE',
    'Getkey': 'GETKEY',

    # Logical operators
    'Not': 'NOT',
    # NOTE(review): 'And' is the only token type that is not upper-cased.
    # It looks like a typo for 'AND', but the parser may already refer to
    # the token as 'And' -- confirm there before normalising it.
    'And': 'And',
    'Or': 'OR',

    # Variable-kind prefixes (presumably list / matrix / string variables)
    'List': 'LIST',
    'Mat': 'MAT',
    'Str': 'STR',

    # Labels and jumps
    'Lbl': 'LBL',
    'Goto': 'GOTO',
}

# Single-character tokens: PLY returns each of these characters as a token
# whose type is the character itself.
literals = '+-*/%(){}[]=,'
# Token types this lexer can emit. PLY requires every type to be declared
# here: first the fixed ones, then one per keyword in `reserved`.
tokens = [
    'STRING',
    'NUMBER',
    'ISEQUAL',
    'PLUSASSIGN',
    'MINUSASSIGN',
    'TIMESASSIGN',
    'DIVASSIGN',
    'MODASSIGN',
    'NEWLINE',
    'ID',
]
# Keyword token types follow the fixed ones, in the order `reserved` lists them.
tokens.extend(reserved.values())
# Multi-character operators, given to PLY as plain regex strings.
# Per the PLY documentation, string rules are tried after all function rules,
# in order of decreasing regex length, and the single-character `literals`
# are only considered when no rule matches -- so '==' and '+=' win over a
# lone '=' or '+'.
t_ISEQUAL = r'=='
t_PLUSASSIGN = r'\+='
t_MINUSASSIGN = r'\-='
t_TIMESASSIGN = r'\*='
t_DIVASSIGN = r'/='
t_MODASSIGN = r'\%='
# Line comments: everything from '//' up to (not including) the end of line.
# The `t_ignore_` prefix makes PLY discard the match instead of returning it.
t_ignore_COMMENT = r'//.*'
# Integer literals with an optional leading sign; the token value becomes an int.
# NOTE(review): because the sign is part of the literal, input such as `1-2`
# appears to lex as NUMBER(1) NUMBER(-2) rather than 1 '-' 2 -- confirm the
# parser copes with that.
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    # The docstring above is the rule's regex (PLY reads it from __doc__).
    # The trailing \b stops digits glued to letters, e.g. `12abc`, from
    # matching as a number.
    literal_text = t.value
    t.value = int(literal_text, 10)
    return t
# String literals delimited by a matching pair of double or single quotes.
# The token value is the text between the quotes, with every double quote
# written as \" (escaped exactly once).
def t_STRING(t):
    r""""(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])*'"""
    # The docstring above is the rule's regex (PLY reads it from __doc__).
    # It stops at the first *unescaped* closing quote, so two literals on one
    # line -- "a" + "b" -- stay two tokens instead of being merged by a
    # greedy `.*`. A backslash escapes the following character, which is what
    # lets \" appear inside a double-quoted string. Literals never span lines.
    body = t.value[1:-1]  # drop the surrounding quotes
    # Un-escape the quotes that are already escaped, then escape every quote,
    # so no double quote ends up escaped twice.
    t.value = body.replace('\\"', '"').replace('"', '\\"')
    return t
# Identifiers and keywords share one rule: a word found in `reserved` is
# returned with its keyword token type, anything else as a plain ID.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # The docstring above is the rule's regex (PLY reads it from __doc__).
    word = t.value
    t.type = reserved[word] if word in reserved else 'ID'
    return t
# Statement separators: a run of newlines and/or semicolons is returned as a
# single NEWLINE token, and the lexer's line counter is kept up to date.
def t_newline(t):
    r'(;|\n)+'
    # The docstring above is the rule's regex (PLY reads it from __doc__).
    # The type is set explicitly because the function is named `newline`,
    # while the declared token type is 'NEWLINE'.
    t.type = 'NEWLINE'
    # Only real line breaks advance the line number; a ';' separates
    # statements that sit on the same physical line.
    t.lexer.lineno += t.value.count('\n')
    return t
# Characters skipped between tokens: spaces, tabs and carriage returns.
# '\r' is included so that files with Windows (CRLF) line endings do not
# report an illegal character at the end of every line; the '\n' that follows
# is still handled by the newline rule.
t_ignore = ' \t\r'
# Called by PLY when no rule matches at the current position: report the
# offending character on stdout and resume lexing right after it.
def t_error(t):
    # t.value holds the input from the failure point onwards, so its first
    # character is the one that could not be matched.
    bad_char = t.value[0]
    message = "Illegal character '%s'" % bad_char
    print(message)
    t.lexer.skip(1)
# Build the lexer from the rules defined above. This runs at import time, so
# importing the module is enough to get a ready-to-use `lexer` object.
lexer = lex.lex()