ebc/lexer.py

73 lines
1.3 KiB
Python

import ply.lex as lex
# Reserved words of the language. Every keyword maps to a token name that
# is simply the keyword in upper case, so the table is derived from the
# keyword list (order is preserved).
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'if',
        'then',
        'else',
        'ifend',
        'while',
        'whileend',
        'do',
        'lpwhile',
        'for',
        'to',
        'step',
        'next',
        'locate',
    )
}

# Single-character literal tokens handed to PLY.
literals = "+-*/()=,{}"

# Token names: the hand-written ones first, then one per reserved word.
tokens = ['STRING', 'NUMBER', 'EQUAL', 'NEWLINE', 'ID']
tokens.extend(reserved.values())

# Simple regex rule: the two-character equality operator.
t_EQUAL = r'=='

# Comments run from '#' to the end of the line. The '#' is escaped in the
# pattern, and the t_ignore_ prefix is PLY's naming convention for rules
# whose matches are discarded.
t_ignore_COMMENT = r'\#.*'
# Integer literals: a regex rule with some action code.
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    # The string literal above is the token's regular expression (PLY reads
    # it from the function docstring); it is not documentation.
    #
    # Converts the matched text to an int and returns the same token.
    # NOTE(review): the optional sign is part of the pattern, so input such
    # as "1-2" presumably lexes as NUMBER(1) NUMBER(-2) rather than as a
    # subtraction -- confirm against the parser's grammar.
    matched_text = t.value
    t.value = int(matched_text)
    return t
# Strings
def t_STRING(t):
    r'"(\\.|[^"\\\n])*"|\'(\\.|[^\'\\\n])*\''
    # The string literal above is the token's regular expression (PLY reads
    # it from the function docstring); it is not documentation.
    #
    # A string is delimited by a pair of double or single quotes. The body
    # is any run of backslash-escaped characters or characters other than
    # the delimiter, a backslash or a newline. Matching therefore stops at
    # the first unescaped closing quote, so two literals on one line
    # ("a" + "b") become two tokens instead of being merged into one, as
    # the previous greedy ".*" pattern did. The pattern contains no
    # whitespace or '#', so it also behaves correctly under re.VERBOSE.
    #
    # The token value is the text between the quotes, normalised so that
    # every double quote in it is backslash-escaped exactly once.
    t.value = t.value[1:-1]  # drop the surrounding quote characters
    # Un-escape quotes that are already escaped, then escape all of them,
    # so an existing \" is not turned into \\".
    t.value = t.value.replace('\\"', '"')
    t.value = t.value.replace('"', '\\"')
    return t
# Identifiers and keywords share one rule; keywords are told apart by a
# lookup in the reserved-word table.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # The string literal above is the token's regular expression (PLY reads
    # it from the function docstring); it is not documentation.
    #
    # A word found in `reserved` takes that keyword's token type; any other
    # word is a plain ID. The token value is left unchanged.
    word = t.value
    t.type = reserved[word] if word in reserved else 'ID'
    return t
# Statement separators: a run of ';' and/or newline characters becomes a
# single NEWLINE token, and the lexer's line counter is kept in step.
def t_newline(t):
    r'(;|\n)+'
    # The string literal above is the token's regular expression (PLY reads
    # it from the function docstring); it is not documentation.
    #
    # Only real newline characters advance the line number. A ';' separates
    # statements on the same physical line, so counting it (as
    # len(t.value) did) made every later line number too large.
    t.type = 'NEWLINE'
    t.lexer.lineno += t.value.count('\n')
    return t
# A string containing ignored characters (spaces and tabs). Newlines are
# deliberately not listed here: they are matched by the t_newline rule.
t_ignore = ' \t'
# Error handling rule: report the character that no rule matched, then
# step over it so that lexing can continue with the rest of the input.
def t_error(t):
    # t.value holds the remaining input; its first character is the one
    # being reported.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
# Build the lexer. This is a module-level statement, so it runs when the
# module is imported. lex.lex() is called with no arguments; per the PLY
# documentation it then collects the token rules (tokens, literals, t_*)
# from the calling module.
lexer = lex.lex()