ebc/lexer.py

119 lines
2.2 KiB
Python

import ply.lex as lex
# Reserved words: maps each keyword, as spelled in the source text, to its
# token type.  t_ID looks names up here so keywords are not emitted as ID.
reserved = {
    'If': 'IF',
    'Then': 'THEN',
    'Else': 'ELSE',
    'IfEnd': 'IFEND',
    'While': 'WHILE',
    'WhileEnd': 'WHILEEND',
    'Do': 'DO',
    'LpWhile': 'LPWHILE',
    'For': 'FOR',
    'To': 'TO',
    'Step': 'STEP',
    'Next': 'NEXT',
    'Locate': 'LOCATE',
    'Getkey': 'GETKEY',
    'Not': 'NOT',
    # Was 'And': the only token type that was not upper-case like the rest.
    'And': 'AND',
    'Or': 'OR',
    'List': 'LIST',
    'Mat': 'MAT',
    'Str': 'STR',
    'Lbl': 'LBL',
    'Goto': 'GOTO',
}
# Token names known to the lexer: the operator, literal and identifier tokens
# listed here, followed by one token type per entry in `reserved`.
tokens = [
    # arithmetic operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    # brackets
    'LPAREN', 'RPAREN', 'LCURBRA', 'RCURBRA', 'LSQRBRA', 'RSQRBRA',
    # assignment, separator and literals
    'ASSIGN', 'COMMA', 'STRING', 'NUMBER',
    # comparison and compound assignment
    'ISEQUAL', 'PLUSASSIGN', 'MINUSASSIGN', 'TIMESASSIGN',
    'DIVIDEASSIGN', 'MODULOASSIGN',
    # statement separator and identifiers
    'NEWLINE', 'ID',
]
tokens.extend(reserved.values())
# Simple tokens, each defined by a regex string.
# PLY adds string rules to its master regex in order of decreasing pattern
# length (ties keep definition order), so a two-character operator needs a
# LONGER pattern than the one-character operator it starts with; otherwise
# the shorter rule wins and the longer operator can never match.
t_PLUS = r'\+'
t_MINUS = r'\-'
t_TIMES = r'\*'
t_DIVIDE = r'\/'
t_MODULO = r'\%'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURBRA = r'\{'
t_RCURBRA = r'\}'
t_LSQRBRA = r'\['
t_RSQRBRA = r'\]'
t_ASSIGN = r'\='
t_COMMA = r'\,'
# Both '=' are escaped so this pattern is longer than t_ASSIGN's; written as
# '==' it tied with t_ASSIGN and '==' was lexed as two ASSIGN tokens.
t_ISEQUAL = r'\=\='
t_PLUSASSIGN = r'\+='
t_MINUSASSIGN = r'\-='
t_TIMESASSIGN = r'\*='
# '/' is escaped so this pattern is longer than t_DIVIDE's; written as '/='
# it tied with t_DIVIDE and '/=' was lexed as DIVIDE followed by ASSIGN.
t_DIVIDEASSIGN = r'\/='
t_MODULOASSIGN = r'\%='
# Comments run from '//' to the end of the line and produce no token.
t_ignore_COMMENT = r'//.*'
# Numeric literal with an optional run of leading signs; value becomes a float.
def t_NUMBER(t):
    r'[+-]*(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\b'
    # The string above is the token regex PLY reads from the docstring, not
    # documentation.  It accepts digits with at most one decimal point; the
    # earlier character class "[(0-9).]" also accepted parentheses and
    # repeated dots, so input such as "(1" or "1.2.3" reached float() and
    # raised ValueError.
    # NOTE(review): signs are consumed here, so "a-1" lexes as ID followed by
    # NUMBER(-1.0) rather than ID MINUS NUMBER -- confirm the parser expects it.
    # Collapse the sign prefix: '+' is a no-op and every '--' pair cancels out.
    literal = t.value.replace("+", "").replace("--", "")
    t.value = float(literal)
    return t
# String literal in double or single quotes, confined to a single line.
def t_STRING(t):
    r'"(?:\\.|[^"\\\n])*"|\'(?:\\.|[^\'\\\n])*\''
    # The string above is the token regex PLY reads from the docstring, not
    # documentation.  It stops at the first unescaped closing quote; the
    # earlier greedy ".*" ran to the LAST quote on the line, so two strings
    # on one line ("a" + "b") were merged into a single token.
    inner = t.value[1:-1]  # drop the surrounding quote characters
    # Normalise double quotes: unescape the already-escaped ones first, then
    # escape them all, so each '"' ends up escaped exactly once.
    t.value = inner.replace('\\"', '"').replace('"', '\\"')
    return t
# Identifiers; a name listed in `reserved` is emitted with that keyword's
# token type instead of ID.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'ID'
    return t
# Statement separators: a run of ';' and/or newlines becomes one NEWLINE token.
def t_newline(t):
    r'(;|\n)+'
    # Set the type explicitly to the declared NEWLINE token name.
    t.type = 'NEWLINE'
    # Only real line breaks advance the line counter; ';' separates statements
    # on the same line and used to be counted as a line by len(t.value).
    t.lexer.lineno += t.value.count('\n')
    return t
# Characters skipped between tokens: space and tab.
t_ignore = " \t"
# Error rule: report the character that could not be lexed, then carry on.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    # Skip exactly one character so lexing resumes right after it.
    t.lexer.skip(1)
# Build the lexer; lex.lex() collects the token rules defined in this module.
lexer = lex.lex()