# ebc/lexer.py

import ply.lex as lex

# Reserved words of the language, mapped to the token type emitted for each.
# Every keyword maps to its own upper-cased spelling, with one exception
# handled right below the table.
reserved = {
    word: word.upper()
    for word in (
        'If', 'Then', 'Else', 'IfEnd',
        'While', 'WhileEnd',
        'Do', 'LpWhile',
        'For', 'To', 'Step', 'Next',
        'Locate', 'Getkey',
        'Not', 'And', 'Or',
        'List', 'Mat', 'Str',
        'Lbl', 'Goto',
    )
}
# NOTE(review): 'And' is the only keyword whose token type is mixed-case.
# It looks like a typo for 'AND', but the name is kept because code outside
# this file may refer to the token type by this spelling - confirm before
# normalising it.
reserved['And'] = 'And'

# Every token type this lexer can emit: first the operators, punctuation and
# literal kinds, then the token type of each reserved word.
tokens = [
    # arithmetic operators
    'PLUS',
    'MINUS',
    'TIMES',
    'DIVIDE',
    'MODULO',
    # brackets
    'LPAREN',
    'RPAREN',
    'LCURBRA',
    'RCURBRA',
    'LSQRBRA',
    'RSQRBRA',
    # assignment, separators and literals
    'ASSIGN',
    'COMMA',
    'STRING',
    'NUMBER',
    # comparison and compound assignment
    'ISEQUAL',
    'PLUSASSIGN',
    'MINUSASSIGN',
    'TIMESASSIGN',
    'DIVIDEASSIGN',
    'MODULOASSIGN',
    # statement separator and identifiers
    'NEWLINE',
    'ID',
]
tokens.extend(reserved.values())

# Regular expressions for the simple (string-defined) tokens.
#
# ply tries string-defined rules in order of decreasing pattern length, so a
# compound operator is only recognised when its pattern is strictly longer
# than the pattern of the one-character operator it starts with. '=' and '/'
# are therefore written without a (redundant) backslash: with the escape,
# r'\=' and r'==' (and r'\/' and r'/=') had the same length, and '==' was
# split into two ASSIGN tokens and '/=' into DIVIDE followed by ASSIGN.
t_PLUS = r'\+'
t_MINUS = r'\-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'\%'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURBRA = r'\{'
t_RCURBRA = r'\}'
t_LSQRBRA = r'\['
t_RSQRBRA = r'\]'
t_ASSIGN = r'='
t_COMMA = r'\,'
t_ISEQUAL = r'=='
t_PLUSASSIGN = r'\+='
t_MINUSASSIGN = r'\-='
t_TIMESASSIGN = r'\*='
t_DIVIDEASSIGN = r'/='
t_MODULOASSIGN = r'\%='
# Comments: '//' up to the end of the line. The 't_ignore_' prefix makes ply
# discard the match instead of emitting a token.
t_ignore_COMMENT = r'//.*'

# Numeric literal, converted to a float.
#
# The string on the first line of the function is the pattern ply matches
# (it is not a docstring): any run of leading '+'/'-' signs, followed by
# digits with an optional fractional part, or by a fraction starting with
# '.'. The trailing \b rejects a number glued to a letter ("5x").
#
# The previous pattern used the character class [(0-9).], which contains
# literal parentheses, so input such as "(1" was taken as a NUMBER and
# float() raised ValueError.
#
# NOTE(review): function rules are tried before the string-defined operator
# rules, so a sign directly in front of a digit is absorbed here ("a-1"
# yields ID then NUMBER(-1.0)). Left unchanged because the parser may rely
# on it - confirm.
def t_NUMBER(t):
    r'[+-]*(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\b'
    # Fold the sign run: '+' is a no-op and each '--' pair cancels out,
    # leaving at most one '-' for float() to handle.
    folded = t.value.replace("+", "").replace("--", "")
    t.value = float(folded)
    return t

# String literal in double or single quotes, on a single line.
#
# The string on the first line of the function is the pattern ply matches
# (it is not a docstring). Inside the quotes it accepts either a backslash
# escape (backslash plus any character) or any character that is not the
# closing quote, a backslash or a newline. The previous pattern used a
# greedy '.*', which ran to the LAST quote on the line, so two literals on
# one line ('"a", "b"') were merged into a single token.
#
# NOTE(review): an unescaped double quote inside a double-quoted literal now
# ends the literal; write it as \" (or use single quotes around the text).
def t_STRING(t):
    r"\"(?:\\.|[^\"\\\n])*\"|'(?:\\.|[^'\\\n])*'"
    inner = t.value[1:-1]  # drop the surrounding quote characters
    # Normalise double quotes: un-escape any that are already escaped, then
    # escape them all, so every '"' in the value ends up as '\"' exactly once.
    t.value = inner.replace('\\"', '"').replace('"', '\\"')
    return t

# Identifiers and keywords share one rule: a letter or underscore followed by
# letters, digits or underscores. The matched word is looked up in `reserved`
# and gets that keyword's token type; anything else is a plain ID.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    word = t.value
    if word in reserved:
        t.type = reserved[word]
    else:
        t.type = 'ID'
    return t

# Statement separator: one or more ';' and/or newline characters are emitted
# as a single NEWLINE token. The string on the first line of the function is
# the pattern ply matches (it is not a docstring).
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    # Only real line breaks advance the line counter; ';' separates
    # statements on the same physical line and must not be counted.
    t.lexer.lineno += t.value.count('\n')
    return t

# Characters skipped between tokens: spaces and tabs. Newlines are not
# listed here; they are matched by the t_newline rule instead.
t_ignore  = ' \t'

# Error handling rule: report the first character of the text that could not
# be matched, then skip that single character so lexing can carry on.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)

# Build the lexer object when this module is imported. lex.lex() is called
# without arguments, leaving it to ply to pick up the rules defined in this
# module.
lexer = lex.lex()