# ebc/lexer.py

import ply.lex as lex

# Reserved words of the language, mapped to their token type. Each token type
# is simply the keyword in upper case; t_ID looks identifiers up in this table.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'if', 'then', 'else', 'ifend',
        'while', 'whileend',
        'do', 'lpwhile',
        'for', 'to', 'step', 'next',
        'locate',
    )
}

# Single-character tokens. PLY returns each of these with the character itself
# as the token type, so they need no entry in `tokens`.
literals = "+-*/()=,{}"

# Names of all token types the lexer can produce: the generic ones first,
# followed by one type per reserved word.
tokens = ['STRING', 'NUMBER', 'EQUAL', 'NEWLINE', 'ID']
tokens.extend(reserved.values())

# Rules given as plain regex strings (no action code).

# Equality comparison operator; a lone '=' is handled through `literals`.
t_EQUAL = r'=='

# Comments run from '#' to the end of the line. The `ignore_` prefix tells PLY
# to discard the match instead of emitting a token.
t_ignore_COMMENT = r'\#.*'

# Integer literal. PLY takes the regex from the function's docstring, so the
# raw string below is the pattern itself, not documentation.
# NOTE(review): the optional sign is part of the pattern, so input such as
# `a-1` presumably lexes as ID NUMBER(-1) rather than ID '-' NUMBER; confirm
# that the parser expects this.
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    digits = t.value
    t.value = int(digits)  # hand the parser an int instead of the matched text
    return t

# String literal in double or single quotes. PLY takes the regex from the
# function's docstring, so the raw string below is the pattern itself.
#
# The pattern stops at the first unescaped closing quote, which keeps two
# strings on the same line (e.g. `"a", "b"`) as two separate tokens. A
# backslash escapes the following character, so `\"` may appear inside a
# double-quoted string. Strings cannot span lines.
def t_STRING(t):
    r'"(?:[^"\\\n]|\\.)*"|\'(?:[^\'\\\n]|\\.)*\''
    body = t.value[1:-1]  # drop the surrounding quote characters
    # Normalise double quotes: first unescape the ones that are already
    # escaped, then escape all of them, so each ends up escaped exactly once.
    body = body.replace('\\"', '"').replace('"', '\\"')
    t.value = body
    return t

# Identifiers and keywords share one rule: the matched word is looked up in
# `reserved`, and only words missing from that table stay plain ID tokens.
# (The docstring is the regex PLY uses for this rule.)
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    word = t.value
    t.type = reserved[word] if word in reserved else 'ID'
    return t

# Statement separator: a run of ';' and/or newline characters is emitted as a
# single NEWLINE token. (The docstring is the regex PLY uses for this rule.)
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    # Only real newline characters advance the line counter; a ';' separates
    # statements on the same source line and must not be counted as a line.
    t.lexer.lineno += t.value.count('\n')
    return t

# Characters skipped between tokens: space and tab.
t_ignore = ' \t'

# Error rule: report the first character of the unmatched input and skip
# exactly that one character so lexing can continue.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)

# Build the lexer. lex.lex() is called without arguments, so PLY collects
# `tokens`, `literals` and the t_* rules from this module. The call runs at
# import time, so importing this module constructs the lexer.
lexer = lex.lex()