# ebc/lexer.py

import ply.lex as lex

# Reserved words: maps each keyword's source spelling to its token type.
# t_ID looks identifiers up in this table to tell keywords apart from
# ordinary names.  The per-line groupings below are only a reading aid;
# insertion order is what matters for the token list built further down.
# NOTE(review): 'And' maps to the mixed-case type 'And' while every other
# entry is upper-case -- this looks like a typo for 'AND'.  Confirm against
# the parser before changing it, since the grammar must use the same name.
reserved = {
    'If': 'IF', 'Then': 'THEN', 'Else': 'ELSE', 'IfEnd': 'IFEND',
    'While': 'WHILE', 'WhileEnd': 'WHILEEND',
    'Do': 'DO', 'LpWhile': 'LPWHILE',
    'For': 'FOR', 'To': 'TO', 'Step': 'STEP', 'Next': 'NEXT',
    'Locate': 'LOCATE', 'Getkey': 'GETKEY',
    'Not': 'NOT', 'And': 'And', 'Or': 'OR',
    'List': 'LIST', 'Mat': 'MAT', 'Str': 'STR',
    'Lbl': 'LBL', 'Goto': 'GOTO',
}

# Single-character tokens, declared as literals instead of named tokens.
literals = '+-*/%(){}[]=,'

# Token names: the named tokens produced by the rules in this module,
# followed by one token type per reserved word.
tokens = [
    'STRING', 'NUMBER',
    'ISEQUAL',
    'PLUSASSIGN', 'MINUSASSIGN', 'TIMESASSIGN', 'DIVASSIGN', 'MODASSIGN',
    'NEWLINE',
    'ID',
]
tokens.extend(reserved.values())

# Multi-character operators, each defined by a plain regex string.
# Regex metacharacters ('+', '*') are backslash-escaped.
t_ISEQUAL = r'=='
t_PLUSASSIGN = r'\+='
t_MINUSASSIGN = r'\-='
t_TIMESASSIGN = r'\*='
t_DIVASSIGN = r'/='
t_MODASSIGN = r'\%='

# '//' comments run to the end of the line.  Per PLY's naming convention a
# rule whose name starts with t_ignore_ matches text but yields no token.
t_ignore_COMMENT = r'//.*'

# Integer literals: an optional sign followed by decimal digits, ending on a
# word boundary.  The matched text is replaced by its int value.
# (The string right below the def is the rule's regex, not documentation.)
# NOTE(review): because the sign is part of the literal, input such as
# `a-1` appears to lex as ID followed by NUMBER(-1) rather than ID '-'
# NUMBER(1) -- confirm the parser expects this.
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    matched_text = t.value
    t.value = int(matched_text)
    return t

# String literals: single- or double-quoted, confined to one line.
# (The string right below the def is the rule's regex, not documentation.)
# The pattern stops at the first unescaped closing quote, so two literals on
# the same line (e.g. "a" + "b") produce two STRING tokens instead of being
# swallowed as one.  A backslash escapes the character that follows it, which
# lets \" and \' appear inside a literal.
# The token value is the text between the quotes, with every double quote
# inside it backslash-escaped.
def t_STRING(t):
    r'"(?:\\.|[^"\\\n])*"|\'(?:\\.|[^\'\\\n])*\''
    inner = t.value[1:-1]  # drop the surrounding quote characters
    # Normalise double quotes: first undo any existing \" escapes, then
    # escape every double quote, so each one ends up escaped exactly once.
    inner = inner.replace('\\"', '"')
    inner = inner.replace('"', '\\"')
    t.value = inner
    return t

# Identifiers and keywords: a letter or underscore followed by letters,
# digits or underscores.  Words listed in `reserved` get their keyword token
# type; everything else is a plain 'ID'.  The lookup is case-sensitive.
# (The string right below the def is the rule's regex, not documentation.)
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    word = t.value
    t.type = reserved[word] if word in reserved else 'ID'
    return t

# Statement separators: a run of ';' and/or newline characters becomes a
# single NEWLINE token, and the lexer's line counter is advanced.
# (The string right below the def is the rule's regex, not documentation.)
def t_newline(t):
    r'(;|\n)+'
    # The rule name is lower-case, so set the declared token type explicitly.
    t.type = 'NEWLINE'
    # Only real newline characters start a new source line; ';' separates
    # statements on the same line and must not advance the line number.
    t.lexer.lineno += t.value.count('\n')
    return t

# Characters skipped between tokens: space and tab.
t_ignore = ' \t'

# Error handling rule: report the first character of the unmatched input on
# stdout, then skip exactly that one character so lexing can continue.
def t_error(t):
    bad_char = t.value[0]
    print("Illegal character '%s'" % bad_char)
    t.lexer.skip(1)

# Build the lexer and expose it as the module-level `lexer` object.
# NOTE(review): lex.lex() is called with no arguments, so it presumably
# discovers the token rules from this module's namespace (see the PLY docs);
# keep this call at module level, after every rule definition.
lexer = lex.lex()