# ebc/lexer.py

import ply.lex as lex

# Reserved words of the source language, keyed by their exact spelling and
# mapped to the token type emitted for them.
reserved = dict(
    # Conditionals
    If='IF',
    Then='THEN',
    Else='ELSE',
    IfEnd='IFEND',
    # Loops
    While='WHILE',
    WhileEnd='WHILEEND',
    Do='DO',
    LpWhile='LPWHILE',
    For='FOR',
    To='TO',
    Step='STEP',
    Next='NEXT',
    # Built-in commands
    Locate='LOCATE',
    Getkey='GETKEY',
    # Logical operators
    Not='NOT',
    # NOTE(review): every other token type is upper-case; 'And' looks like a
    # typo for 'AND'. Left as-is because the parser may reference this name.
    And='And',
    Or='OR',
    # Data containers
    List='LIST',
    Mat='MAT',
    Str='STR',
    # Jumps
    Lbl='LBL',
    Goto='GOTO',
)

# Every token type the lexer can emit: operators, delimiters and literals
# first, then the token type of each reserved word.
tokens = [
    # Arithmetic operators
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    # Brackets
    'LPAREN', 'RPAREN',
    'LCURBRA', 'RCURBRA',
    'LSQRBRA', 'RSQRBRA',
    # Assignment, separators and literals
    'ASSIGN', 'COMMA', 'STRING', 'NUMBER',
    # Comparison and compound assignment
    'ISEQUAL',
    'PLUSASSIGN', 'MINUSASSIGN', 'TIMESASSIGN',
    'DIVIDEASSIGN', 'MODULOASSIGN',
    # Statement separator and identifiers
    'NEWLINE', 'ID',
    *reserved.values(),
]

# Simple tokens defined by regex strings.
#
# PLY adds string rules to its master regex in order of decreasing pattern
# *text* length; its documentation does not specify how ties are ordered (in
# practice definition order decides). A two-character operator whose pattern
# is no longer than the pattern of its one-character prefix can therefore
# lose to it: with r'\=' and r'==' (both 2 characters), '==' was lexed as
# two ASSIGN tokens, and likewise '/=' as DIVIDE followed by ASSIGN.
# To stay independent of tie-breaking, every compound operator is defined
# first and written so that its pattern is strictly longer than the pattern
# of its prefix operator.

# Compound operators (must win over their one-character prefixes)
t_ISEQUAL = r'\=\='
t_PLUSASSIGN = r'\+\='
t_MINUSASSIGN = r'\-\='
t_TIMESASSIGN = r'\*\='
t_DIVIDEASSIGN = r'\/\='
t_MODULOASSIGN = r'\%\='

# Single-character operators and delimiters
t_PLUS = r'\+'
t_MINUS = r'\-'
t_TIMES = r'\*'
t_DIVIDE = r'\/'
t_MODULO = r'\%'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURBRA = r'\{'
t_RCURBRA = r'\}'
t_LSQRBRA = r'\['
t_RSQRBRA = r'\]'
t_ASSIGN = r'\='
t_COMMA = r'\,'

# Comments: '//' up to the end of the line, discarded by the lexer
t_ignore_COMMENT = r'//.*'

# Numeric literal: an optional run of '+'/'-' signs followed by a decimal
# number ("12", "12.", "12.5" or ".5"). The token value is converted to float.
#
# The function docstring is the PLY regex and must stay the first statement.
# The previous character class "[(0-9).]" also accepted '(' and ')', so input
# such as "(1," was taken as a NUMBER and crashed in float(); "1.2.3" and a
# lone "." crashed the same way. The pattern now requires at least one digit
# and allows at most one decimal point.
def t_NUMBER(t):
    r'[+-]*(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\b'
    # '+' signs are no-ops and every '--' pair cancels out, which leaves at
    # most a single leading '-' for float() to interpret.
    text = t.value.replace("+", "").replace("--", "")
    t.value = float(text)
    return t

# String literal delimited by double or single quotes, on a single line.
#
# The function docstring is the PLY regex and must stay the first statement.
# The previous pattern used a greedy ".*", so two literals on one line
# ('"a" + "b"') were merged into a single token. The pattern now stops at the
# first unescaped closing quote; a backslash escapes the following character.
def t_STRING(t):
    r"\"(?:\\.|[^\"\\\n])*\"|'(?:\\.|[^'\\\n])*'"
    inner = t.value[1:-1]  # drop the surrounding quotes
    # Normalise double quotes: un-escape the already escaped ones first so
    # that the second pass escapes each double quote exactly once.
    inner = inner.replace('\\"', '"')
    t.value = inner.replace('"', '\\"')
    return t

# Identifiers. A lexeme that is spelled exactly like an entry of `reserved`
# is emitted with that entry's token type instead of 'ID'.
# (The docstring is the PLY regex and must stay the first statement.)
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    lexeme = t.value
    if lexeme in reserved:
        t.type = reserved[lexeme]
    else:
        t.type = 'ID'
    return t

# Statement separators: any run of newlines and/or ';', emitted as a single
# NEWLINE token.
#
# Only real newline characters advance the line counter. The previous code
# added len(t.value), so every ';' separator was counted as a line and the
# reported line numbers drifted.
# (The docstring is the PLY regex and must stay the first statement.)
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    t.lexer.lineno += t.value.count('\n')
    return t

# Characters skipped between tokens: the space and the tab
t_ignore = " \t"

# Error rule: report the first character of the remaining input, then skip
# that one character so lexing can continue.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)

# Build the lexer. This is a module-level statement, so it runs when the
# module is imported; the resulting object is exposed as `lexer`.
# NOTE(review): presumably lex.lex() collects the `tokens` list and the t_*
# rules from this module's namespace - confirm against the PLY documentation.
lexer = lex.lex()
Proper lexer. 2020-05-07 12:26:57 +02:00			`import ply.lex as lex`

			`# Reserved values`
			`reserved = {`
Changed reserved case, new keywords from Getkey, new litterals, changed comment from # to // 2020-05-07 14:16:22 +02:00			`'If': 'IF',`
			`'Then': 'THEN',`
			`'Else': 'ELSE',`
			`'IfEnd': 'IFEND',`
			`'While': 'WHILE',`
			`'WhileEnd': 'WHILEEND',`
			`'Do': 'DO',`
			`'LpWhile': 'LPWHILE',`
			`'For': 'FOR',`
			`'To': 'TO',`
			`'Step': 'STEP',`
			`'Next': 'NEXT',`
			`'Locate': 'LOCATE',`
			`'Getkey': 'GETKEY',`
			`'Not': 'NOT',`
			`'And': 'And',`
			`'Or': 'OR',`
List, Str, Mat tokens 2020-05-08 10:48:22 +02:00			`'List': 'LIST',`
			`'Mat': 'MAT',`
			`'Str': 'STR',`
main.bc updated, added Lbl/Goto to lexer.py 2020-05-08 11:29:52 +02:00			`'Lbl': 'LBL',`
			`'Goto': 'GOTO',`
Proper lexer. 2020-05-07 12:26:57 +02:00			`}`

			`# List of token names`
			`tokens = [`
Moved from litterals to regular tokens for lisibility 2020-05-08 15:42:36 +02:00			`'PLUS',`
			`'MINUS',`
			`'TIMES',`
			`'DIVIDE',`
			`'MODULO',`
			`'LPAREN',`
			`'RPAREN',`
			`'LCURBRA',`
			`'RCURBRA',`
			`'LSQRBRA',`
			`'RSQRBRA',`
			`'ASSIGN',`
			`'COMMA',`
Proper lexer. 2020-05-07 12:26:57 +02:00			`'STRING',`
			`'NUMBER',`
Modulo and assign shortcuts 2020-05-08 10:44:32 +02:00			`'ISEQUAL',`
			`'PLUSASSIGN',`
			`'MINUSASSIGN',`
			`'TIMESASSIGN',`
Moved from litterals to regular tokens for lisibility 2020-05-08 15:42:36 +02:00			`'DIVIDEASSIGN',`
			`'MODULOASSIGN',`
Proper lexer. 2020-05-07 12:26:57 +02:00			`'NEWLINE',`
			`'ID',`
			`] + list(reserved.values())`

			`# common regex`
Moved from litterals to regular tokens for lisibility 2020-05-08 15:42:36 +02:00			`t_PLUS = r'\+'`
			`t_MINUS = r'\-'`
			`t_TIMES = r'\*'`
			`t_DIVIDE = r'\/'`
			`t_MODULO = r'\%'`
			`t_LPAREN = r'\('`
			`t_RPAREN = r'\)'`
			`t_LCURBRA = r'\{'`
			`t_RCURBRA = r'\}'`
			`t_LSQRBRA = r'\['`
			`t_RSQRBRA = r'\]'`
			`t_ASSIGN = r'\='`
			`t_COMMA = r'\,'`
Modulo and assign shortcuts 2020-05-08 10:44:32 +02:00			`t_ISEQUAL = r'=='`
			`t_PLUSASSIGN = r'\+='`
			`t_MINUSASSIGN = r'\-='`
			`t_TIMESASSIGN = r'\*='`
Moved from litterals to regular tokens for lisibility 2020-05-08 15:42:36 +02:00			`t_DIVIDEASSIGN = r'/='`
			`t_MODULOASSIGN = r'\%='`
Proper lexer. 2020-05-07 12:26:57 +02:00			`# Comments`
List, Str, Mat tokens 2020-05-08 10:48:22 +02:00			`t_ignore_COMMENT = r'//.*'`
Proper lexer. 2020-05-07 12:26:57 +02:00
			`# A regex rule with some action code`
			`def t_NUMBER(t):`
Hello world of yacc (yay) 2020-06-09 10:24:09 +02:00			`r'[+-]*[(0-9).]+\b'`
			`t.value = t.value.replace("+", "")`
(minor) Changed priority of numbers... Yeah uh 2020-06-09 10:36:34 +02:00			`t.value = t.value.replace("--", "")`
Hello world of yacc (yay) 2020-06-09 10:24:09 +02:00			`t.value = float(t.value)`
Proper lexer. 2020-05-07 12:26:57 +02:00			`return t`

			`# Strings`
			`def t_STRING(t):`
			`'(\".\")\|(\'.\')'`
			`t.value = t.value[1:-1] # remove those thingies`
			`t.value = t.value.replace('\\"', '"')`
			`t.value = t.value.replace('"', '\\"')`
			`return t`

			`# Check for reserved words`
			`def t_ID(t):`
			`r'[a-zA-Z_][a-zA-Z_0-9]*'`
			`t.type = reserved.get(t.value,'ID')`
			`return t`

			`# Define a rule so we can track line numbers`
			`def t_newline(t):`
			`r'(;\|\n)+'`
			`#r'\n+'`
			`t.type = 'NEWLINE'`
			`t.lexer.lineno += len(t.value)`
			`return t`

			`# A string containing ignored characters (spaces and tabs)`
			`t_ignore = ' \t'`

			`# Error handling rule`
			`def t_error(t):`
			`print("Illegal character '%s'" % t.value[0])`
			`t.lexer.skip(1)`

			`# Build the lex`
			`lexer = lex.lex()`