"""PLY lexer for a BASIC-like language.

Defines the reserved words (If/Then/IfEnd, While/WhileEnd, Lbl/Goto, ...),
the literal characters, the token names and the token rules, then builds
the module-level ``lexer`` object with ``lex.lex()``.
"""
import ply.lex as lex

# Reserved words: source spelling -> token name.
# NOTE(review): 'And' maps to the token name 'And' while every other entry
# uses an upper-case name. It is left untouched because the token name is
# part of the contract with the parser -- confirm before renaming to 'AND'.
reserved = {
    'If': 'IF',
    'Then': 'THEN',
    'Else': 'ELSE',
    'IfEnd': 'IFEND',
    'While': 'WHILE',
    'WhileEnd': 'WHILEEND',
    'Do': 'DO',
    'LpWhile': 'LPWHILE',
    'For': 'FOR',
    'To': 'TO',
    'Step': 'STEP',
    'Next': 'NEXT',
    'Locate': 'LOCATE',
    'Getkey': 'GETKEY',
    'Not': 'NOT',
    'And': 'And',
    'Or': 'OR',
    'List': 'LIST',
    'Mat': 'MAT',
    'Str': 'STR',
    'Lbl': 'LBL',
    'Goto': 'GOTO',
}

# Single characters that are returned as tokens whose type is the
# character itself.
literals = '+-*/%(){}[]=,'

# List of token names (the reserved-word token names are appended).
tokens = [
    'STRING',
    'NUMBER',
    'ISEQUAL',
    'PLUSASSIGN',
    'MINUSASSIGN',
    'TIMESASSIGN',
    'DIVASSIGN',
    'MODASSIGN',
    'NEWLINE',
    'ID',
] + list(reserved.values())

# Simple operator tokens defined by plain regular expressions.
t_ISEQUAL = r'=='
t_PLUSASSIGN = r'\+='
t_MINUSASSIGN = r'\-='
t_TIMESASSIGN = r'\*='
t_DIVASSIGN = r'/='
t_MODASSIGN = r'\%='

# Comments: '//' up to the end of the line; the 't_ignore_' prefix makes
# PLY discard the match instead of returning a token.
t_ignore_COMMENT = r'//.*'

# String literal patterns. Each one stops at the first *unescaped* closing
# quote, so two strings on the same line are no longer merged into one
# token (the previous greedy '.*' matched up to the last quote on the line).
_DQ_STRING = r'"(?:\\.|[^"\\\n])*"'
_SQ_STRING = r"'(?:\\.|[^'\\\n])*'"


# Integer literal; the token value is converted to int.
# NOTE(review): the optional sign is consumed here, so input such as `a-1`
# produces ID NUMBER(-1) instead of ID '-' NUMBER. Removing the sign would
# require unary-minus handling in the parser -- confirm before changing.
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    t.value = int(t.value)
    return t


@lex.TOKEN(_DQ_STRING + '|' + _SQ_STRING)
def t_STRING(t):
    """Match a single- or double-quoted string literal.

    The surrounding quotes are removed and every double quote in the
    content ends up backslash-escaped, whether or not it was escaped in
    the source text.
    """
    t.value = t.value[1:-1]  # drop the surrounding quotes
    # Unescape first so already-escaped quotes are not escaped twice.
    t.value = t.value.replace('\\"', '"')
    t.value = t.value.replace('"', '\\"')
    return t


# Identifier; reclassified as a reserved-word token when the exact
# spelling appears in `reserved`.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')
    return t


# Statement separator: one or more ';' or newline characters, returned as
# a single NEWLINE token. Only real '\n' characters advance the line
# counter, so ';' separators no longer skew reported line numbers.
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    t.lexer.lineno += t.value.count('\n')
    return t


# A string containing ignored characters (spaces and tabs).
t_ignore = ' \t'


# Error handling rule: report the offending character and skip it so
# lexing can continue.
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer.
lexer = lex.lex()