"""Token definitions and PLY lexer for a small keyword-driven scripting language.

Importing this module builds the lexer as a side effect and exposes:

* ``reserved`` -- mapping from keyword spelling to token name,
* ``tokens``   -- the full list of token names (for use by a PLY parser),
* ``lexer``    -- the ready-to-use ``ply.lex`` lexer instance.

Rule-ordering reminder (see the PLY documentation): rules defined as
functions are tried first, in definition order; rules defined as strings are
tried afterwards, sorted by *decreasing regular-expression length*.  Patterns
are compiled with ``re.VERBOSE``, so literal whitespace and ``#`` must not
appear unescaped in a pattern.
"""
import ply.lex as lex

# Reserved words: source spelling -> token name.
reserved = {
    'If': 'IF',
    'Then': 'THEN',
    'Else': 'ELSE',
    'IfEnd': 'IFEND',
    'While': 'WHILE',
    'WhileEnd': 'WHILEEND',
    'Do': 'DO',
    'LpWhile': 'LPWHILE',
    'For': 'FOR',
    'To': 'TO',
    'Step': 'STEP',
    'Next': 'NEXT',
    'Locate': 'LOCATE',
    'Getkey': 'GETKEY',
    'Not': 'NOT',
    # Was 'And'; upper-cased to match every other token name.  A grammar that
    # referenced the old mixed-case spelling must use 'AND' instead.
    'And': 'AND',
    'Or': 'OR',
    'List': 'LIST',
    'Mat': 'MAT',
    'Str': 'STR',
    'Lbl': 'LBL',
    'Goto': 'GOTO',
}

# List of token names (operators/literals first, then the reserved words).
tokens = [
    'PLUS',
    'MINUS',
    'TIMES',
    'DIVIDE',
    'MODULO',
    'LPAREN',
    'RPAREN',
    'LCURBRA',
    'RCURBRA',
    'LSQRBRA',
    'RSQRBRA',
    'ASSIGN',
    'COMMA',
    'STRING',
    'NUMBER',
    'ISEQUAL',
    'PLUSASSIGN',
    'MINUSASSIGN',
    'TIMESASSIGN',
    'DIVIDEASSIGN',
    'MODULOASSIGN',
    'NEWLINE',
    'ID',
] + list(reserved.values())

# Simple operator/punctuation rules.
#
# PLY orders string rules by decreasing pattern length and keeps definition
# order on ties, so every single-character operator must have a pattern that
# is strictly shorter than the compound operators sharing its prefix.  That is
# why '=', '/', '-', '%' and ',' are written without a (redundant) backslash:
# with r'\=' and r'\/' the patterns tied with r'==' and r'/=' and the shorter
# operator won, so ISEQUAL and DIVIDEASSIGN could never be produced.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURBRA = r'\{'
t_RCURBRA = r'\}'
t_LSQRBRA = r'\['
t_RSQRBRA = r'\]'
t_ASSIGN = r'='
t_COMMA = r','
t_ISEQUAL = r'=='
t_PLUSASSIGN = r'\+='
t_MINUSASSIGN = r'-='
t_TIMESASSIGN = r'\*='
t_DIVIDEASSIGN = r'/='
t_MODULOASSIGN = r'%='

# Line comments ("// ..." up to end of line) are discarded.
t_ignore_COMMENT = r'//.*'


# Numeric literal with an optional run of leading signs; the token value is
# the resulting float.  The digit part allows at most one decimal point so
# that float() can never fail on the matched text (the previous '[0-9.]+'
# accepted inputs such as '1.2.3' and crashed with ValueError).
# NOTE(review): because the sign is part of the literal, 'a-1' lexes as
# ID NUMBER(-1) rather than ID MINUS NUMBER -- kept as-is since the parser may
# rely on it; confirm against the grammar.
def t_NUMBER(t):
    r'[+-]*(?:[0-9]+\.?[0-9]*|\.[0-9]+)\b'
    text = t.value
    magnitude = float(text.lstrip('+-'))
    # An odd number of '-' signs negates the value; '+' signs are no-ops.
    t.value = -magnitude if text.count('-') % 2 else magnitude
    return t


# String literal in double or single quotes.  The body may contain
# backslash-escaped characters and stops at the first unescaped closing quote
# (the previous greedy '.*' merged everything between the first and the last
# quote on a line into one token).
def t_STRING(t):
    r'"(?:\\.|[^"\\\n])*"|\'(?:\\.|[^\'\\\n])*\''
    t.value = t.value[1:-1]  # strip the surrounding quotes
    # Normalise so that every double quote in the value ends up escaped
    # exactly once: unescape first, then escape them all.
    t.value = t.value.replace('\\"', '"')
    t.value = t.value.replace('"', '\\"')
    return t


# Identifier, or a reserved word when the spelling is a key of `reserved`.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')
    return t


# Statement separator: any run of ';' and/or newlines becomes one NEWLINE
# token.  Only real newline characters advance the line counter (previously
# every ';' was counted as a line too).
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    t.lexer.lineno += t.value.count('\n')
    return t


# A string containing ignored characters (spaces and tabs).
t_ignore = ' \t'


# Error handling rule: report the offending character and skip past it.
def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


# Build the lexer from the rules defined in this module.
lexer = lex.lex()