Proper lexer.

This commit is contained in:
KikooDX 2020-05-07 12:26:57 +02:00
parent 7878230778
commit b7c1eec5ba
7 changed files with 85 additions and 77 deletions

View File

@ -1,10 +0,0 @@
List 1 = {5, 6, 7}
B = 0
For A = 1 To 5 Step 2
Locate A, 1, A
B += 1
Next
Locate 1, 1, 0
For A = 1, 21, 1
Locate A, 4, "D"
Next

View File

@ -1,51 +0,0 @@
import sys
import os
def rem_empty(data):
    """Return *data* as a list with every falsy entry dropped."""
    return list(filter(None, data))
class PreprocessorError(ValueError):
    """Raised when a '#' preprocessor directive is unknown or malformed."""
def open_and_preprocess(path, name, main=False):
    """Read `path/name`, handle '#' directives, and print the code lines.

    Directives: ``#name <n>`` sets the module-global ``out_name`` (only
    when *main* is true), ``#include <file>`` recursively processes another
    file, ``#define <k> <v>`` records a macro.  Any other line is emitted
    on stdout (the caller redirects stdout to the output file).

    Raises PreprocessorError on an unknown directive.
    """
    # `with` guarantees the handle is closed even on a parse error
    # (the original leaked it).
    with open(f"{path}/{name}", "r") as file:
        content = file.read()
    defines = dict()
    # Strip each line FIRST, then drop empties: a whitespace-only line
    # would survive a falsy-filter and crash the line[0]/startswith test.
    content = rem_empty(line.strip() for line in content.split("\n"))
    # preprocessing
    for line in content:
        if line.startswith("#"):  # is a preprocessor command
            line_argv = rem_empty(line[1:].split(" "))
            command = line_argv[0].lower()
            if command == "name":
                if main:
                    # Only the top-level file may set the output name.
                    global out_name
                    out_name = line_argv[1]
            elif command == "include":
                # File names may contain spaces: rejoin the argv tail.
                open_and_preprocess(path, " ".join(line_argv[1:]))
            elif command == "define":
                # NOTE(review): defines are collected but never expanded
                # anywhere visible here — presumably a work in progress.
                defines[line_argv[1]] = " ".join(line_argv[2:])
            else:
                raise PreprocessorError(f"{command} preprocessor unknown")
        else:
            print(line)
# Driver: redirect stdout into ".temp" so every print() from the
# preprocessor lands there, then rename the result to "<out_name>.txt".
path = "."
name = "main.bc"
out_name = None
with open(".temp", "w") as temp:
    stdout = sys.stdout
    sys.stdout = temp
    try:
        open_and_preprocess(path, name, main=True)
    finally:
        # Always restore stdout, even if preprocessing raised
        # (the original left sys.stdout pointing at the temp file).
        sys.stdout = stdout
if not out_name:
    raise PreprocessorError("#name <name> preprocessor missing")
else:
    os.replace(".temp", f"{out_name}.txt")

View File

@ -1,3 +0,0 @@
#name EXAMPLE
Locate 1, 1, 0
#include test.bc

72
lexer.py Normal file
View File

@ -0,0 +1,72 @@
import ply.lex as lex
# --- Token tables -----------------------------------------------------
# Keywords: each reserved word's token name is simply its upper-case form.
reserved = {kw: kw.upper() for kw in (
    'if', 'then', 'else', 'ifend',
    'while', 'whileend', 'do', 'lpwhile',
    'for', 'to', 'step', 'next',
    'locate',
)}

# Single characters ply hands back verbatim as literal tokens.
literals = "+-*/()=,{}"

# Full token inventory: the lexer's own kinds plus every keyword token.
tokens = [
    'STRING',
    'NUMBER',
    'EQUAL',
    'NEWLINE',
    'ID',
] + list(reserved.values())

# '==' is the equality operator; a lone '=' stays a literal.
t_EQUAL = r'=='

# '#' starts a comment running to end of line; it is silently dropped.
t_ignore_COMMENT = r'\#.*'
# Integer literal, optionally signed (the regex is the rule's docstring).
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    # Hand the parser a real int instead of the matched text.
    text = t.value
    t.value = int(text)
    return t
# Strings: double- or single-quoted, on one line, with backslash escapes.
def t_STRING(t):
    r'"(\\.|[^"\\\n])*"|\'(\\.|[^\'\\\n])*\''
    # Fix: the old pattern used a greedy `.*`, so two strings on one
    # line ("a" + "b") lexed as ONE token spanning everything between
    # the first and last quote.  This pattern stops at the first
    # unescaped closing quote while still allowing \" inside.
    t.value = t.value[1:-1]  # remove the surrounding quotes
    # Normalise embedded double quotes: unescape, then re-escape, so the
    # value always carries \" for a literal quote regardless of input form.
    t.value = t.value.replace('\\"', '"')
    t.value = t.value.replace('"', '\\"')
    return t
# Identifier; promoted to its keyword token when the name is reserved.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    ttype = reserved.get(t.value, 'ID')
    t.type = ttype
    return t
# Statement separators: ';' and '\n' both end a statement (NEWLINE token).
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    # Fix: only real newlines advance the line counter.  The old code
    # added len(t.value), so every ';' (same-line separator) wrongly
    # bumped lineno and broke error locations.
    t.lexer.lineno += t.value.count('\n')
    return t
# Characters skipped silently between tokens (ply's special t_ignore):
# spaces and tabs carry no meaning in this language.
t_ignore = ' \t'
# Fallback for input no rule matches: report it, skip it, keep lexing.
def t_error(t):
    bad_char = t.value[0]
    print(f"Illegal character '{bad_char}'")
    t.lexer.skip(1)
# Build the lexer from every t_* rule and table defined in this module.
lexer = lex.lex()

15
main.bc
View File

@ -1,11 +1,6 @@
#name MAIN
#define test 20
List 1 = {5, 6, 7}
list 1 = {-53, 8, 9}
B = 0
For A = 1 To 5 Step 2
Locate A, 1, A
B += 1
Next
#include example.bc
for i = 1 to 5
locate 1, 1, "\"Bonsoir Paris\" !"
locate 1, 2, 'Ah oui on peut faire ça aussi "coucou"'
next

View File

@ -1,3 +0,0 @@
For A = 1, 21, 1
Locate A, 4, "D"
Next

8
test.py Normal file
View File

@ -0,0 +1,8 @@
from lexer import lexer
# Give the lexer some input — `with` closes the handle (the original
# leaked an open file object).
with open("main.bc", "r") as src:
    lexer.input(src.read())
# Tokenize: iterating the lexer yields one token at a time until EOF.
for tok in lexer:
    print(tok)