Proper lexer.

This commit is contained in:
KikooDX 2020-05-07 12:26:57 +02:00
parent 7878230778
commit b7c1eec5ba
7 changed files with 85 additions and 77 deletions

View File

@ -1,10 +0,0 @@
List 1 = {5, 6, 7}
B = 0
For A = 1 To 5 Step 2
Locate A, 1, A
B += 1
Next
Locate 1, 1, 0
For A = 1, 21, 1
Locate A, 4, "D"
Next

View File

@ -1,51 +0,0 @@
import sys
import os
def rem_empty(data):
    """Return *data* as a list with every falsy entry dropped."""
    return list(filter(None, data))
class PreprocessorError(ValueError):
    """Raised when a '#' preprocessor directive is unknown or malformed."""
def open_and_preprocess(path, name, main=False):
    """Read `path/name`, handle '#' directives, and print the code lines.

    Directives: ``#name <n>`` sets the module-global ``out_name`` (only
    when *main* is true), ``#include <file>`` recursively processes another
    file, ``#define <k> <v>`` records a macro.  Any other line is emitted
    on stdout (the caller redirects stdout to the output file).

    Raises PreprocessorError on an unknown directive.
    """
    # `with` guarantees the handle is closed even on a parse error
    # (the original leaked it).
    with open(f"{path}/{name}", "r") as file:
        content = file.read()
    defines = dict()
    # Strip each line FIRST, then drop empties: a whitespace-only line
    # would survive a falsy-filter and crash the line[0]/startswith test.
    content = rem_empty(line.strip() for line in content.split("\n"))
    # preprocessing
    for line in content:
        if line.startswith("#"):  # is a preprocessor command
            line_argv = rem_empty(line[1:].split(" "))
            command = line_argv[0].lower()
            if command == "name":
                if main:
                    # Only the top-level file may set the output name.
                    global out_name
                    out_name = line_argv[1]
            elif command == "include":
                # File names may contain spaces: rejoin the argv tail.
                open_and_preprocess(path, " ".join(line_argv[1:]))
            elif command == "define":
                # NOTE(review): defines are collected but never expanded
                # anywhere visible here — presumably a work in progress.
                defines[line_argv[1]] = " ".join(line_argv[2:])
            else:
                raise PreprocessorError(f"{command} preprocessor unknown")
        else:
            print(line)
# Driver: redirect stdout into ".temp" so every print() from the
# preprocessor lands there, then rename the result to "<out_name>.txt".
path = "."
name = "main.bc"
out_name = None
with open(".temp", "w") as temp:
    stdout = sys.stdout
    sys.stdout = temp
    try:
        open_and_preprocess(path, name, main=True)
    finally:
        # Always restore stdout, even if preprocessing raised
        # (the original left sys.stdout pointing at the temp file).
        sys.stdout = stdout
if not out_name:
    raise PreprocessorError("#name <name> preprocessor missing")
else:
    os.replace(".temp", f"{out_name}.txt")

View File

@ -1,3 +0,0 @@
#name EXAMPLE
Locate 1, 1, 0
#include test.bc

72
lexer.py Normal file
View File

@ -0,0 +1,72 @@
import ply.lex as lex
# --- Token tables -----------------------------------------------------
# Keywords: each reserved word's token name is simply its upper-case form.
reserved = {kw: kw.upper() for kw in (
    'if', 'then', 'else', 'ifend',
    'while', 'whileend', 'do', 'lpwhile',
    'for', 'to', 'step', 'next',
    'locate',
)}

# Single characters ply hands back verbatim as literal tokens.
literals = "+-*/()=,{}"

# Full token inventory: the lexer's own kinds plus every keyword token.
tokens = [
    'STRING',
    'NUMBER',
    'EQUAL',
    'NEWLINE',
    'ID',
] + list(reserved.values())

# '==' is the equality operator; a lone '=' stays a literal.
t_EQUAL = r'=='

# '#' starts a comment running to end of line; it is silently dropped.
t_ignore_COMMENT = r'\#.*'
# Integer literal, optionally signed (the regex is the rule's docstring).
def t_NUMBER(t):
    r'[+-]?[0-9]+\b'
    # Hand the parser a real int instead of the matched text.
    text = t.value
    t.value = int(text)
    return t
# Strings: double- or single-quoted, on one line, with backslash escapes.
def t_STRING(t):
    r'"(\\.|[^"\\\n])*"|\'(\\.|[^\'\\\n])*\''
    # Fix: the old pattern used a greedy `.*`, so two strings on one
    # line ("a" + "b") lexed as ONE token spanning everything between
    # the first and last quote.  This pattern stops at the first
    # unescaped closing quote while still allowing \" inside.
    t.value = t.value[1:-1]  # remove the surrounding quotes
    # Normalise embedded double quotes: unescape, then re-escape, so the
    # value always carries \" for a literal quote regardless of input form.
    t.value = t.value.replace('\\"', '"')
    t.value = t.value.replace('"', '\\"')
    return t
# Identifier; promoted to its keyword token when the name is reserved.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    ttype = reserved.get(t.value, 'ID')
    t.type = ttype
    return t
# Statement separators: ';' and '\n' both end a statement (NEWLINE token).
def t_newline(t):
    r'(;|\n)+'
    t.type = 'NEWLINE'
    # Fix: only real newlines advance the line counter.  The old code
    # added len(t.value), so every ';' (same-line separator) wrongly
    # bumped lineno and broke error locations.
    t.lexer.lineno += t.value.count('\n')
    return t
# Characters skipped silently between tokens (ply's special t_ignore):
# spaces and tabs carry no meaning in this language.
t_ignore = ' \t'
# Fallback for input no rule matches: report it, skip it, keep lexing.
def t_error(t):
    bad_char = t.value[0]
    print(f"Illegal character '{bad_char}'")
    t.lexer.skip(1)
# Build the lexer from every t_* rule and table defined in this module.
lexer = lex.lex()

15
main.bc
View File

@ -1,11 +1,6 @@
#name MAIN
#define test 20
List 1 = {5, 6, 7}
list 1 = {-53, 8, 9}
B = 0
For A = 1 To 5 Step 2
Locate A, 1, A
B += 1
Next
#include example.bc
for i = 1 to 5
locate 1, 1, "\"Bonsoir Paris\" !"
locate 1, 2, 'Ah oui on peut faire ça aussi "coucou"'
next

View File

@ -1,3 +0,0 @@
For A = 1, 21, 1
Locate A, 4, "D"
Next

8
test.py Normal file
View File

@ -0,0 +1,8 @@
from lexer import lexer
# Give the lexer some input — `with` closes the handle (the original
# leaked an open file object).
with open("main.bc", "r") as src:
    lexer.input(src.read())
# Tokenize: iterating the lexer yields one token at a time until EOF.
for tok in lexer:
    print(tok)