Compylateur/Compylateur.py


# --------------------------------------------------
# Compylateur (dev version)
# By Sha-Chan~
# from April to June 2020
#
# Code provided under the CC BY-NC-ND 4.0 license
# For more information about the license:
# https://creativecommons.org/licenses/by-nc-nd/4.0/
# --------------------------------------------------
# ==================================================
# Object classes
# ==================================================
# --- Tokens --- #
class Token():
    def __init__(self, token_type = "", token_value = ""):
        self.type = token_type
        self.value = token_value

class TokenList():
    def __init__(self):
        self.index = 0
        self.list = list()

    def add(self, token):
        self.list.append(token)

    def next(self):
        self.index += 1
        if self.index < len(self.list):
            return self.list[self.index]
        else:
            return Token()

    def generate(self):
        for i in self.list: print((i.type, i.value))
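
# Note (illustrative, not part of the original file): TokenList.next()
# returns an empty Token() once the list is exhausted; the parser below
# uses that empty type ("") as an end-of-input marker instead of raising.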
# --- Abstract Syntax Tree (AST) --- #
class Node():
    def __init__(self, node_type, node_value, *sub_node):
        self.type = node_type
        self.value = node_value
        self.sub_node = list(sub_node)

    def add_node(self, *sub_node):
        for i in sub_node: self.sub_node.append(i)

    def gen(self):
        return self.type, self.value, self.sub_node

def AST_gen(node, tab = 0):
    # Depth-first display of the tree, one space of indent per level.
    for i in node:
        print(tab * " " + "{0} : {1}".format(i.gen()[0], i.gen()[1]))
        if i.gen()[2]: AST_gen(i.gen()[2], tab + 1)
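
# Illustrative sketch (the input expression is assumed, not from the
# original file): for "a + 2" the parser below builds a tree that
# AST_gen displays as:
#
#   Operation : +
#    Variable : a
#    Number : 2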
# --- Parser --- #
class Parser():
    def __init__(self, l_token):
        self.l_token = l_token
        self.token_ahead = l_token.list[0]

    def expect(self, target = []):
        # Consume the current token; if a list of expected types is
        # given, raise when the consumed token is not one of them.
        last = self.token_ahead
        self.token_ahead = self.l_token.next()
        if target != [] and last.type not in target:
            raise SyntaxError("Unexpected token: '{0}' (expected one of: {1})".format(last.value, target))
        return last

    def expr(self): return self.sum()

    def atome(self, minus = False):
        atm = self.expect(["VAR", "NUM", "LPAR", "MINUS"])
        if atm.type == "MINUS": return self.atome(not minus)
        elif atm.type == "VAR":
            #if self.token_ahead.type == "LPAR"
            if minus: return Node("Operation", "--", Node("Variable", atm.value))
            else: return Node("Variable", atm.value)
        elif atm.type == "NUM":
            return Node("Number", (atm.value, -atm.value)[minus])
        else:
            e = self.expr()
            self.expect(["RPAR"])
            if minus: return Node("Operation", "--", e)
            else: return e

    def sum(self):
        # Lowest precedence level: chains of "+" and "-" (a - b is
        # stored as a + (-b) so the node stays an n-ary "+").
        atomes = [self.product()]
        while self.token_ahead.type in ("PLUS", "MINUS"):
            operator = self.expect()
            atome_after = self.product()
            atomes.append((atome_after, Node("Operation", "-", atome_after))[operator.type == "MINUS"])
        return (Node("Operation", "+", *atomes), atomes[0])[len(atomes) == 1]

    def product(self):
        # Middle precedence level: chains of "*" and "/" (a / b becomes
        # a * (1/b), mirroring the rewriting done in sum()).
        atomes = [self.exp()]
        while self.token_ahead.type in ("MULTI", "DIVI"):
            operator = self.expect()
            atome_after = self.exp()
            atomes.append((atome_after, Node("Operation", "1/", atome_after))[operator.type == "DIVI"])
        return (Node("Operation", "*", *atomes), atomes[0])[len(atomes) == 1]

    def exp(self):
        # Highest precedence level: exponentiation.
        atome_1 = self.atome()
        if self.token_ahead.type != "EXP":
            return atome_1
        op = self.expect()
        atome_2 = self.atome()
        return Node("Operation", op.value, atome_1, atome_2)
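
# How precedence falls out of the layering above (illustrative sketch,
# example input assumed): sum() calls product(), which calls exp(), which
# calls atome(), so tighter-binding operators are grouped first. For
# "1 + 2 * 3" the resulting tree prints as:
#
#   Operation : +
#    Number : 1
#    Operation : *
#     Number : 2
#     Number : 3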
# ==================================================
# Lexer
# ==================================================
# --- Main function --- #
def lexer(prgm_src):
    token = {
        "(":"LPAR",
        ")":"RPAR",
        "+":"PLUS",
        "-":"MINUS",
        "*":"MULTI",
        "/":"DIVI",
        "^":"EXP",
        ",":"COMMA"}

    # Surround every significant symbol with spaces so that split()
    # isolates it as its own word.
    for i in {"=", "<", "<=", ">", ">=", "+", "-", "/", "*", "^", "(", ")", "[", "]", "{", "}", '"', "\n", ",", ";"}:
        prgm_src = prgm_src.replace(i, " " + i + " ")
    word = [i for i in prgm_src.lower().split(" ") if i != ""]

    l_token = TokenList()
    index = 0
    while index < len(word):
        undef = True
        for target in token.keys():
            name = token[target]
            if word[index] in target and lexer_detect(word, index, target):
                l_token.add(Token(name, target))
                undef = False
                # All current symbols are one character long, so this
                # advances by one word.
                index += len(target)
                break
        if undef and word[index] == "\"":
            l_token, index = text_detecter(word, index, l_token)
        elif undef:
            if word[index].isdigit():
                # isdigit() guarantees an integer literal, so int() is
                # safer here than eval().
                l_token.add(Token("NUM", int(word[index])))
            else:
                l_token.add(Token("VAR", word[index]))
            index += 1
    return l_token
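
# Illustrative sketch (assumed input, not from the original file):
# lexer("a + 2").generate() prints
#   ('VAR', 'a')
#   ('PLUS', '+')
#   ('NUM', 2)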
# --- Secondary functions --- #
def lexer_detect(word, index, target):
    # Check that the characters of `target` match the words starting at
    # `index`; an IndexError means we ran past the end of the input.
    try:
        return not 0 in [target[i] == word[i + index] for i in range(len(target))]
    except IndexError:
        return 0

def text_detecter(word, index, l_token):
    # Gather the words between two '"' marks into a single TEXT token.
    txt = word[index]
    index += 1
    while word[index] != '"':
        txt = txt + " " + word[index]
        index += 1
    l_token.add(Token("TEXT", txt + ' "'))
    return l_token, index + 1
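
# Illustrative sketch (assumed input): inside lexer(), the source
# '"hello world"' is split into ['"', 'hello', 'world', '"'], and
# text_detecter collapses it into a single Token("TEXT", '" hello world "').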
# ==================================================
# Parser
# ==================================================
# --- Main function --- #
def parser(l_token):
    par = Parser(l_token)
    ast = Node("Program", "")
    ast.add_node(par.expr())
    return ast
# --- Secondary functions --- #
# (empty for the moment)
# ==================================================
# Miscellaneous functions
# ==================================================
def compylateur(code):
    l_token = lexer(code)
    print("--- Tokens ---")
    l_token.generate()

    ast = parser(l_token)
    print("\n\n--- AST ---")
    AST_gen(ast.sub_node)
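
# --- Example usage --- #
# A minimal demo (illustrative; the input expression is assumed, not part
# of the original file). Running the module directly should print the
# token list followed by the indented AST.
if __name__ == "__main__":
    compylateur("a + 2 * (b - 3)")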