diff --git a/compylateur.py b/compylateur.py new file mode 100644 index 0000000..95ee9e8 --- /dev/null +++ b/compylateur.py @@ -0,0 +1,460 @@ +""" +Compylateur — dev +================= + +Licence +------- +Code provided under GNU General Public Licence v3.0+ + +Description +----------- +Compylateur is a project which aims to compile the french pseudo-code into Python script. +""" + +# ================================================== +# Tokens and Abstract syntax tree +# ================================================== + +# --- Tokens --- # + +class Token(): + def __init__(self, token_type="", token_value=""): + self.type = token_type + self.value = token_value + +class TokenList(): + def __init__(self): + self.index = 0 + self.list = list() + + def add(self, token): + self.list.append(token) + + def next(self): + self.index += 1 + if self.index < len(self.list): + return self.list[self.index] + else: + return Token() + + def generate(self): + for i in self.list: print((i.type, i.value)) + +# --- Abstract Syntax Tree (AST) --- # + +class Node(): + def __init__(self, node_type, node_value, *sub_node): + self.type = node_type + self.value = node_value + self.sub_node = list(sub_node) + + def add_node(self, *sub_node): + for i in sub_node: self.sub_node.append(i) + + def gen(self): + return self.type, self.value, self.sub_node + + +def AST_gen(node, tab=0): + for i in node: + print(tab * " " + "{0} : {1}".format(i.gen()[0], i.gen()[1])) + if i.gen()[2]: AST_gen(i.gen()[2], tab + 1) + +# ================================================== +# Lexer +# ================================================== + +# --- Main function --- # + +def lexer(prgm_src): + prgm_src = prgm_src.replace("\n", " ") + token = { + "(":"LPAR", + ")":"RPAR", + "+":"PLUS", + "-":"MINUS", + "*":"MULTI", + "/":"DIVI", + "^":"EXP", + ",":"COMMA", + "=":"EQUAL", + "est supérieur à":"SUP", ">":"SUP", "est plus grand que":"SUP", + "est supérieur ou égal à":"SUP_EGA", ">=":"SUP_EGA", "≥":"SUP_EGA", "est plus grand ou égal à":"SUP_EGA", + "est inférieur à":"INF", "<":"INF", "≤":"INF_EGA", "est plus petit que":"INF", + "est inférieur ou égal à":"INF_EGA", "<=":"INF_EGA", "est plus petit ou égal à":"INF_EGA", + "est égal à":"EGA", "==":"EGA", "égal":"EGA", "égale":"EGA", + "est différent de":"DIF", "!=":"DIF", "≠":"DIF", + "ou":"OR", + "et":"AND", + "affecter à":"AFFECT", "prend la valeur":"TAKE", "est initialisé à":"TAKE", + "afficher":"DISPLAY", + "demander la valeur de":"REQUEST", "on demande la valeur de":"REQUEST", "saisir la valeur de":"REQUEST", "saisir":"REQUEST", "à l'utilisateur":"USER", "la valeur":"VALUE", + "fin si":"END_IF", "fin pour":"END_FOR", "fin tant que":"END_WHILE", "fin tantque":"END_WHILE", "faire":"DO", + "si":"IF", "alors":"THEN", "sinon , si":"ELIF", "sinon":"ELSE", + "pour":"FOR", "allant de":"INTER_ST", "variant entre":"INTER_ST", "variant de":"INTER_ST", "à":"INTER_ED", "jusqu'à":"INTER_ED", + "tant que":"WHILE", "tantque":"WHILE"} + + for i in {"=", "<", "<=", ">", ">=", "+", "-", "/", "*", "^", "(", ")", "[", "]", "{", "}", '"', "\n", ",", ";"}: + prgm_src = prgm_src.replace(i, " " + i + " ") + word = [i for i in prgm_src.lower().split(" ") if i != ""] + + l_token = TokenList() + index, undef = 0, bool() + + while index < len(word): + undef = True + + for target in token.keys(): + name, value, target = token[target], target, target.split(" ") + + if word[index] == target[0] and lexer_detect(word, index, target): + l_token.add(Token(name, value)) + undef = False + index += len(target) + break + + + if undef and word[index] == '"': + l_token, index = text_detecter(word, index + 1, l_token) + elif undef: + if word[index].isdigit(): + l_token.add(Token("NUM", eval(word[index]))) + else: + l_token.add(Token("VAR", word[index])) + index += 1 + + return l_token + +# --- Secondary functions --- # + +def lexer_detect(word, index, target): + try: + return not 0 in [target[i] == word[i + index] for i in range(len(target))] + except: + return 0 + +def text_detecter(word, index, l_token): + txt = word[index] + index += 1 + while word[index] != '"': + txt = txt + " " + word[index] + index += 1 + l_token.add(Token("TEXT", '"' + txt + '"')) + return l_token, index + 1 + +# ================================================== +# Parser +# ================================================== + +class Parser(): + def __init__(self, l_token): + self.l_token = l_token + self.token_ahead = l_token.list[0] + + def expect(self, *target): + last = self.token_ahead + self.token_ahead = self.l_token.next() + if target != () and last.type not in target: + raise SyntaxError(f"This operand was not expected: '{last.value}' (for dev: {target})") + return last + + # --- Arithmetic's rules --- # + + def expr(self): return self.sum() + + def atome(self, minus=False): + atm = self.expect("VAR", "NUM", "LPAR", "MINUS") + + if atm.type == "MINUS": return self.atome(not minus) + elif atm.type == "VAR": + if self.token_ahead.type == "LPAR": + self.expect() + return Node("Function", atm.value, *self.fct()) + + if minus: return Node("Operation", "--", Node("Variable", atm.value)) + else: return Node("Variable", atm.value) + + elif atm.type == "NUM": + return Node("Number", (atm.value, -atm.value)[minus]) + else: + e = self.expr() + self.expect("RPAR") + if minus: return Node("Operation", "--", e) + else: return e + + def fct(self): + param = list() + while self.token_ahead.type != "RPAR": + param.append(self.expr()) + if self.token_ahead.type == "RPAR": + break + self.expect("COMMA") + self.expect("RPAR") + return param + + def sum(self): + atomes = [self.product()] + + while self.token_ahead.type in ("PLUS", "MINUS"): + operator = self.expect() + atome_after = self.product() + atomes.append( + (atome_after, Node("Operation", "-", atome_after))[operator.type == "MINUS"] + ) + + return (Node("Operation", "+", *atomes), atomes[0])[len(atomes) == 1] + + def product(self): + atomes = [self.exp()] + + while self.token_ahead.type in ("MULTI", "DIVI"): + operator = self.expect() + atome_after = self.exp() + atomes.append( + (atome_after, Node("Operation", "1/", atome_after))[operator.type == "DIVI"] + ) + + return (Node("Operation", "*", *atomes), atomes[0])[len(atomes) == 1] + + def exp(self): + atome_1 = self.atome() + if self.token_ahead.type != "EXP": + return atome_1 + op = self.expect() + atome_2 = self.atome() + return Node("Operation", op.value, atome_1, atome_2) + + # --- Comparison and Condition's rules --- # + + def condition(self): return self.condition_or() + + def condition_or(self): + elmnt_1 = self.condition_and() + if self.token_ahead.type != "OR": return elmnt_1 + self.expect() + elmnt_2 = self.condition_and() + return Node("Condition", "OR", elmnt_1, elmnt_2) + + def condition_and(self): + elmnt_1 = self.comparison_1() + if self.token_ahead.type != "AND": return elmnt_1 + self.expect() + elmnt_2 = self.comparison_1() + return Node("Condition", "AND", elmnt_1, elmnt_2) + + def comparison_1(self): + elmnt_1 = self.comparison_2() + if self.token_ahead.type not in ("EGA", "DIF"): return elmnt_1 + comp = self.expect() + elmnt_2 = self.comparison_2() + return Node("Comparison", comp.type, elmnt_1, elmnt_2) + + def comparison_2(self): + elmnt_1 = self.expr() + if self.token_ahead.type not in ("SUP", "SUP_EGA", "INF", "INF_EGA"): return elmnt_1 + comp = self.expect() + elmnt_2 = self.expr() + return Node("Comparison", comp.type, elmnt_1, elmnt_2) + + # --- Statements's rules --- # + + def block(self): + block_tokens = ("AFFECT", "REQUEST", "VAR", "DISPLAY", "IF", "FOR", "WHILE") + ast = Node("Block", "") + while self.token_ahead.type in block_tokens: + ast.add_node(self.statement()) + return ast + + + def statement(self): + if self.token_ahead.type in ("AFFECT", "REQUEST", "VAR"): return self.assignement() + elif self.token_ahead.type == "DISPLAY": return self.display() + elif self.token_ahead.type == "IF": return self.statement_if() + elif self.token_ahead.type == "FOR": return self.statement_for() + elif self.token_ahead.type == "WHILE": return self.statement_while() + + def assignement(self): + value = None + + if self.token_ahead.type == "REQUEST": + self.expect() + var = self.expect("VAR") + if self.token_ahead.type == "USER": self.expect() + return Node("User's request", "", Node("Variable", var.value)) + + if self.token_ahead.type == "AFFECT": + self.expect() + var = self.expect("VAR") + self.expect("VALUE") + value = self.expr() + + elif self.token_ahead.type == "VAR": + var = self.expect() + self.expect("TAKE") + value = self.expr() + + return Node("Assignement","", Node("Variable", var.value), value) + + def display(self): + self.expect() + text = Node("Display", "") + if self.token_ahead.type in ("VAR", "NUM", "LPAR"): + text.add_node(Node("Expression", "", self.expr())) + else: + text.add_node(Node("Text", self.expect("TEXT").value)) + + while self.token_ahead.type == "COMMA": + self.expect() + if self.token_ahead.type in ("VAR", "NUM", "LPAR"): + text.add_node(Node("Expression", "", self.expr())) + else: + text.add_node(Node("Text", self.expect("TEXT").value)) + return text + + def statement_if(self): + self.expect() + cond_1 = self.condition() + self.expect("THEN", "COMMA", "DO") + block_1 = self.block() + ast = [cond_1, block_1] + while self.token_ahead.type == "ELIF": + self.expect() + ast.append(self.condition()) + self.expect("THEN", "COMMA", "DO") + ast.append(self.block()) + if self.token_ahead.type == "ELSE": + self.expect() + ast.append((self.block())) + + self.expect("END_IF") + return Node("Statement", "if", *ast) + + def statement_for(self): + self.expect() + it_var = self.expect("VAR") + self.expect("INTER_ST") + start_value = self.expr() + self.expect("INTER_ED") + end_value = self.expr() + self.expect("COMMA", "DO") + ast = Node("Statement", + "for", + Node("Incremented variable", it_var.value), + Node("Start value", start_value.value), + Node("End value", end_value.value) + ) + ast.add_node(self.block()) + self.expect("END_FOR") + return ast + + def statement_while(self): + self.expect() + condition = self.condition() + self.expect("COMMA", "DO") + block = self.block() + self.expect("END_WHILE") + return Node("Statement", "while", condition, block) + + +# --- Secondary functions --- # +def parser(l_token): + par = Parser(l_token) + ast = Node("Programm", "") + ast.add_node(par.block()) + + return ast + + +def node_interpreter(node): + if node.type == "Assignement": + return f"{node.sub_node[0].value} = {node_interpreter(node.sub_node[1])}\n" + + if node.type in ("Number", "Text", "Variable"): + return node.value + + elif node.type == "Operation": + if node.value == "1/": + return f"1 / {node_interpreter(node.sub_node[0])}" + + return f"{node_interpreter(node.sub_node[0])} {node.value} {node_interpreter(node.sub_node[1])}" + + if node.type == "Expression": + return node_interpreter(node.sub_node[0]) + + if node.type == "Display": + if node.sub_node[0].type == "Text": + return f"print({node.sub_node[0].value})\n" + elif node.sub_node[0].type == "Expression": + return "print(f\"{" + node_interpreter(node.sub_node[0]) + "}\")" + + if node.type == "Statement": + if node.value == "if": + instructions = ast_interpreter(node.sub_node[1:]).split("\n") + instructions = "\n ".join(instructions) + return f"if {node_interpreter(node.sub_node[0])}:\n {instructions}\n" + + elif node.value == "for": + pass + + elif node.value == "while": + pass + + if node.type == "Comparison": + if node.value == "EGA": + return f"{node.sub_node[0].value} == {node_interpreter(node.sub_node[1])}" + + elif node.value == "SUP": + return f"{node.sub_node[0].value} > {node_interpreter(node.sub_node[1])}" + + elif node.value == "SUP_EGA": + return f"{node.sub_node[0].value} >= {node_interpreter(node.sub_node[1])}" + + elif node.value == "INF": + return f"{node.sub_node[0].value} < {node_interpreter(node.sub_node[1])}" + + elif node.value == "INF_EGA": + return f"{node.sub_node[0].value} <= {node_interpreter(node.sub_node[1])}" + + return "" + + +def ast_interpreter(ast_nodes): + python_code = "" + for node in ast_nodes: + if node.type == "Block": + python_code += ast_interpreter(node.sub_node) + + python_code += node_interpreter(node) + + return python_code + +# ================================================== +# Miscellaneous functions +# ================================================== +def compylateur(code, file=False): + if file: code = open(code + ".txt", 'r').read() + + l_token = lexer(code) + print("--- Tokens ---") + l_token.generate() + + ast = parser(l_token) + print("\n\n--- AST ---") + AST_gen(ast.sub_node) + + python_code = ast_interpreter(ast.sub_node) + print("\n\n--- code entré") + print(code) + print("--- code python") + print(python_code) + + +txt = """afficher "var" +si var égale 2 alors +var prend la valeur var + 1 +afficher var / 2 +fin si +afficher "fin du programme." +""" +compylateur(txt) +# compylateur("test", True) \ No newline at end of file diff --git a/test.txt b/test.txt index e6d6835..ab60b3b 100644 --- a/test.txt +++ b/test.txt @@ -3,4 +3,4 @@ si a est égal à 0 alors afficher "Gagné !" sinon afficher "Perdu…" -fin si \ No newline at end of file +fin si