"""
Compylateur — dev
=================

Licence
-------
Code provided under GNU General Public Licence v3.0+

Description
-----------
Compylateur is a project which aims to compile the french pseudo-code into
Python script.
"""

import re

# ==================================================
# Tokens and Abstract syntax tree
# ==================================================

# --- Tokens --- #

class Token():
    """A lexical unit: a type tag and the value it carries.

    The default ``Token()`` (empty type and value) is what ``TokenList.next``
    returns once the input is exhausted, so it doubles as an end marker.
    """

    def __init__(self, token_type="", token_value=""):
        self.type = token_type
        self.value = token_value


class TokenList():
    """Ordered list of tokens with a read cursor used by the parser."""

    def __init__(self):
        self.index = 0
        self.list = list()

    def add(self, token):
        """Append ``token`` at the end of the list."""
        self.list.append(token)

    def next(self):
        """Advance the cursor and return the token under it.

        Returns an empty ``Token()`` when the cursor runs past the end.
        """
        self.index += 1
        if self.index < len(self.list):
            return self.list[self.index]
        return Token()

    def generate(self):
        """Print every token as a ``(type, value)`` tuple, one per line."""
        for token in self.list:
            print((token.type, token.value))


# --- Abstract Syntax Tree (AST) --- #

class Node():
    """AST node: a type, a value and an ordered list of child nodes."""

    def __init__(self, node_type, node_value, *sub_node):
        self.type = node_type
        self.value = node_value
        self.sub_node = list(sub_node)

    def add_node(self, *sub_node):
        """Append the given nodes to the children of this node."""
        self.sub_node.extend(sub_node)

    def gen(self):
        """Return the ``(type, value, children)`` triple of this node."""
        return self.type, self.value, self.sub_node


def AST_gen(node, tab=0):
    """Print the nodes of the iterable ``node`` as an indented tree.

    ``tab`` is the current depth; children are printed one level deeper.
    """
    for child in node:
        node_type, node_value, sub_nodes = child.gen()
        print(tab * " " + "{0} : {1}".format(node_type, node_value))
        if sub_nodes:
            AST_gen(sub_nodes, tab + 1)


# ==================================================
# Lexer
# ==================================================

# Keyword table: source text -> token type. Order matters: the first entry
# that matches at the current position wins, so longer phrases sharing a first
# word with a shorter one must come first (e.g. "sinon , si" before "sinon").
_KEYWORDS = {
    "(": "LPAR",
    ")": "RPAR",
    "+": "PLUS",
    "-": "MINUS",
    "*": "MULTI",
    "/": "DIVI",
    "^": "EXP",
    ",": "COMMA",
    "=": "EQUAL",
    "est supérieur à": "SUP",
    ">": "SUP",
    "est plus grand que": "SUP",
    "est supérieur ou égal à": "SUP_EGA",
    ">=": "SUP_EGA",
    "≥": "SUP_EGA",
    "est plus grand ou égal à": "SUP_EGA",
    "est inférieur à": "INF",
    "<": "INF",
    "≤": "INF_EGA",
    "est plus petit que": "INF",
    "est inférieur ou égal à": "INF_EGA",
    "<=": "INF_EGA",
    "est plus petit ou égal à": "INF_EGA",
    "est égal à": "EGA",
    "==": "EGA",
    "égal": "EGA",
    "égale": "EGA",
    "est différent de": "DIF",
    "!=": "DIF",
    "≠": "DIF",
    "ou": "OR",
    "et": "AND",
    "affecter à": "AFFECT",
    "prend la valeur": "TAKE",
    "est initialisé à": "TAKE",
    "afficher": "DISPLAY",
    "demander la valeur de": "REQUEST",
    "on demande la valeur de": "REQUEST",
    "saisir la valeur de": "REQUEST",
    "saisir": "REQUEST",
    "à l'utilisateur": "USER",
    "la valeur": "VALUE",
    "fin si": "END_IF",
    "fin pour": "END_FOR",
    "fin tant que": "END_WHILE",
    "fin tantque": "END_WHILE",
    "faire": "DO",
    "si": "IF",
    "alors": "THEN",
    "sinon , si": "ELIF",
    "sinon": "ELSE",
    "pour": "FOR",
    "allant de": "INTER_ST",
    "variant entre": "INTER_ST",
    "variant de": "INTER_ST",
    "à": "INTER_ED",
    "jusqu'à": "INTER_ED",
    "tant que": "WHILE",
    "tantque": "WHILE",
}

# (token type, source text, source text split into words), in table order.
_KEYWORD_TARGETS = [
    (name, value, value.split(" ")) for value, name in _KEYWORDS.items()
]

# Symbols that must become words of their own. Two-character operators are
# listed first so that a single pass keeps "<=" whole instead of "<" and "=".
_SEPARATORS = re.compile(r'(<=|>=|==|!=|[=<>+\-/*^()\[\]{}",;≤≥≠])')


# --- Main function --- #

def lexer(prgm_src):
    """Turn pseudo-code source text into a ``TokenList``.

    The source is lowercased and split into words; symbols are isolated
    first so they do not need surrounding spaces. Words matching an entry of
    the keyword table become that token, text between double quotes becomes a
    ``TEXT`` token, integers become ``NUM`` and anything else ``VAR``.

    Raises:
        SyntaxError: if a double-quoted text is never closed.
    """
    # NOTE(review): string literals go through the same lowercasing and
    # symbol spacing as code, so displayed texts lose their case and get
    # spaces around punctuation such as "{" or ",".
    spaced = _SEPARATORS.sub(r" \1 ", prgm_src)
    word = spaced.lower().split()

    l_token = TokenList()
    index = 0

    while index < len(word):
        for name, value, target in _KEYWORD_TARGETS:
            if lexer_detect(word, index, target):
                l_token.add(Token(name, value))
                index += len(target)
                break
        else:
            # No keyword starts here.
            current = word[index]
            if current == '"':
                l_token, index = text_detecter(word, index + 1, l_token)
            else:
                if current.isdecimal():
                    # int() instead of eval(): no code execution, and
                    # leading zeros ("007") are accepted.
                    l_token.add(Token("NUM", int(current)))
                else:
                    l_token.add(Token("VAR", current))
                index += 1

    return l_token


# --- Secondary functions --- #

def lexer_detect(word, index, target):
    """Return True if the words of ``target`` appear in ``word`` at ``index``.

    A target that would run past the end of ``word`` simply does not match.
    """
    return word[index:index + len(target)] == target


def text_detecter(word, index, l_token):
    """Collect the words of a quoted text and add a ``TEXT`` token.

    ``index`` is the position right after the opening quote. The words up to
    the closing quote are joined with single spaces and stored, quotes
    included, as the token value.

    Returns:
        ``(l_token, index)`` where ``index`` is just past the closing quote.

    Raises:
        SyntaxError: if no closing quote is found.
    """
    parts = []
    while index < len(word) and word[index] != '"':
        parts.append(word[index])
        index += 1
    if index >= len(word):
        raise SyntaxError("Unterminated text: closing '\"' is missing")
    l_token.add(Token("TEXT", '"' + " ".join(parts) + '"'))
    return l_token, index + 1


# ==================================================
# Parser
# ==================================================

class Parser():
    """Recursive-descent parser building an AST from a ``TokenList``.

    ``token_ahead`` is the single token of lookahead; ``expect`` consumes it.
    """

    def __init__(self, l_token):
        self.l_token = l_token
        # An empty program has no first token: start on the end marker.
        self.token_ahead = l_token.list[0] if l_token.list else Token()

    def expect(self, *target):
        """Consume and return the current token.

        If ``target`` types are given, the consumed token must be one of
        them.

        Raises:
            SyntaxError: if the consumed token has an unexpected type.
        """
        last = self.token_ahead
        self.token_ahead = self.l_token.next()
        if target != () and last.type not in target:
            raise SyntaxError(f"This operand was not expected: '{last.value}' (for dev: {target})")
        return last

    # --- Arithmetic's rules --- #

    def expr(self):
        """Parse an arithmetic expression."""
        return self.sum()

    def atome(self, minus=False):
        """Parse a number, variable, function call or parenthesised expression.

        Leading minus signs are consumed here; ``minus`` tells whether an odd
        number of them has been seen so far. A negated number is folded into
        the ``Number`` node, anything else is wrapped in a ``"--"`` operation.
        """
        atm = self.expect("VAR", "NUM", "LPAR", "MINUS")
        if atm.type == "MINUS":
            return self.atome(not minus)
        if atm.type == "NUM":
            return Node("Number", -atm.value if minus else atm.value)

        if atm.type == "VAR":
            if self.token_ahead.type == "LPAR":
                self.expect()
                node = Node("Function", atm.value, *self.fct())
            else:
                node = Node("Variable", atm.value)
        else:
            node = self.expr()
            self.expect("RPAR")

        # The sign applies to function calls too, not only to variables
        # and parenthesised expressions.
        return Node("Operation", "--", node) if minus else node

    def fct(self):
        """Parse call arguments up to and including the closing parenthesis.

        Returns the list of argument nodes (possibly empty).
        """
        param = list()
        while self.token_ahead.type != "RPAR":
            param.append(self.expr())
            if self.token_ahead.type == "RPAR":
                break
            self.expect("COMMA")
        self.expect("RPAR")
        return param

    def sum(self):
        """Parse a chain of ``+`` / ``-``.

        Subtracted terms are stored as unary ``"-"`` operations so the whole
        chain is a single n-ary ``"+"`` node.
        """
        atomes = [self.product()]
        while self.token_ahead.type in ("PLUS", "MINUS"):
            operator = self.expect()
            atome_after = self.product()
            if operator.type == "MINUS":
                atome_after = Node("Operation", "-", atome_after)
            atomes.append(atome_after)
        if len(atomes) == 1:
            return atomes[0]
        return Node("Operation", "+", *atomes)

    def product(self):
        """Parse a chain of ``*`` / ``/``.

        Divisors are stored as unary ``"1/"`` operations so the whole chain is
        a single n-ary ``"*"`` node.
        """
        atomes = [self.exp()]
        while self.token_ahead.type in ("MULTI", "DIVI"):
            operator = self.expect()
            atome_after = self.exp()
            if operator.type == "DIVI":
                atome_after = Node("Operation", "1/", atome_after)
            atomes.append(atome_after)
        if len(atomes) == 1:
            return atomes[0]
        return Node("Operation", "*", *atomes)

    def exp(self):
        """Parse an exponentiation; chains associate to the right."""
        atome_1 = self.atome()
        if self.token_ahead.type != "EXP":
            return atome_1
        op = self.expect()
        return Node("Operation", op.value, atome_1, self.exp())

    # --- Comparison and Condition's rules --- #

    def condition(self):
        """Parse a boolean condition."""
        return self.condition_or()

    def condition_or(self):
        """Parse ``and``-conditions joined by any number of ``OR``."""
        elmnt = self.condition_and()
        while self.token_ahead.type == "OR":
            self.expect()
            elmnt = Node("Condition", "OR", elmnt, self.condition_and())
        return elmnt

    def condition_and(self):
        """Parse comparisons joined by any number of ``AND``."""
        elmnt = self.comparison_1()
        while self.token_ahead.type == "AND":
            self.expect()
            elmnt = Node("Condition", "AND", elmnt, self.comparison_1())
        return elmnt

    def comparison_1(self):
        """Parse an optional equality / difference test."""
        elmnt_1 = self.comparison_2()
        if self.token_ahead.type not in ("EGA", "DIF"):
            return elmnt_1
        comp = self.expect()
        elmnt_2 = self.comparison_2()
        return Node("Comparison", comp.type, elmnt_1, elmnt_2)

    def comparison_2(self):
        """Parse an optional ordering test between two expressions."""
        elmnt_1 = self.expr()
        if self.token_ahead.type not in ("SUP", "SUP_EGA", "INF", "INF_EGA"):
            return elmnt_1
        comp = self.expect()
        elmnt_2 = self.expr()
        return Node("Comparison", comp.type, elmnt_1, elmnt_2)

    # --- Statements's rules --- #

    def block(self):
        """Parse consecutive statements into a ``Block`` node.

        Stops at the first token that cannot start a statement.
        """
        block_tokens = ("AFFECT", "REQUEST", "VAR", "DISPLAY", "IF", "FOR", "WHILE")
        ast = Node("Block", "")
        while self.token_ahead.type in block_tokens:
            ast.add_node(self.statement())
        return ast

    def statement(self):
        """Dispatch on the lookahead token to the matching statement rule."""
        token_type = self.token_ahead.type
        if token_type in ("AFFECT", "REQUEST", "VAR"):
            return self.assignement()
        if token_type == "DISPLAY":
            return self.display()
        if token_type == "IF":
            return self.statement_if()
        if token_type == "FOR":
            return self.statement_for()
        if token_type == "WHILE":
            return self.statement_while()
        raise SyntaxError(f"This operand was not expected: '{self.token_ahead.value}'")

    def assignement(self):
        """Parse a user request or one of the two assignment forms."""
        if self.token_ahead.type == "REQUEST":
            self.expect()
            var = self.expect("VAR")
            if self.token_ahead.type == "USER":
                self.expect()
            return Node("User's request", "", Node("Variable", var.value))

        if self.token_ahead.type == "AFFECT":
            self.expect()
            var = self.expect("VAR")
            self.expect("VALUE")
        else:
            var = self.expect("VAR")
            self.expect("TAKE")
        value = self.expr()
        return Node("Assignement", "", Node("Variable", var.value), value)

    def _display_item(self):
        """Parse one displayed item: an expression or a quoted text."""
        if self.token_ahead.type in ("VAR", "NUM", "LPAR", "MINUS"):
            return Node("Expression", "", self.expr())
        return Node("Text", self.expect("TEXT").value)

    def display(self):
        """Parse a display statement with comma-separated items."""
        self.expect()
        text = Node("Display", "")
        text.add_node(self._display_item())
        while self.token_ahead.type == "COMMA":
            self.expect()
            text.add_node(self._display_item())
        return text

    def statement_if(self):
        """Parse an if / elif / else statement.

        Children of the resulting node alternate condition, block, ...; a
        trailing block without condition is the ``else`` branch.
        """
        self.expect()
        cond_1 = self.condition()
        self.expect("THEN", "COMMA", "DO")
        block_1 = self.block()
        ast = [cond_1, block_1]
        while self.token_ahead.type == "ELIF":
            self.expect()
            ast.append(self.condition())
            self.expect("THEN", "COMMA", "DO")
            ast.append(self.block())
        if self.token_ahead.type == "ELSE":
            self.expect()
            ast.append(self.block())
        self.expect("END_IF")
        return Node("Statement", "if", *ast)

    def statement_for(self):
        """Parse a for loop.

        The bounds are kept as full expression sub-trees (children of the
        "Start value" / "End value" nodes) so that non-trivial bounds such as
        ``n + 1`` are not reduced to their top-level value.
        """
        self.expect()
        it_var = self.expect("VAR")
        self.expect("INTER_ST")
        start_value = self.expr()
        self.expect("INTER_ED")
        end_value = self.expr()
        self.expect("COMMA", "DO")
        ast = Node(
            "Statement", "for",
            Node("Incremented variable", it_var.value),
            Node("Start value", "", start_value),
            Node("End value", "", end_value),
        )
        ast.add_node(self.block())
        self.expect("END_FOR")
        return ast

    def statement_while(self):
        """Parse a while loop into a node holding condition then block."""
        self.expect()
        condition = self.condition()
        self.expect("COMMA", "DO")
        block = self.block()
        self.expect("END_WHILE")
        return Node("Statement", "while", condition, block)


# --- Secondary functions --- #

def parser(l_token):
    """Parse ``l_token`` and return the root "Programm" node."""
    par = Parser(l_token)
    ast = Node("Programm", "")
    ast.add_node(par.block())
    return ast


# Comparison node value -> Python operator.
_COMPARATORS = {
    "EGA": "==",
    "DIF": "!=",
    "SUP": ">",
    "SUP_EGA": ">=",
    "INF": "<",
    "INF_EGA": "<=",
}

_INDENT = "    "


def _indent(code):
    """Indent the non-blank lines of ``code``; an empty body becomes ``pass``."""
    lines = [line for line in code.split("\n") if line.strip()]
    if not lines:
        return _INDENT + "pass\n"
    return "".join(f"{_INDENT}{line}\n" for line in lines)


def _grouped(node):
    """Render ``node`` as an operand, parenthesised when precedence needs it.

    N-ary / binary operations and negative numbers are wrapped; unary
    operations already render with their own parentheses.
    """
    text = str(node_interpreter(node))
    compound = node.type == "Operation" and node.value in ("+", "*", "^")
    negative = (
        node.type == "Number"
        and isinstance(node.value, (int, float))
        and node.value < 0
    )
    return f"({text})" if compound or negative else text


def _operation(node):
    """Render an ``Operation`` node as a Python expression."""
    operands = node.sub_node
    if node.value in ("-", "--"):
        return f"(-{_grouped(operands[0])})"
    if node.value == "1/":
        return f"(1 / {_grouped(operands[0])})"
    if node.value == "+":
        # "+" binds loosest: its operands never need extra parentheses.
        return " + ".join(str(node_interpreter(operand)) for operand in operands)
    # "^" is exponentiation in the pseudo-code, i.e. "**" in Python.
    symbol = "**" if node.value == "^" else node.value
    return f" {symbol} ".join(_grouped(operand) for operand in operands)


def _comparison_side(node):
    """Render one side of a comparison, wrapping nested comparisons.

    Without the parentheses Python would read ``a > b == c`` as a chained
    comparison rather than as the parsed tree.
    """
    text = str(node_interpreter(node))
    return f"({text})" if node.type == "Comparison" else text


def _display(node):
    """Render a ``Display`` node as a single ``print`` call."""
    parts = []
    for part in node.sub_node:
        if part.type == "Text":
            # The text value already carries its double quotes.
            parts.append(f"f{part.value}")
        else:
            parts.append('f"{' + str(node_interpreter(part)) + '}"')
    return "print(" + ", ".join(parts) + ")\n"


def _bound(node):
    """Render a loop bound stored either as a child expression or a value."""
    if node.sub_node:
        return node_interpreter(node.sub_node[0])
    return node.value


def _statement(node):
    """Render an if / for / while ``Statement`` node."""
    children = node.sub_node

    if node.value == "if":
        code, keyword, index = "", "if", 0
        # Children alternate condition, block; a lone trailing block is else.
        while index + 1 < len(children):
            condition = node_interpreter(children[index])
            body = ast_interpreter([children[index + 1]])
            code += f"{keyword} {condition}:\n" + _indent(body)
            keyword = "elif"
            index += 2
        if index < len(children):
            code += "else:\n" + _indent(ast_interpreter([children[index]]))
        return code

    if node.value == "for":
        # NOTE(review): the end bound is passed to range() as is, so it is
        # excluded from the loop — confirm this is the intended reading of
        # "allant de A à B".
        variable, start, end = children[:3]
        body = ast_interpreter(children[3:])
        header = f"for {variable.value} in range({_bound(start)}, {_bound(end)}):\n"
        return header + _indent(body)

    if node.value == "while":
        body = ast_interpreter(children[1:])
        return f"while {node_interpreter(children[0])}:\n" + _indent(body)

    return ""


def node_interpreter(node):
    """Return the Python code for a single AST node.

    Statements end with a newline, expressions do not. ``Number``, ``Text``
    and ``Variable`` nodes return their raw value; unknown node types
    (including ``Block``, handled by ``ast_interpreter``) return ``""``.
    """
    if node.type in ("Number", "Text", "Variable"):
        return node.value

    if node.type == "Assignement":
        return f"{node.sub_node[0].value} = {node_interpreter(node.sub_node[1])}\n"

    if node.type == "User's request":
        return f"{node.sub_node[0].value} = input(\"{node.sub_node[0].value} \")\n"

    if node.type == "Operation":
        return _operation(node)

    if node.type == "Expression":
        return node_interpreter(node.sub_node[0])

    if node.type == "Display":
        return _display(node)

    if node.type == "Statement":
        return _statement(node)

    if node.type == "Condition":
        if len(node.sub_node) > 1:
            left = node_interpreter(node.sub_node[0])
            right = node_interpreter(node.sub_node[1])
            return f"{left} {node.value.lower()} {right}"
        return node_interpreter(node.sub_node[0])

    if node.type == "Comparison":
        symbol = _COMPARATORS.get(node.value)
        if symbol is None:
            return ""
        left = _comparison_side(node.sub_node[0])
        right = _comparison_side(node.sub_node[1])
        return f"{left} {symbol} {right}"

    if node.type == "Function":
        # No trailing newline: a call is an expression and may be followed
        # by operators on the same line.
        args = [f"{node_interpreter(arg)}" for arg in node.sub_node]
        return f"{node.value}(" + ", ".join(args) + ")"

    return ""


def ast_interpreter(ast_nodes):
    """Return the Python code for a sequence of AST nodes.

    ``Block`` nodes are expanded into the code of their children.
    """
    python_code = ""
    for node in ast_nodes:
        if node.type == "Block":
            python_code += ast_interpreter(node.sub_node)
        python_code += node_interpreter(node)
    return python_code


# ==================================================
# Miscellaneous functions
# ==================================================

def compylateur(code, file=False):
    """Compile pseudo-code to Python, printing every intermediate stage.

    Args:
        code: the pseudo-code itself, or, when ``file`` is true, the path of
            a text file without its ``.txt`` extension.
        file: whether ``code`` designates a file to read.

    Returns:
        The generated Python source.
    """
    if file:
        with open(code + ".txt", "r") as source:
            code = source.read()

    l_token = lexer(code)
    print("--- Tokens ---")
    l_token.generate()

    ast = parser(l_token)
    print("\n\n--- AST ---")
    AST_gen(ast.sub_node)

    python_code = ast_interpreter(ast.sub_node)
    print("\n\n--- code entré")
    print(code)
    print("--- code python")
    print(python_code)
    return python_code


txt = """demander la valeur de var
var prend la valeur int(var, 10)
afficher "var : {var}"
si var égale 2 ou var est inférieur ou égal à 10 alors
    var prend la valeur var + 1
    afficher var / 2
fin si
pour i allant de 0 à 5,
    afficher i
fin pour
tant que i est supérieur à 0,
    afficher i
    i prend la valeur i - 1
fin tant que
afficher "fin du programme."
"""

compylateur(txt)
# compylateur("test", True)