diff --git a/modules/Lexical.py b/modules/Lexical.py
index 6564e58..1d04e43 100644
--- a/modules/Lexical.py
+++ b/modules/Lexical.py
@@ -2,7 +2,7 @@
 from modules.color_print import print_blue, print_green, print_purple, print_red, print_yellow, print_dark_cyan
 from modules.color_print import ANSI_RED, ANSI_RESET
 from modules.RegExp import RegExp, postfix_me
-from modules.Node_AST import build_AST_tree, eval_followpos, get_node_dict, pre_followpos
+from modules.Node_AST import Operator, build_AST_tree, eval_followpos, get_node_dict, pre_followpos
 from modules.State import DFA, build_DFA
 from modules.lexical_aux import build_my_tree, build_ouput_file, dfa_mine, eval_tree, get_current_directory
 from modules.lexical_aux import get_start_accept, get_table_dict, get_tokens_sole, list_to_str, print_dfa_trans, reverse_dict, write_file
@@ -10,99 +10,85 @@ class Lexical:
-    def __init__(self, operators={'(', ')', 'STAR', 'OR', 'PLUS','CONCAT'}):
-        #self.lex_scan = scan
-        #self.lex_path = lex_path
-        #self.program_path = program_path
+    def __init__(self, operators={'(', ')', 'STAR', 'OR', 'PLUS', 'CONCAT'}):
         self.operators = operators
-
     def run_scan(self):
-        ## init scanner
+        # init scanner
         lex_path = self.lex_path
         program_path = self.program_path
         lex_scan = Scanner(lex_path)
         lex_scan.analaze_lex()

-        ## list the RD dict
+        # list the RD dict
         RD_list = lex_scan.get_RD_list()

-        ## and OR in between
-        RD_list = intersperse(RD_list,["OR"])
-
+        # and OR in between
+        RD_list = intersperse(RD_list, ["OR"])
+
         flat_list = flatten_list(RD_list)
         self.flat_list = flat_list

-        ## or keywords
-        kw_exp = intersperse(lex_scan.keywords,"OR")
+        # or keywords
+        kw_exp = intersperse(lex_scan.keywords, "OR")

-        ## or punctuations
-        pn_exp = intersperse(lex_scan.punctuations,"OR")
+        # or punctuations
+        pn_exp = intersperse(lex_scan.punctuations, "OR")

-        ## ored key-pn:
+        # ored key-pn:
         kw_pn = lex_scan.keywords.union(lex_scan.punctuations)
-        #print_blue(kw_pn)
-        kw_pn = intersperse(kw_pn,"OR")
-        #print_dark_cyan(kw_pn)
+        kw_pn = intersperse(kw_pn, "OR")

         #######################
-        ## get_new_kn_pn
+        # get_new_kn_pn
         #######################
-        new_kw_pn = list(chain.from_iterable(["lbracket"] if item == '(' else [item] for item in kw_pn))
-        new_kw_pn = list(chain.from_iterable(["rbracket"] if item == ')' else [item] for item in new_kw_pn))
+        new_kw_pn = list(chain.from_iterable(
+            ["lbracket"] if item == '(' else [item] for item in kw_pn))
+        new_kw_pn = list(chain.from_iterable(["rbracket"] if item == ')' else [
+            item] for item in new_kw_pn))

         new_op = {"lbracket", "rbracket", "OR"}
-        new_kw_pn = list(chain.from_iterable(list(item) if item not in new_op else [item] for item in new_kw_pn))
+        new_kw_pn = list(chain.from_iterable(
+            list(item) if item not in new_op else [item] for item in new_kw_pn))

         kp_r = RegExp(new_kw_pn, {"OR"})
         kp_r.handle_exp()
-        # postfix keyword-punctuations to add
+        # postfix keyword-punctuations to add
         kp_post = kp_r.get_postfix()

-        ## now replace lbracket and rbracket with ( and )
-        kp_post = list(chain.from_iterable(['('] if item == 'lbracket' else [item] for item in kp_post))
-        kp_post = list(chain.from_iterable([')'] if item == 'rbracket' else [item] for item in kp_post))
-
-        self.kp_post = kp_post
-
+        # now replace lbracket and rbracket with ( and )
+        kp_post = list(chain.from_iterable(
+            ['('] if item == 'lbracket' else [item] for item in kp_post))
+        kp_post = list(chain.from_iterable([')'] if item == 'rbracket' else [
+            item] for item in kp_post))

-
-        ## get postfix_exp of pn_kw
-        #pn_kw = lex_scan.postfix_keyword_punc()
-
+        self.kp_post = kp_post

-        ## read program file
+        # read program file
         lex_scan.read_program_file(program_path)

-        ## expand rd (subs re in rd)
+        # expand rd (subs re in rd)
         lex_scan.expand_rd(3)
-
-        self.lex_scan = lex_scan
+        self.lex_scan = lex_scan

     def build_my_tree(self):
         exp = self.flat_list
         operators = self.operators
-
-        #print_blue(self.flat_list)
-
         # build RE and concats
         r = RegExp(exp, operators, star="STAR")
-        mod_list = r.handle_exp()
+        r.handle_exp()

-
-        ## eval postfix expression for the AST
+        # eval postfix expression for the AST
         post = r.get_postfix()
-        #print_red(post)
-
-        ## I do not add # above to avoid some confusion
+        # I do not add # above to avoid some confusion
         post.append("#")
-        post.append("CONCAT")
-        #print_yellow(f"postfix exp: {post}")
+        post.append(Operator.CONCAT)

-        ## now build AST
-        tree = build_AST_tree(post,operators)
+        # now build AST
+        tree = build_AST_tree(post, operators)

         self.tree = tree
         return tree
@@ -112,71 +98,63 @@ def expand_my_tree(self):
         ## tree, REs, pn_kw, operators
         tree = self.tree
         REs = self.lex_scan.RE
-        pn_kw = self.lex_scan.postfix_keyword_punc()
         operators = self.operators
-
-        ## add the REs !!
+
+        # add the REs !!
         REs = postfix_me(REs, operators)
-        #print_red(REs)
-
+
         for term, exp in REs.items():
             tree.attach_node(term, exp)

-        ## add keywords and punctuations
-        #tree.implant_node(tree, pn_kw)
+        # add keywords and punctuations
         tree.implant_node(tree, self.kp_post)

-        ## assign ids
+        # assign ids
         tree.assign_id()
-
     def eval_tree(self):
-
+
         tree = self.tree

-        ## get firstpos and lastpos and nullables (+, ? not yet)
+        # get firstpos and lastpos and nullables (+, ? not yet)
         pre_followpos(tree)
-
-        ## store in root the ids for leaves
+
+        # store in root the ids for leaves
         get_node_dict(tree)

-        ## evaluate followpos for the DFA
+        # evaluate followpos for the DFA
         eval_followpos(tree)

     def dfa_mine(self):
         tree = self.tree

-        ## get a dict for id: (name , followpos)
+        # get a dict for id: (name , followpos)
         DFA_dict = tree.get_DFA_dict()
-        #print_green(DFA_dict)
+        # print_green(DFA_dict)

-        ## prepare for building the DFA
-        ## the firstpos of root is the first state in the DFA
+        # prepare for building the DFA
+        # the firstpos of root is the first state in the DFA
         root_s = tree.firstpos
         self.start_state = root_s
-        #print_blue(f"first of root:{root_s}")
-
-        ## now, let's build our DFA
+
+        # now, let's build our DFA
         dfa_table, accept_states = build_DFA(DFA_dict, root_s)
         self.accept_states = accept_states
-        #print(f"root_s {root_s}, accept_states {accept_states}")
-
-        ## create your DFA machine
+        # create your DFA machine
         machine = DFA(dfa_table, accept_states, frozenset(root_s))
-
+
         self.machine = machine
         return machine

     def get_tokens(self):
-
         machine = self.machine
         input_lists = self.lex_scan.program_list

         ac_tok = []
         for tok in input_lists:
             machine.accepted_tokens = []
-            machine.simulate_dfa_2(tok,[])
+            machine.simulate_dfa_2(tok, [])
             accepted_tokens = machine.accepted_tokens
             ac_tok = ac_tok + accepted_tokens
@@ -191,114 +169,3 @@ def dfa_stuff(self):
         ac_tok = self.get_tokens()

         return ac_tok
-
-
-
-def main():
-
-    ## get directory for lexical and program
-    cd = get_current_directory()
-    lex_file = 'lexical1.txt'
-    lex_path = cd + '/' + lex_file
-    program_path = cd + '/' + 'program1.txt'
-
-    ## build full dfa
-    lx = Lexical()
-    lx.lex_path = lex_path
-    lx.program_path = program_path
-    lx.run_scan()
-
-
-    ac_tok = lx.dfa_stuff()
-
-    for j in ac_tok:
-        print(''.join(j),end='\t')
-
-    dfa_tab = lx.machine.dfa_table
-
-    print(len(dfa_tab))
-    print("*"*20)
-
-    ######################
-    ## build symbol table
-    ######################
-
-    operators={'(', ')', 'STAR', 'OR', 'PLUS','CONCAT'}
-
-
-    exp_rd_rev = reverse_dict(lx.lex_scan.expanded_rd)
-    exp_rd_rev = lx.lex_scan.expanded_rd
-    accepted_tokens = ac_tok.copy()
-
-    visited_tokens = set()
-    detection_table = {}
-
-
-    #print_blue(lx.lex_scan.keywords)
-    for k in accepted_tokens:
-        k_str = ''.join(k)
-        if k_str in lx.lex_scan.keywords:
-            visited_tokens.add(tuple(k))
-            detection_table[k_str] = k_str
-
-    for k in accepted_tokens:
-        k_str = ''.join(k)
-        if k_str in lx.lex_scan.punctuations:
-            visited_tokens.add(tuple(k))
-            detection_table[k_str] = k_str
-
-    for key, val in exp_rd_rev.items():
-
-        tree1 = build_my_tree(val,operators.copy())
-        tree1.assign_id()
-        eval_tree(tree1)
-        m = dfa_mine(tree1)
-        # tree1.print_tree()
-
-
-        acc_tokens = []
-        for j in accepted_tokens:
-            if tuple(j) not in visited_tokens:
-                c = get_tokens_sole(m, j.copy())
-                if c:
-                    visited_tokens.add(tuple(j))
-                    detection_table[''.join(j)] = key
-
-
-    symbol_table = build_ouput_file(accepted_tokens, detection_table)
-    print("")
-
-    print_blue(list_to_str(accepted_tokens))
-    lexeme_path = cd + '/' + 'lexemes.txt'
-    write_file(lexeme_path, list_to_str(accepted_tokens))
-
-
-
-    #print(len(dfa_tab))
-    print("*"*20)
-
-    print("*.*. Stream of Tokens .*.*")
-    print_yellow(symbol_table)
-    output_path = cd + '/' + 'tokens.txt'
-    write_file(output_path, symbol_table)
-
-
-    table_dict = get_table_dict(frozenset(dfa_tab))
-    #print_dark_cyan(table_dict)
-
-    print("\n*.*. Transition Table .*.*")
-    print_dfa_trans(dfa_tab, table_dict)
-    start, accept = get_start_accept(frozenset(lx.start_state), lx.accept_states, table_dict)
-    print_yellow(f"Start State: {start}")
-    print_yellow(f"Accept States: {accept}\n")
-
-
-
-
-
-
-
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/modules/lexical_aux.py b/modules/lexical_aux.py
index 10decfc..b96a604 100644
--- a/modules/lexical_aux.py
+++ b/modules/lexical_aux.py
@@ -1,46 +1,46 @@
 import os
+import sys
 from modules.RegExp import RegExp, postfix_me
 from modules.Node_AST import build_AST_tree, eval_followpos, get_node_dict, pre_followpos
 from modules.State import DFA, build_DFA
 from modules.color_print import print_yellow

-def get_current_directory():
-    #current_path = os.path.dirname(os.path.abspath(__file__))
-    path = os.getcwd()
+def get_current_directory():
+    # TODO: check why do we need this function
+    # is there a better alternative?
+    path = os.getcwd()
     return path
-
-def write_file(path_file, output_list):
+
+def write_file(path_file, output_list):
     os.makedirs(os.path.dirname(path_file), exist_ok=True)
     with open(path_file, 'w+') as filehandle:
         for listitem in output_list:
             filehandle.write('%s\n' % listitem)

+
 def build_my_tree(exp, operators):
-
+
     # build RE and concats
-    #print_yellow(f"operators: {operators}")
     r = RegExp(exp, operators, star="STAR")
     mod_list = r.handle_exp()
-    #print_green(mod_list)
-
-    ## eval postfix expression for the AST
+    # eval postfix expression for the AST
     post = r.get_postfix()
-
-    ## I do not add # above to avoid some confusion
+    # I do not add # above to avoid some confusion
     post.append("#")
     post.append("CONCAT")
-    #print_yellow(f"postfix exp: {post}")
-    ## now build AST
-    tree = build_AST_tree(post,operators)
+    # now build AST
+    tree = build_AST_tree(post, operators)
     return tree

+
 def reverse_dict(the_dict):
+    # TODO: why are we reversing the dict?
     keys = list(the_dict.keys())
     keys.reverse()
     values = list(the_dict.values())
@@ -49,85 +49,80 @@ def reverse_dict(the_dict):
     return new_dict

+
 def expand_my_tree(tree, REs, pn_kw, operators):
-    ## add the REs !!
+    # add the REs !!
     REs = postfix_me(REs, operators)
-    #print_red(REs)
-
+
     for term, exp in REs.items():
         tree.attach_node(term, exp)

-    ## add keywords and punctuations
+    # add keywords and punctuations
     tree.implant_node(tree, pn_kw)
-
+
     tree.assign_id()

+
 def eval_tree(tree):
-    ## get firstpos and lastpos and nullables
+    # get firstpos and lastpos and nullables
     pre_followpos(tree)
-
-    ## store in root the ids for leaves
+
+    # store in root the ids for leaves
     get_node_dict(tree)

-    ## evaluate followpos for the DFA
+    # evaluate followpos for the DFA
     eval_followpos(tree)

-def dfa_mine(tree):
-    ## get a dict for id: (name , followpos)
+def dfa_mine(tree):
+    # get a dict for id: (name , followpos)
     DFA_dict = tree.get_DFA_dict()
-    #print_green(DFA_dict)
-    ## prepare for building the DFA
-    ## the firstpos of root is the first state in the DFA
+    # prepare for building the DFA
+    # the firstpos of root is the first state in the DFA
     root_s = tree.firstpos
-    #print_blue(f"first of root:{root_s}")
-
-    ## now, let's build our DFA
+
+    # now, let's build our DFA
     dfa_table, accept_states = build_DFA(DFA_dict, root_s)
-
-    ## create your DFA machine
+    # create your DFA machine
     machine = DFA(dfa_table, accept_states, frozenset(root_s))
-
+
     return machine

+
 def get_tokens(machine, input_lists):
     ac_tok = []
     for tok in input_lists:
         machine.accepted_tokens = []
-        machine.simulate_dfa_2(tok,[])
+        machine.simulate_dfa_2(tok, [])
         accepted_tokens = machine.accepted_tokens
         ac_tok = ac_tok + accepted_tokens
-
+
     return ac_tok

+
 def build_ouput_file(accepted_tokens, detection_table):
     symbol_table = []
-
     for i in accepted_tokens:
         str_d = detection_table[''.join(i)]
         symbol_table.append(str_d)

     return symbol_table

+
 def get_tokens_sole(machine, tok):
     ac_tok = []
     token_temp = tok.copy()
     machine.accepted_tokens = []
-    machine.simulate_dfa_2(tok,[])
+    machine.simulate_dfa_2(tok, [])
     accepted_tokens = machine.accepted_tokens
-    ac_tok = ac_tok + accepted_tokens
+    ac_tok += accepted_tokens

-    #print_red(f"toks are: {token_temp}, {ac_tok}")
     ac_tok2 = [''.join(x) for x in ac_tok]
     ac_tok2 = ''.join(ac_tok2)
     token_temp2 = ''.join(token_temp)
-
-    #print_green(f"ac_tok {ac_tok2}, {token_temp2}")
-
-    if ac_tok2 == token_temp2:
-        return True
-    return False
+    return ac_tok2 == token_temp2


 def list_to_str(the_list):
@@ -142,6 +137,7 @@ def list_to_str(the_list):
 ## consider adding to Lexical Class ##
 ######################################

+
 def get_table_dict(dfa_tab):
     """ takes dfa table and returns dict of unique names for states
     """
@@ -151,40 +147,29 @@
     for i in dfa_tab:
         table_dict[i] = 's' + str(identifier)
-        identifier +=1
+        identifier += 1

     return table_dict

+
 def print_dfa_trans(dfa_tab, table_dict):
-    for k,v in dfa_tab.items():
-        #print_yellow(table_dict[k])
+    for k, v in dfa_tab.items():
         d = table_dict.get(k)
-        if d:
-            #print_yellow(f"\n{d}")
-            pass
-        else:
-            #print_yellow('\ns' + str(len(dfa_tab)))
+        if not d:
             d = 's' + str(len(dfa_tab))
-
-        print_yellow(f"\n{d}")
+        print_yellow(f"\n{d}")

         for key, value in v.items():
-            #print(key, end="->")
-            #print_blue(table_dict.get(value))
             d = table_dict.get(value)
-            if d:
-                #print(d + " | ", end ="\t")
-                pass
-            else:
-                #print('s' + str(len(dfa_tab)) + " | " , end ="\t")
+            if not d:
                 d = 's' + str(len(dfa_tab))
             t = (key, d)
-
-            print(t,end = '\t')
+            print(t, end='\t')

         print("")

+
 def get_start_accept(start_state, accept_states, table_dict):
     """ returns start and accept states with unique names """
     s = table_dict.get(start_state)
@@ -192,7 +177,6 @@
     for i in accept_states:
         a = table_dict.get(i)
-        #print_blue(a)
         if a:
             accept.add(a)
     return s, accept
@@ -216,7 +200,3 @@ def get_arg(param_index, default=None):
         print(
             f"[FATAL] The comand-line argument #[{param_index}] is missing")
         exit(-1)  # Program execution failed.
-
-
-
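
Usage note (not part of the patch): with main() removed from modules/Lexical.py, the module no longer ships its own entry point. Below is a minimal driver sketch that mirrors the deleted main(). The run_lexer name and its default file names are illustrative only; it assumes lexical1.txt and program1.txt sit in the current working directory, exactly as the old main() assumed.

# Hypothetical driver sketch -- it only uses names that appear in this diff
# (Lexical, run_scan, dfa_stuff, get_current_directory, list_to_str, write_file).
from modules.Lexical import Lexical
from modules.lexical_aux import get_current_directory, list_to_str, write_file


def run_lexer(lex_file='lexical1.txt', program_file='program1.txt'):
    # Point the Lexical instance at the spec and program files, then scan.
    cd = get_current_directory()
    lx = Lexical()
    lx.lex_path = cd + '/' + lex_file
    lx.program_path = cd + '/' + program_file
    lx.run_scan()

    # As in the removed main(), dfa_stuff() yields the accepted tokens once
    # the combined DFA has been built and run over the program input.
    ac_tok = lx.dfa_stuff()

    # Persist the recognized lexemes the same way the old main() did.
    write_file(cd + '/' + 'lexemes.txt', list_to_str(ac_tok))
    return ac_tok


if __name__ == "__main__":
    for tok in run_lexer():
        print(''.join(tok), end='\t')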