From 0d8598b0dead2382d21676a7a231432b69c2f423 Mon Sep 17 00:00:00 2001 From: Hagar Usama Date: Sat, 15 Oct 2022 23:12:13 +0200 Subject: [PATCH] refactor scanner (#9) --- modules/Scanner.py | 150 +++++---------------------------------------- 1 file changed, 17 insertions(+), 133 deletions(-) diff --git a/modules/Scanner.py b/modules/Scanner.py index 8119f02..b106c46 100644 --- a/modules/Scanner.py +++ b/modules/Scanner.py @@ -15,9 +15,6 @@ def __init__(self, lexical_path): self.program_list = [] self.operators = {'(', ')', 'STAR', 'OR', 'PLUS','CONCAT'} - - - def analaze_lex(self): self.get_buffer() self.handle_file() @@ -53,42 +50,31 @@ def get_RE_list(self): return get_value_list(self.RE) def sep_RD(self): + # TODO: reduce complexity for k, v in self.RD.items(): - #print(k,v) a = v for i in v: - if i not in self.RE: if i not in self.operators: if len(i) > 1: - #print_yellow(f"non-RE is: {i}") - # separate i, add it to its list, update - new_i = list(i) a = list(chain.from_iterable(new_i if item == i else [item] for item in a)) - #print(f"a is {a}") self.RD[k] = a - - def expand_rd(self, r): rd_temp = copy(self.RD) - new_rd = {} - + # TODO: reduce complexity + # three nested loops are horrible for i in range(1,r): - for k,v in rd_temp.items(): + for k,_ in rd_temp.items(): for key, value in self.RE.items(): - #print_yellow(f"{i}, {j}") a = rd_temp[k] - #print(a) a = list(chain.from_iterable(value if item == key else [item] for item in a)) rd_temp[k] = a - - #print_purple(rd_temp) + self.expanded_rd = rd_temp - def handle_lexical(self): @@ -116,32 +102,22 @@ def handle_lexical(self): for i in rd: r = handle_rd(i) - #print_blue(f"r= {r}") RDs[r[0]] = r[1] - #print_purple(handle_rd(i)) + for i in re: r = handle_re(i) - #print_green(f"r= {r}") REs[r[0]] = r[1] - #print_purple(handle_re(i)) - - - #print_purple(pn) - #print_blue(kw) - + self.punctuations = pn self.keywords = kw self.RD = RDs self.RE = REs - #return pn, kw, REs, RDs - def postfix_keyword_punc(self): - #kw_pn = self.keywords.union(self.punctuations) kw_pn = self.keywords.union(self.punctuations) - kw_pn = intersperse(list(kw_pn) ,"OR") + # TODO: check the reason for removing & appending kw_pn.remove("OR") kw_pn.append("OR") @@ -187,9 +163,6 @@ def list_rules(self): for key, value in self.RD.items(): self.RD[key] = value.split(" ") - - #return RE, RD - def sort_file(input_list): @@ -201,26 +174,22 @@ def sort_file(input_list): RDs = [] REs = [] - + # TODO: make this more dynamic for i in input_list: if i.strip().startswith("{"): keywords.append(i.strip()) - #print("Keywords") elif i.strip().startswith("["): - #print("Punctuations") punctuations.append(i.strip()) else: x = re.search(r"[a-zA-Z]+[0-9]*:", i.strip()) if x: RDs.append(i.strip()) - #print(f"RD {i.strip()}") else: x = re.search(r"[a-zA-Z]+[0-9]* =", i.strip()) if x: REs.append(i.strip()) - #print(f"RE {i.strip()}") return punctuations, keywords, REs, RDs @@ -229,8 +198,8 @@ def sort_file(input_list): def generate_equivalent_range(str_input): "generate ranges in ReExp" + # TODO: make range detection dynamic if str_input == "a-z": - range_s = 'a' range_e = 'z' y = range_s @@ -248,7 +217,6 @@ def generate_equivalent_range(str_input): y += " OR " + chr(j) elif str_input == "0-9": - range_s = '0' range_e = '9' y = range_s @@ -259,7 +227,7 @@ def generate_equivalent_range(str_input): return y def handle_keyword(input_list): - + # TODO: make it dymaic later input_list = input_list.replace("{",'') input_list = input_list.replace("}",'') input_list = input_list.split(" ") @@ -268,6 +236,7 @@ def handle_keyword(input_list): return set(input_list) def handle_punctuations(input_list): + # TODO: make it dymaic later input_list = input_list.replace("[",'') input_list = input_list.replace("]",'') input_list = input_list.split(" ") @@ -281,13 +250,6 @@ def handle_rd(input_list): input_list = [i.strip() for i in input_list] - #print_red(f"input list RD: {input_list}") - - ## separating - #for i in input_list: - # if i not in RE: - # print(f"non-RE i is: {i}") - return input_list def handle_re(input_list): @@ -329,11 +291,11 @@ def get_value_list(the_dict): (with brackets for merging """ - val = [] - for key, value in the_dict.items(): + values = [] + for _, value in the_dict.items(): value = add_brackets(value) - val.append(value) - return val + values.append(value) + return values def add_brackets(the_list): LBRKT = "(" @@ -346,82 +308,4 @@ def add_brackets(the_list): def flatten_list(the_list): flat_list = [] flat_list = [item for sublist in the_list for item in sublist] - """ - for i in range(len(the_list)): - flat_list.append(the_list[i]) - print_yellow(the_list[i]) - if i != len(the_list) - 1: - #flat_list.append("OR") - pass - """ - return flat_list - -def main(): - lex_scan = Scanner("/home/u/git/last_chance/Lexical/lexical3.txt") - lex_scan.analaze_lex() - - print_red(lex_scan.RE) - - for key, value in lex_scan.RD.items(): - print_yellow(f"{key}=>{value}") - - for key, value in lex_scan.RE.items(): - print_green(f"{key}=>{value}") - - print_blue(lex_scan.punctuations) - print_purple(lex_scan.keywords) - - RD_list = lex_scan.get_RD_list() - RD_list = intersperse(RD_list,["OR"]) - #print(flatten_list(RD_list)) - - # this list contains all RDs ored - flat_list = flatten_list(RD_list) - kw_exp = intersperse(lex_scan.keywords,"OR") - print_purple(kw_exp) - pn_exp = intersperse(lex_scan.punctuations,"OR") - print_blue(pn_exp) - - print_purple(flat_list) - - print_red(lex_scan.postfix_keyword_punc()) - - - lex_scan.read_program_file("/home/u/git/last_chance/Lexical/program3.txt") - print_blue(lex_scan.program_list) - - - lex_scan.expand_rd(3) - print_green(lex_scan.expanded_rd) - print(lex_scan.RD) - - print_blue("sep*"*10) - - operators={'(', ')', 'STAR', 'OR', 'PLUS','CONCAT'} - - """ for k, v in lex_scan.RD.items(): - #print(k,v) - a = v - for i in v: - - if i not in lex_scan.RE: - if i not in operators: - if len(i) > 1: - print_yellow(f"non-RE is: {i}") - # separate i, add it to its list, update - - new_i = list(i) - a = list(chain.from_iterable(new_i if item == i else [item] for item in a)) - #print(f"a is {a}") - lex_scan.RD[k] = a - """ - - - print(lex_scan.RD) - for k,v in lex_scan.RD.items(): - print(k,v) - - - -if __name__ == "__main__": - main() + return flat_list \ No newline at end of file