From 4de337e49ab2b1a11922be3ab8b1ca9c73bf1e91 Mon Sep 17 00:00:00 2001
From: XbzOnGit <gitpku163@163.com>
Date: Thu, 7 Mar 2024 23:10:33 +0800
Subject: [PATCH] Add minimization of DFA into NFA2DFA

---
 NFA2DFA/NFA2DFA.py |   3 +-
 NFA2DFA/mindfa.py  | 162 +++++++++++++++++++++++++++++++++++++++++++++
 README.md          |   1 +
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 NFA2DFA/mindfa.py

diff --git a/NFA2DFA/NFA2DFA.py b/NFA2DFA/NFA2DFA.py
index ec98727..d28b6d5 100644
--- a/NFA2DFA/NFA2DFA.py
+++ b/NFA2DFA/NFA2DFA.py
@@ -1,6 +1,6 @@
 import json
 import sys
-
+from mindfa import minimize_dfa
 
 
 untaged_dstates = [] # A stack is enough.
@@ -111,4 +111,5 @@ def nfa2dfa(nfa_dict:dict, st_qname:str, ed_qnames:list)->dict:
     st_qname = nfa_dict['q_start']
     ed_qnames = nfa_dict['accepted_q']
     dfa_dict = nfa2dfa(graph_nfa, st_qname, ed_qnames)
+    dfa_dict = minimize_dfa(dfa_dict['graph'],dfa_dict['q_start'],dfa_dict['accepted_q'])
     print(dfa_dict)
\ No newline at end of file
diff --git a/NFA2DFA/mindfa.py b/NFA2DFA/mindfa.py
new file mode 100644
index 0000000..373b8c2
--- /dev/null
+++ b/NFA2DFA/mindfa.py
@@ -0,0 +1,162 @@
+from typing import FrozenSet, Dict, Set
+# partition is a set of names.
+# And another dict to record which name is in which set.
+# (two-way mapping)
+        
+# Need to prove we can partition in any order.
+# The binary-relation of undistinguishable is eq relation.
+# reflecxivity and symmetry are obvious.
+# Transitivity is also true, cos it requires any string.
+# So they can be divided into eq-classes. 
+
+# f is a set of functions, fc(S) = T, means S(c) --> T.
+# Every symbol has a function fc.
+# Any fc is a homomorphism.
+        
+# When breaking the loop, it can never proceed after that.
+# If cannot partition in one round in one step of any symbol, then every partition is an eq class, 
+# cos if they are not, there exsists some string forcing some set to be partionable in one step.
+
+
+# It starts from a situation that it is possible for one partition to be not in the same eq class, 
+# but different partitions must be in different eq classes.
+# Now prove: 1. correctness: after merging by partition, every partition is undestinguishable.
+# If not, there exsists a string, then eat one by one, until two stages differ.
+# Then go backward for one, that partition can be partitioned again in one step.
+# 1 shows that we get the eq-classes.
+# 2. min: Cannot be a smaller DFA.
+# If smaller, must have a larger set which is a mixture of not only one eq-class.
+def minimize_dfa(dfa_dict:dict,st_name:str,ed_names:list)->dict:
+    # First, check if all acc or no acc.
+    # Should have no duplication in ed_names.
+    F = frozenset(ed_names)
+    if len(F) != len(ed_names):
+        raise "The accepting states in minimize_dfa have duplications"
+    all_symbols = dfa_dict[st_name].keys()
+    if len(ed_names) == len(dfa_dict.keys()):
+        return {"q_start":"q0",
+                "graph":{"q0":{sym : "q0" for sym in all_symbols},
+                         "accepted_q":["q0"]}}
+    elif len(ed_names) == 0:
+        return {"q_start":"q0",
+                "graph":{"q0":{sym : "q0" for sym in all_symbols},
+                         "accepted_q":[]}}
+    else:
+        # Have acc && noacc.
+        # Top-down. Parition refinement.
+        # Note that we do not remove non-distinguishable states here.
+        # When generating from NFA, every state is reachable.
+        # dead states are not removed, cos I want a complete DFA.
+        class Linked_Node:
+            # User class hashable by default.
+            # Key is derived from id().
+            # Only eq to itself.
+            def __init__(self,part:FrozenSet[str]):
+                self.partition : FrozenSet[str] = part
+                self.prev = None
+                self.next = None
+                self.qname = ""
+            
+        class Partion_list:
+            def __init__(self):
+                self.head = None
+                self.size = 0
+            def insert_front(self, part_node: Linked_Node):
+                part_node.next = self.head
+                part_node.prev = None
+                if self.head is not None:
+                    self.head.prev = part_node
+                self.head = part_node
+                self.size += 1
+            def remove(self, part_node:Linked_Node):
+                if part_node.prev is not None:
+                    part_node.prev.next = part_node.next
+                else:
+                    # Is head.
+                    self.head = part_node.next
+                if part_node.next is not None:
+                    part_node.next.prev = part_node.prev
+                self.size -= 1
+
+        
+        
+        plist = Partion_list()
+        par_dist : Dict[str,Linked_Node] = {}
+        Q_minus_F = frozenset(set(dfa_dict.keys()) - F)
+        nodeone = Linked_Node(F)
+        nodetwo = Linked_Node(Q_minus_F)
+        plist.insert_front(nodeone)
+        plist.insert_front(nodetwo)
+        for name in dfa_dict.keys():
+            if name in F:
+                par_dist[name] = nodeone
+            else:
+                par_dist[name] = nodetwo
+        while True:
+            prevsize = plist.size
+            pt = plist.head
+            have_parted_more = False
+            while pt is not None:
+                for c in all_symbols:
+                    group_by_despart : Dict[FrozenSet[str], Set[str]] = {}
+                    for name in pt.partition:
+                        des = dfa_dict[name][c]
+                        despart = par_dist[des]
+                        if despart in group_by_despart:
+                            group_by_despart[despart].add(name)
+                        else:
+                            group_by_despart[despart] = {name}
+                    if len(group_by_despart) > 1:
+                        # Partition according to this grouping.
+                        # Remove from list first.
+                        plist.remove(pt)
+                        # Form several more partitions and insert.
+                        for newset in group_by_despart.values():
+                            fs = frozenset(newset)
+                            fn = Linked_Node(fs)
+                            plist.insert_front(fn)
+                            for changed_name in fs:
+                                par_dist[changed_name] = fn
+                        # Restart the list loop.
+                        have_parted_more = True
+                        break
+                if have_parted_more:
+                    break
+                pt = pt.next
+            nowsize = plist.size
+            if nowsize == prevsize:
+                break
+        redict = {"q_start":"","graph":{},"accepted_q":[]}
+        node_id = 0
+        pt = plist.head
+        while pt is not None:
+            if len(pt.qname) == 0:
+                pt.qname = f"q{node_id}"
+                node_id += 1
+            if st_name in pt.partition:
+                redict["q_start"] = pt.qname
+            one_name = next(iter(pt.partition))
+            # If one in F, then every one must be in F.
+            if one_name in F:
+                redict["accepted_q"].append(pt.qname)
+            if pt.qname not in redict["graph"]:
+                    redict["graph"][pt.qname] = {}
+            for c in all_symbols:
+                des = dfa_dict[one_name][c]
+                par_node = par_dist[des]
+                if len(par_node.qname) == 0:
+                    par_node.qname = f"q{node_id}"
+                    node_id += 1
+                redict["graph"][pt.qname][c] = par_node.qname
+            pt = pt.next
+        return redict
+
+       
+
+       
+        
+
+if __name__ == '__main__':
+    print(minimize_dfa({"q0":{"0":"q3","1":"q1"},"q1":{"0":"q2","1":"q5"},
+                        "q2":{"0":"q2","1":"q5"},"q3":{"0":"q0","1":"q4"},
+                        "q4":{"0":"q2","1":"q5"},"q5":{"0":"q5","1":"q5"}},"q0",["q1","q2","q4"]))
\ No newline at end of file
diff --git a/README.md b/README.md
index 36578b5..d2db42b 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ Another issue is that it does not match across lines in grep cos dot does not ma
 ## NFA2DFA
 Convert NFA into DFA.  
 Test case is from Sipser, check README.md in NFA2DFA folder for detail.  
+Also add minimization of DFA into NFA2DFA, check mindfa.py for detail.  
 ## NFASimu  
 NFA simulation on the fly.  
 ## Regex2NFA