Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph theoretic fragmentation via graphgen. #86

Merged
merged 46 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
4a3a4b3
Added `graphgen` fragmentation.
ShaunWeatherly Jan 14, 2025
cacf442
Fix reference to `mf.mol` when `lo_method="pipek"`
ShaunWeatherly Jan 14, 2025
61907e2
Fix reference to `frag_scratch` when `solver=="DMRG"`
ShaunWeatherly Jan 14, 2025
17f1b84
Ruff fixes.
ShaunWeatherly Jan 14, 2025
a37a0e2
Formatting and removed unused code.
ShaunWeatherly Jan 14, 2025
1f17bcd
Final formatting
ShaunWeatherly Jan 14, 2025
865b9c2
Update dependencies.
ShaunWeatherly Jan 14, 2025
364ae2f
`mypy` static typing.
ShaunWeatherly Jan 14, 2025
f0fd882
Suppress `mypy` for `networkx` imports.
ShaunWeatherly Jan 14, 2025
cd77b7d
Remove debug prints
ShaunWeatherly Jan 14, 2025
cff391d
Added complete docstring for `graphgen`.
ShaunWeatherly Jan 14, 2025
ec70e05
Add types package for `networkx`
ShaunWeatherly Jan 15, 2025
49c20d5
Unsuppress mypy warnings
ShaunWeatherly Jan 15, 2025
24adc16
`norm`
ShaunWeatherly Jan 15, 2025
5fec9ed
New `FragmentMap` data class.
ShaunWeatherly Jan 15, 2025
7a4e53d
Edits to `FragmentMap` typing.
ShaunWeatherly Jan 15, 2025
f94c164
Remove unused kwargs.
ShaunWeatherly Jan 15, 2025
79dce30
Fix formatting
ShaunWeatherly Jan 15, 2025
a4333a6
Final formatting
ShaunWeatherly Jan 15, 2025
083e275
Additions to `nitpick_exceptions`.
ShaunWeatherly Jan 15, 2025
bfa1273
`FragmentMap` Docstring edits.
ShaunWeatherly Jan 15, 2025
31186f7
Remove unfinished code.
ShaunWeatherly Jan 15, 2025
98b4f72
Use `np.floating[Any]`
ShaunWeatherly Jan 15, 2025
7d4e7b5
Organize imports.
ShaunWeatherly Jan 15, 2025
815f0fe
Fixes for `FragmentMap`
ShaunWeatherly Jan 15, 2025
e9d5db9
Add unit tests for `autogen` and `graphgen`
ShaunWeatherly Jan 15, 2025
79a0627
Long line fix.
ShaunWeatherly Jan 15, 2025
546ee00
Suppress E501 in `fragmentation_test`
ShaunWeatherly Jan 15, 2025
d21a66a
Add ruff exclusion rule for `fragmentation_tests`
ShaunWeatherly Jan 15, 2025
c5b60d9
Ruff formatting yet again.
ShaunWeatherly Jan 15, 2025
5ebb8d4
Add `fragmentation_test` to mypy blacklist
ShaunWeatherly Jan 15, 2025
432efcc
Remove defaults in `FragmentMap` init.
ShaunWeatherly Jan 16, 2025
5a6ec5b
Add unit tests for energy comparisons across `autogen` and `graphgen`
ShaunWeatherly Jan 16, 2025
0fddbff
Add checks for IAOs in `graphgen`
ShaunWeatherly Jan 16, 2025
d9fee42
Update `graphgen` docstring
ShaunWeatherly Jan 16, 2025
1eab146
Formatting.
ShaunWeatherly Jan 16, 2025
b8f1eaa
Test `intersphinxlink`
ShaunWeatherly Jan 16, 2025
fcf5c23
More strict typing for `adjacency_mat`
ShaunWeatherly Jan 16, 2025
e07aa2d
Test removing `fragment_map` from nitpick exceptions.
ShaunWeatherly Jan 16, 2025
5b2108b
Rename `valence_basis` to `iao_valence_basis`
ShaunWeatherly Jan 16, 2025
bbbbe9b
Finish renaming `valence_basis` to `iao_valence_basis`
ShaunWeatherly Jan 16, 2025
5cc31d7
Ruff fixes.
ShaunWeatherly Jan 16, 2025
32a1eff
Final formatting.
ShaunWeatherly Jan 16, 2025
af8a8cf
Update `molbe_ppp`
ShaunWeatherly Jan 16, 2025
b4e2b5a
Address Oskar comments.
ShaunWeatherly Jan 17, 2025
8253907
Simplified check for `fragpart.mol`
ShaunWeatherly Jan 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"numpy>=1.22.0",
"scipy>=1.7.0",
"pyscf>=2.0.0",
"networkx",
"matplotlib",
"libdmet @ git+https://github.com/gkclab/libdmet_preview.git",
"attrs",
Expand Down
213 changes: 211 additions & 2 deletions src/quemb/molbe/autofrag.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,221 @@
# Author: Oinam Romesh Meitei

# Author: Oinam Romesh Meitei, Shaun Weatherly

import networkx as nx # type: ignore
import numpy as np
ShaunWeatherly marked this conversation as resolved.
Show resolved Hide resolved
from numpy.linalg import norm
from pyscf import gto

from quemb.molbe.helper import get_core
from quemb.shared.helper import unused


def euclidean_norm(
    i_coord: np.ndarray,
    j_coord: np.ndarray,
) -> np.floating:
    """Return the Euclidean distance between two Cartesian coordinates.

    Parameters
    ----------
    i_coord :
        Coordinate vector of the first atom, shape ``(3,)`` (any array-like).
    j_coord :
        Coordinate vector of the second atom, shape ``(3,)`` (any array-like).

    Returns
    -------
    numpy.floating
        The 2-norm of ``i_coord - j_coord``.
    """
    # Convert each operand *before* subtracting so plain sequences
    # (lists/tuples) are accepted, not only ndarrays.
    return np.linalg.norm(np.asarray(i_coord) - np.asarray(j_coord))

ShaunWeatherly marked this conversation as resolved.
Show resolved Hide resolved

def remove_nonnunique_frags(
    fragment_map: dict,
) -> dict:
    """Merge fragments whose AO index sets are subsets of another fragment.

    Whenever the ``fsites`` of fragment ``bdx`` form a subset of those of
    fragment ``adx``, fragment ``bdx`` is removed and its center sites are
    appended to the centers of the superset fragment ``adx``.

    Parameters
    ----------
    fragment_map : dict
        Fragment data with (at least) the parallel lists ``"fsites"``,
        ``"fs"`` and ``"center"``. Mutated in place and also returned.

    Returns
    -------
    dict
        The same ``fragment_map`` with all strict-subset (and duplicate)
        fragments merged away.
    """

    def _find_subset_pair() -> "tuple[int, int] | None":
        # Locate one (superset, subset) index pair, or None if no fragment
        # is contained in another.
        fsites = fragment_map["fsites"]
        for adx, basa in enumerate(fsites):
            seta = set(basa)
            for bdx, basb in enumerate(fsites):
                if adx != bdx and set(basb).issubset(seta):
                    return adx, bdx
        return None

    # Merge one pair at a time and restart the search: deleting from the
    # lists *while* enumerating them (as a single nested-loop pass would)
    # skips elements and leaves stale indices. The restart loop reaches a
    # fixpoint with every fragment a distinct, non-subset AO set.
    while (pair := _find_subset_pair()) is not None:
        adx, bdx = pair
        fragment_map["center"][adx] = (
            fragment_map["center"][adx] + fragment_map["center"][bdx]
        )
        del fragment_map["center"][bdx]
        del fragment_map["fsites"][bdx]
        del fragment_map["fs"][bdx]

    return fragment_map

def graphgen(
    mol: gto.Mole,
    be_type: str = "BE2",
    frozen_core: bool = True,
    remove_nonunique_frags: bool = True,
    frag_prefix: str = "f",
    connectivity: str = "euclidean",
) -> dict:
    """Generate fragments via adjacency graph.

    Generalizes the BEn fragmentation scheme to arbitrary fragment sizes using a
    graph theoretic heuristic. In brief: atoms are assigned to nodes in an
    adjacency graph and edges are weighted by some distance metric. For a given
    fragment center site, Dijkstra's algorithm is used to find the shortest path
    from that center to its neighbors. The number of nodes visited on that shortest
    path determines the degree of separation of the corresponding neighbor. I.e.,
    all atoms whose shortest paths from the center site visit at most 1 node must
    be direct neighbors to the center site, which gives BE2-type fragments; all
    atoms whose shortest paths visit at most 2 nodes must then be second-order
    neighbors, hence BE3; and so on.

    Parameters
    ----------
    mol : pyscf.gto.mole.Mole
        The molecule object.
    be_type : str
        The order of nearest neighbors (with respect to the center atom)
        included in a fragment. Supports all 'BEn', with 'n' in -
        [1, 2, 3, 4, 5, 6, 7, 8, 9] having been tested.
    frozen_core: bool
        Whether to exclude core AO indices from the fragmentation process.
        True by default.
    remove_nonunique_frags: bool
        Whether to remove fragments which are strict subsets of another
        fragment in the system. True by default.
    frag_prefix: str
        Prefix to be appended to the fragment datanames. Useful for managing
        fragment scratch directories.
    connectivity: str
        Keyword string specifying the distance metric to be used for edge
        weights in the fragment adjacency graph. Currently supports "euclidean"
        (which uses the square of the distance between atoms in real
        space to determine connectivity within a fragment.)

    Returns
    -------
    dict
        Fragment data: per-fragment AO indices (``"fsites"``), per-atom AO
        groupings (``"fs"``), edge and center sites (``"edge"``,
        ``"center"``), relative center indices (``"centerf_idx"``), energy
        weights (``"ebe_weights"``), scratch data names (``"dnames"``), plus
        the ``"adjacency_mat"`` / ``"adjacency_graph"`` used to build them.

    Raises
    ------
    ValueError
        If ``mol`` is ``None``.
    NotImplementedError
        For the not-yet-supported "resistance" and "entanglement" metrics.
    AttributeError
        If ``connectivity`` is not a recognized metric keyword.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O.
    if mol is None:
        raise ValueError("A valid pyscf gto.Mole object is required.")

    fragment_type_order = int(be_type[-1])
    natm = mol.natm

    # One node per atom, carrying its AO slice, symbol and coordinates.
    adx_map = {
        adx: {
            "bas": bas,
            "label": mol.atom_symbol(adx),
            "coord": mol.atom_coord(adx),
            "shortest_paths": dict(),
        }
        for adx, bas in enumerate(mol.aoslice_by_atom())
    }

    fragment_map = {
        "fsites": [],
        "fs": [],
        "edge": [],
        "center": [],
        "centerf_idx": [],
        "ebe_weights": [],
        "sites": [],
        "dnames": [],
        "core_offset": 0,
        "adjacency_mat": np.zeros((natm, natm), np.float64),
        "adjacency_graph": nx.Graph(),
    }
    fragment_map["adjacency_graph"].add_nodes_from(adx_map)

    # Loop-invariant: the per-atom core-orbital counts only depend on `mol`.
    if frozen_core:
        _, _, core_list = get_core(mol)

    # Map each atom to its (valence) AO index range.
    for adx, node in adx_map.items():
        start_ = node["bas"][2]
        stop_ = node["bas"][3]
        if frozen_core:
            # Shift this atom's AO range down by the cores removed so far,
            # then drop its own core orbitals from the range.
            start_ -= fragment_map["core_offset"]
            ncore_ = int(core_list[adx])
            stop_ -= fragment_map["core_offset"] + ncore_
            fragment_map["core_offset"] += ncore_
        fragment_map["sites"].append(tuple(range(start_, stop_)))

    metric = connectivity.lower()
    if metric in ["euclidean_distance", "euclidean"]:
        # Begin by constructing the adjacency matrix and adjacency graph
        # for the system. Each node corresponds to an atom, such that each
        # pair of nodes can be assigned an edge weighted by the square of
        # their distance in real space.
        for adx in range(natm):
            for bdx in range(adx + 1, natm):
                dr = (
                    euclidean_norm(
                        adx_map[adx]["coord"],
                        adx_map[bdx]["coord"],
                    )
                    ** 2
                )
                fragment_map["adjacency_mat"][adx, bdx] = dr
                fragment_map["adjacency_graph"].add_edge(adx, bdx, weight=dr)

        # For a given center site (adx), find the set of shortest
        # paths to all other sites. The number of nodes visited
        # on that path gives the degree of separation of the
        # sites.
        for adx, node in adx_map.items():
            fsites_temp = fragment_map["sites"][adx]
            fs_temp = [fragment_map["sites"][adx]]
            # `weight="weight"` reads the edge attribute set above — same
            # values as a per-edge lookup lambda, without the indirection.
            node["shortest_paths"] = dict(
                nx.single_source_all_shortest_paths(
                    fragment_map["adjacency_graph"],
                    source=adx,
                    weight="weight",
                    method="dijkstra",
                )
            )

            # If the degree of separation is smaller than the *n*
            # in your fragment type, BE*n*, then that site is appended to
            # the set of fragment sites for adx.
            for bdx, path in node["shortest_paths"].items():
                if 0 < (len(path[0]) - 1) < fragment_type_order:
                    fsites_temp = fsites_temp + fragment_map["sites"][bdx]
                    fs_temp.append(fragment_map["sites"][bdx])

            fragment_map["fsites"].append(tuple(fsites_temp))
            fragment_map["fs"].append(tuple(fs_temp))
            fragment_map["center"].append(tuple(fragment_map["sites"][adx]))

    elif metric in ["resistance_distance", "resistance"]:
        raise NotImplementedError("Work in progress...")

    elif metric in ["entanglement"]:
        raise NotImplementedError("Work in progress...")

    else:
        raise AttributeError(f"Connectivity metric not recognized: '{connectivity}'")

    # Remove all fragments whose AO indices can be identified as subsets of
    # another fragment's. The center site for the removed frag is then
    # added to that of the superset. Because doing so will necessarily
    # change the definition of fragments, we repeat it up to `natm` times
    # such that all fragments are guaranteed to be distinct sets.
    if remove_nonunique_frags:
        for _ in range(natm):
            fragment_map = remove_nonnunique_frags(fragment_map)

    # Define the 'edges' for fragment A as the intersect of its sites
    # with the set of all center sites outside of A:
    for adx, fs in enumerate(fragment_map["fs"]):
        edge: set[tuple] = set()
        for bdx, center in enumerate(fragment_map["center"]):
            if adx == bdx:
                continue
            overlap = set(fs).intersection(set((center,)))
            if overlap:
                edge = edge.union(overlap)
        fragment_map["edge"].append(tuple(edge))

    # Update relative center site indices (centerf_idx) and weights
    # for center site contributions to the energy (ebe_weights):
    for adx, center in enumerate(fragment_map["center"]):
        centerf_idx = [fragment_map["fsites"][adx].index(cdx) for cdx in center]
        ebe_weight = [1.0, tuple(centerf_idx)]
        fragment_map["centerf_idx"].append(tuple(centerf_idx))
        fragment_map["ebe_weights"].append(tuple(ebe_weight))

    # Finally, set fragment data names for scratch and bookkeeping:
    for adx, _ in enumerate(fragment_map["fs"]):
        fragment_map["dnames"].append(str(frag_prefix) + str(adx))

    return fragment_map


def autogen(
mol,
frozen_core=True,
Expand Down
30 changes: 27 additions & 3 deletions src/quemb/molbe/fragment.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Author: Oinam Romesh Meitei


from quemb.molbe.autofrag import autogen
from quemb.molbe.autofrag import autogen, graphgen
from quemb.molbe.helper import get_core
from quemb.molbe.lchain import chain as _ext_chain
from quemb.shared.helper import copy_docstring
Expand Down Expand Up @@ -82,24 +82,47 @@ def __init__(

# Check for frozen core approximation
if frozen_core:
self.ncore, self.no_core_idx, self.core_list = get_core(mol)
self.ncore, self.no_core_idx, self.core_list = get_core(self.mol)

# Check type of fragmentation function
if frag_type == "hchain_simple":
# This is an experimental feature.
self.hchain_simple()

elif frag_type == "chain":
if mol is None:
raise ValueError(
"Provide pyscf gto.M object in fragpart() and restart!"
)
self.chain(mol, frozen_core=frozen_core, closed=closed)

elif frag_type == "graphgen":
if self.mol is None:
mcocdawc marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
"Provide pyscf gto.M object in fragpart() and restart!"
)
fragment_map = graphgen(
mol=self.mol.copy(),
be_type=be_type,
frozen_core=frozen_core,
remove_nonunique_frags=True,
frag_prefix="f",
connectivity="euclidean",
)

self.fsites = fragment_map["fsites"]
self.edge = fragment_map["edge"]
self.center = fragment_map["center"]
# self.edge_idx = fragment_map["edge"]
ShaunWeatherly marked this conversation as resolved.
Show resolved Hide resolved
self.centerf_idx = fragment_map["centerf_idx"]
self.ebe_weight = fragment_map["ebe_weights"]
self.Nfrag = len(self.fsites)

elif frag_type == "autogen":
if mol is None:
raise ValueError(
"Provide pyscf gto.M object in fragpart() and restart!"
)

fgs = autogen(
mol,
be_type=be_type,
Expand All @@ -124,6 +147,7 @@ def __init__(
self.add_center_atom,
) = fgs
self.Nfrag = len(self.fsites)

else:
raise ValueError(f"Fragmentation type = {frag_type} not implemented!")

Expand Down
2 changes: 1 addition & 1 deletion src/quemb/molbe/lo.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def localize(
W_ = C_ @ W_

self.W = get_loc(
self.mol, W_, "PM", pop_method=pop_method, init_guess=init_guess
self.mf.mol, W_, "PM", pop_method=pop_method, init_guess=init_guess
mcocdawc marked this conversation as resolved.
Show resolved Hide resolved
)

if not self.frozen_core:
Expand Down
6 changes: 3 additions & 3 deletions src/quemb/molbe/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,10 +908,10 @@ def solve_block2(
mc.fcisolver.twodot_to_onedot = DMRG_args.twodot_to_onedot
mc.fcisolver.maxIter = DMRG_args.max_iter
mc.fcisolver.block_extra_keyword = DMRG_args.block_extra_keyword
mc.fcisolver.scratchDirectory = str(frag_scratch)
mc.fcisolver.runtimeDir = str(frag_scratch)
mc.fcisolver.scratchDirectory = frag_scratch.path
mc.fcisolver.runtimeDir = frag_scratch.path
mc.fcisolver.memory = DMRG_args.max_mem
os.chdir(frag_scratch)
os.chdir(frag_scratch.path)
mcocdawc marked this conversation as resolved.
Show resolved Hide resolved

mc.kernel(orbs)
rdm1, rdm2 = dmrgscf.DMRGCI.make_rdm12(
Expand Down
Loading