Merge pull request #16 from ACEsuit/jh/clean_purify

WIP: purification clean up and performance fix
ACEsuit · Jul 31, 2024 · 415e838 · 415e838
2 parents bb62760 + 5986913
commit 415e838
Show file tree

Hide file tree

Showing 3 changed files with 95 additions and 113 deletions.
diff --git a/src/defaults.jl b/src/defaults.jl
@@ -343,6 +343,11 @@ function mb_ace_basis(kwargs)
                                maxdeg=maxdeg,
                                N = cor_order, )
       _rem = kwargs[:delete2b] ? 1 : 0
+      # remove all zero-basis functions that we might have accidentally created so that we purify less extra basis
+      dirtybasis = ACE1.RPI.remove_zeros(dirtybasis)
+      # and finally cleanup the rest of the basis 
+      dirtybasis = ACE1._cleanup(dirtybasis)
+      # finally purify
       rpibasis = ACE1x.Purify.pureRPIBasis(dirtybasis; remove = _rem)
    else
       rpibasis = ACE1.ace_basis(species = AtomicNumber.(elements),

diff --git a/src/pure/Purify.jl b/src/pure/Purify.jl
@@ -5,7 +5,7 @@ using ACE1x
 
 using ACE1x: getspeclenlist, spec2col, insertspect!
 
-using SparseArrays: sparse, spzeros, SparseVector
+using SparseArrays: sparse, spzeros, SparseVector, dropzeros
 using RepLieGroups.O3: ClebschGordan
 
 const NLMZ = ACE1.RPI.PSH1pBasisFcn
@@ -15,13 +15,20 @@ function pureRPIBasis(basis::ACE1.RPIBasis; remove = 0)
    corord = maximum(length.(get_nl(basis)))
    @assert corord >= 2
 
+   # clean zeros for sanity and prevent useless basis being purified
+   basis = ACE1.RPI.remove_zeros(basis)
+   basis = ACE1._cleanup(basis)
+
    pin, pcut = basis.pibasis.basis1p.J.J.pl, basis.pibasis.basis1p.J.J.pr
    ninc = (pin + pcut) * (corord - 1)
    zList = basis.pibasis.zlist.list
    species = sort([list.z for list in zList])
 
    # get the original C_sym for each atom
    C_sym_list = deepcopy(basis.A2Bmaps)
+   # remove zeros that generated for some reason...
+   C_sym_list = ntuple(i -> dropzeros(C_sym_list[i]), length(C_sym_list))
+
 
    # setting correpsonding basis to 0 if we want to remove some basis
    for C_sym in C_sym_list
@@ -44,7 +51,9 @@ function pureRPIBasis(basis::ACE1.RPIBasis; remove = 0)
    end
    maxn = maximum(maxn_list)
    maxl = maximum(maxl_list)
-   Ydim = length(basis.pibasis.basis1p.SH)
+   Ydim = length(ACE1.SphericalHarmonics.SHBasis(maxl))
+
+
    # === get extended b1p, spec1p, Rn_coef and purification operator that map extended impure basis to pure basis
    # get extended polynomial basis
    Rn = basis.pibasis.basis1p.J
@@ -75,34 +84,27 @@ function pureRPIBasis(basis::ACE1.RPIBasis; remove = 0)
 
    # A2Bmaps <- C_sym * P * C_pure are constructed individually for each atom
    spec_core_list = ACE1x.getACEcoreSpec_1px_multi(basis, spec1p_x_core, zList)
-
-   # Sanity check
-   for t in eachindex(zList)
-       @assert length(spec_core_list[t]) == length(ACE1.get_basis_spec(basis.pibasis, t))
-   end
 
    # now for each of the species, we get the transformation, and also the extended basis
    newA2Bmap_list = []
    spec_x_list = []
+   cg = ClebschGordan()
    for i in eachindex(zList)
       # get spec and spec_core respectively, they should be equivalent
       spec = ACE1.get_basis_spec(basis.pibasis, i)
       spec_core = spec_core_list[i]
 
       spec3b_core = spec_core[length.(spec_core) .== 2]
       Cnn_all = Dict{Vector{Int64}, SparseVector{Float64, Int64}}()
-      getCnn_all_multi!(Cnn_all, Rn_coef, spec1p_x_core, spec3b_core)
-      C_pure, spec_x_core, pure_spec = generalImpure2PureMap3D_env_test_multi(Cnn_all, Rn_coef, spec_core, spec1p_x_core, corord)
+      updateCnn_all!(Cnn_all, Rn_coef, spec1p_x_core, spec3b_core, cg)
+      C_pure, spec_x_core, pure_spec = getPurifyOpCCS(Cnn_all, Rn_coef, spec_core, spec1p_x_core, corord; cg = cg)
       # get spec_x in terms of ACE1 spec
       spec_x = ACE1x.getACE1Spec_multi(spec_x_core, spec1p_x_core, species[i])
       # a spec_x_list, each elements inside a tuple correpsonding to a species
       push!(spec_x_list, spec_x)
 
       # get the projection map
       inv_spec_x = Dict([ key => idx for (idx, key) in enumerate(spec_x) ]...)
-      # examples for using the inv_spec_x just for my own note...
-      # eg. inv_spec_x[spec_x[23]]
-      #     inv_spec_x[spec[20]]
 
       P = spzeros(length(spec), length(spec_x))
       for i in eachindex(spec)
@@ -113,33 +115,7 @@ function pureRPIBasis(basis::ACE1.RPIBasis; remove = 0)
       # newA2Bmap = P * C_pure
       push!(newA2Bmap_list, newA2Bmap)
    end
-
-
-   # TODO: remove this comment block after making sure everything works fine
-   # === 
-   # construct new b1p_x from spec_x_list to create a minimal amount of spec1p, this part should be independent of the transformation 
-   # since the transformation is only applied on the AA matrix
-
-   # try unsorted first, if needed sort later
-   # thin_spec1p_x = Vector{ACE1.RPI.PSH1pBasisFcn}()
-
-   # for each atom, we extract the 1p basis
-   # for _spec in spec_x_list
-   #    for bb in _spec 
-   #       for b in bb.oneps
-   #          new_b = NLMZ(b.n, b.l, b.m, 0)
-   #          push!(thin_spec1p_x, new_b)
-   #       end
-   #    end
-   # end
-   # sort!(thin_spec1p_x, lt = (x, y) -> (x.z < y.z) || (x.z == y.z && x.n < y.n) || (x.z == y.z && x.n == y.n && x.l < y.l) || (x.z == y.z && x.n == y.n && x.l == y.l && x.m < y.m))
-   # unique!(thin_spec1p_x)
-
-   # create new b1p_x basis
-   # new_b1p_x = BasicPSH1pBasis(Rn_x, basis.pibasis.basis1p.zlist, spec1p_x)
-
-   # === end of comment block
-
+
    # get pibasis from spec
    pibasis_x = ACE1.pibasis_from_specs(b1p_x, Tuple(spec_x_list))
 
@@ -150,15 +126,13 @@ function pureRPIBasis(basis::ACE1.RPIBasis; remove = 0)
    pure_rpibasis = ACE1.RPI.remove_zeros(pure_rpibasis)
 
    # and finally clean up
-   pure_rpibasis = ACE1._cleanup(pure_rpibasis)
+   pure_rpibasis = ACE1._cleanup(pure_rpibasis; tol = 1e-12)
 
    return pure_rpibasis
 end
 
 
 """
-Impure2Pure3D with envelope, currently works for any body order
-
 param: Cnn_all :: Dict{Vector{Int}, SparseVector{Float64, Int64}}(), coefficient of order ≤ 2 basis in fitting
 param: Pnn_all :: Dict{Vector{Int}, SparseVector{Float64, Int64}}(), coefficient of radial basis for expanding with CG coeffs
 param: spec_core :: Vector{Vector{Int}}}, specification of ACE basis in ACEcore style
@@ -168,69 +142,67 @@ param: Remove :: Integer, all basis of order ≤ Remove will be purified
 Return: order_C :: Matrix, a transformation matrix for the purification
 Return: spec_x_order :: Vector{Vector{Int}}, extended specification in ACEcore style
 Return: pure_spec :: Vector{Vector{Int}}, pure basis that has to be evaluated
-
 """
-function generalImpure2PureMap3D_env_test_multi(Cnn_all::Dict, Pnn_all::Dict, spec_core::Vector{Vector{Int}}, spec1p::Vector{Tuple{Int16, Int, Int}}, Remove::Integer)
+function getPurifyOpCCS(Cnn_all::Dict, Pnn_all::Dict, spec_core::Vector{Vector{Int}}, spec1p::Vector{Tuple{Int16, Int, Int}}, Remove::Integer; cg = ClebschGordan())
 
    old_spec = deepcopy(spec_core)
    spec = deepcopy(spec_core)
    spec_len_list = getspeclenlist(old_spec)
-
-   # println("original number of basis: ", spec_len_list)
-
    # for each order
    for Remove_ν = 3:Remove
-
-      for Recursive_ν in reverse([i for i = 3:Remove_ν])
-      # we first have to remove the current order
+      for Recursive_ν in reverse(3:Remove_ν)
+        # we first have to remove the current order
          i = sum(spec_len_list[1:Recursive_ν-1]) + 1
          while (i <= sum(spec_len_list[1:Recursive_ν]))
-            # adjusting coefficent for the term \matcal{A}_{k1...kN} * A_{N+1}
+            # adjusting coefficent for the term \matcal{A}_{k1...kN} * A_{k_{N+1}}
             # first we get the coefficient corresponding to purified basis of order ν - 1
-
-            if spec2col(spec[i][1:end - 1], spec) == nothing
-               insertspect!(spec, spec[i][1:end - 1])
+            _target = spec[i][1:end - 1]
+            if spec2col(_target, spec) == -1
+               insertspect!(spec, _target)
                i += 1
                # update info
-               spec_len_list = getspeclenlist(spec)
-               spec3b = spec[length.(spec) .== 2]
-               getCnn_all_multi!(Cnn_all, Pnn_all, spec1p, spec3b)
+               spec_len_list[length(_target)] += 1
+               if length(_target) >= 2
+                  spec3b = spec[length.(spec) .== 2]
+                  updateCnn_all!(Cnn_all, Pnn_all, spec1p, spec3b, cg)
+               end
             end
 
-            # adjusting coefficent for terms Σ^{ν -1}_{β = 1} P^{κ} \mathcal{A}
-            # update Cnn_all if that are not in dict
+            # adjusting coefficent for terms Σ^{ν - 1}_{β = 1} P^{κ} \mathcal{A}
+            # update Cnn_all if they are not in dict, but we don't need to purify that since we only need the coefficients and its nnz
+            # to add basis in the next step
             for j = 1:Recursive_ν - 1
-               if spec2col([spec[i][j], spec[i][Recursive_ν]], spec) == nothing
-                  insertspect!(spec, [spec[i][j], spec[i][Recursive_ν]])
-                  i += 1
-                  # update info
-                  spec_len_list = getspeclenlist(spec)
+               _target = Int64[spec[i][j], spec[i][Recursive_ν]]
+               if spec2col(_target, spec) == -1
                   spec3b = spec[length.(spec) .== 2]
-                  getCnn_all_multi!(Cnn_all, Pnn_all, spec1p, spec3b)
+                  updateCnn_all!(Cnn_all, Pnn_all, spec1p, vcat(spec3b, [_target]), cg)
                end
             end
-
+            # now this is the actual pure basis that we need to expand 
             P_κ_list  = [Cnn_all[[spec[i][j], spec[i][Recursive_ν]]] for j = 1:Recursive_ν - 1]
             for (idx, P_κ) in enumerate(P_κ_list)
                for k = 1:length(P_κ.nzind) # for each kappa, P_κ.nzind[k] == κ in the sum
                   # first we get the coefficient corresponding to the \mathcal{A}
-                  pureA_spec = [spec[i][r] for r = 1:Recursive_ν-1 if r != idx] # r!=idx since κ runs through the 'idx' the coordinate
+                  pureA_spec = Int64[spec[i][r] for r = 1:Recursive_ν-1 if r != idx] # r!=idx since κ runs through the 'idx' the coordinate
                   push!(pureA_spec, P_κ.nzind[k]) # add κ into the sum
-                  if spec2col(sort(pureA_spec), spec) == nothing
+                  sort!(pureA_spec)
+                  if spec2col(pureA_spec, spec) == -1
                      # add an extra basis to evalute, that also require purification
-                     insertspect!(spec, sort(pureA_spec))
+                     insertspect!(spec, pureA_spec)
                      i += 1
                      # update info
-                     spec_len_list = getspeclenlist(spec)
-                     spec3b = spec[length.(spec) .== 2]
-                     getCnn_all_multi!(Cnn_all, Pnn_all, spec1p, spec3b)
+                     spec_len_list[length(pureA_spec)] += 1
+                     if length(pureA_spec) == 2
+                        spec3b = spec[length.(spec) .== 2]
+                        updateCnn_all!(Cnn_all, Pnn_all, spec1p, spec3b, cg)
+                     end
                   end
                end
             end
-            # update info
+            # update info, simply for sanity and this is not necessary
             spec_len_list = getspeclenlist(spec)
             spec3b = spec[length.(spec) .== 2]
-            getCnn_all_multi!(Cnn_all, Pnn_all, spec1p, spec3b)
+            updateCnn_all!(Cnn_all, Pnn_all, spec1p, spec3b, cg)
             i += 1
          end
       end
@@ -240,15 +212,13 @@ function generalImpure2PureMap3D_env_test_multi(Cnn_all::Dict, Pnn_all::Dict, sp
    pure_spec = deepcopy(spec)
 
    S = length(spec)
-   C = spzeros(5 * S, 5 * S) # transformation matrix, init large enough for additional basis evaluation
-
-
+   hmap = Dict{Int, Vector{Tuple{Int, Float64}}}(i => [] for i = 1:S)
    # from here no more extra pure basis should be inserted into the spec
    # println("Number of basis needed to be purified: ", spec_len_list)
 
    # corresponding to 2 and 3 body basis
    for i = 1:sum(spec_len_list[1:2])
-      C[i, i] = 1.0
+      push!(hmap[i], (i, 1.0))
    end
 
    # Base case, adjust coefficient for 3 body (ν = 2)
@@ -259,7 +229,7 @@ function generalImpure2PureMap3D_env_test_multi(Cnn_all::Dict, Pnn_all::Dict, sp
             # add an extra basis to evalute, but it does not require purification
             push!(spec, [pnn.nzind[k]])
          end
-         C[i, spec2col([pnn.nzind[k]], spec)] -= pnn.nzval[k]
+         push!(hmap[i], (spec2col([pnn.nzind[k]], spec), -pnn.nzval[k]))
       end
    end
 
@@ -270,17 +240,20 @@ function generalImpure2PureMap3D_env_test_multi(Cnn_all::Dict, Pnn_all::Dict, sp
          for i = sum(spec_len_list[1:ν-1]) + 1:sum(spec_len_list[1:ν])
             # adjusting coefficent for the term \matcal{A}_{k1...kN} * A_{N+1}
             # first we get the coefficient corresponding to purified basis of order ν - 1
-            last_ip2pmap = C[spec2col(spec[i][1:end - 1], spec), :]
+            _target = spec2col(spec[i][1:end - 1], spec)
+            last_ip2pmap = hmap[_target]
 
-            for k = 1:length(last_ip2pmap.nzind) # for each of the coefficient in last_ip2pmap
+            for k in eachindex(last_ip2pmap) # for each of the coefficient in last_ip2pmap, might be repeated
                # first we get the corresponing specification correpsonding to (spec of last_ip2pmap[i], spec[end])
-               target_spec = [t for t in spec[last_ip2pmap.nzind[k]]]
+               # target_spec = [t for t in spec[last_ip2pmap[1][k]]]
+               target_spec = [t for t in spec[last_ip2pmap[k][1]]]
                push!(target_spec, spec[i][end])
-               if !(sort(target_spec) in spec)
+               sort!(target_spec)
+               if !(target_spec in spec)
                   # add an extra basis to evalute, but it does not require purification
-                  push!(spec, sort(target_spec))
+                  push!(spec, target_spec)
                end
-               C[i, spec2col(sort(target_spec), spec)] += last_ip2pmap.nzval[k]
+               push!(hmap[i], (spec2col(target_spec, spec), last_ip2pmap[k][2]))
             end
 
             # adjusting coefficent for terms Σ^{ν -1}_{β = 1} P^{κ} \mathcal{A}
@@ -290,9 +263,11 @@ function generalImpure2PureMap3D_env_test_multi(Cnn_all::Dict, Pnn_all::Dict, sp
                   # first we get the coefficient corresponding to the \mathcal{A}
                   pureA_spec = [spec[i][r] for r = 1:ν-1 if r != idx] # r!=idx since κ runs through the 'idx' the coordinate
                   push!(pureA_spec, P_κ.nzind[k]) # add κ into the sum
-                  last_ip2pmap = C[spec2col(sort(pureA_spec), spec), :]
-                  for m = 1:length(last_ip2pmap.nzind)
-                     C[i, last_ip2pmap.nzind[m]] -= P_κ.nzval[k] .* last_ip2pmap.nzval[m]
+                  sort!(pureA_spec)
+                  _target = spec2col(pureA_spec, spec)
+                  last_ip2pmap = hmap[_target]
+                  for m in eachindex(last_ip2pmap)
+                     push!(hmap[i], (last_ip2pmap[m][1], -P_κ.nzval[k] * last_ip2pmap[m][2]))
                   end
                end
             end
@@ -301,38 +276,42 @@ function generalImpure2PureMap3D_env_test_multi(Cnn_all::Dict, Pnn_all::Dict, sp
       end
    end
 
-   # assert that it does not go over the bound of initialized C
-   @assert 5 * length(old_spec) > length(spec)
-
-   # create an ordered spec
    spec_x_order = deepcopy(pure_spec)
    for i = length(pure_spec) + 1 : length(spec)
       insertspect!(spec_x_order, spec[i])
    end
 
-   # init a matrix C for storing coefficients in according to spec in order
-   order_C = spzeros(length(spec_x_order), length(spec_x_order))
-   inv_spec_x = Dict([ key => idx for (idx, key) in enumerate(spec_x_order) ]...)
-   # for each basis
+   inv_spec_x_ordered = Dict{Vector{Int64}, Int64}([ key => idx for (idx, key) in enumerate(spec_x_order) ]...)
+   inv_spec = Dict{Vector{Int64}, Int64}([ key => idx for (idx, key) in enumerate(spec) ]...)
+
+   newhmap = Dict{Int, Vector{Tuple{Int, Float64}}}()
+
    for i in eachindex(spec_x_order)
-      # if they are basis that has to be purified
       if spec_x_order[i] in pure_spec
-         # get the correpsonding row of the target spec in original matrix
-         old_ip2pmap = C[spec2col(spec_x_order[i], spec), :]
-         @simd ivdep for j = 1:length(old_ip2pmap.nzind)
-            # order_C[i, spec2col(spec[old_ip2pmap.nzind[j]], spec_x_order)] = old_ip2pmap.nzval[j]
-            order_C[i, inv_spec_x[spec[old_ip2pmap.nzind[j]]]] = old_ip2pmap.nzval[j]
-         end
-      else # if not, only set the diagonal element to be 1
-         order_C[i, i] = 1
+         prev_row = inv_spec[spec_x_order[i]]
+         newhmap[i] = [ (inv_spec_x_ordered[spec[j]], val) for (j, val) in hmap[prev_row]]
+      else
+         newhmap[i] = [(i, 1.0)]
       end
    end
 
-
-   return order_C, spec_x_order, pure_spec
+   Irow, Jcol, vals = Int[], Int[], Float64[]
+   for i in keys(newhmap)
+      for j in newhmap[i]
+         # push!.( (Irow, Jcol, vals), (i, j[1], j[2]) )
+         push!(Irow, i)
+         push!(Jcol, j[1])
+         push!(vals, j[2])
+      end
+   end
+   Nk = length(spec)
+   C = sparse(Irow, Jcol, vals, Nk, Nk)
+   return C, spec_x_order, pure_spec
 end
 
 
+
+
 """
 Add elements in Cnn_all if needed, which a Dict containing the coefficients of product of spec3b. This function modifies Cnn_all directly.
 
@@ -342,9 +321,7 @@ param: spec1p :: Vector{Tuple{Int}}}, specification of 1 particle basis
 param: spec3b :: Vector{Tuple{Int}}}, specification of 3 body basis
 
 """
-function getCnn_all_multi!(Cnn_all::Dict, Pnn_all::Dict, spec1p, spec3b)
-   # Cnn_all = Dict{Vector{Int64}, SparseVector{Float64, Int64}}()
-   cg = ClebschGordan()
+function updateCnn_all!(Cnn_all::Dict, Pnn_all::Dict, spec1p, spec3b, cg)
    for nlm in spec3b
       if !(nlm in keys(Cnn_all))
          niceidx1, niceidx2 = spec1p[nlm[1]], spec1p[nlm[2]]