
Refactor DigestionAgent to use HashSetPool for indices
Added a `using` directive for `MzLibUtil`. Introduced a static readonly `HashSetPool<int>` field named `HashSetPool`, initialized inline, to manage a shared pool of hash sets. Refactored `GetDigestionSiteIndices` to rent a hash set from `HashSetPool` for collecting indices, which guarantees no duplicates. The start and end of the protein sequence are explicitly added as cleavage sites so the N- and C-terminal peptides are retained. A `try`/`finally` block returns the hash set to the pool after use, even if an exception is thrown. The final list of indices is sorted before being returned.
nbollis committed Jan 14, 2025
1 parent 8620b8f commit be82276
Showing 1 changed file with 35 additions and 22 deletions.
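The heart of the change is a rent/return pattern: borrow a pooled `HashSet<int>`, fill it, materialize the result, and hand the set back in a `finally` block so it is recycled even when an exception is thrown. A minimal sketch of that pattern follows (illustrative only; `CollectDistinctSorted` is a hypothetical example method, while `HashSetPool`, `Get`, and `Return` are the MzLibUtil API this commit uses):

using System.Collections.Generic;
using System.Linq;
using MzLibUtil;

public static class PoolUsageSketch
{
    // One shared pool amortizes HashSet allocations across all calls.
    private static readonly HashSetPool<int> Pool = new HashSetPool<int>(8);

    public static List<int> CollectDistinctSorted(IEnumerable<int> values)
    {
        HashSet<int> set = Pool.Get(); // rent a set from the pool
        try
        {
            foreach (int v in values)
                set.Add(v); // HashSet silently drops duplicates
            return set.OrderBy(i => i).ToList(); // materialize before the set is recycled
        }
        finally
        {
            Pool.Return(set); // runs even on exceptions or early return
        }
    }
}

Note that the return expression is evaluated before the `finally` block runs, so the materialized list is safe to hand out after the set goes back to the pool.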
mzLib/Omics/Digestion/DigestionAgent.cs (57 changes: 35 additions & 22 deletions)
@@ -1,9 +1,12 @@
-using Omics.Modifications;
+using MzLibUtil;
+using Omics.Modifications;

 namespace Omics.Digestion
 {
     public abstract class DigestionAgent
     {
+        protected static readonly HashSetPool<int> HashSetPool = new HashSetPool<int>(8);
+
         protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List<DigestionMotif> motifList, Modification cleavageMod)
         {
             Name = name;
@@ -73,40 +76,50 @@ protected static bool ValidMaxLength(int? length, int maxLength)
         /// <returns></returns>
         public List<int> GetDigestionSiteIndices(string sequence)
         {
-            var indices = new List<int>();
+            List<int>? indicesList;
+            var indices = HashSetPool.Get(); // use hash set to ensure no duplicates

-            for (int r = 0; r < sequence.Length; r++)
+            try
             {
-                var cutSiteIndex = -1;
-                bool cleavagePrevented = false;
+                indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide

-                foreach (DigestionMotif motif in DigestionMotifs)
+                for (int r = 0; r < sequence.Length; r++)
                 {
-                    var motifResults = motif.Fits(sequence, r);
-                    bool motifFits = motifResults.Item1;
-                    bool motifPreventsCleavage = motifResults.Item2;
+                    var cutSiteIndex = -1;
+                    bool cleavagePrevented = false;

-                    if (motifFits && r + motif.CutIndex < sequence.Length)
+                    foreach (DigestionMotif motif in DigestionMotifs)
                     {
-                        cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex);
-                    }
+                        var motifResults = motif.Fits(sequence, r);
+                        bool motifFits = motifResults.Item1;
+                        bool motifPreventsCleavage = motifResults.Item2;

-                    if (motifPreventsCleavage) // if any motif prevents cleave
-                    {
-                        cleavagePrevented = true;
-                    }
-                }
+                        if (motifFits && r + motif.CutIndex < sequence.Length)
+                        {
+                            cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex);
+                        }

-                // if no motif prevents cleave
-                if (!cleavagePrevented && cutSiteIndex != -1)
-                {
-                    indices.Add(cutSiteIndex);
-                }
-            }
+                        if (motifPreventsCleavage) // if any motif prevents cleave
+                        {
+                            cleavagePrevented = true;
+                        }
+                    }

-            indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide
-            indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide
-            return indices.Distinct().OrderBy(i => i).ToList();
+                    // if no motif prevents cleave
+                    if (!cleavagePrevented && cutSiteIndex != -1)
+                    {
+                        indices.Add(cutSiteIndex);
+                    }
+                }
+
+                indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide
+            }
+            finally
+            {
+                indicesList = indices.OrderBy(i => i).ToList(); // sorted, matching the previous Distinct().OrderBy() behavior
+                HashSetPool.Return(indices);
+            }
+            return indicesList;
         }
     }
 }
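The commit itself does not show `HashSetPool<int>`, which lives in MzLibUtil. As a rough mental model only (an assumption for illustration; the real implementation may differ, for example by building on Microsoft.Extensions.ObjectPool), a minimal thread-safe set pool could look like this:

using System.Collections.Concurrent;
using System.Collections.Generic;

// Illustrative stand-in for MzLibUtil's HashSetPool<T>; not the actual source.
public sealed class HashSetPool<T>
{
    private readonly ConcurrentBag<HashSet<T>> _sets = new();
    private readonly int _initialCapacity;

    public HashSetPool(int initialCapacity) => _initialCapacity = initialCapacity;

    // Reuse a pooled set when available; otherwise allocate a fresh one.
    public HashSet<T> Get() =>
        _sets.TryTake(out var set) ? set : new HashSet<T>(_initialCapacity);

    // Clear before pooling so the next Get() hands out an empty set.
    public void Return(HashSet<T> set)
    {
        set.Clear();
        _sets.Add(set);
    }
}

Because `DigestionAgent` holds the pool in a static field shared by all digestion agents, whatever the real implementation is must be safe for concurrent use; the sketch uses `ConcurrentBag<T>` for that reason.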
