
Refactor DigestionAgent to use HashSetPool for indices
Added a `using` directive for `MzLibUtil`. Introduced a static readonly `HashSetPool<int>` field named `HashSetPool`, initialized inline, to manage a shared pool of hash sets. Refactored `GetDigestionSiteIndices` to rent a hash set from `HashSetPool` for collecting indices, which guarantees no duplicates. The start and end of the protein sequence are explicitly added as cleavage sites so the N- and C-terminal peptides are retained. A `try`/`finally` block returns the hash set to the pool after use, even if an exception is thrown. The final list of indices is sorted before being returned.
nbollis committed Jan 14, 2025
1 parent 8620b8f commit be82276
Showing 1 changed file with 35 additions and 22 deletions.
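The heart of the change is a rent/return pattern: borrow a pooled `HashSet<int>`, fill it, materialize the result, and hand the set back in a `finally` block so it is recycled even when an exception is thrown. A minimal sketch of that pattern follows (illustrative only; `CollectDistinctSorted` is a hypothetical example method, while `HashSetPool`, `Get`, and `Return` are the MzLibUtil API this commit uses):

using System.Collections.Generic;
using System.Linq;
using MzLibUtil;

public static class PoolUsageSketch
{
    // One shared pool amortizes HashSet allocations across all calls.
    private static readonly HashSetPool<int> Pool = new HashSetPool<int>(8);

    public static List<int> CollectDistinctSorted(IEnumerable<int> values)
    {
        HashSet<int> set = Pool.Get(); // rent a set from the pool
        try
        {
            foreach (int v in values)
                set.Add(v); // HashSet silently drops duplicates
            return set.OrderBy(i => i).ToList(); // materialize before the set is recycled
        }
        finally
        {
            Pool.Return(set); // runs even on exceptions or early return
        }
    }
}

Note that the return expression is evaluated before the `finally` block runs, so the materialized list is safe to hand out after the set goes back to the pool.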
mzLib/Omics/Digestion/DigestionAgent.cs (57 changes: 35 additions & 22 deletions)
@@ -1,9 +1,12 @@
-using Omics.Modifications;
+using MzLibUtil;
+using Omics.Modifications;

 namespace Omics.Digestion
 {
     public abstract class DigestionAgent
     {
+        protected static readonly HashSetPool<int> HashSetPool = new HashSetPool<int>(8);
+
         protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List<DigestionMotif> motifList, Modification cleavageMod)
         {
             Name = name;
@@ -73,40 +76,50 @@ protected static bool ValidMaxLength(int? length, int maxLength)
         /// <returns></returns>
         public List<int> GetDigestionSiteIndices(string sequence)
         {
-            var indices = new List<int>();
+            List<int>? indicesList;
+            var indices = HashSetPool.Get(); // use hash set to ensure no duplicates

-            for (int r = 0; r < sequence.Length; r++)
+            try
             {
-                var cutSiteIndex = -1;
-                bool cleavagePrevented = false;
+                indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide

-                foreach (DigestionMotif motif in DigestionMotifs)
+                for (int r = 0; r < sequence.Length; r++)
                 {
-                    var motifResults = motif.Fits(sequence, r);
-                    bool motifFits = motifResults.Item1;
-                    bool motifPreventsCleavage = motifResults.Item2;
+                    var cutSiteIndex = -1;
+                    bool cleavagePrevented = false;

-                    if (motifFits && r + motif.CutIndex < sequence.Length)
+                    foreach (DigestionMotif motif in DigestionMotifs)
                     {
-                        cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex);
-                    }
+                        var motifResults = motif.Fits(sequence, r);
+                        bool motifFits = motifResults.Item1;
+                        bool motifPreventsCleavage = motifResults.Item2;

-                    if (motifPreventsCleavage) // if any motif prevents cleave
-                    {
-                        cleavagePrevented = true;
-                    }
-                }
+                        if (motifFits && r + motif.CutIndex < sequence.Length)
+                        {
+                            cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex);
+                        }

-                // if no motif prevents cleave
-                if (!cleavagePrevented && cutSiteIndex != -1)
-                {
-                    indices.Add(cutSiteIndex);
-                }
-            }
+                        if (motifPreventsCleavage) // if any motif prevents cleave
+                        {
+                            cleavagePrevented = true;
+                        }
+                    }

-            indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide
-            indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide
-            return indices.Distinct().OrderBy(i => i).ToList();
+                    // if no motif prevents cleave
+                    if (!cleavagePrevented && cutSiteIndex != -1)
+                    {
+                        indices.Add(cutSiteIndex);
+                    }
+                }
+
+                indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide
+            }
+            finally
+            {
+                indicesList = indices.OrderBy(i => i).ToList(); // sorted, matching the previous Distinct().OrderBy() behavior
+                HashSetPool.Return(indices);
+            }
+            return indicesList;
         }
     }
 }
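The commit itself does not show `HashSetPool<int>`, which lives in MzLibUtil. As a rough mental model only (an assumption for illustration; the real implementation may differ, for example by building on Microsoft.Extensions.ObjectPool), a minimal thread-safe set pool could look like this:

using System.Collections.Concurrent;
using System.Collections.Generic;

// Illustrative stand-in for MzLibUtil's HashSetPool<T>; not the actual source.
public sealed class HashSetPool<T>
{
    private readonly ConcurrentBag<HashSet<T>> _sets = new();
    private readonly int _initialCapacity;

    public HashSetPool(int initialCapacity) => _initialCapacity = initialCapacity;

    // Reuse a pooled set when available; otherwise allocate a fresh one.
    public HashSet<T> Get() =>
        _sets.TryTake(out var set) ? set : new HashSet<T>(_initialCapacity);

    // Clear before pooling so the next Get() hands out an empty set.
    public void Return(HashSet<T> set)
    {
        set.Clear();
        _sets.Add(set);
    }
}

Because `DigestionAgent` holds the pool in a static field shared by all digestion agents, whatever the real implementation is must be safe for concurrent use; the sketch uses `ConcurrentBag<T>` for that reason.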
