Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Digestion: Fixed mod terminal fix and variable mod ordering #825

Merged
merged 40 commits into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
8620b8f
Added basic object pools
nbollis Jan 14, 2025
be82276
Refactor DigestionAgent to use HashSetPool for indices
nbollis Jan 14, 2025
a5bc0b6
Merge branch 'master' into ObjectPools
nbollis Jan 14, 2025
2bfde71
string interpolation in BPWSM extensions
nbollis Jan 14, 2025
11ccba9
Adjusted IEnumerable return in Protease.GetUnmodified
nbollis Jan 14, 2025
72c9de6
Digestion Optimizations
nbollis Jan 14, 2025
d68e40f
Moved testing class to proper subdirectory
nbollis Jan 14, 2025
585ffc5
Adjusted ModFits to have the correct localization for peptide and pro…
nbollis Jan 14, 2025
513a12b
Cleaned up hashset return
nbollis Jan 14, 2025
cfe0586
Merge branch 'ObjectPools' of https://github.com/nbollis/mzLib into O…
nbollis Jan 14, 2025
b13e940
Digestion Agent Hashset Return Cleanup
nbollis Jan 14, 2025
db610b2
set fixed mods now modifies in place using a pooled dictionary
nbollis Jan 14, 2025
ce3b412
Merge branch 'master' into DigestionFixedFix
nbollis Jan 14, 2025
468c46d
Added comments to digestion
nbollis Jan 14, 2025
2c12e59
Refactor code for readability and efficiency
nbollis Jan 14, 2025
349d7b1
Merge branch 'DigestionFixedFix' of https://github.com/nbollis/mzLib …
nbollis Jan 14, 2025
81aefc5
Added many comments
nbollis Jan 15, 2025
51dfbfe
merged in pool comments
nbollis Jan 15, 2025
21ac5b1
Merge branch 'master' into DigestionFixedFix
nbollis Jan 15, 2025
43d6882
set fixed mods namechange
nbollis Jan 16, 2025
ddbcf01
Eliminated IsN or IS5' in favor of unified method
nbollis Jan 16, 2025
0b59b0b
Extracted all variable modification combination generation to parent …
nbollis Jan 16, 2025
9096c21
removed fixed mods changes
nbollis Jan 16, 2025
e6a1680
Fixed mod terminal adjustment
nbollis Jan 16, 2025
010f93f
removed unnecessary namespace
nbollis Jan 16, 2025
c8abae3
Merge branch 'DigestionFixedFix' into DigestionFixedAndVariableFixed
nbollis Jan 16, 2025
fac20c9
Replaced List with Sorted Sets in Variable Mod Dictionary Pool
nbollis Jan 16, 2025
4e93b36
Extracted AppendFixedToVariable
nbollis Jan 16, 2025
957d213
Changed from ref to in parameters
nbollis Jan 16, 2025
096a432
Refactor modification comparison handling
nbollis Jan 16, 2025
f8048a5
supposedToBeDifferent
trishorts Jan 14, 2025
3d30ad5
dunno
trishorts Jan 14, 2025
797a246
correct protein accession now
trishorts Jan 15, 2025
6a02d83
j
trishorts Jan 15, 2025
d752e6d
Adjusted shortreed Test
nbollis Jan 16, 2025
43c0caa
Merge branch 'master' into DigestionFixedAndVariableFixed
nbollis Jan 17, 2025
1a3e410
Refactor modification comparison logic
nbollis Jan 17, 2025
9024441
merge
nbollis Jan 17, 2025
781f3b1
Removed IComparable from ModificationMotif
nbollis Jan 17, 2025
f3f6216
Renamed xml databases to be more verbose
nbollis Jan 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions mzLib/Omics/Digestion/DigestionProduct.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
using MzLibUtil;
using MzLibUtil;
using Omics.Modifications;

namespace Omics.Digestion
{
public abstract class DigestionProduct
{
protected static readonly DictionaryPool<int, List<Modification>> DictionaryPool = new();
protected static readonly DictionaryPool<int, SortedSet<Modification>> DictionaryPool = new();
protected static readonly DictionaryPool<int, Modification> FixedModDictionaryPool = new(8);

protected string _baseSequence;
Expand Down Expand Up @@ -54,29 +54,30 @@ protected DigestionProduct(IBioPolymer parent, int oneBasedStartResidue, int one
/// Then, it iterates through all possible numbers of modifications and generates the corresponding modification patterns.
/// The returned dictionary is then appended with fixed modifications and used to construct a peptide with set mods
/// </remarks>
protected static IEnumerable<Dictionary<int, Modification>> GetVariableModificationPatterns(Dictionary<int, List<Modification>> possibleVariableModifications, int maxModsForPeptide, int peptideLength)
protected static IEnumerable<Dictionary<int, Modification>> GetVariableModificationPatterns(Dictionary<int, SortedSet<Modification>> possibleVariableModifications, int maxModsForPeptide, int peptideLength)
{
if (possibleVariableModifications.Count <= 0)
yield break;

int[] baseVariableModificationPattern = new int[peptideLength + 4];
int totalAvailableMods = possibleVariableModifications.Values.Sum(modList => modList?.Count ?? 0);
int maxVariableMods = Math.Min(totalAvailableMods, maxModsForPeptide);
var variableModKvpList = possibleVariableModifications.ToList();

for (int variable_modifications = 0; variable_modifications <= maxVariableMods; variable_modifications++)
{
foreach (int[] variable_modification_pattern in GetVariableModificationPatternsRecursive(possibleVariableModifications.ToList(),
foreach (int[] variable_modification_pattern in GetVariableModificationPatternsRecursive(variableModKvpList,
possibleVariableModifications.Count - variable_modifications, baseVariableModificationPattern, 0))
{
// use modification pattern to construct a dictionary of modifications for the peptide
var modificationPattern = new Dictionary<int, Modification>(possibleVariableModifications.Count);

foreach (KeyValuePair<int, List<Modification>> kvp in possibleVariableModifications)
foreach (var variableModSet in possibleVariableModifications)
{
int modIndex = variable_modification_pattern[kvp.Key] - 1;
int modIndex = variable_modification_pattern[variableModSet.Key] - 1;
if (modIndex >= 0)
{
modificationPattern.Add(kvp.Key, kvp.Value[modIndex]);
modificationPattern.Add(variableModSet.Key, variableModSet.Value.ElementAt(modIndex));
}
}

Expand Down Expand Up @@ -162,13 +163,13 @@ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length,
/// This method iterates through all variable modifications and assigns them to the appropriate positions in the peptide.
/// It considers different location restrictions such as N-terminal, C-terminal, and anywhere within the peptide.
/// </remarks>
protected void PopulateVariableModifications(List<Modification> allVariableMods, in Dictionary<int, List<Modification>> twoBasedDictToPopulate)
protected void PopulateVariableModifications(List<Modification> allVariableMods, in Dictionary<int, SortedSet<Modification>> twoBasedDictToPopulate)
{
int peptideLength = OneBasedEndResidue - OneBasedStartResidue + 1;
var pepNTermVariableMods = new List<Modification>();
var pepNTermVariableMods = new SortedSet<Modification>();
twoBasedDictToPopulate.Add(1, pepNTermVariableMods);

var pepCTermVariableMods = new List<Modification>();
var pepCTermVariableMods = new SortedSet<Modification>();
twoBasedDictToPopulate.Add(peptideLength + 2, pepCTermVariableMods);

// VARIABLE MODS
Expand All @@ -187,7 +188,7 @@ protected void PopulateVariableModifications(List<Modification> allVariableMods,
{
if (!twoBasedDictToPopulate.TryGetValue(r + 2, out var residueVariableMods))
{
residueVariableMods = new List<Modification>() { variableModification };
residueVariableMods = new SortedSet<Modification>() { variableModification };
twoBasedDictToPopulate.Add(r + 2, residueVariableMods);
}
else
Expand Down Expand Up @@ -232,7 +233,7 @@ protected void PopulateVariableModifications(List<Modification> allVariableMods,
{
if (!twoBasedDictToPopulate.TryGetValue(r + 2, out var residueVariableMods))
{
residueVariableMods = new List<Modification>() { variableModification };
residueVariableMods = new SortedSet<Modification>() { variableModification };
twoBasedDictToPopulate.Add(r + 2, residueVariableMods);
}
else
Expand Down Expand Up @@ -267,7 +268,6 @@ protected void AppendFixedModificationsToVariable(in Dictionary<int, Modificatio
{
if (variableModPattern.ContainsKey(fixedModPattern.Key))
continue;

numFixedMods++;
variableModPattern.Add(fixedModPattern.Key, fixedModPattern.Value);
}
Expand All @@ -287,7 +287,7 @@ protected void AppendFixedModificationsToVariable(in Dictionary<int, Modificatio
/// This method uses recursion to generate all possible combinations of variable modifications for a given peptide.
/// It considers both modified and unmodified residues and generates patterns accordingly.
/// </remarks>
private static IEnumerable<int[]> GetVariableModificationPatternsRecursive(List<KeyValuePair<int, List<Modification>>> possibleVariableModifications,
private static IEnumerable<int[]> GetVariableModificationPatternsRecursive(List<KeyValuePair<int, SortedSet<Modification>>> possibleVariableModifications,
int unmodifiedResiduesDesired, int[] variableModificationPattern, int index)
{
if (index < possibleVariableModifications.Count - 1)
Expand Down
26 changes: 20 additions & 6 deletions mzLib/Omics/Modifications/Modification.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
using Chemistry;
using MassSpectrometry;
using Omics.Modifications;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Omics.Modifications
Expand All @@ -13,7 +8,7 @@
/// Represents a modification
/// Mods.txt format was taken from https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/ptmlist.txt
/// </summary>
public class Modification
public class Modification : IComparable<Modification>
{
public string IdWithMotif { get; private set; }
public string OriginalId { get; private set; }
Expand Down Expand Up @@ -59,7 +54,7 @@
}
}

public Modification(string _originalId = null, string _accession = null, string _modificationType = null, string _featureType = null,

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / integration

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / integration

Cannot convert null literal to non-nullable reference type.

Check warning on line 57 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / integration

Cannot convert null literal to non-nullable reference type.
ModificationMotif _target = null, string _locationRestriction = "Unassigned.", ChemicalFormula _chemicalFormula = null,
double? _monoisotopicMass = null, Dictionary<string, IList<string>> _databaseReference = null,
Dictionary<string, IList<string>> _taxonomicRange = null, List<string> _keywords = null,
Expand Down Expand Up @@ -129,7 +124,7 @@
}
}

public override bool Equals(object o)

Check warning on line 127 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Nullability of type of parameter 'o' doesn't match overridden member (possibly because of nullability attributes).

Check warning on line 127 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / build

Nullability of type of parameter 'o' doesn't match overridden member (possibly because of nullability attributes).

Check warning on line 127 in mzLib/Omics/Modifications/Modification.cs

View workflow job for this annotation

GitHub Actions / integration

Nullability of type of parameter 'o' doesn't match overridden member (possibly because of nullability attributes).
{
Modification m = o as Modification;
return o != null
Expand Down Expand Up @@ -299,5 +294,24 @@

return sb.ToString();
}


/// <summary>
/// Defines a consistent ordering between modifications. Used by the sorted sets built during
/// variable modification generation so that modifications are always enumerated in the same order.
/// </summary>
/// <remarks>
/// Keys are compared in sequence, each one only breaking ties left by the previous:
/// IdWithMotif, ModificationType, LocationRestriction (all ordinal string comparisons),
/// and finally MonoisotopicMass.
/// UniProt annotations also contain an evidence level. Future work could include this in the
/// ordering of modifications for digestion.
/// </remarks>
/// <param name="other">The modification to compare against; a null value orders before this instance.</param>
/// <returns>
/// A negative value if this instance orders before <paramref name="other"/>, zero if every
/// compared key is equal, and a positive value otherwise.
/// </returns>
public int CompareTo(Modification? other)
{
    // Any instance orders after null.
    if (other == null)
    {
        return 1;
    }

    // Primary key: the id including its motif.
    int result = string.Compare(IdWithMotif, other.IdWithMotif, StringComparison.Ordinal);

    // First tie-breaker: modification type.
    if (result == 0)
    {
        result = string.Compare(ModificationType, other.ModificationType, StringComparison.Ordinal);
    }

    // Second tie-breaker: location restriction.
    if (result == 0)
    {
        result = string.Compare(LocationRestriction, other.LocationRestriction, StringComparison.Ordinal);
    }

    // Final tie-breaker: monoisotopic mass.
    if (result == 0)
    {
        result = Nullable.Compare(MonoisotopicMass, other.MonoisotopicMass);
    }

    return result;
}
}
}
4 changes: 2 additions & 2 deletions mzLib/Omics/Modifications/ModificationLocalization.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ public static bool ModFits(Modification attemptToLocalize, string sequence, int
switch (attemptToLocalize.LocationRestriction)
{
case "N-terminal." when bioPolymerOneBasedIndex > 2:
case "Peptide N-terminal." when digestionProductOneBasedIndex > 1:
case "Peptide N-terminal." when digestionProductOneBasedIndex > 1 || bioPolymerOneBasedIndex == 1:
case "C-terminal." when bioPolymerOneBasedIndex < sequence.Length:
case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength:
case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength || bioPolymerOneBasedIndex == sequence.Length:
case "5'-terminal." when bioPolymerOneBasedIndex > 2:
// first residue in oligo but not first in nucleic acid
case "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1
Expand Down
13 changes: 0 additions & 13 deletions mzLib/Omics/Modifications/ModificationMotif.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,6 @@ public static bool TryGetMotif(string motifString, out ModificationMotif motif)
}
return false;
}
// Commented out by AVC on 4/5/23. Methods were unused and untested
// since 2017.
// public override bool Equals(object o)
// {
// ModificationMotif m = o as ModificationMotif;
// return m != null
// && m.motifString == motifString;
// }
//
// public override int GetHashCode()
// {
// return motifString.GetHashCode();
// }

public override string ToString()
{
Expand Down
Loading
Loading