Skip to content

Commit

Permalink
Refactor modification comparison logic
Browse files Browse the repository at this point in the history
Removed `ModificationComparer` and updated `Modification` and `ModificationMotif` classes to implement `IComparable` for custom comparison logic. Added new test cases to validate changes. Cleaned up unused code and adjusted methods to ensure proper functionality.
  • Loading branch information
nbollis committed Jan 17, 2025
1 parent d752e6d commit 1a3e410
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 68 deletions.
57 changes: 9 additions & 48 deletions mzLib/Omics/Digestion/DigestionProduct.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ namespace Omics.Digestion
{
public abstract class DigestionProduct
{
private static readonly ModificationComparer ModComparer = new();
protected static readonly DictionaryPool<int, SortedSet<Modification>> DictionaryPool = new();
protected static readonly DictionaryPool<int, Modification> FixedModDictionaryPool = new(8);

Expand Down Expand Up @@ -63,21 +62,22 @@ protected static IEnumerable<Dictionary<int, Modification>> GetVariableModificat
int[] baseVariableModificationPattern = new int[peptideLength + 4];
int totalAvailableMods = possibleVariableModifications.Values.Sum(modList => modList?.Count ?? 0);
int maxVariableMods = Math.Min(totalAvailableMods, maxModsForPeptide);
var variableModKvpList = possibleVariableModifications.ToList();

for (int variable_modifications = 0; variable_modifications <= maxVariableMods; variable_modifications++)
{
foreach (int[] variable_modification_pattern in GetVariableModificationPatternsRecursive(possibleVariableModifications.ToList(),
foreach (int[] variable_modification_pattern in GetVariableModificationPatternsRecursive(variableModKvpList,
possibleVariableModifications.Count - variable_modifications, baseVariableModificationPattern, 0))
{
// use modification pattern to construct a dictionary of modifications for the peptide
var modificationPattern = new Dictionary<int, Modification>(possibleVariableModifications.Count);

foreach (var kvp in possibleVariableModifications)
foreach (var variableModSet in possibleVariableModifications)
{
int modIndex = variable_modification_pattern[kvp.Key] - 1;
int modIndex = variable_modification_pattern[variableModSet.Key] - 1;
if (modIndex >= 0)
{
modificationPattern.Add(kvp.Key, kvp.Value.ElementAt(modIndex));
modificationPattern.Add(variableModSet.Key, variableModSet.Value.ElementAt(modIndex));
}
}

Expand Down Expand Up @@ -166,10 +166,10 @@ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length,
protected void PopulateVariableModifications(List<Modification> allVariableMods, in Dictionary<int, SortedSet<Modification>> twoBasedDictToPopulate)
{
int peptideLength = OneBasedEndResidue - OneBasedStartResidue + 1;
var pepNTermVariableMods = new SortedSet<Modification>(ModComparer);
var pepNTermVariableMods = new SortedSet<Modification>();
twoBasedDictToPopulate.Add(1, pepNTermVariableMods);

var pepCTermVariableMods = new SortedSet<Modification>(ModComparer);
var pepCTermVariableMods = new SortedSet<Modification>();
twoBasedDictToPopulate.Add(peptideLength + 2, pepCTermVariableMods);

// VARIABLE MODS
Expand All @@ -188,7 +188,7 @@ protected void PopulateVariableModifications(List<Modification> allVariableMods,
{
if (!twoBasedDictToPopulate.TryGetValue(r + 2, out var residueVariableMods))
{
residueVariableMods = new SortedSet<Modification>(ModComparer) { variableModification };
residueVariableMods = new SortedSet<Modification>() { variableModification };
twoBasedDictToPopulate.Add(r + 2, residueVariableMods);
}
else
Expand Down Expand Up @@ -233,7 +233,7 @@ protected void PopulateVariableModifications(List<Modification> allVariableMods,
{
if (!twoBasedDictToPopulate.TryGetValue(r + 2, out var residueVariableMods))
{
residueVariableMods = new SortedSet<Modification>(ModComparer) { variableModification };
residueVariableMods = new SortedSet<Modification>() { variableModification };
twoBasedDictToPopulate.Add(r + 2, residueVariableMods);
}
else
Expand Down Expand Up @@ -268,7 +268,6 @@ protected void AppendFixedModificationsToVariable(in Dictionary<int, Modificatio
{
if (variableModPattern.ContainsKey(fixedModPattern.Key))
continue;

numFixedMods++;
variableModPattern.Add(fixedModPattern.Key, fixedModPattern.Value);
}
Expand Down Expand Up @@ -357,44 +356,6 @@ private bool CanBeCTerminalOrThreePrime(Modification mod, int peptideLength)
&& ModificationLocalization.ModFits(mod, Parent.BaseSequence, peptideLength, peptideLength, OneBasedStartResidue + peptideLength - 1);
}

/// <summary>
/// Comparer used by the sorted sets for variable mod generation so that
/// modifications are consistently ordered.
/// </summary>
private class ModificationComparer : IComparer<Modification>
{
    /// <summary>
    /// Compares two modifications field by field using ordinal string comparison.
    /// Fields are consulted in this order, stopping at the first difference:
    /// IdWithMotif, OriginalId, Accession, ModificationType, FeatureType,
    /// LocationRestriction, FileOrigin.
    /// </summary>
    /// <param name="x">Left-hand modification; a null value sorts before any instance.</param>
    /// <param name="y">Right-hand modification; a null value sorts before any instance.</param>
    /// <returns>Negative, zero, or positive per the usual comparer contract.</returns>
    public int Compare(Modification? x, Modification? y)
    {
        // Same instance (or both null) is trivially equal.
        if (ReferenceEquals(x, y))
        {
            return 0;
        }

        if (y is null)
        {
            return 1;
        }

        if (x is null)
        {
            return -1;
        }

        // Each later field is only read when every earlier field tied.
        int result = Ordinal(x.IdWithMotif, y.IdWithMotif);
        if (result == 0)
        {
            result = Ordinal(x.OriginalId, y.OriginalId);
        }

        if (result == 0)
        {
            result = Ordinal(x.Accession, y.Accession);
        }

        if (result == 0)
        {
            result = Ordinal(x.ModificationType, y.ModificationType);
        }

        if (result == 0)
        {
            result = Ordinal(x.FeatureType, y.FeatureType);
        }

        if (result == 0)
        {
            result = Ordinal(x.LocationRestriction, y.LocationRestriction);
        }

        if (result == 0)
        {
            result = Ordinal(x.FileOrigin, y.FileOrigin);
        }

        return result;

        static int Ordinal(string? left, string? right) =>
            string.Compare(left, right, StringComparison.Ordinal);
    }
}

#endregion
}
}
29 changes: 23 additions & 6 deletions mzLib/Omics/Modifications/Modification.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
using Chemistry;
using MassSpectrometry;
using Omics.Modifications;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Omics.Modifications
Expand All @@ -13,7 +8,7 @@ namespace Omics.Modifications
/// Represents a modification
/// Mods.txt format was taken from https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/ptmlist.txt
/// </summary>
public class Modification
public class Modification : IComparable<Modification>
{
public string IdWithMotif { get; private set; }
public string OriginalId { get; private set; }
Expand Down Expand Up @@ -299,5 +294,27 @@ public string ModificationErrorsToString() //reports errors in required fields.

return sb.ToString();
}


/// <summary>
/// Used in the sorted sets for variable mod generation to ensure that modifications are consistently ordered.
/// Fields are compared in this order, stopping at the first difference:
/// IdWithMotif, ModificationType, Target, LocationRestriction, MonoisotopicMass.
/// String fields use ordinal comparison; Target is compared through its own CompareTo.
/// </summary>
/// <remarks>
/// UniProt annotations also contain an evidence level. Future work could include this in the ordering of modifications for digestion.
/// </remarks>
/// <param name="other">The modification to compare with; a null value sorts before this instance.</param>
/// <returns>
/// A negative value when this instance sorts before <paramref name="other"/>, zero when they
/// tie on every compared field, and a positive value otherwise.
/// </returns>
public int CompareTo(Modification? other)
{
    if (other is null)
    {
        return 1;
    }

    // An instance always ties with itself; skip the field-by-field work.
    if (ReferenceEquals(this, other))
    {
        return 0;
    }

    int idComparison = string.Compare(this.IdWithMotif, other.IdWithMotif, StringComparison.Ordinal);
    if (idComparison != 0)
    {
        return idComparison;
    }

    int typeComparison = string.Compare(this.ModificationType, other.ModificationType, StringComparison.Ordinal);
    if (typeComparison != 0)
    {
        return typeComparison;
    }

    // Guard against a null Target on this instance so the comparison cannot throw;
    // a null Target sorts before a non-null one, mirroring how null strings are ordered above.
    int motifComparison;
    if (this.Target is null)
    {
        motifComparison = other.Target is null ? 0 : -1;
    }
    else
    {
        motifComparison = this.Target.CompareTo(other.Target);
    }

    if (motifComparison != 0)
    {
        return motifComparison;
    }

    int locRestrictionComparison = string.Compare(this.LocationRestriction, other.LocationRestriction, StringComparison.Ordinal);
    if (locRestrictionComparison != 0)
    {
        return locRestrictionComparison;
    }

    return Nullable.Compare(this.MonoisotopicMass, other.MonoisotopicMass);
}
}
}
22 changes: 8 additions & 14 deletions mzLib/Omics/Modifications/ModificationMotif.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Omics.Modifications
{
public class ModificationMotif
public class ModificationMotif : IComparable<ModificationMotif>
{
private static readonly Regex ModificationMotifRegex = new Regex(@"^[A-Za-z]+$", RegexOptions.Compiled);
private readonly string motifString;
Expand All @@ -28,19 +28,13 @@ public static bool TryGetMotif(string motifString, out ModificationMotif motif)
}
return false;
}
// Commented out by AVC on 4/5/23. Methods were unused and untested
// since 2017.
// public override bool Equals(object o)
// {
// ModificationMotif m = o as ModificationMotif;
// return m != null
// && m.motifString == motifString;
// }
//
// public override int GetHashCode()
// {
// return motifString.GetHashCode();
// }

/// <summary>
/// Orders motifs by an ordinal comparison of their motif strings.
/// </summary>
/// <param name="other">The motif to compare with; a null value sorts before this instance.</param>
/// <returns>Negative, zero, or positive per the usual comparison contract.</returns>
public int CompareTo(ModificationMotif? other) =>
    other == null
        ? 1
        : string.Compare(motifString, other.motifString, StringComparison.Ordinal);

public override string ToString()
{
Expand Down
100 changes: 100 additions & 0 deletions mzLib/Test/TestModifications.cs
Original file line number Diff line number Diff line change
Expand Up @@ -802,5 +802,105 @@ public static void TestUniprotResidualMod()

Assert.That(peptide.FullSequence == "PEPT[UniProt:acetylation on T]IDE");
}

[TestCase("A", true)]
[TestCase("a", false)]
[TestCase("Aa", true)]
[TestCase("aaA", true)]
[TestCase("AAA", false)]
[TestCase("AaaA", false)]
[TestCase("123", false)]
[TestCase("Aa1", false)]
public void TryGetMotif_ValidatesMotifStringCorrectly(string input, bool expected)
{
    // Only the success flag is under test; the parsed motif itself is discarded.
    bool parsedSuccessfully = ModificationMotif.TryGetMotif(input, out _);

    NUnit.Framework.Assert.That(parsedSuccessfully, Is.EqualTo(expected));
}

[Test]
public void CompareTo_ReturnsCorrectComparison()
{
    // "Aa" and "Ab" differ in their second character; the third motif repeats "Aa".
    ModificationMotif.TryGetMotif("Aa", out var lowerMotif);
    ModificationMotif.TryGetMotif("Ab", out var higherMotif);
    ModificationMotif.TryGetMotif("Aa", out var lowerMotifTwin);

    int lowerVersusHigher = lowerMotif.CompareTo(higherMotif);
    int higherVersusLower = higherMotif.CompareTo(lowerMotif);
    int lowerVersusTwin = lowerMotif.CompareTo(lowerMotifTwin);

    NUnit.Framework.Assert.That(lowerVersusHigher, Is.LessThan(0));
    NUnit.Framework.Assert.That(higherVersusLower, Is.GreaterThan(0));
    NUnit.Framework.Assert.That(lowerVersusTwin, Is.EqualTo(0));
}

[Test]
public void ToString_ReturnsMotifString()
{
    // The string form of a motif should be the text it was created from.
    ModificationMotif.TryGetMotif("Aa", out var parsedMotif);

    string rendered = parsedMotif.ToString();

    NUnit.Framework.Assert.That(rendered, Is.EqualTo("Aa"));
}

[Test]
public void CompareTo_SameModification_ReturnsZero()
{
    // Two separately constructed modifications built from identical constructor arguments.
    ModificationMotif.TryGetMotif("A", out var sharedMotif);
    var formula = new ChemicalFormula();
    var left = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);
    var right = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);

    int comparison = left.CompareTo(right);

    NUnit.Framework.Assert.That(comparison, Is.EqualTo(0));
}

[Test]
public void CompareTo_DifferentIdWithMotif_ReturnsNonZero()
{
    // The two modifications differ only in their first constructor argument ("mod1" vs "mod2").
    ModificationMotif.TryGetMotif("A", out var sharedMotif);
    var formula = new ChemicalFormula();
    var left = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);
    var right = new Modification("mod2", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);

    int comparison = left.CompareTo(right);

    NUnit.Framework.Assert.That(comparison, Is.Not.EqualTo(0));
}

[Test]
public void CompareTo_DifferentModificationType_ReturnsNonZero()
{
    // The two modifications differ only in their third constructor argument ("type1" vs "type2").
    ModificationMotif.TryGetMotif("A", out var sharedMotif);
    var formula = new ChemicalFormula();
    var left = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);
    var right = new Modification("mod1", "accession1", "type2", "feature1", sharedMotif, "N-terminal.", formula, 100.0);

    int comparison = left.CompareTo(right);

    NUnit.Framework.Assert.That(comparison, Is.Not.EqualTo(0));
}

[Test]
public void CompareTo_DifferentTarget_ReturnsNonZero()
{
    // The two modifications differ only in the motif passed to the constructor ("A" vs "B").
    ModificationMotif.TryGetMotif("A", out var leftMotif);
    ModificationMotif.TryGetMotif("B", out var rightMotif);
    var formula = new ChemicalFormula();
    var left = new Modification("mod1", "accession1", "type1", "feature1", leftMotif, "N-terminal.", formula, 100.0);
    var right = new Modification("mod1", "accession1", "type1", "feature1", rightMotif, "N-terminal.", formula, 100.0);

    int comparison = left.CompareTo(right);

    NUnit.Framework.Assert.That(comparison, Is.Not.EqualTo(0));
}

[Test]
public void CompareTo_DifferentLocationRestriction_ReturnsNonZero()
{
    // The two modifications differ only in the location restriction ("N-terminal." vs "C-terminal.").
    ModificationMotif.TryGetMotif("A", out var sharedMotif);
    var formula = new ChemicalFormula();
    var left = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);
    var right = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "C-terminal.", formula, 100.0);

    int comparison = left.CompareTo(right);

    NUnit.Framework.Assert.That(comparison, Is.Not.EqualTo(0));
}

[Test]
public void CompareTo_DifferentMonoisotopicMass_ReturnsNonZero()
{
    // The two modifications differ only in the final constructor argument (100.0 vs 101.0).
    ModificationMotif.TryGetMotif("A", out var sharedMotif);
    var formula = new ChemicalFormula();
    var left = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 100.0);
    var right = new Modification("mod1", "accession1", "type1", "feature1", sharedMotif, "N-terminal.", formula, 101.0);

    int comparison = left.CompareTo(right);

    NUnit.Framework.Assert.That(comparison, Is.Not.EqualTo(0));
}
}
}

0 comments on commit 1a3e410

Please sign in to comment.