diff --git a/mzLib/Chemistry/ChemicalFormula.cs b/mzLib/Chemistry/ChemicalFormula.cs index e8aaef111..148ed209d 100644 --- a/mzLib/Chemistry/ChemicalFormula.cs +++ b/mzLib/Chemistry/ChemicalFormula.cs @@ -31,7 +31,7 @@ namespace Chemistry /// Formula can change!!! If isotopes or elements are changed. /// [Serializable] - public sealed class ChemicalFormula : IEquatable + public sealed class ChemicalFormula : IEquatable, IHasChemicalFormula { // Main data stores, the isotopes and elements @@ -59,12 +59,14 @@ public ChemicalFormula() Elements = new Dictionary(); } - public ChemicalFormula(ChemicalFormula capFormula) + public ChemicalFormula(IHasChemicalFormula capFormula) { - Isotopes = new Dictionary(capFormula.Isotopes); - Elements = new Dictionary(capFormula.Elements); + Isotopes = new Dictionary(capFormula.ThisChemicalFormula.Isotopes); + Elements = new Dictionary(capFormula.ThisChemicalFormula.Elements); } + public ChemicalFormula ThisChemicalFormula => this; + /// /// Gets the average mass of this chemical formula /// @@ -523,5 +525,44 @@ private string GetHillNotation() otherParts.Sort(); return s + string.Join("", otherParts); } + + public override string ToString() + { + return $"{ThisChemicalFormula.Formula} : {MonoisotopicMass}"; + } + + public static ChemicalFormula operator -(ChemicalFormula left, IHasChemicalFormula right) + { + if (left == null) + if (right == null) + return null; + else + { + var formula = new ChemicalFormula(); + formula.Remove(right); + return formula; + } + if (right == null) + return new ChemicalFormula(left); + + + ChemicalFormula newFormula = new ChemicalFormula(left); + newFormula.Remove(right); + return newFormula; + } + + public static ChemicalFormula operator +(ChemicalFormula left, IHasChemicalFormula right) + { + // if left is null, return right. If right is null, return left. If both are null, return null. If both are not null, add them + if (left == null) + return right == null ? 
null : new ChemicalFormula(right); + if (right == null) + return new ChemicalFormula(left); + + ChemicalFormula newFormula = new ChemicalFormula(left); + newFormula.Add(right); + return newFormula; + + } } } \ No newline at end of file diff --git a/mzLib/Omics/BioPolymerWithSetModsExtensions.cs b/mzLib/Omics/BioPolymerWithSetModsExtensions.cs new file mode 100644 index 000000000..2e5d29718 --- /dev/null +++ b/mzLib/Omics/BioPolymerWithSetModsExtensions.cs @@ -0,0 +1,142 @@ +using System.Text; +using Chemistry; +using Omics.Modifications; + +namespace Omics; + +public static class BioPolymerWithSetModsExtensions +{ + /// + /// This method returns the full sequence with mass shifts INSTEAD OF PTMs in brackets [] + /// Some external tools cannot parse PTMs, instead requiring a numerical input indicating the mass of a PTM in brackets + /// after the position of that modification + /// N-terminal mass shifts are in brackets prior to the first amino acid and apparently missing the + sign + /// + /// + public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withSetMods) + { + var subsequence = new StringBuilder(); + + // modification on peptide N-terminus + if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod)) + { + subsequence.Append('[' + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); + } + + for (int r = 0; r < withSetMods.Length; r++) + { + subsequence.Append(withSetMods[r]); + + // modification on this residue + if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod)) + { + if (mod.MonoisotopicMass > 0) + { + subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); + } + else + { + subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); + } + } + } + + // modification on peptide C-terminus + if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod)) + { + if (mod.MonoisotopicMass > 0) + { + subsequence.Append("[+" +
mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); + } + else + { + subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); + } + } + return subsequence.ToString(); + } + + /// + /// This method returns the full sequence only with the specified modifications in the modstoWritePruned dictionary + /// + /// + /// + /// + public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods, + IReadOnlyDictionary modstoWritePruned) + { + string essentialSequence = withSetMods.BaseSequence; + if (modstoWritePruned != null) + { + var sbsequence = new StringBuilder(); + + // variable modification on peptide N-terminus + if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification pep_n_term_variable_mod)) + { + if (modstoWritePruned.ContainsKey(pep_n_term_variable_mod.ModificationType)) + { + sbsequence.Append('[' + pep_n_term_variable_mod.ModificationType + ":" + pep_n_term_variable_mod.IdWithMotif + ']'); + } + } + for (int r = 0; r < withSetMods.Length; r++) + { + sbsequence.Append(withSetMods[r]); + // variable modification on this residue + if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out Modification residue_variable_mod)) + { + if (modstoWritePruned.ContainsKey(residue_variable_mod.ModificationType)) + { + sbsequence.Append('[' + residue_variable_mod.ModificationType + ":" + residue_variable_mod.IdWithMotif + ']'); + } + } + } + + // variable modification on peptide C-terminus + if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out Modification pep_c_term_variable_mod)) + { + if (modstoWritePruned.ContainsKey(pep_c_term_variable_mod.ModificationType)) + { + sbsequence.Append('[' + pep_c_term_variable_mod.ModificationType + ":" + pep_c_term_variable_mod.IdWithMotif + ']'); + } + } + + essentialSequence = sbsequence.ToString(); + } + return essentialSequence; + } + + /// + /// Determines the full sequence of a BioPolymerWithSetMods from its base sequence and modifications + /// + 
/// + /// + public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods) + { + var subSequence = new StringBuilder(); + + // modification on peptide N-terminus + if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod)) + { + subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']'); + } + + for (int r = 0; r < withSetMods.Length; r++) + { + subSequence.Append(withSetMods[r]); + + // modification on this residue + if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod)) + { + subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']'); + } + } + + // modification on peptide C-terminus + if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod)) + { + subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']'); + } + + return subSequence.ToString(); + } +} \ No newline at end of file diff --git a/mzLib/Proteomics/ProteolyticDigestion/CleavageSpecificity.cs b/mzLib/Omics/Digestion/CleavageSpecificity.cs similarity index 83% rename from mzLib/Proteomics/ProteolyticDigestion/CleavageSpecificity.cs rename to mzLib/Omics/Digestion/CleavageSpecificity.cs index c09f06ebd..46221fa35 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/CleavageSpecificity.cs +++ b/mzLib/Omics/Digestion/CleavageSpecificity.cs @@ -1,4 +1,4 @@ -namespace Proteomics.ProteolyticDigestion +namespace Omics.Digestion { public enum CleavageSpecificity { diff --git a/mzLib/Omics/Digestion/DigestionAgent.cs b/mzLib/Omics/Digestion/DigestionAgent.cs new file mode 100644 index 000000000..d860659b9 --- /dev/null +++ b/mzLib/Omics/Digestion/DigestionAgent.cs @@ -0,0 +1,107 @@ +using Omics.Modifications; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Omics.Digestion +{ + public abstract class DigestionAgent + { + protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List 
motifList, Modification cleavageMod) + { + Name = name; + CleavageSpecificity = cleavageSpecificity; + DigestionMotifs = motifList ?? new List(); + CleavageMod = cleavageMod; + } + + public string Name { get; init; } + public CleavageSpecificity CleavageSpecificity { get; init; } + public List DigestionMotifs { get; init; } + public Modification CleavageMod { get; set; } + + public override string ToString() + { + return Name; + } + + /// + /// Is length of given peptide okay, given minimum and maximum? + /// + /// + /// + /// + /// + protected static bool ValidLength(int length, int minLength, int maxLength) + { + return ValidMinLength(length, minLength) && ValidMaxLength(length, maxLength); + } + + /// + /// Is length of given peptide okay, given minimum? + /// + /// + /// + /// + protected static bool ValidMinLength(int length, int minLength) + { + return length >= minLength; + } + + /// + /// Is length of given peptide okay, given maximum? + /// + /// + /// + /// + protected static bool ValidMaxLength(int? 
length, int maxLength) + { + return !length.HasValue || length <= maxLength; + } + + /// + /// Gets the indices after which this protease will cleave a given protein sequence + /// + /// + /// + public List GetDigestionSiteIndices(string sequence) + { + var indices = new List(); + + for (int r = 0; r < sequence.Length; r++) + { + var cutSiteIndex = -1; + bool cleavagePrevented = false; + + foreach (DigestionMotif motif in DigestionMotifs) + { + var motifResults = motif.Fits(sequence, r); + bool motifFits = motifResults.Item1; + bool motifPreventsCleavage = motifResults.Item2; + + if (motifFits && r + motif.CutIndex < sequence.Length) + { + cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex); + } + + if (motifPreventsCleavage) // if any motif prevents cleave + { + cleavagePrevented = true; + } + } + + // if no motif prevents cleave + if (!cleavagePrevented && cutSiteIndex != -1) + { + indices.Add(cutSiteIndex); + } + } + + indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide + indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide + return indices.Distinct().OrderBy(i => i).ToList(); + } + } +} diff --git a/mzLib/Proteomics/ProteolyticDigestion/DigestionMotif.cs b/mzLib/Omics/Digestion/DigestionMotif.cs similarity index 97% rename from mzLib/Proteomics/ProteolyticDigestion/DigestionMotif.cs rename to mzLib/Omics/Digestion/DigestionMotif.cs index 64e319f2d..52d447779 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/DigestionMotif.cs +++ b/mzLib/Omics/Digestion/DigestionMotif.cs @@ -1,9 +1,7 @@ -using MzLibUtil; -using System.Collections.Generic; -using System.Linq; -using System.Text.RegularExpressions; +using System.Text.RegularExpressions; +using MzLibUtil; -namespace Proteomics.ProteolyticDigestion +namespace Omics.Digestion { public class DigestionMotif { diff --git a/mzLib/Omics/Digestion/DigestionProduct.cs 
b/mzLib/Omics/Digestion/DigestionProduct.cs new file mode 100644 index 000000000..1f45b1a19 --- /dev/null +++ b/mzLib/Omics/Digestion/DigestionProduct.cs @@ -0,0 +1,191 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Omics.Modifications; + +namespace Omics.Digestion +{ + public abstract class DigestionProduct + { + protected string _baseSequence; + + protected DigestionProduct(IBioPolymer parent, int oneBasedStartResidue, int oneBasedEndResidue, int missedCleavages, + CleavageSpecificity cleavageSpecificityForFdrCategory, string? description = null, string? baseSequence = null) + { + Parent = parent; + OneBasedStartResidue = oneBasedStartResidue; + OneBasedEndResidue = oneBasedEndResidue; + MissedCleavages = missedCleavages; + CleavageSpecificityForFdrCategory = cleavageSpecificityForFdrCategory; + Description = description; + _baseSequence = baseSequence; + } + + [field: NonSerialized] public IBioPolymer Parent { get; protected set; } // BioPolymer that this lysis product is a digestion product of + public string Description { get; protected set; } //unstructured explanation of source + public int OneBasedStartResidue { get; }// the residue number at which the peptide begins (the first residue in a protein is 1) + public int OneBasedEndResidue { get; }// the residue number at which the peptide ends + public int MissedCleavages { get; } // the number of missed cleavages this peptide has with respect to the digesting protease + public virtual char PreviousResidue => OneBasedStartResidue > 1 ? Parent[OneBasedStartResidue - 2] : '-'; + + public virtual char NextResidue => OneBasedEndResidue < Parent.Length ? 
Parent[OneBasedEndResidue] : '-'; + public string BaseSequence => + _baseSequence ??= Parent.BaseSequence.Substring(OneBasedStartResidue - 1, + OneBasedEndResidue - OneBasedStartResidue + 1); + public CleavageSpecificity CleavageSpecificityForFdrCategory { get; set; } //structured explanation of source + public int Length => BaseSequence.Length; //how many residues long the peptide is + public char this[int zeroBasedIndex] => BaseSequence[zeroBasedIndex]; + + protected static IEnumerable> GetVariableModificationPatterns(Dictionary> possibleVariableModifications, int maxModsForPeptide, int peptideLength) + { + if (possibleVariableModifications.Count == 0) + { + yield return null; + } + else + { + var possible_variable_modifications = new Dictionary>(possibleVariableModifications); + + int[] base_variable_modification_pattern = new int[peptideLength + 4]; + var totalAvailableMods = possible_variable_modifications.Sum(b => b.Value == null ? 0 : b.Value.Count); + for (int variable_modifications = 0; variable_modifications <= Math.Min(totalAvailableMods, maxModsForPeptide); variable_modifications++) + { + foreach (int[] variable_modification_pattern in GetVariableModificationPatterns(new List>>(possible_variable_modifications), + possible_variable_modifications.Count - variable_modifications, base_variable_modification_pattern, 0)) + { + yield return GetNewVariableModificationPattern(variable_modification_pattern, possible_variable_modifications); + } + } + } + } + + protected Dictionary GetFixedModsOneIsNorFivePrimeTerminus(int length, + IEnumerable allKnownFixedModifications) + { + var fixedModsOneIsNterminus = new Dictionary(length + 3); + foreach (Modification mod in allKnownFixedModifications) + { + switch (mod.LocationRestriction) + { + case "5'-terminal.": + case "Oligo 5'-terminal.": + case "N-terminal.": + case "Peptide N-terminal.": + //the modification is protease associated and is applied to the n-terminal cleaved residue, not at the beginning of the protein
+ if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue)) + { + if (OneBasedStartResidue != 1) + { + fixedModsOneIsNterminus[2] = mod; + } + } + //Normal N-terminal peptide modification + else if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue)) + { + fixedModsOneIsNterminus[1] = mod; + } + break; + + case "Anywhere.": + for (int i = 2; i <= length + 1; i++) + { + if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, i - 1, length, OneBasedStartResidue + i - 2)) + { + fixedModsOneIsNterminus[i] = mod; + } + } + break; + + case "3'-terminal.": + case "Oligo 3'-terminal.": + case "C-terminal.": + case "Peptide C-terminal.": + //the modification is protease associated and is applied to the c-terminal cleaved residue, not if it is at the end of the protein + if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1)) + { + if (OneBasedEndResidue != Parent.Length) + { + fixedModsOneIsNterminus[length + 1] = mod; + } + } + //Normal C-terminal peptide modification + else if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1)) + { + fixedModsOneIsNterminus[length + 2] = mod; + } + break; + + default: + throw new NotSupportedException("This terminus localization is not supported."); + } + } + return fixedModsOneIsNterminus; + } + + + private static IEnumerable GetVariableModificationPatterns(List>> possibleVariableModifications, + int unmodifiedResiduesDesired, int[] variableModificationPattern, int index) + { + if (index < possibleVariableModifications.Count - 1) + { + if (unmodifiedResiduesDesired > 0) + { + variableModificationPattern[possibleVariableModifications[index].Key] = 0; + foreach (int[] new_variable_modification_pattern in 
GetVariableModificationPatterns(possibleVariableModifications, + unmodifiedResiduesDesired - 1, variableModificationPattern, index + 1)) + { + yield return new_variable_modification_pattern; + } + } + if (unmodifiedResiduesDesired < possibleVariableModifications.Count - index) + { + for (int i = 1; i <= possibleVariableModifications[index].Value.Count; i++) + { + variableModificationPattern[possibleVariableModifications[index].Key] = i; + foreach (int[] new_variable_modification_pattern in GetVariableModificationPatterns(possibleVariableModifications, + unmodifiedResiduesDesired, variableModificationPattern, index + 1)) + { + yield return new_variable_modification_pattern; + } + } + } + } + else + { + if (unmodifiedResiduesDesired > 0) + { + variableModificationPattern[possibleVariableModifications[index].Key] = 0; + yield return variableModificationPattern; + } + else + { + for (int i = 1; i <= possibleVariableModifications[index].Value.Count; i++) + { + variableModificationPattern[possibleVariableModifications[index].Key] = i; + yield return variableModificationPattern; + } + } + } + } + + private static Dictionary GetNewVariableModificationPattern(int[] variableModificationArray, + IEnumerable>> possibleVariableModifications) + { + var modification_pattern = new Dictionary(); + + foreach (KeyValuePair> kvp in possibleVariableModifications) + { + if (variableModificationArray[kvp.Key] > 0) + { + modification_pattern.Add(kvp.Key, kvp.Value[variableModificationArray[kvp.Key] - 1]); + } + } + + return modification_pattern; + } + + + } +} diff --git a/mzLib/Omics/Digestion/IDigestionParams.cs b/mzLib/Omics/Digestion/IDigestionParams.cs new file mode 100644 index 000000000..6e4d84e83 --- /dev/null +++ b/mzLib/Omics/Digestion/IDigestionParams.cs @@ -0,0 +1,15 @@ +using Omics.Fragmentation; + +namespace Omics.Digestion +{ + public interface IDigestionParams + { + int MaxMissedCleavages { get; set; } + int MinLength { get; set; } + int MaxLength { get; set; } + int 
MaxModificationIsoforms { get; set; } + int MaxMods { get; set; } + DigestionAgent DigestionAgent { get; } + FragmentationTerminus FragmentationTerminus { get; } + } +} diff --git a/mzLib/Omics/Fragmentation/FragmentationTerminus.cs b/mzLib/Omics/Fragmentation/FragmentationTerminus.cs index ff0bf2a2b..146309caa 100644 --- a/mzLib/Omics/Fragmentation/FragmentationTerminus.cs +++ b/mzLib/Omics/Fragmentation/FragmentationTerminus.cs @@ -11,7 +11,9 @@ public enum FragmentationTerminus Both, //N- and C-terminus N, //N-terminus only C, //C-terminus only - None //used for internal fragments, could be used for top down intact mass? + None, //used for internal fragments, could be used for top down intact mass? + FivePrime, // 5' for NucleicAcids + ThreePrime, // 3' for NucleicAcids } } diff --git a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs new file mode 100644 index 000000000..d5b020160 --- /dev/null +++ b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs @@ -0,0 +1 @@ +using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using Chemistry; using MassSpectrometry; namespace Omics.Fragmentation.Oligo { /// /// Methods dealing with specific product type for RNA molecules /// public static class DissociationTypeCollection { /// /// Product Ion types by dissociation method /// private static readonly Dictionary> ProductsFromDissociationType = new Dictionary>() { { DissociationType.Unknown, new List() }, { DissociationType.CID, new List { ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w, ProductType.y, ProductType.yWaterLoss, ProductType.M } }, { DissociationType.LowCID, new List() { } }, { DissociationType.IRMPD, new List() { } }, { DissociationType.ECD, new List { } }, { DissociationType.PQD, new List { ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d, 
ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.yWaterLoss, ProductType.d, ProductType.M } }, { DissociationType.ETD, new List { } }, { DissociationType.HCD, new List { ProductType.w, ProductType.y, ProductType.aBaseLoss, ProductType.dWaterLoss, ProductType.M } }, { DissociationType.AnyActivationType, new List { } }, { DissociationType.EThcD, new List { } }, { DissociationType.Custom, new List { } }, { DissociationType.ISCID, new List { } } }; /// /// Returns list of product types based upon the dissociation type /// /// /// public static List GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) => ProductsFromDissociationType[dissociationType]; /// /// Mass to be added or subtracted /// private static readonly Dictionary FragmentIonCaps = new Dictionary { { ProductType.a, ChemicalFormula.ParseFormula("H") }, { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") }, { ProductType.b, ChemicalFormula.ParseFormula("OH") }, { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") }, { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") }, { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") }, { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") }, { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") }, { ProductType.w, ChemicalFormula.ParseFormula("H") }, { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") }, { ProductType.x, ChemicalFormula.ParseFormula("O-1H") }, { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") }, { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") }, { ProductType.yWaterLoss, ChemicalFormula.ParseFormula("O-4H-2P-1") }, { ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") }, { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") }, //fragment - Base chemical formula is the corresponding fragment chemical formula subtracting 1 H as H is lost when base is removed { ProductType.aBaseLoss,
ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1 { ProductType.M, new ChemicalFormula() } }; /// /// Returns mass shift by product type /// /// /// public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass; public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType) { switch (fragmentType) { case ProductType.a: case ProductType.aWaterLoss: case ProductType.aBaseLoss: case ProductType.b: case ProductType.bWaterLoss: case ProductType.bBaseLoss: case ProductType.c: case ProductType.cWaterLoss: case ProductType.cBaseLoss: case ProductType.d: case ProductType.dWaterLoss: case ProductType.dBaseLoss: return FragmentationTerminus.FivePrime; case ProductType.w: case ProductType.wWaterLoss: case ProductType.wBaseLoss: case ProductType.x: case ProductType.xWaterLoss: case ProductType.xBaseLoss: case ProductType.y: case ProductType.yWaterLoss: case ProductType.yBaseLoss: case ProductType.z: case ProductType.zWaterLoss: case ProductType.zBaseLoss: return FragmentationTerminus.ThreePrime; case ProductType.M: return FragmentationTerminus.None; case ProductType.aStar: case ProductType.aDegree: case ProductType.bAmmoniaLoss: case ProductType.yAmmoniaLoss: case ProductType.zPlusOne: case ProductType.D: case ProductType.Ycore: case ProductType.Y: default: throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null); } } /// /// 
Product ion types by Fragmentation Terminus /// private static readonly Dictionary> ProductIonTypesFromSpecifiedTerminus = new Dictionary> { { FragmentationTerminus.FivePrime, new List { ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss, ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss, ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss, ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss, } }, { FragmentationTerminus.ThreePrime, new List { ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss, ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss, ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss, ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss, } }, { FragmentationTerminus.Both, new List { ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss, ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss, ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss, ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss, ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss, ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss, ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss, ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss, ProductType.M } } }; public static List GetRnaTerminusSpecificProductTypes( this FragmentationTerminus fragmentationTerminus) { return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus]; } /// /// Returns all product ion types based upon specified terminus /// /// /// /// public static List GetRnaTerminusSpecificProductTypesFromDissociation( this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus) { var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes(); var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType(); return terminusSpecific.Intersect(dissociationSpecific).ToList(); } } } \ No newline at end of file diff --git 
a/mzLib/Omics/Fragmentation/Peptide/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Peptide/DissociationTypeCollection.cs index d88706a0e..b1a79421d 100644 --- a/mzLib/Omics/Fragmentation/Peptide/DissociationTypeCollection.cs +++ b/mzLib/Omics/Fragmentation/Peptide/DissociationTypeCollection.cs @@ -29,7 +29,7 @@ public static List GetTerminusSpecificProductTypesFromDissociation( lock (TerminusSpecificProductTypesFromDissociation) { var productCollection = TerminusSpecificProductTypes.ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus] - .Intersect(ProductsFromDissociationType[dissociationType]); + .Intersect(DissociationTypeCollection.ProductsFromDissociationType[dissociationType]); if (!TerminusSpecificProductTypesFromDissociation.TryGetValue((dissociationType, fragmentationTerminus), out productTypes)) { @@ -100,6 +100,27 @@ public static List GetWaterAndAmmoniaLossProductTypesFromDissociati { ProductType.D, null},// diagnostic ions are not shifted but added sumarily { ProductType.Ycore, null},// neutral Molecular product can be used with neutral loss as fragment { ProductType.Y, null},// diagnostic ions are not shifted but added sumarily + + // Rna Specific types are not shifted through this method, but added to not break the code. + // This is due to proteins and rna having shared ions (e.g. 
b,y) that have different mass shifts + // This behavior for rna is handled by DissociationTypeCollection.GetRnaMassShiftFromProductType + {ProductType.aWaterLoss , null}, + {ProductType.aBaseLoss , null}, + {ProductType.bBaseLoss , null}, + {ProductType.cWaterLoss , null}, + {ProductType.cBaseLoss , null}, + {ProductType.d , null}, + {ProductType.dWaterLoss , null}, + {ProductType.dBaseLoss , null}, + {ProductType.w , null}, + {ProductType.wWaterLoss , null}, + {ProductType.wBaseLoss , null}, + {ProductType.xWaterLoss , null}, + {ProductType.xBaseLoss , null}, + {ProductType.yBaseLoss , null}, + {ProductType.z , null}, + {ProductType.zWaterLoss , null}, + {ProductType.zBaseLoss , null}, }; private static Dictionary DissociationTypeToTerminusMassShift = new Dictionary(); @@ -154,6 +175,29 @@ public static double GetMassShiftFromProductType(ProductType productType) case ProductType.D: NeutralMassShiftFromProductType[productType] = 0; break;// no change case ProductType.Ycore: NeutralMassShiftFromProductType[productType] = 0; break;// no change case ProductType.Y: NeutralMassShiftFromProductType[productType] = 0; break;// no change + + // Nucleic Acid Specific Product Types + case ProductType.aWaterLoss: + case ProductType.aBaseLoss: + case ProductType.bBaseLoss: + case ProductType.cWaterLoss: + case ProductType.cBaseLoss: + case ProductType.d: + case ProductType.dWaterLoss: + case ProductType.dBaseLoss: + case ProductType.w: + case ProductType.wWaterLoss: + case ProductType.wBaseLoss: + case ProductType.xWaterLoss: + case ProductType.xBaseLoss: + case ProductType.yBaseLoss: + case ProductType.z: + case ProductType.zWaterLoss: + case ProductType.zBaseLoss: + return 0.0; + + default: + throw new ArgumentOutOfRangeException(nameof(productType), productType, null); } } diff --git a/mzLib/Omics/Fragmentation/Product.cs b/mzLib/Omics/Fragmentation/Product.cs index 3c28ca2c9..928919c09 100644 --- a/mzLib/Omics/Fragmentation/Product.cs +++ 
b/mzLib/Omics/Fragmentation/Product.cs @@ -3,7 +3,7 @@ namespace Omics.Fragmentation { - public class Product : IHasMass + public class Product : IHasMass, IEquatable { public double NeutralMass { get; } public ProductType ProductType { get; } @@ -89,14 +89,18 @@ public override bool Equals(object obj) return obj is Product other && Equals(other); } - public bool Equals(Product product) + public bool Equals(Product? product) { - return this.ProductType.Equals(product.ProductType) - && this.NeutralMass.Equals(product.NeutralMass) - && this.FragmentNumber == product.FragmentNumber - && this.NeutralLoss.Equals(product.NeutralLoss) - && this.SecondaryFragmentNumber == product.SecondaryFragmentNumber - && this.SecondaryProductType == product.SecondaryProductType; + return product != null + && NeutralMass.Equals(product.NeutralMass) + && ProductType == product.ProductType + && NeutralLoss.Equals(product.NeutralLoss) + && Terminus == product.Terminus + && FragmentNumber == product.FragmentNumber + && ResiduePosition == product.ResiduePosition + && SecondaryProductType == product.SecondaryProductType + && SecondaryFragmentNumber == product.SecondaryFragmentNumber + && MonoisotopicMass.Equals(product.MonoisotopicMass); } public override int GetHashCode() diff --git a/mzLib/Omics/Fragmentation/ProductType.cs b/mzLib/Omics/Fragmentation/ProductType.cs index 5427e19c3..f4538dc36 100644 --- a/mzLib/Omics/Fragmentation/ProductType.cs +++ b/mzLib/Omics/Fragmentation/ProductType.cs @@ -19,23 +19,47 @@ public enum ProductType //y° y-H2O //z [C]+[M]-NH2 + // Base ions are for Nucleic acids in which the base is cleaved as a neutral loss during fragmentation + // schematic for RNA fragmentation modes can be found below + // https://www.researchgate.net/figure/The-standard-nomenclature-for-oligonucleotide-fragmentation-during-collisioninduced_fig6_271536997 + // Base loss ions are for Nucleic acids in which the base is cleaved as a neutral loss during fragmentation + // These base 
losses have only been explicitly confirmed for 5' fragments (a,b,c,d) + // The base loss ions for 3' fragments (w,x,y,z) are theoretical and have not been confirmed + a, aStar, aDegree, + aWaterLoss, + aBaseLoss, b, bAmmoniaLoss, bWaterLoss, //BnoB1ions, + bBaseLoss, c, + cWaterLoss, + cBaseLoss, + d, + dWaterLoss, + dBaseLoss, + w, + wWaterLoss, + wBaseLoss, x, + xWaterLoss, + xBaseLoss, y, yAmmoniaLoss, yWaterLoss, - zPlusOne,//This is zDot plus H + yBaseLoss, + z, + zPlusOne, //This is zDot plus H zDot, - M, //this is the molecular ion // [M] - D, //this is a diagnostic ion // Modification loss mass - Ycore, //Glyco core Y ions // [pep] + Neutral core Glycan mass (such as: [pep] + [N]) //Which already consider the loss of H2O and H-transfer - Y //Glyco Y ions // [pep] + other Glycan mass + zWaterLoss, + zBaseLoss, + M, //this is the molecular ion // [M] + D, //this is a diagnostic ion // Modification loss mass + Ycore, //Glyco core Y ions // [pep] + Neutral core Glycan mass (such as: [pep] + [N]) //Which already consider the loss of H2O and H-transfer + Y //Glyco Y ions // [pep] + other Glycan mass } } diff --git a/mzLib/Omics/IBioPolymer.cs b/mzLib/Omics/IBioPolymer.cs new file mode 100644 index 000000000..e7506e046 --- /dev/null +++ b/mzLib/Omics/IBioPolymer.cs @@ -0,0 +1,28 @@ +using Chemistry; +using MassSpectrometry; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Omics.Digestion; +using Omics.Modifications; + +namespace Omics +{ + public interface IBioPolymer + { + string Name { get; } + string BaseSequence { get; } + int Length { get; } + string DatabaseFilePath { get; } + bool IsDecoy { get; } + bool IsContaminant { get; } + string Organism { get; } + string Accession { get; } + IDictionary> OneBasedPossibleLocalizedModifications { get; } + char this[int zeroBasedIndex] => BaseSequence[zeroBasedIndex]; + + IEnumerable Digest(IDigestionParams digestionParams, List allKnownFixedModifications, + List
variableModifications, List silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null, bool topDownTruncationSearch = false); + } +} diff --git a/mzLib/Omics/IBioPolymerWithSetMods.cs b/mzLib/Omics/IBioPolymerWithSetMods.cs new file mode 100644 index 000000000..5ff39769e --- /dev/null +++ b/mzLib/Omics/IBioPolymerWithSetMods.cs @@ -0,0 +1,66 @@ +using System.Text; +using Chemistry; +using MassSpectrometry; +using Omics.Digestion; +using Omics.Fragmentation; +using Omics.Modifications; + +namespace Omics +{ + /// + /// Interface for modified and unmodified precursor ions + /// + /// + /// Proteins -> PeptideWithSetModifications : ProteolyticPeptide + /// Nucleic Acids -> OligoWithSetMods : NucleolyticOligo + /// + public interface IBioPolymerWithSetMods : IHasChemicalFormula + { + string BaseSequence { get; } + string FullSequence { get; } + double MostAbundantMonoisotopicMass { get; } + string SequenceWithChemicalFormulas { get; } + int OneBasedStartResidue { get; } + int OneBasedEndResidue { get; } + int MissedCleavages { get; } + CleavageSpecificity CleavageSpecificityForFdrCategory { get; set; } + char PreviousResidue { get; } + char NextResidue { get; } + IDigestionParams DigestionParams { get; } + Dictionary AllModsOneIsNterminus { get; } + int NumMods { get; } + int NumFixedMods { get; } + int NumVariableMods { get; } + int Length { get; } + char this[int zeroBasedIndex] => BaseSequence[zeroBasedIndex]; + IBioPolymer Parent { get; } + + public void Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus, + List products); + + public void FragmentInternally(DissociationType dissociationType, int minLengthOfFragments, + List products); + + public static string GetBaseSequenceFromFullSequence(string fullSequence) + { + StringBuilder sb = new StringBuilder(); + int bracketCount = 0; + foreach (char c in fullSequence) + { + if (c == '[') + { + bracketCount++; + } + else if (c == ']') + { + 
bracketCount--; + } + else if (bracketCount == 0) + { + sb.Append(c); + } + } + return sb.ToString(); + } + } +} diff --git a/mzLib/Proteomics/Modifications/Modification.cs b/mzLib/Omics/Modifications/Modification.cs similarity index 96% rename from mzLib/Proteomics/Modifications/Modification.cs rename to mzLib/Omics/Modifications/Modification.cs index 153d94b0b..5b2beaa81 100644 --- a/mzLib/Proteomics/Modifications/Modification.cs +++ b/mzLib/Omics/Modifications/Modification.cs @@ -1,13 +1,18 @@ using Chemistry; using MassSpectrometry; +using Omics.Modifications; using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Text; -namespace Proteomics +namespace Omics.Modifications { + /// + /// Represents a modification + /// Mods.txt format was taken from https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/docs/ptmlist.txt + /// public class Modification { public string IdWithMotif { get; private set; } @@ -109,24 +114,20 @@ public static string ModLocationOnPeptideOrProtein(string _locationRestriction) switch (_locationRestriction) { case "N-terminal.": - return _locationRestriction; - case "C-terminal.": - return _locationRestriction; - case "Peptide N-terminal.": - return _locationRestriction; - case "Peptide C-terminal.": - return _locationRestriction; - case "Anywhere.": + case "3'-terminal.": + case "5'-terminal.": + case "Oligo 3'-terminal.": + case "Oligo 5'-terminal.": return _locationRestriction; default: return "Unassigned."; } - } + } public override bool Equals(object o) { @@ -298,7 +299,5 @@ public string ModificationErrorsToString() //reports errors in required fields. 
return sb.ToString(); } - - } } \ No newline at end of file diff --git a/mzLib/Omics/Modifications/ModificationLocalization.cs b/mzLib/Omics/Modifications/ModificationLocalization.cs new file mode 100644 index 000000000..bbf25d1a3 --- /dev/null +++ b/mzLib/Omics/Modifications/ModificationLocalization.cs @@ -0,0 +1,66 @@ +namespace Omics.Modifications +{ + public static class ModificationLocalization + { + public static bool ModFits(Modification attemptToLocalize, string sequence, int digestionProductOneBasedIndex, int digestionProductLength, int bioPolymerOneBasedIndex) + { + // First find the capital letter... + var motif = attemptToLocalize.Target; + var motifStartLocation = motif.ToString().IndexOf(motif.ToString().First(b => char.IsUpper(b))); + + // Look up starting at and including the capital letter + var proteinToMotifOffset = bioPolymerOneBasedIndex - motifStartLocation - 1; + var indexUp = 0; + while (indexUp < motif.ToString().Length) + { + if (indexUp + proteinToMotifOffset < 0 || indexUp + proteinToMotifOffset >= sequence.Length + || !MotifMatches(motif.ToString()[indexUp], sequence[indexUp + proteinToMotifOffset])) + { + return false; + } + indexUp++; + } + switch (attemptToLocalize.LocationRestriction) + { + case "N-terminal." when bioPolymerOneBasedIndex > 2: + case "Peptide N-terminal." when digestionProductOneBasedIndex > 1: + case "C-terminal." when bioPolymerOneBasedIndex < sequence.Length: + case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength: + case "5'-terminal." when bioPolymerOneBasedIndex > 2: + // first residue in oligo but not first in nucleic acid + case "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1 + || bioPolymerOneBasedIndex == 1: + case "3'-terminal." when bioPolymerOneBasedIndex < sequence.Length: + // last residue in oligo but not last in nucleic acid + case "Oligo 3'-terminal." 
when digestionProductOneBasedIndex < digestionProductLength + || bioPolymerOneBasedIndex == sequence.Length: + return false; + + default: + // I guess Anywhere. and Unassigned. are true since how do you localize anywhere or unassigned. + + return true; + } + } + + public static bool UniprotModExists(IBioPolymer bioPolymer, int i, Modification attemptToLocalize) + { + // uniprot mods with same mass takes precedence over variable mods + if (bioPolymer.OneBasedPossibleLocalizedModifications.TryGetValue(i, out List modsAtThisLocation)) { + return modsAtThisLocation.Any(p => Math.Abs((double)(p.MonoisotopicMass - attemptToLocalize.MonoisotopicMass)) < 0.001 && p.ModificationType == "UniProt"); + } + + return false; + } + + private static bool MotifMatches(char motifChar, char sequenceChar) + { + char upperMotifChar = char.ToUpper(motifChar); + return upperMotifChar.Equals('X') + || upperMotifChar.Equals(sequenceChar) + || upperMotifChar.Equals('B') && new[] { 'D', 'N' }.Contains(sequenceChar) + || upperMotifChar.Equals('J') && new[] { 'I', 'L' }.Contains(sequenceChar) + || upperMotifChar.Equals('Z') && new[] { 'E', 'Q' }.Contains(sequenceChar); + } + } +} \ No newline at end of file diff --git a/mzLib/Proteomics/Modifications/ModificationMotif.cs b/mzLib/Omics/Modifications/ModificationMotif.cs similarity index 94% rename from mzLib/Proteomics/Modifications/ModificationMotif.cs rename to mzLib/Omics/Modifications/ModificationMotif.cs index 45d990bec..4e0833dda 100644 --- a/mzLib/Proteomics/Modifications/ModificationMotif.cs +++ b/mzLib/Omics/Modifications/ModificationMotif.cs @@ -1,7 +1,6 @@ -using System.Linq; -using System.Text.RegularExpressions; +using System.Text.RegularExpressions; -namespace Proteomics +namespace Omics.Modifications { public class ModificationMotif { diff --git a/mzLib/Proteomics/Modifications/SilacLabel.cs b/mzLib/Omics/Modifications/SilacLabel.cs similarity index 94% rename from mzLib/Proteomics/Modifications/SilacLabel.cs rename to 
mzLib/Omics/Modifications/SilacLabel.cs index 7a0660203..415a46135 100644 --- a/mzLib/Proteomics/Modifications/SilacLabel.cs +++ b/mzLib/Omics/Modifications/SilacLabel.cs @@ -1,9 +1,6 @@ -using System; -using System.Collections.Generic; -using System.Globalization; -using System.Text; +using System.Globalization; -namespace Proteomics +namespace Omics.Modifications { /// /// Silac labels used to modify unlabeled proteins diff --git a/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs b/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs index 7173ec999..df3d79eec 100644 --- a/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs +++ b/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs @@ -20,7 +20,8 @@ public string Name get { return Sequence + "/" + ChargeState; } } - public LibrarySpectrum(string sequence, double precursorMz, int chargeState, List peaks, double? rt, bool isDecoy = false) : base(peaks.Select(p => p.Mz).ToArray(), peaks.Select(p => p.Intensity).ToArray(), false) + public LibrarySpectrum(string sequence, double precursorMz, int chargeState, List peaks, double? rt, bool isDecoy = false) + : base(peaks.Select(p => p.Mz).ToArray(), peaks.Select(p => p.Intensity).ToArray(), false) { Sequence = sequence; PrecursorMz = precursorMz; diff --git a/mzLib/Proteomics/Modifications/ModificationLocalization.cs b/mzLib/Proteomics/Modifications/ModificationLocalization.cs deleted file mode 100644 index b12c0e19c..000000000 --- a/mzLib/Proteomics/Modifications/ModificationLocalization.cs +++ /dev/null @@ -1,69 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Proteomics -{ - public static class ModificationLocalization - { - public static bool ModFits(Modification attemptToLocalize, string proteinSequence, int peptideOneBasedIndex, int peptideLength, int proteinOneBasedIndex) - { - // First find the capital letter... 
- var motif = attemptToLocalize.Target; - var motifStartLocation = motif.ToString().IndexOf(motif.ToString().First(b => char.IsUpper(b))); - - // Look up starting at and including the capital letter - var proteinToMotifOffset = proteinOneBasedIndex - motifStartLocation - 1; - var indexUp = 0; - while (indexUp < motif.ToString().Length) - { - if (indexUp + proteinToMotifOffset < 0 || indexUp + proteinToMotifOffset >= proteinSequence.Length - || !MotifMatches(motif.ToString()[indexUp], proteinSequence[indexUp + proteinToMotifOffset])) - { - return false; - } - indexUp++; - } - if (attemptToLocalize.LocationRestriction == "N-terminal." && proteinOneBasedIndex > 2) - { - return false; - } - if (attemptToLocalize.LocationRestriction == "Peptide N-terminal." && peptideOneBasedIndex > 1) - { - return false; - } - if (attemptToLocalize.LocationRestriction == "C-terminal." && proteinOneBasedIndex < proteinSequence.Length) - { - return false; - } - if (attemptToLocalize.LocationRestriction == "Peptide C-terminal." && peptideOneBasedIndex < peptideLength) - { - return false; - } - - // I guess Anywhere. and Unassigned. are true since how do you localize anywhere or unassigned. 
- - return true; - } - - public static bool UniprotModExists(Protein protein, int i, Modification attemptToLocalize) - { - // uniprot mods with same mass takes precedence over variable mods - if (protein.OneBasedPossibleLocalizedModifications.TryGetValue(i, out List modsAtThisLocation)) { - return modsAtThisLocation.Any(p => Math.Abs((double)(p.MonoisotopicMass - attemptToLocalize.MonoisotopicMass)) < 0.001 && p.ModificationType == "UniProt"); - } - - return false; - } - - private static bool MotifMatches(char motifChar, char sequenceChar) - { - char upperMotifChar = char.ToUpper(motifChar); - return upperMotifChar.Equals('X') - || upperMotifChar.Equals(sequenceChar) - || upperMotifChar.Equals('B') && new[] { 'D', 'N' }.Contains(sequenceChar) - || upperMotifChar.Equals('J') && new[] { 'I', 'L' }.Contains(sequenceChar) - || upperMotifChar.Equals('Z') && new[] { 'E', 'Q' }.Contains(sequenceChar); - } - } -} \ No newline at end of file diff --git a/mzLib/Proteomics/Protein/Protein.cs b/mzLib/Proteomics/Protein/Protein.cs index b16050ec3..86e51b54f 100644 --- a/mzLib/Proteomics/Protein/Protein.cs +++ b/mzLib/Proteomics/Protein/Protein.cs @@ -3,11 +3,14 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using Omics; +using Omics.Digestion; using Omics.Fragmentation; +using Omics.Modifications; namespace Proteomics { - public class Protein + public class Protein : IBioPolymer { private List _proteolysisProducts; @@ -185,34 +188,15 @@ public IEnumerable ProteolysisProducts public double Probability { get; set; } // for protein pep project - public int Length - { - get - { - return BaseSequence.Length; - } - } + public int Length => BaseSequence.Length; - public string FullDescription - { - get - { - return Accession + "|" + Name + "|" + FullName; - } - } + public string FullDescription => Accession + "|" + Name + "|" + FullName; public string Name { get; } public string FullName { get; } public bool IsContaminant { get; } 
internal IDictionary> OriginalNonVariantModifications { get; set; } - - public char this[int zeroBasedIndex] - { - get - { - return BaseSequence[zeroBasedIndex]; - } - } + public char this[int zeroBasedIndex] => BaseSequence[zeroBasedIndex]; /// /// Formats a string for a UniProt fasta header. See https://www.uniprot.org/help/fasta-headers. @@ -235,32 +219,51 @@ public string GetEnsemblFastaHeader() /// /// Gets peptides for digestion of a protein + /// Legacy + /// + public IEnumerable Digest(DigestionParams digestionParams, + List allKnownFixedModifications, List variableModifications, + List silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null, + bool topDownTruncationSearch = false) => + Digest((IDigestionParams)digestionParams, allKnownFixedModifications, variableModifications, silacLabels, turnoverLabels, topDownTruncationSearch) + .Cast(); + + /// + /// Gets peptides for digestion of a protein + /// Implemented with interfaces to allow for use of both Proteomics and Omics classes /// - public IEnumerable Digest(DigestionParams digestionParams, List allKnownFixedModifications, + public IEnumerable Digest(IDigestionParams digestionParams, List allKnownFixedModifications, List variableModifications, List silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null, bool topDownTruncationSearch = false) { + + if (digestionParams is not DigestionParams digestionParameters) + throw new ArgumentException( + "DigestionParameters must be of type DigestionParams for protein digestion"); + + //can't be null allKnownFixedModifications = allKnownFixedModifications ?? 
new List(); // add in any modifications that are caused by protease digestion - if (digestionParams.Protease.CleavageMod != null && !allKnownFixedModifications.Contains(digestionParams.Protease.CleavageMod)) + if (digestionParameters.Protease.CleavageMod != null && !allKnownFixedModifications.Contains(digestionParameters.Protease.CleavageMod)) { - allKnownFixedModifications.Add(digestionParams.Protease.CleavageMod); + allKnownFixedModifications.Add(digestionParameters.Protease.CleavageMod); } variableModifications = variableModifications ?? new List(); - CleavageSpecificity searchModeType = digestionParams.SearchModeType; + CleavageSpecificity searchModeType = digestionParameters.SearchModeType; - ProteinDigestion digestion = new(digestionParams, allKnownFixedModifications, variableModifications); + ProteinDigestion digestion = new(digestionParameters, allKnownFixedModifications, variableModifications); IEnumerable unmodifiedPeptides = searchModeType == CleavageSpecificity.Semi ? digestion.SpeedySemiSpecificDigestion(this) : digestion.Digestion(this, topDownTruncationSearch); - if (digestionParams.KeepNGlycopeptide || digestionParams.KeepOGlycopeptide) + if (digestionParameters.KeepNGlycopeptide || digestionParameters.KeepOGlycopeptide) { - unmodifiedPeptides = GetGlycoPeptides(unmodifiedPeptides, digestionParams.KeepNGlycopeptide, digestionParams.KeepOGlycopeptide); + unmodifiedPeptides = GetGlycoPeptides(unmodifiedPeptides, digestionParameters.KeepNGlycopeptide, digestionParameters.KeepOGlycopeptide); } - IEnumerable modifiedPeptides = unmodifiedPeptides.SelectMany(peptide => peptide.GetModifiedPeptides(allKnownFixedModifications, digestionParams, variableModifications)); + IEnumerable modifiedPeptides = unmodifiedPeptides.SelectMany(peptide => + peptide.GetModifiedPeptides(allKnownFixedModifications, digestionParameters, variableModifications)); //Remove terminal modifications (if needed) if (searchModeType == CleavageSpecificity.SingleN || @@ -273,7 +276,7 @@ 
public IEnumerable Digest(DigestionParams digestion //add silac labels (if needed) if (silacLabels != null) { - return GetSilacPeptides(modifiedPeptides, silacLabels, digestionParams.GeneratehUnlabeledProteinsForSilac, turnoverLabels); + return GetSilacPeptides(modifiedPeptides, silacLabels, digestionParameters.GeneratehUnlabeledProteinsForSilac, turnoverLabels); } return modifiedPeptides; diff --git a/mzLib/Proteomics/Protein/ProteoformLevelClassifier.cs b/mzLib/Proteomics/Protein/ProteoformLevelClassifier.cs index 32bec5161..2cdb0b20f 100644 --- a/mzLib/Proteomics/Protein/ProteoformLevelClassifier.cs +++ b/mzLib/Proteomics/Protein/ProteoformLevelClassifier.cs @@ -1,4 +1,5 @@ -using Proteomics.ProteolyticDigestion; +using Omics; +using Proteomics.ProteolyticDigestion; using System; using System.Collections.Generic; using System.Linq; @@ -24,7 +25,7 @@ public static string ClassifyPrSM(string fullSequenceString, string geneString) //determine sequence ambiguity - string firstBaseSequence = PeptideWithSetModifications.GetBaseSequenceFromFullSequence(sequences[0]).ToUpper(); //get first sequence with modifications removed + string firstBaseSequence = IBioPolymerWithSetMods.GetBaseSequenceFromFullSequence(sequences[0]).ToUpper(); //get first sequence with modifications removed bool sequenceIdentified = !SequenceContainsUnknownAminoAcids(firstBaseSequence); //check if there are any ambiguous amino acids (i.e. B, J, X, Z) //for every other sequence reported if (sequenceIdentified) //if there weren't any unknown amino acids reported. 
@@ -32,7 +33,7 @@ public static string ClassifyPrSM(string fullSequenceString, string geneString) for (int i = 1; i < sequences.Length; i++) { //if the unmodified sequences don't match, then there's sequence ambiguity - if (!firstBaseSequence.Equals(PeptideWithSetModifications.GetBaseSequenceFromFullSequence(sequences[i]).ToUpper())) + if (!firstBaseSequence.Equals(IBioPolymerWithSetMods.GetBaseSequenceFromFullSequence(sequences[i]).ToUpper())) { sequenceIdentified = false; break; diff --git a/mzLib/Proteomics/Protein/SequenceVariation.cs b/mzLib/Proteomics/Protein/SequenceVariation.cs index 34279711b..bd52854f3 100644 --- a/mzLib/Proteomics/Protein/SequenceVariation.cs +++ b/mzLib/Proteomics/Protein/SequenceVariation.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.Linq; +using Omics.Modifications; namespace Proteomics { diff --git a/mzLib/Proteomics/Protein/VariantApplication.cs b/mzLib/Proteomics/Protein/VariantApplication.cs index c65accb71..760f2d70a 100644 --- a/mzLib/Proteomics/Protein/VariantApplication.cs +++ b/mzLib/Proteomics/Protein/VariantApplication.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Omics.Modifications; namespace Proteomics { diff --git a/mzLib/Proteomics/ProteolyticDigestion/DigestionParams.cs b/mzLib/Proteomics/ProteolyticDigestion/DigestionParams.cs index 2fbbf50fc..a63a2f541 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/DigestionParams.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/DigestionParams.cs @@ -1,9 +1,11 @@ using System; +using Omics.Digestion; using Omics.Fragmentation; +using Proteomics.ProteolyticDigestion; -namespace Proteomics.ProteolyticDigestion +namespace Proteomics.ProteolyticDigestion { - public class DigestionParams + public class DigestionParams : IDigestionParams { // this parameterless constructor needs to exist to read the toml. // if you can figure out a way to get rid of it, feel free... 
@@ -18,11 +20,11 @@ public DigestionParams(string protease = "trypsin", int maxMissedCleavages = 2, { Protease = ProteaseDictionary.Dictionary[protease]; MaxMissedCleavages = maxMissedCleavages; - MinPeptideLength = minPeptideLength; - MaxPeptideLength = maxPeptideLength; + MinLength = minPeptideLength; + MaxLength = maxPeptideLength; + MaxMods = maxModsForPeptides; MaxModificationIsoforms = maxModificationIsoforms; InitiatorMethionineBehavior = initiatorMethionineBehavior; - MaxModsForPeptide = maxModsForPeptides; SearchModeType = searchModeType; FragmentationTerminus = fragmentationTerminus; RecordSpecificProtease(); @@ -31,13 +33,14 @@ public DigestionParams(string protease = "trypsin", int maxMissedCleavages = 2, KeepOGlycopeptide = keepOGlycopeptide; } - public int MaxMissedCleavages { get; private set; } public InitiatorMethionineBehavior InitiatorMethionineBehavior { get; private set; } - public int MinPeptideLength { get; private set; } - public int MaxPeptideLength { get; private set; } - public int MaxModificationIsoforms { get; private set; } - public int MaxModsForPeptide { get; private set; } - public Protease Protease { get; private set; } + public int MaxMissedCleavages { get; set; } + public int MaxModificationIsoforms { get; set; } + public int MinLength { get; set; } + public int MaxLength { get; set; } + public int MaxMods { get; set; } + public DigestionAgent DigestionAgent => Protease; + public CleavageSpecificity SearchModeType { get; private set; } //for fast semi and nonspecific searching of proteases public FragmentationTerminus FragmentationTerminus { get; private set; } //for fast semi searching of proteases public Protease SpecificProtease { get; private set; } //for fast semi and nonspecific searching of proteases @@ -45,21 +48,30 @@ public DigestionParams(string protease = "trypsin", int maxMissedCleavages = 2, public bool KeepNGlycopeptide { get; private set; } public bool KeepOGlycopeptide { get; private set; } + #region Properties 
overridden by more generic interface + + public Protease Protease { get; private set; } + public int MinPeptideLength => MinLength; + public int MaxPeptideLength => MaxLength; + public int MaxModsForPeptide => MaxMods; + + #endregion + public override bool Equals(object obj) { return obj is DigestionParams a - && MaxMissedCleavages.Equals(a.MaxMissedCleavages) - && MinPeptideLength.Equals(a.MinPeptideLength) - && MaxPeptideLength.Equals(a.MaxPeptideLength) - && InitiatorMethionineBehavior.Equals(a.InitiatorMethionineBehavior) - && MaxModificationIsoforms.Equals(a.MaxModificationIsoforms) - && MaxModsForPeptide.Equals(a.MaxModsForPeptide) - && Protease.Equals(a.Protease) - && SearchModeType.Equals(a.SearchModeType) - && FragmentationTerminus.Equals(a.FragmentationTerminus) - && GeneratehUnlabeledProteinsForSilac.Equals(a.GeneratehUnlabeledProteinsForSilac) - && KeepNGlycopeptide.Equals(a.KeepNGlycopeptide) - && KeepOGlycopeptide.Equals(a.KeepOGlycopeptide); + && MaxMissedCleavages.Equals(a.MaxMissedCleavages) + && MinLength.Equals(a.MinLength) + && MaxLength.Equals(a.MaxLength) + && InitiatorMethionineBehavior.Equals(a.InitiatorMethionineBehavior) + && MaxModificationIsoforms.Equals(a.MaxModificationIsoforms) + && MaxMods.Equals(a.MaxMods) + && Protease.Equals(a.Protease) + && SearchModeType.Equals(a.SearchModeType) + && FragmentationTerminus.Equals(a.FragmentationTerminus) + && GeneratehUnlabeledProteinsForSilac.Equals(a.GeneratehUnlabeledProteinsForSilac) + && KeepNGlycopeptide.Equals(a.KeepNGlycopeptide) + && KeepOGlycopeptide.Equals(a.KeepOGlycopeptide); } public override int GetHashCode() @@ -68,14 +80,14 @@ public override int GetHashCode() MaxMissedCleavages.GetHashCode() ^ InitiatorMethionineBehavior.GetHashCode() ^ MaxModificationIsoforms.GetHashCode() - ^ MaxModsForPeptide.GetHashCode(); + ^ MaxMods.GetHashCode(); } public override string ToString() { - return MaxMissedCleavages + "," + InitiatorMethionineBehavior + "," + MinPeptideLength + "," + 
MaxPeptideLength + "," - + MaxModificationIsoforms + "," + MaxModsForPeptide + "," + SpecificProtease.Name + "," + SearchModeType + "," + FragmentationTerminus + "," - + GeneratehUnlabeledProteinsForSilac + "," + KeepNGlycopeptide + "," + KeepOGlycopeptide; + return MaxMissedCleavages + "," + InitiatorMethionineBehavior + "," + MinLength + "," + MaxLength + "," + + MaxModificationIsoforms + "," + MaxMods + "," + SpecificProtease.Name + "," + SearchModeType + "," + FragmentationTerminus + "," + + GeneratehUnlabeledProteinsForSilac + "," + KeepNGlycopeptide + "," + KeepOGlycopeptide; } private void RecordSpecificProtease() diff --git a/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs b/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs index e6d4381c8..1c02827cb 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs @@ -5,16 +5,19 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using Omics; +using Omics.Digestion; using Omics.Fragmentation; using Omics.Fragmentation.Peptide; +using Omics.Modifications; namespace Proteomics.ProteolyticDigestion { [Serializable] - public class PeptideWithSetModifications : ProteolyticPeptide + public class PeptideWithSetModifications : ProteolyticPeptide, IBioPolymerWithSetMods { public string FullSequence { get; private set; } //sequence with modifications - public readonly int NumFixedMods; + public int NumFixedMods { get; } // Parameter to store a hash code corresponding to a Decoy or a Target peptide // If the peptide in question is a decoy, this pairs it to the target it was generated from // If the peptide in question is a target, this pairs it to its corresponding decoy @@ -35,15 +38,15 @@ public class PeptideWithSetModifications : ProteolyticPeptide /// /// Creates a PeptideWithSetModifications object from a protein. Used when a Protein is digested. 
/// - public PeptideWithSetModifications(Protein protein, DigestionParams digestionParams, int oneBasedStartResidueInProtein, + public PeptideWithSetModifications(Protein protein, IDigestionParams digestionParams, int oneBasedStartResidueInProtein, int oneBasedEndResidueInProtein, CleavageSpecificity cleavageSpecificity, string peptideDescription, int missedCleavages, Dictionary allModsOneIsNterminus, int numFixedMods, string baseSequence = null, int? pairedTargetDecoyHash = null) : base(protein, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificity, peptideDescription, baseSequence) { _allModsOneIsNterminus = allModsOneIsNterminus; NumFixedMods = numFixedMods; - _digestionParams = digestionParams; - DetermineFullSequence(); + _digestionParams = digestionParams as DigestionParams; + FullSequence = this.DetermineFullSequence(); ProteinAccession = protein.Accession; UpdateCleavageSpecificity(); PairedTargetDecoyHash = pairedTargetDecoyHash; // Added PairedTargetDecoyHash as a nullable integer @@ -54,7 +57,7 @@ public PeptideWithSetModifications(Protein protein, DigestionParams digestionPar /// Useful for reading in MetaMorpheus search engine output into mzLib objects. /// public PeptideWithSetModifications(string sequence, Dictionary allKnownMods, int numFixedMods = 0, - DigestionParams digestionParams = null, Protein p = null, int oneBasedStartResidueInProtein = int.MinValue, + IDigestionParams digestionParams = null, Protein p = null, int oneBasedStartResidueInProtein = int.MinValue, int oneBasedEndResidueInProtein = int.MinValue, int missedCleavages = int.MinValue, CleavageSpecificity cleavageSpecificity = CleavageSpecificity.Full, string peptideDescription = null, int? 
pairedTargetDecoyHash = null) : base(p, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificity, peptideDescription) @@ -65,10 +68,10 @@ public PeptideWithSetModifications(string sequence, Dictionary _digestionParams; - public Dictionary AllModsOneIsNterminus - { - get { return _allModsOneIsNterminus; } - } + public Dictionary AllModsOneIsNterminus => _allModsOneIsNterminus; - public int NumMods - { - get { return AllModsOneIsNterminus.Count; } - } + public int NumMods => AllModsOneIsNterminus.Count; - public int NumVariableMods - { - get { return NumMods - NumFixedMods; } - } + public int NumVariableMods => NumMods - NumFixedMods; public double MonoisotopicMass { @@ -117,7 +108,8 @@ public double MonoisotopicMass } } - + + public ChemicalFormula ThisChemicalFormula => FullChemicalFormula; public ChemicalFormula FullChemicalFormula { get @@ -149,7 +141,9 @@ public double MostAbundantMonoisotopicMass { IsotopicDistribution dist = IsotopicDistribution.GetDistribution(this.FullChemicalFormula); double maxIntensity = dist.Intensities.Max(); - _mostAbundantMonoisotopicMass = (double)ClassExtensions.RoundedDouble(dist.Masses.ToList()[dist.Intensities.ToList().IndexOf(maxIntensity)]); + _mostAbundantMonoisotopicMass = + (double)ClassExtensions.RoundedDouble( + dist.Masses.ToList()[dist.Intensities.ToList().IndexOf(maxIntensity)]); } return (double)ClassExtensions.RoundedDouble(_mostAbundantMonoisotopicMass.Value); } @@ -214,6 +208,8 @@ public string SequenceWithChemicalFormulas } } + public IBioPolymer Parent => Protein; + /// /// Generates theoretical fragments for given dissociation type for this peptide. /// The "products" parameter is filled with these fragments. 
@@ -617,48 +613,6 @@ public void FragmentInternally(DissociationType dissociationType, int minLengthO } } - public virtual string EssentialSequence(IReadOnlyDictionary modstoWritePruned) - { - string essentialSequence = BaseSequence; - if (modstoWritePruned != null) - { - var sbsequence = new StringBuilder(); - - // variable modification on peptide N-terminus - if (AllModsOneIsNterminus.TryGetValue(1, out Modification pep_n_term_variable_mod)) - { - if (modstoWritePruned.ContainsKey(pep_n_term_variable_mod.ModificationType)) - { - sbsequence.Append('[' + pep_n_term_variable_mod.ModificationType + ":" + pep_n_term_variable_mod.IdWithMotif + ']'); - } - } - for (int r = 0; r < Length; r++) - { - sbsequence.Append(this[r]); - // variable modification on this residue - if (AllModsOneIsNterminus.TryGetValue(r + 2, out Modification residue_variable_mod)) - { - if (modstoWritePruned.ContainsKey(residue_variable_mod.ModificationType)) - { - sbsequence.Append('[' + residue_variable_mod.ModificationType + ":" + residue_variable_mod.IdWithMotif + ']'); - } - } - } - - // variable modification on peptide C-terminus - if (AllModsOneIsNterminus.TryGetValue(Length + 2, out Modification pep_c_term_variable_mod)) - { - if (modstoWritePruned.ContainsKey(pep_c_term_variable_mod.ModificationType)) - { - sbsequence.Append('[' + pep_c_term_variable_mod.ModificationType + ":" + pep_c_term_variable_mod.IdWithMotif + ']'); - } - } - - essentialSequence = sbsequence.ToString(); - } - return essentialSequence; - } - public PeptideWithSetModifications Localize(int j, double massToLocalize) { var dictWithLocalizedMass = new Dictionary(AllModsOneIsNterminus); @@ -778,7 +732,7 @@ public bool IncludesSpliceSite(SpliceSite site) } //need to determine what the cleavage sites are for the protease used (will allow us to determine if new cleavage sites were made by variant) - List proteasesCleavageSites = DigestionParams.Protease.DigestionMotifs; + List proteasesCleavageSites = 
DigestionParams.DigestionAgent.DigestionMotifs; //if the variant ends the AA before the peptide starts then it may have caused c-terminal cleavage //see if the protease used for digestion has C-terminal cleavage sites List cTerminalResidue = proteasesCleavageSites.Where(dm => dm.CutIndex == 1).Select(d => d.InducingCleavage).ToList(); @@ -930,11 +884,6 @@ public override string ToString() return FullSequence + string.Join("\t", AllModsOneIsNterminus.Select(m => m.ToString())); } - public string FullSequenceWithMassShift() - { - return DetermineFullSequenceWithMassShifts(); - } - public override bool Equals(object obj) { var q = obj as PeptideWithSetModifications; @@ -946,9 +895,9 @@ public override bool Equals(object obj) return q != null && q.FullSequence.Equals(this.FullSequence) - && q.OneBasedStartResidueInProtein == this.OneBasedStartResidueInProtein + && q.OneBasedStartResidue == this.OneBasedStartResidue && (q.Protein.Accession == null && this.Protein.Accession == null || q.Protein.Accession.Equals(this.Protein.Accession)) - && q.DigestionParams.Protease.Equals(this.DigestionParams.Protease); + && q.DigestionParams.DigestionAgent.Equals(this.DigestionParams.DigestionAgent); } public override int GetHashCode() @@ -959,7 +908,7 @@ public override int GetHashCode() } else { - return FullSequence.GetHashCode() + DigestionParams.Protease.GetHashCode(); + return FullSequence.GetHashCode() + DigestionParams.DigestionAgent.GetHashCode(); } } @@ -973,6 +922,10 @@ public void SetNonSerializedPeptideInfo(Dictionary idToMod _digestionParams = dp; } + public void SetNonSerializedPeptideInfo(Dictionary idToMod, + Dictionary accessionToProtein, IDigestionParams dp) => + SetNonSerializedPeptideInfo(idToMod, accessionToProtein, (DigestionParams)dp); + private void GetModsAfterDeserialization(Dictionary idToMod) { _allModsOneIsNterminus = new Dictionary(); @@ -1044,112 +997,11 @@ private void GetProteinAfterDeserialization(Dictionary idToProt Protein = protein; } - public 
static string GetBaseSequenceFromFullSequence(string fullSequence) - { - StringBuilder sb = new StringBuilder(); - int bracketCount = 0; - foreach (char c in fullSequence) - { - if (c == '[') - { - bracketCount++; - } - else if (c == ']') - { - bracketCount--; - } - else if (bracketCount == 0) - { - sb.Append(c); - } - } - return sb.ToString(); - } - - private void DetermineFullSequence() - { - var subsequence = new StringBuilder(); - - // modification on peptide N-terminus - if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod)) - { - subsequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']'); - } - - for (int r = 0; r < Length; r++) - { - subsequence.Append(this[r]); - - // modification on this residue - if (AllModsOneIsNterminus.TryGetValue(r + 2, out mod)) - { - subsequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']'); - } - } - - // modification on peptide C-terminus - if (AllModsOneIsNterminus.TryGetValue(Length + 2, out mod)) - { - subsequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']'); - } - - FullSequence = subsequence.ToString(); - } - /// - /// This method returns the full sequence with mass shifts INSTEAD OF PTMs in brackets [] - /// Some external tools cannot parse PTMs, instead requiring a numerical input indicating the mass of a PTM in brackets - /// after the position of that modification - /// N-terminal mas shifts are in brackets prior to the first amino acid and apparently missing the + sign - /// - /// - private string DetermineFullSequenceWithMassShifts() - { - var subsequence = new StringBuilder(); - - // modification on peptide N-terminus - if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod)) - { - subsequence.Append('[' + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); - } - - for (int r = 0; r < Length; r++) - { - subsequence.Append(this[r]); - - // modification on this residue - if (AllModsOneIsNterminus.TryGetValue(r + 2, out mod)) - { - if 
(mod.MonoisotopicMass > 0) - { - subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); - } - else - { - subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); - } - } - } - - // modification on peptide C-terminus - if (AllModsOneIsNterminus.TryGetValue(Length + 2, out mod)) - { - if (mod.MonoisotopicMass > 0) - { - subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); - } - else - { - subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']'); - } - } - return subsequence.ToString(); - } - private void UpdateCleavageSpecificity() { if (CleavageSpecificityForFdrCategory == CleavageSpecificity.Unknown) { - CleavageSpecificityForFdrCategory = DigestionParams.SpecificProtease.GetCleavageSpecificity(Protein, OneBasedStartResidueInProtein, OneBasedEndResidueInProtein, DigestionParams.InitiatorMethionineBehavior == InitiatorMethionineBehavior.Retain); + CleavageSpecificityForFdrCategory = _digestionParams.SpecificProtease.GetCleavageSpecificity(Protein, OneBasedStartResidueInProtein, OneBasedEndResidueInProtein, _digestionParams.InitiatorMethionineBehavior == InitiatorMethionineBehavior.Retain); PeptideDescription = CleavageSpecificityForFdrCategory.ToString(); } } @@ -1213,7 +1065,7 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA char[] newBase = new char[this.BaseSequence.Length]; Array.Fill(newBase, '0'); char[] evaporatingBase = this.BaseSequence.ToCharArray(); - List motifs = this.DigestionParams.Protease.DigestionMotifs; + List motifs = this.DigestionParams.DigestionAgent.DigestionMotifs; if (motifs != null && motifs.Count > 0) { foreach (var motif in motifs.Where(m => m.InducingCleavage != ""))//check the empty "" for topdown @@ -1286,7 +1138,7 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA proteinSequence = aStringBuilder.ToString(); Protein decoyProtein = new Protein(proteinSequence, 
"DECOY_" + this.Protein.Accession, null, new List>(), new Dictionary>(), null, null, null, true); - DigestionParams d = this.DigestionParams; + DigestionParams d = _digestionParams; // Creates a hash code corresponding to the target's sequence int targetHash = GetHashCode(); @@ -1333,7 +1185,7 @@ public PeptideWithSetModifications GetScrambledDecoyFromTarget(int[] revisedAmin char[] newBase = new char[this.BaseSequence.Length]; Array.Fill(newBase, '0'); char[] evaporatingBase = this.BaseSequence.ToCharArray(); - List motifs = this.DigestionParams.Protease.DigestionMotifs; + List motifs = this.DigestionParams.DigestionAgent.DigestionMotifs; if (motifs != null && motifs.Count > 0) { foreach (var motif in motifs.Where(m => m.InducingCleavage != ""))//check the empty "" for topdown @@ -1465,7 +1317,7 @@ public PeptideWithSetModifications GetScrambledDecoyFromTarget(int[] revisedAmin proteinSequence = aStringBuilder.ToString(); Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List>(), new Dictionary>(), null, null, null, true); - DigestionParams d = this.DigestionParams; + DigestionParams d = _digestionParams; // Creates a hash code corresponding to the target's sequence int targetHash = GetHashCode(); PeptideWithSetModifications decoyPeptide; @@ -1559,7 +1411,7 @@ public PeptideWithSetModifications GetPeptideMirror(int[] revisedOrderNisOne) Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List>(), new Dictionary>(), null, null, null, true); - DigestionParams d = this.DigestionParams; + DigestionParams d = _digestionParams; //now fill in the revised amino acid order int oldStringPosition = this.BaseSequence.Length - 1; diff --git a/mzLib/Proteomics/ProteolyticDigestion/Protease.cs b/mzLib/Proteomics/ProteolyticDigestion/Protease.cs index 836e5c5d8..5bca90400 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/Protease.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/Protease.cs 
@@ -1,27 +1,23 @@ using System; using System.Collections.Generic; using System.Linq; +using Omics.Digestion; +using Omics.Modifications; namespace Proteomics.ProteolyticDigestion { - public class Protease + public class Protease : DigestionAgent { - public Protease(string name, CleavageSpecificity cleavageSpecificity, string psiMSAccessionNumber, string psiMSName, List motifList, Modification modDetails = null) + public Protease(string name, CleavageSpecificity cleavageSpecificity, string psiMSAccessionNumber, + string psiMSName, List motifList, Modification modDetails = null) + : base(name, cleavageSpecificity, motifList, modDetails) { - Name = name; - CleavageSpecificity = cleavageSpecificity; PsiMsAccessionNumber = psiMSAccessionNumber; PsiMsName = psiMSName; - DigestionMotifs = motifList ?? new List(); - CleavageMod = modDetails; } - public string Name { get; } - public CleavageSpecificity CleavageSpecificity { get; } public string PsiMsAccessionNumber { get; } public string PsiMsName { get; } - public List DigestionMotifs { get; } - public Modification CleavageMod { get; set; } public override string ToString() { @@ -115,14 +111,14 @@ internal List GetUnmodifiedPeptides(Protein protein, int max { // retain methionine if ((initiatorMethionineBehavior != InitiatorMethionineBehavior.Cleave || protein[0] != 'M') - && OkayLength(protein.Length, minPeptideLength, maxPeptideLength)) + && ValidLength(protein.Length, minPeptideLength, maxPeptideLength)) { peptides.Add(new ProteolyticPeptide(protein, 1, protein.Length, 0, CleavageSpecificity.Full, "full")); } // cleave methionine if ((initiatorMethionineBehavior != InitiatorMethionineBehavior.Retain && protein[0] == 'M') - && OkayLength(protein.Length - 1, minPeptideLength, maxPeptideLength)) + && ValidLength(protein.Length - 1, minPeptideLength, maxPeptideLength)) { peptides.Add(new ProteolyticPeptide(protein, 2, protein.Length, 0, CleavageSpecificity.Full, "full:M cleaved")); } @@ -132,7 +128,7 @@ internal List 
GetUnmodifiedPeptides(Protein protein, int max peptides.AddRange( protein.ProteolysisProducts .Where(proteolysisProduct => proteolysisProduct.OneBasedEndPosition.HasValue && proteolysisProduct.OneBasedBeginPosition.HasValue - && OkayLength(proteolysisProduct.OneBasedEndPosition.Value - proteolysisProduct.OneBasedBeginPosition.Value + 1, minPeptideLength, maxPeptideLength)) + && ValidLength(proteolysisProduct.OneBasedEndPosition.Value - proteolysisProduct.OneBasedBeginPosition.Value + 1, minPeptideLength, maxPeptideLength)) .Select(proteolysisProduct => new ProteolyticPeptide(protein, proteolysisProduct.OneBasedBeginPosition.Value, proteolysisProduct.OneBasedEndPosition.Value, 0, CleavageSpecificity.None, proteolysisProduct.Type))); } @@ -156,49 +152,6 @@ internal List GetUnmodifiedPeptides(Protein protein, int max return peptides; } - /// - /// Gets the indices after which this protease will cleave a given protein sequence - /// - /// - /// - internal List GetDigestionSiteIndices(string proteinSequence) - { - var indices = new List(); - - for (int r = 0; r < proteinSequence.Length; r++) - { - var cutSiteIndex = -1; - bool cleavagePrevented = false; - - foreach (DigestionMotif motif in DigestionMotifs) - { - var motifResults = motif.Fits(proteinSequence, r); - bool motifFits = motifResults.Item1; - bool motifPreventsCleavage = motifResults.Item2; - - if (motifFits && r + motif.CutIndex < proteinSequence.Length) - { - cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex); - } - - if (motifPreventsCleavage) // if any motif prevents cleave - { - cleavagePrevented = true; - } - } - - // if no motif prevents cleave - if (!cleavagePrevented && cutSiteIndex != -1) - { - indices.Add(cutSiteIndex); - } - } - - indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide - indices.Add(proteinSequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide - return 
indices.Distinct().OrderBy(i => i).ToList(); - } - /// /// Retain N-terminal residue? /// @@ -227,17 +180,6 @@ internal static bool Cleave(int oneBasedCleaveAfter, InitiatorMethionineBehavior && nTerminus == 'M'; } - /// - /// Is length of given peptide okay, given minimum and maximum? - /// - /// - /// - /// - /// - internal static bool OkayLength(int peptideLength, int minPeptideLength, int maxPeptideLength) - { - return OkayMinLength(peptideLength, minPeptideLength) && OkayMaxLength(peptideLength, maxPeptideLength); - } /// /// Gets protein intervals for digestion by this specific protease. @@ -259,13 +201,13 @@ private IEnumerable FullDigestion(Protein protein, Initiator for (int i = 0; i < oneBasedIndicesToCleaveAfter.Count - missedCleavages - 1; i++) { if (Retain(i, initiatorMethionineBehavior, firstResidueInProtein) - && OkayLength(oneBasedIndicesToCleaveAfter[i + missedCleavages + 1] - oneBasedIndicesToCleaveAfter[i], minPeptideLength, maxPeptideLength)) + && ValidLength(oneBasedIndicesToCleaveAfter[i + missedCleavages + 1] - oneBasedIndicesToCleaveAfter[i], minPeptideLength, maxPeptideLength)) { yield return new ProteolyticPeptide(protein, oneBasedIndicesToCleaveAfter[i] + 1, oneBasedIndicesToCleaveAfter[i + missedCleavages + 1], missedCleavages, CleavageSpecificity.Full, "full"); } if (Cleave(i, initiatorMethionineBehavior, firstResidueInProtein) && oneBasedIndicesToCleaveAfter[1] != 1 //prevent duplicates if that bond is cleaved by the protease - && OkayLength(oneBasedIndicesToCleaveAfter[i + missedCleavages + 1] - 1, minPeptideLength, maxPeptideLength)) + && ValidLength(oneBasedIndicesToCleaveAfter[i + missedCleavages + 1] - 1, minPeptideLength, maxPeptideLength)) { yield return new ProteolyticPeptide(protein, 2, oneBasedIndicesToCleaveAfter[i + missedCleavages + 1], missedCleavages, CleavageSpecificity.Full, "full:M cleaved"); @@ -291,7 +233,7 @@ private IEnumerable FullDigestion(Protein protein, Initiator && 
proteolysisProduct.OneBasedBeginPosition.HasValue //and the proteolytic peptide even has a beginning && !oneBasedIndicesToCleaveAfter.Contains(proteolysisProduct.OneBasedBeginPosition.Value - 1) //and we haven't already cleaved here && (proteolysisProduct.OneBasedBeginPosition.Value != 1 || !Cleave(0, initiatorMethionineBehavior, firstResidueInProtein)) //and it's not the initiator methionine - && OkayLength(oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct + missedCleavages] - proteolysisProduct.OneBasedBeginPosition.Value + 1, minPeptideLength, maxPeptideLength); //and it's the correct size + && ValidLength(oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct + missedCleavages] - proteolysisProduct.OneBasedBeginPosition.Value + 1, minPeptideLength, maxPeptideLength); //and it's the correct size if (startPeptide) { yield return new ProteolyticPeptide(protein, proteolysisProduct.OneBasedBeginPosition.Value, oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct + missedCleavages], @@ -308,7 +250,7 @@ private IEnumerable FullDigestion(Protein protein, Initiator && oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct - missedCleavages - 1] + 1 >= proteolysisProduct.OneBasedBeginPosition //and it's not before the beginning && proteolysisProduct.OneBasedEndPosition.HasValue //and the proteolytic peptide even has an end && !oneBasedIndicesToCleaveAfter.Contains(proteolysisProduct.OneBasedEndPosition.Value) //and we haven't already cleaved here - && OkayLength(proteolysisProduct.OneBasedEndPosition.Value - oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct - missedCleavages - 1] + 1 - 1, minPeptideLength, maxPeptideLength); //and it's the correct size + && ValidLength(proteolysisProduct.OneBasedEndPosition.Value - oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct - missedCleavages - 1] + 1 - 1, minPeptideLength, maxPeptideLength); //and it's the correct size if (endPeptide) { yield 
return new ProteolyticPeptide(protein, oneBasedIndicesToCleaveAfter[cleavageIndexWithinProteolysisProduct - missedCleavages - 1] + 1, proteolysisProduct.OneBasedEndPosition.Value, @@ -341,7 +283,7 @@ private IEnumerable FullDigestion(Protein protein, Initiator lastCleavage++; } if (lastCleavage - firstCleavage < maximumMissedCleavages && //if there aren't too many missed cleavages - OkayLength(proteolysisProduct.OneBasedEndPosition.Value - proteolysisProduct.OneBasedBeginPosition.Value, minPeptideLength, maxPeptideLength)) //and it's the correct size + ValidLength(proteolysisProduct.OneBasedEndPosition.Value - proteolysisProduct.OneBasedBeginPosition.Value, minPeptideLength, maxPeptideLength)) //and it's the correct size { yield return new ProteolyticPeptide(protein, proteolysisProduct.OneBasedBeginPosition.Value, proteolysisProduct.OneBasedEndPosition.Value, lastCleavage - firstCleavage, CleavageSpecificity.Full, proteolysisProduct.Type + " end"); @@ -404,7 +346,7 @@ private IEnumerable SemiProteolyticDigestion(Protein protein } for (int j = cTerminusProtein; j > nTerminusProtein; j--)//We are hitting the c-terminus here { - if (OkayLength(j - nTerminusProtein, minPeptideLength, maxPeptideLength)) + if (ValidLength(j - nTerminusProtein, minPeptideLength, maxPeptideLength)) { intervals.Add(localOneBasedIndicesToCleaveAfter.Contains(j) ? 
new ProteolyticPeptide(protein, nTerminusProtein + 1, j, j - nTerminusProtein, CleavageSpecificity.Full, "full") : @@ -428,7 +370,7 @@ private IEnumerable SemiProteolyticDigestion(Protein protein int start = nTerminusProtein + 1;//plus one to not doublecount the n terminus (in addition to the M term skip) for (int j = start; j < cTerminusProtein; j++) { - if (OkayLength(cTerminusProtein - j, minPeptideLength, maxPeptideLength) + if (ValidLength(cTerminusProtein - j, minPeptideLength, maxPeptideLength) && !localOneBasedIndicesToCleaveAfter.Contains(j)) { intervals.Add(new ProteolyticPeptide(protein, j + 1, cTerminusProtein, cTerminusProtein - j, CleavageSpecificity.Semi, "semi")); @@ -454,7 +396,7 @@ private IEnumerable SemiProteolyticDigestion(Protein protein // Start peptide for (int j = start; j < oneBasedIndicesToCleaveAfter[i]; j++) { - if (OkayLength(j - start + 1, minPeptideLength, maxPeptideLength)) + if (ValidLength(j - start + 1, minPeptideLength, maxPeptideLength)) { intervals.Add(new ProteolyticPeptide(protein, start, j, j - start, CleavageSpecificity.Full, proteolysisProduct.Type + " start")); } @@ -479,7 +421,7 @@ private IEnumerable SemiProteolyticDigestion(Protein protein // Fin (End) for (int j = oneBasedIndicesToCleaveAfter[i] + 1; j < end; j++) { - if (OkayLength(end - j + 1, minPeptideLength, maxPeptideLength)) + if (ValidLength(end - j + 1, minPeptideLength, maxPeptideLength)) { intervals.Add(new ProteolyticPeptide(protein, j, end, end - j, CleavageSpecificity.Full, proteolysisProduct.Type + " end")); } @@ -504,7 +446,7 @@ private static IEnumerable FixedTermini(int nTerminusProtein { bool preventMethionineFromBeingDuplicated = nTerminusProtein == 1 && cleave && retain; //prevents duplicate sequences containing N-terminal methionine List intervals = new List(); - if (!preventMethionineFromBeingDuplicated && OkayLength(cTerminusProtein - nTerminusProtein, minPeptideLength, maxPeptideLength)) //adds the full length maximum cleavages, no semi + if 
(!preventMethionineFromBeingDuplicated && ValidLength(cTerminusProtein - nTerminusProtein, minPeptideLength, maxPeptideLength)) //adds the full length maximum cleavages, no semi { intervals.Add(new ProteolyticPeptide(protein, nTerminusProtein + 1, cTerminusProtein, cTerminusProtein - nTerminusProtein, CleavageSpecificity.Full, "full" + (cleave ? ":M cleaved" : ""))); // Maximum sequence length @@ -516,7 +458,7 @@ private static IEnumerable FixedTermini(int nTerminusProtein List fixedCTermIntervals = new List(); if (!preventMethionineFromBeingDuplicated) { - var indexesOfAcceptableLength = internalIndices.Where(j => OkayLength(cTerminusProtein - j, minPeptideLength, maxPeptideLength)); + var indexesOfAcceptableLength = internalIndices.Where(j => ValidLength(cTerminusProtein - j, minPeptideLength, maxPeptideLength)); foreach (var j in indexesOfAcceptableLength) { if (localOneBasedIndicesToCleaveAfter.Contains(j) || (j == 1 && cleave)) //if cleaved on cleavable index or after initiator methionine, record as full @@ -535,7 +477,7 @@ private static IEnumerable FixedTermini(int nTerminusProtein } IEnumerable fixedNTermIntervals = internalIndices - .Where(j => OkayLength(j - nTerminusProtein, minPeptideLength, maxPeptideLength)) + .Where(j => ValidLength(j - nTerminusProtein, minPeptideLength, maxPeptideLength)) .Select(j => localOneBasedIndicesToCleaveAfter.Contains(j) ? new ProteolyticPeptide(protein, nTerminusProtein + 1, j, j - nTerminusProtein, CleavageSpecificity.Full, "full" + (cleave ? ":M cleaved" : "")) : new ProteolyticPeptide(protein, nTerminusProtein + 1, j, j - nTerminusProtein, CleavageSpecificity.Semi, "semi" + (cleave ? 
":M cleaved" : ""))); @@ -565,7 +507,7 @@ private List SingleN_Digestion(Protein protein, InitiatorMet //This happens when maxPeptideLength == int.MaxValue or something close to it for (; proteinStart <= protein.Length; proteinStart++) { - if (OkayMinLength(protein.Length - proteinStart + 1, minPeptideLength)) + if (ValidMinLength(protein.Length - proteinStart + 1, minPeptideLength)) { //need Math.Max if max length is int.MaxLength, since +proteinStart will make it negative //if the max length is too big to be an int (ie infinity), just do the protein length. @@ -651,7 +593,7 @@ private List SingleC_Digestion(Protein protein, InitiatorMet for (int proteinEnd = 1; proteinEnd <= protein.Length; proteinEnd++) { //length of peptide will be at least the start index - if (OkayMinLength(proteinEnd - lengthDifference, minPeptideLength)) //is the maximum possible length longer than the minimum? + if (ValidMinLength(proteinEnd - lengthDifference, minPeptideLength)) //is the maximum possible length longer than the minimum? { //use the start index as the max of the N-terminus or the c-terminus minus the max (+1 because inclusive, otherwise peptides will be one AA too long) peptides.Add(new ProteolyticPeptide(protein, Math.Max(proteinStart, proteinEnd - maxPeptideLength + 1), proteinEnd, 0, CleavageSpecificity.SingleC, "SingleC")); @@ -713,27 +655,5 @@ private List SingleC_Digestion(Protein protein, InitiatorMet } return peptides; } - - /// - /// Is length of given peptide okay, given minimum? - /// - /// - /// - /// - private static bool OkayMinLength(int peptideLength, int minPeptideLength) - { - return peptideLength >= minPeptideLength; - } - - /// - /// Is length of given peptide okay, given maximum? - /// - /// - /// - /// - private static bool OkayMaxLength(int? 
peptideLength, int maxPeptideLength) - { - return !peptideLength.HasValue || peptideLength <= maxPeptideLength; - } } } \ No newline at end of file diff --git a/mzLib/Proteomics/ProteolyticDigestion/ProteaseDictionary.cs b/mzLib/Proteomics/ProteolyticDigestion/ProteaseDictionary.cs index ef3a3352d..45a901ec2 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/ProteaseDictionary.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/ProteaseDictionary.cs @@ -4,6 +4,8 @@ using System.IO; using System.Linq; using MzLibUtil; +using Omics.Digestion; +using Omics.Modifications; namespace Proteomics.ProteolyticDigestion { diff --git a/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs b/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs index caea1494c..4ee48dbe3 100644 --- a/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs +++ b/mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs @@ -1,6 +1,8 @@ using System.Collections.Generic; using System.Linq; +using Omics.Digestion; using Omics.Fragmentation; +using Omics.Modifications; namespace Proteomics.ProteolyticDigestion { @@ -18,8 +20,8 @@ public ProteinDigestion(DigestionParams digestionParams, IEnumerable [Serializable] - public class ProteolyticPeptide + public class ProteolyticPeptide : DigestionProduct { - protected string _baseSequence; - - internal ProteolyticPeptide(Protein protein, int oneBasedStartResidueInProtein, int oneBasedEndResidueInProtein, int missedCleavages, CleavageSpecificity cleavageSpecificityForFdrCategory, string peptideDescription = null, string baseSequence = null) - { - _protein = protein; - OneBasedStartResidueInProtein = oneBasedStartResidueInProtein; - OneBasedEndResidueInProtein = oneBasedEndResidueInProtein; - MissedCleavages = missedCleavages; - CleavageSpecificityForFdrCategory = cleavageSpecificityForFdrCategory; - PeptideDescription = peptideDescription; - _baseSequence = baseSequence; - } - - [NonSerialized] private Protein _protein; // protein that this peptide is a 
digestion product of - public int OneBasedStartResidueInProtein { get; } // the residue number at which the peptide begins (the first residue in a protein is 1) - public int OneBasedEndResidueInProtein { get; } // the residue number at which the peptide ends - public int MissedCleavages { get; } // the number of missed cleavages this peptide has with respect to the digesting protease - public string PeptideDescription { get; internal set; } //unstructured explanation of source - public CleavageSpecificity CleavageSpecificityForFdrCategory { get; internal set; } //structured explanation of source - public int Length { get { return BaseSequence.Length; } } //how many residues long the peptide is - - public virtual char PreviousAminoAcid + + internal ProteolyticPeptide(Protein protein, int oneBasedStartResidueInProtein, int oneBasedEndResidueInProtein, int missedCleavages, CleavageSpecificity cleavageSpecificityForFdrCategory, string peptideDescription = null, string baseSequence = null) : + base(protein, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificityForFdrCategory, peptideDescription, baseSequence) { - get - { - return OneBasedStartResidueInProtein > 1 ? Protein[OneBasedStartResidueInProtein - 2] : '-'; - } - } - public virtual char NextAminoAcid - { - get - { - return OneBasedEndResidueInProtein < Protein.Length ? 
Protein[OneBasedEndResidueInProtein] : '-'; - } } + public Protein Protein { - get { return _protein; } - protected set { _protein = value; } + get => Parent as Protein; + protected set => Parent = value; } - public string BaseSequence - { - get - { - if (_baseSequence == null) - { - _baseSequence = Protein.BaseSequence.Substring(OneBasedStartResidueInProtein - 1, OneBasedEndResidueInProtein - OneBasedStartResidueInProtein + 1); - } - return _baseSequence; - } - } + #region Properties overridden by more generic interface + + public int OneBasedEndResidueInProtein => OneBasedEndResidue; + public int OneBasedStartResidueInProtein => OneBasedStartResidue; + public virtual char PreviousAminoAcid => PreviousResidue; + public virtual char NextAminoAcid => NextResidue; - public char this[int zeroBasedIndex] + public string PeptideDescription { - get - { - return BaseSequence[zeroBasedIndex]; - } + get => Description; + set => Description = value; } + #endregion + /// /// Gets the peptides for a specific protein interval /// @@ -85,7 +54,7 @@ public char this[int zeroBasedIndex] internal IEnumerable GetModifiedPeptides(IEnumerable allKnownFixedModifications, DigestionParams digestionParams, List variableModifications) { - int peptideLength = OneBasedEndResidueInProtein - OneBasedStartResidueInProtein + 1; + int peptideLength = OneBasedEndResidue - OneBasedStartResidue + 1; int maximumVariableModificationIsoforms = digestionParams.MaxModificationIsoforms; int maxModsForPeptide = digestionParams.MaxModsForPeptide; var twoBasedPossibleVariableAndLocalizeableModifications = new Dictionary>(peptideLength + 4); @@ -106,7 +75,7 @@ internal IEnumerable GetModifiedPeptides(IEnumerabl for (int r = 0; r < peptideLength; r++) { - if (ModificationLocalization.ModFits(variableModification, Protein.BaseSequence, r + 1, peptideLength, OneBasedStartResidueInProtein + r) + if (ModificationLocalization.ModFits(variableModification, Protein.BaseSequence, r + 1, peptideLength, 
OneBasedStartResidue + r) && variableModification.LocationRestriction == "Anywhere." && !ModificationLocalization.UniprotModExists(Protein, r + 1, variableModification)) { if (!twoBasedPossibleVariableAndLocalizeableModifications.TryGetValue(r + 2, out List residueVariableMods)) @@ -130,7 +99,7 @@ internal IEnumerable GetModifiedPeptides(IEnumerabl // LOCALIZED MODS foreach (var kvp in Protein.OneBasedPossibleLocalizedModifications) { - bool inBounds = kvp.Key >= OneBasedStartResidueInProtein && kvp.Key <= OneBasedEndResidueInProtein; + bool inBounds = kvp.Key >= OneBasedStartResidue && kvp.Key <= OneBasedEndResidue; if (!inBounds) { continue; @@ -178,7 +147,7 @@ internal IEnumerable GetModifiedPeptides(IEnumerabl foreach (Dictionary kvp in GetVariableModificationPatterns(twoBasedPossibleVariableAndLocalizeableModifications, maxModsForPeptide, peptideLength)) { int numFixedMods = 0; - foreach (var ok in GetFixedModsOneIsNterminus(peptideLength, allKnownFixedModifications)) + foreach (var ok in GetFixedModsOneIsNorFivePrimeTerminus(peptideLength, allKnownFixedModifications)) { if (!kvp.ContainsKey(ok.Key)) { @@ -186,7 +155,7 @@ internal IEnumerable GetModifiedPeptides(IEnumerabl kvp.Add(ok.Key, ok.Value); } } - yield return new PeptideWithSetModifications(Protein, digestionParams, OneBasedStartResidueInProtein, OneBasedEndResidueInProtein, + yield return new PeptideWithSetModifications(Protein, digestionParams, OneBasedStartResidue, OneBasedEndResidue, CleavageSpecificityForFdrCategory, PeptideDescription, MissedCleavages, kvp, numFixedMods); variable_modification_isoforms++; if (variable_modification_isoforms == maximumVariableModificationIsoforms) @@ -204,7 +173,7 @@ internal IEnumerable GetModifiedPeptides(IEnumerabl /// private bool CanBeNTerminalMod(Modification variableModification, int peptideLength) { - return ModificationLocalization.ModFits(variableModification, Protein.BaseSequence, 1, peptideLength, OneBasedStartResidueInProtein) + return 
ModificationLocalization.ModFits(variableModification, Protein.BaseSequence, 1, peptideLength, OneBasedStartResidue) && (variableModification.LocationRestriction == "N-terminal." || variableModification.LocationRestriction == "Peptide N-terminal."); } @@ -216,152 +185,8 @@ private bool CanBeNTerminalMod(Modification variableModification, int peptideLen /// private bool CanBeCTerminalMod(Modification variableModification, int peptideLength) { - return ModificationLocalization.ModFits(variableModification, Protein.BaseSequence, peptideLength, peptideLength, OneBasedStartResidueInProtein + peptideLength - 1) + return ModificationLocalization.ModFits(variableModification, Protein.BaseSequence, peptideLength, peptideLength, OneBasedStartResidue + peptideLength - 1) && (variableModification.LocationRestriction == "C-terminal." || variableModification.LocationRestriction == "Peptide C-terminal."); } - - private static IEnumerable> GetVariableModificationPatterns(Dictionary> possibleVariableModifications, int maxModsForPeptide, int peptideLength) - { - if (possibleVariableModifications.Count == 0) - { - yield return null; - } - else - { - var possible_variable_modifications = new Dictionary>(possibleVariableModifications); - - int[] base_variable_modification_pattern = new int[peptideLength + 4]; - var totalAvailableMods = possible_variable_modifications.Sum(b => b.Value == null ? 
0 : b.Value.Count); - for (int variable_modifications = 0; variable_modifications <= Math.Min(totalAvailableMods, maxModsForPeptide); variable_modifications++) - { - foreach (int[] variable_modification_pattern in GetVariableModificationPatterns(new List>>(possible_variable_modifications), - possible_variable_modifications.Count - variable_modifications, base_variable_modification_pattern, 0)) - { - yield return GetNewVariableModificationPattern(variable_modification_pattern, possible_variable_modifications); - } - } - } - } - - private static IEnumerable GetVariableModificationPatterns(List>> possibleVariableModifications, - int unmodifiedResiduesDesired, int[] variableModificationPattern, int index) - { - if (index < possibleVariableModifications.Count - 1) - { - if (unmodifiedResiduesDesired > 0) - { - variableModificationPattern[possibleVariableModifications[index].Key] = 0; - foreach (int[] new_variable_modification_pattern in GetVariableModificationPatterns(possibleVariableModifications, - unmodifiedResiduesDesired - 1, variableModificationPattern, index + 1)) - { - yield return new_variable_modification_pattern; - } - } - if (unmodifiedResiduesDesired < possibleVariableModifications.Count - index) - { - for (int i = 1; i <= possibleVariableModifications[index].Value.Count; i++) - { - variableModificationPattern[possibleVariableModifications[index].Key] = i; - foreach (int[] new_variable_modification_pattern in GetVariableModificationPatterns(possibleVariableModifications, - unmodifiedResiduesDesired, variableModificationPattern, index + 1)) - { - yield return new_variable_modification_pattern; - } - } - } - } - else - { - if (unmodifiedResiduesDesired > 0) - { - variableModificationPattern[possibleVariableModifications[index].Key] = 0; - yield return variableModificationPattern; - } - else - { - for (int i = 1; i <= possibleVariableModifications[index].Value.Count; i++) - { - variableModificationPattern[possibleVariableModifications[index].Key] = i; - yield 
return variableModificationPattern; - } - } - } - } - - private static Dictionary GetNewVariableModificationPattern(int[] variableModificationArray, - IEnumerable>> possibleVariableModifications) - { - var modification_pattern = new Dictionary(); - - foreach (KeyValuePair> kvp in possibleVariableModifications) - { - if (variableModificationArray[kvp.Key] > 0) - { - modification_pattern.Add(kvp.Key, kvp.Value[variableModificationArray[kvp.Key] - 1]); - } - } - - return modification_pattern; - } - - private Dictionary GetFixedModsOneIsNterminus(int peptideLength, - IEnumerable allKnownFixedModifications) - { - var fixedModsOneIsNterminus = new Dictionary(peptideLength + 3); - foreach (Modification mod in allKnownFixedModifications) - { - switch (mod.LocationRestriction) - { - case "N-terminal.": - case "Peptide N-terminal.": - //the modification is protease associated and is applied to the n-terminal cleaved residue, not at the beginign of the protein - if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Protein.BaseSequence, 1, peptideLength, OneBasedStartResidueInProtein)) - { - if (OneBasedStartResidueInProtein != 1) - { - fixedModsOneIsNterminus[2] = mod; - } - } - //Normal N-terminal peptide modification - else if (ModificationLocalization.ModFits(mod, Protein.BaseSequence, 1, peptideLength, OneBasedStartResidueInProtein)) - { - fixedModsOneIsNterminus[1] = mod; - } - break; - - case "Anywhere.": - for (int i = 2; i <= peptideLength + 1; i++) - { - if (ModificationLocalization.ModFits(mod, Protein.BaseSequence, i - 1, peptideLength, OneBasedStartResidueInProtein + i - 2)) - { - fixedModsOneIsNterminus[i] = mod; - } - } - break; - - case "C-terminal.": - case "Peptide C-terminal.": - //the modification is protease associated and is applied to the c-terminal cleaved residue, not if it is at the end of the protein - if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Protein.BaseSequence, peptideLength, 
peptideLength, OneBasedStartResidueInProtein + peptideLength - 1)) - { - if (OneBasedEndResidueInProtein != Protein.Length) - { - fixedModsOneIsNterminus[peptideLength+1] = mod; - } - - } - //Normal C-terminal peptide modification - else if (ModificationLocalization.ModFits(mod, Protein.BaseSequence, peptideLength, peptideLength, OneBasedStartResidueInProtein + peptideLength - 1)) - { - fixedModsOneIsNterminus[peptideLength + 2] = mod; - } - break; - - default: - throw new NotSupportedException("This terminus localization is not supported."); - } - } - return fixedModsOneIsNterminus; - } } } \ No newline at end of file diff --git a/mzLib/Test/DatabaseTests/TestDatabaseLoaders.cs b/mzLib/Test/DatabaseTests/TestDatabaseLoaders.cs index c9fd100dc..fe3e70c06 100644 --- a/mzLib/Test/DatabaseTests/TestDatabaseLoaders.cs +++ b/mzLib/Test/DatabaseTests/TestDatabaseLoaders.cs @@ -24,6 +24,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics.Modifications; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; diff --git a/mzLib/Test/DatabaseTests/TestProteinReader.cs b/mzLib/Test/DatabaseTests/TestProteinReader.cs index b38e8d3d7..7c5affb4e 100644 --- a/mzLib/Test/DatabaseTests/TestProteinReader.cs +++ b/mzLib/Test/DatabaseTests/TestProteinReader.cs @@ -21,6 +21,7 @@ using System.IO; using System.Linq; using NUnit.Framework; +using Omics.Modifications; using Proteomics; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; diff --git a/mzLib/Test/DatabaseTests/TestProteomicsReadWrite.cs b/mzLib/Test/DatabaseTests/TestProteomicsReadWrite.cs index 20850a3ac..c48034df4 100644 --- a/mzLib/Test/DatabaseTests/TestProteomicsReadWrite.cs +++ b/mzLib/Test/DatabaseTests/TestProteomicsReadWrite.cs @@ -5,6 +5,7 @@ using MassSpectrometry; using NUnit.Framework; using Omics.Fragmentation; +using Omics.Modifications; using Proteomics; using Proteomics.ProteolyticDigestion; using 
UsefulProteomicsDatabases; diff --git a/mzLib/Test/DatabaseTests/TestVariantProtein.cs b/mzLib/Test/DatabaseTests/TestVariantProtein.cs index 1940a182a..a16232ab0 100644 --- a/mzLib/Test/DatabaseTests/TestVariantProtein.cs +++ b/mzLib/Test/DatabaseTests/TestVariantProtein.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; using NUnit.Framework; +using Omics.Modifications; using Proteomics; using Proteomics.ProteolyticDigestion; using UsefulProteomicsDatabases; diff --git a/mzLib/Test/FileReadingTests/TestMsDataFile.cs b/mzLib/Test/FileReadingTests/TestMsDataFile.cs index e0fa08960..eed172009 100644 --- a/mzLib/Test/FileReadingTests/TestMsDataFile.cs +++ b/mzLib/Test/FileReadingTests/TestMsDataFile.cs @@ -28,6 +28,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics.Modifications; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test.FileReadingTests diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj index 90bc7dc42..edad7a835 100644 --- a/mzLib/Test/Test.csproj +++ b/mzLib/Test/Test.csproj @@ -477,6 +477,7 @@ + diff --git a/mzLib/Test/TestChemicalFormula.cs b/mzLib/Test/TestChemicalFormula.cs index b78e3eaad..124fb7548 100644 --- a/mzLib/Test/TestChemicalFormula.cs +++ b/mzLib/Test/TestChemicalFormula.cs @@ -23,6 +23,7 @@ using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Linq; +using SharpLearning.Containers.Matrices; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test @@ -1005,6 +1006,42 @@ public static void TestAddChemicalFormula() Assert.AreEqual("CC{12}", formulaB.Formula); } + [Test] + public static void TestAddChemicalFormulaOperator() + { + ChemicalFormula formulaB = ChemicalFormula.ParseFormula("C"); + ChemicalFormula formulaA = ChemicalFormula.ParseFormula("C{12}"); + + var addedFormula = formulaA + formulaB; + formulaB.Add(formulaA); + + Assert.AreEqual("CC{12}", formulaB.Formula); + Assert.AreEqual("CC{12}", addedFormula.Formula); + + var leftNull = 
null + formulaB; + Assert.AreEqual(formulaB, leftNull); + + var rightNull = formulaB + null; + Assert.AreEqual(formulaB, rightNull); + + ChemicalFormula nullFormula = null; + var bothNull = nullFormula + nullFormula; + Assert.AreEqual(null, bothNull); + } + + [Test] + [TestCase("C", "N", "CN-1")] + [TestCase(null, "N", "N-1")] + [TestCase("C", null, "C")] + public static void TestSubtractChemicalFormulaOperator(string formA, string formB, string expected) + { + ChemicalFormula formulaA = formA == null ? null : ChemicalFormula.ParseFormula(formA); + ChemicalFormula formulaB = formB == null ? null : ChemicalFormula.ParseFormula(formB); + + var subtractedFormula = formulaA - formulaB; + Assert.AreEqual(expected, subtractedFormula.Formula); + } + [Test] public static void NotEqual() { diff --git a/mzLib/Test/TestClassExtensions.cs b/mzLib/Test/TestClassExtensions.cs index 2a4572c8c..4de0e5ff5 100644 --- a/mzLib/Test/TestClassExtensions.cs +++ b/mzLib/Test/TestClassExtensions.cs @@ -96,5 +96,23 @@ public static void TestAllSame() Assert.That(!differentDouble.AllSame()); Assert.That(!differentSpectrum.AllSame()); } + + [Test] + [TestCase(1874.28, 373.8487, -5)] + [TestCase(1874.28, 467.5627, -4)] + [TestCase(1874.28, 623.7527, -3)] + [TestCase(1874.28, 936.1327, -2)] + [TestCase(1874.28, 1873.273, -1)] + [TestCase(1874.28, 375.8633, 5)] + [TestCase(1874.28, 469.5773, 4)] + [TestCase(1874.28, 625.7673, 3)] + [TestCase(1874.28, 938.1473, 2)] + [TestCase(1874.28, 1875.287, 1)] + + public static void TestToMzAndMass(double mass, double mz, int charge) + { + Assert.That(mass, Is.EqualTo(mz.ToMass(charge)).Within(0.01)); + Assert.That(mz, Is.EqualTo(mass.ToMz(charge)).Within(0.01)); + } } } \ No newline at end of file diff --git a/mzLib/Test/TestDeconvolution.cs b/mzLib/Test/TestDeconvolution.cs index e87279374..f8a9e3c7e 100644 --- a/mzLib/Test/TestDeconvolution.cs +++ b/mzLib/Test/TestDeconvolution.cs @@ -12,6 +12,8 @@ using System.IO; using System.Linq; using 
MassSpectrometry; +using Omics.Digestion; +using Omics.Modifications; using Test.FileReadingTests; namespace Test diff --git a/mzLib/Test/TestDigestionMotif.cs b/mzLib/Test/TestDigestionMotif.cs index ddf12783a..23041d823 100644 --- a/mzLib/Test/TestDigestionMotif.cs +++ b/mzLib/Test/TestDigestionMotif.cs @@ -7,6 +7,8 @@ using System.Diagnostics.CodeAnalysis; using System.IO; using System.Linq; +using Omics.Digestion; +using Omics.Modifications; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; @@ -571,10 +573,10 @@ public static void TestProteolyticDigestion() List pwsmsC = humanInsulin.Digest(speedySemiC, null, null).ToList(); Assert.IsTrue(pwsmsN.Count == 7); Assert.IsTrue(pwsmsC.Count == 9); - Assert.IsFalse(pwsmsN.Any(x => x.Length > speedySemiN.MaxPeptideLength)); - Assert.IsFalse(pwsmsC.Any(x => x.Length > speedySemiC.MaxPeptideLength)); - Assert.IsFalse(pwsmsN.Any(x => x.Length < speedySemiN.MinPeptideLength)); - Assert.IsFalse(pwsmsC.Any(x => x.Length < speedySemiC.MinPeptideLength)); + Assert.IsFalse(pwsmsN.Any(x => x.Length > speedySemiN.MaxLength)); + Assert.IsFalse(pwsmsC.Any(x => x.Length > speedySemiC.MaxLength)); + Assert.IsFalse(pwsmsN.Any(x => x.Length < speedySemiN.MinLength)); + Assert.IsFalse(pwsmsC.Any(x => x.Length < speedySemiC.MinLength)); } } } \ No newline at end of file diff --git a/mzLib/Test/TestFragments.cs b/mzLib/Test/TestFragments.cs index 630b1542b..872d941e7 100644 --- a/mzLib/Test/TestFragments.cs +++ b/mzLib/Test/TestFragments.cs @@ -29,6 +29,8 @@ using System; using System.Collections.Generic; using System.Linq; +using Omics.Digestion; +using Omics.Modifications; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test diff --git a/mzLib/Test/TestModFits.cs b/mzLib/Test/TestModFits.cs index 3360bb7a3..3b0b2c225 100644 --- a/mzLib/Test/TestModFits.cs +++ b/mzLib/Test/TestModFits.cs @@ -1,6 +1,7 @@ using NUnit.Framework; using Proteomics; using System; +using Omics.Modifications; using 
Stopwatch = System.Diagnostics.Stopwatch; namespace Test diff --git a/mzLib/Test/TestModifications.cs b/mzLib/Test/TestModifications.cs index b46eaaf65..12f1566e3 100644 --- a/mzLib/Test/TestModifications.cs +++ b/mzLib/Test/TestModifications.cs @@ -28,6 +28,7 @@ using System.Collections.Generic; using System.Linq; using Omics.Fragmentation; +using Omics.Modifications; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; diff --git a/mzLib/Test/TestPeptideWithSetMods.cs b/mzLib/Test/TestPeptideWithSetMods.cs index 8ab0623a4..6bcc55492 100644 --- a/mzLib/Test/TestPeptideWithSetMods.cs +++ b/mzLib/Test/TestPeptideWithSetMods.cs @@ -6,7 +6,10 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics; +using Omics.Digestion; using Omics.Fragmentation; +using Omics.Modifications; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; @@ -47,8 +50,8 @@ public static void TestDifferentProteaseEquals() PeptideWithSetModifications pep2 = myProtein.Digest(digest2, new List(), new List()).First(); Assert.That(pep1.FullSequence.Equals(pep2.FullSequence)); - Assert.That(pep1.Protein.Equals(pep2.Protein)); - Assert.That(!pep1.DigestionParams.Protease.Equals(pep2.DigestionParams.Protease)); + Assert.That(pep1.Parent.Equals(pep2.Parent)); + Assert.That(!pep1.DigestionParams.DigestionAgent.Equals(pep2.DigestionParams.DigestionAgent)); Assert.That(!pep1.Equals(pep2)); Assert.That(!pep1.GetHashCode().Equals(pep2.GetHashCode())); } @@ -104,8 +107,8 @@ public static void TestSpeedyNonAndSemiSpecificMaxLength() DigestionParams semiCParams = new DigestionParams("Asp-N", 3, 7, 50, searchModeType: CleavageSpecificity.Semi, fragmentationTerminus: FragmentationTerminus.C); List nPwsms = Q07065.Digest(semiNParams, null, null).ToList(); List cPwsms = Q07065.Digest(semiCParams, null, null).ToList(); - Assert.IsFalse(nPwsms.Any(x => x.Length > semiNParams.MaxPeptideLength)); - Assert.IsFalse(cPwsms.Any(x => 
x.Length > semiCParams.MaxPeptideLength)); + Assert.IsFalse(nPwsms.Any(x => x.Length > semiNParams.MaxLength)); + Assert.IsFalse(cPwsms.Any(x => x.Length > semiCParams.MaxLength)); Assert.IsTrue(nPwsms.Any(x => x.Length == semiNParams.MaxPeptideLength)); Assert.IsTrue(cPwsms.Any(x => x.Length == semiCParams.MaxPeptideLength)); @@ -114,10 +117,10 @@ public static void TestSpeedyNonAndSemiSpecificMaxLength() DigestionParams nonCParams = new DigestionParams("Asp-N", 3, 7, 50, searchModeType: CleavageSpecificity.None, fragmentationTerminus: FragmentationTerminus.C); nPwsms = Q07065.Digest(nonNParams, null, null).ToList(); cPwsms = Q07065.Digest(nonCParams, null, null).ToList(); - Assert.IsFalse(nPwsms.Any(x => x.Length > nonNParams.MaxPeptideLength)); - Assert.IsFalse(cPwsms.Any(x => x.Length > nonCParams.MaxPeptideLength)); - Assert.IsTrue(nPwsms.Any(x => x.Length == nonNParams.MaxPeptideLength)); - Assert.IsTrue(cPwsms.Any(x => x.Length == nonCParams.MaxPeptideLength)); + Assert.IsFalse(nPwsms.Any(x => x.Length > nonNParams.MaxLength)); + Assert.IsFalse(cPwsms.Any(x => x.Length > nonCParams.MaxLength)); + Assert.IsTrue(nPwsms.Any(x => x.Length == nonNParams.MaxLength)); + Assert.IsTrue(cPwsms.Any(x => x.Length == nonCParams.MaxLength)); Assert.IsTrue(nPwsms.Any(x => x.Length == nonNParams.MinPeptideLength)); Assert.IsTrue(cPwsms.Any(x => x.Length == nonCParams.MinPeptideLength)); } @@ -168,7 +171,7 @@ public static void TestNonAndSemiSpecificDigests() //Check that, when we digested with semi, we made all possible semi sequences, labeled full and semi correctly, and have no duplicates foreach (string s in expectedProductsSemiFiveCleavages) //foreach precursor peptide { - for (int i = 0; i < s.Length - semiDigestionParams.MinPeptideLength; i++) //cleave it to be semi + for (int i = 0; i < s.Length - semiDigestionParams.MinLength; i++) //cleave it to be semi { string sToFind = s.Substring(i); //get a peptide from this precursor (fixed C) var peps = 
fiveCleavageProductsSemiTrypsin.Where(x => x.BaseSequence.Equals(sToFind)).ToArray(); //find the peptide in the digested list @@ -189,7 +192,7 @@ public static void TestNonAndSemiSpecificDigests() Assert.IsTrue(pwsmRemake.CleavageSpecificityForFdrCategory == pep.CleavageSpecificityForFdrCategory); //Repeat the above going from the other direction (fixed N) - sToFind = s.Substring(0, semiDigestionParams.MinPeptideLength + i); //get a peptide from this precursor (fixed N) + sToFind = s.Substring(0, semiDigestionParams.MinLength + i); //get a peptide from this precursor (fixed N) peps = fiveCleavageProductsSemiTrypsin.Where(x => x.BaseSequence.Equals(sToFind)).ToArray();//find the peptide in the digested list Assert.IsTrue(peps.Length == 1);//There should be exactly one! More than that means there are duplicates, fewer means we didn't generate it! pep = peps[0];//get that single peptide @@ -506,7 +509,7 @@ public static void TestSingleProteasesTinyProtein() List nPwsms = P56381.Digest(singleN, null, null).ToList(); List cPwsms = P56381.Digest(singleC, null, null).ToList(); Assert.IsTrue(nPwsms.Count == cPwsms.Count); - Assert.IsTrue(nPwsms.Count == P56381.Length - singleN.MinPeptideLength + 1); + Assert.IsTrue(nPwsms.Count == P56381.Length - singleN.MinLength + 1); } [Test] diff --git a/mzLib/Test/TestPeptides.cs b/mzLib/Test/TestPeptides.cs index 492866aa5..c6a0193ce 100644 --- a/mzLib/Test/TestPeptides.cs +++ b/mzLib/Test/TestPeptides.cs @@ -24,6 +24,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Omics.Digestion; using Omics.Fragmentation; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; diff --git a/mzLib/Test/TestProductMassesMightHaveDuplicates.cs b/mzLib/Test/TestProductMassesMightHaveDuplicates.cs index 007bc6804..dd6702d16 100644 --- a/mzLib/Test/TestProductMassesMightHaveDuplicates.cs +++ b/mzLib/Test/TestProductMassesMightHaveDuplicates.cs @@ -9,6 +9,7 @@ using Omics.Fragmentation; using 
Stopwatch = System.Diagnostics.Stopwatch; using Omics.Fragmentation.Peptide; +using Omics.Modifications; namespace Test { diff --git a/mzLib/Test/TestProteinDatabase.cs b/mzLib/Test/TestProteinDatabase.cs index f3261c1ac..b03fd98c7 100644 --- a/mzLib/Test/TestProteinDatabase.cs +++ b/mzLib/Test/TestProteinDatabase.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics.Modifications; using UsefulProteomicsDatabases; namespace Test diff --git a/mzLib/Test/TestProteinDigestion.cs b/mzLib/Test/TestProteinDigestion.cs index 8780b6df4..8d96e8a4c 100644 --- a/mzLib/Test/TestProteinDigestion.cs +++ b/mzLib/Test/TestProteinDigestion.cs @@ -8,7 +8,9 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics.Digestion; using Omics.Fragmentation; +using Omics.Modifications; using UsefulProteomicsDatabases; using static Chemistry.PeriodicTable; using Stopwatch = System.Diagnostics.Stopwatch; diff --git a/mzLib/Test/TestProteinProperties.cs b/mzLib/Test/TestProteinProperties.cs index 65f72fdc5..f22e71df9 100644 --- a/mzLib/Test/TestProteinProperties.cs +++ b/mzLib/Test/TestProteinProperties.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics.Modifications; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test diff --git a/mzLib/Test/TestPtmListLoader.cs b/mzLib/Test/TestPtmListLoader.cs index d70946717..3cdcf1412 100644 --- a/mzLib/Test/TestPtmListLoader.cs +++ b/mzLib/Test/TestPtmListLoader.cs @@ -4,6 +4,7 @@ using System; using System.IO; using System.Linq; +using Omics.Modifications; using UsefulProteomicsDatabases; using Stopwatch = System.Diagnostics.Stopwatch; diff --git a/mzLib/Test/TestRetentionTimePrediction.cs b/mzLib/Test/TestRetentionTimePrediction.cs index fb197279d..dcce8e73b 100644 --- a/mzLib/Test/TestRetentionTimePrediction.cs +++ b/mzLib/Test/TestRetentionTimePrediction.cs @@ -4,6 +4,7 @@ using Proteomics.RetentionTimePrediction; 
using System; using System.Collections.Generic; +using Omics.Modifications; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test diff --git a/mzLib/Test/TestSeqCoverage.cs b/mzLib/Test/TestSeqCoverage.cs index f7ce51e85..e6af1b7dd 100644 --- a/mzLib/Test/TestSeqCoverage.cs +++ b/mzLib/Test/TestSeqCoverage.cs @@ -5,6 +5,8 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using Omics.Digestion; +using Omics.Modifications; using Stopwatch = System.Diagnostics.Stopwatch; namespace Test diff --git a/mzLib/Transcriptomics/Digestion/rnases.tsv b/mzLib/Transcriptomics/Digestion/rnases.tsv new file mode 100644 index 000000000..481cdc342 --- /dev/null +++ b/mzLib/Transcriptomics/Digestion/rnases.tsv @@ -0,0 +1,11 @@ +Name Sequences Inducing Cleavage Sequences Preventing Cleavage Cleavage Terminus Cleavage Specificity Site Regular Expression Cleavage Mass Shifts Notes +RNase T1 G| full +RNase T2 "G|,C|,A|,U|" full +RNase A "C|,U|" full +top-down none +RNase 1 "G|,C|,A|,U|" full +RNase PhyM "A|,U|" full +RNase U2 "G|,A|" full +Cusativin C| full +RNase_MC1 "|U" full +colicin_E5 "G|U" full colicin E5 cuts after G (or Q) followed by U diff --git a/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs b/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs new file mode 100644 index 000000000..295769fd0 --- /dev/null +++ b/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Chemistry; +using MassSpectrometry; +using Omics; +using Omics.Modifications; + +namespace Transcriptomics +{ + public interface INucleicAcid : IHasChemicalFormula, IBioPolymer + { + IHasChemicalFormula FivePrimeTerminus { get; set; } + + IHasChemicalFormula ThreePrimeTerminus { get; set; } + } +} diff --git a/mzLib/Transcriptomics/Interfaces/INucleotide.cs b/mzLib/Transcriptomics/Interfaces/INucleotide.cs new file mode 100644 index 
000000000..aa60dcea7 --- /dev/null +++ b/mzLib/Transcriptomics/Interfaces/INucleotide.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Chemistry; + +namespace Transcriptomics +{ + public interface INucleotide : IHasChemicalFormula + { + char Letter { get; } + string Symbol { get; } + } +} diff --git a/mzLib/Transcriptomics/Transcriptomics.csproj b/mzLib/Transcriptomics/Transcriptomics.csproj new file mode 100644 index 000000000..a670300ca --- /dev/null +++ b/mzLib/Transcriptomics/Transcriptomics.csproj @@ -0,0 +1,27 @@ + + + + net6.0 + x64 + enable + enable + + + + full + true + + + + + + + + + + + Always + + + + diff --git a/mzLib/UsefulProteomicsDatabases/DecoyProteinGenerator.cs b/mzLib/UsefulProteomicsDatabases/DecoyProteinGenerator.cs index 4da442aee..67a442782 100644 --- a/mzLib/UsefulProteomicsDatabases/DecoyProteinGenerator.cs +++ b/mzLib/UsefulProteomicsDatabases/DecoyProteinGenerator.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; +using Omics.Modifications; namespace UsefulProteomicsDatabases { diff --git a/mzLib/UsefulProteomicsDatabases/Loaders.cs b/mzLib/UsefulProteomicsDatabases/Loaders.cs index 90ec2b2eb..9c1948a2d 100644 --- a/mzLib/UsefulProteomicsDatabases/Loaders.cs +++ b/mzLib/UsefulProteomicsDatabases/Loaders.cs @@ -29,6 +29,7 @@ using System.Threading; using System.Threading.Tasks; using System.Xml.Serialization; +using Omics.Modifications; using UsefulProteomicsDatabases.Generated; using TopDownProteomics.IO.Obo; diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs index b9af8edb2..8544c2233 100644 --- a/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs +++ b/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs @@ -10,6 +10,7 @@ using System.Text; using System.Text.RegularExpressions; using System.Xml; +using Omics.Modifications; 
namespace UsefulProteomicsDatabases { diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs index 2ca130ed6..155945558 100644 --- a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs +++ b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs @@ -5,6 +5,7 @@ using System.IO; using System.Linq; using System.Xml; +using Omics.Modifications; namespace UsefulProteomicsDatabases { diff --git a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs index 95a9ce5ac..a93c896e7 100644 --- a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs +++ b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Text.RegularExpressions; using System.Xml; +using Omics.Modifications; namespace UsefulProteomicsDatabases { diff --git a/mzLib/UsefulProteomicsDatabases/PtmListLoader.cs b/mzLib/UsefulProteomicsDatabases/PtmListLoader.cs index ddcd50268..ed75a92be 100644 --- a/mzLib/UsefulProteomicsDatabases/PtmListLoader.cs +++ b/mzLib/UsefulProteomicsDatabases/PtmListLoader.cs @@ -7,6 +7,7 @@ using System.Globalization; using System.IO; using System.Linq; +using Omics.Modifications; namespace UsefulProteomicsDatabases { diff --git a/mzLib/UsefulProteomicsDatabases/UnimodLoader.cs b/mzLib/UsefulProteomicsDatabases/UnimodLoader.cs index 56512e841..a765d7d5c 100644 --- a/mzLib/UsefulProteomicsDatabases/UnimodLoader.cs +++ b/mzLib/UsefulProteomicsDatabases/UnimodLoader.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using System.Xml.Serialization; +using Omics.Modifications; using UsefulProteomicsDatabases.Generated; namespace UsefulProteomicsDatabases diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec index 840254ea6..67bfe4fa1 100644 --- a/mzLib/mzLib.nuspec +++ b/mzLib/mzLib.nuspec @@ -2,7 +2,7 @@ mzLib - 5.0.004 + 5.0.543 mzLib Stef S. Stef S. 
@@ -56,6 +56,8 @@ + + @@ -76,5 +78,7 @@ + + diff --git a/mzLib/mzLib.sln b/mzLib/mzLib.sln index 725813979..f8b4f8a51 100644 --- a/mzLib/mzLib.sln +++ b/mzLib/mzLib.sln @@ -35,6 +35,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Readers", "Readers\Readers. EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Omics", "Omics\Omics.csproj", "{48CA975B-65DD-4A03-89A3-EA2448293894}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Transcriptomics", "Transcriptomics\Transcriptomics.csproj", "{4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -137,6 +139,12 @@ Global {48CA975B-65DD-4A03-89A3-EA2448293894}.Release|x64.Build.0 = Release|x64 {48CA975B-65DD-4A03-89A3-EA2448293894}.TestAndRelease|x64.ActiveCfg = Release|x64 {48CA975B-65DD-4A03-89A3-EA2448293894}.TestAndRelease|x64.Build.0 = Release|x64 + {4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}.Debug|x64.ActiveCfg = Debug|x64 + {4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}.Debug|x64.Build.0 = Debug|x64 + {4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}.Release|x64.ActiveCfg = Release|x64 + {4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}.Release|x64.Build.0 = Release|x64 + {4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}.TestAndRelease|x64.ActiveCfg = Release|x64 + {4661C249-5B9A-4D9F-B852-15EAF3B4F9A3}.TestAndRelease|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE