-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rearranging classes to fit within the Transcriptomics Structure (#743)
* correct Within calculation * update unit tests * this is the spot * add space * first move * psmFromTsv unit tests * moved library spectrum * empty unit test for library spectrum * m * library spectrum unit tests * lib spec unit tests * PSMTSV unit tests * add tests for variants and localized glycans * capitalization convention * read internal ions test * uncomment lines * moved fragmentation and library spectrum to new project Omics * Revert "moved fragmentation and library spectrum to new project Omics" This reverts commit d1bc75c. * someInterfaces * good midpoint * omics classes and interfaces seem to be working * move LibrarySpectrum class to Omics. Create SpectrumMatchFromTsvHeader interface in Omics * not working * Fixed up the PR * fix broken test * some unit tests * dhg * Expanded test coverage on file classes * new header and xlink psmtsv reader unit tests * space update * update nuspec for omics and added peptide folder to omics fragmentation * Moved around most everything that will need to be for Transcriptomics implementation * Made all tests pass * Moved a few methods out of PeptideWithSetModifications and into IBioPolymerWithSetMods * Moved methods from ProteolyticPeptide to LysisProduct * Marked RNase.tsv to copy always * Cleaned up the code quite a bit * Updated product class equality members * Updated product class equality members * This one method keeps fighting me * Removed AnyCPU * Added tests to ChemicalFormula operators * Update mzLib.sln * Updated Nuspec * Changed naming convention in Digestion Agent * Added comment to clarify base loss ions from 5' end Adjusted Chemical Formula subtraction when null - value * Added Comments to BioPolymerWithSetModsExtensions Made LysisProduct an abstract class and its constructor protected * Renamed LysisProduct to DigestionProduct Changed IDigestionParams.Enzyme to IDigestionParams.DigestionAgent * changed name on one method in DigestionProduct * Renamed get fixed mods method in DigestionProduct --------- 
Co-authored-by: MICHAEL SHORTREED <[email protected]> Co-authored-by: trishorts <[email protected]>
- Loading branch information
1 parent
627f4a1
commit 167fa9b
Showing
65 changed files
with
1,133 additions
and
685 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
using System.Text; | ||
using Chemistry; | ||
using Omics.Modifications; | ||
|
||
namespace Omics; | ||
|
||
/// <summary>
/// Extension methods that render an <see cref="IBioPolymerWithSetMods"/> as an annotated sequence string.
/// All methods read modifications from AllModsOneIsNterminus using the same keys:
/// 1 for the N-terminus, r + 2 for the zero-based residue r, and Length + 2 for the C-terminus.
/// </summary>
public static class BioPolymerWithSetModsExtensions
{
    /// <summary>
    /// Returns the full sequence with numeric mass shifts INSTEAD OF PTM names in brackets [].
    /// Some external tools cannot parse PTM names and instead require the mass of each
    /// modification, in brackets, directly after the position it modifies.
    /// Each mass is passed through RoundedDouble(6) and formatted with the invariant culture,
    /// so the decimal separator and sign characters do not depend on the machine's regional settings.
    /// The N-terminal mass shift is written before the first residue and never carries an explicit
    /// '+' sign; residue and C-terminal shifts are prefixed with '+' when the mass is greater than zero.
    /// </summary>
    /// <param name="withSetMods">The biopolymer whose residues and modifications are written out.</param>
    /// <returns>The residues interleaved with bracketed mass shifts.</returns>
    public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withSetMods)
    {
        var sequence = new StringBuilder();

        // modification on the N-terminus (written without a '+' sign, as it always has been)
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            sequence.Append(FormatMassShift(mod, signPositive: false));
        }

        for (int r = 0; r < withSetMods.Length; r++)
        {
            sequence.Append(withSetMods[r]);

            // modification on this residue
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
            {
                sequence.Append(FormatMassShift(mod, signPositive: true));
            }
        }

        // modification on the C-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
        {
            sequence.Append(FormatMassShift(mod, signPositive: true));
        }

        return sequence.ToString();
    }

    /// <summary>
    /// Returns the full sequence annotated only with the modifications whose ModificationType
    /// is a key of <paramref name="modstoWritePruned"/>.
    /// </summary>
    /// <param name="withSetMods">The biopolymer whose residues and modifications are written out.</param>
    /// <param name="modstoWritePruned">
    /// Modification types to keep; only the keys are consulted here.
    /// When null, the unmodified BaseSequence is returned.
    /// </param>
    /// <returns>The residues interleaved with [ModificationType:IdWithMotif] annotations for the retained modifications.</returns>
    public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
        IReadOnlyDictionary<string, int> modstoWritePruned)
    {
        if (modstoWritePruned == null)
        {
            return withSetMods.BaseSequence;
        }

        var sequence = new StringBuilder();

        // variable modification on the N-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification nTerminalMod)
            && modstoWritePruned.ContainsKey(nTerminalMod.ModificationType))
        {
            sequence.Append(FormatModificationName(nTerminalMod));
        }

        for (int r = 0; r < withSetMods.Length; r++)
        {
            sequence.Append(withSetMods[r]);

            // variable modification on this residue
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out Modification residueMod)
                && modstoWritePruned.ContainsKey(residueMod.ModificationType))
            {
                sequence.Append(FormatModificationName(residueMod));
            }
        }

        // variable modification on the C-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out Modification cTerminalMod)
            && modstoWritePruned.ContainsKey(cTerminalMod.ModificationType))
        {
            sequence.Append(FormatModificationName(cTerminalMod));
        }

        return sequence.ToString();
    }

    /// <summary>
    /// Determines the full sequence of a BioPolymerWithSetMods from its residues and modifications,
    /// writing every modification as [ModificationType:IdWithMotif].
    /// </summary>
    /// <param name="withSetMods">The biopolymer whose residues and modifications are written out.</param>
    /// <returns>The residues interleaved with an annotation for every modification present.</returns>
    public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
    {
        var sequence = new StringBuilder();

        // modification on the N-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            sequence.Append(FormatModificationName(mod));
        }

        for (int r = 0; r < withSetMods.Length; r++)
        {
            sequence.Append(withSetMods[r]);

            // modification on this residue
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
            {
                sequence.Append(FormatModificationName(mod));
            }
        }

        // modification on the C-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
        {
            sequence.Append(FormatModificationName(mod));
        }

        return sequence.ToString();
    }

    /// <summary>
    /// Formats a modification's mass as "[mass]", or "[+mass]" when <paramref name="signPositive"/>
    /// is set and the mass is greater than zero.
    /// </summary>
    private static string FormatMassShift(Modification mod, bool signPositive)
    {
        var roundedMass = mod.MonoisotopicMass.RoundedDouble(6);

        // The sign decision uses the unrounded mass, exactly as the inline code did.
        string sign = signPositive && mod.MonoisotopicMass > 0 ? "+" : string.Empty;

        // Invariant culture keeps '.' as the decimal separator and '-' as the negative sign on
        // every machine; the consumers of this string expect plain numeric input.
        return System.FormattableString.Invariant($"[{sign}{roundedMass}]");
    }

    /// <summary>
    /// Formats a modification as "[ModificationType:IdWithMotif]".
    /// </summary>
    private static string FormatModificationName(Modification mod)
    {
        return '[' + mod.ModificationType + ":" + mod.IdWithMotif + ']';
    }
}
2 changes: 1 addition & 1 deletion
2
...oteolyticDigestion/CleavageSpecificity.cs → mzLib/Omics/Digestion/CleavageSpecificity.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
namespace Proteomics.ProteolyticDigestion | ||
namespace Omics.Digestion | ||
{ | ||
public enum CleavageSpecificity | ||
{ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
using Omics.Modifications; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Omics.Digestion | ||
{ | ||
/// <summary>
/// Base class for an agent that cleaves a sequence at positions described by a set of digestion motifs.
/// </summary>
public abstract class DigestionAgent
{
    /// <summary>
    /// Stores the agent's descriptive data; a null <paramref name="motifList"/> is replaced by an empty list.
    /// </summary>
    /// <param name="name">Name of the agent; also what <see cref="ToString"/> returns.</param>
    /// <param name="cleavageSpecificity">Cleavage specificity of the agent.</param>
    /// <param name="motifList">Motifs consulted when locating cleavage sites; may be null.</param>
    /// <param name="cleavageMod">Modification associated with cleavage by this agent.</param>
    protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List<DigestionMotif> motifList, Modification cleavageMod)
    {
        Name = name;
        CleavageSpecificity = cleavageSpecificity;
        CleavageMod = cleavageMod;
        DigestionMotifs = motifList ?? new List<DigestionMotif>();
    }

    public string Name { get; init; }
    public CleavageSpecificity CleavageSpecificity { get; init; }
    public List<DigestionMotif> DigestionMotifs { get; init; }
    public Modification CleavageMod { get; set; }

    /// <summary>
    /// Returns the agent's name.
    /// </summary>
    public override string ToString() => Name;

    /// <summary>
    /// Checks a length against both the minimum and the maximum.
    /// </summary>
    /// <param name="length">Length to check.</param>
    /// <param name="minLength">Smallest acceptable length (inclusive).</param>
    /// <param name="maxLength">Largest acceptable length (inclusive).</param>
    /// <returns>True when the length satisfies both bounds.</returns>
    protected static bool ValidLength(int length, int minLength, int maxLength)
        => ValidMinLength(length, minLength) && ValidMaxLength(length, maxLength);

    /// <summary>
    /// Checks a length against the minimum only.
    /// </summary>
    /// <param name="length">Length to check.</param>
    /// <param name="minLength">Smallest acceptable length (inclusive).</param>
    /// <returns>True when the length is at least the minimum.</returns>
    protected static bool ValidMinLength(int length, int minLength)
        => length >= minLength;

    /// <summary>
    /// Checks a length against the maximum only.
    /// </summary>
    /// <param name="length">Length to check; a null length is always accepted.</param>
    /// <param name="maxLength">Largest acceptable length (inclusive).</param>
    /// <returns>True when the length is null or does not exceed the maximum.</returns>
    protected static bool ValidMaxLength(int? length, int maxLength)
        => length is null || length.Value <= maxLength;

    /// <summary>
    /// Finds the indices after which this agent cleaves the given sequence.
    /// </summary>
    /// <param name="sequence">Sequence to scan for cleavage sites.</param>
    /// <returns>
    /// The distinct cleavage indices in ascending order, always including 0 and the sequence length.
    /// </returns>
    public List<int> GetDigestionSiteIndices(string sequence)
    {
        // Both ends of the sequence count as cleavage sites so the terminal products are retained.
        // The sorted set keeps the collected sites distinct and in ascending order.
        var sites = new SortedSet<int> { 0, sequence.Length };

        for (int position = 0; position < sequence.Length; position++)
        {
            int furthestCut = -1; // -1 means no motif has proposed a cut for this position
            bool blocked = false;

            foreach (DigestionMotif motif in DigestionMotifs)
            {
                var fit = motif.Fits(sequence, position);

                // Item1: the motif fits here; only cuts that fall before the end of the sequence count.
                if (fit.Item1 && position + motif.CutIndex < sequence.Length)
                {
                    furthestCut = Math.Max(position + motif.CutIndex, furthestCut);
                }

                // Item2: the motif prevents cleavage; a single preventing motif vetoes this position.
                blocked |= fit.Item2;
            }

            if (furthestCut != -1 && !blocked)
            {
                sites.Add(furthestCut);
            }
        }

        return sites.ToList();
    }
}
} |
8 changes: 3 additions & 5 deletions
8
...cs/ProteolyticDigestion/DigestionMotif.cs → mzLib/Omics/Digestion/DigestionMotif.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.