Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
trishorts committed May 23, 2024
2 parents 5ddeab3 + 84b2e9d commit c9551e3
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 29 deletions.
16 changes: 13 additions & 3 deletions mzLib/FlashLFQ/FlashLFQResults.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using MathNet.Numerics.Statistics;
using Easy.Common.Extensions;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.IO;
Expand All @@ -13,20 +14,28 @@ public class FlashLfqResults
public readonly Dictionary<string, Peptide> PeptideModifiedSequences;
public readonly Dictionary<string, ProteinGroup> ProteinGroups;
public readonly Dictionary<SpectraFileInfo, List<ChromatographicPeak>> Peaks;
private readonly HashSet<string> _peptideModifiedSequencesToQuantify;

public FlashLfqResults(List<SpectraFileInfo> spectraFiles, List<Identification> identifications)
public FlashLfqResults(List<SpectraFileInfo> spectraFiles, List<Identification> identifications, HashSet<string> peptides = null)
{
SpectraFiles = spectraFiles;
PeptideModifiedSequences = new Dictionary<string, Peptide>();
ProteinGroups = new Dictionary<string, ProteinGroup>();
Peaks = new Dictionary<SpectraFileInfo, List<ChromatographicPeak>>();
if(peptides == null || !peptides.Any())
{
peptides = identifications.Select(id => id.ModifiedSequence).ToHashSet();
}
_peptideModifiedSequencesToQuantify = peptides;

foreach (SpectraFileInfo file in spectraFiles)
{
Peaks.Add(file, new List<ChromatographicPeak>());
}

foreach (Identification id in identifications)

// Only quantify peptides within the set of valid peptide modified (full) sequences. This is done to enable pepitde-level FDR control of reported results
foreach (Identification id in identifications.Where(id => peptides.Contains(id.ModifiedSequence)))
{
if (!PeptideModifiedSequences.TryGetValue(id.ModifiedSequence, out Peptide peptide))
{
Expand Down Expand Up @@ -120,6 +129,7 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
var groupedPeaks = filePeaks.Value
.Where(p => p.NumIdentificationsByFullSeq == 1)
.GroupBy(p => p.Identifications.First().ModifiedSequence)
.Where(group => _peptideModifiedSequencesToQuantify.Contains(group.Key))
.ToList();

foreach (var sequenceWithPeaks in groupedPeaks)
Expand Down
51 changes: 47 additions & 4 deletions mzLib/FlashLFQ/FlashLfqEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Threading.Tasks;
using UsefulProteomicsDatabases;
using System.Runtime.CompilerServices;
using Easy.Common.Extensions;

[assembly: InternalsVisibleTo("TestFlashLFQ")]

Expand Down Expand Up @@ -57,6 +58,12 @@ public class FlashLfqEngine
private Stopwatch _globalStopwatch;
private List<Identification> _allIdentifications;
/// <summary>
/// These peptides will be reported in the QuantifiedPeptides output and used for protein quant.
/// Other peptides may appear in the QuantifiedPeaks output, but this list is used to enable
/// peptide-level FDR filtering
/// </summary>
public HashSet<string> PeptidesModifiedSequencesToQuantify { get; init; }
/// <summary>
/// Dictionary linking a modified sequence to a List of tuples containing
/// the mass shifts (isotope mass - monoisotopic mass) and normalized abundances for the
/// isotopes for a given peptide
Expand All @@ -67,6 +74,13 @@ public class FlashLfqEngine
internal Dictionary<SpectraFileInfo, Ms1ScanInfo[]> _ms1Scans;
internal PeakIndexingEngine _peakIndexingEngine;

/// <summary>
/// Create an instance of FlashLFQ that will quantify peptides based on their precursor intensity in MS1 spectra
/// </summary>
/// <param name="allIdentifications">A list of identifications corresponding to MS2 peptide detections. One ID per peptide per file</param>
/// <param name="integrate">Optional. Bool indicating whether peaks should be integrated before quantification. It is HIGHLY recommended this is set to FALSE</param>
/// <param name="peptideSequencesToUse">Optional. A list of strings corresponding to the modified sequences of peptides that should be quantified/used for
/// protein level quant. Reccommended use is to pass in the full sequence of every peptide at 1% peptide-level FDR</param>
public FlashLfqEngine(
List<Identification> allIdentifications,
bool normalize = false,
Expand All @@ -93,6 +107,7 @@ public FlashLfqEngine(
int mcmcBurninSteps = 1000,
bool useSharedPeptidesForProteinQuant = false,
bool pairedSamples = false,
List<string> peptideSequencesToUse = null,
int? randomSeed = null)
{
Loaders.LoadElements();
Expand Down Expand Up @@ -128,6 +143,8 @@ public FlashLfqEngine(
McmcSteps = mcmcSteps;
McmcBurninSteps = mcmcBurninSteps;
UseSharedPeptidesForProteinQuant = useSharedPeptidesForProteinQuant;
PeptidesModifiedSequencesToQuantify = peptideSequencesToUse.IsNotNullOrEmpty() ? new HashSet<string>(peptideSequencesToUse)
: allIdentifications.Select(id => id.ModifiedSequence).ToHashSet();
RandomSeed = randomSeed;

if (MaxThreads == -1 || MaxThreads >= Environment.ProcessorCount)
Expand All @@ -149,7 +166,7 @@ public FlashLfqResults Run()
{
_globalStopwatch.Start();
_ms1Scans = new Dictionary<SpectraFileInfo, Ms1ScanInfo[]>();
_results = new FlashLfqResults(_spectraFileInfo, _allIdentifications);
_results = new FlashLfqResults(_spectraFileInfo, _allIdentifications, PeptidesModifiedSequencesToQuantify);

// build m/z index keys
CalculateTheoreticalIsotopeDistributions();
Expand Down Expand Up @@ -588,7 +605,8 @@ private MbrScorer BuildMbrScorer(List<ChromatographicPeak> acceptorFileIdentifie
// Construct a distribution of ppm errors for all MSMS peaks in the acceptor file
var apexToAcceptorFilePeakDict = new Dictionary<IndexedMassSpectralPeak, ChromatographicPeak>();
List<double> ppmErrors = new List<double>();
foreach (var peak in acceptorFileIdentifiedPeaks.Where(p => p.Apex != null))
foreach (var peak in acceptorFileIdentifiedPeaks.Where(p => p.Apex != null
&& PeptidesModifiedSequencesToQuantify.Contains(p.Identifications.First().ModifiedSequence)))
{
if (!apexToAcceptorFilePeakDict.ContainsKey(peak.Apex.IndexedPeak))
{
Expand Down Expand Up @@ -677,6 +695,7 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
!p.IsMbrPeak
&& p.NumIdentificationsByFullSeq == 1
&& p.IsotopicEnvelopes.Any()
&& PeptidesModifiedSequencesToQuantify.Contains(p.Identifications.First().ModifiedSequence) // Only do MBR for peptides that we want to quantify
&& !acceptorFileIdentifiedSequences.Contains(p.Identifications.First().ModifiedSequence)
&& (!RequireMsmsIdInCondition || p.Identifications.Any(v => v.ProteinGroups.Any(g => thisFilesMsmsIdentifiedProteins.Contains(g))))).ToList();

Expand Down Expand Up @@ -1060,11 +1079,35 @@ private void RunErrorChecking(SpectraFileInfo spectraFile)

if (!tryPeak.IsMbrPeak && !storedPeak.IsMbrPeak)
{
storedPeak.MergeFeatureWith(tryPeak, Integrate);
if (PeptidesModifiedSequencesToQuantify.Contains(tryPeak.Identifications.First().ModifiedSequence))
{
if (PeptidesModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
{
storedPeak.MergeFeatureWith(tryPeak, Integrate);
}
else
{
// If the stored peak id isn't in the list of peptides to quantify, overwrite it
errorCheckedPeaksGroupedByApex[tryPeak.Apex.IndexedPeak] = tryPeak;
}
}
else
{
continue; // if the tryPeak id isn't in the list of peptides to quantify, it is discarded
}

}
else if (tryPeak.IsMbrPeak && !storedPeak.IsMbrPeak)
{
continue;
if(PeptidesModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
{
continue;
}
else
{
// If the stored peak id isn't in the list of peptides to quantify, overwrite it
errorCheckedPeaksGroupedByApex[tryPeak.Apex.IndexedPeak] = tryPeak;
}
}
else if (tryPeak.IsMbrPeak && storedPeak.IsMbrPeak)
{
Expand Down
48 changes: 44 additions & 4 deletions mzLib/Omics/Fragmentation/Peptide/DissociationTypeCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ namespace Omics.Fragmentation.Peptide
{
public class DissociationTypeCollection
{
/// <summary>
/// Collection of product types to generate based upon the dissociation type
/// </summary>
public static Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType = new Dictionary<DissociationType, List<ProductType>>
{
{ DissociationType.Unknown, new List<ProductType>() },
Expand All @@ -22,9 +25,23 @@ public class DissociationTypeCollection
{ DissociationType.ISCID, new List<ProductType>() }
};


/// <summary>
/// Collection of product types to generate based upon the dissociation type and fragmentation terminus
/// </summary>
/// <param name="dissociationType">Dissociation Type to get products of</param>
/// <param name="fragmentationTerminus">Terminus to get products of</param>
/// <returns></returns>
public static List<ProductType> GetTerminusSpecificProductTypesFromDissociation(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)
{
if (!TerminusSpecificProductTypesFromDissociation.TryGetValue((dissociationType, fragmentationTerminus), out List<ProductType> productTypes))
List<ProductType> productTypes;
// if using custom dissociation, calculate every time instead of caching
if (dissociationType == DissociationType.Custom)
{
productTypes = TerminusSpecificProductTypes.ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus]
.Intersect(DissociationTypeCollection.ProductsFromDissociationType[dissociationType]).ToList();
}
else if (!TerminusSpecificProductTypesFromDissociation.TryGetValue((dissociationType, fragmentationTerminus), out productTypes))
{
lock (TerminusSpecificProductTypesFromDissociation)
{
Expand All @@ -42,6 +59,12 @@ public static List<ProductType> GetTerminusSpecificProductTypesFromDissociation(
return productTypes;
}

/// <summary>
/// Get the product types that can lose water or ammonia from a given dissociation type and fragmentation terminus
/// </summary>
/// <param name="dissociationType"></param>
/// <param name="fragmentationTerminus"></param>
/// <returns></returns>
public static List<ProductType> GetWaterAndAmmoniaLossProductTypesFromDissociation(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)
{
List<ProductType> productList = new();
Expand Down Expand Up @@ -131,15 +154,25 @@ public static List<ProductType> GetWaterAndAmmoniaLossProductTypesFromDissociati
/// </summary>
public static (double[], double[]) GetNAndCTerminalMassShiftsForDissociationType(DissociationType dissociationType)
{
if (!DissociationTypeToTerminusMassShift.TryGetValue(dissociationType, out var massShifts))
(double[], double[]) massShifts;
// if using custom dissociation, calculate every time instead of caching
if (dissociationType == DissociationType.Custom)
{
massShifts = (
GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N)
.Select(GetMassShiftFromProductType).ToArray(),
GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C)
.Select(GetMassShiftFromProductType).ToArray());
}
else if (!DissociationTypeToTerminusMassShift.TryGetValue(dissociationType, out massShifts))
{
lock (DissociationTypeToTerminusMassShift)
{
if (!DissociationTypeToTerminusMassShift.TryGetValue(dissociationType, out massShifts))
{
DissociationTypeToTerminusMassShift.Add(dissociationType,
(GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N).Select(p => GetMassShiftFromProductType(p)).ToArray(),
GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C).Select(p => GetMassShiftFromProductType(p)).ToArray()));
(GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N).Select(GetMassShiftFromProductType).ToArray(),
GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C).Select(GetMassShiftFromProductType).ToArray()));

massShifts = DissociationTypeToTerminusMassShift[dissociationType];
}
Expand All @@ -149,6 +182,13 @@ public static (double[], double[]) GetNAndCTerminalMassShiftsForDissociationType
return massShifts;
}

/// <summary>
/// Get the mass shift for a given product type due to breaking the amide bond
/// </summary>
/// <param name="productType"></param>
/// <returns></returns>
/// <exception cref="ArgumentOutOfRangeException"></exception>
/// <exception cref="MzLibUtil.MzLibException"></exception>
public static double GetMassShiftFromProductType(ProductType productType)
{
if (NeutralMassShiftFromProductType.TryGetValue(productType, out double? shift))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
{
public class TerminusSpecificProductTypes
{
/// <summary>
/// The types of ions that can be generated from a peptide fragment, based on the terminus of the fragment
/// </summary>
public static Dictionary<FragmentationTerminus, List<ProductType>> ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>
{
{FragmentationTerminus.N, new List<ProductType>{ ProductType.a, ProductType.aDegree, ProductType.aStar, ProductType.b, ProductType.bWaterLoss, ProductType.bAmmoniaLoss, ProductType.c } }, //all ion types that include the N-terminus
Expand All @@ -10,6 +13,9 @@ public class TerminusSpecificProductTypes
{FragmentationTerminus.None, new List<ProductType>() }
};

/// <summary>
/// The terminus of the peptide fragment that the product ion is generated from
/// </summary>
public static Dictionary<ProductType, FragmentationTerminus> ProductTypeToFragmentationTerminus = new Dictionary<ProductType, FragmentationTerminus>
{
{ ProductType.a, FragmentationTerminus.N },
Expand Down
38 changes: 36 additions & 2 deletions mzLib/Test/TestFragments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -788,13 +788,47 @@ public static void Test_CustomDissociationType()
{
DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Add(ProductType.b);
DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Add(ProductType.y);
DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Add(ProductType.c);
DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Add(ProductType.x);
Assert.IsTrue(DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Contains(ProductType.b));

var productCollection = TerminusSpecificProductTypes.ProductIonTypesFromSpecifiedTerminus[FragmentationTerminus.N].Intersect(DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom]);
var productCollection = TerminusSpecificProductTypes
.ProductIonTypesFromSpecifiedTerminus[FragmentationTerminus.N]
.Intersect(DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom]);
Assert.IsTrue(productCollection.Contains(ProductType.b));
Assert.IsTrue(productCollection.Contains(ProductType.c));

productCollection = TerminusSpecificProductTypes.ProductIonTypesFromSpecifiedTerminus[FragmentationTerminus.C].Intersect(DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom]);
productCollection = TerminusSpecificProductTypes
.ProductIonTypesFromSpecifiedTerminus[FragmentationTerminus.C]
.Intersect(DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom]);
Assert.IsTrue(productCollection.Contains(ProductType.y));
Assert.IsTrue(productCollection.Contains(ProductType.x));

var peptide = new PeptideWithSetModifications("PEPTIDE", new Dictionary<string, Modification>());
var products = new List<Product>();
peptide.Fragment(DissociationType.Custom, FragmentationTerminus.Both, products);

Assert.That(products.Any(p => p.ProductType == ProductType.b));
Assert.That(products.Any(p => p.ProductType == ProductType.y));
Assert.That(products.Any(p => p.ProductType == ProductType.c));
Assert.That(products.Any(p => p.ProductType == ProductType.x));

DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Clear();
Assert.That(!DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].Any());
DissociationTypeCollection.ProductsFromDissociationType[DissociationType.Custom].AddRange(new List<ProductType> { ProductType.b, ProductType.y });

var secondTimeProducts = new List<Product>();
peptide.Fragment(DissociationType.Custom, FragmentationTerminus.Both, secondTimeProducts);
Assert.That(secondTimeProducts.Any(p => p.ProductType == ProductType.b));
Assert.That(secondTimeProducts.Any(p => p.ProductType == ProductType.y));
Assert.That(secondTimeProducts.All(p => p.ProductType != ProductType.c));
Assert.That(secondTimeProducts.All(p => p.ProductType != ProductType.x));

var originalOnlyby = products.Where(p => p.ProductType is ProductType.b or ProductType.y).ToList();
Assert.That(originalOnlyby.Count, Is.EqualTo(secondTimeProducts.Count));

for (int i = 0; i < secondTimeProducts.Count; i++)
Assert.That(secondTimeProducts[i].Equals(originalOnlyby[i]));
}

[Test]
Expand Down
Loading

0 comments on commit c9551e3

Please sign in to comment.