diff --git a/MetaMorpheus/EngineLayer/GlobalVariables.cs b/MetaMorpheus/EngineLayer/GlobalVariables.cs index 50bd8425ff..be7303908c 100644 --- a/MetaMorpheus/EngineLayer/GlobalVariables.cs +++ b/MetaMorpheus/EngineLayer/GlobalVariables.cs @@ -19,6 +19,7 @@ using Transcriptomics.Digestion; using UsefulProteomicsDatabases; using System.Security.Cryptography; +using Omics.Modifications.IO; namespace EngineLayer { diff --git a/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs b/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs index 32b70b377b..1e9ea710ac 100644 --- a/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs +++ b/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs @@ -26,6 +26,23 @@ public Ms2ScanWithSpecificMass(MsDataScan mzLibScan, double precursorMonoisotopi TheScan = mzLibScan; + // Build the lightweight metadata snapshot + ScanMetadata = new ScanMetadata( + OneBasedScanNumber: mzLibScan.OneBasedScanNumber, + OneBasedPrecursorScanNumber: mzLibScan.OneBasedPrecursorScanNumber, + RetentionTime: mzLibScan.RetentionTime, + NumPeaks: mzLibScan.MassSpectrum.Size, + TotalIonCurrent: mzLibScan.TotalIonCurrent, + NativeId: mzLibScan.NativeId, + FullFilePath: fullFilePath, + PrecursorCharge: precursorCharge, + PrecursorMonoisotopicPeakMz: precursorMonoisotopicPeakMz, + PrecursorMass: PrecursorMass, + PrecursorIntensity: PrecursorIntensity, + PrecursorEnvelopePeakCount: PrecursorEnvelopePeakCount, + PrecursorFractionalIntensity: PrecursorFractionalIntensity, + OneOverK0: mzLibScan is TimsDataScan tims ? tims.OneOverK0 : null); + if (commonParam.DissociationType != DissociationType.LowCID) { ExperimentalFragments = neutralExperimentalFragments ?? GetNeutralExperimentalFragments(mzLibScan, commonParam); @@ -41,6 +58,14 @@ public Ms2ScanWithSpecificMass(MsDataScan mzLibScan, double precursorMonoisotopi } public MsDataScan TheScan { get; } + + /// + /// Lightweight, immutable snapshot of scan and precursor metadata. 
+ /// Designed to be passed to SpectralMatch so the heavyweight scan objects + /// can be released from memory after scoring. + /// + public ScanMetadata ScanMetadata { get; } + public double PrecursorMonoisotopicPeakMz { get; } public double PrecursorMass { get; } public int PrecursorCharge { get; } diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index c4c60e6ae5..813affa7df 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -1,145 +1,146 @@ -using Proteomics; +using Proteomics; +using FlashLFQ; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using MassSpectrometry; using Omics.Modifications; -using ThermoFisher.CommonCore.Data; using Omics; +using Omics.BioPolymerGroup; +using Omics.SpectralMatch; using Transcriptomics.Digestion; +using MzLibUtil; +using Easy.Common.Extensions; namespace EngineLayer { - public class ProteinGroup + /// + /// MetaMorpheus-specific protein group, extending the generic BioPolymerGroup from mzLib. + /// Adds PEP-based scoring, SILAC-aware peptide output, and MetaMorpheus-specific TSV column names. + /// Quantification and modification occupancy are handled by the base class's SampleGroupResult system. + /// + /// Backward-compatible alias properties (e.g., Proteins, AllPeptides, ProteinGroupScore) delegate + /// to the corresponding BioPolymerGroup base properties. Consumers should gradually migrate to + /// the base class names. + /// + /// Score() is handled entirely by the base class. 
+ /// CalculateSequenceCoverage() and GetTabSeparatedHeader() are hidden (via new) because + /// CalculateSequenceCoverage() accesses MetaMorpheus-specific SpectralMatch members + /// (GetAminoAcidCoverage, BestMatchingBioPolymersWithSetMods, FragmentCoveragePositionInPeptide) + /// that are not on the ISpectralMatch interface, and GetTabSeparatedHeader() uses + /// MetaMorpheus-specific column names and includes BestPeptidePEP. + /// + public class ProteinGroup : BioPolymerGroup { public ProteinGroup(HashSet proteins, HashSet peptides, HashSet uniquePeptides) + : base(proteins, peptides, uniquePeptides) { - Proteins = proteins; - ListOfProteinsOrderedByAccession = Proteins.OrderBy(p => p.Accession).ToList(); - ProteinGroupName = string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.Accession)); - AllPeptides = peptides; - UniquePeptides = uniquePeptides; - AllPsmsBelowOnePercentFDR = new HashSet(); + AllPsmsBelowOnePercentFDR = new HashSet(); SequenceCoverageFraction = new List(); SequenceCoverageDisplayList = new List(); SequenceCoverageDisplayListWithMods = new List(); FragmentSequenceCoverageDisplayList = new List(); - ProteinGroupScore = 0; BestPeptideScore = 0; - QValue = 0; - IsDecoy = false; - IsContaminant = false; - ModsInfo = new List(); - - // if any of the proteins in the protein group are decoys, the protein group is a decoy - foreach (var protein in proteins) - { - if (protein.IsDecoy) - { - IsDecoy = true; - break; - } - - if (protein.IsContaminant) - { - IsContaminant = true; - break; - } - } } - public bool IsDecoy { get; } + #region Backward-Compatible Property Aliases - public bool IsContaminant { get; } - - public List FilesForQuantification { get; set; } + /// Maps to . + public HashSet Proteins + { + get => BioPolymers; + set => BioPolymers = value; + } - public HashSet Proteins { get; set; } + /// Maps to . 
+ public string ProteinGroupName => BioPolymerGroupName; - public string ProteinGroupName { get; private set; } + /// Maps to . + public double ProteinGroupScore + { + get => BioPolymerGroupScore; + set => BioPolymerGroupScore = value; + } - public double ProteinGroupScore { get; set; } + /// Maps to . + public HashSet AllPeptides + { + get => AllBioPolymersWithSetMods; + set => AllBioPolymersWithSetMods = value; + } - public HashSet AllPeptides { get; set; } + /// Maps to . + public HashSet UniquePeptides + { + get => UniqueBioPolymersWithSetMods; + set => UniqueBioPolymersWithSetMods = value; + } - public HashSet UniquePeptides { get; set; } - /// - /// Contains all PSMs associated with this protein group that pass the configured quality threshold. - /// The specific filtering criteria depends on the and threshold passed to - /// during protein scoring: - /// - /// - /// - /// PSMs where QValue ≤ threshold AND QValueNotch ≤ threshold - /// - /// - /// - /// PSMs where PEP_QValue ≤ threshold - /// - /// - /// The default threshold is 0.01 (1% FDR), but this can vary based on the filter configuration. - /// This collection is populated during - /// and is used for: - /// - /// Calculating the via the method - /// Determining , , and - /// Computing sequence coverage in - /// Reporting the number of PSMs in protein group output - /// - /// - /// - /// Note: The property name "AllPsmsBelowOnePercentFDR" is a legacy name. The actual threshold - /// used is determined by the FilterThreshold parameter passed to ProteinScoringAndFdrEngine. - /// - public HashSet AllPsmsBelowOnePercentFDR { get; set; } + /// Maps to . + public double BestPeptideScore + { + get => BestBioPolymerWithSetModsScore; + set => BestBioPolymerWithSetModsScore = value; + } - public List SequenceCoverageFraction { get; private set; } + /// Maps to . 
+ public double BestPeptideQValue + { + get => BestBioPolymerWithSetModsQValue; + set => BestBioPolymerWithSetModsQValue = value; + } - public List SequenceCoverageDisplayList { get; private set; } + /// Maps to . + public List ListOfProteinsOrderedByAccession => ListOfBioPolymersOrderedByAccession; - public List SequenceCoverageDisplayListWithMods { get; private set; } + /// Maps to . Filtered to SpectraFileInfo. + public List FilesForQuantification + { + get => SamplesForQuantification?.OfType().ToList(); + set => SamplesForQuantification = value?.Cast().ToList(); + } - public List FragmentSequenceCoverageDisplayList { get; private set; } + /// Maps to . Keyed by SpectraFileInfo. + public Dictionary IntensitiesByFile + { + get => IntensitiesBySample?.ToDictionary(kvp => (SpectraFileInfo)kvp.Key, kvp => kvp.Value); + set => IntensitiesBySample = value?.ToDictionary(kvp => (ISampleInfo)kvp.Key, kvp => kvp.Value); + } - public double QValue { get; set; } + #endregion - public double BestPeptideQValue { get; set; } + #region MetaMorpheus-Specific Properties /// /// The minimum Posterior Error Probability (PEP) among all PSMs in . - /// Lower values indicate higher confidence that the best peptide identification is correct. - /// This value is populated during and is used - /// for protein group ranking when using PEP-based filtering (). + /// Lower values indicate higher confidence. Populated during protein FDR and used for PEP-based ranking. /// public double BestPeptidePEP { get; set; } - public double BestPeptideScore { get; set; } - - public int CumulativeTarget { get; set; } - - public int CumulativeDecoy { get; set; } - - public bool DisplayModsOnPeptides { get; set; } - - public List ModsInfo { get; private set; } - - public Dictionary IntensitiesByFile { get; set; } - - private List ListOfProteinsOrderedByAccession; + // Sequence coverage stored as flat lists (MM-specific format). 
+ // BioPolymerGroup uses CoverageResult instead; these are kept for TSV output compatibility. + public List SequenceCoverageFraction { get; private set; } + public List SequenceCoverageDisplayList { get; private set; } + public List SequenceCoverageDisplayListWithMods { get; private set; } + public List FragmentSequenceCoverageDisplayList { get; private set; } private string UniquePeptidesOutput; private string SharedPeptidesOutput; - //Get unique and identified peptides for output - //Convert the output if it's a SILAC experiment + #endregion + + #region Peptide Output + + /// + /// Populates unique and shared peptide output strings, converting to light SILAC sequences if needed. + /// public void GetIdentifiedPeptidesOutput(List labels) { var SharedPeptides = AllPeptides.Except(UniquePeptides); if (labels == null) { - //TODO add unit test with displaymodsonpeptides if (!DisplayModsOnPeptides) { UniquePeptidesOutput = @@ -182,7 +183,15 @@ public void GetIdentifiedPeptidesOutput(List labels) } } - public string GetTabSeparatedHeader() + #endregion + + #region TSV Output (MetaMorpheus column names, base quantification format) + + /// + /// MetaMorpheus TSV header with "Protein" column names and BestPeptidePEP. + /// Quantification/occupancy columns use the base BioPolymerGroup SampleGroupResult format. + /// + public new string GetTabSeparatedHeader() { var sb = new StringBuilder(); sb.Append("Protein Accession" + '\t'); @@ -199,36 +208,21 @@ public string GetTabSeparatedHeader() sb.Append("Sequence Coverage" + '\t'); sb.Append("Sequence Coverage with Mods" + '\t'); sb.Append("Fragment Sequence Coverage" + '\t'); - sb.Append("Modification Info List" + "\t"); - if (FilesForQuantification != null) + + // Quantification and occupancy columns from base SampleGroupResult system. + // Dynamic columns appear only when SampleGroupResults has been explicitly populated + // upstream (e.g., LFQ-success path). 
Workflows that return early from quantification + // leave it null/empty and emit only the static columns. + if (SampleGroupResults != null) { - bool unfractionated = FilesForQuantification.Select(p => p.Fraction).Distinct().Count() == 1; - bool conditionsUndefined = FilesForQuantification.All(p => string.IsNullOrEmpty(p.Condition)); - - // this is a hacky way to test for SILAC-labeled data... - // Currently SILAC will report 1 column of intensities per label per spectra file, and is NOT summarized - // into biorep-level intensity values. the SILAC code uses the "condition" field to organize this info, - // even if the experimental design is not defined by the user. So the following bool is a way to distinguish - // between experimental design being used in SILAC automatically vs. being defined by the user - bool silacExperimentalDesign = - FilesForQuantification.Any(p => !File.Exists(p.FullFilePathWithExtension)); - - foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition)) + foreach (var group in SampleGroupResults) { - foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key)) - { - if ((conditionsUndefined && unfractionated) || silacExperimentalDesign) - { - // if the data is unfractionated and the conditions haven't been defined, just use the file name as the intensity header - sb.Append("Intensity_" + sample.First().FilenameWithoutExtension + "\t"); - } - else - { - // if the data is fractionated and/or the conditions have been defined, label the header w/ the condition and biorep number - sb.Append("Intensity_" + sample.First().Condition + "_" + - (sample.First().BiologicalReplicate + 1) + "\t"); - } - } + sb.Append($"SpectralCount_{group.Label}\t"); + if (group.HasIntensityData) + sb.Append($"Intensity_{group.Label}\t"); + sb.Append($"CountOccupancy_{group.Label}\t"); + if (group.HasIntensityData) + sb.Append($"IntensityOccupancy_{group.Label}\t"); } } @@ -277,14 +271,12 @@ public override string 
ToString() masses.Add(new OligoWithSetMods(sequence, GlobalVariables.AllRnaModsKnownDictionary).MonoisotopicMass); else masses.Add(new Proteomics.AminoAcidPolymer.Peptide(sequence).MonoisotopicMass); - } catch (System.Exception) { masses.Add(double.NaN); } } - sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", masses))); sb.Append("\t"); @@ -305,31 +297,20 @@ public override string ToString() { sb.Append(GlobalVariables.CheckLengthOfOutput(SharedPeptidesOutput)); } - sb.Append("\t"); // number of peptides if (!DisplayModsOnPeptides) - { sb.Append("" + AllPeptides.Select(p => p.BaseSequence).Distinct().Count()); - } else - { sb.Append("" + AllPeptides.Select(p => p.FullSequence).Distinct().Count()); - } - sb.Append("\t"); // number of unique peptides if (!DisplayModsOnPeptides) - { sb.Append("" + UniquePeptides.Select(p => p.BaseSequence).Distinct().Count()); - } else - { sb.Append("" + UniquePeptides.Select(p => p.FullSequence).Distinct().Count()); - } - sb.Append("\t"); // sequence coverage percent @@ -349,27 +330,34 @@ public override string ToString() sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", FragmentSequenceCoverageDisplayList))); sb.Append("\t"); - //Detailed mods information list - sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ModsInfo))); - sb.Append("\t"); - - // MS1 intensity (retrieved from FlashLFQ in the SearchTask) - if (IntensitiesByFile != null && FilesForQuantification != null) + // Quantification and occupancy from base SampleGroupResult system. + // Mirrors the header: dynamic columns appear only when SampleGroupResults has been + // populated upstream so header and rows stay consistent within a file. + if (SampleGroupResults != null) { - foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition)) + bool isProteinLevel = GroupType == BioPolymerGroupType.Parent; + IEnumerable orderedKeys = isProteinLevel + ? 
ListOfProteinsOrderedByAccession.Select(p => p.Accession) + : AllPeptides.Select(p => p.BaseSequence).Distinct().OrderBy(s => s); + + foreach (var group in SampleGroupResults) { - foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key)) + sb.Append(group.SpectralCount); + sb.Append("\t"); + + if (group.HasIntensityData) { - // if the samples are fractionated, the protein will only have 1 intensity in the first fraction - // and the other fractions will be zero. we could find the first/only fraction with an intensity, - // but simply summing the fractions is easier than finding the single non-zero value - double summedIntensity = sample.Sum(file => IntensitiesByFile[file]); + if (group.Intensity > 0) + sb.Append(group.Intensity); + sb.Append("\t"); + } - if (summedIntensity > 0) - { - sb.Append(summedIntensity); - } + sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatOccupancy(orderedKeys, isProteinLevel, intensityBased: false))); + sb.Append("\t"); + if (group.HasIntensityData) + { + sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatOccupancy(orderedKeys, isProteinLevel, intensityBased: true))); sb.Append("\t"); } } @@ -381,18 +369,11 @@ public override string ToString() // isDecoy if (IsDecoy) - { sb.Append("D"); - } else if (IsContaminant) - { sb.Append("C"); - } else - { sb.Append("T"); - } - sb.Append("\t"); // cumulative target @@ -421,16 +402,17 @@ public override string ToString() return sb.ToString(); } - // Score() method is only used internally, to make protein grouping faster - // this is NOT an output and is NOT used for protein FDR calculations - public void Score() - { - // sum the scores of the best PSM per base sequence - ProteinGroupScore = AllPsmsBelowOnePercentFDR.GroupBy(p => p.BaseSequence) - .Select(p => p.Select(x => x.Score).Max()).Sum(); - } + #endregion - public void CalculateSequenceCoverage() + #region Coverage + + /// + /// Computes sequence coverage using MetaMorpheus-specific 
SpectralMatch members + /// (GetAminoAcidCoverage, BestMatchingBioPolymersWithSetMods, FragmentCoveragePositionInPeptide). + /// Results are stored in the flat list properties (SequenceCoverageFraction, etc.) + /// rather than in . + /// + public new void CalculateSequenceCoverage() { var proteinsWithUnambigSeqPsms = new Dictionary>(); var proteinsWithPsmsWithLocalizedMods = new Dictionary>(); @@ -441,14 +423,15 @@ public void CalculateSequenceCoverage() proteinsWithPsmsWithLocalizedMods.Add(protein, new List()); } - foreach (var psm in AllPsmsBelowOnePercentFDR) + foreach (var psm in AllPsmsBelowOnePercentFDR.OfType()) { // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage if (psm.BaseSequence != null) { psm.GetAminoAcidCoverage(); - foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods.Select(psm => psm.SpecificBioPolymer).DistinctBy(pep => pep.FullSequence)) + foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods + .Select(p => p.SpecificBioPolymer).DistinctBy(pep => pep.FullSequence)) { // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo if (Proteins.Contains(peptide.Parent)) @@ -462,7 +445,6 @@ public void CalculateSequenceCoverage() } } } - } } @@ -474,14 +456,18 @@ public void CalculateSequenceCoverage() HashSet coveredResiduesInProteinOneBased = new(); //loop through PSMs - foreach (SpectralMatch psm in AllPsmsBelowOnePercentFDR.Where(psm => psm.BaseSequence != null)) + foreach (SpectralMatch psm in AllPsmsBelowOnePercentFDR.OfType() + .Where(psm => psm.BaseSequence != null)) { //Calculate the covered bases within the psm. 
This is one based numbering for the peptide only psm.GetAminoAcidCoverage(); if (psm.FragmentCoveragePositionInPeptide == null) continue; + //loop through each peptide within the psm - IEnumerable pwsms = psm.BestMatchingBioPolymersWithSetMods.Select(p => p.SpecificBioPolymer) + IEnumerable pwsms = psm.BestMatchingBioPolymersWithSetMods + .Select(p => p.SpecificBioPolymer) .Where(p => p.Parent.Accession == protein.Accession); + foreach (var pwsm in pwsms) { //create a hashset to store the covered residues for the peptide, converted to the corresponding indices of the protein @@ -489,8 +475,7 @@ public void CalculateSequenceCoverage() //add the peptide start position within the protein to each covered index of the psm foreach (var position in psm.FragmentCoveragePositionInPeptide) { - coveredResiduesInPeptide.Add(position + pwsm.OneBasedStartResidue - - 1); //subtract one because these are both one based + coveredResiduesInPeptide.Add(position + pwsm.OneBasedStartResidue - 1); //subtract one because these are both one based } //Add the peptide specific positions, to the overall hashset for the protein @@ -564,135 +549,47 @@ public void CalculateSequenceCoverage() { if (mod.Value.LocationRestriction.Equals("N-terminal.")) { - sequenceCoverageDisplay = sequenceCoverageDisplay.Insert( - 0, - $"[{mod.Value.IdWithMotif}]-"); + sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(0, $"[{mod.Value.IdWithMotif}]-"); } else if (mod.Value.LocationRestriction.Equals("Anywhere.")) { int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key); - sequenceCoverageDisplay = sequenceCoverageDisplay.Insert( - modStringIndex, - $"[{mod.Value.IdWithMotif}]"); + sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(modStringIndex, $"[{mod.Value.IdWithMotif}]"); } else if (mod.Value.LocationRestriction.Equals("C-terminal.")) { - sequenceCoverageDisplay = sequenceCoverageDisplay.Insert( - sequenceCoverageDisplay.Length, - $"-[{mod.Value.IdWithMotif}]"); + 
sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(sequenceCoverageDisplay.Length, $"-[{mod.Value.IdWithMotif}]"); } } SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay); - - if (!modsOnThisProtein.Any()) - { - continue; - } - - // calculate spectral count % of modified observations - var pepModTotals = new List(); // count of modified peptides for each mod/index - var pepTotals = new List(); // count of all peptides for each mod/index - var modIndex = new List<(int index, string modName)>(); // index and name of the modified position - - foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein]) - { - foreach (var mod in pep.AllModsOneIsNterminus) - { - int pepNumTotal = 0; //For one mod, The total Pep Num - - if (mod.Value.ModificationType.Contains("Common Variable") - || mod.Value.ModificationType.Contains("Common Fixed") - || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.PepC) - || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.NPep)) - { - continue; - } - - int indexInProtein; - if (mod.Value.LocationRestriction.Equals("N-terminal.")) - { - indexInProtein = 1; - } - else if (mod.Value.LocationRestriction.Equals("Anywhere.")) - { - indexInProtein = pep.OneBasedStartResidue + mod.Key - 2; - } - else if (mod.Value.LocationRestriction.Equals("C-terminal.")) - { - indexInProtein = protein.Length; - } - else - { - // In case it's a peptide terminal mod, skip! - // we don't want this annotated in the protein's modifications - continue; - } - - var modKey = (indexInProtein, mod.Value.IdWithMotif); - if (modIndex.Contains(modKey)) - { - pepModTotals[modIndex.IndexOf(modKey)] += 1; - } - else - { - modIndex.Add(modKey); - foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein]) - { - if (indexInProtein >= pept.OneBasedStartResidue - (indexInProtein == 1 ? 
1 : 0) - && indexInProtein <= pept.OneBasedEndResidue) - { - pepNumTotal += 1; - } - } - - pepTotals.Add(pepNumTotal); - pepModTotals.Add(1); - } - } - } - - var modStrings = new List<(int aaNum, string part)>(); - for (int i = 0; i < pepModTotals.Count; i++) - { - string aa = modIndex[i].index.ToString(); - string modName = modIndex[i].modName.ToString(); - string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2"); - string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}"; - string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]"); - modStrings.Add((modIndex[i].index, tempString)); - } - - var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part)); - - if (!string.IsNullOrEmpty(modInfoString)) - { - ModsInfo.Add(modInfoString); - } } } - public void MergeProteinGroupWith(ProteinGroup other) - { - this.Proteins.UnionWith(other.Proteins); - this.AllPeptides.UnionWith(other.AllPeptides); - this.UniquePeptides.UnionWith(other.UniquePeptides); - this.AllPsmsBelowOnePercentFDR.UnionWith(other.AllPsmsBelowOnePercentFDR); - other.ProteinGroupScore = 0; + #endregion - ListOfProteinsOrderedByAccession = Proteins.OrderBy(p => p.Accession).ToList(); + #region Merge and Subset - ProteinGroupName = string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.Accession)); + /// + /// Merges another ProteinGroup into this one. Delegates entirely to + /// which handles PSMs, biopolymers, peptides, and name. + /// + public void MergeProteinGroupWith(ProteinGroup other) + { + base.MergeWith(other); } + /// + /// Creates a ProteinGroup subset containing only data from the specified spectra file. 
+ /// public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List silacLabels = null) { var allPsmsForThisFile = - new HashSet( + new HashSet( AllPsmsBelowOnePercentFDR.Where(p => p.FullFilePath.Equals(fullFilePath))); var allPeptidesForThisFile = new HashSet( - allPsmsForThisFile.SelectMany(p => p.BestMatchingBioPolymersWithSetMods.Select(v => v.SpecificBioPolymer))); + allPsmsForThisFile.SelectMany(p => p.GetIdentifiedBioPolymersWithSetMods())); var allUniquePeptidesForThisFile = new HashSet(UniquePeptides.Intersect(allPeptidesForThisFile)); @@ -708,7 +605,7 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List p.FullFilePathWithExtension == fullFilePath) .FirstOrDefault(); //check that file name wasn't changed (can occur in SILAC searches) - if (!silacLabels.IsNullOrEmpty() && spectraFileInfo == null) + if (!MzLibUtil.ClassExtensions.IsNullOrEmpty(silacLabels) && spectraFileInfo == null) { foreach (SilacLabel label in silacLabels) { @@ -736,23 +633,32 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List { spectraFileInfo }; + if (spectraFileInfo != null) + { + subsetPg.FilesForQuantification = new List { spectraFileInfo }; + } } - if (IntensitiesByFile == null) + if (IntensitiesByFile == null || spectraFileInfo == null) { subsetPg.IntensitiesByFile = null; } else { subsetPg.IntensitiesByFile = new Dictionary - { { spectraFileInfo, IntensitiesByFile[spectraFileInfo] } }; + { { spectraFileInfo, IntensitiesByFile.GetValueOrDefault(spectraFileInfo, 0) } }; } return subsetPg; } - //method only considers accessions, not peptides + #endregion + + #region Equality + + /// + /// Compares by ordered accession list. + /// public bool Equals(ProteinGroup grp) { //Check for null and compare run-time types. 
@@ -767,5 +673,7 @@ public bool Equals(ProteinGroup grp) return true; } + + #endregion } -} \ No newline at end of file +} diff --git a/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs b/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs index 141cafcfb8..c098df986d 100644 --- a/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs +++ b/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs @@ -165,8 +165,8 @@ private List DoProteinFdr(List proteinGroups) } pg.BestPeptideScore = pg.AllPsmsBelowOnePercentFDR.Max(psm => psm.Score); - pg.BestPeptideQValue = pg.AllPsmsBelowOnePercentFDR.Min(psm => psm.FdrInfo.QValueNotch); - pg.BestPeptidePEP = pg.AllPsmsBelowOnePercentFDR.Min(psm => psm.FdrInfo.PEP); + pg.BestPeptideQValue = pg.AllPsmsBelowOnePercentFDR.OfType().Min(psm => psm.FdrInfo.QValueNotch); + pg.BestPeptidePEP = pg.AllPsmsBelowOnePercentFDR.OfType().Min(psm => psm.FdrInfo.PEP); } // pick the best for each paired accession based on filter type diff --git a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs index c4d7e2f962..ddb97dcf0b 100644 --- a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs +++ b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs @@ -8,6 +8,8 @@ using Omics.Modifications; using Omics.Digestion; using EngineLayer.SpectrumMatch; +using Easy.Common.Extensions; +using MzLibUtil; using MassSpectrometry; namespace EngineLayer @@ -453,26 +455,30 @@ public static void SilacConversionsPostQuantification(List allSilacL flashLfqResults.CalculateProteinResultsMedianPolish(true); //update proteingroups to have all files for quantification + // Modification occupancy is now computed by BioPolymerGroup.PopulateSampleGroupResults() if (proteinGroups != null) { List allInfo = originalToLabeledFileInfoDictionary.SelectMany(x => x.Value).ToList(); foreach (ProteinGroup proteinGroup in proteinGroups) { 
proteinGroup.FilesForQuantification = allInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); + // Build the dictionary locally, then assign in one shot. + // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. + var intensities = new Dictionary(); foreach (var spectraFile in allInfo) { if (flashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); + intensities.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); } else { //needed for decoys/contaminants/proteins that aren't quantified - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); + intensities.Add(spectraFile, 0); } } + proteinGroup.IntensitiesByFile = intensities; } } diff --git a/MetaMorpheus/EngineLayer/SpectralMatch.cs b/MetaMorpheus/EngineLayer/SpectralMatch.cs index f8dc74e1e6..d132c8313a 100644 --- a/MetaMorpheus/EngineLayer/SpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/SpectralMatch.cs @@ -7,6 +7,7 @@ using System.Collections.Generic; using System.Linq; using Omics; +using Omics.SpectralMatch; using System; using Omics.Digestion; using EngineLayer.CrosslinkSearch; @@ -14,7 +15,7 @@ namespace EngineLayer { - public abstract class SpectralMatch : IComparable + public abstract class SpectralMatch : ISpectralMatch, IComparable { public const double ToleranceForScoreDifferentiation = 1e-9; @@ -22,34 +23,13 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, { _BestMatchingBioPolymersWithSetMods = new List(); ScanIndex = scanIndex; - FullFilePath = scan.FullFilePath; - ScanNumber = scan.OneBasedScanNumber; - PrecursorScanNumber = scan.OneBasedPrecursorScanNumber; - ScanRetentionTime = scan.RetentionTime; - ScanExperimentalPeaks = scan.NumPeaks; - PrecursorScanIntensity = scan.PrecursorIntensity; - TotalIonCurrent = scan.TotalIonCurrent; - ScanPrecursorCharge = scan.PrecursorCharge; - 
ScanPrecursorMonoisotopicPeakMz = scan.PrecursorMonoisotopicPeakMz; - ScanPrecursorMass = scan.PrecursorMass; - PrecursorScanEnvelopePeakCount = scan.PrecursorEnvelopePeakCount; - PrecursorFractionalIntensity = scan.PrecursorFractionalIntensity; + ScanMetadata = scan.ScanMetadata; PrecursorScanDeconvolutionScore = scan.PrecursorDeconvolutionScore; DigestionParams = commonParameters.DigestionParams; - NativeId = scan.NativeId; RunnerUpScore = commonParameters.ScoreCutoff; SpectralAngle = -1; IsobaricMassTagReporterIonIntensities = scan.IsobaricMassTagReporterIonIntensities; - if (scan.TheScan is TimsDataScan timsScan) - { - ScanOneOverK0 = timsScan.OneOverK0; - } - else - { - ScanOneOverK0 = null; // this is only used for ion mobility data, so it can be null - } - AddOrReplace(peptide, score, notch, true, matchedFragmentIons); } @@ -69,25 +49,80 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, public int PsmCount { get; internal set; } public Dictionary ModsIdentified { get; private set; } // these should never be null under normal circumstances public List LocalizedScores { get; internal set; } - public int ScanNumber { get; } - public int? PrecursorScanNumber { get; } - public double ScanRetentionTime { get; } - public int ScanExperimentalPeaks { get; } - public double TotalIonCurrent { get; } - public int ScanPrecursorCharge { get; } - public double ScanPrecursorMonoisotopicPeakMz { get; } - public double PrecursorScanIntensity { get; } - public int PrecursorScanEnvelopePeakCount { get; } - public double PrecursorFractionalIntensity { get; } + + #region Scan metadata — delegated to ScanMetadata record + + /// + /// Lightweight, immutable snapshot of scan and precursor metadata. + /// Replaces the individual scan-derived fields that were previously unpacked + /// from Ms2ScanWithSpecificMass during construction. + /// + public ScanMetadata ScanMetadata { get; } + + // Pass-through properties for backwards compatibility. 
+ // Callers can be migrated to ScanMetadata.* over time. + public int ScanNumber => ScanMetadata.OneBasedScanNumber; + public int? PrecursorScanNumber => ScanMetadata.OneBasedPrecursorScanNumber; + public double ScanRetentionTime => ScanMetadata.RetentionTime; + public int ScanExperimentalPeaks => ScanMetadata.NumPeaks; + public double TotalIonCurrent => ScanMetadata.TotalIonCurrent; + public int ScanPrecursorCharge => ScanMetadata.PrecursorCharge; + public double ScanPrecursorMonoisotopicPeakMz => ScanMetadata.PrecursorMonoisotopicPeakMz; + public double PrecursorScanIntensity => ScanMetadata.PrecursorIntensity; + public int PrecursorScanEnvelopePeakCount => ScanMetadata.PrecursorEnvelopePeakCount; + public double PrecursorFractionalIntensity => ScanMetadata.PrecursorFractionalIntensity; + public double ScanPrecursorMass => ScanMetadata.PrecursorMass; + public double? ScanOneOverK0 => ScanMetadata.OneOverK0; + public string FullFilePath => ScanMetadata.FullFilePath; + public string NativeId => ScanMetadata.NativeId; + /// /// Method-agnostic envelope-quality score in [0, 1] from mzLib's DeconvolutionScorer for /// the precursor envelope of this PSM. 0 indicates no envelope was computed or the /// envelope is maximally bad; higher = better-shaped Averagine match. /// public double PrecursorScanDeconvolutionScore { get; } - public double ScanPrecursorMass { get; } - public double? ScanOneOverK0 { get; set; } // this is only used for ion mobility data, so it can be null - public string FullFilePath { get; private set; } + + #endregion + + #region ISpectralMatch explicit interface implementations + + /// Maps to for ISpectralMatch compatibility. + int ISpectralMatch.OneBasedScanNumber => ScanNumber; + + /// + /// Consolidates quantification intensities for ISpectralMatch compatibility. + /// Returns ReporterIonIntensities if available (isobaric), a singleton array of + /// PrecursorScanIntensity for LFQ, or null if neither is populated. + /// + double[]? 
ISpectralMatch.Intensities => + IsobaricMassTagReporterIonIntensities ?? + (PrecursorScanIntensity > 0 ? new[] { PrecursorScanIntensity } : null); + + /// + /// Returns the identified biopolymers (peptides/proteoforms) for ISpectralMatch compatibility. + /// Unwraps SpectralMatchHypothesis to the underlying IBioPolymerWithSetMods. + /// + public IEnumerable GetIdentifiedBioPolymersWithSetMods() => + BestMatchingBioPolymersWithSetMods.Select(h => h.SpecificBioPolymer); + + public int CompareTo(ISpectralMatch? other) + { + if (other is null) return 1; + if (other is SpectralMatch mm) return CompareTo(mm); + // Fallback: compare by score descending + return Score.CompareTo(other.Score); + } + + public bool Equals(ISpectralMatch? other) + { + if (other is null) return false; + return FullFilePath == other.FullFilePath + && ScanNumber == other.OneBasedScanNumber + && FullSequence == other.FullSequence; + } + + #endregion /// /// Refers to the index of the Ms2ScanWithSpecificMass in an array of Ms2ScansWithSpecificMass that is sorted by precursor mass /// @@ -110,7 +145,6 @@ public FdrInfo GetFdrInfo(bool peptideLevel) public double Score { get; private set; } public double SpectralAngle { get; set; } - public string NativeId; // this is a property of the scan. used for mzID writing public double DeltaScore { get { return (Score - RunnerUpScore); } } @@ -165,8 +199,9 @@ public void SetMs2Scan(MsDataScan scan) protected List _BestMatchingBioPolymersWithSetMods; /// - /// An array containing the intensities of the reporter ions for isobaric mass tags. - /// If multiplex quantification wasn't performed, this will be null + /// An array containing the intensities of the reporter ions for isobaric mass tags (TMT, iTRAQ, diLeu, etc.). + /// Null if multiplex quantification wasn't performed. + /// Array order matches the reporter ion order defined by the mass tag modification. /// public double[]? 
IsobaricMassTagReporterIonIntensities { get; private set; } @@ -388,6 +423,12 @@ protected SpectralMatch(SpectralMatch psm, List bestMat BaseSequence = PsmTsvWriter.Resolve(bestMatchingPeptides.Select(b => b.SpecificBioPolymer.BaseSequence)).ResolvedValue; FullSequence = PsmTsvWriter.Resolve(bestMatchingPeptides.Select(b => b.SpecificBioPolymer.FullSequence)).ResolvedValue; + // Scan metadata is an immutable record — safe to share the reference + ScanMetadata = psm.ScanMetadata; + ScanIndex = psm.ScanIndex; + PrecursorScanDeconvolutionScore = psm.PrecursorScanDeconvolutionScore; + IsobaricMassTagReporterIonIntensities = psm.IsobaricMassTagReporterIonIntensities; + ModsChemicalFormula = psm.ModsChemicalFormula; Notch = psm.Notch; BioPolymerWithSetModsLength = psm.BioPolymerWithSetModsLength; @@ -401,17 +442,8 @@ protected SpectralMatch(SpectralMatch psm, List bestMat PsmCount = psm.PsmCount; ModsIdentified = psm.ModsIdentified; LocalizedScores = psm.LocalizedScores; - ScanNumber = psm.ScanNumber; - PrecursorScanNumber = psm.PrecursorScanNumber; - ScanRetentionTime = psm.ScanRetentionTime; - ScanExperimentalPeaks = psm.ScanExperimentalPeaks; - TotalIonCurrent = psm.TotalIonCurrent; - ScanPrecursorCharge = psm.ScanPrecursorCharge; - ScanPrecursorMonoisotopicPeakMz = psm.ScanPrecursorMonoisotopicPeakMz; - ScanPrecursorMass = psm.ScanPrecursorMass; - FullFilePath = psm.FullFilePath; - ScanIndex = psm.ScanIndex; FdrInfo = psm.FdrInfo; + PeptideFdrInfo = psm.PeptideFdrInfo; Score = psm.Score; RunnerUpScore = psm.RunnerUpScore; IsDecoy = psm.IsDecoy; @@ -612,4 +644,4 @@ public int CompareTo(SpectralMatch otherPsm) } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs b/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs index 12ff97e230..66d36edc16 100644 --- a/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs +++ b/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs @@ -24,7 +24,7 @@ public enum IsobaricMassTagType /// This class 
contains information about the Isobaric Mass Tag (e.g., TMT), including the theoretical m/z values of the reporter ions /// as well as methods designed to retrieve the intensities of those reporter ions from a given MzSpectrum. /// It does not store any intensity information itself. Intensity information is associated with each Ms2ScanWithSpecificMass object or SpectralMatch, - /// in the IsobaricMassTagReporterIonIntensities property. + /// in the IsobaricMassTagReporterIonIntensities property that both Ms2ScanWithSpecificMass and SpectralMatch expose. /// public class IsobaricMassTag { diff --git a/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs b/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs index 4bd98670f4..e9543a031c 100644 --- a/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs +++ b/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs @@ -6,6 +6,7 @@ using System.Windows; using Easy.Common.Extensions; using EngineLayer; +using GuiFunctions.Util; using iText.StyledXmlParser.Jsoup; using MassSpectrometry; using MzLibUtil; @@ -26,12 +27,13 @@ public class FragmentationReanalysisViewModel : BaseViewModel { private readonly bool _isProtein; private static readonly object _fragmentationLock = new(); + private static readonly object _productsLock = new(); public FragmentationReanalysisViewModel(bool isProtein = true) { _isProtein = isProtein; ProductIonMassTolerance = 20; - PossibleProducts = [.. 
GetPossibleProducts()]; + PossibleProducts = new ThreadSafeObservableCollection(GetPossibleProducts()); IEnumerable values; CommonParameters common; @@ -77,15 +79,13 @@ public void LoadFragmentationParameters(CommonParameters common, SearchParameter FragmentationParamsViewModel = new(common, search); } - private ObservableCollection _possibleProducts; - public ObservableCollection PossibleProducts + private ThreadSafeObservableCollection _possibleProducts; + public ThreadSafeObservableCollection PossibleProducts { get => _possibleProducts; set { _possibleProducts = value; OnPropertyChanged(nameof(PossibleProducts)); } } - private IEnumerable _productsToUse => PossibleProducts.Where(p => p.Use).Select(p => p.ProductType); - private bool _persist; public bool Persist { @@ -249,7 +249,7 @@ public List MatchIonsWithNewTypes(MsDataScan ms2Scan, Spectr List internalProducts = new List(); // Snapshot products before acquiring lock to avoid enumerating collection while it may be modified by UI thread - var productsSnapshot = _productsToUse.ToList(); + var productsSnapshot = GetProductsSnapshot(); // Lock to ensure thread-safe mutation of static DissociationTypeCollection dictionary lock (_fragmentationLock) { @@ -298,6 +298,14 @@ public List MatchIonsWithNewTypes(MsDataScan ms2Scan, Spectr .ToList(); } + private List GetProductsSnapshot() + { + lock (_productsLock) + { + return PossibleProducts.Where(p => p.Use).Select(p => p.ProductType).ToList(); + } + } + public static readonly IEqualityComparer MatchedFragmentIonComparer = new MatchedFragmentIonEqualityComparer(); public class MatchedFragmentIonEqualityComparer : IEqualityComparer diff --git a/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs b/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs new file mode 100644 index 0000000000..581a1ab331 --- /dev/null +++ b/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs @@ -0,0 +1,78 @@ +using System.Collections; +using 
System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Linq; +using System.Windows; +using System.Windows.Threading; + +namespace GuiFunctions.Util; + +/// +/// An ObservableCollection that is safe for concurrent reads and writes. +/// All mutations are serialized with a lock, and GetEnumerator returns a +/// snapshot so callers never see a "collection was modified" exception. +/// CollectionChanged notifications are automatically marshalled to the UI +/// dispatcher when raised from a background thread. +/// +public class ThreadSafeObservableCollection : ObservableCollection, IEnumerable, IEnumerable +{ + private readonly object _lock = new(); + + public ThreadSafeObservableCollection() { } + + public ThreadSafeObservableCollection(IEnumerable collection) : base(collection) { } + + // ── Mutations ──────────────────────────────────────────────────────────── + + protected override void InsertItem(int index, T item) + { + lock (_lock) base.InsertItem(index, item); + } + + protected override void RemoveItem(int index) + { + lock (_lock) base.RemoveItem(index); + } + + protected override void SetItem(int index, T item) + { + lock (_lock) base.SetItem(index, item); + } + + protected override void MoveItem(int oldIndex, int newIndex) + { + lock (_lock) base.MoveItem(oldIndex, newIndex); + } + + protected override void ClearItems() + { + lock (_lock) base.ClearItems(); + } + + // ── Snapshot enumeration ───────────────────────────────────────────────── + // `new` hides Collection.GetEnumerator so plain foreach on the concrete type also gets + // the snapshot. Callers that statically type as the base ObservableCollection/Collection + // will still see the live enumerator and can hit InvalidOperationException under mutation. 
+ + public new IEnumerator GetEnumerator() + { + T[] snapshot; + lock (_lock) + snapshot = Items.ToArray(); // Items is the protected List from Collection + return ((IEnumerable)snapshot).GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + // ── UI-thread marshalling ───────────────────────────────────────────────── + + protected override void OnCollectionChanged(NotifyCollectionChangedEventArgs e) + { + Dispatcher dispatcher = Application.Current?.Dispatcher; + if (dispatcher != null && !dispatcher.CheckAccess()) + dispatcher.Invoke(() => base.OnCollectionChanged(e)); + else + base.OnCollectionChanged(e); + } +} diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 7082ae991e..6af74ec6d9 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -73,6 +73,7 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li if (Parameters.GlycoSearchParameters.DoParsimony) { GlycoProteinAnalysis(fspList, individualFileFolderPath, individualFileFolder); //Creat the proteinGroups file + WriteProteinResults(individualFileFolderPath, individualFileFolder); } foreach (GlycoSpectralMatch gsm in fspList) //maybe this needs to be the filterd list??? 
@@ -158,6 +159,11 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li QuantificationAnalysis(); WriteQuantificationResults(); + if (glycoSearchParameters.DoParsimony) + { + WriteProteinResults(OutputFolder, null, MyTaskResults); + } + if (Parameters.GlycoSearchParameters.WritePrunedDataBase) { WritePrunedDatabase(Parameters.AllPsms.Cast().ToList(), Parameters.BioPolymerList.Cast().ToList(), Parameters.GlycoSearchParameters.ModsToWriteSelection, Parameters.DatabaseFilenameList, Parameters.OutputFolder, Parameters.SearchTaskId); @@ -319,7 +325,6 @@ private void GlycoProteinAnalysis(List gsms, string outputFo ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups; Status("Done constructing protein groups!", Parameters.SearchTaskId); - WriteProteinResults(outputFolder, individualFileFolder, myTaskResults); } private void GlycoAccessionAnalysis(List gsms, string individualFileFolderPath, string individualFileFolder = null) @@ -457,7 +462,8 @@ private void QuantificationAnalysis() string.Join("|", proteinsOrderedByAccession.Select(p => p.GeneNames.Select(x => x.Item2).FirstOrDefault())), string.Join("|", proteinsOrderedByAccession.Select(p => p.Organism).Distinct())); - foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.Where(v => v.FullSequence != null)) + foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.OfType() + .Where(v => v.FullSequence != null)) { if (psmToProteinGroups.TryGetValue(psm, out var flashLfqProteinGroups)) { @@ -520,7 +526,8 @@ private void QuantificationAnalysis() var flashLFQIdentifications = new List(); foreach (var spectraFile in psmsGroupedByFile) { - var rawfileinfo = spectraFileInfo.Where(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)).First(); + var rawfileinfo = spectraFileInfo.FirstOrDefault(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)); + if (rawfileinfo == null) continue; foreach (var psm in spectraFile) { @@ -544,25 +551,30 @@ private void 
QuantificationAnalysis() Parameters.FlashLfqResults = FlashLfqEngine.Run(); } - // get protein intensity back from FlashLFQ - if (ProteinGroups != null && Parameters.FlashLfqResults != null) + // Propagate quantification data to protein groups + if (ProteinGroups != null) { foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); - foreach (var spectraFile in proteinGroup.FilesForQuantification) + if (Parameters.FlashLfqResults != null) { - if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) - { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); - } - else + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); + intensities.Add(spectraFile, + Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup) + ? flashLfqProteinGroup.GetIntensity(spectraFile) + : 0); } + proteinGroup.IntensitiesByFile = intensities; } + + // Populate SampleGroupResults from the shared spectraFileInfo so + // every PG carries the same dynamic-column schema. Without this, the writer + // would have no way to produce uniform headers/rows. 
+ proteinGroup.PopulateSampleGroupResults(); } } } @@ -590,11 +602,8 @@ private void WriteQuantificationResults() file.Key.FilenameWithoutExtension + "_QuantifiedPeaks", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); WritePeptideQuantificationResultsToTsv(Parameters.FlashLfqResults, Path.Combine(Parameters.IndividualResultsOutputFolder, file.Key.FilenameWithoutExtension), file.Key.FilenameWithoutExtension + "_QuantifiedPeptides", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); - if (true) - { - WriteProteinQuantificationResultsToTsv(Parameters.FlashLfqResults, Path.Combine(Parameters.IndividualResultsOutputFolder, file.Key.FilenameWithoutExtension), - file.Key.FilenameWithoutExtension + "_QuantifiedProteins", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); - } + WriteProteinQuantificationResultsToTsv(Parameters.FlashLfqResults, Path.Combine(Parameters.IndividualResultsOutputFolder, file.Key.FilenameWithoutExtension), + file.Key.FilenameWithoutExtension + "_QuantifiedProteins", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); } } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 6f4de70728..a8ccd243ce 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -16,7 +16,6 @@ using System.IO.Compression; using System.Linq; using System.Text; -using Chemistry; using EngineLayer.DatabaseLoading; using MzLibUtil; using Omics.Digestion; @@ -39,7 +38,7 @@ public class PostSearchAnalysisTask : MetaMorpheusTask /// /// Used for storage of results for writing to Results.tsv. 
It is explained in the method ConstructResultsDictionary() /// - private Dictionary<(string,string),string> ResultsDictionary { get; set; } + private Dictionary<(string, string), string> ResultsDictionary { get; set; } /// /// Used for storage of results for writing digestion product counts to a .tsv. /// @@ -364,7 +363,8 @@ private void QuantificationAnalysis() string.Join("|", proteinsOrderedByAccession.Select(p => p.GeneNames.Select(x => x.Item2).FirstOrDefault())), string.Join("|", proteinsOrderedByAccession.Select(p => p.Organism).Distinct())); - foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.Where(v => v.FullSequence != null)) + foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.OfType() + .Where(v => v.FullSequence != null)) { if (psmToProteinGroups.TryGetValue(psm, out var flashLfqProteinGroups)) { @@ -537,7 +537,6 @@ private void QuantificationAnalysis() var undefinedPg = new ProteinGroup("UNDEFINED", "", ""); //sort the unambiguous psms by protease to make MBR compatible with multiple proteases Dictionary> proteaseSortedPsms = new Dictionary>(); - Dictionary proteaseSortedFlashLFQResults = new Dictionary(); foreach (IDigestionParams dp in Parameters.ListOfDigestionParams) { @@ -560,18 +559,19 @@ private void QuantificationAnalysis() var flashLFQIdentifications = new List(); foreach (var spectraFile in psmsGroupedByFile) { - var rawfileinfo = spectraFileInfo.First(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)); + var rawfileinfo = spectraFileInfo.FirstOrDefault(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)); + if (rawfileinfo == null) continue; foreach (var psm in spectraFile) { flashLFQIdentifications.Add( new Identification( fileInfo: rawfileinfo, - psm.BaseSequence, + psm.BaseSequence, psm.FullSequence, - psm.BioPolymerWithSetModsMonoisotopicMass.Value, - psm.ScanRetentionTime, - psm.ScanPrecursorCharge, + psm.BioPolymerWithSetModsMonoisotopicMass.Value, + psm.ScanRetentionTime, + psm.ScanPrecursorCharge, 
psmToProteinGroups[psm], psmScore: psm.Score, qValue: psmsForQuantification.FilterType == FilterType.QValue ? psm.FdrInfo.QValue : psm.FdrInfo.PEP_QValue, @@ -597,25 +597,30 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = flashLfqEngine.Run(); } - // get protein intensity back from FlashLFQ - if (ProteinGroups != null && Parameters.FlashLfqResults != null) + // Propagate quantification data to protein groups so that PopulateSampleGroupResults() + // has the per-file context it needs to produce spectral-count and intensity-based occupancy columns. + // + // FilesForQuantification is always assigned once spectraFileInfo is available so that + // count-based occupancy is written even when FlashLFQ produced no peaks (e.g., when + // flashLFQIdentifications is empty and FlashLfqResults remains null). + // IntensitiesByFile is always assigned (with zeros if FlashLFQ produced no results) + // so that HasIntensityData is true and intensity-based occupancy columns are always written. + if (ProteinGroups != null) { foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); - foreach (var spectraFile in proteinGroup.FilesForQuantification) + // Build the dictionary locally, then assign in one shot. + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { - if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) - { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); - } - else - { - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); - } + intensities.Add(spectraFile, + Parameters.FlashLfqResults?.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup) == true + ? 
flashLfqProteinGroup.GetIntensity(spectraFile) + : 0); } + proteinGroup.IntensitiesByFile = intensities; } } @@ -625,6 +630,16 @@ private void QuantificationAnalysis() SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); } + + // Populate SampleGroupResults AFTER all quant-state mutation (including SILAC + // re-labeling) so every PG carries the same dynamic-column schema for the writer. + if (ProteinGroups != null) + { + foreach (var proteinGroup in ProteinGroups) + { + proteinGroup.PopulateSampleGroupResults(); + } + } } catch (Exception e) { @@ -837,14 +852,14 @@ private void UpdateSpectralLibrary() ); - //group psms by peptide and charge, then write highest scoring PSM to dictionary - Dictionary<(string, int), SpectralMatch> psmSeqChargeDictionary = peptidesForSpectralLibrary - .GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)) - .ToDictionary( - // Key is a (FullSequence, Charge) tuple - keySelector: g => g.Key, - // Value is the highest scoring psm in the group - elementSelector: g => g.MaxBy(p => p.Score)); + //group psms by peptide and charge, then write highest scoring PSM to dictionary + Dictionary<(string, int), SpectralMatch> psmSeqChargeDictionary = peptidesForSpectralLibrary + .GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)) + .ToDictionary( + // Key is a (FullSequence, Charge) tuple + keySelector: g => g.Key, + // Value is the highest scoring psm in the group + elementSelector: g => g.MaxBy(p => p.Score)); //load the original library var originalLibrarySpectra = Parameters.SpectralLibrary.GetAllLibrarySpectra(); @@ -917,8 +932,7 @@ private void SpectralLibraryGeneration() includeHighQValuePsms: false); //group psms by peptide and charge, the psms having same sequence and same charge will 
be in the same group - var fullSeqChargeGrouping = - peptidesForSpectralLibrary.GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)); + var fullSeqChargeGrouping = peptidesForSpectralLibrary.GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)); List spectraLibrary = new(); foreach (var matchGroup in fullSeqChargeGrouping) { @@ -1202,7 +1216,7 @@ private void WriteVariantResults() new FdrAnalysisEngine(possibleVariantPsms, Parameters.NumNotches, CommonParameters, FileSpecificParameters, new List { Parameters.SearchTaskId }, "variant_PSMs", doPEP: false).Run(); - possibleVariantPsms + possibleVariantPsms = possibleVariantPsms .OrderBy(p => p.FdrInfo.QValue) .ThenByDescending(p => p.Score) .ThenBy(p => p.FdrInfo.CumulativeTarget) @@ -1308,9 +1322,9 @@ private void WriteVariantResults() foreach (var variant in variants) { - if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true) + if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true && variant.Description.IsNotNullOrEmpty()) { - if (culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "missense_variant", CompareOptions.IgnoreCase) >= 0) + if (culture.CompareInfo.IndexOf(variant.Description, "missense_variant", CompareOptions.IgnoreCase) >= 0) { if (variant.VariantCallFormatDataString.ReferenceAlleleString.Length == 1 && variant.VariantCallFormatDataString.AlternateAlleleString.Length == 1) { @@ -1331,7 +1345,7 @@ private void WriteVariantResults() MNVmissenseVariants.AddOrCreate(variantPWSM.Protein, variant); } } - else if (culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "frameshift_variant", CompareOptions.IgnoreCase) >= 0) + else if (culture.CompareInfo.IndexOf(variant.Description, "frameshift_variant", CompareOptions.IgnoreCase) >= 0) { if (frameshiftIdentified == false) { @@ -1340,7 +1354,7 @@ private void WriteVariantResults() } frameshiftVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if 
(culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "stop_gained", CompareOptions.IgnoreCase) >= 0) + else if (culture.CompareInfo.IndexOf(variant.Description, "stop_gained", CompareOptions.IgnoreCase) >= 0) { if (stopGainIdentified == false) { @@ -1349,7 +1363,7 @@ private void WriteVariantResults() } stopGainVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if ((culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "conservative_inframe_insertion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "disruptive_inframe_insertion", CompareOptions.IgnoreCase) >= 0)) + else if ((culture.CompareInfo.IndexOf(variant.Description, "conservative_inframe_insertion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.Description, "disruptive_inframe_insertion", CompareOptions.IgnoreCase) >= 0)) { if (insertionIdentified == false) { @@ -1358,7 +1372,7 @@ private void WriteVariantResults() } insertionVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if ((culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "conservative_inframe_deletion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "disruptive_inframe_deletion", CompareOptions.IgnoreCase) >= 0)) + else if ((culture.CompareInfo.IndexOf(variant.Description, "conservative_inframe_deletion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.Description, "disruptive_inframe_deletion", CompareOptions.IgnoreCase) >= 0)) { if (deletionIdentified == false) { @@ -1367,7 +1381,7 @@ private void WriteVariantResults() } deletionVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if (culture.CompareInfo.IndexOf(variant.VariantCallFormatDataString.Description, "stop_loss", CompareOptions.IgnoreCase) >= 0) + else if (culture.CompareInfo.IndexOf(variant.Description, 
"stop_loss", CompareOptions.IgnoreCase) >= 0) { if (stopLossIdentifed == false) { @@ -1520,8 +1534,7 @@ private static void WritePsmsForPercolator(List psmList, string w output.WriteLine(directions.ToString()); int idNumber = 0; - psmList.OrderByDescending(p => p.Score); - foreach (SpectralMatch psm in psmList.Where(p => p.PsmData_forPEPandPercolator != null)) + foreach (SpectralMatch psm in psmList.Where(p => p.PsmData_forPEPandPercolator != null).OrderByDescending(p => p.Score)) { foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods) { @@ -1598,7 +1611,7 @@ private void ConstructResultsDictionary() if (Parameters.SearchParameters.DoParsimony) { - ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), ""); + ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), ""); if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles) { foreach (var rawFile in Parameters.CurrentRawFileList) @@ -1721,4 +1734,4 @@ private void WriteDigestionCountHistogram() FinishedWritingFile(countHistogramPath, nestedIds); } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs b/MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs new file mode 100644 index 0000000000..0c155fca01 --- /dev/null +++ b/MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs @@ -0,0 +1,182 @@ +using GuiFunctions.Util; +using NUnit.Framework; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; + +namespace Test.GuiTests; + +[TestFixture] +public class ThreadSafeObservableCollectionTests +{ + [Test] + public void DefaultConstructor_StartsEmpty() + { + var c = new ThreadSafeObservableCollection(); + Assert.That(c.Count, Is.EqualTo(0)); + } + + [Test] + public void Constructor_WithInitialCollection_CopiesItems() + { + var c = 
new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + Assert.That(c.Count, Is.EqualTo(3)); + Assert.That(c.ToList(), Is.EqualTo(new[] { 1, 2, 3 })); + } + + // Plain foreach on the concrete type uses the snapshot enumerator (via the `new` GetEnumerator), + // so mutation mid-iteration does not throw and is not observed by the in-flight enumeration. + [Test] + public void SnapshotEnumeration_MutationDuringEnumerationDoesNotThrow() + { + var c = new ThreadSafeObservableCollection(Enumerable.Range(0, 10)); + + var enumerated = new List(); + foreach (var item in c) + { + enumerated.Add(item); + if (item == 5) c.Add(999); + } + + Assert.That(enumerated, Is.EqualTo(Enumerable.Range(0, 10).ToList())); + Assert.That(c.Count, Is.EqualTo(11)); + } + + // Pins the snapshot guarantee at the GetEnumerator call site on the concrete type. + [Test] + public void GetEnumerator_OnConcreteType_ReturnsSnapshot() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + var e = c.GetEnumerator(); + c.Add(4); + + var enumerated = new List(); + while (e.MoveNext()) enumerated.Add(e.Current); + + Assert.That(enumerated, Is.EqualTo(new[] { 1, 2, 3 })); + Assert.That(c.Count, Is.EqualTo(4)); + } + + // The explicit IEnumerable.GetEnumerator is what LINQ uses; verify a representative LINQ op + // returns the snapshot view rather than a live one. + [Test] + public void Linq_ToListReturnsSnapshot() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + var snapshot = c.ToList(); + c.Add(4); + Assert.That(snapshot, Is.EqualTo(new[] { 1, 2, 3 })); + Assert.That(c.ToList(), Is.EqualTo(new[] { 1, 2, 3, 4 })); + } + + // Concurrent Adds must all be preserved (no lost writes from races on the base list). 
+ [Test] + public void ConcurrentAdds_AllItemsPreserved() + { + const int threads = 8; + const int itemsPerThread = 500; + var c = new ThreadSafeObservableCollection(); + + Parallel.For(0, threads, t => + { + for (int i = 0; i < itemsPerThread; i++) + c.Add(t * itemsPerThread + i); + }); + + Assert.That(c.Count, Is.EqualTo(threads * itemsPerThread)); + // Every produced value should appear exactly once. + Assert.That(c.ToList().Distinct().Count(), Is.EqualTo(threads * itemsPerThread)); + } + + // Enumeration on one thread while another thread mutates must not throw. + [Test] + public void ConcurrentEnumerationDuringMutation_DoesNotThrow() + { + var c = new ThreadSafeObservableCollection(Enumerable.Range(0, 100)); + var cts = new CancellationTokenSource(); + + var mutator = Task.Run(() => + { + int i = 1000; + while (!cts.IsCancellationRequested) + { + c.Add(i++); + if (c.Count > 200) c.RemoveAt(0); + } + }); + + var enumerator = Task.Run(() => + { + for (int i = 0; i < 1000; i++) + _ = c.Sum(); + }); + + Assert.DoesNotThrow(() => enumerator.Wait()); + cts.Cancel(); + mutator.Wait(); + } + + [Test] + public void CollectionChanged_FiresOnAdd() + { + var c = new ThreadSafeObservableCollection(); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Add(1); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Add)); + } + + [Test] + public void CollectionChanged_FiresOnRemove() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Remove(2); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Remove)); + } + + [Test] + public void CollectionChanged_FiresOnClear() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2 }); + NotifyCollectionChangedAction? 
lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Clear(); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Reset)); + } + + [Test] + public void CollectionChanged_FiresOnReplace() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c[1] = 99; + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Replace)); + Assert.That(c[1], Is.EqualTo(99)); + } + + [Test] + public void CollectionChanged_FiresOnMove() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Move(0, 2); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Move)); + Assert.That(c.ToList(), Is.EqualTo(new[] { 2, 3, 1 })); + } +} diff --git a/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs b/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs index f5c1fae469..c393a66f4e 100644 --- a/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs +++ b/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs @@ -20,6 +20,7 @@ namespace Test.MetaDraw { [ExcludeFromCodeCoverage] + [NonParallelizable] internal class FragmentReanalysis { [Test] diff --git a/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs b/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs index 6c055c8dae..2260d26edf 100644 --- a/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs +++ b/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs @@ -2196,6 +2196,7 @@ public static void TestCrosslinkSpectralLibraryReading() } [Test] + [NonParallelizable] public void ExportPlot_RefragmentationWithAdditionalFragmentIons_WritesExpectedIons() { // Arrange diff --git a/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs b/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs index 2c7d3e03a1..3aa8c7302c 100644 --- a/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs +++ 
b/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs @@ -626,7 +626,7 @@ public static void TestDoNotCountDiagnosticIonsInScore_LowCID() //The below theoretical does not accurately represent B-Y ions double[] sorted_theoretical_product_masses_for_this_peptide = new double[] { precursorMass + (2 * Constants.ProtonMass) - 275.1350, precursorMass + (2 * Constants.ProtonMass) - 258.127, precursorMass + (2 * Constants.ProtonMass) - 257.1244, 50, 60, 70, 147.0764, precursorMass + (2 * Constants.ProtonMass) - 147.0764, precursorMass + (2 * Constants.ProtonMass) - 70, precursorMass + (2 * Constants.ProtonMass) - 60, precursorMass + (2 * Constants.ProtonMass) - 50, 257.1244, 258.127, 275.1350 }; //{ 50, 60, 70, 147.0764, 257.1244, 258.127, 275.1350 } List productsWithLocalizedMassDiff = new(); - + //add one diagnostic ion productsWithLocalizedMassDiff.Add(new Product(ProductType.D, FragmentationTerminus.Both, sorted_theoretical_product_masses_for_this_peptide[11], 1, 1, 0)); @@ -984,5 +984,50 @@ public static void TestSearchTaskExceptionOnNullMassTag() Assert.Throws(() => throw new MetaMorpheusException("Could not find isobaric mass tag with the name " + invalidModId)); } } + + [Test] + public static void TestTmtProteinGroupsHaveNoQuantColumns() + { + // TMT runs return early from QuantificationAnalysis before FilesForQuantification + // is assigned, so protein groups must NOT emit Intensity_/IntensityOccupancy_/ + // SpectralCount_ columns. This guards against future regressions. 
+ var searchTask = Toml.ReadFile( + Path.Combine(TestContext.CurrentContext.TestDirectory, @"TMT_test\TMT-Task1-SearchTaskconfig.toml"), + MetaMorpheusTask.tomlConfig); + // DoParsimony must be true to generate protein groups output file + searchTask.SearchParameters.DoParsimony = true; + + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestTmtNoQuantColumns"); + var engine = new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("search", searchTask) }, + new List { Path.Combine(TestContext.CurrentContext.TestDirectory, @"TMT_test\VA084TQ_6.mzML") }, + new List { new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TMT_test\mouseTmt.fasta"), false) }, + outputFolder); + try + { + engine.Run(); + + var pgLines = File.ReadAllLines( + Path.Combine(outputFolder, "search", "AllProteinGroups.tsv")).ToList(); + Assert.That(pgLines.Count, Is.GreaterThan(1), "No protein groups written"); + + var header = pgLines[0].Split('\t').ToList(); + + // None of the per-file quant column families should appear for TMT + Assert.That(header.Any(h => h.StartsWith("Intensity_")), Is.False, "Unexpected Intensity_ column in TMT output"); + Assert.That(header.Any(h => h.StartsWith("SpectralCount_")), Is.False, "Unexpected SpectralCount_ column in TMT output"); + Assert.That(header.Any(h => h.StartsWith("IntensityOccupancy_")), Is.False, "Unexpected IntensityOccupancy_ column in TMT output"); + Assert.That(header.Any(h => h.StartsWith("CountOccupancy_")), Is.False, "Unexpected CountOccupancy_ column in TMT output"); + + // All rows must still have consistent column counts + Assert.That(pgLines.Select(l => l.Split('\t').Length).AllSame(), + Is.True, "Column count mismatch across protein group rows"); + } + finally + { + if (Directory.Exists(outputFolder)) + Directory.Delete(outputFolder, true); + } + } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs 
index eb0356600d..7a852816bf 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -1,4 +1,5 @@ using EngineLayer; +using FlashLFQ; using NUnit.Framework; using Proteomics; using System.Collections.Generic; @@ -15,6 +16,7 @@ using System.Text.RegularExpressions; using EngineLayer.DatabaseLoading; using Omics; +using MzLibUtil; namespace Test { @@ -84,7 +86,8 @@ public static void ProteinGroupToStringTest() new HashSet() { pwsm1, pwsm2 }, new HashSet() { pwsm1, pwsm2 }); //string exectedProteinGroupToString = proteinGroup1.ToString(); - string exectedProteinGroupToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; + string exectedProteinGroupToString = + "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupToString)); @@ -92,7 +95,8 @@ public static void ProteinGroupToStringTest() List proteinList3 = new List { prot3 }; ProteinGroup proteinGroup3 = new ProteinGroup(new HashSet(proteinList3), new HashSet(), new HashSet()); - string exectedProteinGroupWithDecoyToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; + string exectedProteinGroupWithDecoyToString = + "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupWithDecoyToString)); } @@ -116,7 +120,123 @@ public static void TestProteinGroupStringAndHeaderHaveSameNumberOfTabs() string[] rowEntries = pgRow.Split("\t"); Assert.That(headerFields.Length, Is.EqualTo(rowEntries.Length)); Assert.That(Regex.Matches(pgHeader, @"\t").Count, Is.EqualTo(Regex.Matches(pgRow, @"\t").Count)); - } + } + + // No upstream quant setup -> no dynamic columns in header or row. 
+ [Test] + public static void TestProteinGroupNoDynamicColumnsWhenSampleGroupResultsNotPopulated() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + Assert.That(pg.SampleGroupResults, Is.Null); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + + Assert.That(header.Contains("SpectralCount_"), Is.False); + Assert.That(header.Contains("Intensity_"), Is.False); + Assert.That(header.Contains("CountOccupancy_"), Is.False); + Assert.That(header.Contains("IntensityOccupancy_"), Is.False); + Assert.That(header.Split('\t').Length, Is.EqualTo(row.Split('\t').Length)); + + // Header/row generation must not lazy-populate. + Assert.That(pg.SampleGroupResults, Is.Null); + } + + // FilesForQuantification + IntensitiesByFile + populate -> all 4 column families appear, + // one per sample group, with matching header/row tab counts. 
+ [Test] + public static void TestProteinGroupDynamicColumnsWithIntensitiesPopulated() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + var fileA = new SpectraFileInfo(@"X:\fakeA.mzML", condition: "", biorep: 0, fraction: 0, techrep: 0); + var fileB = new SpectraFileInfo(@"X:\fakeB.mzML", condition: "", biorep: 1, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { fileA, fileB }; + pg.IntensitiesByFile = new Dictionary + { + { fileA, 100.0 }, + { fileB, 200.0 } + }; + + pg.PopulateSampleGroupResults(); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + string[] headerFields = header.Split('\t'); + + Assert.That(headerFields.Length, Is.EqualTo(row.Split('\t').Length)); + Assert.That(headerFields.Count(h => h.StartsWith("SpectralCount_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("Intensity_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("CountOccupancy_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("IntensityOccupancy_")), Is.EqualTo(2)); + } + + // FilesForQuantification set without IntensitiesByFile -> only count-based dynamic columns. 
+ [Test] + public static void TestProteinGroupCountOnlyColumnsWhenNoIntensities() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + var fileA = new SpectraFileInfo(@"X:\fakeA.mzML", condition: "", biorep: 0, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { fileA }; + + pg.PopulateSampleGroupResults(); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + + Assert.That(header.Contains("SpectralCount_"), Is.True); + Assert.That(header.Contains("CountOccupancy_"), Is.True); + Assert.That(header.Contains("Intensity_"), Is.False); + Assert.That(header.Contains("IntensityOccupancy_"), Is.False); + Assert.That(header.Split('\t').Length, Is.EqualTo(row.Split('\t').Length)); + } + + // Mutating FilesForQuantification/IntensitiesByFile (as SilacConversions does) invalidates + // SampleGroupResults; the post-mutation populate must reflect the new file list. 
+ [Test] + public static void TestProteinGroupPopulateSampleGroupsReflectsPostSilacState() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + var light = new SpectraFileInfo(@"X:\sample_light.mzML", condition: "", biorep: 0, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { light }; + pg.IntensitiesByFile = new Dictionary { { light, 100.0 } }; + + var heavy = new SpectraFileInfo(@"X:\sample_heavy.mzML", condition: "", biorep: 1, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { light, heavy }; + pg.IntensitiesByFile = new Dictionary + { + { light, 100.0 }, + { heavy, 250.0 } + }; + Assert.That(pg.SampleGroupResults, Is.Null); + + pg.PopulateSampleGroupResults(); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + string[] headerFields = header.Split('\t'); + + Assert.That(headerFields.Count(h => h.StartsWith("SpectralCount_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("Intensity_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("CountOccupancy_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("IntensityOccupancy_")), Is.EqualTo(2)); + Assert.That(headerFields.Length, Is.EqualTo(row.Split('\t').Length)); + } [Test] public static void ProteinGroupMergeTest() @@ -212,7 +332,6 @@ public static void TestModificationInfoListInProteinGroupsOutput() ).Select(b => (b.ModificationType, b.IdWithMotif)).ToList() } }; - SearchTask task2 = new SearchTask { CommonParameters = new CommonParameters(), @@ -226,13 +345,20 @@ public static void TestModificationInfoListInProteinGroupsOutput() } }; List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { 
("task1", task1), ("task2", task2) }; - string mzmlName = @"TestData\PrunedDbSpectra.mzml"; - string fastaName = @"TestData\DbForPrunedDb.fasta"; string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedGeneration"); + // Copy inputs into a clean per-test folder so QuantificationAnalysis does not pick up + // a stale ExperimentalDesign.tsv left in the shared TestData\ directory by other tests. + string inputFolder = Path.Combine(outputFolder, "inputs"); + Directory.CreateDirectory(inputFolder); + string mzmlName = Path.Combine(inputFolder, "PrunedDbSpectra.mzml"); + string fastaName = Path.Combine(inputFolder, "DbForPrunedDb.fasta"); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml"), mzmlName, true); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"), fastaName, true); + var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); engine.Run(); - string final = Path.Combine(MySetUpClass.outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml"); + string final = Path.Combine(outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml"); List proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List(), false, new List(), out var ok); // ensures that protein out put contains the correct number of proteins to match the following conditions. 
// all proteins in DB have baseSequence!=null (not ambiguous) @@ -244,12 +370,82 @@ public static void TestModificationInfoListInProteinGroupsOutput() Assert.That(totalNumberOfMods, Is.EqualTo(4)); List proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv")).ToList(); - string firstDataLine = proteinGroupsOutput[2]; - string modInfoListProteinTwo = firstDataLine.Split('\t')[14]; - Assert.That(modInfoListProteinTwo, Is.EqualTo("#aa71[Oxidation on S,info:occupancy=0.50(1/2)]")); + Assert.That(proteinGroupsOutput.Count, Is.EqualTo(8)); + + // Use the header row to locate occupancy columns dynamically, + // guarding against future column-order changes. + bool allEqualColumns = proteinGroupsOutput.Select(x => x.Split('\t').Length).AllSame(); + Assert.That(allEqualColumns, Is.True, "All rows in the protein groups output should have the same number of columns."); + + List header = proteinGroupsOutput[0].Split('\t').ToList(); + int countOccupancyIndex = header.IndexOf(header.First(h => h.StartsWith("CountOccupancy_"))); + int intensityOccupancyIndex = header.IndexOf(header.First(h => h.StartsWith("IntensityOccupancy_"))); + + string[] testDataFields = proteinGroupsOutput.First(x => x.StartsWith("P10591")).Split('\t'); + string countOccupancy = testDataFields[countOccupancyIndex]; + string intensityOccupancy = testDataFields[intensityOccupancyIndex]; + + // Tests count-based PTM occupancy: pos{residue}[{modName},info:fraction={count-fraction}({modified PSMs}/{total PSMs})] + Assert.That(countOccupancy, Is.EqualTo( + "pos71[Oxidation on S,info:fraction=0.50(1/2)]|pos71[Oxidation on S,info:fraction=0.50(1/2)]")); + + // Tests intensity-based PTM occupancy: pos{residue}[{modName},info:fraction={intensity-fraction}({mod intensity}/{total intensity})] + Assert.That(intensityOccupancy, Is.EqualTo( + "pos71[Oxidation on S,info:fraction=0.1899(1.279E+05/6.736E+05)]|pos71[Oxidation on 
S,info:fraction=0.1899(1.279E+05/6.736E+05)]")); Directory.Delete(outputFolder, true); } + + [Test] + public static void TestGetIdentifiedPeptidesOutputOnAllBranches() + { + // Arrange: one protein with two peptides that differ only by a mod + ModificationMotif.TryGetMotif("C", out ModificationMotif motif); + var mod = new Modification(_originalId: "Carbamidomethyl on C", _modificationType: "Common Fixed", + _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 57.02146); + + var oneBasedMods = new Dictionary> { { 2, new List { mod } } }; + var protein = new Protein("MCPEPTIDE", "prot1", oneBasedModifications: oneBasedMods); + + var modsOnPwsm1 = new Dictionary { { 2, mod } }; + var pwsm1 = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 9, + CleavageSpecificity.Full, "", 0, modsOnPwsm1, 0); // FullSequence != BaseSequence + var pwsm2 = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 9, + CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + + var pg = new EngineLayer.ProteinGroup( + new HashSet { protein }, + new HashSet { pwsm1, pwsm2 }, + new HashSet { pwsm1, pwsm2 }); + + // Branch 1: labels == null, DisplayModsOnPeptides == false → BaseSequence + pg.DisplayModsOnPeptides = false; + pg.GetIdentifiedPeptidesOutput(null); + var tsv1 = pg.ToString(); + Assert.That(tsv1, Does.Contain(pwsm1.BaseSequence)); + Assert.That(tsv1.Split('\t')[4], Does.Not.Contain("[")); // unique-peptides column has no mod notation + + // Branch 2: labels == null, DisplayModsOnPeptides == true → FullSequence (includes mod) + pg.DisplayModsOnPeptides = true; + pg.GetIdentifiedPeptidesOutput(null); + var tsv2 = pg.ToString(); + Assert.That(tsv2, Does.Contain(pwsm1.FullSequence)); + + // SILAC branches: use an empty label list (labels != null) + var labels = new List(); + + // Branch 3: labels != null, DisplayModsOnPeptides == false → light BaseSequence + pg.DisplayModsOnPeptides = false; + Assert.DoesNotThrow(() => 
pg.GetIdentifiedPeptidesOutput(labels)); + var tsv3 = pg.ToString(); + Assert.That(tsv3.Split('\t')[4], Is.Not.Empty); // unique-peptides column populated + + // Branch 4: labels != null, DisplayModsOnPeptides == true → light FullSequence + pg.DisplayModsOnPeptides = true; + Assert.DoesNotThrow(() => pg.GetIdentifiedPeptidesOutput(labels)); + var tsv4 = pg.ToString(); + Assert.That(tsv4.Split('\t')[4], Is.Not.Empty); + } } } diff --git a/MetaMorpheus/Test/QuantificationTest.cs b/MetaMorpheus/Test/QuantificationTest.cs index d5ef5ed333..3e3b688744 100644 --- a/MetaMorpheus/Test/QuantificationTest.cs +++ b/MetaMorpheus/Test/QuantificationTest.cs @@ -166,7 +166,7 @@ public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign string condition = hasDefinedExperimentalDesign ? "TestCondition" : ""; // create the protein database - Protein prot = new(peptide, @""); + Protein prot = new(peptide, @"test"); // necessary to pass name to protein. otherwise dbloader will do crazy things string dbName = Path.Combine(unitTestFolder, "testDB.fasta"); UsefulProteomicsDatabases.ProteinDbWriter.WriteFastaDatabase(new List { prot }, dbName, ">"); @@ -233,7 +233,7 @@ public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign // check the intensity column headers List splitHeader = lines[0].Split(new char[] { '\t' }).ToList(); - List intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList(); + List intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity_", StringComparison.OrdinalIgnoreCase)).ToList(); Assert.That(intensityColumnHeaders.Count == 2); diff --git a/MetaMorpheus/Test/RobTest.cs b/MetaMorpheus/Test/RobTest.cs index be33f6b456..16ef22ad73 100644 --- a/MetaMorpheus/Test/RobTest.cs +++ b/MetaMorpheus/Test/RobTest.cs @@ -14,6 +14,11 @@ using Omics.Modifications; using Omics; using Transcriptomics; +using EngineLayer.ModernSearch; +using EngineLayer.Indexing; 
+using System.IO; +using TaskLayer; +using UsefulProteomicsDatabases; namespace Test { @@ -256,7 +261,9 @@ public static void TestPTMOutput() ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, filteredPsms, false, false, true, new CommonParameters(), null, new List()); f.Run(); - Assert.That(proteinGroups.First().ModsInfo[0], Is.EqualTo("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]")); + // Occupancy is now computed by BioPolymerGroup.PopulateSampleGroupResults(). + // Without FlashLFQ, SampleGroupResults should be null or empty. + Assert.That(proteinGroups.First().SampleGroupResults, Is.Null); } [Test] diff --git a/MetaMorpheus/Test/SearchTaskTest.cs b/MetaMorpheus/Test/SearchTaskTest.cs index 6c09a506c2..9e49ea5e81 100644 --- a/MetaMorpheus/Test/SearchTaskTest.cs +++ b/MetaMorpheus/Test/SearchTaskTest.cs @@ -264,31 +264,140 @@ public static void PostSearchNormalizeTest() string myDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"); string folderPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestNormalizationExperDesign"); string experimentalDesignFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\ExperimentalDesign.tsv"); - using (StreamWriter output = new StreamWriter(experimentalDesignFile)) + DbForTask db = new DbForTask(myDatabase, false); + + try { - output.WriteLine("FileName\tCondition\tBiorep\tFraction\tTechrep"); - output.WriteLine("PrunedDbSpectra.mzml" + "\t" + "condition" + "\t" + "1" + "\t" + "1" + "\t" + "1"); + using (StreamWriter output = new StreamWriter(experimentalDesignFile)) + { + output.WriteLine("FileName\tCondition\tBiorep\tFraction\tTechrep"); + output.WriteLine("PrunedDbSpectra.mzml" + "\t" + "condition" + "\t" + "1" + "\t" + "1" + "\t" + "1"); + } + + // run the task + Directory.CreateDirectory(folderPath); + searchTask.RunTask(folderPath, 
new List { db }, new List { myFile }, "normal"); + + Directory.Delete(folderPath, true); + + // delete the exper design and try again. this should skip quantification + File.Delete(experimentalDesignFile); + + // run the task + Directory.CreateDirectory(folderPath); + searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); + + // PSMs should be present but no quant output + Assert.That(!File.Exists(Path.Combine(folderPath, "AllQuantifiedPeptides.tsv"))); + Assert.That(File.Exists(Path.Combine(folderPath, "AllPSMs.psmtsv"))); + + Directory.Delete(folderPath, true); } - DbForTask db = new DbForTask(myDatabase, false); + finally + { + // Always remove the ExperimentalDesign.tsv we wrote into shared TestData\, even + // on assertion/exception, so subsequent tests in the same suite run don't pick + // up a stale file (which would silently break quantification for them). + if (File.Exists(experimentalDesignFile)) File.Delete(experimentalDesignFile); + if (Directory.Exists(folderPath)) Directory.Delete(folderPath, true); + } + } - // run the task - Directory.CreateDirectory(folderPath); - searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); + // Malformed exp design (Normalize=false) -> quant skipped, protein-groups TSV has no dynamic quant columns. + // Filename remains AllQuantifiedProteinGroups.tsv because it's driven by DoLabelFreeQuantification, not by quant success. 
+ [Test] + public static void PostSearchMalformedExperimentalDesignSkipsQuant() + { + SearchTask searchTask = new SearchTask() + { + SearchParameters = new SearchParameters + { + Normalize = false, + DoParsimony = true + }, + CommonParameters = new(precursorDeconParams: new IsoDecDeconvolutionParameters()) + }; - Directory.Delete(folderPath, true); + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestMalformedExpDesign"); + string inputFolder = Path.Combine(outputFolder, "inputs"); + Directory.CreateDirectory(inputFolder); + string mzmlPath = Path.Combine(inputFolder, "PrunedDbSpectra.mzml"); + string fastaPath = Path.Combine(inputFolder, "DbForPrunedDb.fasta"); + string expDesignPath = Path.Combine(inputFolder, "ExperimentalDesign.tsv"); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml"), mzmlPath, true); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"), fastaPath, true); + + // Lists an unrelated file so ReadExperimentalDesign emits errors. + using (StreamWriter w = new StreamWriter(expDesignPath)) + { + w.WriteLine("FileName\tCondition\tBiorep\tFraction\tTechrep"); + w.WriteLine("UnrelatedFile.mzml\tcondition\t1\t1\t1"); + } - // delete the exper design and try again. 
this should skip quantification - File.Delete(experimentalDesignFile); + try + { + searchTask.RunTask(outputFolder, new List { new DbForTask(fastaPath, false) }, new List { mzmlPath }, "normal"); + + Assert.That(File.Exists(Path.Combine(outputFolder, "AllQuantifiedPeptides.tsv")), Is.False); + + string pgPath = Path.Combine(outputFolder, "AllQuantifiedProteinGroups.tsv"); + Assert.That(File.Exists(pgPath), Is.True); + var lines = File.ReadAllLines(pgPath); + Assert.That(lines.Length, Is.GreaterThan(1)); + var header = lines[0]; + Assert.That(header.Contains("SpectralCount_"), Is.False); + Assert.That(header.Contains("Intensity_"), Is.False); + Assert.That(header.Contains("CountOccupancy_"), Is.False); + Assert.That(header.Contains("IntensityOccupancy_"), Is.False); + Assert.That(lines.Select(l => l.Split('\t').Length).AllSame(), Is.True); + } + finally + { + if (Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); + } + } - // run the task - Directory.CreateDirectory(folderPath); - searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); + // No exp design + Normalize=false -> defaults built, LFQ runs, dynamic columns appear. + // Complements PostSearchNormalizeTest (no exp design + Normalize=true -> skip). 
+ [Test] + public static void PostSearchNoExpDesignNoNormalizeRunsQuant() + { + SearchTask searchTask = new SearchTask() + { + SearchParameters = new SearchParameters + { + Normalize = false, + DoParsimony = true + }, + CommonParameters = new(precursorDeconParams: new IsoDecDeconvolutionParameters()) + }; - // PSMs should be present but no quant output - Assert.That(!File.Exists(Path.Combine(folderPath, "AllQuantifiedPeptides.tsv"))); - Assert.That(File.Exists(Path.Combine(folderPath, "AllPSMs.psmtsv"))); + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestNoExpDesignNoNormalize"); + string inputFolder = Path.Combine(outputFolder, "inputs"); + Directory.CreateDirectory(inputFolder); + string mzmlPath = Path.Combine(inputFolder, "PrunedDbSpectra.mzml"); + string fastaPath = Path.Combine(inputFolder, "DbForPrunedDb.fasta"); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml"), mzmlPath, true); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"), fastaPath, true); - Directory.Delete(folderPath, true); + try + { + searchTask.RunTask(outputFolder, new List { new DbForTask(fastaPath, false) }, new List { mzmlPath }, "normal"); + + string pgPath = Path.Combine(outputFolder, "AllQuantifiedProteinGroups.tsv"); + Assert.That(File.Exists(pgPath), Is.True); + var lines = File.ReadAllLines(pgPath); + Assert.That(lines.Length, Is.GreaterThan(1)); + var header = lines[0]; + + Assert.That(header.Contains("SpectralCount_"), Is.True); + Assert.That(header.Contains("CountOccupancy_"), Is.True); + Assert.That(lines.Select(l => l.Split('\t').Length).AllSame(), Is.True); + } + finally + { + if (Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); + } } /// @@ -700,4 +809,4 @@ public static void TestSearchTaskResultsTextContents() Directory.Delete(folderPath, true); } } -} \ No newline at end of file +} diff --git 
a/MetaMorpheus/Test/SeqCoverageTest.cs b/MetaMorpheus/Test/SeqCoverageTest.cs index 5cf618d215..c8790a71e0 100644 --- a/MetaMorpheus/Test/SeqCoverageTest.cs +++ b/MetaMorpheus/Test/SeqCoverageTest.cs @@ -100,13 +100,6 @@ public static void TryFailSequenceCoverage() var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First(); Assert.That(firstSequenceCoverageDisplayListWithMods, Is.EqualTo("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]")); - var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First(); - Assert.That(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]")); - Assert.That(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]")); - Assert.That(!(firstModInfo.Contains(@"#aa3"))); - Assert.That(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]")); - Assert.That(!(firstModInfo.Contains(@"#aa5"))); - Assert.That(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]")); Console.WriteLine("Test output: " + firstSequenceCoverageDisplayList); } @@ -170,4 +163,4 @@ public static void TestFragmentSequenceCoverage() Assert.That(firstSequenceCoverageDisplayList == "MmkMMK"); } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/Test/SilacTest.cs b/MetaMorpheus/Test/SilacTest.cs index 4b3c5e54a8..23261d05f6 100644 --- a/MetaMorpheus/Test/SilacTest.cs +++ b/MetaMorpheus/Test/SilacTest.cs @@ -1,6 +1,12 @@ using EngineLayer; +using EngineLayer.FdrAnalysis; +using EngineLayer.SpectrumMatch; using MassSpectrometry; -using NUnit.Framework; +using NUnit.Framework; +using Omics; +using Omics.Digestion; +using Omics.Fragmentation; +using Omics.Modifications; using Proteomics; using Proteomics.AminoAcidPolymer; using Proteomics.ProteolyticDigestion; @@ -8,11 +14,8 @@ using System.Collections.Generic; using System.IO; using EngineLayer.DatabaseLoading; -using Omics.Modifications; using TaskLayer; using UsefulProteomicsDatabases; -using Omics; -using 
Omics.Digestion; namespace Test { @@ -58,8 +61,9 @@ public static void TestSilacNoLightProtein() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Modification Info List\tIntensity_silac(R+3.988)\tIntensity_silac(R+10.008)")); //test that two files were made and no light file - Assert.That(output[1].Contains("875000.0000000009\t437500.00000000047")); //test the heavier intensity is half that of the heavy (per the raw file) + Assert.That(output[0].Contains("SpectralCount_silac(R+3.988)\tIntensity_silac(R+3.988)\tCountOccupancy_silac(R+3.988)\tIntensityOccupancy_silac(R+3.988)\tSpectralCount_silac(R+10.008)\tIntensity_silac(R+10.008)")); //test that two conditions were made and no light condition + Assert.That(output[1].Contains("875000.0000000009")); //test the heavy intensity + Assert.That(output[1].Contains("437500.00000000047")); //test the heavier intensity is half that of the heavy (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedPeptides.tsv"); @@ -132,8 +136,9 @@ public static void TestSilacMultipleModsPerCondition() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Intensity_silac\tIntensity_silac(K+8.014 & R+6.020)")); //test that two files were made - Assert.That(output[1].Contains("1374999.999999999\t687499.9999999995")); //test the heavy intensity is half that of the light (per the raw file) + Assert.That(output[0].Contains("SpectralCount_silac\tIntensity_silac\tCountOccupancy_silac\tIntensityOccupancy_silac\tSpectralCount_silac(K+8.014 & R+6.020)\tIntensity_silac(K+8.014 & R+6.020)")); //test that two conditions were made + 
Assert.That(output[1].Contains("1374999.999999999")); //test the light intensity + Assert.That(output[1].Contains("687499.9999999995")); //test the heavy intensity is half that of the light (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedPeptides.tsv"); @@ -223,8 +228,13 @@ public static void TestSilacQuantification() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"\TestSilac\AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Intensity_silac\tIntensity_silacPart2\tIntensity_silac(K+8.014)\tIntensity_silacPart2(K+8.014)")); //test that two files were made - Assert.That(output[1].Contains("875000.0000000009\t875000.0000000009\t437500.00000000047\t437500.00000000047")); //test the heavy intensity is half that of the light (per the raw file) + Assert.That(output[0].Contains( + "SpectralCount_silac\tIntensity_silac\tCountOccupancy_silac\tIntensityOccupancy_silac\t" + + "SpectralCount_silacPart2\tIntensity_silacPart2\tCountOccupancy_silacPart2\tIntensityOccupancy_silacPart2\t" + + "SpectralCount_silac(K+8.014)\tIntensity_silac(K+8.014)\tCountOccupancy_silac(K+8.014)\tIntensityOccupancy_silac(K+8.014)\t" + + "SpectralCount_silacPart2(K+8.014)\tIntensity_silacPart2(K+8.014)")); //test that all four conditions were made + Assert.That(output[1].Contains("875000.0000000009")); //test the light intensities (both files) + Assert.That(output[1].Contains("437500.00000000047")); //test the heavy intensity is half that of the light (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"\TestSilac\AllQuantifiedPeptides.tsv"); @@ -670,5 +680,40 @@ public static void TestSilacHelperMethods() //Test no crash in weird situations SilacConversions.SilacConversionsPostQuantification(null, null, null, new List(), null, new HashSet(), null, new List(), 
new Dictionary(), true); } + + /// + /// Verifies that the SILAC clone constructor preserves IsobaricMassTagReporterIonIntensities + /// and PeptideFdrInfo, both of which were previously dropped during cloning. + /// Regression guard for the ptm_stoich branch fixes. + /// + [Test] + public static void TestSilacClonePreservesQuantAndFdrData() + { + var protein = new Protein("PEPTIDE", "ACCESSION"); + var pwsm = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 7, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + var scan = new Ms2ScanWithSpecificMass( + new TestDataFile(pwsm, "quadratic").GetOneBasedScan(2), 100, 1, null, new CommonParameters()); + + var psm = new PeptideSpectralMatch(pwsm, 0, 10, 0, scan, new CommonParameters(), new List()); + psm.ResolveAllAmbiguities(); + + // Set fields that the clone constructor must preserve + var reporterIons = new double[] { 100.0, 200.0, 300.0 }; + typeof(PeptideSpectralMatch).BaseType.GetProperty("IsobaricMassTagReporterIonIntensities", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.Public) + .SetValue(psm, reporterIons); + psm.PeptideFdrInfo = new FdrInfo { QValue = 0.05, PEP = 0.1 }; + + // Clone (SILAC path) + var clone = psm.Clone(new List + { + new SpectralMatchHypothesis(0, pwsm, new List(), 10) + }); + + // Assertions + Assert.That(clone.IsobaricMassTagReporterIonIntensities, Is.EqualTo(reporterIons)); + Assert.That(clone.PeptideFdrInfo, Is.Not.Null); + Assert.That(clone.PeptideFdrInfo.QValue, Is.EqualTo(0.05)); + Assert.That(clone.PeptideFdrInfo.PEP, Is.EqualTo(0.1)); + } } -} \ No newline at end of file +}