From 62118a8fcc2ac7ab10d4d4872a50e22c08e9cbdf Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Sun, 16 Feb 2025 04:17:24 -0600 Subject: [PATCH 01/30] Initial implementation of occupancy by intensity. Requires ptm_stoich branch of mzlib. Code is untested. Have seen some discrepancies with the quantity of quantified mods and their indexing. --- .../ProteinParsimony/ProteinGroup.cs | 34 ++-- .../SearchTask/PostSearchAnalysisTask.cs | 174 ++++++++++++------ 2 files changed, 134 insertions(+), 74 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index a7370ddeda..8dbea26b85 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -89,7 +89,7 @@ public ProteinGroup(HashSet proteins, HashSet ModsInfo { get; private set; } + public List ModsInfo { get; set; } public Dictionary IntensitiesByFile { get; set; } @@ -613,22 +613,28 @@ public void CalculateSequenceCoverage() } } - var modStrings = new List<(int aaNum, string part)>(); - for (int i = 0; i < pepModTotals.Count; i++) + // modInfo will be updated by the PostSearchAnalysisTask. However, leaving this code + // here for now in case we want to use it in the future. 
+ bool quantifyModsByPSM = false; + if (quantifyModsByPSM) { - string aa = modIndex[i].index.ToString(); - string modName = modIndex[i].modName.ToString(); - string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2"); - string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}"; - string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]"); - modStrings.Add((modIndex[i].index, tempString)); - } + var modStrings = new List<(int aaNum, string part)>(); + for (int i = 0; i < pepModTotals.Count; i++) + { + string aa = modIndex[i].index.ToString(); + string modName = modIndex[i].modName.ToString(); + string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2"); + string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}"; + string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]"); + modStrings.Add((modIndex[i].index, tempString)); + } - var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part)); + var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part)); - if (!string.IsNullOrEmpty(modInfoString)) - { - ModsInfo.Add(modInfoString); + if (!string.IsNullOrEmpty(modInfoString)) + { + ModsInfo.Add(modInfoString); + } } } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 48d58865da..4f83116fa1 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1,4 +1,5 @@ -using Easy.Common.Extensions; +using Chemistry; +using Easy.Common.Extensions; using EngineLayer; using EngineLayer.FdrAnalysis; using EngineLayer.HistogramAnalysis; @@ -7,6 +8,9 @@ using FlashLFQ; using MassSpectrometry; using MathNet.Numerics.Distributions; +using MzLibUtil; +using Omics.Modifications; +using 
Omics.SpectrumMatch; using Proteomics; using Proteomics.ProteolyticDigestion; using System; @@ -20,8 +24,6 @@ using Chemistry; using EngineLayer.DatabaseLoading; using MzLibUtil; -using Omics.Digestion; -using Omics.BioPolymer; using Omics.Modifications; using Omics.SpectrumMatch; using Omics; @@ -43,7 +45,7 @@ public class PostSearchAnalysisTask : MetaMorpheusTask /// /// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary() /// - private Dictionary<(string,string),string> ResultsDictionary { get; set; } + private Dictionary<(string, string), string> ResultsDictionary { get; set; } /// /// Used for storage of results for writing digestion product counts to a .tsv. /// @@ -117,14 +119,14 @@ public MyTaskResults Run() if (Parameters.SearchParameters.DoLabelFreeQuantification && Parameters.FlashLfqResults != null) { SpectralRecoveryResults = SpectralRecoveryRunner.RunSpectralRecoveryAlgorithm(Parameters, CommonParameters, FileSpecificParameters); - } + } } - if(Parameters.SearchParameters.UpdateSpectralLibrary) + if (Parameters.SearchParameters.UpdateSpectralLibrary) { UpdateSpectralLibrary(); } - + if (Parameters.SearchParameters.WriteDigestionProductCountFile) { WriteDigestionCountByProtein(); @@ -597,13 +599,77 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = flashLfqEngine.Run(); } - // get protein intensity back from FlashLFQ - if (ProteinGroups != null && Parameters.FlashLfqResults != null) + + if (ProteinGroups != null && Parameters.FlashLfqResults != null) + { + // get modification stoichiometry using FlashLFQ intensities + var peptides = flashLfqEngine.PeptideModifiedSequencesToQuantify + .Where(pep => Parameters.FlashLfqResults.PeptideModifiedSequences.ContainsKey(pep)) + .Select(pep => (Parameters.FlashLfqResults.PeptideModifiedSequences[pep].Sequence, + Parameters.FlashLfqResults.PeptideModifiedSequences[pep].BaseSequence, + 
Parameters.FlashLfqResults.PeptideModifiedSequences[pep].ProteinGroups.Select(pg => pg.ProteinGroupName).ToList(), + Parameters.FlashLfqResults.PeptideModifiedSequences[pep].GetTotalIntensity())).ToList(); + + PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); + pfa.ProteinGroupsOccupancyByPeptide(peptides, true, true, true); // one-based indexes, ignores terminal mods on all peptides. + + var proteinGroupsOccupancyByProteins = pfa.Occupancy; + var quantifiedProteinGroups = ProteinGroups.Where(pg => Parameters.FlashLfqResults.ProteinGroups.ContainsKey(pg.ProteinGroupName)); + + foreach (var proteinGroup in quantifiedProteinGroups) { - foreach (var proteinGroup in ProteinGroups) + var modInfoString = new StringBuilder(); + + foreach (var protein in proteinGroup.Proteins) { - proteinGroup.FilesForQuantification = spectraFileInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); + List peptideBaseSequencesSeen = new List(); + foreach (var peptide in proteinGroup.AllPeptides) + { + if (proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession].Peptides.ContainsKey(peptide.BaseSequence) + && !peptideBaseSequencesSeen.Contains(peptide.BaseSequence)) + { + proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] + .Proteins[protein.Accession].Peptides[peptide.BaseSequence] + .PeptideToProteinPositions(peptide.OneBasedStartResidueInProtein); + + peptideBaseSequencesSeen.Add(peptide.BaseSequence); + } + } + + proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] + .Proteins[protein.Accession] + .SetProteinModsFromPeptides(); + + // build modInfoString for this protein + var occupancyPGProtein = proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession]; + modInfoString.Append($""); + var aaModStrings = new List(); + + foreach (var modpos in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein.OrderBy(x => x.Key)) + { + var aaModString = new StringBuilder(); + 
aaModString.Append($"aa#{modpos.Key.ToString()}"); + + foreach (var mod in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein[modpos.Key]) + { + aaModString.Append($"[{mod.Key}, info:occupancy={mod.Value.Intensity.ToString()}]"); + } + + aaModStrings.Add(aaModString.ToString()); + } + if (aaModStrings.IsNotNullOrEmpty()) + { + modInfoString.Append($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}"); + } + } + proteinGroup.ModsInfo.Add(modInfoString.ToString()); + } + + // get protein intensity back from FlashLFQ + foreach (var proteinGroup in ProteinGroups) + { + proteinGroup.FilesForQuantification = spectraFileInfo; + proteinGroup.IntensitiesByFile = new Dictionary(); foreach (var spectraFile in proteinGroup.FilesForQuantification) { @@ -619,16 +685,11 @@ private void QuantificationAnalysis() } } - //Silac stuff for post-quantification - if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllSpectralMatches.First() is PeptideSpectralMatch) //if we're doing silac - { - SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, - Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); - } - } - catch (Exception e) + //Silac stuff for post-quantification + if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllPsms.First() is PeptideSpectralMatch) //if we're doing silac { - EngineCrashed("Quantification", e); + SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, + Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); } } @@ -707,7 +768,7 @@ private void WritePsmResults() // write summary text if 
(psmsForPsmResults.FilteringNotPerformed) { - + Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText( $"PEP could not be calculated due to an insufficient number of {GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s. Results were filtered by q-value." + Environment.NewLine); @@ -758,9 +819,9 @@ private void WriteIndividualPsmResults() // generated by analyzing one file by itself. Therefore, the FDR info should change between AllResults and FileSpecific string strippedFileName = Path.GetFileNameWithoutExtension(psmFileGroup.Key); var psmsForThisFile = psmFileGroup.ToList(); - CalculatePsmAndPeptideFdr(psmsForThisFile,"PSM", false); + CalculatePsmAndPeptideFdr(psmsForThisFile, "PSM", false); var psmsToWrite = FilteredPsms.Filter(psmsForThisFile, - CommonParameters, + CommonParameters, includeDecoys: Parameters.SearchParameters.WriteDecoys, includeContaminants: Parameters.SearchParameters.WriteContaminants, includeAmbiguous: true, @@ -837,14 +898,14 @@ private void UpdateSpectralLibrary() ); - //group psms by peptide and charge, then write highest scoring PSM to dictionary - Dictionary<(string, int), SpectralMatch> psmSeqChargeDictionary = peptidesForSpectralLibrary - .GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)) - .ToDictionary( - // Key is a (FullSequence, Charge) tuple - keySelector: g => g.Key, - // Value is the highest scoring psm in the group - elementSelector: g => g.MaxBy(p => p.Score)); + //group psms by peptide and charge, then write highest scoring PSM to dictionary + Dictionary<(string, int), SpectralMatch> psmSeqChargeDictionary = peptidesForSpectralLibrary + .GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)) + .ToDictionary( + // Key is a (FullSequence, Charge) tuple + keySelector: g => g.Key, + // Value is the highest scoring psm in the group + elementSelector: g => g.MaxBy(p => p.Score)); //load the original library var originalLibrarySpectra = Parameters.SpectralLibrary.GetAllLibrarySpectra(); @@ -916,28 +977,22 @@ private 
void SpectralLibraryGeneration() includeAmbiguous: false, includeHighQValuePsms: false); - //group psms by peptide and charge, the psms having same sequence and same charge will be in the same group - var fullSeqChargeGrouping = - peptidesForSpectralLibrary.GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)); - List spectraLibrary = new(); - foreach (var matchGroup in fullSeqChargeGrouping) - { - SpectralMatch bestPsm = matchGroup.MaxBy(p => p.Score); - if (bestPsm == null) continue; - spectraLibrary.Add(new LibrarySpectrum( - bestPsm.FullSequence, - bestPsm.ScanPrecursorMonoisotopicPeakMz, - bestPsm.ScanPrecursorCharge, - bestPsm.MatchedFragmentIons, - bestPsm.ScanRetentionTime)); - } - - WriteSpectrumLibrary(spectraLibrary, Parameters.OutputFolder); - } - catch (Exception e) + //group psms by peptide and charge, the psms having same sequence and same charge will be in the same group + var fullSeqChargeGrouping = peptidesForSpectralLibrary.GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)); + List spectraLibrary = new(); + foreach (var matchGroup in fullSeqChargeGrouping) { - EngineCrashed("SpectralLibraryGeneration", e); + SpectralMatch bestPsm = matchGroup.MaxBy(p => p.Score); + if (bestPsm == null) continue; + spectraLibrary.Add(new LibrarySpectrum( + bestPsm.FullSequence, + bestPsm.ScanPrecursorMonoisotopicPeakMz, + bestPsm.ScanPrecursorCharge, + bestPsm.MatchedFragmentIons, + bestPsm.ScanRetentionTime)); } + + WriteSpectrumLibrary(spectraLibrary, Parameters.OutputFolder); } private void WriteProteinResults() @@ -951,7 +1006,7 @@ private void WriteProteinResults() string proteinResultsText = $"All target {GlobalVariables.AnalyteType.GetBioPolymerLabel().ToLower()} groups with q-value <= 0.01 (1% FDR): " + ProteinGroups.Count(b => b.QValue <= 0.01 && !b.IsDecoy); ResultsDictionary[("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s")] = proteinResultsText; } - + string fileName = $"All{GlobalVariables.AnalyteType.GetBioPolymerLabel()}Groups.tsv"; 
if (Parameters.SearchParameters.DoLabelFreeQuantification) { @@ -1201,12 +1256,11 @@ public static double[] GetMultiplexIonIntensities(SpectralMatch psm, double[] th .ToArray(); double[] expIonMzs = diagnosticIons.Select(ion => ion.Mz).ToArray(); double[] ionIntensities = new double[theoreticalIonMzs.Length]; - - int expIonIndex = 0; - for (int theoreticalIonIndex = 0; theoreticalIonIndex < ionIntensities.Length; theoreticalIonIndex++) + + for (int ionIndex = 0; ionIndex < ionIntensities.Length; ionIndex++) { - while (expIonIndex < expIonMzs.Length && - expIonMzs[expIonIndex] < tolerance.GetMinimumValue(theoreticalIonMzs[theoreticalIonIndex])) + while (peakIndex <= lastPeakIndex && + scan.XArray[peakIndex] < tolerance.GetMinimumValue(theoreticalIonMzs[ionIndex])) { expIonIndex++; } @@ -1646,7 +1700,7 @@ private void ConstructResultsDictionary() if (Parameters.SearchParameters.DoParsimony) { - ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), ""); + ResultsDictionary.Add(("All", $"{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"), ""); if (Parameters.CurrentRawFileList.Count > 1 && Parameters.SearchParameters.WriteIndividualFiles) { foreach (var rawFile in Parameters.CurrentRawFileList) @@ -1669,7 +1723,7 @@ private string AllResultsTotals() } } - var keys = ResultsDictionary.Keys.Where(k => k.Item1 != "All").OrderBy(k=>k.Item1).ToList(); + var keys = ResultsDictionary.Keys.Where(k=>k.Item1 != "All").OrderBy(k=>k.Item1).ToList(); if (keys.Any()) { sb.AppendLine(); From 38840d83ddd980a41a957c789c62797efab25481 Mon Sep 17 00:00:00 2001 From: pcruzparri Date: Sat, 22 Feb 2025 19:34:45 -0600 Subject: [PATCH 02/30] Updating the ProteinGroup.ModInfo writing --- .../SearchTask/PostSearchAnalysisTask.cs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 4f83116fa1..ff07aa1f05 
100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -618,8 +618,6 @@ private void QuantificationAnalysis() foreach (var proteinGroup in quantifiedProteinGroups) { - var modInfoString = new StringBuilder(); - foreach (var protein in proteinGroup.Proteins) { List peptideBaseSequencesSeen = new List(); @@ -630,7 +628,7 @@ private void QuantificationAnalysis() { proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] .Proteins[protein.Accession].Peptides[peptide.BaseSequence] - .PeptideToProteinPositions(peptide.OneBasedStartResidueInProtein); + .OneBasedStartIndexInProtein = peptide.OneBasedStartResidueInProtein; peptideBaseSequencesSeen.Add(peptide.BaseSequence); } @@ -642,7 +640,6 @@ private void QuantificationAnalysis() // build modInfoString for this protein var occupancyPGProtein = proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession]; - modInfoString.Append($""); var aaModStrings = new List(); foreach (var modpos in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein.OrderBy(x => x.Key)) @@ -650,19 +647,21 @@ private void QuantificationAnalysis() var aaModString = new StringBuilder(); aaModString.Append($"aa#{modpos.Key.ToString()}"); - foreach (var mod in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein[modpos.Key]) + var totalPositionIntensity = occupancyPGProtein.PeptidesByProteinPosition[modpos.Key].Sum(x => x.Intensity); + + foreach (var mod in modpos.Value) { - aaModString.Append($"[{mod.Key}, info:occupancy={mod.Value.Intensity.ToString()}]"); + var modStoichiometry = mod.Value.Intensity / totalPositionIntensity; + aaModString.Append($"[{mod.Key}, info:occupancy={modStoichiometry.ToString("N4")}({totalPositionIntensity})]"); } aaModStrings.Add(aaModString.ToString()); } if (aaModStrings.IsNotNullOrEmpty()) { - modInfoString.Append($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}"); + 
proteinGroup.ModsInfo.Add($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}"); } } - proteinGroup.ModsInfo.Add(modInfoString.ToString()); } // get protein intensity back from FlashLFQ From 7873281ec3d8208b5fae72ff7d6ab5cca8a8551a Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Tue, 5 Aug 2025 14:03:11 -0500 Subject: [PATCH 03/30] Saving latest work. Still need to rebase onto master and address bug (uninitialized object). commit wip. --- .../ProteinParsimony/ProteinGroup.cs | 244 +++++++++++------- .../SearchTask/PostSearchAnalysisTask.cs | 171 +++++++----- 2 files changed, 260 insertions(+), 155 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 8dbea26b85..d224ba2304 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -9,6 +9,9 @@ using ThermoFisher.CommonCore.Data; using Omics; using Transcriptomics.Digestion; +using MzLibUtil; +using Easy.Common.Extensions; +using SharpLearning.InputOutput.Csv; namespace EngineLayer { @@ -32,7 +35,6 @@ public ProteinGroup(HashSet proteins, HashSet(); // if any of the proteins in the protein group are decoys, the protein group is a decoy foreach (var protein in proteins) @@ -89,7 +91,7 @@ public ProteinGroup(HashSet proteins, HashSet ModsInfo { get; set; } + public Dictionary ModsInfo { get; set; } public Dictionary IntensitiesByFile { get; set; } @@ -165,7 +167,8 @@ public string GetTabSeparatedHeader() sb.Append("Sequence Coverage" + '\t'); sb.Append("Sequence Coverage with Mods" + '\t'); sb.Append("Fragment Sequence Coverage" + '\t'); - sb.Append("Modification Info List" + "\t"); + //sb.Append("Modification Info List" + "\t"); + if (FilesForQuantification != null) { bool unfractionated = FilesForQuantification.Select(p => p.Fraction).Distinct().Count() == 1; @@ -186,11 +189,14 @@ public string GetTabSeparatedHeader() if 
((conditionsUndefined && unfractionated) || silacExperimentalDesign) { // if the data is unfractionated and the conditions haven't been defined, just use the file name as the intensity header + sb.Append("Mods_" + sample.First().FilenameWithoutExtension + "\t"); sb.Append("Intensity_" + sample.First().FilenameWithoutExtension + "\t"); } else { // if the data is fractionated and/or the conditions have been defined, label the header w/ the condition and biorep number + sb.Append("Mods_" + sample.First().Condition + "_" + + (sample.First().BiologicalReplicate + 1) + "\t"); sb.Append("Intensity_" + sample.First().Condition + "_" + (sample.First().BiologicalReplicate + 1) + "\t"); } @@ -315,16 +321,19 @@ public override string ToString() sb.Append("\t"); //Detailed mods information list - sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ModsInfo))); - sb.Append("\t"); + //sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ModsInfo))); + //sb.Append("\t"); - // MS1 intensity (retrieved from FlashLFQ in the SearchTask) + // MS1 intensity and mod stoichiometry (retrieved from FlashLFQ in the SearchTask) if (IntensitiesByFile != null && FilesForQuantification != null) { foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition)) { foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key)) { + sb.Append(ModInfoStringFromGroupedFiles(sample.ToList())); + sb.Append("\t"); + // if the samples are fractionated, the protein will only have 1 intensity in the first fraction // and the other fractions will be zero. 
we could find the first/only fraction with an intensity, // but simply summing the fractions is easier than finding the single non-zero value @@ -382,6 +391,63 @@ public override string ToString() return sb.ToString(); } + // internal method for grouping ModsInfo by file + public string ModInfoStringFromGroupedFiles(List spectraFiles) + { + if (ModsInfo.IsNotNullOrEmpty()) + { + var modInfoString = new StringBuilder(); + + // Create a combined quantified protein group for all fraction/techrep + var modsInfo = ModsInfo[spectraFiles.First()]; + foreach (var spectraFile in spectraFiles.Skip(1)) + { + foreach (var protein in ModsInfo[spectraFile].Proteins) + { + foreach (var peptide in protein.Value.Peptides) + { + modsInfo.Proteins[protein.Key].Peptides[peptide.Key].MergePeptide(peptide.Value); + } + } + } + + var proteinGroupOccupanciesPerProtein = modsInfo.Proteins.Values.Select(x => new KeyValuePair>> + (x, x.GetModStoichiometryFromProteinMods())).ToDictionary(x => x.Key, x => x.Value); + + foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) + { + if (!proteinGroupOccupanciesPerProtein[protein].IsNotNullOrEmpty()) + { + continue; + } + + modInfoString.Append(protein.Accession + ":{"); + var protSeq = Proteins.Where(prot => prot.Accession == protein.Accession).Select(prot => prot.BaseSequence).ToList(); + + foreach (var modpos in proteinGroupOccupanciesPerProtein[protein].Keys.Order()) + { + var loc = modpos == 0 ? "N-term" : modpos == protein.Sequence.Length + 1 ? 
"C-term" : "aa#" + modpos.ToString(); //something's worng with the positions sometimes being longer than the sequence + modInfoString.Append(loc); + + var modStrings = new List(); + var modposTotalIntensity = protein.Peptides.Where(x => protein.PeptidesByProteinPosition[modpos].Contains(x.Value.BaseSequence)).Sum(x => x.Value.Intensity); + foreach (var mod in proteinGroupOccupanciesPerProtein[protein][modpos]) + { + modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.Intensity.ToString("N4")}({modposTotalIntensity})"); + } + modInfoString.Append("[" + string.Join(";", modStrings) + "]"); + } + modInfoString.Append("}"); + } + return modInfoString.ToString(); + } + else + { + return ""; + } + + } + // this method is only used internally, to make protein grouping faster // this is NOT an output and is NOT used for protein FDR calculations public void Score() @@ -551,91 +617,91 @@ public void CalculateSequenceCoverage() continue; } - // calculate spectral count % of modified observations - var pepModTotals = new List(); // count of modified peptides for each mod/index - var pepTotals = new List(); // count of all peptides for each mod/index - var modIndex = new List<(int index, string modName)>(); // index and name of the modified position - - foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein]) - { - foreach (var mod in pep.AllModsOneIsNterminus) - { - int pepNumTotal = 0; //For one mod, The total Pep Num - - if (mod.Value.ModificationType.Contains("Common Variable") - || mod.Value.ModificationType.Contains("Common Fixed") - || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.PepC) - || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.NPep)) - { - continue; - } - - int indexInProtein; - if (mod.Value.LocationRestriction.Equals("N-terminal.")) - { - indexInProtein = 1; - } - else if (mod.Value.LocationRestriction.Equals("Anywhere.")) - { - indexInProtein = pep.OneBasedStartResidue + mod.Key - 2; - } - else if 
(mod.Value.LocationRestriction.Equals("C-terminal.")) - { - indexInProtein = protein.Length; - } - else - { - // In case it's a peptide terminal mod, skip! - // we don't want this annotated in the protein's modifications - continue; - } - - var modKey = (indexInProtein, mod.Value.IdWithMotif); - if (modIndex.Contains(modKey)) - { - pepModTotals[modIndex.IndexOf(modKey)] += 1; - } - else - { - modIndex.Add(modKey); - foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein]) - { - if (indexInProtein >= pept.OneBasedStartResidue - (indexInProtein == 1 ? 1 : 0) - && indexInProtein <= pept.OneBasedEndResidue) - { - pepNumTotal += 1; - } - } - - pepTotals.Add(pepNumTotal); - pepModTotals.Add(1); - } - } - } - + // PREVIOUS OCCUPANCY CODE // modInfo will be updated by the PostSearchAnalysisTask. However, leaving this code // here for now in case we want to use it in the future. - bool quantifyModsByPSM = false; - if (quantifyModsByPSM) - { - var modStrings = new List<(int aaNum, string part)>(); - for (int i = 0; i < pepModTotals.Count; i++) - { - string aa = modIndex[i].index.ToString(); - string modName = modIndex[i].modName.ToString(); - string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2"); - string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}"; - string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]"); - modStrings.Add((modIndex[i].index, tempString)); - } - - var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part)); - - if (!string.IsNullOrEmpty(modInfoString)) - { - ModsInfo.Add(modInfoString); - } - } + // calculate spectral count % of modified observations + //var pepModTotals = new List(); // count of modified peptides for each mod/index + //var pepTotals = new List(); // count of all peptides for each mod/index + //var modIndex = new List<(int index, string modName)>(); // index and name of the modified position + // + //foreach 
(var pep in proteinsWithPsmsWithLocalizedMods[protein]) + //{ + // foreach (var mod in pep.AllModsOneIsNterminus) + // { + // int pepNumTotal = 0; //For one mod, The total Pep Num + // + // if (mod.Value.ModificationType.Contains("Common Variable") + // || mod.Value.ModificationType.Contains("Common Fixed") + // || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.PepC) + // || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.NPep)) + // { + // continue; + // } + // + // int indexInProtein; + // if (mod.Value.LocationRestriction.Equals("N-terminal.")) + // { + // indexInProtein = 1; + // } + // else if (mod.Value.LocationRestriction.Equals("Anywhere.")) + // { + // indexInProtein = pep.OneBasedStartResidue + mod.Key - 2; + // } + // else if (mod.Value.LocationRestriction.Equals("C-terminal.")) + // { + // indexInProtein = protein.Length; + // } + // else + // { + // // In case it's a peptide terminal mod, skip! + // // we don't want this annotated in the protein's modifications + // continue; + // } + // + // var modKey = (indexInProtein, mod.Value.IdWithMotif); + // if (modIndex.Contains(modKey)) + // { + // pepModTotals[modIndex.IndexOf(modKey)] += 1; + // } + // else + // { + // modIndex.Add(modKey); + // foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein]) + // { + // if (indexInProtein >= pept.OneBasedStartResidue - (indexInProtein == 1 ? 
1 : 0) + // && indexInProtein <= pept.OneBasedEndResidue) + // { + // pepNumTotal += 1; + // } + // } + // + // pepTotals.Add(pepNumTotal); + // pepModTotals.Add(1); + // } + // } + //} + //bool quantifyModsByPSM = false; + //if (quantifyModsByPSM) + //{ + // var modStrings = new List<(int aaNum, string part)>(); + // for (int i = 0; i < pepModTotals.Count; i++) + // { + // string aa = modIndex[i].index.ToString(); + // string modName = modIndex[i].modName.ToString(); + // string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2"); + // string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}"; + // string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]"); + // modStrings.Add((modIndex[i].index, tempString)); + // } + // + // var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part)); + // + // if (!string.IsNullOrEmpty(modInfoString)) + // { + // ModsInfo.Add(modInfoString); + // } + //} } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index ff07aa1f05..4f52ba6e40 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -11,6 +11,7 @@ using MzLibUtil; using Omics.Modifications; using Omics.SpectrumMatch; +using OpenMcdf.Extensions.OLEProperties; using Proteomics; using Proteomics.ProteolyticDigestion; using System; @@ -20,6 +21,7 @@ using System.IO.Compression; using System.Linq; using System.Text; +using System.Windows.Markup; using TaskLayer.MbrAnalysis; using Chemistry; using EngineLayer.DatabaseLoading; @@ -599,91 +601,128 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = flashLfqEngine.Run(); } - + // get protein intensity and mod stoichiometry back from FlashLFQ if (ProteinGroups != null && Parameters.FlashLfqResults != null) { - // get modification 
stoichiometry using FlashLFQ intensities - var peptides = flashLfqEngine.PeptideModifiedSequencesToQuantify - .Where(pep => Parameters.FlashLfqResults.PeptideModifiedSequences.ContainsKey(pep)) - .Select(pep => (Parameters.FlashLfqResults.PeptideModifiedSequences[pep].Sequence, - Parameters.FlashLfqResults.PeptideModifiedSequences[pep].BaseSequence, - Parameters.FlashLfqResults.PeptideModifiedSequences[pep].ProteinGroups.Select(pg => pg.ProteinGroupName).ToList(), - Parameters.FlashLfqResults.PeptideModifiedSequences[pep].GetTotalIntensity())).ToList(); - - PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); - pfa.ProteinGroupsOccupancyByPeptide(peptides, true, true, true); // one-based indexes, ignores terminal mods on all peptides. - - var proteinGroupsOccupancyByProteins = pfa.Occupancy; - var quantifiedProteinGroups = ProteinGroups.Where(pg => Parameters.FlashLfqResults.ProteinGroups.ContainsKey(pg.ProteinGroupName)); - - foreach (var proteinGroup in quantifiedProteinGroups) + // get protein intensity back from FlashLFQ + foreach (var proteinGroup in ProteinGroups) { - foreach (var protein in proteinGroup.Proteins) - { - List peptideBaseSequencesSeen = new List(); - foreach (var peptide in proteinGroup.AllPeptides) - { - if (proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession].Peptides.ContainsKey(peptide.BaseSequence) - && !peptideBaseSequencesSeen.Contains(peptide.BaseSequence)) - { - proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] - .Proteins[protein.Accession].Peptides[peptide.BaseSequence] - .OneBasedStartIndexInProtein = peptide.OneBasedStartResidueInProtein; - - peptideBaseSequencesSeen.Add(peptide.BaseSequence); - } - } - - proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] - .Proteins[protein.Accession] - .SetProteinModsFromPeptides(); - - // build modInfoString for this protein - var occupancyPGProtein = 
proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession]; - var aaModStrings = new List(); + proteinGroup.FilesForQuantification = spectraFileInfo; + proteinGroup.IntensitiesByFile = new Dictionary(); + proteinGroup.ModsInfo = new Dictionary(); - foreach (var modpos in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein.OrderBy(x => x.Key)) + foreach (var spectraFile in proteinGroup.FilesForQuantification) + { + if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) { - var aaModString = new StringBuilder(); - aaModString.Append($"aa#{modpos.Key.ToString()}"); - - var totalPositionIntensity = occupancyPGProtein.PeptidesByProteinPosition[modpos.Key].Sum(x => x.Intensity); - - foreach (var mod in modpos.Value) - { - var modStoichiometry = mod.Value.Intensity / totalPositionIntensity; - aaModString.Append($"[{mod.Key}, info:occupancy={modStoichiometry.ToString("N4")}({totalPositionIntensity})]"); - } - - aaModStrings.Add(aaModString.ToString()); + proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); } - if (aaModStrings.IsNotNullOrEmpty()) + else { - proteinGroup.ModsInfo.Add($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}"); + proteinGroup.IntensitiesByFile.Add(spectraFile, 0); + continue; // no need to calculate stoichiometry if the protein group is not in the FlashLFQ results } - } - } - // get protein intensity back from FlashLFQ - foreach (var proteinGroup in ProteinGroups) - { - proteinGroup.FilesForQuantification = spectraFileInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); + // get modification stoichiometry using FlashLFQ spectraFile-specific intensities + var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x=>x.FullSequence).Contains(x.Key)).ToList(); - foreach (var spectraFile in proteinGroup.FilesForQuantification) + 
if (pgQuantifiedPeptides.IsNotNullOrEmpty()) { - if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) + var peptides = pgQuantifiedPeptides.Where(pep => pep.Value.GetIntensity(spectraFile) > 0) + .Select(pep => (pep.Value.Sequence, + new List { proteinGroup.ProteinGroupName }, + pep.Value.GetIntensity(spectraFile))).ToList(); + if (!peptides.IsNotNullOrEmpty()) { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); + proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); + continue; } - else + + PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); + pfa.CalculateOccupancies(peptides, false); // one-based indexes, ignores terminal mods on all peptides. + + // set the one-based start index in protein for each peptide + foreach (var protein in pfa.ProteinGroupOccupancies.First().Value.Proteins.Values) { - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); + if (protein.Sequence == null) + { + protein.Sequence = proteinGroup.Proteins.Where(p => p.Accession == protein.Accession).Select(p => p.BaseSequence).First(); + } + List peptideBaseSequencesSeen = new List(); + foreach (var peptide in proteinGroup.AllPeptides) + { + if (protein.Peptides.ContainsKey(peptide.BaseSequence) + && !peptideBaseSequencesSeen.Contains(peptide.BaseSequence)) + { + protein.Peptides[peptide.BaseSequence] + .OneBasedStartIndexInProtein = peptide.OneBasedStartResidueInProtein; + + peptideBaseSequencesSeen.Add(peptide.BaseSequence); + } + } } + + proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroupOccupancies.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. 
+ } } } + + + //foreach (var proteinGroup in ProteinGroups) + //{ + // if (proteinGroup.FilesForQuantification.IsNotNullOrEmpty()) + // { + // var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(p => peptideSequencesForQuantification.Contains(p.Key)).Select(p => p.Value); + // + // + // + // + // var sb = new StringBuilder(); + // + // + // foreach (var file in proteinGroup.FilesForQuantification) + // { + // + // + // + // + // proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] + // .Proteins[protein.Accession] + // .SetProteinModsFromPeptides(); + // + // // build modInfoString for this protein + // var occupancyPGProtein = proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession]; + // var proteinStoichiometry = occupancyPGProtein.GetModStoichiometryFromProteinMods(); + // var aaModStrings = new List(); + // + // foreach (var modpos in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein.Keys.Order()) + // { + // var totalIntensity = occupancyPGProtein.Peptides.Where(pep => occupancyPGProtein.PeptidesByProteinPosition[modpos].Contains(pep.Key)).Sum(pep => pep.Value.Intensity); + // var aaModString = new StringBuilder(); + // aaModString.Append($"aa#{modpos.ToString()}"); + // + // foreach (var mod in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein[modpos]) + // { + // var modStoichiometry = mod.Value.Intensity; + // aaModString.Append($"[{mod.Key}, info:occupancy={modStoichiometry.ToString("N4")}({totalIntensity})]"); + // } + // + // aaModStrings.Add(aaModString.ToString()); + // } + // if (aaModStrings.IsNotNullOrEmpty()) + // { + // sb.Append($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}"); + // } + // } + // proteinGroup.ModsInfo = new Dictionary(); + // proteinGroup.ModsInfo.Add(file.First(), sb.ToString()); + // } + // } + //} + } + //Silac stuff for post-quantification if (Parameters.SearchParameters.SilacLabels != null && 
Parameters.AllPsms.First() is PeptideSpectralMatch) //if we're doing silac { From 82472e120ac9ceafbb53738a6e442d24c86092b1 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Tue, 23 Sep 2025 14:15:09 -0500 Subject: [PATCH 04/30] temp save --- .../EngineLayer/ProteinParsimony/ProteinGroup.cs | 13 +++++++------ .../TaskLayer/SearchTask/PostSearchAnalysisTask.cs | 9 +++++---- MetaMorpheus/Test/RobTest.cs | 4 +++- MetaMorpheus/Test/SeqCoverageTest.cs | 13 +++++++------ 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index d224ba2304..2f51712980 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -10,8 +10,10 @@ using Omics; using Transcriptomics.Digestion; using MzLibUtil; +using MzLibUtil.PositionFrequencyAnalysis; using Easy.Common.Extensions; using SharpLearning.InputOutput.Csv; +using MzLibUtil.PositionFrequencyAnalysis; namespace EngineLayer { @@ -92,7 +94,6 @@ public ProteinGroup(HashSet proteins, HashSet ModsInfo { get; set; } - public Dictionary IntensitiesByFile { get; set; } private List ListOfProteinsOrderedByAccession; @@ -411,7 +412,7 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) } } - var proteinGroupOccupanciesPerProtein = modsInfo.Proteins.Values.Select(x => new KeyValuePair>> + var proteinGroupOccupanciesPerProtein = modsInfo.Proteins.Values.Select(x => new KeyValuePair>> (x, x.GetModStoichiometryFromProteinMods())).ToDictionary(x => x.Key, x => x.Value); foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) @@ -422,18 +423,18 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) } modInfoString.Append(protein.Accession + ":{"); - var protSeq = Proteins.Where(prot => prot.Accession == protein.Accession).Select(prot => prot.BaseSequence).ToList(); + var protSeq = Proteins.Where(prot => 
prot.Accession == protein.Accession).First().BaseSequence; foreach (var modpos in proteinGroupOccupanciesPerProtein[protein].Keys.Order()) { - var loc = modpos == 0 ? "N-term" : modpos == protein.Sequence.Length + 1 ? "C-term" : "aa#" + modpos.ToString(); //something's worng with the positions sometimes being longer than the sequence + var loc = modpos == 0 ? "N-term" : modpos == protein.Sequence.Length + 1 ? "C-term" : $"{protSeq[modpos-1]}#" + modpos.ToString(); //something's worng with the positions sometimes being longer than the sequence modInfoString.Append(loc); var modStrings = new List(); var modposTotalIntensity = protein.Peptides.Where(x => protein.PeptidesByProteinPosition[modpos].Contains(x.Value.BaseSequence)).Sum(x => x.Value.Intensity); foreach (var mod in proteinGroupOccupanciesPerProtein[protein][modpos]) { - modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.Intensity.ToString("N4")}({modposTotalIntensity})"); + modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.ToString("N4")}({modposTotalIntensity})"); } modInfoString.Append("[" + string.Join(";", modStrings) + "]"); } @@ -741,7 +742,7 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List p.FullFilePathWithExtension == fullFilePath) .FirstOrDefault(); //check that file name wasn't changed (can occur in SILAC searches) - if (!silacLabels.IsNullOrEmpty() && spectraFileInfo == null) + if (!MzLibUtil.ClassExtensions.IsNullOrEmpty(silacLabels) && spectraFileInfo == null) { foreach (SilacLabel label in silacLabels) { diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 4f52ba6e40..d590a640c1 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -639,10 +639,11 @@ private void QuantificationAnalysis() } PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); - 
pfa.CalculateOccupancies(peptides, false); // one-based indexes, ignores terminal mods on all peptides. + var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); + pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // one-based indexes, ignores terminal mods on all peptides. // set the one-based start index in protein for each peptide - foreach (var protein in pfa.ProteinGroupOccupancies.First().Value.Proteins.Values) + foreach (var protein in pfa.ProteinGroups.First().Value.Proteins.Values) { if (protein.Sequence == null) { @@ -655,14 +656,14 @@ private void QuantificationAnalysis() && !peptideBaseSequencesSeen.Contains(peptide.BaseSequence)) { protein.Peptides[peptide.BaseSequence] - .OneBasedStartIndexInProtein = peptide.OneBasedStartResidueInProtein; + .OneBasedStartIndexInProtein = peptide.OneBasedStartResidue; peptideBaseSequencesSeen.Add(peptide.BaseSequence); } } } - proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroupOccupancies.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. + proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. } } diff --git a/MetaMorpheus/Test/RobTest.cs b/MetaMorpheus/Test/RobTest.cs index 09f9ba9d1c..e7a227da5e 100644 --- a/MetaMorpheus/Test/RobTest.cs +++ b/MetaMorpheus/Test/RobTest.cs @@ -254,7 +254,9 @@ public static void TestPTMOutput() ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), null, new List()); f.Run(); - Assert.That(proteinGroups.First().ModsInfo[0], Is.EqualTo("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]")); + // Commented out the assertions below because the occupancy is no longer at the PSM level. + // Needs updating. 
+ //Assert.That(proteinGroups.First().ModsInfo[0], Is.EqualTo("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]")); } [Test] diff --git a/MetaMorpheus/Test/SeqCoverageTest.cs b/MetaMorpheus/Test/SeqCoverageTest.cs index dbc34d33bc..2d353ba8d2 100644 --- a/MetaMorpheus/Test/SeqCoverageTest.cs +++ b/MetaMorpheus/Test/SeqCoverageTest.cs @@ -97,13 +97,14 @@ public static void TryFailSequenceCoverage() var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First(); Assert.That(firstSequenceCoverageDisplayListWithMods, Is.EqualTo("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]")); + // This needs to update to intensity based occupancy reporting var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First(); - Assert.That(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]")); - Assert.That(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]")); - Assert.That(!(firstModInfo.Contains(@"#aa3"))); - Assert.That(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]")); - Assert.That(!(firstModInfo.Contains(@"#aa5"))); - Assert.That(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]")); + //Assert.That(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]")); + //Assert.That(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]")); + //Assert.That(!(firstModInfo.Contains(@"#aa3"))); + //Assert.That(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]")); + //Assert.That(!(firstModInfo.Contains(@"#aa5"))); + //Assert.That(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]")); Console.WriteLine("Test output: " + firstSequenceCoverageDisplayList); } From c4d2c7ebbc7245945612eeebdbf840e50df693f3 Mon Sep 17 00:00:00 2001 From: pcruzparri Date: Tue, 30 Sep 2025 11:08:00 -0500 Subject: [PATCH 05/30] fixing protein sequence issue. 
Also added a condition to check if variants can be written. --- .../ProteinParsimony/ProteinGroup.cs | 5 ++-- .../SearchTask/PostSearchAnalysisTask.cs | 25 ++----------------- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 2f51712980..19a2366272 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -417,17 +417,16 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) { - if (!proteinGroupOccupanciesPerProtein[protein].IsNotNullOrEmpty()) + if (proteinGroupOccupanciesPerProtein[protein].IsNullOrEmpty()) { continue; } modInfoString.Append(protein.Accession + ":{"); - var protSeq = Proteins.Where(prot => prot.Accession == protein.Accession).First().BaseSequence; foreach (var modpos in proteinGroupOccupanciesPerProtein[protein].Keys.Order()) { - var loc = modpos == 0 ? "N-term" : modpos == protein.Sequence.Length + 1 ? "C-term" : $"{protSeq[modpos-1]}#" + modpos.ToString(); //something's worng with the positions sometimes being longer than the sequence + var loc = modpos == 0 ? "N-terminal" : modpos == protein.Sequence.Length + 1 ? 
"C-terminal" : $"{protein.Sequence[modpos-1]}#" + modpos.ToString(); modInfoString.Append(loc); var modStrings = new List(); diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index d590a640c1..8b41d0c5fd 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -640,28 +640,7 @@ private void QuantificationAnalysis() PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); - pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // one-based indexes, ignores terminal mods on all peptides. - - // set the one-based start index in protein for each peptide - foreach (var protein in pfa.ProteinGroups.First().Value.Proteins.Values) - { - if (protein.Sequence == null) - { - protein.Sequence = proteinGroup.Proteins.Where(p => p.Accession == protein.Accession).Select(p => p.BaseSequence).First(); - } - List peptideBaseSequencesSeen = new List(); - foreach (var peptide in proteinGroup.AllPeptides) - { - if (protein.Peptides.ContainsKey(peptide.BaseSequence) - && !peptideBaseSequencesSeen.Contains(peptide.BaseSequence)) - { - protein.Peptides[peptide.BaseSequence] - .OneBasedStartIndexInProtein = peptide.OneBasedStartResidue; - - peptideBaseSequencesSeen.Add(peptide.BaseSequence); - } - } - } + pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods. proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. 
@@ -1455,7 +1434,7 @@ private void WriteVariantResults() foreach (var variant in variants) { - if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true) + if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true && variant.Description.Description.IsNotNullOrEmpty()) { if (culture.CompareInfo.IndexOf(variant.Description.Description, "missense_variant", CompareOptions.IgnoreCase) >= 0) { From 92b45a1d413a9942c5263cce7f1fef5f1fbd539e Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Tue, 30 Sep 2025 11:44:51 -0500 Subject: [PATCH 06/30] . --- MetaMorpheus/Test/ProteinGroupTest.cs | 3 +++ MetaMorpheus/Test/RobTest.cs | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index 7e1135da76..a2a775fddd 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -85,6 +85,8 @@ public static void ProteinGroupToStringTest() //string exectedProteinGroupToString = proteinGroup1.ToString(); string exectedProteinGroupToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0"; + var out1 = proteinGroup1.ToString(); + var out1h = proteinGroup1.GetTabSeparatedHeader(); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupToString)); @@ -93,6 +95,7 @@ public static void ProteinGroupToStringTest() ProteinGroup proteinGroup3 = new ProteinGroup(new HashSet(proteinList3), new HashSet(), new HashSet()); string exectedProteinGroupWithDecoyToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0"; + var out2 = proteinGroup1.ToString(); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupWithDecoyToString)); } diff --git a/MetaMorpheus/Test/RobTest.cs b/MetaMorpheus/Test/RobTest.cs index e7a227da5e..3f895bbc5d 100644 --- a/MetaMorpheus/Test/RobTest.cs +++ 
b/MetaMorpheus/Test/RobTest.cs @@ -254,9 +254,8 @@ public static void TestPTMOutput() ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), null, new List()); f.Run(); - // Commented out the assertions below because the occupancy is no longer at the PSM level. - // Needs updating. //Assert.That(proteinGroups.First().ModsInfo[0], Is.EqualTo("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]")); + Assert.That(true == false); // intentional test fail. } [Test] From 0d15de5d26d84b96bbe0f0917856d9cea6ca4053 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Tue, 30 Sep 2025 14:07:00 -0500 Subject: [PATCH 07/30] fixing changes during rebasing to reflect master code again. --- .../SearchTask/PostSearchAnalysisTask.cs | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 8b41d0c5fd..d2dc4cf227 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1,5 +1,4 @@ -using Chemistry; -using Easy.Common.Extensions; +using Easy.Common.Extensions; using EngineLayer; using EngineLayer.FdrAnalysis; using EngineLayer.HistogramAnalysis; @@ -8,10 +7,6 @@ using FlashLFQ; using MassSpectrometry; using MathNet.Numerics.Distributions; -using MzLibUtil; -using Omics.Modifications; -using Omics.SpectrumMatch; -using OpenMcdf.Extensions.OLEProperties; using Proteomics; using Proteomics.ProteolyticDigestion; using System; @@ -21,11 +16,13 @@ using System.IO.Compression; using System.Linq; using System.Text; -using System.Windows.Markup; using TaskLayer.MbrAnalysis; using Chemistry; using EngineLayer.DatabaseLoading; using MzLibUtil; +using MzLibUtil.PositionFrequencyAnalysis; +using 
Omics.Digestion; +using Omics.BioPolymer; using Omics.Modifications; using Omics.SpectrumMatch; using Omics; @@ -704,7 +701,7 @@ private void QuantificationAnalysis() } //Silac stuff for post-quantification - if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllPsms.First() is PeptideSpectralMatch) //if we're doing silac + if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllSpectralMatches.First() is PeptideSpectralMatch) //if we're doing silac { SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); @@ -1272,13 +1269,14 @@ public static double[] GetMultiplexIonIntensities(SpectralMatch psm, double[] th .Where(ion => ion.NeutralTheoreticalProduct.ProductType == Omics.Fragmentation.ProductType.D) .OrderBy(ion => ion.Mz) .ToArray(); - double[] expIonMzs = diagnosticIons.Select(ion => ion.Mz).ToArray(); + double[] expIonMzs = diagnosticIons.Select(ion => ion.Mz).ToArray(); double[] ionIntensities = new double[theoreticalIonMzs.Length]; - - for (int ionIndex = 0; ionIndex < ionIntensities.Length; ionIndex++) + + int expIonIndex = 0; + for (int theoreticalIonIndex = 0; theoreticalIonIndex < ionIntensities.Length; theoreticalIonIndex++) { - while (peakIndex <= lastPeakIndex && - scan.XArray[peakIndex] < tolerance.GetMinimumValue(theoreticalIonMzs[ionIndex])) + while (expIonIndex < expIonMzs.Length && + expIonMzs[expIonIndex] < tolerance.GetMinimumValue(theoreticalIonMzs[theoreticalIonIndex])) { expIonIndex++; } From 8de14267986fc04494e2497ca5eb6401f6f089e4 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Wed, 1 Oct 2025 17:15:48 -0500 Subject: [PATCH 08/30] fixes to tests and occupancy to pass said tests. need to do a final test before pr. 
--- .../ProteinParsimony/ProteinGroup.cs | 9 ++- .../EngineLayer/Silac/SilacConversions.cs | 25 ++++++++ .../SearchTask/PostSearchAnalysisTask.cs | 58 +------------------ MetaMorpheus/Test/ProteinGroupTest.cs | 8 ++- MetaMorpheus/Test/QuantificationTest.cs | 2 +- MetaMorpheus/Test/RobTest.cs | 11 +++- MetaMorpheus/Test/SeqCoverageTest.cs | 8 --- MetaMorpheus/Test/SilacTest.cs | 15 +++-- 8 files changed, 56 insertions(+), 80 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 19a2366272..93b2e2b4d1 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -349,9 +349,13 @@ public override string ToString() } } } + else + { + sb.Append("\t"); + } - // number of PSMs for listed peptides - sb.Append("" + AllPsmsBelowOnePercentFDR.Count); + // number of PSMs for listed peptides + sb.Append("" + AllPsmsBelowOnePercentFDR.Count); sb.Append("\t"); // isDecoy @@ -445,7 +449,6 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) { return ""; } - } // this method is only used internally, to make protein grouping faster diff --git a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs index 4cfec7575c..3ed641bfd1 100644 --- a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs +++ b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs @@ -9,6 +9,8 @@ using Omics; using Omics.Digestion; using EngineLayer.SpectrumMatch; +using MzLibUtil.PositionFrequencyAnalysis; +using Easy.Common.Extensions; namespace EngineLayer { @@ -460,6 +462,7 @@ public static void SilacConversionsPostQuantification(List allSilacL { proteinGroup.FilesForQuantification = allInfo; proteinGroup.IntensitiesByFile = new Dictionary(); + proteinGroup.ModsInfo = new Dictionary(); foreach (var spectraFile in allInfo) { @@ -472,6 +475,28 @@ public static void 
SilacConversionsPostQuantification(List allSilacL //needed for decoys/contaminants/proteins that aren't quantified proteinGroup.IntensitiesByFile.Add(spectraFile, 0); } + + // get modification stoichiometry using FlashLFQ spectraFile-specific intensities + var pgQuantifiedPeptides = flashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x => x.FullSequence).Contains(x.Key)).ToList(); + + if (pgQuantifiedPeptides.IsNotNullOrEmpty()) + { + var peptides = pgQuantifiedPeptides.Where(pep => pep.Value.GetIntensity(spectraFile) > 0) + .Select(pep => (pep.Value.Sequence, + new List { proteinGroup.ProteinGroupName }, + pep.Value.GetIntensity(spectraFile))).ToList(); + if (!peptides.IsNotNullOrEmpty()) + { + proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); + continue; + } + + PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); + var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); + pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods. + + proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. 
+ } } } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index d2dc4cf227..1fb53b59df 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -617,7 +617,6 @@ private void QuantificationAnalysis() else { proteinGroup.IntensitiesByFile.Add(spectraFile, 0); - continue; // no need to calculate stoichiometry if the protein group is not in the FlashLFQ results } // get modification stoichiometry using FlashLFQ spectraFile-specific intensities @@ -629,7 +628,7 @@ private void QuantificationAnalysis() .Select(pep => (pep.Value.Sequence, new List { proteinGroup.ProteinGroupName }, pep.Value.GetIntensity(spectraFile))).ToList(); - if (!peptides.IsNotNullOrEmpty()) + if (peptides.IsNullOrEmpty()) { proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); continue; @@ -640,64 +639,9 @@ private void QuantificationAnalysis() pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods. proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. 
- } } } - - - - //foreach (var proteinGroup in ProteinGroups) - //{ - // if (proteinGroup.FilesForQuantification.IsNotNullOrEmpty()) - // { - // var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(p => peptideSequencesForQuantification.Contains(p.Key)).Select(p => p.Value); - // - // - // - // - // var sb = new StringBuilder(); - // - // - // foreach (var file in proteinGroup.FilesForQuantification) - // { - // - // - // - // - // proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName] - // .Proteins[protein.Accession] - // .SetProteinModsFromPeptides(); - // - // // build modInfoString for this protein - // var occupancyPGProtein = proteinGroupsOccupancyByProteins[proteinGroup.ProteinGroupName].Proteins[protein.Accession]; - // var proteinStoichiometry = occupancyPGProtein.GetModStoichiometryFromProteinMods(); - // var aaModStrings = new List(); - // - // foreach (var modpos in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein.Keys.Order()) - // { - // var totalIntensity = occupancyPGProtein.Peptides.Where(pep => occupancyPGProtein.PeptidesByProteinPosition[modpos].Contains(pep.Key)).Sum(pep => pep.Value.Intensity); - // var aaModString = new StringBuilder(); - // aaModString.Append($"aa#{modpos.ToString()}"); - // - // foreach (var mod in occupancyPGProtein.ModifiedAminoAcidPositionsInProtein[modpos]) - // { - // var modStoichiometry = mod.Value.Intensity; - // aaModString.Append($"[{mod.Key}, info:occupancy={modStoichiometry.ToString("N4")}({totalIntensity})]"); - // } - // - // aaModStrings.Add(aaModString.ToString()); - // } - // if (aaModStrings.IsNotNullOrEmpty()) - // { - // sb.Append($"protein:{protein.Accession}{{{string.Join(";", aaModStrings)}}}"); - // } - // } - // proteinGroup.ModsInfo = new Dictionary(); - // proteinGroup.ModsInfo.Add(file.First(), sb.ToString()); - // } - // } - //} } //Silac stuff for post-quantification diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs 
b/MetaMorpheus/Test/ProteinGroupTest.cs index a2a775fddd..09f6803afa 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -85,8 +85,9 @@ public static void ProteinGroupToStringTest() //string exectedProteinGroupToString = proteinGroup1.ToString(); string exectedProteinGroupToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0"; - var out1 = proteinGroup1.ToString(); - var out1h = proteinGroup1.GetTabSeparatedHeader(); + var out1 = proteinGroup1.ToString().Split("\t"); + var out1h = proteinGroup1.GetTabSeparatedHeader().Split("\t"); + var out1zipped = out1h.Zip(out1, (a, b) => (a, b)).ToDictionary(); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupToString)); @@ -249,7 +250,8 @@ public static void TestModificationInfoListInProteinGroupsOutput() List proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv")).ToList(); string firstDataLine = proteinGroupsOutput[2]; string modInfoListProteinTwo = firstDataLine.Split('\t')[14]; - Assert.That(modInfoListProteinTwo, Is.EqualTo("#aa71[Oxidation on S,info:occupancy=0.50(1/2)]")); + Assert.That(modInfoListProteinTwo, Is.EqualTo("P10591:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}" + + "P10592:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}")); Directory.Delete(outputFolder, true); } diff --git a/MetaMorpheus/Test/QuantificationTest.cs b/MetaMorpheus/Test/QuantificationTest.cs index d8752b6b53..5ed86608a2 100644 --- a/MetaMorpheus/Test/QuantificationTest.cs +++ b/MetaMorpheus/Test/QuantificationTest.cs @@ -166,7 +166,7 @@ public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign string condition = 
hasDefinedExperimentalDesign ? "TestCondition" : ""; // create the protein database - Protein prot = new(peptide, @""); + Protein prot = new(peptide, @"test"); // necessary to pass name to protein. otherwise dbloader will do crazy things string dbName = Path.Combine(unitTestFolder, "testDB.fasta"); UsefulProteomicsDatabases.ProteinDbWriter.WriteFastaDatabase(new List { prot }, dbName, ">"); diff --git a/MetaMorpheus/Test/RobTest.cs b/MetaMorpheus/Test/RobTest.cs index 3f895bbc5d..e213b4f807 100644 --- a/MetaMorpheus/Test/RobTest.cs +++ b/MetaMorpheus/Test/RobTest.cs @@ -13,6 +13,11 @@ using Omics.Modifications; using Omics; using Transcriptomics; +using EngineLayer.ModernSearch; +using EngineLayer.Indexing; +using System.IO; +using TaskLayer; +using UsefulProteomicsDatabases; namespace Test { @@ -254,8 +259,10 @@ public static void TestPTMOutput() ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), null, new List()); f.Run(); - //Assert.That(proteinGroups.First().ModsInfo[0], Is.EqualTo("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]")); - Assert.That(true == false); // intentional test fail. + // inclined to delete this test + // Otherwise, readapt to run flashlfq and then correctly check modinfo... 
+ Assert.That(proteinGroups.First().ModsInfo, Is.Null); + Assert.That(proteinGroups.First().ModInfoStringFromGroupedFiles(proteinGroups.First().FilesForQuantification), Is.EqualTo("")); } [Test] diff --git a/MetaMorpheus/Test/SeqCoverageTest.cs b/MetaMorpheus/Test/SeqCoverageTest.cs index 2d353ba8d2..48fa90a1f4 100644 --- a/MetaMorpheus/Test/SeqCoverageTest.cs +++ b/MetaMorpheus/Test/SeqCoverageTest.cs @@ -97,14 +97,6 @@ public static void TryFailSequenceCoverage() var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First(); Assert.That(firstSequenceCoverageDisplayListWithMods, Is.EqualTo("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]")); - // This needs to update to intensity based occupancy reporting - var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First(); - //Assert.That(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]")); - //Assert.That(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]")); - //Assert.That(!(firstModInfo.Contains(@"#aa3"))); - //Assert.That(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]")); - //Assert.That(!(firstModInfo.Contains(@"#aa5"))); - //Assert.That(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]")); Console.WriteLine("Test output: " + firstSequenceCoverageDisplayList); } diff --git a/MetaMorpheus/Test/SilacTest.cs b/MetaMorpheus/Test/SilacTest.cs index f1dffeb225..c6637aa66a 100644 --- a/MetaMorpheus/Test/SilacTest.cs +++ b/MetaMorpheus/Test/SilacTest.cs @@ -58,8 +58,8 @@ public static void TestSilacNoLightProtein() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Modification Info List\tIntensity_silac(R+3.988)\tIntensity_silac(R+10.008)")); //test that two files were made and no light file - 
Assert.That(output[1].Contains("875000.0000000009\t437500.00000000047")); //test the heavier intensity is half that of the heavy (per the raw file) + Assert.That(output[0].Contains("Mods_silac(R+3.988)\tIntensity_silac(R+3.988)\tMods_silac(R+10.008)\tIntensity_silac(R+10.008)")); //test that two files were made and no light file + Assert.That(output[1].Contains("875000.0000000009\t\t437500.00000000047")); //test the heavier intensity is half that of the heavy (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedPeptides.tsv"); @@ -132,8 +132,8 @@ public static void TestSilacMultipleModsPerCondition() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Intensity_silac\tIntensity_silac(K+8.014 & R+6.020)")); //test that two files were made - Assert.That(output[1].Contains("1374999.999999999\t687499.9999999995")); //test the heavy intensity is half that of the light (per the raw file) + Assert.That(output[0].Contains("Mods_silac\tIntensity_silac\tMods_silac(K+8.014 & R+6.020)\tIntensity_silac(K+8.014 & R+6.020)")); //test that two files were made + Assert.That(output[1].Contains("1374999.999999999\t\t687499.9999999995")); //test the heavy intensity is half that of the light (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedPeptides.tsv"); @@ -223,8 +223,11 @@ public static void TestSilacQuantification() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"\TestSilac\AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Intensity_silac\tIntensity_silacPart2\tIntensity_silac(K+8.014)\tIntensity_silacPart2(K+8.014)")); //test that two files were made - 
Assert.That(output[1].Contains("875000.0000000009\t875000.0000000009\t437500.00000000047\t437500.00000000047")); //test the heavy intensity is half that of the light (per the raw file) + Assert.That(output[0].Contains("Mods_silac\tIntensity_silac\t"+ + "Mods_silacPart2\tIntensity_silacPart2\t"+ + "Mods_silac(K+8.014)\tIntensity_silac(K+8.014)\t"+ + "Mods_silacPart2(K+8.014)\tIntensity_silacPart2(K+8.014)")); //test that two files were made + Assert.That(output[1].Contains("875000.0000000009\t\t875000.0000000009\t\t437500.00000000047\t\t437500.00000000047")); //test the heavy intensity is half that of the light (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"\TestSilac\AllQuantifiedPeptides.tsv"); From bd8a7d40935991986a18e8b1d09141f01bf9395f Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Thu, 2 Oct 2025 13:15:16 -0500 Subject: [PATCH 09/30] Fixes to code for remaining tests. TestModificationInfoListProteinGroupsOutput() was not consistent when run individually vs when run with all other tests. The ordering of the output was changing. Fixed to not look for the first data line but rather for the one with the right PG. 
--- MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs | 8 ++++++-- MetaMorpheus/Test/ProteinGroupTest.cs | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 93b2e2b4d1..d6046dfca6 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -170,7 +170,7 @@ public string GetTabSeparatedHeader() sb.Append("Fragment Sequence Coverage" + '\t'); //sb.Append("Modification Info List" + "\t"); - if (FilesForQuantification != null) + if (IntensitiesByFile != null && FilesForQuantification != null) { bool unfractionated = FilesForQuantification.Select(p => p.Fraction).Distinct().Count() == 1; bool conditionsUndefined = FilesForQuantification.All(p => string.IsNullOrEmpty(p.Condition)); @@ -204,8 +204,12 @@ public string GetTabSeparatedHeader() } } } + else + { + sb.Append("\t"); + } - sb.Append("Number of PSMs" + '\t'); + sb.Append("Number of PSMs" + '\t'); sb.Append("Protein Decoy/Contaminant/Target" + '\t'); sb.Append("Protein Cumulative Target" + '\t'); sb.Append("Protein Cumulative Decoy" + '\t'); diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index 09f6803afa..bcfb5ca326 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -248,8 +248,10 @@ public static void TestModificationInfoListInProteinGroupsOutput() Assert.That(totalNumberOfMods, Is.EqualTo(4)); List proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv")).ToList(); - string firstDataLine = proteinGroupsOutput[2]; - string modInfoListProteinTwo = firstDataLine.Split('\t')[14]; + string testDataLine = proteinGroupsOutput.Where(x => x.StartsWith("P10591")).First(); + string modInfoListProteinTwo = testDataLine.Split('\t')[14]; + + Assert.That(8, 
Is.EqualTo(proteinGroupsOutput.Count)); Assert.That(modInfoListProteinTwo, Is.EqualTo("P10591:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}" + "P10592:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}")); From 40b17f01d222dd3960f569d1e38096173f786214 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Thu, 6 Nov 2025 11:14:38 -0600 Subject: [PATCH 10/30] update + master merging fixes --- .../ProteinParsimony/ProteinGroup.cs | 52 +++++--- .../EngineLayer/Silac/SilacConversions.cs | 12 +- .../SearchTask/PostSearchAnalysisTask.cs | 124 ++++++++++-------- 3 files changed, 110 insertions(+), 78 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index d6046dfca6..5ad2783ff0 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -1,6 +1,4 @@ using FlashLFQ; -using Proteomics; -using Proteomics.ProteolyticDigestion; using System.Collections.Generic; using System.IO; using System.Linq; @@ -12,8 +10,6 @@ using MzLibUtil; using MzLibUtil.PositionFrequencyAnalysis; using Easy.Common.Extensions; -using SharpLearning.InputOutput.Csv; -using MzLibUtil.PositionFrequencyAnalysis; namespace EngineLayer { @@ -408,19 +404,29 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) var modInfoString = new StringBuilder(); // Create a combined quantified protein group for all fraction/techrep - var modsInfo = ModsInfo[spectraFiles.First()]; + var groupedFilesQuantifiedProteinGroup = ModsInfo[spectraFiles.First()]; foreach (var spectraFile in spectraFiles.Skip(1)) { - foreach (var protein in ModsInfo[spectraFile].Proteins) + foreach (var protein in ModsInfo[spectraFile].Proteins.Values) { - 
foreach (var peptide in protein.Value.Peptides) + foreach (var peptide in protein.Peptides.Values) { - modsInfo.Proteins[protein.Key].Peptides[peptide.Key].MergePeptide(peptide.Value); + // TODO: If all flashlfq quantified peptides have the same spectrafiles, I can get rid of this condition. + // Need to also double check that merging of peptides is properly updating total position intensity. If so, + // only write mods with finite, >0 intensities. + if (groupedFilesQuantifiedProteinGroup.Proteins[protein.Accession].Peptides.ContainsKey(peptide.BaseSequence)) + { + groupedFilesQuantifiedProteinGroup.Proteins[protein.Accession].Peptides[peptide.BaseSequence].MergePeptide(peptide); + } + else + { + groupedFilesQuantifiedProteinGroup.Proteins[protein.Accession].Peptides[peptide.BaseSequence] = peptide; + } } } } - var proteinGroupOccupanciesPerProtein = modsInfo.Proteins.Values.Select(x => new KeyValuePair>> + var proteinGroupOccupanciesPerProtein = groupedFilesQuantifiedProteinGroup.Proteins.Values.Select(x => new KeyValuePair>> (x, x.GetModStoichiometryFromProteinMods())).ToDictionary(x => x.Key, x => x.Value); foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) @@ -434,21 +440,35 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) foreach (var modpos in proteinGroupOccupanciesPerProtein[protein].Keys.Order()) { - var loc = modpos == 0 ? "N-terminal" : modpos == protein.Sequence.Length + 1 ? 
"C-terminal" : $"{protein.Sequence[modpos-1]}#" + modpos.ToString(); - modInfoString.Append(loc); + var modposTotalIntensity = protein.Peptides.Values.Where(x => protein.PeptidesByProteinPosition[modpos].Contains(x.BaseSequence)).Sum(x => x.Intensity); - var modStrings = new List(); - var modposTotalIntensity = protein.Peptides.Where(x => protein.PeptidesByProteinPosition[modpos].Contains(x.Value.BaseSequence)).Sum(x => x.Value.Intensity); - foreach (var mod in proteinGroupOccupanciesPerProtein[protein][modpos]) + if (double.IsFinite(modposTotalIntensity)) // Need to check if is finite because quantified peptides can have an intensity of Zero, leading to NaN occupancies. { - modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.ToString("N4")}({modposTotalIntensity})"); + var loc = modpos == 0 ? "N-terminal" : modpos == protein.Sequence.Length + 1 ? "C-terminal" : $"{protein.Sequence[modpos-1]}#" + modpos.ToString(); + modInfoString.Append(loc); + + var modStrings = new List(); + + foreach (var mod in proteinGroupOccupanciesPerProtein[protein][modpos]) + { + if (mod.Value > 0) // Do not write mods with zero occupancy. 
+ { + modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.ToString("N4")}({modposTotalIntensity})"); + } + } + + // If mods with nonzero fractions found, append them + if (modStrings.Count > 0) + { + modInfoString.Append("[" + string.Join(";", modStrings) + "]"); + } } - modInfoString.Append("[" + string.Join(";", modStrings) + "]"); } modInfoString.Append("}"); } return modInfoString.ToString(); } + else { return ""; diff --git a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs index 3ed641bfd1..5d50ad76fe 100644 --- a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs +++ b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs @@ -11,6 +11,7 @@ using EngineLayer.SpectrumMatch; using MzLibUtil.PositionFrequencyAnalysis; using Easy.Common.Extensions; +using MzLibUtil; namespace EngineLayer { @@ -481,11 +482,10 @@ public static void SilacConversionsPostQuantification(List allSilacL if (pgQuantifiedPeptides.IsNotNullOrEmpty()) { - var peptides = pgQuantifiedPeptides.Where(pep => pep.Value.GetIntensity(spectraFile) > 0) - .Select(pep => (pep.Value.Sequence, - new List { proteinGroup.ProteinGroupName }, - pep.Value.GetIntensity(spectraFile))).ToList(); - if (!peptides.IsNotNullOrEmpty()) + var peptides = pgQuantifiedPeptides.Select(pep => new QuantifiedPeptideRecord(pep.Value.Sequence, + new HashSet { proteinGroup.ProteinGroupName }, + pep.Value.GetIntensity(spectraFile))).ToList(); + if (peptides.IsNullOrEmpty()) { proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); continue; @@ -493,7 +493,7 @@ public static void SilacConversionsPostQuantification(List allSilacL PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); - pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods. 
+ pfa.SetUpQuantificationFromQuantifiedPeptideRecords(peptides, proteins); // uses zero-based indexes for the mods. proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 1fb53b59df..62ec972057 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -568,11 +568,11 @@ private void QuantificationAnalysis() flashLFQIdentifications.Add( new Identification( fileInfo: rawfileinfo, - psm.BaseSequence, + psm.BaseSequence, psm.FullSequence, - psm.BioPolymerWithSetModsMonoisotopicMass.Value, - psm.ScanRetentionTime, - psm.ScanPrecursorCharge, + psm.BioPolymerWithSetModsMonoisotopicMass.Value, + psm.ScanRetentionTime, + psm.ScanPrecursorCharge, psmToProteinGroups[psm], psmScore: psm.Score, qValue: psmsForQuantification.FilterType == FilterType.QValue ? 
psm.FdrInfo.QValue : psm.FdrInfo.PEP_QValue, @@ -598,57 +598,64 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = flashLfqEngine.Run(); } - // get protein intensity and mod stoichiometry back from FlashLFQ - if (ProteinGroups != null && Parameters.FlashLfqResults != null) - { - // get protein intensity back from FlashLFQ - foreach (var proteinGroup in ProteinGroups) + // get protein intensity and mod stoichiometry back from FlashLFQ + if (ProteinGroups != null && Parameters.FlashLfqResults != null) { - proteinGroup.FilesForQuantification = spectraFileInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); - proteinGroup.ModsInfo = new Dictionary(); - - foreach (var spectraFile in proteinGroup.FilesForQuantification) + // get protein intensity back from FlashLFQ + foreach (var proteinGroup in ProteinGroups) { - if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) - { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); - } - else - { - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); - } - - // get modification stoichiometry using FlashLFQ spectraFile-specific intensities - var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x=>x.FullSequence).Contains(x.Key)).ToList(); + proteinGroup.FilesForQuantification = spectraFileInfo; + proteinGroup.IntensitiesByFile = new Dictionary(); + proteinGroup.ModsInfo = new Dictionary(); - if (pgQuantifiedPeptides.IsNotNullOrEmpty()) + foreach (var spectraFile in proteinGroup.FilesForQuantification) { - var peptides = pgQuantifiedPeptides.Where(pep => pep.Value.GetIntensity(spectraFile) > 0) - .Select(pep => (pep.Value.Sequence, - new List { proteinGroup.ProteinGroupName }, - pep.Value.GetIntensity(spectraFile))).ToList(); - if (peptides.IsNullOrEmpty()) + if 
(Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) { - proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); - continue; + proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); } + else + { + proteinGroup.IntensitiesByFile.Add(spectraFile, 0); + } + + // Now get the modification stoichiometries using FlashLFQ spectraFile-specific intensities + // and add them to the proteinGroup.ModsInfo property. The stoichiometry strings will only be + // extracted from the protein group's ToString method. - PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); - var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); - pfa.SetUpQuantificationObjectsFromFullSequences(peptides, proteins); // uses zero-based indexes for the mods. + // Pull out only the peptides in this protein group that were quantified by FlashLFQ + var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x => x.FullSequence).Contains(x.Key)).ToList(); + + if (pgQuantifiedPeptides.IsNotNullOrEmpty()) + { + var peptides = pgQuantifiedPeptides.Select(pep => new QuantifiedPeptideRecord(pep.Value.Sequence, + new HashSet { proteinGroup.ProteinGroupName }, + pep.Value.GetIntensity(spectraFile))).ToList(); + if (peptides.IsNullOrEmpty()) + { + proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); + continue; + } - proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. 
+ var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); + PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); + pfa.SetUpQuantificationFromQuantifiedPeptideRecords(peptides, proteins); // uses zero-based indexes for the mods. + proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. + } } } } - } - //Silac stuff for post-quantification - if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllSpectralMatches.First() is PeptideSpectralMatch) //if we're doing silac + //Silac stuff for post-quantification + if (Parameters.SearchParameters.SilacLabels != null && Parameters.AllSpectralMatches.First() is PeptideSpectralMatch) //if we're doing silac + { + SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, + Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); + } + } + catch (Exception e) { - SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, - Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); + EngineCrashed("Quantification", e); } } @@ -936,22 +943,27 @@ private void SpectralLibraryGeneration() includeAmbiguous: false, includeHighQValuePsms: false); - //group psms by peptide and charge, the psms having same sequence and same charge will be in the same group - var fullSeqChargeGrouping = peptidesForSpectralLibrary.GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)); - List spectraLibrary = new(); - foreach (var matchGroup in fullSeqChargeGrouping) + //group psms by peptide and 
charge, the psms having same sequence and same charge will be in the same group + var fullSeqChargeGrouping = peptidesForSpectralLibrary.GroupBy(p => (p.FullSequence, p.ScanPrecursorCharge)); + List spectraLibrary = new(); + foreach (var matchGroup in fullSeqChargeGrouping) + { + SpectralMatch bestPsm = matchGroup.MaxBy(p => p.Score); + if (bestPsm == null) continue; + spectraLibrary.Add(new LibrarySpectrum( + bestPsm.FullSequence, + bestPsm.ScanPrecursorMonoisotopicPeakMz, + bestPsm.ScanPrecursorCharge, + bestPsm.MatchedFragmentIons, + bestPsm.ScanRetentionTime)); + } + + WriteSpectrumLibrary(spectraLibrary, Parameters.OutputFolder); + } + catch (Exception e) { - SpectralMatch bestPsm = matchGroup.MaxBy(p => p.Score); - if (bestPsm == null) continue; - spectraLibrary.Add(new LibrarySpectrum( - bestPsm.FullSequence, - bestPsm.ScanPrecursorMonoisotopicPeakMz, - bestPsm.ScanPrecursorCharge, - bestPsm.MatchedFragmentIons, - bestPsm.ScanRetentionTime)); + EngineCrashed("SpectralLibraryGeneration", e); } - - WriteSpectrumLibrary(spectraLibrary, Parameters.OutputFolder); } private void WriteProteinResults() From 09661cd5ce10dcd59bb1f0d8009df0115c6ef104 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Fri, 7 Nov 2025 14:13:24 -0600 Subject: [PATCH 11/30] cleaned output --- .../EngineLayer/ProteinParsimony/ProteinGroup.cs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 5ad2783ff0..45dc0f7e36 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -442,19 +442,20 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) { var modposTotalIntensity = protein.Peptides.Values.Where(x => protein.PeptidesByProteinPosition[modpos].Contains(x.BaseSequence)).Sum(x => x.Intensity); - if (double.IsFinite(modposTotalIntensity)) // Need 
to check if is finite because quantified peptides can have an intensity of Zero, leading to NaN occupancies. + bool writeModPos = proteinGroupOccupanciesPerProtein[protein][modpos].Values.Any(x => x > 0); + + // Need to check if is finite because quantified peptides can have an intensity of Zero, leading to NaN occupancies. + // Also, only write mod positions with nonzero total intensity. + if (double.IsFinite(modposTotalIntensity) && modposTotalIntensity > 0 && writeModPos) { - var loc = modpos == 0 ? "N-terminal" : modpos == protein.Sequence.Length + 1 ? "C-terminal" : $"{protein.Sequence[modpos-1]}#" + modpos.ToString(); + var loc = modpos == 0 ? "N-terminal" : modpos == protein.Sequence.Length + 1 ? "C-terminal" : $"{protein.Sequence[modpos - 1]}#" + modpos.ToString(); modInfoString.Append(loc); var modStrings = new List(); foreach (var mod in proteinGroupOccupanciesPerProtein[protein][modpos]) { - if (mod.Value > 0) // Do not write mods with zero occupancy. - { - modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.ToString("N4")}({modposTotalIntensity})"); - } + modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.ToString("N4")}({modposTotalIntensity})"); } // If mods with nonzero fractions found, append them From 0150f1f4879d0c85764f9c56eb238f2e41da4cfe Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Fri, 21 Nov 2025 10:02:23 -0600 Subject: [PATCH 12/30] Cleaning up mod info outputs --- .../ProteinParsimony/ProteinGroup.cs | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 45dc0f7e36..bb368f612d 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -401,7 +401,6 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) { if (ModsInfo.IsNotNullOrEmpty()) { - var modInfoString = new StringBuilder(); 
// Create a combined quantified protein group for all fraction/techrep var groupedFilesQuantifiedProteinGroup = ModsInfo[spectraFiles.First()]; @@ -429,13 +428,63 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) var proteinGroupOccupanciesPerProtein = groupedFilesQuantifiedProteinGroup.Proteins.Values.Select(x => new KeyValuePair>> (x, x.GetModStoichiometryFromProteinMods())).ToDictionary(x => x.Key, x => x.Value); + // Clean stoichiometry results by removing "Common Variable", "Common Fixed", + // and "PeptideTermMod" modification types. + // Also remove any modifications with NaN occupancies. + + var modsToRemove = new List<(QuantifiedProtein proteinKey, int modposKey, string modnameKey)>(); + foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) + { + var proteinModsDict = proteinGroupOccupanciesPerProtein[protein]; + foreach (var modpos in proteinModsDict.Keys) + { + var posMods = proteinModsDict[modpos]; + foreach (var mod in posMods.Keys) + { + if (mod.Contains("Common Variable") + || mod.Contains("Common Fixed") + || mod.Contains("PeptideTermMod") + || !posMods[mod].IsFinite()) + { + modsToRemove.Add((protein, modpos, mod)); + } + } + } + } + foreach (var modToIgnore in modsToRemove) + { + proteinGroupOccupanciesPerProtein[modToIgnore.proteinKey][modToIgnore.modposKey].Remove(modToIgnore.modnameKey); + } + foreach (var modToIgnore in modsToRemove) + { + var proteinToClean = proteinGroupOccupanciesPerProtein[modToIgnore.proteinKey]; + + if (proteinToClean.ContainsKey(modToIgnore.modposKey) + && proteinToClean[modToIgnore.modposKey].IsNullOrEmpty()) + { + proteinToClean.Remove(modToIgnore.modposKey); + } + } + + + // If mods not found for the proteins in protein group, do not write anything + // Since all proteins in the protein group would have the same mod info with + // different mod position key names but same number of keys, we only check + // that the first protein's mod dict is not empty. 
+ if (proteinGroupOccupanciesPerProtein.First().Value.IsNullOrEmpty()) + { + return ""; + } + + // Else, build the mod info string for the protein group. + var modInfoString = new StringBuilder(); + foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) { if (proteinGroupOccupanciesPerProtein[protein].IsNullOrEmpty()) { continue; } - modInfoString.Append(protein.Accession + ":{"); foreach (var modpos in proteinGroupOccupanciesPerProtein[protein].Keys.Order()) From 2fbe1ace8a6c20d984c4b1f0cbc69ed3197070a4 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Wed, 10 Dec 2025 09:44:24 -0600 Subject: [PATCH 13/30] final fix so mods with 0 intensity do not get written. --- MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index bb368f612d..3bdedee7f8 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -441,10 +441,12 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) var posMods = proteinModsDict[modpos]; foreach (var mod in posMods.Keys) { + // Remove common mods, peptide terminus mods not in protein, and mods with 0 or NaN intensity to not be written. if (mod.Contains("Common Variable") || mod.Contains("Common Fixed") || mod.Contains("PeptideTermMod") - || !posMods[mod].IsFinite()) + || !posMods[mod].IsFinite() + || posMods[mod]==0) { modsToRemove.Add((protein, modpos, mod)); } From d521c0af0745c1d349cd6b38d2a1953939ca320f Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Wed, 10 Dec 2025 12:14:53 -0600 Subject: [PATCH 14/30] made filter by common variable and common fixed boolean. Need to link it to gui options. default will be to still write those mods out. 
--- MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 3bdedee7f8..bf0db66b7b 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -433,6 +433,8 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) // Also remove any modifications with NaN occupancies. var modsToRemove = new List<(QuantifiedProtein proteinKey, int modposKey, string modnameKey)>(); + bool filterByCommonMods = false; + foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) { var proteinModsDict = proteinGroupOccupanciesPerProtein[protein]; @@ -442,10 +444,9 @@ public string ModInfoStringFromGroupedFiles(List spectraFiles) foreach (var mod in posMods.Keys) { // Remove common mods, peptide terminus mods not in protein, and mods with 0 or NaN intensity to not be written. - if (mod.Contains("Common Variable") - || mod.Contains("Common Fixed") + if (((mod.Contains("Common Variable") && filterByCommonMods) || (mod.Contains("Common Fixed")) && filterByCommonMods) || mod.Contains("PeptideTermMod") - || !posMods[mod].IsFinite() + || !posMods[mod].IsFinite() // When all peptides containing a localized residue have intensities of 0, it leads to 0/0=NaN occupancies. 
|| posMods[mod]==0) { modsToRemove.Add((protein, modpos, mod)); From f7938d83fbccff8381bb776931d70b5440f66708 Mon Sep 17 00:00:00 2001 From: pcruzparri Date: Mon, 23 Mar 2026 11:39:25 -0500 Subject: [PATCH 15/30] temp: integrating ScanMetadata --- .../EngineLayer/Ms2ScanWithSpecificMass.cs | 25 ++++++ MetaMorpheus/EngineLayer/SpectralMatch.cs | 88 ++++++++----------- .../EngineLayer/Util/IsobaricMassTag.cs | 2 +- .../SearchTask/PostSearchAnalysisTask.cs | 6 +- 4 files changed, 67 insertions(+), 54 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs b/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs index f3cb9127f6..6f25d597bb 100644 --- a/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs +++ b/MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs @@ -24,6 +24,23 @@ public Ms2ScanWithSpecificMass(MsDataScan mzLibScan, double precursorMonoisotopi TheScan = mzLibScan; + // Build the lightweight metadata snapshot + ScanMetadata = new ScanMetadata( + OneBasedScanNumber: mzLibScan.OneBasedScanNumber, + OneBasedPrecursorScanNumber: mzLibScan.OneBasedPrecursorScanNumber, + RetentionTime: mzLibScan.RetentionTime, + NumPeaks: mzLibScan.MassSpectrum.Size, + TotalIonCurrent: mzLibScan.TotalIonCurrent, + NativeId: mzLibScan.NativeId, + FullFilePath: fullFilePath, + PrecursorCharge: precursorCharge, + PrecursorMonoisotopicPeakMz: precursorMonoisotopicPeakMz, + PrecursorMass: PrecursorMass, + PrecursorIntensity: PrecursorIntensity, + PrecursorEnvelopePeakCount: PrecursorEnvelopePeakCount, + PrecursorFractionalIntensity: PrecursorFractionalIntensity, + OneOverK0: mzLibScan is TimsDataScan tims ? tims.OneOverK0 : null); + if (commonParam.DissociationType != DissociationType.LowCID) { ExperimentalFragments = neutralExperimentalFragments ?? 
GetNeutralExperimentalFragments(mzLibScan, commonParam); @@ -39,6 +56,14 @@ public Ms2ScanWithSpecificMass(MsDataScan mzLibScan, double precursorMonoisotopi } public MsDataScan TheScan { get; } + + /// + /// Lightweight, immutable snapshot of scan and precursor metadata. + /// Designed to be passed to SpectralMatch so the heavyweight scan objects + /// can be released from memory after scoring. + /// + public ScanMetadata ScanMetadata { get; } + public double PrecursorMonoisotopicPeakMz { get; } public double PrecursorMass { get; } public int PrecursorCharge { get; } diff --git a/MetaMorpheus/EngineLayer/SpectralMatch.cs b/MetaMorpheus/EngineLayer/SpectralMatch.cs index 393427e4d9..30511572c8 100644 --- a/MetaMorpheus/EngineLayer/SpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/SpectralMatch.cs @@ -22,32 +22,11 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, { _BestMatchingBioPolymersWithSetMods = new List(); ScanIndex = scanIndex; - FullFilePath = scan.FullFilePath; - ScanNumber = scan.OneBasedScanNumber; - PrecursorScanNumber = scan.OneBasedPrecursorScanNumber; - ScanRetentionTime = scan.RetentionTime; - ScanExperimentalPeaks = scan.NumPeaks; - PrecursorScanIntensity = scan.PrecursorIntensity; - TotalIonCurrent = scan.TotalIonCurrent; - ScanPrecursorCharge = scan.PrecursorCharge; - ScanPrecursorMonoisotopicPeakMz = scan.PrecursorMonoisotopicPeakMz; - ScanPrecursorMass = scan.PrecursorMass; - PrecursorScanEnvelopePeakCount = scan.PrecursorEnvelopePeakCount; - PrecursorFractionalIntensity = scan.PrecursorFractionalIntensity; + ScanMetadata = scan.ScanMetadata; DigestionParams = commonParameters.DigestionParams; - NativeId = scan.NativeId; RunnerUpScore = commonParameters.ScoreCutoff; SpectralAngle = -1; - IsobaricMassTagReporterIonIntensities = scan.IsobaricMassTagReporterIonIntensities; - - if (scan.TheScan is TimsDataScan timsScan) - { - ScanOneOverK0 = timsScan.OneOverK0; - } - else - { - ScanOneOverK0 = null; // this is only 
used for ion mobility data, so it can be null - } + ReporterIonIntensities = scan.IsobaricMassTagReporterIonIntensities; AddOrReplace(peptide, score, notch, true, matchedFragmentIons); } @@ -68,19 +47,34 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, public int PsmCount { get; internal set; } public Dictionary ModsIdentified { get; private set; } // these should never be null under normal circumstances public List LocalizedScores { get; internal set; } - public int ScanNumber { get; } - public int? PrecursorScanNumber { get; } - public double ScanRetentionTime { get; } - public int ScanExperimentalPeaks { get; } - public double TotalIonCurrent { get; } - public int ScanPrecursorCharge { get; } - public double ScanPrecursorMonoisotopicPeakMz { get; } - public double PrecursorScanIntensity { get; } - public int PrecursorScanEnvelopePeakCount { get; } - public double PrecursorFractionalIntensity { get; } - public double ScanPrecursorMass { get; } - public double? ScanOneOverK0 { get; set; } // this is only used for ion mobility data, so it can be null - public string FullFilePath { get; private set; } + + #region Scan metadata — delegated to ScanMetadata record + + /// + /// Lightweight, immutable snapshot of scan and precursor metadata. + /// Replaces the individual scan-derived fields that were previously unpacked + /// from Ms2ScanWithSpecificMass during construction. + /// + public ScanMetadata ScanMetadata { get; } + + // Pass-through properties for backwards compatibility. + // Callers can be migrated to ScanMetadata.* over time. + public int ScanNumber => ScanMetadata.OneBasedScanNumber; + public int? 
PrecursorScanNumber => ScanMetadata.OneBasedPrecursorScanNumber; + public double ScanRetentionTime => ScanMetadata.RetentionTime; + public int ScanExperimentalPeaks => ScanMetadata.NumPeaks; + public double TotalIonCurrent => ScanMetadata.TotalIonCurrent; + public int ScanPrecursorCharge => ScanMetadata.PrecursorCharge; + public double ScanPrecursorMonoisotopicPeakMz => ScanMetadata.PrecursorMonoisotopicPeakMz; + public double PrecursorScanIntensity => ScanMetadata.PrecursorIntensity; + public int PrecursorScanEnvelopePeakCount => ScanMetadata.PrecursorEnvelopePeakCount; + public double PrecursorFractionalIntensity => ScanMetadata.PrecursorFractionalIntensity; + public double ScanPrecursorMass => ScanMetadata.PrecursorMass; + public double? ScanOneOverK0 => ScanMetadata.OneOverK0; + public string FullFilePath => ScanMetadata.FullFilePath; + public string NativeId => ScanMetadata.NativeId; + + #endregion /// /// Refers to the index of the Ms2ScanWithSpecificMass in an array of Ms2ScansWithSpecificMass that is sorted by precursor mass /// @@ -103,7 +97,6 @@ public FdrInfo GetFdrInfo(bool peptideLevel) public double Score { get; private set; } public double SpectralAngle { get; set; } - public string NativeId; // this is a property of the scan. used for mzID writing public double DeltaScore { get { return (Score - RunnerUpScore); } } @@ -158,10 +151,11 @@ public void SetMs2Scan(MsDataScan scan) protected List _BestMatchingBioPolymersWithSetMods; /// - /// An array containing the intensities of the reporter ions for isobaric mass tags. - /// If multiplex quantification wasn't performed, this will be null + /// An array containing the intensities of the reporter ions for isobaric mass tags (TMT, iTRAQ, diLeu, etc.). + /// Null if multiplex quantification wasn't performed. + /// Array order matches the reporter ion order defined by the mass tag modification. /// - public double[]? IsobaricMassTagReporterIonIntensities { get; private set; } + public double[]? 
ReporterIonIntensities { get; private set; } public IEnumerable BestMatchingBioPolymersWithSetMods { @@ -381,6 +375,10 @@ protected SpectralMatch(SpectralMatch psm, List bestMat BaseSequence = PsmTsvWriter.Resolve(bestMatchingPeptides.Select(b => b.SpecificBioPolymer.BaseSequence)).ResolvedValue; FullSequence = PsmTsvWriter.Resolve(bestMatchingPeptides.Select(b => b.SpecificBioPolymer.FullSequence)).ResolvedValue; + // Scan metadata is an immutable record — safe to share the reference + ScanMetadata = psm.ScanMetadata; + ScanIndex = psm.ScanIndex; + ModsChemicalFormula = psm.ModsChemicalFormula; Notch = psm.Notch; BioPolymerWithSetModsLength = psm.BioPolymerWithSetModsLength; @@ -394,16 +392,6 @@ protected SpectralMatch(SpectralMatch psm, List bestMat PsmCount = psm.PsmCount; ModsIdentified = psm.ModsIdentified; LocalizedScores = psm.LocalizedScores; - ScanNumber = psm.ScanNumber; - PrecursorScanNumber = psm.PrecursorScanNumber; - ScanRetentionTime = psm.ScanRetentionTime; - ScanExperimentalPeaks = psm.ScanExperimentalPeaks; - TotalIonCurrent = psm.TotalIonCurrent; - ScanPrecursorCharge = psm.ScanPrecursorCharge; - ScanPrecursorMonoisotopicPeakMz = psm.ScanPrecursorMonoisotopicPeakMz; - ScanPrecursorMass = psm.ScanPrecursorMass; - FullFilePath = psm.FullFilePath; - ScanIndex = psm.ScanIndex; FdrInfo = psm.FdrInfo; Score = psm.Score; RunnerUpScore = psm.RunnerUpScore; diff --git a/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs b/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs index 12ff97e230..66d36edc16 100644 --- a/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs +++ b/MetaMorpheus/EngineLayer/Util/IsobaricMassTag.cs @@ -24,7 +24,7 @@ public enum IsobaricMassTagType /// This class contains information about the Isobaric Mass Tag (e.g., TMT), including the theoretical m/z values of the reporter ions /// as well as methods designed to retrieve the intensities of those reporter ions from a given MzSpectrum. /// It does not store any intensity information itself. 
Intensity information is associated with each Ms2ScanWithSpecificMass object or SpectralMatch, - /// in the IsobaricMassTagReporterIonIntensities property. + /// in the IsobaricMassTagReporterIonIntensities property (Ms2ScanWithSpecificMass) or ReporterIonIntensities property (SpectralMatch). /// public class IsobaricMassTag { diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 08c6fa1a9e..9fbc8eb391 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1171,12 +1171,12 @@ private void WritePsmPlusMultiplexIons(IEnumerable psms, string f sb.Append(psm.ToString(Parameters.SearchParameters.ModsToWriteSelection, writePeptideLevelResults).Trim()); sb.Append('\t'); - if (psm.IsobaricMassTagReporterIonIntensities != null && psm.IsobaricMassTagReporterIonIntensities.Length > 0) + if (psm.ReporterIonIntensities != null && psm.ReporterIonIntensities.Length > 0) { - for (int i = 0; i < psm.IsobaricMassTagReporterIonIntensities.Length; i++) + for (int i = 0; i < psm.ReporterIonIntensities.Length; i++) { if (i > 0) sb.Append('\t'); - sb.Append(psm.IsobaricMassTagReporterIonIntensities[i].ToString("F1", CultureInfo.InvariantCulture)); + sb.Append(psm.ReporterIonIntensities[i].ToString("F1", CultureInfo.InvariantCulture)); } } else From c759f00d0d40446f2f97db2482d3fa6841a4f253 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Mon, 23 Mar 2026 12:35:44 -0500 Subject: [PATCH 16/30] git commit fixing mzlib/mm modification change causing integration issue. 
--- MetaMorpheus/EngineLayer/GlobalVariables.cs | 3 ++- .../SearchTask/PostSearchAnalysisTask.cs | 17 ++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlobalVariables.cs b/MetaMorpheus/EngineLayer/GlobalVariables.cs index e1a668a9ca..a851df5c0f 100644 --- a/MetaMorpheus/EngineLayer/GlobalVariables.cs +++ b/MetaMorpheus/EngineLayer/GlobalVariables.cs @@ -17,6 +17,7 @@ using TopDownProteomics; using Transcriptomics.Digestion; using UsefulProteomicsDatabases; +using Omics.Modifications.IO; namespace EngineLayer { @@ -53,7 +54,7 @@ public static class GlobalVariables public static GlobalSettings GlobalSettings { get; set; } public static IEnumerable UnimodDeserialized { get; private set; } public static IEnumerable UniprotDeseralized { get; private set; } - public static UsefulProteomicsDatabases.Generated.obo PsiModDeserialized { get; private set; } + public static obo PsiModDeserialized { get; private set; } public static IEnumerable AllModsKnown { get { return _AllModsKnown.AsEnumerable(); } } public static IEnumerable AllRnaModsKnown { get { return _AllRnaModsKnown.AsEnumerable(); } } public static IEnumerable AllModTypesKnown { get { return _AllModTypesKnown.AsEnumerable(); } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 0f82a7e1af..a95f0051ac 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -16,7 +16,6 @@ using System.IO.Compression; using System.Linq; using System.Text; -using Chemistry; using EngineLayer.DatabaseLoading; using MzLibUtil; using MzLibUtil.PositionFrequencyAnalysis; @@ -1334,11 +1333,11 @@ private void WriteVariantResults() foreach (var variant in variants) { - if (variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true && variant.Description.Description.IsNotNullOrEmpty()) + if 
(variantPWSM.IntersectsAndIdentifiesVariation(variant).identifies == true && variant.Description.IsNotNullOrEmpty()) { - if (culture.CompareInfo.IndexOf(variant.Description.Description, "missense_variant", CompareOptions.IgnoreCase) >= 0) + if (culture.CompareInfo.IndexOf(variant.Description, "missense_variant", CompareOptions.IgnoreCase) >= 0) { - if (variant.Description.ReferenceAlleleString.Length == 1 && variant.Description.AlternateAlleleString.Length == 1) + if (variant.VariantCallFormatDataString.ReferenceAlleleString.Length == 1 && variant.VariantCallFormatDataString.AlternateAlleleString.Length == 1) { if (SNVmissenseIdentified == false) { @@ -1357,7 +1356,7 @@ private void WriteVariantResults() MNVmissenseVariants.AddOrCreate(variantPWSM.Protein, variant); } } - else if (culture.CompareInfo.IndexOf(variant.Description.Description, "frameshift_variant", CompareOptions.IgnoreCase) >= 0) + else if (culture.CompareInfo.IndexOf(variant.Description, "frameshift_variant", CompareOptions.IgnoreCase) >= 0) { if (frameshiftIdentified == false) { @@ -1366,7 +1365,7 @@ private void WriteVariantResults() } frameshiftVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if (culture.CompareInfo.IndexOf(variant.Description.Description, "stop_gained", CompareOptions.IgnoreCase) >= 0) + else if (culture.CompareInfo.IndexOf(variant.Description, "stop_gained", CompareOptions.IgnoreCase) >= 0) { if (stopGainIdentified == false) { @@ -1375,7 +1374,7 @@ private void WriteVariantResults() } stopGainVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if ((culture.CompareInfo.IndexOf(variant.Description.Description, "conservative_inframe_insertion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.Description.Description, "disruptive_inframe_insertion", CompareOptions.IgnoreCase) >= 0)) + else if ((culture.CompareInfo.IndexOf(variant.Description, "conservative_inframe_insertion", CompareOptions.IgnoreCase) >= 0) || 
(culture.CompareInfo.IndexOf(variant.Description, "disruptive_inframe_insertion", CompareOptions.IgnoreCase) >= 0)) { if (insertionIdentified == false) { @@ -1384,7 +1383,7 @@ private void WriteVariantResults() } insertionVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if ((culture.CompareInfo.IndexOf(variant.Description.Description, "conservative_inframe_deletion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.Description.Description, "disruptive_inframe_deletion", CompareOptions.IgnoreCase) >= 0)) + else if ((culture.CompareInfo.IndexOf(variant.Description, "conservative_inframe_deletion", CompareOptions.IgnoreCase) >= 0) || (culture.CompareInfo.IndexOf(variant.Description, "disruptive_inframe_deletion", CompareOptions.IgnoreCase) >= 0)) { if (deletionIdentified == false) { @@ -1393,7 +1392,7 @@ private void WriteVariantResults() } deletionVariants.AddOrCreate(variantPWSM.Protein, variant); } - else if (culture.CompareInfo.IndexOf(variant.Description.Description, "stop_loss", CompareOptions.IgnoreCase) >= 0) + else if (culture.CompareInfo.IndexOf(variant.Description, "stop_loss", CompareOptions.IgnoreCase) >= 0) { if (stopLossIdentifed == false) { From 47356e66838a28328b9fb31f2a0581d0479925a3 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Mon, 23 Mar 2026 15:53:17 -0500 Subject: [PATCH 17/30] time stamping updates to proteingroup implementing biopolymergroup. 
--- .../ProteinParsimony/ProteinGroup.cs | 671 +++++------------- 1 file changed, 197 insertions(+), 474 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 235b062fd3..cc8b6a8137 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -1,4 +1,4 @@ -using Proteomics; +using Proteomics; using FlashLFQ; using System.Collections.Generic; using System.IO; @@ -6,8 +6,8 @@ using System.Text; using MassSpectrometry; using Omics.Modifications; -using ThermoFisher.CommonCore.Data; using Omics; +using Omics.BioPolymerGroup; using Transcriptomics.Digestion; using MzLibUtil; using MzLibUtil.PositionFrequencyAnalysis; @@ -15,133 +15,136 @@ namespace EngineLayer { - public class ProteinGroup + /// + /// MetaMorpheus-specific protein group, extending the generic BioPolymerGroup from mzLib. + /// Adds PEP-based scoring, SILAC-aware peptide output, and MetaMorpheus-specific TSV column names. + /// Quantification and modification occupancy are handled by the base class's SampleGroupResult system. + /// + /// Backward-compatible alias properties (e.g., Proteins, AllPeptides, ProteinGroupScore) delegate + /// to the corresponding BioPolymerGroup base properties. Consumers should gradually migrate to + /// the base class names. + /// + /// NOTE: AllPsmsBelowOnePercentFDR is shadowed here as HashSet<SpectralMatch> because + /// MetaMorpheus's SpectralMatch does not yet implement mzLib's ISpectralMatch interface. + /// When SpectralMatch is updated to implement ISpectralMatch, this shadow and the related + /// method shadows (Score, CalculateSequenceCoverage) can be removed. 
+ /// + public class ProteinGroup : BioPolymerGroup { public ProteinGroup(HashSet proteins, HashSet peptides, HashSet uniquePeptides) + : base(proteins, peptides, uniquePeptides) { - Proteins = proteins; - ListOfProteinsOrderedByAccession = Proteins.OrderBy(p => p.Accession).ToList(); - ProteinGroupName = string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.Accession)); - AllPeptides = peptides; - UniquePeptides = uniquePeptides; AllPsmsBelowOnePercentFDR = new HashSet(); SequenceCoverageFraction = new List(); SequenceCoverageDisplayList = new List(); SequenceCoverageDisplayListWithMods = new List(); FragmentSequenceCoverageDisplayList = new List(); - ProteinGroupScore = 0; BestPeptideScore = 0; - QValue = 0; - IsDecoy = false; - IsContaminant = false; - - // if any of the proteins in the protein group are decoys, the protein group is a decoy - foreach (var protein in proteins) - { - if (protein.IsDecoy) - { - IsDecoy = true; - break; - } - - if (protein.IsContaminant) - { - IsContaminant = true; - break; - } - } } - public bool IsDecoy { get; } + #region Backward-Compatible Property Aliases - public bool IsContaminant { get; } + /// Maps to . + public HashSet Proteins + { + get => BioPolymers; + set => BioPolymers = value; + } - public List FilesForQuantification { get; set; } + /// Maps to . + public string ProteinGroupName => BioPolymerGroupName; - public HashSet Proteins { get; set; } + /// Maps to . + public double ProteinGroupScore + { + get => BioPolymerGroupScore; + set => BioPolymerGroupScore = value; + } - public string ProteinGroupName { get; private set; } + /// Maps to . + public HashSet AllPeptides + { + get => AllBioPolymersWithSetMods; + set => AllBioPolymersWithSetMods = value; + } - public double ProteinGroupScore { get; set; } + /// Maps to . + public HashSet UniquePeptides + { + get => UniqueBioPolymersWithSetMods; + set => UniqueBioPolymersWithSetMods = value; + } - public HashSet AllPeptides { get; set; } + /// Maps to . 
+ public double BestPeptideScore + { + get => BestBioPolymerWithSetModsScore; + set => BestBioPolymerWithSetModsScore = value; + } - public HashSet UniquePeptides { get; set; } - /// - /// Contains all PSMs associated with this protein group that pass the configured quality threshold. - /// The specific filtering criteria depends on the and threshold passed to - /// during protein scoring: - /// - /// - /// - /// PSMs where QValue ≤ threshold AND QValueNotch ≤ threshold - /// - /// - /// - /// PSMs where PEP_QValue ≤ threshold - /// - /// - /// The default threshold is 0.01 (1% FDR), but this can vary based on the filter configuration. - /// This collection is populated during - /// and is used for: - /// - /// Calculating the via the method - /// Determining , , and - /// Computing sequence coverage in - /// Reporting the number of PSMs in protein group output - /// - /// - /// - /// Note: The property name "AllPsmsBelowOnePercentFDR" is a legacy name. The actual threshold - /// used is determined by the FilterThreshold parameter passed to ProteinScoringAndFdrEngine. - /// - public HashSet AllPsmsBelowOnePercentFDR { get; set; } + /// Maps to . + public double BestPeptideQValue + { + get => BestBioPolymerWithSetModsQValue; + set => BestBioPolymerWithSetModsQValue = value; + } - public List SequenceCoverageFraction { get; private set; } + /// Maps to . + public List ListOfProteinsOrderedByAccession => ListOfBioPolymersOrderedByAccession; - public List SequenceCoverageDisplayList { get; private set; } + /// Maps to . Filtered to SpectraFileInfo. + public List FilesForQuantification + { + get => SamplesForQuantification?.OfType().ToList(); + set => SamplesForQuantification = value?.Cast().ToList(); + } - public List SequenceCoverageDisplayListWithMods { get; private set; } + /// Maps to . Keyed by SpectraFileInfo. 
+ public Dictionary IntensitiesByFile + { + get => IntensitiesBySample?.ToDictionary(kvp => (SpectraFileInfo)kvp.Key, kvp => kvp.Value); + set => IntensitiesBySample = value?.ToDictionary(kvp => (ISampleInfo)kvp.Key, kvp => kvp.Value); + } - public List FragmentSequenceCoverageDisplayList { get; private set; } + #endregion - public double QValue { get; set; } + #region MetaMorpheus-Specific Properties - public double BestPeptideQValue { get; set; } + /// + /// Shadowed as HashSet<SpectralMatch> because MM's SpectralMatch does not implement ISpectralMatch. + /// Remove this shadow when SpectralMatch implements ISpectralMatch. + /// + public new HashSet AllPsmsBelowOnePercentFDR { get; set; } /// /// The minimum Posterior Error Probability (PEP) among all PSMs in . - /// Lower values indicate higher confidence that the best peptide identification is correct. - /// This value is populated during and is used - /// for protein group ranking when using PEP-based filtering (). + /// Lower values indicate higher confidence. Populated during protein FDR and used for PEP-based ranking. /// public double BestPeptidePEP { get; set; } - public double BestPeptideScore { get; set; } - - public int CumulativeTarget { get; set; } - - public int CumulativeDecoy { get; set; } - - public bool DisplayModsOnPeptides { get; set; } - - public Dictionary ModsInfo { get; set; } - public Dictionary IntensitiesByFile { get; set; } - - private List ListOfProteinsOrderedByAccession; + // Sequence coverage stored as flat lists (MM-specific format). + // BioPolymerGroup uses CoverageResult instead; these are kept for TSV output compatibility. 
+ public List SequenceCoverageFraction { get; private set; } + public List SequenceCoverageDisplayList { get; private set; } + public List SequenceCoverageDisplayListWithMods { get; private set; } + public List FragmentSequenceCoverageDisplayList { get; private set; } private string UniquePeptidesOutput; private string SharedPeptidesOutput; - //Get unique and identified peptides for output - //Convert the output if it's a SILAC experiment + #endregion + + #region Peptide Output + + /// + /// Populates unique and shared peptide output strings, converting to light SILAC sequences if needed. + /// public void GetIdentifiedPeptidesOutput(List labels) { var SharedPeptides = AllPeptides.Except(UniquePeptides); if (labels == null) { - //TODO add unit test with displaymodsonpeptides if (!DisplayModsOnPeptides) { UniquePeptidesOutput = @@ -184,7 +187,15 @@ public void GetIdentifiedPeptidesOutput(List labels) } } - public string GetTabSeparatedHeader() + #endregion + + #region TSV Output (MetaMorpheus column names, base quantification format) + + /// + /// MetaMorpheus TSV header with "Protein" column names and BestPeptidePEP. + /// Quantification/occupancy columns use the base BioPolymerGroup SampleGroupResult format. 
+ /// + public new string GetTabSeparatedHeader() { var sb = new StringBuilder(); sb.Append("Protein Accession" + '\t'); @@ -201,48 +212,24 @@ public string GetTabSeparatedHeader() sb.Append("Sequence Coverage" + '\t'); sb.Append("Sequence Coverage with Mods" + '\t'); sb.Append("Fragment Sequence Coverage" + '\t'); - //sb.Append("Modification Info List" + "\t"); - if (IntensitiesByFile != null && FilesForQuantification != null) + // Quantification and occupancy columns from base SampleGroupResult system + if (SampleGroupResults.IsNullOrEmpty()) PopulateSampleGroupResults(); + + if (SampleGroupResults != null) { - bool unfractionated = FilesForQuantification.Select(p => p.Fraction).Distinct().Count() == 1; - bool conditionsUndefined = FilesForQuantification.All(p => string.IsNullOrEmpty(p.Condition)); - - // this is a hacky way to test for SILAC-labeled data... - // Currently SILAC will report 1 column of intensities per label per spectra file, and is NOT summarized - // into biorep-level intensity values. the SILAC code uses the "condition" field to organize this info, - // even if the experimental design is not defined by the user. So the following bool is a way to distinguish - // between experimental design being used in SILAC automatically vs. 
being defined by the user - bool silacExperimentalDesign = - FilesForQuantification.Any(p => !File.Exists(p.FullFilePathWithExtension)); - - foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition)) + foreach (var group in SampleGroupResults) { - foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key)) - { - if ((conditionsUndefined && unfractionated) || silacExperimentalDesign) - { - // if the data is unfractionated and the conditions haven't been defined, just use the file name as the intensity header - sb.Append("Mods_" + sample.First().FilenameWithoutExtension + "\t"); - sb.Append("Intensity_" + sample.First().FilenameWithoutExtension + "\t"); - } - else - { - // if the data is fractionated and/or the conditions have been defined, label the header w/ the condition and biorep number - sb.Append("Mods_" + sample.First().Condition + "_" + - (sample.First().BiologicalReplicate + 1) + "\t"); - sb.Append("Intensity_" + sample.First().Condition + "_" + - (sample.First().BiologicalReplicate + 1) + "\t"); - } - } + sb.Append($"SpectralCount_{group.Label}\t"); + if (group.HasIntensityData) + sb.Append($"Intensity_{group.Label}\t"); + sb.Append($"CountOccupancy_{group.Label}\t"); + if (group.HasIntensityData) + sb.Append($"IntensityOccupancy_{group.Label}\t"); } } - else - { - sb.Append("\t"); - } - sb.Append("Number of PSMs" + '\t'); + sb.Append("Number of PSMs" + '\t'); sb.Append("Protein Decoy/Contaminant/Target" + '\t'); sb.Append("Protein Cumulative Target" + '\t'); sb.Append("Protein Cumulative Decoy" + '\t'); @@ -257,7 +244,7 @@ public override string ToString() { var sb = new StringBuilder(); - // list of protein accession numbers + // protein accessions sb.Append(ProteinGroupName); sb.Append("\t"); @@ -271,12 +258,12 @@ public override string ToString() ListOfProteinsOrderedByAccession.Select(p => p.Organism).Distinct()))); sb.Append("\t"); - // list of protein names + // full names 
sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.FullName).Distinct()))); sb.Append("\t"); - // list of masses + // masses var sequences = ListOfProteinsOrderedByAccession.Select(p => p.BaseSequence).Distinct(); List masses = new List(); foreach (var sequence in sequences) @@ -287,14 +274,12 @@ public override string ToString() masses.Add(new OligoWithSetMods(sequence, GlobalVariables.AllRnaModsKnownDictionary).MonoisotopicMass); else masses.Add(new Proteomics.AminoAcidPolymer.Peptide(sequence).MonoisotopicMass); - } catch (System.Exception) { masses.Add(double.NaN); } } - sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", masses))); sb.Append("\t"); @@ -302,284 +287,128 @@ public override string ToString() sb.Append("" + Proteins.Count); sb.Append("\t"); - // list of unique peptides + // unique peptides if (UniquePeptidesOutput != null) { sb.Append(GlobalVariables.CheckLengthOfOutput(UniquePeptidesOutput)); } - sb.Append("\t"); - // list of shared peptides + // shared peptides if (SharedPeptidesOutput != null) { sb.Append(GlobalVariables.CheckLengthOfOutput(SharedPeptidesOutput)); } - sb.Append("\t"); // number of peptides if (!DisplayModsOnPeptides) - { sb.Append("" + AllPeptides.Select(p => p.BaseSequence).Distinct().Count()); - } else - { sb.Append("" + AllPeptides.Select(p => p.FullSequence).Distinct().Count()); - } - sb.Append("\t"); // number of unique peptides if (!DisplayModsOnPeptides) - { sb.Append("" + UniquePeptides.Select(p => p.BaseSequence).Distinct().Count()); - } else - { sb.Append("" + UniquePeptides.Select(p => p.FullSequence).Distinct().Count()); - } - sb.Append("\t"); - // sequence coverage percent + // sequence coverage sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageFraction.Select(p => string.Format("{0:0.#####}", p))))); sb.Append("\t"); - - // sequence coverage sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", 
SequenceCoverageDisplayList))); sb.Append("\t"); - - // sequence coverage with mods sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayListWithMods))); sb.Append("\t"); - - // fragment sequence coverage sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", FragmentSequenceCoverageDisplayList))); sb.Append("\t"); - //Detailed mods information list - //sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ModsInfo))); - //sb.Append("\t"); + // Quantification and occupancy from base SampleGroupResult system + if (SampleGroupResults.IsNullOrEmpty()) PopulateSampleGroupResults(); - // MS1 intensity and mod stoichiometry (retrieved from FlashLFQ in the SearchTask) - if (IntensitiesByFile != null && FilesForQuantification != null) + if (SampleGroupResults != null) { - foreach (var sampleGroup in FilesForQuantification.GroupBy(p => p.Condition)) + bool isProteinLevel = GroupType == BioPolymerGroupType.Protein; + IEnumerable orderedKeys = isProteinLevel + ? ListOfProteinsOrderedByAccession.Select(p => p.Accession) + : AllPeptides.Select(p => p.BaseSequence).Distinct().OrderBy(s => s); + + foreach (var group in SampleGroupResults) { - foreach (var sample in sampleGroup.GroupBy(p => p.BiologicalReplicate).OrderBy(p => p.Key)) + sb.Append(group.SpectralCount); + sb.Append("\t"); + + if (group.HasIntensityData) { - sb.Append(ModInfoStringFromGroupedFiles(sample.ToList())); + if (group.Intensity > 0) + sb.Append(group.Intensity); sb.Append("\t"); + } - // if the samples are fractionated, the protein will only have 1 intensity in the first fraction - // and the other fractions will be zero. 
we could find the first/only fraction with an intensity, - // but simply summing the fractions is easier than finding the single non-zero value - double summedIntensity = sample.Sum(file => IntensitiesByFile[file]); - - if (summedIntensity > 0) - { - sb.Append(summedIntensity); - } + sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatCountOccupancy(orderedKeys, isProteinLevel))); + sb.Append("\t"); + if (group.HasIntensityData) + { + sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatIntensityOccupancy(orderedKeys, isProteinLevel))); sb.Append("\t"); } } } - else - { - sb.Append("\t"); - } - // number of PSMs for listed peptides - sb.Append("" + AllPsmsBelowOnePercentFDR.Count); + // number of PSMs + sb.Append("" + AllPsmsBelowOnePercentFDR.Count); sb.Append("\t"); - // isDecoy + // decoy/contaminant/target if (IsDecoy) - { sb.Append("D"); - } else if (IsContaminant) - { sb.Append("C"); - } else - { sb.Append("T"); - } - sb.Append("\t"); - // cumulative target + // cumulative target/decoy sb.Append(CumulativeTarget); sb.Append("\t"); - - // cumulative decoy sb.Append(CumulativeDecoy); sb.Append("\t"); - // q value + // q value, best peptide score, best peptide q value, best peptide PEP sb.Append(QValue); sb.Append("\t"); - - // best peptide score sb.Append(BestPeptideScore); sb.Append("\t"); - - // best peptide q value sb.Append(BestPeptideQValue); sb.Append("\t"); - - // best peptide PEP sb.Append(BestPeptidePEP); return sb.ToString(); } - // internal method for grouping ModsInfo by file - public string ModInfoStringFromGroupedFiles(List spectraFiles) - { - if (ModsInfo.IsNotNullOrEmpty()) - { - - // Create a combined quantified protein group for all fraction/techrep - var groupedFilesQuantifiedProteinGroup = ModsInfo[spectraFiles.First()]; - foreach (var spectraFile in spectraFiles.Skip(1)) - { - foreach (var protein in ModsInfo[spectraFile].Proteins.Values) - { - foreach (var peptide in protein.Peptides.Values) - { - // TODO: If all flashlfq 
quantified peptides have the same spectrafiles, I can get rid of this condition. - // Need to also double check that merging of peptides is properly updating total position intensity. If so, - // only write mods with finite, >0 intensities. - if (groupedFilesQuantifiedProteinGroup.Proteins[protein.Accession].Peptides.ContainsKey(peptide.BaseSequence)) - { - groupedFilesQuantifiedProteinGroup.Proteins[protein.Accession].Peptides[peptide.BaseSequence].MergePeptide(peptide); - } - else - { - groupedFilesQuantifiedProteinGroup.Proteins[protein.Accession].Peptides[peptide.BaseSequence] = peptide; - } - } - } - } - - var proteinGroupOccupanciesPerProtein = groupedFilesQuantifiedProteinGroup.Proteins.Values.Select(x => new KeyValuePair>> - (x, x.GetModStoichiometryFromProteinMods())).ToDictionary(x => x.Key, x => x.Value); - - // Clean stoichiometry results by removing "Common Variable", "Common Fixed", - // and "PeptideTermMod" modification types. - // Also remove any modifications with NaN occupancies. - - var modsToRemove = new List<(QuantifiedProtein proteinKey, int modposKey, string modnameKey)>(); - bool filterByCommonMods = false; - - foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) - { - var proteinModsDict = proteinGroupOccupanciesPerProtein[protein]; - foreach (var modpos in proteinModsDict.Keys) - { - var posMods = proteinModsDict[modpos]; - foreach (var mod in posMods.Keys) - { - // Remove common mods, peptide terminus mods not in protein, and mods with 0 or NaN intensity to not be written. - if (((mod.Contains("Common Variable") && filterByCommonMods) || (mod.Contains("Common Fixed")) && filterByCommonMods) - || mod.Contains("PeptideTermMod") - || !posMods[mod].IsFinite() // When all peptides containing a localized residue have intensities of 0, it leads to 0/0=NaN occupancies. 
- || posMods[mod]==0) - { - modsToRemove.Add((protein, modpos, mod)); - } - } - } - } - foreach (var modToIgnore in modsToRemove) - { - proteinGroupOccupanciesPerProtein[modToIgnore.proteinKey][modToIgnore.modposKey].Remove(modToIgnore.modnameKey); - } - foreach (var modToIgnore in modsToRemove) - { - var proteinToClean = proteinGroupOccupanciesPerProtein[modToIgnore.proteinKey]; - - if (proteinToClean.ContainsKey(modToIgnore.modposKey) - && proteinToClean[modToIgnore.modposKey].IsNullOrEmpty()) - { - proteinToClean.Remove(modToIgnore.modposKey); - } - } - - - // If mods not found for the proteins in protein group, do not write anything - // Since all proteins in the protein group would have the same mod info with - // different mod position key names but same number of keys, we only check - // that the first protein's mod dict is not empty. - if (proteinGroupOccupanciesPerProtein.First().Value.IsNullOrEmpty()) - { - return ""; - } - - // Else, build the mod info string for the protein group. - var modInfoString = new StringBuilder(); - - foreach (var protein in proteinGroupOccupanciesPerProtein.Keys) - { - if (proteinGroupOccupanciesPerProtein[protein].IsNullOrEmpty()) - { - continue; - } - modInfoString.Append(protein.Accession + ":{"); - - foreach (var modpos in proteinGroupOccupanciesPerProtein[protein].Keys.Order()) - { - var modposTotalIntensity = protein.Peptides.Values.Where(x => protein.PeptidesByProteinPosition[modpos].Contains(x.BaseSequence)).Sum(x => x.Intensity); - - bool writeModPos = proteinGroupOccupanciesPerProtein[protein][modpos].Values.Any(x => x > 0); - - // Need to check if is finite because quantified peptides can have an intensity of Zero, leading to NaN occupancies. - // Also, only write mod positions with nonzero total intensity. - if (double.IsFinite(modposTotalIntensity) && modposTotalIntensity > 0 && writeModPos) - { - var loc = modpos == 0 ? "N-terminal" : modpos == protein.Sequence.Length + 1 ? 
"C-terminal" : $"{protein.Sequence[modpos - 1]}#" + modpos.ToString(); - modInfoString.Append(loc); - - var modStrings = new List(); - - foreach (var mod in proteinGroupOccupanciesPerProtein[protein][modpos]) - { - modStrings.Add($"{mod.Key}, info: occupancy={mod.Value.ToString("N4")}({modposTotalIntensity})"); - } - - // If mods with nonzero fractions found, append them - if (modStrings.Count > 0) - { - modInfoString.Append("[" + string.Join(";", modStrings) + "]"); - } - } - } - modInfoString.Append("}"); - } - return modInfoString.ToString(); - } + #endregion - else - { - return ""; - } - } + #region Scoring and Coverage - // Score() method is only used internally, to make protein grouping faster - // this is NOT an output and is NOT used for protein FDR calculations - public void Score() + /// + /// Shadows base Score() to use the MM-specific SpectralMatch-typed AllPsmsBelowOnePercentFDR. + /// + public new void Score() { - // sum the scores of the best PSM per base sequence ProteinGroupScore = AllPsmsBelowOnePercentFDR.GroupBy(p => p.BaseSequence) .Select(p => p.Select(x => x.Score).Max()).Sum(); } - public void CalculateSequenceCoverage() + /// + /// Shadows base CalculateSequenceCoverage() to use MM's SpectralMatch concrete type. + /// Results are stored in the flat list properties (SequenceCoverageFraction, etc.) + /// rather than in . 
+ /// + public new void CalculateSequenceCoverage() { var proteinsWithUnambigSeqPsms = new Dictionary>(); var proteinsWithPsmsWithLocalizedMods = new Dictionary>(); @@ -592,77 +421,64 @@ public void CalculateSequenceCoverage() foreach (var psm in AllPsmsBelowOnePercentFDR) { - // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage if (psm.BaseSequence != null) { psm.GetAminoAcidCoverage(); - foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods.Select(psm => psm.SpecificBioPolymer).DistinctBy(pep => pep.FullSequence)) + foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods + .Select(p => p.SpecificBioPolymer).DistinctBy(pep => pep.FullSequence)) { - // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo if (Proteins.Contains(peptide.Parent)) { proteinsWithUnambigSeqPsms[peptide.Parent].Add(peptide); - // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info if (peptide.FullSequence != null) { proteinsWithPsmsWithLocalizedMods[peptide.Parent].Add(peptide); } } } - } } - //Calculate sequence coverage at the amino acid level by looking at fragment specific coverage - //loop through proteins + // Fragment-level coverage foreach (IBioPolymer protein in ListOfProteinsOrderedByAccession) { - //create a hash set for storing covered one-based residue numbers of protein HashSet coveredResiduesInProteinOneBased = new(); - //loop through PSMs foreach (SpectralMatch psm in AllPsmsBelowOnePercentFDR.Where(psm => psm.BaseSequence != null)) { - //Calculate the covered bases within the psm. 
This is one based numbering for the peptide only psm.GetAminoAcidCoverage(); if (psm.FragmentCoveragePositionInPeptide == null) continue; - //loop through each peptide within the psm - IEnumerable pwsms = psm.BestMatchingBioPolymersWithSetMods.Select(p => p.SpecificBioPolymer) + + IEnumerable pwsms = psm.BestMatchingBioPolymersWithSetMods + .Select(p => p.SpecificBioPolymer) .Where(p => p.Parent.Accession == protein.Accession); + foreach (var pwsm in pwsms) { - //create a hashset to store the covered residues for the peptide, converted to the corresponding indices of the protein HashSet coveredResiduesInPeptide = new(); - //add the peptide start position within the protein to each covered index of the psm foreach (var position in psm.FragmentCoveragePositionInPeptide) { - coveredResiduesInPeptide.Add(position + pwsm.OneBasedStartResidue - - 1); //subtract one because these are both one based + coveredResiduesInPeptide.Add(position + pwsm.OneBasedStartResidue - 1); } - - //Add the peptide specific positions, to the overall hashset for the protein coveredResiduesInProteinOneBased.UnionWith(coveredResiduesInPeptide); } } - // create upper/lowercase string char[] fragmentCoverageArray = protein.BaseSequence.ToLower().ToCharArray(); foreach (var residue in coveredResiduesInProteinOneBased) { fragmentCoverageArray[residue - 1] = char.ToUpper(fragmentCoverageArray[residue - 1]); } - FragmentSequenceCoverageDisplayList.Add(new string(fragmentCoverageArray)); } - //Calculates the coverage at the peptide level... 
if a peptide is present all of the AAs in the peptide are covered + // Peptide-level coverage foreach (var protein in ListOfProteinsOrderedByAccession) { HashSet coveredOneBasedResidues = new HashSet(); - // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous foreach (var peptide in proteinsWithUnambigSeqPsms[protein]) { for (int i = peptide.OneBasedStartResidue; i <= peptide.OneBasedEndResidue; i++) @@ -671,27 +487,19 @@ public void CalculateSequenceCoverage() } } - // calculate sequence coverage percent double seqCoverageFract = (double)coveredOneBasedResidues.Count / protein.Length; - - // add the percent coverage SequenceCoverageFraction.Add(seqCoverageFract); - // convert the observed amino acids to upper case if they are unambiguously observed string sequenceCoverageDisplay = protein.BaseSequence.ToLower(); var coverageArray = sequenceCoverageDisplay.ToCharArray(); foreach (var obsResidueLocation in coveredOneBasedResidues) { coverageArray[obsResidueLocation - 1] = char.ToUpper(coverageArray[obsResidueLocation - 1]); } - sequenceCoverageDisplay = new string(coverageArray); - - // add the coverage display SequenceCoverageDisplayList.Add(sequenceCoverageDisplay); - // put mods in the sequence coverage display - // get mods to display in sequence (only unambiguously identified mods) + // Mods in sequence coverage display var modsOnThisProtein = new HashSet>(); foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein]) { @@ -713,133 +521,40 @@ public void CalculateSequenceCoverage() { if (mod.Value.LocationRestriction.Equals("N-terminal.")) { - sequenceCoverageDisplay = sequenceCoverageDisplay.Insert( - 0, - $"[{mod.Value.IdWithMotif}]-"); + sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(0, $"[{mod.Value.IdWithMotif}]-"); } else if (mod.Value.LocationRestriction.Equals("Anywhere.")) { int modStringIndex = sequenceCoverageDisplay.Length - (protein.Length - mod.Key); - 
sequenceCoverageDisplay = sequenceCoverageDisplay.Insert( - modStringIndex, - $"[{mod.Value.IdWithMotif}]"); + sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(modStringIndex, $"[{mod.Value.IdWithMotif}]"); } else if (mod.Value.LocationRestriction.Equals("C-terminal.")) { - sequenceCoverageDisplay = sequenceCoverageDisplay.Insert( - sequenceCoverageDisplay.Length, - $"-[{mod.Value.IdWithMotif}]"); + sequenceCoverageDisplay = sequenceCoverageDisplay.Insert(sequenceCoverageDisplay.Length, $"-[{mod.Value.IdWithMotif}]"); } } SequenceCoverageDisplayListWithMods.Add(sequenceCoverageDisplay); - - if (!modsOnThisProtein.Any()) - { - continue; - } - - // PREVIOUS OCCUPANCY CODE - // modInfo will be updated by the PostSearchAnalysisTask. However, leaving this code - // here for now in case we want to use it in the future. - // calculate spectral count % of modified observations - //var pepModTotals = new List(); // count of modified peptides for each mod/index - //var pepTotals = new List(); // count of all peptides for each mod/index - //var modIndex = new List<(int index, string modName)>(); // index and name of the modified position - // - //foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein]) - //{ - // foreach (var mod in pep.AllModsOneIsNterminus) - // { - // int pepNumTotal = 0; //For one mod, The total Pep Num - // - // if (mod.Value.ModificationType.Contains("Common Variable") - // || mod.Value.ModificationType.Contains("Common Fixed") - // || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.PepC) - // || mod.Value.LocationRestriction.Equals(ModLocationOnPeptideOrProtein.NPep)) - // { - // continue; - // } - // - // int indexInProtein; - // if (mod.Value.LocationRestriction.Equals("N-terminal.")) - // { - // indexInProtein = 1; - // } - // else if (mod.Value.LocationRestriction.Equals("Anywhere.")) - // { - // indexInProtein = pep.OneBasedStartResidue + mod.Key - 2; - // } - // else if 
(mod.Value.LocationRestriction.Equals("C-terminal.")) - // { - // indexInProtein = protein.Length; - // } - // else - // { - // // In case it's a peptide terminal mod, skip! - // // we don't want this annotated in the protein's modifications - // continue; - // } - // - // var modKey = (indexInProtein, mod.Value.IdWithMotif); - // if (modIndex.Contains(modKey)) - // { - // pepModTotals[modIndex.IndexOf(modKey)] += 1; - // } - // else - // { - // modIndex.Add(modKey); - // foreach (var pept in proteinsWithPsmsWithLocalizedMods[protein]) - // { - // if (indexInProtein >= pept.OneBasedStartResidue - (indexInProtein == 1 ? 1 : 0) - // && indexInProtein <= pept.OneBasedEndResidue) - // { - // pepNumTotal += 1; - // } - // } - // - // pepTotals.Add(pepNumTotal); - // pepModTotals.Add(1); - // } - // } - //} - //bool quantifyModsByPSM = false; - //if (quantifyModsByPSM) - //{ - // var modStrings = new List<(int aaNum, string part)>(); - // for (int i = 0; i < pepModTotals.Count; i++) - // { - // string aa = modIndex[i].index.ToString(); - // string modName = modIndex[i].modName.ToString(); - // string occupancy = ((double)pepModTotals[i] / (double)pepTotals[i]).ToString("F2"); - // string fractOccupancy = $"{pepModTotals[i].ToString()}/{pepTotals[i].ToString()}"; - // string tempString = ($"#aa{aa}[{modName},info:occupancy={occupancy}({fractOccupancy})]"); - // modStrings.Add((modIndex[i].index, tempString)); - // } - // - // var modInfoString = string.Join(";", modStrings.OrderBy(x => x.aaNum).Select(x => x.part)); - // - // if (!string.IsNullOrEmpty(modInfoString)) - // { - // ModsInfo.Add(modInfoString); - // } - //} } } + #endregion + + #region Merge and Subset + + /// + /// Merges another ProteinGroup into this one. Handles the SpectralMatch-typed PSM set locally, + /// then delegates to for biopolymer/peptide/name updates. 
+ /// public void MergeProteinGroupWith(ProteinGroup other) { - this.Proteins.UnionWith(other.Proteins); - this.AllPeptides.UnionWith(other.AllPeptides); - this.UniquePeptides.UnionWith(other.UniquePeptides); this.AllPsmsBelowOnePercentFDR.UnionWith(other.AllPsmsBelowOnePercentFDR); - other.ProteinGroupScore = 0; - - ListOfProteinsOrderedByAccession = Proteins.OrderBy(p => p.Accession).ToList(); - - ProteinGroupName = string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.Accession)); + base.MergeWith(other); } + /// + /// Creates a ProteinGroup subset containing only data from the specified spectra file. + /// public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List silacLabels = null) { var allPsmsForThisFile = @@ -862,7 +577,7 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List p.FullFilePathWithExtension == fullFilePath) .FirstOrDefault(); - //check that file name wasn't changed (can occur in SILAC searches) + if (!MzLibUtil.ClassExtensions.IsNullOrEmpty(silacLabels) && spectraFileInfo == null) { foreach (SilacLabel label in silacLabels) @@ -878,7 +593,7 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List + /// Compares by ordered accession list. + /// public bool Equals(ProteinGroup grp) { - //Check for null and compare run-time types. 
- if (grp == null) + if (grp == null) { return false; } - else if (!this.ListOfProteinsOrderedByAccession.Select(a=>a.Accession).ToList().SequenceEqual(grp.ListOfProteinsOrderedByAccession.Select(a => a.Accession).ToList())) + else if (!this.ListOfProteinsOrderedByAccession.Select(a => a.Accession).ToList() + .SequenceEqual(grp.ListOfProteinsOrderedByAccession.Select(a => a.Accession).ToList())) { return false; } return true; } + + #endregion } -} \ No newline at end of file +} From 86f525be53d17de24f37dcbbd1fbbf6325ccd5fb Mon Sep 17 00:00:00 2001 From: pcruzparri Date: Wed, 25 Mar 2026 11:33:46 -0500 Subject: [PATCH 18/30] working with vignette data and spectralmatchandbiopolymergrouprefactoring branch --- .../ProteinParsimony/ProteinGroup.cs | 37 +++++++-------- .../ProteinScoringAndFdrEngine.cs | 4 +- .../EngineLayer/Silac/SilacConversions.cs | 33 +++----------- MetaMorpheus/EngineLayer/SpectralMatch.cs | 43 +++++++++++++++++- .../PostGlycoSearchAnalysisTask.cs | 14 +++--- .../SearchTask/PostSearchAnalysisTask.cs | 45 +++++-------------- MetaMorpheus/Test/RobTest.cs | 7 ++- 7 files changed, 91 insertions(+), 92 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index cc8b6a8137..376bf956fa 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -8,9 +8,9 @@ using Omics.Modifications; using Omics; using Omics.BioPolymerGroup; +using Omics.SpectralMatch; using Transcriptomics.Digestion; using MzLibUtil; -using MzLibUtil.PositionFrequencyAnalysis; using Easy.Common.Extensions; namespace EngineLayer @@ -24,10 +24,9 @@ namespace EngineLayer /// to the corresponding BioPolymerGroup base properties. Consumers should gradually migrate to /// the base class names. 
/// - /// NOTE: AllPsmsBelowOnePercentFDR is shadowed here as HashSet<SpectralMatch> because - /// MetaMorpheus's SpectralMatch does not yet implement mzLib's ISpectralMatch interface. - /// When SpectralMatch is updated to implement ISpectralMatch, this shadow and the related - /// method shadows (Score, CalculateSequenceCoverage) can be removed. + /// Score() and CalculateSequenceCoverage() are overridden (via new) because they access + /// MetaMorpheus-specific SpectralMatch members (GetAminoAcidCoverage, BestMatchingBioPolymersWithSetMods, + /// FragmentCoveragePositionInPeptide) that are not on the ISpectralMatch interface. /// public class ProteinGroup : BioPolymerGroup { @@ -35,7 +34,7 @@ public ProteinGroup(HashSet proteins, HashSet uniquePeptides) : base(proteins, peptides, uniquePeptides) { - AllPsmsBelowOnePercentFDR = new HashSet(); + AllPsmsBelowOnePercentFDR = new HashSet(); SequenceCoverageFraction = new List(); SequenceCoverageDisplayList = new List(); SequenceCoverageDisplayListWithMods = new List(); @@ -111,12 +110,6 @@ public Dictionary IntensitiesByFile #region MetaMorpheus-Specific Properties - /// - /// Shadowed as HashSet<SpectralMatch> because MM's SpectralMatch does not implement ISpectralMatch. - /// Remove this shadow when SpectralMatch implements ISpectralMatch. - /// - public new HashSet AllPsmsBelowOnePercentFDR { get; set; } - /// /// The minimum Posterior Error Probability (PEP) among all PSMs in . /// Lower values indicate higher confidence. Populated during protein FDR and used for PEP-based ranking. @@ -395,7 +388,8 @@ public override string ToString() #region Scoring and Coverage /// - /// Shadows base Score() to use the MM-specific SpectralMatch-typed AllPsmsBelowOnePercentFDR. + /// Computes protein group score as the sum of the best score per unique base sequence. + /// Overrides base to use MetaMorpheus-specific scoring logic. 
/// public new void Score() { @@ -404,7 +398,8 @@ public override string ToString() } /// - /// Shadows base CalculateSequenceCoverage() to use MM's SpectralMatch concrete type. + /// Computes sequence coverage using MetaMorpheus-specific SpectralMatch members + /// (GetAminoAcidCoverage, BestMatchingBioPolymersWithSetMods, FragmentCoveragePositionInPeptide). /// Results are stored in the flat list properties (SequenceCoverageFraction, etc.) /// rather than in . /// @@ -419,7 +414,7 @@ public override string ToString() proteinsWithPsmsWithLocalizedMods.Add(protein, new List()); } - foreach (var psm in AllPsmsBelowOnePercentFDR) + foreach (var psm in AllPsmsBelowOnePercentFDR.OfType()) { if (psm.BaseSequence != null) { @@ -446,7 +441,8 @@ public override string ToString() { HashSet coveredResiduesInProteinOneBased = new(); - foreach (SpectralMatch psm in AllPsmsBelowOnePercentFDR.Where(psm => psm.BaseSequence != null)) + foreach (SpectralMatch psm in AllPsmsBelowOnePercentFDR.OfType() + .Where(psm => psm.BaseSequence != null)) { psm.GetAminoAcidCoverage(); if (psm.FragmentCoveragePositionInPeptide == null) continue; @@ -543,12 +539,11 @@ public override string ToString() #region Merge and Subset /// - /// Merges another ProteinGroup into this one. Handles the SpectralMatch-typed PSM set locally, - /// then delegates to for biopolymer/peptide/name updates. + /// Merges another ProteinGroup into this one. Delegates entirely to + /// which handles PSMs, biopolymers, peptides, and name. 
/// public void MergeProteinGroupWith(ProteinGroup other) { - this.AllPsmsBelowOnePercentFDR.UnionWith(other.AllPsmsBelowOnePercentFDR); base.MergeWith(other); } @@ -558,11 +553,11 @@ public void MergeProteinGroupWith(ProteinGroup other) public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List silacLabels = null) { var allPsmsForThisFile = - new HashSet( + new HashSet( AllPsmsBelowOnePercentFDR.Where(p => p.FullFilePath.Equals(fullFilePath))); var allPeptidesForThisFile = new HashSet( - allPsmsForThisFile.SelectMany(p => p.BestMatchingBioPolymersWithSetMods.Select(v => v.SpecificBioPolymer))); + allPsmsForThisFile.SelectMany(p => p.GetIdentifiedBioPolymersWithSetMods())); var allUniquePeptidesForThisFile = new HashSet(UniquePeptides.Intersect(allPeptidesForThisFile)); diff --git a/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs b/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs index 141cafcfb8..c098df986d 100644 --- a/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs +++ b/MetaMorpheus/EngineLayer/ProteinScoringAndFdr/ProteinScoringAndFdrEngine.cs @@ -165,8 +165,8 @@ private List DoProteinFdr(List proteinGroups) } pg.BestPeptideScore = pg.AllPsmsBelowOnePercentFDR.Max(psm => psm.Score); - pg.BestPeptideQValue = pg.AllPsmsBelowOnePercentFDR.Min(psm => psm.FdrInfo.QValueNotch); - pg.BestPeptidePEP = pg.AllPsmsBelowOnePercentFDR.Min(psm => psm.FdrInfo.PEP); + pg.BestPeptideQValue = pg.AllPsmsBelowOnePercentFDR.OfType().Min(psm => psm.FdrInfo.QValueNotch); + pg.BestPeptidePEP = pg.AllPsmsBelowOnePercentFDR.OfType().Min(psm => psm.FdrInfo.PEP); } // pick the best for each paired accession based on filter type diff --git a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs index 554c6466e1..ddb97dcf0b 100644 --- a/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs +++ b/MetaMorpheus/EngineLayer/Silac/SilacConversions.cs 
@@ -8,7 +8,6 @@ using Omics.Modifications; using Omics.Digestion; using EngineLayer.SpectrumMatch; -using MzLibUtil.PositionFrequencyAnalysis; using Easy.Common.Extensions; using MzLibUtil; using MassSpectrometry; @@ -456,48 +455,30 @@ public static void SilacConversionsPostQuantification(List allSilacL flashLfqResults.CalculateProteinResultsMedianPolish(true); //update proteingroups to have all files for quantification + // Modification occupancy is now computed by BioPolymerGroup.PopulateSampleGroupResults() if (proteinGroups != null) { List allInfo = originalToLabeledFileInfoDictionary.SelectMany(x => x.Value).ToList(); foreach (ProteinGroup proteinGroup in proteinGroups) { proteinGroup.FilesForQuantification = allInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); - proteinGroup.ModsInfo = new Dictionary(); + // Build the dictionary locally, then assign in one shot. + // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. + var intensities = new Dictionary(); foreach (var spectraFile in allInfo) { if (flashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); + intensities.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); } else { //needed for decoys/contaminants/proteins that aren't quantified - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); - } - - // get modification stoichiometry using FlashLFQ spectraFile-specific intensities - var pgQuantifiedPeptides = flashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x => x.FullSequence).Contains(x.Key)).ToList(); - - if (pgQuantifiedPeptides.IsNotNullOrEmpty()) - { - var peptides = pgQuantifiedPeptides.Select(pep => new QuantifiedPeptideRecord(pep.Value.Sequence, - new HashSet { proteinGroup.ProteinGroupName }, - pep.Value.GetIntensity(spectraFile))).ToList(); - if (peptides.IsNullOrEmpty()) 
- { - proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); - continue; - } - - PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); - var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); - pfa.SetUpQuantificationFromQuantifiedPeptideRecords(peptides, proteins); // uses zero-based indexes for the mods. - - proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. + intensities.Add(spectraFile, 0); } } + proteinGroup.IntensitiesByFile = intensities; } } diff --git a/MetaMorpheus/EngineLayer/SpectralMatch.cs b/MetaMorpheus/EngineLayer/SpectralMatch.cs index 30511572c8..7fa7512755 100644 --- a/MetaMorpheus/EngineLayer/SpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/SpectralMatch.cs @@ -7,6 +7,7 @@ using System.Collections.Generic; using System.Linq; using Omics; +using Omics.SpectralMatch; using System; using Omics.Digestion; using EngineLayer.CrosslinkSearch; @@ -14,7 +15,7 @@ namespace EngineLayer { - public abstract class SpectralMatch : IComparable + public abstract class SpectralMatch : ISpectralMatch, IComparable { public const double ToleranceForScoreDifferentiation = 1e-9; @@ -75,6 +76,46 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, public string NativeId => ScanMetadata.NativeId; #endregion + + #region ISpectralMatch explicit interface implementations + + /// Maps to for ISpectralMatch compatibility. + int ISpectralMatch.OneBasedScanNumber => ScanNumber; + + /// + /// Consolidates quantification intensities for ISpectralMatch compatibility. + /// Returns ReporterIonIntensities if available (isobaric), a singleton array of + /// PrecursorScanIntensity for LFQ, or null if neither is populated. + /// + double[]? ISpectralMatch.Intensities => + ReporterIonIntensities ?? + (PrecursorScanIntensity > 0 ? 
new[] { PrecursorScanIntensity } : null); + + /// + /// Returns the identified biopolymers (peptides/proteoforms) for ISpectralMatch compatibility. + /// Unwraps SpectralMatchHypothesis to the underlying IBioPolymerWithSetMods. + /// + public IEnumerable GetIdentifiedBioPolymersWithSetMods() => + BestMatchingBioPolymersWithSetMods.Select(h => h.SpecificBioPolymer); + + public int CompareTo(ISpectralMatch? other) + { + if (other is null) return 1; + if (other is SpectralMatch mm) return CompareTo(mm); + // Fallback: compare by score descending + return Score.CompareTo(other.Score); + } + + public bool Equals(ISpectralMatch? other) + { + if (other is null) return false; + return FullFilePath == other.FullFilePath + && ScanNumber == other.OneBasedScanNumber + && FullSequence == other.FullSequence; + } + + #endregion + /// /// Refers to the index of the Ms2ScanWithSpecificMass in an array of Ms2ScansWithSpecificMass that is sorted by precursor mass /// diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 7082ae991e..1d2267ff23 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -457,7 +457,8 @@ private void QuantificationAnalysis() string.Join("|", proteinsOrderedByAccession.Select(p => p.GeneNames.Select(x => x.Item2).FirstOrDefault())), string.Join("|", proteinsOrderedByAccession.Select(p => p.Organism).Distinct())); - foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.Where(v => v.FullSequence != null)) + foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.OfType() + .Where(v => v.FullSequence != null)) { if (psmToProteinGroups.TryGetValue(psm, out var flashLfqProteinGroups)) { @@ -550,19 +551,22 @@ private void QuantificationAnalysis() foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - 
proteinGroup.IntensitiesByFile = new Dictionary(); - foreach (var spectraFile in proteinGroup.FilesForQuantification) + // Build the dictionary locally, then assign in one shot. + // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); + intensities.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); } else { - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); + intensities.Add(spectraFile, 0); } } + proteinGroup.IntensitiesByFile = intensities; } } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 966612d553..aac37c2a1e 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -18,7 +18,6 @@ using System.Text; using EngineLayer.DatabaseLoading; using MzLibUtil; -using MzLibUtil.PositionFrequencyAnalysis; using Omics.Digestion; using Omics.BioPolymer; using Omics.Modifications; @@ -364,7 +363,8 @@ private void QuantificationAnalysis() string.Join("|", proteinsOrderedByAccession.Select(p => p.GeneNames.Select(x => x.Item2).FirstOrDefault())), string.Join("|", proteinsOrderedByAccession.Select(p => p.Organism).Distinct())); - foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.Where(v => v.FullSequence != null)) + foreach (var psm in proteinGroup.AllPsmsBelowOnePercentFDR.OfType() + .Where(v => v.FullSequence != null)) { if (psmToProteinGroups.TryGetValue(psm, out var flashLfqProteinGroups)) { @@ -597,51 +597,30 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = flashLfqEngine.Run(); } - // get protein intensity and mod 
stoichiometry back from FlashLFQ + // get protein intensity back from FlashLFQ + // Modification occupancy is now computed by BioPolymerGroup.PopulateSampleGroupResults() + // which is called lazily by ToString()/GetTabSeparatedHeader(). if (ProteinGroups != null && Parameters.FlashLfqResults != null) { - // get protein intensity back from FlashLFQ foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - proteinGroup.IntensitiesByFile = new Dictionary(); - proteinGroup.ModsInfo = new Dictionary(); - foreach (var spectraFile in proteinGroup.FilesForQuantification) + // Build the dictionary locally, then assign in one shot. + // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) { - proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); + intensities.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); } else { - proteinGroup.IntensitiesByFile.Add(spectraFile, 0); - } - - // Now get the modification stoichiometries using FlashLFQ spectraFile-specific intensities - // and add them to the proteinGroup.ModsInfo property. The stoichiometry strings will only be - // extracted from the protein group's ToString method. 
- - // Pull out only the peptides in this protein group that were quantified by FlashLFQ - var pgQuantifiedPeptides = Parameters.FlashLfqResults.PeptideModifiedSequences.Where(x => proteinGroup.AllPeptides.Select(x => x.FullSequence).Contains(x.Key)).ToList(); - - if (pgQuantifiedPeptides.IsNotNullOrEmpty()) - { - var peptides = pgQuantifiedPeptides.Select(pep => new QuantifiedPeptideRecord(pep.Value.Sequence, - new HashSet { proteinGroup.ProteinGroupName }, - pep.Value.GetIntensity(spectraFile))).ToList(); - if (peptides.IsNullOrEmpty()) - { - proteinGroup.ModsInfo.Add(spectraFile, new QuantifiedProteinGroup(proteinGroup.ProteinGroupName)); - continue; - } - - var proteins = proteinGroup.Proteins.Select(p => new KeyValuePair(p.Accession, p.BaseSequence)).ToDictionary(); - PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis(); - pfa.SetUpQuantificationFromQuantifiedPeptideRecords(peptides, proteins); // uses zero-based indexes for the mods. - proteinGroup.ModsInfo.Add(spectraFile, pfa.ProteinGroups.First().Value); // Getting stoich one protein group at a time, so only getting First() is ok here. + intensities.Add(spectraFile, 0); } } + proteinGroup.IntensitiesByFile = intensities; } } diff --git a/MetaMorpheus/Test/RobTest.cs b/MetaMorpheus/Test/RobTest.cs index 1cf2a70324..16ef22ad73 100644 --- a/MetaMorpheus/Test/RobTest.cs +++ b/MetaMorpheus/Test/RobTest.cs @@ -261,10 +261,9 @@ public static void TestPTMOutput() ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, filteredPsms, false, false, true, new CommonParameters(), null, new List()); f.Run(); - // inclined to delete this test - // Otherwise, readapt to run flashlfq and then correctly check modinfo... - Assert.That(proteinGroups.First().ModsInfo, Is.Null); - Assert.That(proteinGroups.First().ModInfoStringFromGroupedFiles(proteinGroups.First().FilesForQuantification), Is.EqualTo("")); + // Occupancy is now computed by BioPolymerGroup.PopulateSampleGroupResults(). 
+ // Without FlashLFQ, SampleGroupResults should be null or empty. + Assert.That(proteinGroups.First().SampleGroupResults, Is.Null); } [Test] From 0c39b464e46b6fcf32439db54e27178af1471c95 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Wed, 25 Mar 2026 15:40:30 -0500 Subject: [PATCH 19/30] cleaning up mod pipeline plus minor qol recommendations from copilot. --- .../ProteinParsimony/ProteinGroup.cs | 4 +- .../PostGlycoSearchAnalysisTask.cs | 33 ++++++-------- .../SearchTask/PostSearchAnalysisTask.cs | 44 +++++++++---------- 3 files changed, 38 insertions(+), 43 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 376bf956fa..821dc6ce8a 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -604,14 +604,14 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List { spectraFileInfo }; } - if (IntensitiesByFile == null) + if (IntensitiesByFile == null || spectraFileInfo == null) { subsetPg.IntensitiesByFile = null; } else { subsetPg.IntensitiesByFile = new Dictionary - { { spectraFileInfo, IntensitiesByFile[spectraFileInfo] } }; + { { spectraFileInfo, IntensitiesByFile.GetValueOrDefault(spectraFileInfo, 0) } }; } return subsetPg; diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 1d2267ff23..e973c519d3 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -521,7 +521,8 @@ private void QuantificationAnalysis() var flashLFQIdentifications = new List(); foreach (var spectraFile in psmsGroupedByFile) { - var rawfileinfo = spectraFileInfo.Where(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)).First(); + var rawfileinfo = 
spectraFileInfo.FirstOrDefault(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)); + if (rawfileinfo == null) continue; foreach (var psm in spectraFile) { @@ -545,28 +546,25 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = FlashLfqEngine.Run(); } - // get protein intensity back from FlashLFQ - if (ProteinGroups != null && Parameters.FlashLfqResults != null) + // Propagate quantification data to protein groups + if (ProteinGroups != null) { foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - // Build the dictionary locally, then assign in one shot. - // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. - var intensities = new Dictionary(); - foreach (var spectraFile in spectraFileInfo) + if (Parameters.FlashLfqResults != null) { - if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { - intensities.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); - } - else - { - intensities.Add(spectraFile, 0); + intensities.Add(spectraFile, + Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup) + ? 
flashLfqProteinGroup.GetIntensity(spectraFile) + : 0); } + proteinGroup.IntensitiesByFile = intensities; } - proteinGroup.IntensitiesByFile = intensities; } } } @@ -594,11 +592,8 @@ private void WriteQuantificationResults() file.Key.FilenameWithoutExtension + "_QuantifiedPeaks", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); WritePeptideQuantificationResultsToTsv(Parameters.FlashLfqResults, Path.Combine(Parameters.IndividualResultsOutputFolder, file.Key.FilenameWithoutExtension), file.Key.FilenameWithoutExtension + "_QuantifiedPeptides", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); - if (true) - { - WriteProteinQuantificationResultsToTsv(Parameters.FlashLfqResults, Path.Combine(Parameters.IndividualResultsOutputFolder, file.Key.FilenameWithoutExtension), - file.Key.FilenameWithoutExtension + "_QuantifiedProteins", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); - } + WriteProteinQuantificationResultsToTsv(Parameters.FlashLfqResults, Path.Combine(Parameters.IndividualResultsOutputFolder, file.Key.FilenameWithoutExtension), + file.Key.FilenameWithoutExtension + "_QuantifiedProteins", new List { Parameters.SearchTaskId, "IndividualFileResults", file.Key.FullFilePathWithExtension }); } } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index aac37c2a1e..f43e73e097 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -537,7 +537,6 @@ private void QuantificationAnalysis() var undefinedPg = new ProteinGroup("UNDEFINED", "", ""); //sort the unambiguous psms by protease to make MBR compatible with multiple proteases Dictionary> proteaseSortedPsms = new Dictionary>(); - Dictionary proteaseSortedFlashLFQResults = new Dictionary(); foreach 
(IDigestionParams dp in Parameters.ListOfDigestionParams) { @@ -560,7 +559,8 @@ private void QuantificationAnalysis() var flashLFQIdentifications = new List(); foreach (var spectraFile in psmsGroupedByFile) { - var rawfileinfo = spectraFileInfo.First(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)); + var rawfileinfo = spectraFileInfo.FirstOrDefault(p => p.FullFilePathWithExtension.Equals(spectraFile.Key)); + if (rawfileinfo == null) continue; foreach (var psm in spectraFile) { @@ -597,30 +597,33 @@ private void QuantificationAnalysis() Parameters.FlashLfqResults = flashLfqEngine.Run(); } - // get protein intensity back from FlashLFQ - // Modification occupancy is now computed by BioPolymerGroup.PopulateSampleGroupResults() - // which is called lazily by ToString()/GetTabSeparatedHeader(). - if (ProteinGroups != null && Parameters.FlashLfqResults != null) + // Propagate quantification data to protein groups so that PopulateSampleGroupResults() + // has the per-file context it needs to produce spectral-count and intensity-based occupancy columns. + // + // FilesForQuantification is always assigned once spectraFileInfo is available so that + // count-based occupancy is written even when FlashLFQ produced no peaks (e.g., when + // flashLFQIdentifications is empty and FlashLfqResults remains null). + // IntensitiesByFile is only assigned when FlashLFQ actually ran and returned results. + if (ProteinGroups != null) { foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - // Build the dictionary locally, then assign in one shot. - // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. - var intensities = new Dictionary(); - foreach (var spectraFile in spectraFileInfo) + if (Parameters.FlashLfqResults != null) { - if (Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup)) + // Build the dictionary locally, then assign in one shot. 
+ // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { - intensities.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile)); - } - else - { - intensities.Add(spectraFile, 0); + intensities.Add(spectraFile, + Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup) + ? flashLfqProteinGroup.GetIntensity(spectraFile) + : 0); } + proteinGroup.IntensitiesByFile = intensities; } - proteinGroup.IntensitiesByFile = intensities; } } @@ -771,8 +774,6 @@ private void WriteIndividualPsmResults() includeAmbiguous: true, includeHighQValuePsms: Parameters.SearchParameters.WriteHighQValuePsms); - int count = psmsToWrite.Where(psm => psm.PsmFdrInfo.PEP <= 0.01).Count(); - // write PSMs string writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + $"_{GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s.{GlobalVariables.AnalyteType.GetSpectralMatchExtension()}"); WritePsmsToTsv(psmsToWrite, writtenFile); @@ -1206,7 +1207,7 @@ private void WriteVariantResults() new FdrAnalysisEngine(possibleVariantPsms, Parameters.NumNotches, CommonParameters, FileSpecificParameters, new List { Parameters.SearchTaskId }, "variant_PSMs", doPEP: false).Run(); - possibleVariantPsms + possibleVariantPsms = possibleVariantPsms .OrderBy(p => p.FdrInfo.QValue) .ThenByDescending(p => p.Score) .ThenBy(p => p.FdrInfo.CumulativeTarget) @@ -1524,8 +1525,7 @@ private static void WritePsmsForPercolator(List psmList, string w output.WriteLine(directions.ToString()); int idNumber = 0; - psmList.OrderByDescending(p => p.Score); - foreach (SpectralMatch psm in psmList.Where(p => p.PsmData_forPEPandPercolator != null)) + foreach (SpectralMatch psm in psmList.Where(p => p.PsmData_forPEPandPercolator != null).OrderByDescending(p => p.Score)) { foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods) { 
From 10cea57bfeaffe385192aa17a9757b91b6159247 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Tue, 7 Apr 2026 20:06:54 -0500 Subject: [PATCH 20/30] integrate mzlib stoich update --- MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 821dc6ce8a..ea8900f176 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -324,7 +324,7 @@ public override string ToString() if (SampleGroupResults != null) { - bool isProteinLevel = GroupType == BioPolymerGroupType.Protein; + bool isProteinLevel = GroupType == BioPolymerGroupType.Parent; IEnumerable orderedKeys = isProteinLevel ? ListOfProteinsOrderedByAccession.Select(p => p.Accession) : AllPeptides.Select(p => p.BaseSequence).Distinct().OrderBy(s => s); @@ -341,12 +341,12 @@ public override string ToString() sb.Append("\t"); } - sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatCountOccupancy(orderedKeys, isProteinLevel))); + sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatOccupancy(orderedKeys, isProteinLevel, intensityBased: false))); sb.Append("\t"); if (group.HasIntensityData) { - sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatIntensityOccupancy(orderedKeys, isProteinLevel))); + sb.Append(GlobalVariables.CheckLengthOfOutput(group.FormatOccupancy(orderedKeys, isProteinLevel, intensityBased: true))); sb.Append("\t"); } } From bb54e1b2d17d52c3dc8215a06d842273e9e5ece9 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Mon, 13 Apr 2026 14:13:50 -0500 Subject: [PATCH 21/30] fixing tests to expect the new column outputs --- .../PostGlycoSearchAnalysisTask.cs | 21 ++++++++++++- .../SearchTask/PostSearchAnalysisTask.cs | 16 +++++++++- MetaMorpheus/Test/ProteinGroupTest.cs | 31 ++++++++++++++----- 
MetaMorpheus/Test/QuantificationTest.cs | 2 +- MetaMorpheus/Test/SilacTest.cs | 24 ++++++++------ 5 files changed, 73 insertions(+), 21 deletions(-) diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index e973c519d3..7476b26d8a 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -73,6 +73,7 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li if (Parameters.GlycoSearchParameters.DoParsimony) { GlycoProteinAnalysis(fspList, individualFileFolderPath, individualFileFolder); //Creat the proteinGroups file + WriteProteinResults(individualFileFolderPath, individualFileFolder); } foreach (GlycoSpectralMatch gsm in fspList) //maybe this needs to be the filterd list??? @@ -158,6 +159,11 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li QuantificationAnalysis(); WriteQuantificationResults(); + if (glycoSearchParameters.DoParsimony) + { + WriteProteinResults(OutputFolder, null, MyTaskResults); + } + if (Parameters.GlycoSearchParameters.WritePrunedDataBase) { WritePrunedDatabase(Parameters.AllPsms.Cast().ToList(), Parameters.BioPolymerList.Cast().ToList(), Parameters.GlycoSearchParameters.ModsToWriteSelection, Parameters.DatabaseFilenameList, Parameters.OutputFolder, Parameters.SearchTaskId); @@ -319,7 +325,6 @@ private void GlycoProteinAnalysis(List gsms, string outputFo ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups; Status("Done constructing protein groups!", Parameters.SearchTaskId); - WriteProteinResults(outputFolder, individualFileFolder, myTaskResults); } private void GlycoAccessionAnalysis(List gsms, string individualFileFolderPath, string individualFileFolder = null) @@ -390,6 +395,20 @@ private void QuantificationAnalysis() { if (!Parameters.GlycoSearchParameters.DoQuantification) { + // 
Always set FilesForQuantification before writing the TSV so that + // PopulateSampleGroupResults() uses the consistent first branch (keyed + // on the full searched-file list) rather than the per-PSM else branch. + var spectraFileInfoForGroups = Parameters.CurrentRawFileList + .Select((f, i) => new SpectraFileInfo(f, "", i, 0, 0)) + .ToList(); + if (ProteinGroups != null) + { + foreach (var pg in ProteinGroups) + { + if (pg.FilesForQuantification == null) + pg.FilesForQuantification = spectraFileInfoForGroups; + } + } return; } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index f43e73e097..22b99150b6 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -274,6 +274,20 @@ private void QuantificationAnalysis() if (!Parameters.SearchParameters.DoLabelFreeQuantification) { + // Always set FilesForQuantification before writing the TSV so that + // PopulateSampleGroupResults() uses the consistent first branch (keyed + // on the full searched-file list) rather than the per-PSM else branch. 
+ var spectraFileInfoForGroups = Parameters.CurrentRawFileList + .Select((f, i) => new SpectraFileInfo(f, "", i, 0, 0)) + .ToList(); + if (ProteinGroups != null) + { + foreach (var pg in ProteinGroups) + { + if (pg.FilesForQuantification == null) + pg.FilesForQuantification = spectraFileInfoForGroups; + } + } return; } @@ -1725,4 +1739,4 @@ private void WriteDigestionCountHistogram() FinishedWritingFile(countHistogramPath, nestedIds); } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index bcfb5ca326..0b7b8cbd44 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -84,10 +84,11 @@ public static void ProteinGroupToStringTest() new HashSet() { pwsm1, pwsm2 }, new HashSet() { pwsm1, pwsm2 }); //string exectedProteinGroupToString = proteinGroup1.ToString(); - string exectedProteinGroupToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0"; + string exectedProteinGroupToString = + "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; var out1 = proteinGroup1.ToString().Split("\t"); var out1h = proteinGroup1.GetTabSeparatedHeader().Split("\t"); - var out1zipped = out1h.Zip(out1, (a, b) => (a, b)).ToDictionary(); + Assert.That(out1.Count(), Is.EqualTo(out1h.Count())); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupToString)); @@ -95,7 +96,8 @@ public static void ProteinGroupToStringTest() List proteinList3 = new List { prot3 }; ProteinGroup proteinGroup3 = new ProteinGroup(new HashSet(proteinList3), new HashSet(), new HashSet()); - string exectedProteinGroupWithDecoyToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t\t0\tT\t0\t0\t0\t0\t0"; + string exectedProteinGroupWithDecoyToString = + "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; 
var out2 = proteinGroup1.ToString(); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupWithDecoyToString)); } @@ -248,12 +250,25 @@ public static void TestModificationInfoListInProteinGroupsOutput() Assert.That(totalNumberOfMods, Is.EqualTo(4)); List proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv")).ToList(); - string testDataLine = proteinGroupsOutput.Where(x => x.StartsWith("P10591")).First(); - string modInfoListProteinTwo = testDataLine.Split('\t')[14]; + Assert.That(proteinGroupsOutput.Count, Is.EqualTo(8)); - Assert.That(8, Is.EqualTo(proteinGroupsOutput.Count)); - Assert.That(modInfoListProteinTwo, Is.EqualTo("P10591:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}" + - "P10592:{M#65[Common Variable:Oxidation on M, info: occupancy=1.0000(654315.977066199)]S#71[Less Common:Oxidation on S, info: occupancy=0.1957(654315.977066199)]}")); + // Use the header row to locate occupancy columns dynamically, + // guarding against future column-order changes. 
+ List header = proteinGroupsOutput[0].Split('\t').ToList(); + int countOccupancyIndex = header.IndexOf(header.First(h => h.StartsWith("CountOccupancy_"))); + int intensityOccupancyIndex = header.IndexOf(header.First(h => h.StartsWith("IntensityOccupancy_"))); + + string[] testDataFields = proteinGroupsOutput.First(x => x.StartsWith("P10591")).Split('\t'); + string countOccupancy = testDataFields[countOccupancyIndex]; + string intensityOccupancy = testDataFields[intensityOccupancyIndex]; + + // Tests count-based PTM occupancy: pos{residue}[{modName},info:fraction={count-fraction}({modified PSMs}/{total PSMs})] + Assert.That(countOccupancy, Is.EqualTo( + "pos71[Oxidation on S,info:fraction=0.50(1/2)]|pos71[Oxidation on S,info:fraction=0.50(1/2)]")); + + // Tests intensity-based PTM occupancy: pos{residue}[{modName},info:fraction={intensity-fraction}({mod intensity}/{total intensity})] + Assert.That(intensityOccupancy, Is.EqualTo( + "pos71[Oxidation on S,info:fraction=0.1899(1.279E+05/6.736E+05)]|pos71[Oxidation on S,info:fraction=0.1899(1.279E+05/6.736E+05)]")); Directory.Delete(outputFolder, true); } diff --git a/MetaMorpheus/Test/QuantificationTest.cs b/MetaMorpheus/Test/QuantificationTest.cs index b5cff3bde3..3e3b688744 100644 --- a/MetaMorpheus/Test/QuantificationTest.cs +++ b/MetaMorpheus/Test/QuantificationTest.cs @@ -233,7 +233,7 @@ public static void TestProteinQuantFileHeaders(bool hasDefinedExperimentalDesign // check the intensity column headers List splitHeader = lines[0].Split(new char[] { '\t' }).ToList(); - List intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList(); + List intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity_", StringComparison.OrdinalIgnoreCase)).ToList(); Assert.That(intensityColumnHeaders.Count == 2); diff --git a/MetaMorpheus/Test/SilacTest.cs b/MetaMorpheus/Test/SilacTest.cs index 80b80effd6..3679c5dd5e 100644 --- a/MetaMorpheus/Test/SilacTest.cs +++ 
b/MetaMorpheus/Test/SilacTest.cs @@ -58,8 +58,9 @@ public static void TestSilacNoLightProtein() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Mods_silac(R+3.988)\tIntensity_silac(R+3.988)\tMods_silac(R+10.008)\tIntensity_silac(R+10.008)")); //test that two files were made and no light file - Assert.That(output[1].Contains("875000.0000000009\t\t437500.00000000047")); //test the heavier intensity is half that of the heavy (per the raw file) + Assert.That(output[0].Contains("SpectralCount_silac(R+3.988)\tIntensity_silac(R+3.988)\tCountOccupancy_silac(R+3.988)\tIntensityOccupancy_silac(R+3.988)\tSpectralCount_silac(R+10.008)\tIntensity_silac(R+10.008)")); //test that two conditions were made and no light condition + Assert.That(output[1].Contains("875000.0000000009")); //test the heavy intensity + Assert.That(output[1].Contains("437500.00000000047")); //test the heavier intensity is half that of the heavy (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedPeptides.tsv"); @@ -132,8 +133,9 @@ public static void TestSilacMultipleModsPerCondition() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Mods_silac\tIntensity_silac\tMods_silac(K+8.014 & R+6.020)\tIntensity_silac(K+8.014 & R+6.020)")); //test that two files were made - Assert.That(output[1].Contains("1374999.999999999\t\t687499.9999999995")); //test the heavy intensity is half that of the light (per the raw file) + Assert.That(output[0].Contains("SpectralCount_silac\tIntensity_silac\tCountOccupancy_silac\tIntensityOccupancy_silac\tSpectralCount_silac(K+8.014 & R+6.020)\tIntensity_silac(K+8.014 & 
R+6.020)")); //test that two conditions were made + Assert.That(output[1].Contains("1374999.999999999")); //test the light intensity + Assert.That(output[1].Contains("687499.9999999995")); //test the heavy intensity is half that of the light (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"/TestSilac/AllQuantifiedPeptides.tsv"); @@ -223,11 +225,13 @@ public static void TestSilacQuantification() //test proteins string[] output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"\TestSilac\AllQuantifiedProteinGroups.tsv"); Assert.That(output.Length, Is.EqualTo(2)); - Assert.That(output[0].Contains("Mods_silac\tIntensity_silac\t"+ - "Mods_silacPart2\tIntensity_silacPart2\t"+ - "Mods_silac(K+8.014)\tIntensity_silac(K+8.014)\t"+ - "Mods_silacPart2(K+8.014)\tIntensity_silacPart2(K+8.014)")); //test that two files were made - Assert.That(output[1].Contains("875000.0000000009\t\t875000.0000000009\t\t437500.00000000047\t\t437500.00000000047")); //test the heavy intensity is half that of the light (per the raw file) + Assert.That(output[0].Contains( + "SpectralCount_silac\tIntensity_silac\tCountOccupancy_silac\tIntensityOccupancy_silac\t" + + "SpectralCount_silacPart2\tIntensity_silacPart2\tCountOccupancy_silacPart2\tIntensityOccupancy_silacPart2\t" + + "SpectralCount_silac(K+8.014)\tIntensity_silac(K+8.014)\tCountOccupancy_silac(K+8.014)\tIntensityOccupancy_silac(K+8.014)\t" + + "SpectralCount_silacPart2(K+8.014)\tIntensity_silacPart2(K+8.014)")); //test that all four conditions were made + Assert.That(output[1].Contains("875000.0000000009")); //test the light intensities (both files) + Assert.That(output[1].Contains("437500.00000000047")); //test the heavy intensity is half that of the light (per the raw file) //test peptides output = File.ReadAllLines(TestContext.CurrentContext.TestDirectory + @"\TestSilac\AllQuantifiedPeptides.tsv"); @@ -674,4 +678,4 @@ public static void TestSilacHelperMethods() 
SilacConversions.SilacConversionsPostQuantification(null, null, null, new List(), null, new HashSet(), null, new List(), new Dictionary(), true); } } -} \ No newline at end of file +} From 312affcf8a88e07f6017867a3a366240cc9e96af Mon Sep 17 00:00:00 2001 From: pcruzparri Date: Mon, 20 Apr 2026 01:18:46 -0500 Subject: [PATCH 22/30] Test fixes and consistent column generation enforcement --- .../FragmentationReanalysisViewModel.cs | 20 +++-- .../Util/ThreadSafeObservableCollection.cs | 78 +++++++++++++++++++ .../SearchTask/PostSearchAnalysisTask.cs | 54 +++++-------- .../Test/MetaDraw/FragmentReanalysis.cs | 1 + .../FragmentReanalysisRaceConditionTest.cs | 2 +- MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs | 1 + MetaMorpheus/Test/ProteinGroupTest.cs | 7 +- 7 files changed, 121 insertions(+), 42 deletions(-) create mode 100644 MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs diff --git a/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs b/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs index 4bd98670f4..e9543a031c 100644 --- a/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs +++ b/MetaMorpheus/GuiFunctions/MetaDraw/FragmentResearching/FragmentationReanalysisViewModel.cs @@ -6,6 +6,7 @@ using System.Windows; using Easy.Common.Extensions; using EngineLayer; +using GuiFunctions.Util; using iText.StyledXmlParser.Jsoup; using MassSpectrometry; using MzLibUtil; @@ -26,12 +27,13 @@ public class FragmentationReanalysisViewModel : BaseViewModel { private readonly bool _isProtein; private static readonly object _fragmentationLock = new(); + private static readonly object _productsLock = new(); public FragmentationReanalysisViewModel(bool isProtein = true) { _isProtein = isProtein; ProductIonMassTolerance = 20; - PossibleProducts = [.. 
GetPossibleProducts()]; + PossibleProducts = new ThreadSafeObservableCollection(GetPossibleProducts()); IEnumerable values; CommonParameters common; @@ -77,15 +79,13 @@ public void LoadFragmentationParameters(CommonParameters common, SearchParameter FragmentationParamsViewModel = new(common, search); } - private ObservableCollection _possibleProducts; - public ObservableCollection PossibleProducts + private ThreadSafeObservableCollection _possibleProducts; + public ThreadSafeObservableCollection PossibleProducts { get => _possibleProducts; set { _possibleProducts = value; OnPropertyChanged(nameof(PossibleProducts)); } } - private IEnumerable _productsToUse => PossibleProducts.Where(p => p.Use).Select(p => p.ProductType); - private bool _persist; public bool Persist { @@ -249,7 +249,7 @@ public List MatchIonsWithNewTypes(MsDataScan ms2Scan, Spectr List internalProducts = new List(); // Snapshot products before acquiring lock to avoid enumerating collection while it may be modified by UI thread - var productsSnapshot = _productsToUse.ToList(); + var productsSnapshot = GetProductsSnapshot(); // Lock to ensure thread-safe mutation of static DissociationTypeCollection dictionary lock (_fragmentationLock) { @@ -298,6 +298,14 @@ public List MatchIonsWithNewTypes(MsDataScan ms2Scan, Spectr .ToList(); } + private List GetProductsSnapshot() + { + lock (_productsLock) + { + return PossibleProducts.Where(p => p.Use).Select(p => p.ProductType).ToList(); + } + } + public static readonly IEqualityComparer MatchedFragmentIonComparer = new MatchedFragmentIonEqualityComparer(); public class MatchedFragmentIonEqualityComparer : IEqualityComparer diff --git a/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs b/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs new file mode 100644 index 0000000000..97c6bda1b9 --- /dev/null +++ b/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs @@ -0,0 +1,78 @@ +using System.Collections; +using 
System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Collections.Specialized; +using System.Linq; +using System.Windows; +using System.Windows.Threading; + +namespace GuiFunctions.Util; + +/// +/// An ObservableCollection that is safe for concurrent reads and writes. +/// All mutations are serialized with a lock, and GetEnumerator returns a +/// snapshot so callers never see a "collection was modified" exception. +/// CollectionChanged notifications are automatically marshalled to the UI +/// dispatcher when raised from a background thread. +/// +public class ThreadSafeObservableCollection : ObservableCollection, IEnumerable, IEnumerable +{ + private readonly object _lock = new(); + + public ThreadSafeObservableCollection() { } + + public ThreadSafeObservableCollection(IEnumerable collection) : base(collection) { } + + // ── Mutations ──────────────────────────────────────────────────────────── + + protected override void InsertItem(int index, T item) + { + lock (_lock) base.InsertItem(index, item); + } + + protected override void RemoveItem(int index) + { + lock (_lock) base.RemoveItem(index); + } + + protected override void SetItem(int index, T item) + { + lock (_lock) base.SetItem(index, item); + } + + protected override void MoveItem(int oldIndex, int newIndex) + { + lock (_lock) base.MoveItem(oldIndex, newIndex); + } + + protected override void ClearItems() + { + lock (_lock) base.ClearItems(); + } + + // ── Snapshot enumeration ───────────────────────────────────────────────── + // Explicit interface implementation ensures LINQ (which dispatches through + // IEnumerable) gets the snapshot enumerator, not the base class one. 
+ + IEnumerator IEnumerable.GetEnumerator() + { + T[] snapshot; + lock (_lock) + snapshot = Items.ToArray(); // Items is the protected List from Collection + return ((IEnumerable)snapshot).GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() => + ((IEnumerable)this).GetEnumerator(); + + // ── UI-thread marshalling ───────────────────────────────────────────────── + + protected override void OnCollectionChanged(NotifyCollectionChangedEventArgs e) + { + Dispatcher dispatcher = Application.Current?.Dispatcher; + if (dispatcher != null && !dispatcher.CheckAccess()) + dispatcher.Invoke(() => base.OnCollectionChanged(e)); + else + base.OnCollectionChanged(e); + } +} diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 22b99150b6..48a8a2ecdd 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -274,20 +274,6 @@ private void QuantificationAnalysis() if (!Parameters.SearchParameters.DoLabelFreeQuantification) { - // Always set FilesForQuantification before writing the TSV so that - // PopulateSampleGroupResults() uses the consistent first branch (keyed - // on the full searched-file list) rather than the per-PSM else branch. 
- var spectraFileInfoForGroups = Parameters.CurrentRawFileList - .Select((f, i) => new SpectraFileInfo(f, "", i, 0, 0)) - .ToList(); - if (ProteinGroups != null) - { - foreach (var pg in ProteinGroups) - { - if (pg.FilesForQuantification == null) - pg.FilesForQuantification = spectraFileInfoForGroups; - } - } return; } @@ -300,7 +286,7 @@ private void QuantificationAnalysis() } // construct file info for FlashLFQ - List spectraFileInfo; + List spectraFileInfo = null; // get experimental design info string pathToFirstSpectraFile = Directory.GetParent(Parameters.CurrentRawFileList.First()).FullName; @@ -324,16 +310,18 @@ private void QuantificationAnalysis() if (errors.Any()) { - Warn("Error reading experimental design file: " + errors.First() + ". Skipping quantification"); - return; + Warn("Error reading experimental design file: " + errors.First() + ". Falling back to default experimental design."); + spectraFileInfo = null; } } - else if (Parameters.SearchParameters.Normalize) + + if (Parameters.SearchParameters.Normalize && (spectraFileInfo == null || !File.Exists(assumedExperimentalDesignPath))) { - Warn("Could not find experimental design file at " + assumedExperimentalDesignPath + ", which is required for normalization. Skipping quantification"); + Warn("Could not find or parse experimental design file at " + assumedExperimentalDesignPath + ", which is required for normalization. Skipping quantification"); return; } - else + + if (spectraFileInfo == null || !spectraFileInfo.Any()) { spectraFileInfo = new List(); @@ -617,27 +605,27 @@ private void QuantificationAnalysis() // FilesForQuantification is always assigned once spectraFileInfo is available so that // count-based occupancy is written even when FlashLFQ produced no peaks (e.g., when // flashLFQIdentifications is empty and FlashLfqResults remains null). - // IntensitiesByFile is only assigned when FlashLFQ actually ran and returned results. 
+ // IntensitiesByFile is always assigned (with zeros if FlashLFQ produced no results) + // so that HasIntensityData is true and intensity-based occupancy columns are always written. if (ProteinGroups != null) { foreach (var proteinGroup in ProteinGroups) { proteinGroup.FilesForQuantification = spectraFileInfo; - if (Parameters.FlashLfqResults != null) + // Build the dictionary locally, then assign in one shot. + var intensities = new Dictionary(); + foreach (var spectraFile in spectraFileInfo) { - // Build the dictionary locally, then assign in one shot. - // The IntensitiesByFile getter returns a copy, so .Add() on it would be lost. - var intensities = new Dictionary(); - foreach (var spectraFile in spectraFileInfo) - { - intensities.Add(spectraFile, - Parameters.FlashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup) - ? flashLfqProteinGroup.GetIntensity(spectraFile) - : 0); - } - proteinGroup.IntensitiesByFile = intensities; + intensities.Add(spectraFile, + Parameters.FlashLfqResults?.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup) == true + ? flashLfqProteinGroup.GetIntensity(spectraFile) + : 0); } + proteinGroup.IntensitiesByFile = intensities; + + // Reset cached SampleGroupResults so it re-populates with the updated intensity data. 
+ proteinGroup.SampleGroupResults = null; } } diff --git a/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs b/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs index f5c1fae469..c393a66f4e 100644 --- a/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs +++ b/MetaMorpheus/Test/MetaDraw/FragmentReanalysis.cs @@ -20,6 +20,7 @@ namespace Test.MetaDraw { [ExcludeFromCodeCoverage] + [NonParallelizable] internal class FragmentReanalysis { [Test] diff --git a/MetaMorpheus/Test/MetaDraw/FragmentReanalysisRaceConditionTest.cs b/MetaMorpheus/Test/MetaDraw/FragmentReanalysisRaceConditionTest.cs index c0912210cf..6ec49ec1fc 100644 --- a/MetaMorpheus/Test/MetaDraw/FragmentReanalysisRaceConditionTest.cs +++ b/MetaMorpheus/Test/MetaDraw/FragmentReanalysisRaceConditionTest.cs @@ -94,4 +94,4 @@ public static void MatchIonsWithNewTypes_ProductsChangedDuringExecution_DoesNotT Assert.That(caughtException, Is.Null, $"InvalidOperationException thrown: {caughtException?.Message}"); } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs b/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs index 6c055c8dae..2260d26edf 100644 --- a/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs +++ b/MetaMorpheus/Test/MetaDraw/MetaDrawTest.cs @@ -2196,6 +2196,7 @@ public static void TestCrosslinkSpectralLibraryReading() } [Test] + [NonParallelizable] public void ExportPlot_RefragmentationWithAdditionalFragmentIons_WritesExpectedIons() { // Arrange diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index 0b7b8cbd44..cf8b3cafb5 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -15,6 +15,7 @@ using System.Text.RegularExpressions; using EngineLayer.DatabaseLoading; using Omics; +using MzLibUtil; namespace Test { @@ -218,7 +219,6 @@ public static void TestModificationInfoListInProteinGroupsOutput() ).Select(b => (b.ModificationType, b.IdWithMotif)).ToList() } }; - SearchTask task2 = new SearchTask { 
CommonParameters = new CommonParameters(), @@ -238,7 +238,7 @@ public static void TestModificationInfoListInProteinGroupsOutput() var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); engine.Run(); - string final = Path.Combine(MySetUpClass.outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml"); + string final = Path.Combine(outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml"); List proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List(), false, new List(), out var ok); // ensures that protein out put contains the correct number of proteins to match the following conditions. // all proteins in DB have baseSequence!=null (not ambiguous) @@ -254,6 +254,9 @@ public static void TestModificationInfoListInProteinGroupsOutput() // Use the header row to locate occupancy columns dynamically, // guarding against future column-order changes. + bool allEqualColumns = proteinGroupsOutput.Select(x => x.Split('\t').Length).AllSame(); + Assert.That(allEqualColumns, Is.True, "All rows in the protein groups output should have the same number of columns."); + List header = proteinGroupsOutput[0].Split('\t').ToList(); int countOccupancyIndex = header.IndexOf(header.First(h => h.StartsWith("CountOccupancy_"))); int intensityOccupancyIndex = header.IndexOf(header.First(h => h.StartsWith("IntensityOccupancy_"))); From 092062445574b53d125909b77e67616a3cb8f02c Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Thu, 14 May 2026 15:27:53 -0500 Subject: [PATCH 23/30] cleaning --- .../ProteinParsimony/ProteinGroup.cs | 21 +++++++------------ .../PostGlycoSearchAnalysisTask.cs | 2 ++ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index ea8900f176..43d74566c4 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ 
b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -24,9 +24,12 @@ namespace EngineLayer /// to the corresponding BioPolymerGroup base properties. Consumers should gradually migrate to /// the base class names. /// - /// Score() and CalculateSequenceCoverage() are overridden (via new) because they access - /// MetaMorpheus-specific SpectralMatch members (GetAminoAcidCoverage, BestMatchingBioPolymersWithSetMods, - /// FragmentCoveragePositionInPeptide) that are not on the ISpectralMatch interface. + /// Score() is handled entirely by the base class. + /// CalculateSequenceCoverage() and GetTabSeparatedHeader() are hidden (via new) because + /// CalculateSequenceCoverage() accesses MetaMorpheus-specific SpectralMatch members + /// (GetAminoAcidCoverage, BestMatchingBioPolymersWithSetMods, FragmentCoveragePositionInPeptide) + /// that are not on the ISpectralMatch interface, and GetTabSeparatedHeader() uses + /// MetaMorpheus-specific column names and includes BestPeptidePEP. /// public class ProteinGroup : BioPolymerGroup { @@ -385,17 +388,7 @@ public override string ToString() #endregion - #region Scoring and Coverage - - /// - /// Computes protein group score as the sum of the best score per unique base sequence. - /// Overrides base to use MetaMorpheus-specific scoring logic. 
- /// - public new void Score() - { - ProteinGroupScore = AllPsmsBelowOnePercentFDR.GroupBy(p => p.BaseSequence) - .Select(p => p.Select(x => x.Score).Max()).Sum(); - } + #region Coverage /// /// Computes sequence coverage using MetaMorpheus-specific SpectralMatch members diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 7476b26d8a..37fca9fea4 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -584,6 +584,8 @@ private void QuantificationAnalysis() } proteinGroup.IntensitiesByFile = intensities; } + + proteinGroup.SampleGroupResults = null; } } } From c19eaeb44b33c4ba9266f559f15215ee74b20e73 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Fri, 15 May 2026 12:39:43 -0500 Subject: [PATCH 24/30] fixing changes to intended perviously behavior (early returns on quant if exp design is no bueno) and meeting test expectations. 
--- .../ProteinParsimony/ProteinGroup.cs | 18 ++-- MetaMorpheus/EngineLayer/SpectralMatch.cs | 8 +- .../PostGlycoSearchAnalysisTask.cs | 19 +--- .../SearchTask/PostSearchAnalysisTask.cs | 33 ++++--- .../Test/Multiplex_Labeling_TMT_iTRAQ.cs | 87 ++++++++++++++----- MetaMorpheus/Test/ProteinGroupTest.cs | 62 ++++++++++++- MetaMorpheus/Test/SearchTaskTest.cs | 48 ++++++---- MetaMorpheus/Test/SeqCoverageTest.cs | 2 +- 8 files changed, 195 insertions(+), 82 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 43d74566c4..17c57dac57 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -209,9 +209,10 @@ public void GetIdentifiedPeptidesOutput(List labels) sb.Append("Sequence Coverage with Mods" + '\t'); sb.Append("Fragment Sequence Coverage" + '\t'); - // Quantification and occupancy columns from base SampleGroupResult system - if (SampleGroupResults.IsNullOrEmpty()) PopulateSampleGroupResults(); - + // Quantification and occupancy columns from base SampleGroupResult system. + // Dynamic columns appear only when SampleGroupResults has been explicitly populated + // upstream (e.g., LFQ-success path). Workflows that return early from quantification + // leave it null/empty and emit only the static columns. if (SampleGroupResults != null) { foreach (var group in SampleGroupResults) @@ -322,9 +323,9 @@ public override string ToString() sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", FragmentSequenceCoverageDisplayList))); sb.Append("\t"); - // Quantification and occupancy from base SampleGroupResult system - if (SampleGroupResults.IsNullOrEmpty()) PopulateSampleGroupResults(); - + // Quantification and occupancy from base SampleGroupResult system. 
+ // Mirrors the header: dynamic columns appear only when SampleGroupResults has been + // populated upstream so header and rows stay consistent within a file. if (SampleGroupResults != null) { bool isProteinLevel = GroupType == BioPolymerGroupType.Parent; @@ -594,7 +595,10 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List { spectraFileInfo }; + if (spectraFileInfo != null) + { + subsetPg.FilesForQuantification = new List { spectraFileInfo }; + } } if (IntensitiesByFile == null || spectraFileInfo == null) diff --git a/MetaMorpheus/EngineLayer/SpectralMatch.cs b/MetaMorpheus/EngineLayer/SpectralMatch.cs index 7fa7512755..74fb662bd5 100644 --- a/MetaMorpheus/EngineLayer/SpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/SpectralMatch.cs @@ -27,7 +27,7 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, DigestionParams = commonParameters.DigestionParams; RunnerUpScore = commonParameters.ScoreCutoff; SpectralAngle = -1; - ReporterIonIntensities = scan.IsobaricMassTagReporterIonIntensities; + IsobaricMassTagReporterIonIntensities = scan.IsobaricMassTagReporterIonIntensities; AddOrReplace(peptide, score, notch, true, matchedFragmentIons); } @@ -88,7 +88,7 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score, /// PrecursorScanIntensity for LFQ, or null if neither is populated. /// double[]? ISpectralMatch.Intensities => - ReporterIonIntensities ?? + IsobaricMassTagReporterIonIntensities ?? (PrecursorScanIntensity > 0 ? new[] { PrecursorScanIntensity } : null); /// @@ -196,7 +196,7 @@ public void SetMs2Scan(MsDataScan scan) /// Null if multiplex quantification wasn't performed. /// Array order matches the reporter ion order defined by the mass tag modification. /// - public double[]? ReporterIonIntensities { get; private set; } + public double[]? 
IsobaricMassTagReporterIonIntensities { get; private set; } public IEnumerable BestMatchingBioPolymersWithSetMods { @@ -634,4 +634,4 @@ public int CompareTo(SpectralMatch otherPsm) } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 37fca9fea4..6af74ec6d9 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -395,20 +395,6 @@ private void QuantificationAnalysis() { if (!Parameters.GlycoSearchParameters.DoQuantification) { - // Always set FilesForQuantification before writing the TSV so that - // PopulateSampleGroupResults() uses the consistent first branch (keyed - // on the full searched-file list) rather than the per-PSM else branch. - var spectraFileInfoForGroups = Parameters.CurrentRawFileList - .Select((f, i) => new SpectraFileInfo(f, "", i, 0, 0)) - .ToList(); - if (ProteinGroups != null) - { - foreach (var pg in ProteinGroups) - { - if (pg.FilesForQuantification == null) - pg.FilesForQuantification = spectraFileInfoForGroups; - } - } return; } @@ -585,7 +571,10 @@ private void QuantificationAnalysis() proteinGroup.IntensitiesByFile = intensities; } - proteinGroup.SampleGroupResults = null; + // Populate SampleGroupResults from the shared spectraFileInfo so + // every PG carries the same dynamic-column schema. Without this, the writer + // would have no way to produce uniform headers/rows. 
+ proteinGroup.PopulateSampleGroupResults(); } } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 48a8a2ecdd..a5d9c9fdb6 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -286,7 +286,7 @@ private void QuantificationAnalysis() } // construct file info for FlashLFQ - List spectraFileInfo = null; + List spectraFileInfo; // get experimental design info string pathToFirstSpectraFile = Directory.GetParent(Parameters.CurrentRawFileList.First()).FullName; @@ -310,18 +310,16 @@ private void QuantificationAnalysis() if (errors.Any()) { - Warn("Error reading experimental design file: " + errors.First() + ". Falling back to default experimental design."); - spectraFileInfo = null; + Warn("Error reading experimental design file: " + errors.First() + ". Skipping quantification"); + return; } } - - if (Parameters.SearchParameters.Normalize && (spectraFileInfo == null || !File.Exists(assumedExperimentalDesignPath))) + else if (Parameters.SearchParameters.Normalize) { - Warn("Could not find or parse experimental design file at " + assumedExperimentalDesignPath + ", which is required for normalization. Skipping quantification"); + Warn("Could not find experimental design file at " + assumedExperimentalDesignPath + ", which is required for normalization. Skipping quantification"); return; } - - if (spectraFileInfo == null || !spectraFileInfo.Any()) + else { spectraFileInfo = new List(); @@ -623,9 +621,6 @@ private void QuantificationAnalysis() : 0); } proteinGroup.IntensitiesByFile = intensities; - - // Reset cached SampleGroupResults so it re-populates with the updated intensity data. 
- proteinGroup.SampleGroupResults = null; } } @@ -635,6 +630,16 @@ private void QuantificationAnalysis() SilacConversions.SilacConversionsPostQuantification(allSilacLabels, startLabel, endLabel, spectraFileInfo, ProteinGroups, Parameters.ListOfDigestionParams, Parameters.FlashLfqResults, Parameters.AllSpectralMatches.Cast().ToList(), Parameters.SearchParameters.ModsToWriteSelection, quantifyUnlabeledPeptides); } + + // Populate SampleGroupResults AFTER all quant-state mutation (including SILAC + // re-labeling) so every PG carries the same dynamic-column schema for the writer. + if (ProteinGroups != null) + { + foreach (var proteinGroup in ProteinGroups) + { + proteinGroup.PopulateSampleGroupResults(); + } + } } catch (Exception e) { @@ -1160,12 +1165,12 @@ private void WritePsmPlusMultiplexIons(IEnumerable psms, string f sb.Append(psm.ToString(Parameters.SearchParameters.ModsToWriteSelection, writePeptideLevelResults).Trim()); sb.Append('\t'); - if (psm.ReporterIonIntensities != null && psm.ReporterIonIntensities.Length > 0) + if (psm.IsobaricMassTagReporterIonIntensities != null && psm.IsobaricMassTagReporterIonIntensities.Length > 0) { - for (int i = 0; i < psm.ReporterIonIntensities.Length; i++) + for (int i = 0; i < psm.IsobaricMassTagReporterIonIntensities.Length; i++) { if (i > 0) sb.Append('\t'); - sb.Append(psm.ReporterIonIntensities[i].ToString("F1", CultureInfo.InvariantCulture)); + sb.Append(psm.IsobaricMassTagReporterIonIntensities[i].ToString("F1", CultureInfo.InvariantCulture)); } } else diff --git a/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs b/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs index 2c7d3e03a1..5db6ff1fdf 100644 --- a/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs +++ b/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs @@ -176,18 +176,18 @@ public static void TestTmtIonsArentTreatedLikePeptideIsotopicEnvelopes() // We're going to create a fake MsDataScan that contains all fragment ions for PEPTIDEK and all TMT reporter ions 
// The TMT reporter ions will have intensities that mimic the isotopic distribution predicted by the Averagine Model - + List allIonMzs = fragments.Select(m => m.NeutralMass.ToMz(1)).OrderBy(m => m).ToList(); - + double[] mzArray = allIonMzs.ToArray(); double[] intensityArray = new double[mzArray.Length]; // Corresponding to 126, 127N, 127C, 128N, 128C, 129N, 129C, 130N, 130C, 131N, 131C - var tmtIntensities = new double[] { 92, 92, 8, 12, 92, 92, 16, 20, 2, 3, 5 }; + var tmtIntensities = new double[] { 92, 92, 8, 12, 92, 92, 16, 20, 2, 3, 5 }; for (int i = 0; i < intensityArray.Length; i++) { - if(i < tmtIntensities.Length) + if (i < tmtIntensities.Length) { intensityArray[i] = tmtIntensities[i]; } @@ -199,9 +199,9 @@ public static void TestTmtIonsArentTreatedLikePeptideIsotopicEnvelopes() // Create a MS1 scan var ms1Spectrum = new MzSpectrum(new double[] { peptide.MonoisotopicMass.ToMz(2), (peptide.MonoisotopicMass + Constants.C13MinusC12).ToMz(2) }, new double[] { 10000, 5000 }, false); - var ms1Scan = new MsDataScan(ms1Spectrum, 1, 1, true, Polarity.Positive, 1.0, new MzRange(100, 2000), + var ms1Scan = new MsDataScan(ms1Spectrum, 1, 1, true, Polarity.Positive, 1.0, new MzRange(100, 2000), scanFilter: "", - MZAnalyzerType.Orbitrap, + MZAnalyzerType.Orbitrap, totalIonCurrent: 1000, null, noiseData: null, @@ -216,13 +216,13 @@ public static void TestTmtIonsArentTreatedLikePeptideIsotopicEnvelopes() var ms2Spectrum = new MzSpectrum(mzArray, intensityArray, false); var ms2Scan = new MsDataScan(ms2Spectrum, 2, 2, true, Polarity.Positive, 1.1, new MzRange(100, 2000), scanFilter: "", MZAnalyzerType.Orbitrap, - totalIonCurrent: 1000, null, null, "scan=2", - selectedIonMz: peptide.MonoisotopicMass.ToMz(2), + totalIonCurrent: 1000, null, null, "scan=2", + selectedIonMz: peptide.MonoisotopicMass.ToMz(2), selectedIonChargeStateGuess: 2, - selectedIonIntensity: 10000, - isolationMZ: peptide.MonoisotopicMass.ToMz(2), + selectedIonIntensity: 10000, + isolationMZ: 
peptide.MonoisotopicMass.ToMz(2), isolationWidth: 2.2, - DissociationType.HCD, 1, + DissociationType.HCD, 1, selectedIonMonoisotopicGuessMz: peptide.MonoisotopicMass.ToMz(2)); string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestTmtOutput"); @@ -277,9 +277,9 @@ public static void TestTmtQuantificationOutput() List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("search", searchTask) }; string mzmlName = @"TMT_test\VA084TQ_6.mzML"; - string fastaName = @"TMT_test\mouseTMT.fasta"; + string fastaName = @"TMT_test\mouseTMT.fasta"; string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestTmtOutput"); - if(Directory.Exists(outputFolder)) + if (Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); engine.Run(); @@ -291,7 +291,7 @@ public static void TestTmtQuantificationOutput() string[] ionLabelsInHeader = header[^11..]; // Last 11 columns should be the TMT labels Assert.That(ionLabelsInHeader, Is.EquivalentTo(new string[] { "126", "127N", "127C", "128N", "128C", "129N", "129C", "130N", "130C", "131N", "131C" })); - + double channelSum127N = 0; for (int i = 1; i < peaksResults.Length; i++) { @@ -317,7 +317,7 @@ public static void TestMs3TmtQuantificationWith() try { - if(Directory.Exists(outputFolder)) + if (Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); @@ -421,7 +421,7 @@ public static void TestDiLeuQuantificationOutput() Assert.That(peaksResults.Length == 2); ionLabelsInHeader = peaksResults[0].Trim().Split('\t')[^4..]; - Assert.That(ionLabelsInHeader, Is.EquivalentTo(new string[] {"115", "116", "117", "118"})); + Assert.That(ionLabelsInHeader, Is.EquivalentTo(new string[] { "115", 
"116", "117", "118" })); ionSum = peaksResults[1].Trim().Split('\t')[^4..].Select(s => double.Parse(s)).Sum(); Assert.That(ionSum, Is.EqualTo(115537).Within(1)); @@ -615,7 +615,7 @@ public static void TestDoNotCountDiagnosticIonsInScore_HCD() Assert.That(psm.PeptideDescription, Is.EqualTo("full")); Assert.That(psm.ProteinAccession, Is.EqualTo("Q99LF4")); - Directory.Delete(outputFolder,true); + Directory.Delete(outputFolder, true); } [Test] public static void TestDoNotCountDiagnosticIonsInScore_LowCID() @@ -626,13 +626,13 @@ public static void TestDoNotCountDiagnosticIonsInScore_LowCID() //The below theoretical does not accurately represent B-Y ions double[] sorted_theoretical_product_masses_for_this_peptide = new double[] { precursorMass + (2 * Constants.ProtonMass) - 275.1350, precursorMass + (2 * Constants.ProtonMass) - 258.127, precursorMass + (2 * Constants.ProtonMass) - 257.1244, 50, 60, 70, 147.0764, precursorMass + (2 * Constants.ProtonMass) - 147.0764, precursorMass + (2 * Constants.ProtonMass) - 70, precursorMass + (2 * Constants.ProtonMass) - 60, precursorMass + (2 * Constants.ProtonMass) - 50, 257.1244, 258.127, 275.1350 }; //{ 50, 60, 70, 147.0764, 257.1244, 258.127, 275.1350 } List productsWithLocalizedMassDiff = new(); - + //add one diagnostic ion productsWithLocalizedMassDiff.Add(new Product(ProductType.D, FragmentationTerminus.Both, sorted_theoretical_product_masses_for_this_peptide[11], 1, 1, 0)); for (int i = 0; i < sorted_theoretical_product_masses_for_this_peptide.Length; i++) { - if(i != 11) + if (i != 11) { productsWithLocalizedMassDiff.Add(new Product(ProductType.b, FragmentationTerminus.Both, sorted_theoretical_product_masses_for_this_peptide[i], 1, 1, 0)); } @@ -974,7 +974,7 @@ public static void TestSearchTaskExceptionOnNullMassTag() { // This test simulates what happens in SearchTask when IsobaricMassTag.GetIsobaricMassTag returns null // The actual SearchTask code throws MetaMorpheusException in this case - + string invalidModId = 
"InvalidModification"; var massTag = IsobaricMassTag.GetIsobaricMassTag(invalidModId); @@ -984,5 +984,50 @@ public static void TestSearchTaskExceptionOnNullMassTag() Assert.Throws(() => throw new MetaMorpheusException("Could not find isobaric mass tag with the name " + invalidModId)); } } + + [Test] + public static void TestTmtProteinGroupsHaveNoQuantColumns() + { + // TMT runs return early from QuantificationAnalysis before FilesForQuantification + // is assigned, so protein groups must NOT emit Intensity_/IntensityOccupancy_/ + // SpectralCount_ columns. This guards against future regressions. + var searchTask = Toml.ReadFile( + Path.Combine(TestContext.CurrentContext.TestDirectory, @"TMT_test\TMT-Task1-SearchTaskconfig.toml"), + MetaMorpheusTask.tomlConfig); + // DoParsimony must be true to generate protein groups output file + searchTask.SearchParameters.DoParsimony = true; + + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestTmtNoQuantColumns"); + var engine = new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("search", searchTask) }, + new List { Path.Combine(TestContext.CurrentContext.TestDirectory, @"TMT_test\VA084TQ_6.mzML") }, + new List { new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TMT_test\mouseTmt.fasta"), false) }, + outputFolder); + try + { + engine.Run(); + + var pgLines = File.ReadAllLines( + Path.Combine(outputFolder, "search", "AllProteinGroups.tsv")).ToList(); + Assert.That(pgLines.Count, Is.GreaterThan(1), "No protein groups written"); + + var header = pgLines[0].Split('\t').ToList(); + + // None of the per-file quant column families should appear for TMT + Assert.That(header.Any(h => h.StartsWith("Intensity_")), Is.False, "Unexpected Intensity_ column in TMT output"); + Assert.That(header.Any(h => h.StartsWith("SpectralCount_")), Is.False, "Unexpected SpectralCount_ column in TMT output"); + Assert.That(header.Any(h => h.StartsWith("IntensityOccupancy_")), 
Is.False, "Unexpected IntensityOccupancy_ column in TMT output"); + Assert.That(header.Any(h => h.StartsWith("CountOccupancy_")), Is.False, "Unexpected CountOccupancy_ column in TMT output"); + + // All rows must still have consistent column counts + Assert.That(pgLines.Select(l => l.Split('\t').Length).AllSame(), + Is.True, "Column count mismatch across protein group rows"); + } + finally + { + if (Directory.Exists(outputFolder)) + Directory.Delete(outputFolder, true); + } + } } -} \ No newline at end of file +} diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index cf8b3cafb5..2e079e9df1 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -232,10 +232,17 @@ public static void TestModificationInfoListInProteinGroupsOutput() } }; List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", task1), ("task2", task2) }; - string mzmlName = @"TestData\PrunedDbSpectra.mzml"; - string fastaName = @"TestData\DbForPrunedDb.fasta"; string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedGeneration"); + // Copy inputs into a clean per-test folder so QuantificationAnalysis does not pick up + // a stale ExperimentalDesign.tsv left in the shared TestData\ directory by other tests. 
+ string inputFolder = Path.Combine(outputFolder, "inputs"); + Directory.CreateDirectory(inputFolder); + string mzmlName = Path.Combine(inputFolder, "PrunedDbSpectra.mzml"); + string fastaName = Path.Combine(inputFolder, "DbForPrunedDb.fasta"); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml"), mzmlName, true); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"), fastaName, true); + var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); engine.Run(); string final = Path.Combine(outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml"); @@ -275,6 +282,57 @@ public static void TestModificationInfoListInProteinGroupsOutput() Directory.Delete(outputFolder, true); } + + [Test] + public static void TestGetIdentifiedPeptidesOutputOnAllBranches() + { + // Arrange: one protein with two peptides that differ only by a mod + ModificationMotif.TryGetMotif("C", out ModificationMotif motif); + var mod = new Modification(_originalId: "Carbamidomethyl on C", _modificationType: "Common Fixed", + _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 57.02146); + + var oneBasedMods = new Dictionary> { { 2, new List { mod } } }; + var protein = new Protein("MCPEPTIDE", "prot1", oneBasedModifications: oneBasedMods); + + var modsOnPwsm1 = new Dictionary { { 2, mod } }; + var pwsm1 = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 9, + CleavageSpecificity.Full, "", 0, modsOnPwsm1, 0); // FullSequence != BaseSequence + var pwsm2 = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 9, + CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + + var pg = new EngineLayer.ProteinGroup( + new HashSet { protein }, + new HashSet { pwsm1, pwsm2 }, + new HashSet { pwsm1, pwsm2 }); + + // Branch 1: labels == null, DisplayModsOnPeptides == false → BaseSequence + 
pg.DisplayModsOnPeptides = false; + pg.GetIdentifiedPeptidesOutput(null); + var tsv1 = pg.ToString(); + Assert.That(tsv1, Does.Contain(pwsm1.BaseSequence)); + Assert.That(tsv1.Split('\t')[4], Does.Not.Contain("[")); // unique-peptides column has no mod notation + + // Branch 2: labels == null, DisplayModsOnPeptides == true → FullSequence (includes mod) + pg.DisplayModsOnPeptides = true; + pg.GetIdentifiedPeptidesOutput(null); + var tsv2 = pg.ToString(); + Assert.That(tsv2, Does.Contain(pwsm1.FullSequence)); + + // SILAC branches: use an empty label list (labels != null) + var labels = new List(); + + // Branch 3: labels != null, DisplayModsOnPeptides == false → light BaseSequence + pg.DisplayModsOnPeptides = false; + Assert.DoesNotThrow(() => pg.GetIdentifiedPeptidesOutput(labels)); + var tsv3 = pg.ToString(); + Assert.That(tsv3.Split('\t')[4], Is.Not.Empty); // unique-peptides column populated + + // Branch 4: labels != null, DisplayModsOnPeptides == true → light FullSequence + pg.DisplayModsOnPeptides = true; + Assert.DoesNotThrow(() => pg.GetIdentifiedPeptidesOutput(labels)); + var tsv4 = pg.ToString(); + Assert.That(tsv4.Split('\t')[4], Is.Not.Empty); + } } } diff --git a/MetaMorpheus/Test/SearchTaskTest.cs b/MetaMorpheus/Test/SearchTaskTest.cs index 6c09a506c2..678f1d9ed8 100644 --- a/MetaMorpheus/Test/SearchTaskTest.cs +++ b/MetaMorpheus/Test/SearchTaskTest.cs @@ -264,31 +264,43 @@ public static void PostSearchNormalizeTest() string myDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"); string folderPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestNormalizationExperDesign"); string experimentalDesignFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\ExperimentalDesign.tsv"); - using (StreamWriter output = new StreamWriter(experimentalDesignFile)) - { - output.WriteLine("FileName\tCondition\tBiorep\tFraction\tTechrep"); - output.WriteLine("PrunedDbSpectra.mzml" + "\t" + 
"condition" + "\t" + "1" + "\t" + "1" + "\t" + "1"); - } DbForTask db = new DbForTask(myDatabase, false); - // run the task - Directory.CreateDirectory(folderPath); - searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); + try + { + using (StreamWriter output = new StreamWriter(experimentalDesignFile)) + { + output.WriteLine("FileName\tCondition\tBiorep\tFraction\tTechrep"); + output.WriteLine("PrunedDbSpectra.mzml" + "\t" + "condition" + "\t" + "1" + "\t" + "1" + "\t" + "1"); + } - Directory.Delete(folderPath, true); + // run the task + Directory.CreateDirectory(folderPath); + searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); - // delete the exper design and try again. this should skip quantification - File.Delete(experimentalDesignFile); + Directory.Delete(folderPath, true); - // run the task - Directory.CreateDirectory(folderPath); - searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); + // delete the exper design and try again. this should skip quantification + File.Delete(experimentalDesignFile); - // PSMs should be present but no quant output - Assert.That(!File.Exists(Path.Combine(folderPath, "AllQuantifiedPeptides.tsv"))); - Assert.That(File.Exists(Path.Combine(folderPath, "AllPSMs.psmtsv"))); + // run the task + Directory.CreateDirectory(folderPath); + searchTask.RunTask(folderPath, new List { db }, new List { myFile }, "normal"); - Directory.Delete(folderPath, true); + // PSMs should be present but no quant output + Assert.That(!File.Exists(Path.Combine(folderPath, "AllQuantifiedPeptides.tsv"))); + Assert.That(File.Exists(Path.Combine(folderPath, "AllPSMs.psmtsv"))); + + Directory.Delete(folderPath, true); + } + finally + { + // Always remove the ExperimentalDesign.tsv we wrote into shared TestData\, even + // on assertion/exception, so subsequent tests in the same suite run don't pick + // up a stale file (which would silently break quantification for them). 
+ if (File.Exists(experimentalDesignFile)) File.Delete(experimentalDesignFile); + if (Directory.Exists(folderPath)) Directory.Delete(folderPath, true); + } } /// diff --git a/MetaMorpheus/Test/SeqCoverageTest.cs b/MetaMorpheus/Test/SeqCoverageTest.cs index 9ec65d6874..c8790a71e0 100644 --- a/MetaMorpheus/Test/SeqCoverageTest.cs +++ b/MetaMorpheus/Test/SeqCoverageTest.cs @@ -163,4 +163,4 @@ public static void TestFragmentSequenceCoverage() Assert.That(firstSequenceCoverageDisplayList == "MmkMMK"); } } -} \ No newline at end of file +} From f6340890adb69edced7f6f540384123333257012 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Fri, 15 May 2026 15:57:44 -0500 Subject: [PATCH 25/30] more tests --- MetaMorpheus/Test/ProteinGroupTest.cs | 119 +++++++++++++++++++++++++- MetaMorpheus/Test/SearchTaskTest.cs | 99 ++++++++++++++++++++- 2 files changed, 216 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index 2e079e9df1..7b6dcf96a6 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -1,4 +1,5 @@ using EngineLayer; +using FlashLFQ; using NUnit.Framework; using Proteomics; using System.Collections.Generic; @@ -123,7 +124,123 @@ public static void TestProteinGroupStringAndHeaderHaveSameNumberOfTabs() string[] rowEntries = pgRow.Split("\t"); Assert.That(headerFields.Length, Is.EqualTo(rowEntries.Length)); Assert.That(Regex.Matches(pgHeader, @"\t").Count, Is.EqualTo(Regex.Matches(pgRow, @"\t").Count)); - } + } + + // No upstream quant setup -> no dynamic columns in header or row. 
+ [Test] + public static void TestProteinGroupNoDynamicColumnsWhenSampleGroupResultsNotPopulated() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + Assert.That(pg.SampleGroupResults, Is.Null); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + + Assert.That(header.Contains("SpectralCount_"), Is.False); + Assert.That(header.Contains("Intensity_"), Is.False); + Assert.That(header.Contains("CountOccupancy_"), Is.False); + Assert.That(header.Contains("IntensityOccupancy_"), Is.False); + Assert.That(header.Split('\t').Length, Is.EqualTo(row.Split('\t').Length)); + + // Header/row generation must not lazy-populate. + Assert.That(pg.SampleGroupResults, Is.Null); + } + + // FilesForQuantification + IntensitiesByFile + populate -> all 4 column families appear, + // one per sample group, with matching header/row tab counts. 
+ [Test] + public static void TestProteinGroupDynamicColumnsWithIntensitiesPopulated() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + var fileA = new SpectraFileInfo(@"X:\fakeA.mzML", condition: "", biorep: 0, fraction: 0, techrep: 0); + var fileB = new SpectraFileInfo(@"X:\fakeB.mzML", condition: "", biorep: 1, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { fileA, fileB }; + pg.IntensitiesByFile = new Dictionary + { + { fileA, 100.0 }, + { fileB, 200.0 } + }; + + pg.PopulateSampleGroupResults(); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + string[] headerFields = header.Split('\t'); + + Assert.That(headerFields.Length, Is.EqualTo(row.Split('\t').Length)); + Assert.That(headerFields.Count(h => h.StartsWith("SpectralCount_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("Intensity_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("CountOccupancy_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("IntensityOccupancy_")), Is.EqualTo(2)); + } + + // FilesForQuantification set without IntensitiesByFile -> only count-based dynamic columns. 
+ [Test] + public static void TestProteinGroupCountOnlyColumnsWhenNoIntensities() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + var fileA = new SpectraFileInfo(@"X:\fakeA.mzML", condition: "", biorep: 0, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { fileA }; + + pg.PopulateSampleGroupResults(); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + + Assert.That(header.Contains("SpectralCount_"), Is.True); + Assert.That(header.Contains("CountOccupancy_"), Is.True); + Assert.That(header.Contains("Intensity_"), Is.False); + Assert.That(header.Contains("IntensityOccupancy_"), Is.False); + Assert.That(header.Split('\t').Length, Is.EqualTo(row.Split('\t').Length)); + } + + // Mutating FilesForQuantification/IntensitiesByFile (as SilacConversions does) invalidates + // SampleGroupResults; the post-mutation populate must reflect the new file list. 
+ [Test] + public static void TestProteinGroupPopulateSampleGroupsReflectsPostSilacState() + { + Protein prot1 = new Protein("MEDEEK", "prot1"); + PeptideWithSetModifications pwsm1 = new PeptideWithSetModifications(prot1, new DigestionParams(), 1, 3, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + ProteinGroup pg = new ProteinGroup(new HashSet { prot1 }, + new HashSet { pwsm1 }, new HashSet { pwsm1 }); + + var light = new SpectraFileInfo(@"X:\sample_light.mzML", condition: "", biorep: 0, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { light }; + pg.IntensitiesByFile = new Dictionary { { light, 100.0 } }; + + var heavy = new SpectraFileInfo(@"X:\sample_heavy.mzML", condition: "", biorep: 1, fraction: 0, techrep: 0); + pg.FilesForQuantification = new List { light, heavy }; + pg.IntensitiesByFile = new Dictionary + { + { light, 100.0 }, + { heavy, 250.0 } + }; + Assert.That(pg.SampleGroupResults, Is.Null); + + pg.PopulateSampleGroupResults(); + + string header = pg.GetTabSeparatedHeader(); + string row = pg.ToString(); + string[] headerFields = header.Split('\t'); + + Assert.That(headerFields.Count(h => h.StartsWith("SpectralCount_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("Intensity_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("CountOccupancy_")), Is.EqualTo(2)); + Assert.That(headerFields.Count(h => h.StartsWith("IntensityOccupancy_")), Is.EqualTo(2)); + Assert.That(headerFields.Length, Is.EqualTo(row.Split('\t').Length)); + } [Test] public static void ProteinGroupMergeTest() diff --git a/MetaMorpheus/Test/SearchTaskTest.cs b/MetaMorpheus/Test/SearchTaskTest.cs index 678f1d9ed8..9e49ea5e81 100644 --- a/MetaMorpheus/Test/SearchTaskTest.cs +++ b/MetaMorpheus/Test/SearchTaskTest.cs @@ -303,6 +303,103 @@ public static void PostSearchNormalizeTest() } } + // Malformed exp design (Normalize=false) -> quant skipped, protein-groups TSV has no dynamic quant columns. 
+ // Filename remains AllQuantifiedProteinGroups.tsv because it's driven by DoLabelFreeQuantification, not by quant success. + [Test] + public static void PostSearchMalformedExperimentalDesignSkipsQuant() + { + SearchTask searchTask = new SearchTask() + { + SearchParameters = new SearchParameters + { + Normalize = false, + DoParsimony = true + }, + CommonParameters = new(precursorDeconParams: new IsoDecDeconvolutionParameters()) + }; + + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestMalformedExpDesign"); + string inputFolder = Path.Combine(outputFolder, "inputs"); + Directory.CreateDirectory(inputFolder); + string mzmlPath = Path.Combine(inputFolder, "PrunedDbSpectra.mzml"); + string fastaPath = Path.Combine(inputFolder, "DbForPrunedDb.fasta"); + string expDesignPath = Path.Combine(inputFolder, "ExperimentalDesign.tsv"); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml"), mzmlPath, true); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"), fastaPath, true); + + // Lists an unrelated file so ReadExperimentalDesign emits errors. 
+ using (StreamWriter w = new StreamWriter(expDesignPath)) + { + w.WriteLine("FileName\tCondition\tBiorep\tFraction\tTechrep"); + w.WriteLine("UnrelatedFile.mzml\tcondition\t1\t1\t1"); + } + + try + { + searchTask.RunTask(outputFolder, new List { new DbForTask(fastaPath, false) }, new List { mzmlPath }, "normal"); + + Assert.That(File.Exists(Path.Combine(outputFolder, "AllQuantifiedPeptides.tsv")), Is.False); + + string pgPath = Path.Combine(outputFolder, "AllQuantifiedProteinGroups.tsv"); + Assert.That(File.Exists(pgPath), Is.True); + var lines = File.ReadAllLines(pgPath); + Assert.That(lines.Length, Is.GreaterThan(1)); + var header = lines[0]; + Assert.That(header.Contains("SpectralCount_"), Is.False); + Assert.That(header.Contains("Intensity_"), Is.False); + Assert.That(header.Contains("CountOccupancy_"), Is.False); + Assert.That(header.Contains("IntensityOccupancy_"), Is.False); + Assert.That(lines.Select(l => l.Split('\t').Length).AllSame(), Is.True); + } + finally + { + if (Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); + } + } + + // No exp design + Normalize=false -> defaults built, LFQ runs, dynamic columns appear. + // Complements PostSearchNormalizeTest (no exp design + Normalize=true -> skip). 
+ [Test] + public static void PostSearchNoExpDesignNoNormalizeRunsQuant() + { + SearchTask searchTask = new SearchTask() + { + SearchParameters = new SearchParameters + { + Normalize = false, + DoParsimony = true + }, + CommonParameters = new(precursorDeconParams: new IsoDecDeconvolutionParameters()) + }; + + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestNoExpDesignNoNormalize"); + string inputFolder = Path.Combine(outputFolder, "inputs"); + Directory.CreateDirectory(inputFolder); + string mzmlPath = Path.Combine(inputFolder, "PrunedDbSpectra.mzml"); + string fastaPath = Path.Combine(inputFolder, "DbForPrunedDb.fasta"); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml"), mzmlPath, true); + File.Copy(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta"), fastaPath, true); + + try + { + searchTask.RunTask(outputFolder, new List { new DbForTask(fastaPath, false) }, new List { mzmlPath }, "normal"); + + string pgPath = Path.Combine(outputFolder, "AllQuantifiedProteinGroups.tsv"); + Assert.That(File.Exists(pgPath), Is.True); + var lines = File.ReadAllLines(pgPath); + Assert.That(lines.Length, Is.GreaterThan(1)); + var header = lines[0]; + + Assert.That(header.Contains("SpectralCount_"), Is.True); + Assert.That(header.Contains("CountOccupancy_"), Is.True); + Assert.That(lines.Select(l => l.Split('\t').Length).AllSame(), Is.True); + } + finally + { + if (Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); + } + } + /// /// Test that we don't get a crash if protein groups are not constructed /// @@ -712,4 +809,4 @@ public static void TestSearchTaskResultsTextContents() Directory.Delete(folderPath, true); } } -} \ No newline at end of file +} From b12dd3543a3bec18ded7802f4824d5d0cae72b93 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parrilla Date: Fri, 15 May 2026 16:29:56 -0500 Subject: [PATCH 26/30] Minor update to thread safe observable 
collection meant to facilitate iteration through collection without worrying about threading. --- .../Util/ThreadSafeObservableCollection.cs | 10 +- .../ThreadSafeObservableCollectionTests.cs | 182 ++++++++++++++++++ 2 files changed, 187 insertions(+), 5 deletions(-) create mode 100644 MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs diff --git a/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs b/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs index 97c6bda1b9..581a1ab331 100644 --- a/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs +++ b/MetaMorpheus/GuiFunctions/Util/ThreadSafeObservableCollection.cs @@ -51,10 +51,11 @@ protected override void ClearItems() } // ── Snapshot enumeration ───────────────────────────────────────────────── - // Explicit interface implementation ensures LINQ (which dispatches through - // IEnumerable) gets the snapshot enumerator, not the base class one. + // `new` hides Collection.GetEnumerator so plain foreach on the concrete type also gets + // the snapshot. Callers that statically type as the base ObservableCollection/Collection + // will still see the live enumerator and can hit InvalidOperationException under mutation. 
- IEnumerator IEnumerable.GetEnumerator() + public new IEnumerator GetEnumerator() { T[] snapshot; lock (_lock) @@ -62,8 +63,7 @@ IEnumerator IEnumerable.GetEnumerator() return ((IEnumerable)snapshot).GetEnumerator(); } - IEnumerator IEnumerable.GetEnumerator() => - ((IEnumerable)this).GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); // ── UI-thread marshalling ───────────────────────────────────────────────── diff --git a/MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs b/MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs new file mode 100644 index 0000000000..0c155fca01 --- /dev/null +++ b/MetaMorpheus/Test/GuiTests/ThreadSafeObservableCollectionTests.cs @@ -0,0 +1,182 @@ +using GuiFunctions.Util; +using NUnit.Framework; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; + +namespace Test.GuiTests; + +[TestFixture] +public class ThreadSafeObservableCollectionTests +{ + [Test] + public void DefaultConstructor_StartsEmpty() + { + var c = new ThreadSafeObservableCollection(); + Assert.That(c.Count, Is.EqualTo(0)); + } + + [Test] + public void Constructor_WithInitialCollection_CopiesItems() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + Assert.That(c.Count, Is.EqualTo(3)); + Assert.That(c.ToList(), Is.EqualTo(new[] { 1, 2, 3 })); + } + + // Plain foreach on the concrete type uses the snapshot enumerator (via the `new` GetEnumerator), + // so mutation mid-iteration does not throw and is not observed by the in-flight enumeration. 
+ [Test] + public void SnapshotEnumeration_MutationDuringEnumerationDoesNotThrow() + { + var c = new ThreadSafeObservableCollection(Enumerable.Range(0, 10)); + + var enumerated = new List(); + foreach (var item in c) + { + enumerated.Add(item); + if (item == 5) c.Add(999); + } + + Assert.That(enumerated, Is.EqualTo(Enumerable.Range(0, 10).ToList())); + Assert.That(c.Count, Is.EqualTo(11)); + } + + // Pins the snapshot guarantee at the GetEnumerator call site on the concrete type. + [Test] + public void GetEnumerator_OnConcreteType_ReturnsSnapshot() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + var e = c.GetEnumerator(); + c.Add(4); + + var enumerated = new List(); + while (e.MoveNext()) enumerated.Add(e.Current); + + Assert.That(enumerated, Is.EqualTo(new[] { 1, 2, 3 })); + Assert.That(c.Count, Is.EqualTo(4)); + } + + // The explicit IEnumerable.GetEnumerator is what LINQ uses; verify a representative LINQ op + // returns the snapshot view rather than a live one. + [Test] + public void Linq_ToListReturnsSnapshot() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + var snapshot = c.ToList(); + c.Add(4); + Assert.That(snapshot, Is.EqualTo(new[] { 1, 2, 3 })); + Assert.That(c.ToList(), Is.EqualTo(new[] { 1, 2, 3, 4 })); + } + + // Concurrent Adds must all be preserved (no lost writes from races on the base list). + [Test] + public void ConcurrentAdds_AllItemsPreserved() + { + const int threads = 8; + const int itemsPerThread = 500; + var c = new ThreadSafeObservableCollection(); + + Parallel.For(0, threads, t => + { + for (int i = 0; i < itemsPerThread; i++) + c.Add(t * itemsPerThread + i); + }); + + Assert.That(c.Count, Is.EqualTo(threads * itemsPerThread)); + // Every produced value should appear exactly once. + Assert.That(c.ToList().Distinct().Count(), Is.EqualTo(threads * itemsPerThread)); + } + + // Enumeration on one thread while another thread mutates must not throw. 
+ [Test] + public void ConcurrentEnumerationDuringMutation_DoesNotThrow() + { + var c = new ThreadSafeObservableCollection(Enumerable.Range(0, 100)); + var cts = new CancellationTokenSource(); + + var mutator = Task.Run(() => + { + int i = 1000; + while (!cts.IsCancellationRequested) + { + c.Add(i++); + if (c.Count > 200) c.RemoveAt(0); + } + }); + + var enumerator = Task.Run(() => + { + for (int i = 0; i < 1000; i++) + _ = c.Sum(); + }); + + Assert.DoesNotThrow(() => enumerator.Wait()); + cts.Cancel(); + mutator.Wait(); + } + + [Test] + public void CollectionChanged_FiresOnAdd() + { + var c = new ThreadSafeObservableCollection(); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Add(1); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Add)); + } + + [Test] + public void CollectionChanged_FiresOnRemove() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Remove(2); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Remove)); + } + + [Test] + public void CollectionChanged_FiresOnClear() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2 }); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Clear(); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Reset)); + } + + [Test] + public void CollectionChanged_FiresOnReplace() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + NotifyCollectionChangedAction? 
lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c[1] = 99; + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Replace)); + Assert.That(c[1], Is.EqualTo(99)); + } + + [Test] + public void CollectionChanged_FiresOnMove() + { + var c = new ThreadSafeObservableCollection(new[] { 1, 2, 3 }); + NotifyCollectionChangedAction? lastAction = null; + c.CollectionChanged += (_, e) => lastAction = e.Action; + + c.Move(0, 2); + + Assert.That(lastAction, Is.EqualTo(NotifyCollectionChangedAction.Move)); + Assert.That(c.ToList(), Is.EqualTo(new[] { 2, 3, 1 })); + } +} From a505f620a0cd51df6d0d6f3e76e240c8daddd0c3 Mon Sep 17 00:00:00 2001 From: pcruzparri Date: Sun, 17 May 2026 01:01:00 -0500 Subject: [PATCH 27/30] reduce diff lines --- .../ProteinParsimony/ProteinGroup.cs | 76 ++++++++++++++----- .../Deconvolution/DeconHostViewModel.cs | 3 + .../SearchTask/PostSearchAnalysisTask.cs | 14 ++-- .../Test/Multiplex_Labeling_TMT_iTRAQ.cs | 38 +++++----- MetaMorpheus/Test/ProteinGroupTest.cs | 4 - 5 files changed, 87 insertions(+), 48 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs index 17c57dac57..813affa7df 100644 --- a/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs +++ b/MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs @@ -241,7 +241,7 @@ public override string ToString() { var sb = new StringBuilder(); - // protein accessions + // list of protein accession numbers sb.Append(ProteinGroupName); sb.Append("\t"); @@ -255,12 +255,12 @@ public override string ToString() ListOfProteinsOrderedByAccession.Select(p => p.Organism).Distinct()))); sb.Append("\t"); - // full names + // list of protein names sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", ListOfProteinsOrderedByAccession.Select(p => p.FullName).Distinct()))); sb.Append("\t"); - // masses + // list of masses var sequences = 
ListOfProteinsOrderedByAccession.Select(p => p.BaseSequence).Distinct(); List masses = new List(); foreach (var sequence in sequences) @@ -284,14 +284,15 @@ public override string ToString() sb.Append("" + Proteins.Count); sb.Append("\t"); - // unique peptides + // list of unique peptides if (UniquePeptidesOutput != null) { sb.Append(GlobalVariables.CheckLengthOfOutput(UniquePeptidesOutput)); } + sb.Append("\t"); - // shared peptides + // list of shared peptides if (SharedPeptidesOutput != null) { sb.Append(GlobalVariables.CheckLengthOfOutput(SharedPeptidesOutput)); @@ -312,14 +313,20 @@ public override string ToString() sb.Append("" + UniquePeptides.Select(p => p.FullSequence).Distinct().Count()); sb.Append("\t"); - // sequence coverage + // sequence coverage percent sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageFraction.Select(p => string.Format("{0:0.#####}", p))))); sb.Append("\t"); + + // sequence coverage sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayList))); sb.Append("\t"); + + // sequence coverage with mods sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", SequenceCoverageDisplayListWithMods))); sb.Append("\t"); + + // fragment sequence coverage sb.Append(GlobalVariables.CheckLengthOfOutput(string.Join("|", FragmentSequenceCoverageDisplayList))); sb.Append("\t"); @@ -356,11 +363,11 @@ public override string ToString() } } - // number of PSMs + // number of PSMs for listed peptides sb.Append("" + AllPsmsBelowOnePercentFDR.Count); sb.Append("\t"); - // decoy/contaminant/target + // isDecoy if (IsDecoy) sb.Append("D"); else if (IsContaminant) @@ -369,19 +376,27 @@ public override string ToString() sb.Append("T"); sb.Append("\t"); - // cumulative target/decoy + // cumulative target sb.Append(CumulativeTarget); sb.Append("\t"); + + // cumulative decoy sb.Append(CumulativeDecoy); sb.Append("\t"); - // q value, best peptide score, best peptide q value, best peptide PEP + // q 
value sb.Append(QValue); sb.Append("\t"); + + // best peptide score sb.Append(BestPeptideScore); sb.Append("\t"); + + // best peptide q value sb.Append(BestPeptideQValue); sb.Append("\t"); + + // best peptide PEP sb.Append(BestPeptidePEP); return sb.ToString(); @@ -410,6 +425,7 @@ public override string ToString() foreach (var psm in AllPsmsBelowOnePercentFDR.OfType()) { + // null BaseSequence means that the amino acid sequence is ambiguous; do not use these to calculate sequence coverage if (psm.BaseSequence != null) { psm.GetAminoAcidCoverage(); @@ -417,10 +433,12 @@ public override string ToString() foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods .Select(p => p.SpecificBioPolymer).DistinctBy(pep => pep.FullSequence)) { + // might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo if (Proteins.Contains(peptide.Parent)) { proteinsWithUnambigSeqPsms[peptide.Parent].Add(peptide); + // null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info if (peptide.FullSequence != null) { proteinsWithPsmsWithLocalizedMods[peptide.Parent].Add(peptide); @@ -430,45 +448,57 @@ public override string ToString() } } - // Fragment-level coverage + //Calculate sequence coverage at the amino acid level by looking at fragment specific coverage + //loop through proteins foreach (IBioPolymer protein in ListOfProteinsOrderedByAccession) { + //create a hash set for storing covered one-based residue numbers of protein HashSet coveredResiduesInProteinOneBased = new(); + //loop through PSMs foreach (SpectralMatch psm in AllPsmsBelowOnePercentFDR.OfType() .Where(psm => psm.BaseSequence != null)) { + //Calculate the covered bases within the psm. 
This is one based numbering for the peptide only psm.GetAminoAcidCoverage(); if (psm.FragmentCoveragePositionInPeptide == null) continue; + //loop through each peptide within the psm IEnumerable pwsms = psm.BestMatchingBioPolymersWithSetMods .Select(p => p.SpecificBioPolymer) .Where(p => p.Parent.Accession == protein.Accession); foreach (var pwsm in pwsms) { + //create a hashset to store the covered residues for the peptide, converted to the corresponding indices of the protein HashSet coveredResiduesInPeptide = new(); + //add the peptide start position within the protein to each covered index of the psm foreach (var position in psm.FragmentCoveragePositionInPeptide) { - coveredResiduesInPeptide.Add(position + pwsm.OneBasedStartResidue - 1); + coveredResiduesInPeptide.Add(position + pwsm.OneBasedStartResidue - 1); //subtract one because these are both one based } + + //Add the peptide specific positions, to the overall hashset for the protein coveredResiduesInProteinOneBased.UnionWith(coveredResiduesInPeptide); } } + // create upper/lowercase string char[] fragmentCoverageArray = protein.BaseSequence.ToLower().ToCharArray(); foreach (var residue in coveredResiduesInProteinOneBased) { fragmentCoverageArray[residue - 1] = char.ToUpper(fragmentCoverageArray[residue - 1]); } + FragmentSequenceCoverageDisplayList.Add(new string(fragmentCoverageArray)); } - // Peptide-level coverage + //Calculates the coverage at the peptide level... 
if a peptide is present all of the AAs in the peptide are covered foreach (var protein in ListOfProteinsOrderedByAccession) { HashSet coveredOneBasedResidues = new HashSet(); + // get residue numbers of each peptide in the protein and identify them as observed if the sequence is unambiguous foreach (var peptide in proteinsWithUnambigSeqPsms[protein]) { for (int i = peptide.OneBasedStartResidue; i <= peptide.OneBasedEndResidue; i++) @@ -477,19 +507,27 @@ public override string ToString() } } + // calculate sequence coverage percent double seqCoverageFract = (double)coveredOneBasedResidues.Count / protein.Length; + + // add the percent coverage SequenceCoverageFraction.Add(seqCoverageFract); + // convert the observed amino acids to upper case if they are unambiguously observed string sequenceCoverageDisplay = protein.BaseSequence.ToLower(); var coverageArray = sequenceCoverageDisplay.ToCharArray(); foreach (var obsResidueLocation in coveredOneBasedResidues) { coverageArray[obsResidueLocation - 1] = char.ToUpper(coverageArray[obsResidueLocation - 1]); } + sequenceCoverageDisplay = new string(coverageArray); + + // add the coverage display SequenceCoverageDisplayList.Add(sequenceCoverageDisplay); - // Mods in sequence coverage display + // put mods in the sequence coverage display + // get mods to display in sequence (only unambiguously identified mods) var modsOnThisProtein = new HashSet>(); foreach (var pep in proteinsWithPsmsWithLocalizedMods[protein]) { @@ -566,7 +604,7 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List p.FullFilePathWithExtension == fullFilePath) .FirstOrDefault(); - + //check that file name wasn't changed (can occur in SILAC searches) if (!MzLibUtil.ClassExtensions.IsNullOrEmpty(silacLabels) && spectraFileInfo == null) { foreach (SilacLabel label in silacLabels) @@ -582,7 +620,7 @@ public ProteinGroup ConstructSubsetProteinGroup(string fullFilePath, List public bool Equals(ProteinGroup grp) { - if (grp == null) + //Check 
for null and compare run-time types. + if (grp == null) { return false; } - else if (!this.ListOfProteinsOrderedByAccession.Select(a => a.Accession).ToList() - .SequenceEqual(grp.ListOfProteinsOrderedByAccession.Select(a => a.Accession).ToList())) + else if (!this.ListOfProteinsOrderedByAccession.Select(a=>a.Accession).ToList().SequenceEqual(grp.ListOfProteinsOrderedByAccession.Select(a => a.Accession).ToList())) { return false; } diff --git a/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs b/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs index cd646e6684..496f6c9c97 100644 --- a/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs +++ b/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs @@ -113,6 +113,9 @@ public DeconHostViewModel(DeconvolutionParameters? initialPrecursorParameters = } break; + + default: // This will only be hit if a new deconvolution type is added to mzlib and not handled here + throw new ArgumentOutOfRangeException(); } } diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index a5d9c9fdb6..a8ccd243ce 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -110,11 +110,11 @@ public MyTaskResults Run() SpectralLibraryGeneration(); } - if (Parameters.SearchParameters.UpdateSpectralLibrary) + if(Parameters.SearchParameters.UpdateSpectralLibrary) { UpdateSpectralLibrary(); } - + if (Parameters.SearchParameters.WriteDigestionProductCountFile) { WriteDigestionCountByProtein(); @@ -722,7 +722,7 @@ private void WritePsmResults() // write summary text if (psmsForPsmResults.FilteringNotPerformed) { - + Parameters.SearchTaskResults.AddPsmPeptideProteinSummaryText( $"PEP could not be calculated due to an insufficient number of {GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s. 
Results were filtered by q-value." + Environment.NewLine); @@ -773,14 +773,16 @@ private void WriteIndividualPsmResults() // generated by analyzing one file by itself. Therefore, the FDR info should change between AllResults and FileSpecific string strippedFileName = Path.GetFileNameWithoutExtension(psmFileGroup.Key); var psmsForThisFile = psmFileGroup.ToList(); - CalculatePsmAndPeptideFdr(psmsForThisFile, "PSM", false); + CalculatePsmAndPeptideFdr(psmsForThisFile,"PSM", false); var psmsToWrite = FilteredPsms.Filter(psmsForThisFile, - CommonParameters, + CommonParameters, includeDecoys: Parameters.SearchParameters.WriteDecoys, includeContaminants: Parameters.SearchParameters.WriteContaminants, includeAmbiguous: true, includeHighQValuePsms: Parameters.SearchParameters.WriteHighQValuePsms); + int count = psmsToWrite.Where(psm => psm.PsmFdrInfo.PEP <= 0.01).Count(); + // write PSMs string writtenFile = Path.Combine(Parameters.IndividualResultsOutputFolder, strippedFileName + $"_{GlobalVariables.AnalyteType.GetSpectralMatchLabel()}s.{GlobalVariables.AnalyteType.GetSpectralMatchExtension()}"); WritePsmsToTsv(psmsToWrite, writtenFile); @@ -1632,7 +1634,7 @@ private string AllResultsTotals() } } - var keys = ResultsDictionary.Keys.Where(k=>k.Item1 != "All").OrderBy(k=>k.Item1).ToList(); + var keys = ResultsDictionary.Keys.Where(k => k.Item1 != "All").OrderBy(k=>k.Item1).ToList(); if (keys.Any()) { sb.AppendLine(); diff --git a/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs b/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs index 5db6ff1fdf..3aa8c7302c 100644 --- a/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs +++ b/MetaMorpheus/Test/Multiplex_Labeling_TMT_iTRAQ.cs @@ -176,18 +176,18 @@ public static void TestTmtIonsArentTreatedLikePeptideIsotopicEnvelopes() // We're going to create a fake MsDataScan that contains all fragment ions for PEPTIDEK and all TMT reporter ions // The TMT reporter ions will have intensities that mimic the isotopic distribution predicted by 
the Averagine Model - + List allIonMzs = fragments.Select(m => m.NeutralMass.ToMz(1)).OrderBy(m => m).ToList(); - + double[] mzArray = allIonMzs.ToArray(); double[] intensityArray = new double[mzArray.Length]; // Corresponding to 126, 127N, 127C, 128N, 128C, 129N, 129C, 130N, 130C, 131N, 131C - var tmtIntensities = new double[] { 92, 92, 8, 12, 92, 92, 16, 20, 2, 3, 5 }; + var tmtIntensities = new double[] { 92, 92, 8, 12, 92, 92, 16, 20, 2, 3, 5 }; for (int i = 0; i < intensityArray.Length; i++) { - if (i < tmtIntensities.Length) + if(i < tmtIntensities.Length) { intensityArray[i] = tmtIntensities[i]; } @@ -199,9 +199,9 @@ public static void TestTmtIonsArentTreatedLikePeptideIsotopicEnvelopes() // Create a MS1 scan var ms1Spectrum = new MzSpectrum(new double[] { peptide.MonoisotopicMass.ToMz(2), (peptide.MonoisotopicMass + Constants.C13MinusC12).ToMz(2) }, new double[] { 10000, 5000 }, false); - var ms1Scan = new MsDataScan(ms1Spectrum, 1, 1, true, Polarity.Positive, 1.0, new MzRange(100, 2000), + var ms1Scan = new MsDataScan(ms1Spectrum, 1, 1, true, Polarity.Positive, 1.0, new MzRange(100, 2000), scanFilter: "", - MZAnalyzerType.Orbitrap, + MZAnalyzerType.Orbitrap, totalIonCurrent: 1000, null, noiseData: null, @@ -216,13 +216,13 @@ public static void TestTmtIonsArentTreatedLikePeptideIsotopicEnvelopes() var ms2Spectrum = new MzSpectrum(mzArray, intensityArray, false); var ms2Scan = new MsDataScan(ms2Spectrum, 2, 2, true, Polarity.Positive, 1.1, new MzRange(100, 2000), scanFilter: "", MZAnalyzerType.Orbitrap, - totalIonCurrent: 1000, null, null, "scan=2", - selectedIonMz: peptide.MonoisotopicMass.ToMz(2), + totalIonCurrent: 1000, null, null, "scan=2", + selectedIonMz: peptide.MonoisotopicMass.ToMz(2), selectedIonChargeStateGuess: 2, - selectedIonIntensity: 10000, - isolationMZ: peptide.MonoisotopicMass.ToMz(2), + selectedIonIntensity: 10000, + isolationMZ: peptide.MonoisotopicMass.ToMz(2), isolationWidth: 2.2, - DissociationType.HCD, 1, + DissociationType.HCD, 1, 
selectedIonMonoisotopicGuessMz: peptide.MonoisotopicMass.ToMz(2)); string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestTmtOutput"); @@ -277,9 +277,9 @@ public static void TestTmtQuantificationOutput() List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("search", searchTask) }; string mzmlName = @"TMT_test\VA084TQ_6.mzML"; - string fastaName = @"TMT_test\mouseTMT.fasta"; + string fastaName = @"TMT_test\mouseTMT.fasta"; string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestTmtOutput"); - if (Directory.Exists(outputFolder)) + if(Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); engine.Run(); @@ -291,7 +291,7 @@ public static void TestTmtQuantificationOutput() string[] ionLabelsInHeader = header[^11..]; // Last 11 columns should be the TMT labels Assert.That(ionLabelsInHeader, Is.EquivalentTo(new string[] { "126", "127N", "127C", "128N", "128C", "129N", "129C", "130N", "130C", "131N", "131C" })); - + double channelSum127N = 0; for (int i = 1; i < peaksResults.Length; i++) { @@ -317,7 +317,7 @@ public static void TestMs3TmtQuantificationWith() try { - if (Directory.Exists(outputFolder)) + if(Directory.Exists(outputFolder)) Directory.Delete(outputFolder, true); var engine = new EverythingRunnerEngine(taskList, new List { mzmlName }, new List { new DbForTask(fastaName, false) }, outputFolder); @@ -421,7 +421,7 @@ public static void TestDiLeuQuantificationOutput() Assert.That(peaksResults.Length == 2); ionLabelsInHeader = peaksResults[0].Trim().Split('\t')[^4..]; - Assert.That(ionLabelsInHeader, Is.EquivalentTo(new string[] { "115", "116", "117", "118" })); + Assert.That(ionLabelsInHeader, Is.EquivalentTo(new string[] {"115", "116", "117", "118"})); ionSum = peaksResults[1].Trim().Split('\t')[^4..].Select(s => 
double.Parse(s)).Sum(); Assert.That(ionSum, Is.EqualTo(115537).Within(1)); @@ -615,7 +615,7 @@ public static void TestDoNotCountDiagnosticIonsInScore_HCD() Assert.That(psm.PeptideDescription, Is.EqualTo("full")); Assert.That(psm.ProteinAccession, Is.EqualTo("Q99LF4")); - Directory.Delete(outputFolder, true); + Directory.Delete(outputFolder,true); } [Test] public static void TestDoNotCountDiagnosticIonsInScore_LowCID() @@ -632,7 +632,7 @@ public static void TestDoNotCountDiagnosticIonsInScore_LowCID() for (int i = 0; i < sorted_theoretical_product_masses_for_this_peptide.Length; i++) { - if (i != 11) + if(i != 11) { productsWithLocalizedMassDiff.Add(new Product(ProductType.b, FragmentationTerminus.Both, sorted_theoretical_product_masses_for_this_peptide[i], 1, 1, 0)); } @@ -974,7 +974,7 @@ public static void TestSearchTaskExceptionOnNullMassTag() { // This test simulates what happens in SearchTask when IsobaricMassTag.GetIsobaricMassTag returns null // The actual SearchTask code throws MetaMorpheusException in this case - + string invalidModId = "InvalidModification"; var massTag = IsobaricMassTag.GetIsobaricMassTag(invalidModId); diff --git a/MetaMorpheus/Test/ProteinGroupTest.cs b/MetaMorpheus/Test/ProteinGroupTest.cs index 7b6dcf96a6..7a852816bf 100644 --- a/MetaMorpheus/Test/ProteinGroupTest.cs +++ b/MetaMorpheus/Test/ProteinGroupTest.cs @@ -88,9 +88,6 @@ public static void ProteinGroupToStringTest() //string exectedProteinGroupToString = proteinGroup1.ToString(); string exectedProteinGroupToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; - var out1 = proteinGroup1.ToString().Split("\t"); - var out1h = proteinGroup1.GetTabSeparatedHeader().Split("\t"); - Assert.That(out1.Count(), Is.EqualTo(out1h.Count())); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupToString)); @@ -100,7 +97,6 @@ public static void ProteinGroupToStringTest() new HashSet(), new HashSet()); string 
exectedProteinGroupWithDecoyToString = "prot1|prot2\t|\t\t\t779.30073507823|778.3167194953201\t2\t\t\t2\t2\t\t\t\t\t0\tT\t0\t0\t0\t0\t0\t0"; - var out2 = proteinGroup1.ToString(); Assert.That(proteinGroup1.ToString(), Is.EqualTo(exectedProteinGroupWithDecoyToString)); } From de8d5c430618538b534d5bce3b14238e1f7d8075 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parri Date: Tue, 26 May 2026 10:55:06 -0500 Subject: [PATCH 28/30] Remove default throw in DeconHostViewModel switch Reverts the defensive default case that throws ArgumentOutOfRangeException for unhandled DeconvolutionType enum values (FromFile, Multiple). These values were added in mzLib 1.0.579 and are not yet supported by the GUI. Keeping the switch without a default allows unknown enum values to fall through silently, matching master behavior and fixing test failures in DeconHostViewModelTests and MetaDraw tests that transitively create a DeconHostViewModel. This is intentionally left as out-of-scope for this PR. --- .../ViewModels/Deconvolution/DeconHostViewModel.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs b/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs index 496f6c9c97..cd646e6684 100644 --- a/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs +++ b/MetaMorpheus/GuiFunctions/ViewModels/Deconvolution/DeconHostViewModel.cs @@ -113,9 +113,6 @@ public DeconHostViewModel(DeconvolutionParameters? 
initialPrecursorParameters = } break; - - default: // This will only be hit if a new deconvolution type is added to mzlib and not handled here - throw new ArgumentOutOfRangeException(); } } From 71ba649fd22d05c3a1b58310ac47c251ee5985c8 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parri Date: Tue, 26 May 2026 12:28:57 -0500 Subject: [PATCH 29/30] Fix SILAC copy constructor dropping IsobaricMassTagReporterIonIntensities and PeptideFdrInfo The protected SpectralMatch copy constructor (used by PeptideSpectralMatch.Clone for SILAC light/heavy conversion) was missing copies of: - IsobaricMassTagReporterIonIntensities (TMT/iTRAQ/diLeu reporter ions) - PeptideFdrInfo (peptide-level FDR stats) Both are now copied from the source PSM to the clone. Adds TestSilacClonePreservesQuantAndFdrData regression test. --- MetaMorpheus/EngineLayer/SpectralMatch.cs | 2 ++ MetaMorpheus/Test/SilacTest.cs | 35 +++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/MetaMorpheus/EngineLayer/SpectralMatch.cs b/MetaMorpheus/EngineLayer/SpectralMatch.cs index bd0544f0e6..d132c8313a 100644 --- a/MetaMorpheus/EngineLayer/SpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/SpectralMatch.cs @@ -427,6 +427,7 @@ protected SpectralMatch(SpectralMatch psm, List bestMat ScanMetadata = psm.ScanMetadata; ScanIndex = psm.ScanIndex; PrecursorScanDeconvolutionScore = psm.PrecursorScanDeconvolutionScore; + IsobaricMassTagReporterIonIntensities = psm.IsobaricMassTagReporterIonIntensities; ModsChemicalFormula = psm.ModsChemicalFormula; Notch = psm.Notch; @@ -442,6 +443,7 @@ protected SpectralMatch(SpectralMatch psm, List bestMat ModsIdentified = psm.ModsIdentified; LocalizedScores = psm.LocalizedScores; FdrInfo = psm.FdrInfo; + PeptideFdrInfo = psm.PeptideFdrInfo; Score = psm.Score; RunnerUpScore = psm.RunnerUpScore; IsDecoy = psm.IsDecoy; diff --git a/MetaMorpheus/Test/SilacTest.cs b/MetaMorpheus/Test/SilacTest.cs index 3679c5dd5e..080fa05651 100644 --- a/MetaMorpheus/Test/SilacTest.cs +++ 
b/MetaMorpheus/Test/SilacTest.cs @@ -677,5 +677,40 @@ public static void TestSilacHelperMethods() //Test no crash in weird situations SilacConversions.SilacConversionsPostQuantification(null, null, null, new List(), null, new HashSet(), null, new List(), new Dictionary(), true); } + + /// + /// Verifies that the SILAC clone constructor preserves IsobaricMassTagReporterIonIntensities + /// and PeptideFdrInfo, both of which were previously dropped during cloning. + /// Regression guard for the ptm_stoich branch fixes. + /// + [Test] + public static void TestSilacClonePreservesQuantAndFdrData() + { + var protein = new Protein("PEPTIDE", "ACCESSION"); + var pwsm = new PeptideWithSetModifications(protein, new DigestionParams(), 1, 7, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + var scan = new Ms2ScanWithSpecificMass( + new TestDataFile(pwsm, "quadratic").GetOneBasedScan(2), 100, 1, null, new CommonParameters()); + + var psm = new PeptideSpectralMatch(pwsm, 0, 10, 0, scan, new CommonParameters(), new List()); + psm.ResolveAllAmbiguities(); + + // Set fields that the clone constructor must preserve + var reporterIons = new double[] { 100.0, 200.0, 300.0 }; + typeof(PeptideSpectralMatch).BaseType.GetProperty("IsobaricMassTagReporterIonIntensities", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.Public) + .SetValue(psm, reporterIons); + psm.PeptideFdrInfo = new FdrInfo { QValue = 0.05, PEP = 0.1 }; + + // Clone (SILAC path) + var clone = psm.Clone(new List + { + new SpectralMatchHypothesis(0, pwsm, new List(), 10) + }); + + // Assertions + Assert.That(clone.IsobaricMassTagReporterIonIntensities, Is.EqualTo(reporterIons)); + Assert.That(clone.PeptideFdrInfo, Is.Not.Null); + Assert.That(clone.PeptideFdrInfo.QValue, Is.EqualTo(0.05)); + Assert.That(clone.PeptideFdrInfo.PEP, Is.EqualTo(0.1)); + } } } From e994d03e4cf7fefe40535494affa1a0c6b172437 Mon Sep 17 00:00:00 2001 From: Peter Cruz Parri Date: Thu, 28 May 2026 16:38:41 -0500 Subject: 
[PATCH 30/30] accidentally removed used modules --- MetaMorpheus/Test/SilacTest.cs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/MetaMorpheus/Test/SilacTest.cs b/MetaMorpheus/Test/SilacTest.cs index 080fa05651..23261d05f6 100644 --- a/MetaMorpheus/Test/SilacTest.cs +++ b/MetaMorpheus/Test/SilacTest.cs @@ -1,6 +1,12 @@ using EngineLayer; +using EngineLayer.FdrAnalysis; +using EngineLayer.SpectrumMatch; using MassSpectrometry; -using NUnit.Framework; +using NUnit.Framework; +using Omics; +using Omics.Digestion; +using Omics.Fragmentation; +using Omics.Modifications; using Proteomics; using Proteomics.AminoAcidPolymer; using Proteomics.ProteolyticDigestion; @@ -8,11 +14,8 @@ using System.Collections.Generic; using System.IO; using EngineLayer.DatabaseLoading; -using Omics.Modifications; using TaskLayer; using UsefulProteomicsDatabases; -using Omics; -using Omics.Digestion; namespace Test {