Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
3b1ddb8
New clean repo with ptm_stoch contents. The methods for occupancy cal…
pcruzparri Aug 22, 2025
2949457
Added TestMzLibUtils tests for quantified mods, peptides, and protein…
pcruzparri Aug 25, 2025
25bf8da
Added PG and Quant object setup tests. Need to finish these tests, th…
pcruzparri Aug 26, 2025
31c40cd
Finshed TestSetUpQuantificationObjects. Removed Peptides field (and i…
pcruzparri Aug 28, 2025
1cbfbaf
Refactored quantification util classes
pcruzparri Sep 1, 2025
6389de1
improving quantprot exception throw.
pcruzparri Oct 2, 2025
302edd7
Extended commenting. Added a peptide record class that stores the pep…
pcruzparri Oct 28, 2025
c1d304a
delayed test fixes....
pcruzparri Nov 3, 2025
10cb9c5
Merge branch 'master' into PTMStoichiometry
Alexander-Sol Nov 4, 2025
65ad247
Adding GeneName and Organism fields to QuantifiedProteinGroup. FIXED …
pcruzparri Nov 4, 2025
e41ca4d
Merge branch 'PTMStoichiometry' of https://github.com/pcruzparri/mzLi…
pcruzparri Nov 4, 2025
0fc4e4f
Merge branch 'master' into PTMStoichiometry
pcruzparri Nov 17, 2025
fd0d1d9
Apply suggestions from code review
pcruzparri Dec 17, 2025
18e4c5e
Apply suggestions from code review
pcruzparri Dec 17, 2025
ebc61cd
Apply suggestion from @Copilot
pcruzparri Dec 17, 2025
2ed9b84
Apply suggestion from @Copilot
pcruzparri Dec 17, 2025
a120689
Merge branch 'master' into PTMStoichiometry
pcruzparri Dec 17, 2025
8fc7a09
Merge branch 'master' into PTMStoichiometry
pcruzparri Mar 10, 2026
762348b
Merge branch 'master' into PTMStoichiometry
pcruzparri Mar 10, 2026
0e74812
Static occuancy methods and integration into Omics.BioPolymerGroup. N…
pcruzparri Mar 11, 2026
edda10f
cleaning docs and small bug risks
pcruzparri Mar 13, 2026
4dfd0c3
removing occupancy from sequence coverage. Adding sample group class …
pcruzparri Mar 16, 2026
ef7efb7
Fixing tests. Still need to add tests for new classes. Updated some p…
pcruzparri Mar 17, 2026
d984007
remove property setter calls to populatesamplegroupresults. method sh…
pcruzparri Mar 17, 2026
79ab239
coverage improvement and small bug fixes.
pcruzparri Mar 18, 2026
ae7d994
minor corrections from claude
pcruzparri Mar 19, 2026
af7abce
temp. added ScanMetadata
pcruzparri Mar 23, 2026
c91cd6f
temp save, but test run works
pcruzparri Mar 25, 2026
186a1ca
cleaning biopolymergroup
pcruzparri Mar 25, 2026
146ce63
make sure psms with multiple pwsm matches do not inflate the psm coun…
pcruzparri Mar 27, 2026
703223a
kept both (entrapment and grouptype) conflicting properties.
pcruzparri Mar 27, 2026
44f9007
restored accidentally deleted code.
pcruzparri Mar 28, 2026
e9f2ad9
Merge branch 'PTMStoichiometry' of https://github.com/pcruzparri/mzLi…
pcruzparri Mar 28, 2026
01d9677
preventativce maintainance
Mar 29, 2026
4eb81ef
unit tests to promote understanding
Mar 29, 2026
129f04b
Bug fix for inflated occupancies due to Full sequences from the same …
pcruzparri Mar 30, 2026
d066fff
Merge pull request #4 from trishorts/PTMStoichiometry
pcruzparri Mar 30, 2026
b0e8f04
fix counting and only report unambiguous mods.
pcruzparri Apr 5, 2026
720e181
Cleaning code and implementing some suggestions. Renamed BioPolymerGr…
pcruzparri Apr 6, 2026
54c3b9f
merging shortreed work + cleanup. important notes: 1) only unambiguou…
pcruzparri Apr 6, 2026
c8b4b14
revert nuspec
pcruzparri Apr 6, 2026
7b2984a
final fix. output seems correct.
pcruzparri Apr 8, 2026
ec53d72
final fix. output seems correct.
pcruzparri Apr 8, 2026
944c72b
Merge branch 'PTMStoichiometry' of https://github.com/pcruzparri/mzLi…
pcruzparri Apr 8, 2026
4d1380a
nuspec?
pcruzparri Apr 8, 2026
5930087
test file instead of folder referencing in dotnet.yaml to see if that…
pcruzparri Apr 9, 2026
5d47580
dotnet.yml ProjectFolder -> ProjectFolder/Project.csproj (#1044)
pcruzparri Apr 9, 2026
198b3b2
Merge branch 'master' into PTMStoichiometry
nbollis Apr 10, 2026
5959f57
Xml protein writer fix (#1041)
Alexander-Sol Apr 10, 2026
466fed1
Merge branch 'master' into PTMStoichiometry
trishorts Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions .github/workflows/dotnet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,18 @@ jobs:
- name: Change MetaMorpheus mzLib version and restore
run: |
cd ./MetaMorpheus/MetaMorpheus;
dotnet remove CMD package mzLib;
dotnet add CMD package mzLib -v 9.9.9;
dotnet remove GUI package mzLib;
dotnet add GUI package mzLib -v 9.9.9;
dotnet remove GuiFunctions package mzLib;
dotnet add GuiFunctions package mzLib -v 9.9.9;
dotnet remove EngineLayer package mzLib;
dotnet add EngineLayer package mzLib -v 9.9.9;
dotnet remove Test package mzLib;
dotnet add Test package mzLib -v 9.9.9;
dotnet remove TaskLayer package mzLib;
dotnet add TaskLayer package mzLib -v 9.9.9;
dotnet remove CMD/CMD.csproj package mzLib;
dotnet add CMD/CMD.csproj package mzLib -v 9.9.9;
dotnet remove GUI/GUI.csproj package mzLib;
dotnet add GUI/GUI.csproj package mzLib -v 9.9.9;
dotnet remove GuiFunctions/GuiFunctions.csproj package mzLib;
dotnet add GuiFunctions/GuiFunctions.csproj package mzLib -v 9.9.9;
dotnet remove EngineLayer/EngineLayer.csproj package mzLib;
dotnet add EngineLayer/EngineLayer.csproj package mzLib -v 9.9.9;
dotnet remove Test/Test.csproj package mzLib;
dotnet add Test/Test.csproj package mzLib -v 9.9.9;
dotnet remove TaskLayer/TaskLayer.csproj package mzLib;
dotnet add TaskLayer/TaskLayer.csproj package mzLib -v 9.9.9;
dotnet restore;
- name: Build MetaMorpheus
run: cd ./MetaMorpheus/MetaMorpheus && dotnet build --no-restore
Expand Down
12 changes: 7 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# LLM/Agent specific files
/.claude
/.opencode
/.opencode/config/coverage.json
/AGENTS.md
.claude/
.serena/
# Folder for agent-generated PR comments and suggested fixes
.pr_comments/

# User-specific files
*.suo
Expand Down Expand Up @@ -249,8 +256,3 @@ ModelManifest.xml

# Macintosh files
**/.DS_Store

/.claude
/.opencode
/.opencode/config/coverage.json
/AGENTS.md
7 changes: 7 additions & 0 deletions mzLib/MassSpectrometry/ExperimentalDesign/ISampleInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,12 @@ public interface ISampleInfo : IComparable<ISampleInfo>, IEquatable<ISampleInfo>
/// Fraction identifier for fractionated workflows. Returns 0 if not applicable.
/// </summary>
int Fraction { get; }

/// <summary>
/// Name of the file with its extension removed, computed on each access from
/// <see cref="FullFilePathWithExtension"/>.
/// Used for display labels in quantification output columns.
/// </summary>
string FilenameWithoutExtension
{
    get { return System.IO.Path.GetFileNameWithoutExtension(FullFilePathWithExtension); }
}
}
}
52 changes: 52 additions & 0 deletions mzLib/MassSpectrometry/ScanMetadata.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
namespace MassSpectrometry;

/// <summary>
/// Small immutable record holding the scalar scan and precursor metadata taken from an MS2 scan.
/// One instance is meant to be shared by all spectral matches (PSMs) that come from the same
/// scan/precursor, so the scalar values are not duplicated and the heavyweight scan objects
/// (MsDataScan, MzSpectrum, IsotopicEnvelope[]) can be released from memory after scoring.
///
/// The scan-level values (OneBasedScanNumber through NativeId) are the same for every precursor
/// deconvoluted from one raw scan. The precursor-level values (PrecursorCharge through OneOverK0)
/// belong to a single deconvoluted precursor and may differ across chimeric identifications
/// from the same scan.
/// </summary>
/// <param name="OneBasedScanNumber">Scan number from the raw file, counted from one.</param>
/// <param name="OneBasedPrecursorScanNumber">Scan number (counted from one) of the precursor (MS1) scan, if available.</param>
/// <param name="RetentionTime">Retention time, in minutes.</param>
/// <param name="NumPeaks">Peak count of the MS2 spectrum at the time of extraction.</param>
/// <param name="TotalIonCurrent">Total ion current of the MS2 scan.</param>
/// <param name="NativeId">Vendor-native identifier string of the scan.</param>
/// <param name="FullFilePath">Path (absolute or relative) to the spectra file the scan came from.</param>
/// <param name="PrecursorCharge">Charge state assigned to the deconvoluted precursor.</param>
/// <param name="PrecursorMonoisotopicPeakMz">Monoisotopic m/z of the deconvoluted precursor.</param>
/// <param name="PrecursorMass">Neutral monoisotopic precursor mass, derived from m/z and charge.</param>
/// <param name="PrecursorIntensity">MS1 intensity of the precursor ion.</param>
/// <param name="PrecursorEnvelopePeakCount">Peak count of the precursor isotopic envelope.</param>
/// <param name="PrecursorFractionalIntensity">Precursor intensity as a fraction of the envelope total. -1 if unavailable.</param>
/// <param name="OneOverK0">Inverse reduced ion mobility (1/K0) for TIMS data; null for non-IMS instruments.</param>
public record ScanMetadata(
    // ---- Values shared by every precursor of the scan ----
    int OneBasedScanNumber,
    int? OneBasedPrecursorScanNumber,
    double RetentionTime,
    int NumPeaks,
    double TotalIonCurrent,
    string NativeId,
    string FullFilePath,

    // ---- Values specific to one deconvoluted precursor ----
    int PrecursorCharge,
    double PrecursorMonoisotopicPeakMz,
    double PrecursorMass,
    double PrecursorIntensity,
    int PrecursorEnvelopePeakCount,
    double PrecursorFractionalIntensity,
    double? OneOverK0 = null)
{
    /// <summary>
    /// File name of <see cref="FullFilePath"/> with the directory and extension stripped.
    /// Recomputed on every access rather than stored.
    /// </summary>
    public string FilenameWithoutExtension
    {
        get { return System.IO.Path.GetFileNameWithoutExtension(FullFilePath); }
    }
}
28 changes: 24 additions & 4 deletions mzLib/MzLibUtil/ClassExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
{
public static class ClassExtensions
{
/// <summary>
/// Regex pattern for a bracketed modification annotation: an optional leading '-', an opening '[',
/// a lazily captured body, and a closing ']'. The negative lookbehind keeps the closing bracket from
/// matching when it is immediately preceded by "[I", "[II", and so on.
/// </summary>
public static readonly string ModificationPattern = @"-?\[(.+?)(?<!\[I+)\]";
/// <summary>
/// Regex pattern matching either a ';' or a '|' character.
/// </summary>
public static readonly string ProteinSplitPattern = @";|\|";

// Regex instances built once from the patterns above (with RegexOptions.Compiled) and reused.
private static readonly Regex CompiledModificationPattern = new(ModificationPattern, RegexOptions.Compiled);
private static readonly Regex CompiledProteinSplitPattern = new(ProteinSplitPattern, RegexOptions.Compiled);

/// <summary>
/// Applies a boxcar smoothing algorithm to the input data.
/// </summary>
Expand Down Expand Up @@ -257,12 +263,9 @@
// "(.+?)": captures the content of the mod, which can be anything except for a closing bracket
// "(?<!\[I+)": negative lookbehind to ensure that the closing bracket match does not correspond to a cation charge state (also defined with brackets).
// "\]": indicates the end of the mod
string pattern = @"-?\[(.+?)(?<!\[I+)\]";
Regex regex = new(pattern);

Dictionary<int, string> modDict = new();

MatchCollection matches = regex.Matches(fullSeq);
MatchCollection matches = CompiledModificationPattern.Matches(fullSeq);
int totalCaptureLength = 0;
foreach (Match match in matches)
{
Expand All @@ -283,6 +286,12 @@
return modDict;
}

/// <summary>
/// Produces a base sequence from a full sequence by replacing every match of a modification
/// pattern with <paramref name="replacement"/> (or removing it when no replacement is given).
/// </summary>
/// <param name="fullSeq">The full sequence to process.</param>
/// <param name="modPattern">
/// Optional regex pattern identifying the text to replace. When null, the class's shared
/// compiled modification pattern is used.
/// </param>
/// <param name="replacement">Optional text substituted for each match. When null, matches are removed.</param>
/// <returns>The input with every pattern match replaced.</returns>
public static string GetBaseSequenceFromFullSequence(this string fullSeq, string modPattern = null, string replacement = null)
{
    // The parameters are plain 'string' rather than 'string?': this file is not in a
    // '#nullable' annotations context, so the '?' annotation only produced warning CS8632.
    string substitute = replacement ?? string.Empty;

    if (modPattern == null)
    {
        return CompiledModificationPattern.Replace(fullSeq, substitute);
    }

    // The static Regex.Replace overload reuses cached regexes, so repeated calls with the
    // same custom pattern do not re-parse it each time.
    return Regex.Replace(fullSeq, modPattern, substitute);
}

/// <summary>
/// Fixes an issue where the | appears and throws off the numbering if there are multiple mods on a single amino acid.
/// </summary>
Expand All @@ -296,5 +305,16 @@
Regex regexSpecialChar = new(specialCharacter);
fullSeq = regexSpecialChar.Replace(fullSeq, replacement);
}

/// <summary>
/// Breaks a protein group name into its individual accessions, cutting at every <c>;</c> or
/// <c>|</c> delimiter.
/// The input is expected to be a clean accession string (e.g., "P12345|Q67890"). Do not pass a
/// full sequence with modification annotations: a <c>|</c> character inside modification
/// brackets would cause incorrect splits.
/// </summary>
public static string[] SplitProteinAccessions(this string proteinGroupName)
    => CompiledProteinSplitPattern.Split(proteinGroupName);
}
}
4 changes: 4 additions & 0 deletions mzLib/MzLibUtil/MzLibUtil.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@
<PackageReference Include="Microsoft.Win32.Registry" Version="5.0.0" />
</ItemGroup>

<ItemGroup>
<Folder Include="PositionFrequencyAnalysis\" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using Easy.Common.Extensions;
using System.Collections.Generic;
using System;

namespace MzLibUtil.PositionFrequencyAnalysis
{
    /// <summary>
    /// Handles analysis and organization of protein group quantification from peptide records.
    /// </summary>
    public class PositionFrequencyAnalysis
    {
        /// <summary>
        /// Dictionary mapping protein group names to their quantification data.
        /// It is assigned (and rebuilt from scratch) by
        /// <see cref="SetUpQuantificationFromQuantifiedPeptideRecords"/>; this class does not assign it before that call.
        /// </summary>
        public Dictionary<string, QuantifiedProteinGroup> ProteinGroups { get; private set; }

        /// <summary>
        /// Populates protein groups with their respective proteins and peptides from a list of quantified peptide records.
        /// The resulting protein groups are stored in the ProteinGroups property with the protein group name strings as keys.
        /// Any previously stored protein groups are discarded.
        /// </summary>
        /// <param name="peptides">A list of <see cref="QuantifiedPeptideRecord"/>, which store a peptide's full sequence, mapped protein groups, and intensity.</param>
        /// <param name="proteinSequences">An optional dictionary of protein sequences to use for mapping peptides to proteins.
        /// If not provided, no sequence is assigned to the <see cref="QuantifiedProtein"/> objects. However, this parameter should not be null if
        /// protein stoichiometry is the goal, since it is needed to align the peptides to the parent protein.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="peptides"/> is null.</exception>
        public void SetUpQuantificationFromQuantifiedPeptideRecords(List<QuantifiedPeptideRecord> peptides, Dictionary<string, string> proteinSequences = null)
        {
            ArgumentNullException.ThrowIfNull(peptides);
            ProteinGroups = new Dictionary<string, QuantifiedProteinGroup>();

            foreach (var peptide in peptides)
            {
                // A shared peptide can belong to several protein groups. It is mapped separately to
                // each one, primarily because each protein group is reported separately in MetaMorpheus.
                foreach (var pg in peptide.ProteinGroups)
                {
                    // Single lookup: fetch the group if it is known, otherwise create and store it.
                    if (!ProteinGroups.TryGetValue(pg, out var proteinGroup))
                    {
                        proteinGroup = new QuantifiedProteinGroup(pg);
                        ProteinGroups[pg] = proteinGroup;
                    }

                    foreach (var proteinName in pg.SplitProteinAccessions())
                    {
                        // Register the protein in the group the first time it is encountered.
                        if (!proteinGroup.Proteins.TryGetValue(proteinName, out var protein))
                        {
                            protein = new QuantifiedProtein(proteinName);
                            proteinGroup.Proteins[proteinName] = protein;

                            // The sequence is only set when the caller supplied one for this accession.
                            if (proteinSequences != null && proteinSequences.TryGetValue(proteinName, out var sequence))
                            {
                                protein.Sequence = sequence;
                            }
                        }

                        // Peptides are keyed by base sequence: the first record creates the entry,
                        // later records with the same base sequence add their full sequence to it.
                        if (!protein.Peptides.TryGetValue(peptide.BaseSequence, out var quantifiedPeptide))
                        {
                            quantifiedPeptide = new QuantifiedPeptide(peptide.FullSequence, intensity: peptide.Intensity);
                            protein.Peptides[peptide.BaseSequence] = quantifiedPeptide;
                        }
                        else
                        {
                            quantifiedPeptide.AddFullSequence(peptide.FullSequence, intensity: peptide.Intensity);
                        }
                    }
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace MzLibUtil.PositionFrequencyAnalysis
{
    /// <summary>
    /// Holds the name, positions, and intensity of a single quantified modification.
    /// </summary>
    public class QuantifiedModification
    {
        /// <summary>Name of the modification, as passed to the constructor.</summary>
        public string Name { get; set; }

        /// <summary>Position of the modification within the peptide.</summary>
        public int PeptidePositionZeroIsNTerminus { get; set; }

        /// <summary>Position of the modification within the parent protein; -1 when unknown.</summary>
        public int ProteinPositionZeroIsNTerminus { get; set; }

        /// <summary>Intensity stored for the modification.</summary>
        public double Intensity { get; set; }

        /// <summary>
        /// Creates a QuantifiedModification from its name, positions, and intensity.
        /// </summary>
        /// <param name="name">Full name of the modification, in the format "MODTYPE: MODID on MOTIF" </param>
        /// <param name="positionInPeptide">Zero-based position in the peptide.</param>
        /// <param name="positionInProtein">Zero-based position in the peptide's parent protein; when null, -1 is stored.</param>
        /// <param name="intensity">Intensity value to store; defaults to 0.</param>
        public QuantifiedModification(string name, int positionInPeptide, int? positionInProtein = null, double intensity = 0)
        {
            Intensity = intensity;
            // A missing protein position is recorded as the sentinel -1 (position unknown).
            ProteinPositionZeroIsNTerminus = positionInProtein.GetValueOrDefault(-1);
            PeptidePositionZeroIsNTerminus = positionInPeptide;
            Name = name;
        }
    }
}
Loading
Loading