From aad126b45daaff557def5fdbac9c5b4ef951a2f1 Mon Sep 17 00:00:00 2001
From: Peter Cruz Parrilla <peter.cruzparrilla@gmail.com>
Date: Mon, 10 Mar 2025 15:08:31 -0500
Subject: [PATCH 1/7] Moved the ParseModifications function from SpectrumMatch
 to MzLibUtil. Changed the BioPolymerWithSetModsExtensions class to write full
 sequences separating the C-terminus with a dash. Updated some of the tests
 that failed because of the new notation of C-terminus mods. Some tests are
 still failing and will be updated once we are happy with this general change.

---
 mzLib/MzLibUtil/ClassExtensions.cs            | 89 +++++++++++++++++++
 .../Omics/BioPolymerWithSetModsExtensions.cs  |  2 +-
 .../SpectrumMatch/SpectrumMatchFromTsv.cs     | 53 ++---------
 mzLib/Test/TestModifications.cs               |  4 +-
 mzLib/Test/TestMzLibUtil.cs                   | 72 +++++++++++++++
 mzLib/Test/TestProteinDigestion.cs            |  8 +-
 6 files changed, 174 insertions(+), 54 deletions(-)
diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index e5b8ce7bc..cc08ed556 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -19,12 +19,101 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using System.Runtime.ConstrainedExecution;
 using System.Text.RegularExpressions;
 
 namespace MzLibUtil
 {
     public static class ClassExtensions
     {
+        /// <summary>
+        /// Parses the full sequence to identify mods.
+        /// </summary>
+        /// <param name="fullSequence"> Full sequence of the peptide in question.</param>
+        /// <param name="ignoreTerminusMod"> If true, terminal modifications will be ignored.</param>
+        /// <returns> Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod</returns>
+        public static Dictionary<int, List<string>> ParseModifications(this string fullSequence, bool ignoreTerminusMod = false)
+        {
+            // use a regex to get modifications
+            string pattern = @"\[(.+?)\](?<!\[I+\])"; //The "look-behind" condition prevents matching ] for metal ion modifications
+            Regex regex = new(pattern);
+
+            // remove each match after adding to the dict. Otherwise, getting positions
+            // of the modifications will be rather difficult.
+            //int patternMatches = regex.Matches(fullSequence).Count;
+            Dictionary<int, List<string>> modDict = new();
+
+            string temp = fullSequence;
+            RemoveSpecialCharacters(ref temp);
+            string splitAtCTerminusPattern = @"(?<=[A-Z\]])-(?=\[)";
+            var splitAtCTerminus = Regex.Split(temp, splitAtCTerminusPattern);
+
+            // If the sequence is split at the C-terminus, we need to remove the special character  
+            var fullSeq = splitAtCTerminus[0];
+
+            MatchCollection matches = regex.Matches(fullSeq);
+            int captureLengthSum = 0;
+            int positionToAddToDict = 0;
+            foreach (Match match in matches)
+            {
+                GroupCollection group = match.Groups;
+                string val = group[1].Value;
+                int startIndex = group[0].Index;
+                int captureLength = group[0].Length;
+
+                List<string> modList = new List<string>();
+                modList.Add(val);
+
+                // The position of the amino acids is tracked by the positionToAddToDict variable. It takes the 
+                // startIndex of the modification Match and removes the cumulative length of the modifications
+                // found (including the brackets). The difference will be the number of nonmodification characters, 
+                // or the number of amino acids prior to the startIndex in the sequence. 
+                positionToAddToDict = startIndex - captureLengthSum;
+
+                if ((positionToAddToDict == 0) && ignoreTerminusMod)
+                {
+                    captureLengthSum += captureLength;  
+                    continue;
+                }
+
+                // check to see if key already exist
+                // if the already key exists, update the current position with the capture length + 1.
+                // otherwise, add the modification to the dict.
+                if (modDict.ContainsKey(positionToAddToDict))
+                {
+                    modDict[positionToAddToDict].Add(val);
+                }
+                else
+                {
+                    modDict.Add(positionToAddToDict, modList);
+                }
+                captureLengthSum += captureLength;
+            }
+
+            if (splitAtCTerminus.Length > 1 && !ignoreTerminusMod)
+            {
+                positionToAddToDict = regex.Replace(fullSeq, "").Length+1;
+                var cTerminusModMatches = regex.Matches(splitAtCTerminus[1]);
+
+                modDict.Add(positionToAddToDict, cTerminusModMatches.Select(x => x.Groups[1].Value).ToList());
+            }
+            return modDict;
+        }
+
+        /// <summary>
+        /// Fixes an issue where the | appears and throws off the numbering if there are multiple mods on a single amino acid.
+        /// </summary>
+        /// <param name="fullSequence"></param>
+        /// <param name="replacement"></param>
+        /// <param name="specialCharacter"></param>
+        /// <returns></returns>
+        public static void RemoveSpecialCharacters(ref string fullSequence, string replacement = @"", string specialCharacter = @"\|")
+        {
+            // next regex is used in the event that multiple modifications are on a missed cleavage Lysine (K)
+            Regex regexSpecialChar = new(specialCharacter);
+            fullSequence = regexSpecialChar.Replace(fullSequence, replacement);
+        }
+
         public static double[] BoxCarSmooth(this double[] data, int points)
         {
             // Force to be odd
diff --git a/mzLib/Omics/BioPolymerWithSetModsExtensions.cs b/mzLib/Omics/BioPolymerWithSetModsExtensions.cs
index 20d0e7abe..d09282f6f 100644
--- a/mzLib/Omics/BioPolymerWithSetModsExtensions.cs
+++ b/mzLib/Omics/BioPolymerWithSetModsExtensions.cs
@@ -138,7 +138,7 @@ public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMo
         // modification on peptide C-terminus
         if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
         {
-            subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
+            subSequence.Append($"-[{mod.ModificationType}:{mod.IdWithMotif}]");
         }
 
         return subSequence.ToString();
diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
index a96be9e0c..a0b40abfa 100644
--- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
+++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
@@ -4,6 +4,7 @@
 using System.Text.RegularExpressions;
 using Chemistry;
 using Omics.Fragmentation.Peptide;
+using MzLibUtil;
 
 namespace Omics.SpectrumMatch
 {
@@ -92,53 +93,13 @@ public static string RemoveParentheses(string baseSequence)
         }
 
         /// <summary>
-        /// Parses the full sequence to identify mods
+        /// Parses the full sequence to identify mods.
         /// </summary>
-        /// <param name="fullSequence"> Full sequence of the peptide in question</param>
+        /// <param name="fullSeq"> Full sequence of the peptide in question</param>
         /// <returns> Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod</returns>
-        public static Dictionary<int, List<string>> ParseModifications(string fullSeq)
+        public static Dictionary<int, List<string>> ParseModifications(string fullSeq, bool ignoreTerminusMod = false)
         {
-            // use a regex to get all modifications
-            string pattern = @"\[(.+?)\]";
-            Regex regex = new(pattern);
-
-            // remove each match after adding to the dict. Otherwise, getting positions
-            // of the modifications will be rather difficult.
-            //int patternMatches = regex.Matches(fullSeq).Count;
-            Dictionary<int, List<string>> modDict = new();
-
-            RemoveSpecialCharacters(ref fullSeq);
-            MatchCollection matches = regex.Matches(fullSeq);
-            int currentPosition = 0;
-            foreach (Match match in matches)
-            {
-                GroupCollection group = match.Groups;
-                string val = group[1].Value;
-                int startIndex = group[0].Index;
-                int captureLength = group[0].Length;
-                int position = group["(.+?)"].Index;
-
-                List<string> modList = new List<string>();
-                modList.Add(val);
-                // check to see if key already exist
-                // if there is a missed cleavage, then there will be a label on K and a Label on X modification.
-                // And, it'll be like [label]|[label] which complicates the positional stuff a little bit.
-                // if the already key exists, update the current position with the capture length + 1.
-                // otherwise, add the modification to the dict.
-
-                // int to add is startIndex - current position
-                int positionToAddToDict = startIndex - currentPosition;
-                if (modDict.ContainsKey(positionToAddToDict))
-                {
-                    modDict[positionToAddToDict].Add(val);
-                }
-                else
-                {
-                    modDict.Add(positionToAddToDict, modList);
-                }
-                currentPosition += startIndex + captureLength;
-            }
-            return modDict;
+            return fullSeq.ParseModifications(ignoreTerminusMod);
         }
 
         /// <summary>
@@ -150,9 +111,7 @@ public static Dictionary<int, List<string>> ParseModifications(string fullSeq)
         /// <returns></returns>
         public static void RemoveSpecialCharacters(ref string fullSeq, string replacement = @"", string specialCharacter = @"\|")
         {
-            // next regex is used in the event that multiple modifications are on a missed cleavage Lysine (K)
-            Regex regexSpecialChar = new(specialCharacter);
-            fullSeq = regexSpecialChar.Replace(fullSeq, replacement);
+            MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref fullSeq, replacement, specialCharacter);
         }
 
 
diff --git a/mzLib/Test/TestModifications.cs b/mzLib/Test/TestModifications.cs
index b1a25f91c..77eec399d 100644
--- a/mzLib/Test/TestModifications.cs
+++ b/mzLib/Test/TestModifications.cs
@@ -743,7 +743,7 @@ public static void TestFragmentCTerminalModifiedPeptide()
 
             Protein protein = new Protein("PEPTIDE", "", oneBasedModifications: mods);
             PeptideWithSetModifications peptide = protein.Digest(new DigestionParams(), new List<Modification>(), new List<Modification>()).Where(p => p.AllModsOneIsNterminus.Count == 1).First();
-            Assert.That(peptide.FullSequence == "PEPTIDE[testModType:acetylation on E]");
+            Assert.That(peptide.FullSequence == "PEPTIDE-[testModType:acetylation on E]");
 
             var fragments = new List<Product>();
             peptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, fragments);
@@ -783,7 +783,7 @@ public static void TestUniprotCTerminalMod()
             Protein protein = new Protein("PEPTIDE", "", oneBasedModifications: mods);
             var peptide = protein.Digest(new DigestionParams(), new List<Modification>(), new List<Modification>() { variableMod }).Where(p => p.AllModsOneIsNterminus.Count == 1).First();
 
-            Assert.That(peptide.FullSequence == "PEPTIDE[UniProt:acetylation on E]");
+            Assert.That(peptide.FullSequence == "PEPTIDE-[UniProt:acetylation on E]");
         }
 
         [Test]
diff --git a/mzLib/Test/TestMzLibUtil.cs b/mzLib/Test/TestMzLibUtil.cs
index 73fbdda41..05919d210 100644
--- a/mzLib/Test/TestMzLibUtil.cs
+++ b/mzLib/Test/TestMzLibUtil.cs
@@ -33,7 +33,79 @@ public static void TestPeriodTolerantFilenameWithoutExtension(string filenameAnd
             Assert.AreEqual(expectedResult, result);
             Assert.AreEqual(expectedResult, extensionResult);
         }
+        [Test]
+        public static void TestParseModificationsSideChainModOnly()
+        {
+            string fullSeq = "DM[Common Variable:Oxidation on M]MELVQPSISGVDLDK";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
+            Assert.That(mods.Count == 1);
+            Assert.That(mods.ContainsKey(2));
+            Assert.That(mods[2].Count == 1);
+            Assert.That(mods[2].Contains("Common Variable:Oxidation on M"));
+        }
+
+        [Test]
+        public static void TestParseModificationsSideChainAndCTerminusMods()
+        {
+            string fullSeq = "DM[Common Variable:Oxidation on M]MELVQPSISGVDLDK-[Test Mod: ModName on K C-Terminus]";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
+            Assert.That(mods.Count == 2);
+            Assert.That(mods.ContainsKey(2));
+            Assert.That(mods.ContainsKey(18));
+            Assert.That(mods[2].Count == 1);
+            Assert.That(mods[18].Count == 1);
+            Assert.That(mods[2].Contains("Common Variable:Oxidation on M"));
+            Assert.That(mods[18].Contains("Test Mod: ModName on K C-Terminus"));
+        }
+
+        [Test]
+        public static void TestParseModificationsSideChainAndNTerminusMods()
+        {
+            // sequence with two terminal mods
+            string fullSeq = "[UniProt:N-acetylglutamate on E]EEEIAALVIDNGSGMC[Common Fixed:Carbamidomethyl on C]";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
+            Assert.That(mods.Count == 2);
+            Assert.That(mods.ContainsKey(0));
+            Assert.That(mods.ContainsKey(16));
+            Assert.That(mods[0].Count == 1);
+            Assert.That(mods[16].Count == 1);
+            Assert.That(mods[0].Contains("UniProt:N-acetylglutamate on E"));
+            Assert.That(mods[16].Contains("Common Fixed:Carbamidomethyl on C"));
+        }
 
+        [Test]
+        public static void TestParseModificationsTwoModsSamePosition()
+        {
+            // sequence with two mods on same terminus
+            string fullSeq = "[UniProt:N-acetylglutamate on E]|[Common Artifact:Water Loss on E]EEEIAALVID[Metal:Calcium on D]NGSGMC";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
+            Assert.That(mods.Count == 2);
+            Assert.That(mods.ContainsKey(0));
+            Assert.That(mods.ContainsKey(10));
+            Assert.That(mods[0].Count == 2);
+            Assert.That(mods[10].Count == 1);
+            Assert.That(mods[0].Contains("UniProt:N-acetylglutamate on E"));
+            Assert.That(mods[0].Contains("Common Artifact:Water Loss on E"));
+            Assert.That(mods[10].Contains("Metal:Calcium on D"));
+        }
+
+        [Test]
+        public static void TestParseModificationsIgnoreTerminusMod()
+        {
+            // sequence with mod on both termini and mod on first amino acid side chain
+            string fullSeq = "[UniProt:N-acetylglutamate on E]|[Common Artifact:Water Loss on E]E[Metal:Sodium[I] on E]EEIAALVID[Metal:Calcium[II] on D]NGSGMC[Common Fixed:Carbamidomethyl on C]";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: true);
+            Assert.That(mods.Count == 3);
+            Assert.That(mods.ContainsKey(1));
+            Assert.That(mods.ContainsKey(10));
+            Assert.That(mods.ContainsKey(16));
+            Assert.That(mods[1].Count == 1);
+            Assert.That(mods[10].Count == 1);
+            Assert.That(mods[16].Count == 1);
+            Assert.That(mods[1].Contains("Metal:Sodium[I] on E"));
+            Assert.That(mods[10].Contains("Metal:Calcium[II] on D"));
+            Assert.That(mods[16].Contains("Common Fixed:Carbamidomethyl on C"));
+        }
         [Test]
         public static void TestToEnum()
         {
diff --git a/mzLib/Test/TestProteinDigestion.cs b/mzLib/Test/TestProteinDigestion.cs
index bd8b3f36b..085b7529f 100644
--- a/mzLib/Test/TestProteinDigestion.cs
+++ b/mzLib/Test/TestProteinDigestion.cs
@@ -246,7 +246,7 @@ public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepM
 
             Assert.AreEqual(1, ok.Count);
 
-            Assert.AreEqual("[:ProtNmod on M]M[:resMod on M][:ProtCmod on M]", ok.First().FullSequence);
+            Assert.AreEqual("[:ProtNmod on M]M[:resMod on M]-[:ProtCmod on M]", ok.First().FullSequence);
 
             Assert.AreEqual("[H]M[H][H]", ok.First().SequenceWithChemicalFormulas);
             Assert.AreEqual(5 * GetElement("H").PrincipalIsotope.AtomicMass + Residue.ResidueMonoisotopicMass['M'] + GetElement("O").PrincipalIsotope.AtomicMass, ok.Last().MonoisotopicMass, 1e-9);
@@ -268,7 +268,7 @@ public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepM
 
             // set expected values
             int expectedDigestionProducts = 1;
-            string expectedFullSequence = "[:ProtNmod on M]M[:resMod on M][:ProtCmod on M]";
+            string expectedFullSequence = "[:ProtNmod on M]M[:resMod on M]-[:ProtCmod on M]";
             string expectedSequenceWithChemicalFormulas = "[H]M[H][H]";
             double expectedMonoisotopicMass = 5 * GetElement("H").PrincipalIsotope.AtomicMass + Residue.ResidueMonoisotopicMass['M'] + GetElement("O").PrincipalIsotope.AtomicMass;
 
@@ -308,8 +308,8 @@ public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepM
 
             Assert.AreEqual(2, ok.Count);
 
-            Assert.AreEqual("[:ProtNmod on M]M[:resMod on M]K[:PepCmod on K]", ok.First().FullSequence);
-            Assert.AreEqual("[:pepNmod on M]M[:resMod on M][:ProtCmod on M]", ok.Skip(1).First().FullSequence);
+            Assert.AreEqual("[:ProtNmod on M]M[:resMod on M]K-[:PepCmod on K]", ok.First().FullSequence);
+            Assert.AreEqual("[:pepNmod on M]M[:resMod on M]-[:ProtCmod on M]", ok.Skip(1).First().FullSequence);
 
             Assert.AreEqual("[H]M[H]K[H]", ok.First().SequenceWithChemicalFormulas);
             Assert.AreEqual("[H]M[H][H]", ok.Skip(1).First().SequenceWithChemicalFormulas);

From 6fd3eac501221557ec845bf72a5735d97db064d8 Mon Sep 17 00:00:00 2001
From: Peter Cruz Parrilla <peter.cruzparrilla@gmail.com>
Date: Tue, 11 Mar 2025 12:44:35 -0500
Subject: [PATCH 2/7] Cleaned up the ParseModifications() method and
 updated it to not handle ambiguity (or multiple mods at the same position).
 Modified the corresponding tests or commented them out in case we want to
 revert.

---
 mzLib/MzLibUtil/ClassExtensions.cs            | 58 +++++----------
 .../SpectrumMatch/SpectrumMatchFromTsv.cs     |  2 +-
 mzLib/Test/FileReadingTests/TestPsmFromTsv.cs | 21 +++---
 mzLib/Test/TestMzLibUtil.cs                   | 70 +++++--------------
 4 files changed, 46 insertions(+), 105 deletions(-)

diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index cc08ed556..7ab198a02 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -27,75 +27,55 @@ namespace MzLibUtil
     public static class ClassExtensions
     {
         /// <summary>
-        /// Parses the full sequence to identify mods.
+        /// Parses the full sequence to identify mods. Note: This method has been updated to NOT handle ambiguous mods on a given position (e.g. M[modA]|[modB]).
+        /// If ambiguity exists, generate a separate full sequence for each mod and parse each separately.
         /// </summary>
         /// <param name="fullSequence"> Full sequence of the peptide in question.</param>
         /// <param name="ignoreTerminusMod"> If true, terminal modifications will be ignored.</param>
         /// <returns> Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod</returns>
-        public static Dictionary<int, List<string>> ParseModifications(this string fullSequence, bool ignoreTerminusMod = false)
+        public static Dictionary<int, string> ParseModifications(this string fullSequence, bool ignoreTerminusMod = false)
         {
             // use a regex to get modifications
-            string pattern = @"\[(.+?)\](?<!\[I+\])"; //The "look-behind" condition prevents matching ] for metal ion modifications
-            Regex regex = new(pattern);
+            string modPattern = @"-?\[(.+?)\](?<!\[I+\])"; //The "look-behind" condition prevents matching ] for metal ion modifications
+            Regex modRegex = new(modPattern);
 
-            // remove each match after adding to the dict. Otherwise, getting positions
-            // of the modifications will be rather difficult.
-            //int patternMatches = regex.Matches(fullSequence).Count;
-            Dictionary<int, List<string>> modDict = new();
+            // use a regex to find C-terminus modification
+            var cTerminusPattern = @"(?<=[A-Z\]])-(?=\[)";
+            Regex cTerminusRegex = new(cTerminusPattern);
 
-            string temp = fullSequence;
-            RemoveSpecialCharacters(ref temp);
-            string splitAtCTerminusPattern = @"(?<=[A-Z\]])-(?=\[)";
-            var splitAtCTerminus = Regex.Split(temp, splitAtCTerminusPattern);
+            var fullSeq = fullSequence;
+            Dictionary<int, string> modDict = new();
 
-            // If the sequence is split at the C-terminus, we need to remove the special character  
-            var fullSeq = splitAtCTerminus[0];
-
-            MatchCollection matches = regex.Matches(fullSeq);
+            MatchCollection matches = modRegex.Matches(fullSeq);
             int captureLengthSum = 0;
             int positionToAddToDict = 0;
             foreach (Match match in matches)
             {
                 GroupCollection group = match.Groups;
-                string val = group[1].Value;
+                string rawModString = group[0].Value;
+                string mod = group[1].Value;
                 int startIndex = group[0].Index;
                 int captureLength = group[0].Length;
 
-                List<string> modList = new List<string>();
-                modList.Add(val);
-
                 // The position of the amino acids is tracked by the positionToAddToDict variable. It takes the 
                 // startIndex of the modification Match and removes the cumulative length of the modifications
                 // found (including the brackets). The difference will be the number of nonmodification characters, 
                 // or the number of amino acids prior to the startIndex in the sequence. 
                 positionToAddToDict = startIndex - captureLengthSum;
 
-                if ((positionToAddToDict == 0) && ignoreTerminusMod)
+                if (((positionToAddToDict == 0) || rawModString.StartsWith("-")) && ignoreTerminusMod) // ignore terminal mods
                 {
-                    captureLengthSum += captureLength;  
+                    captureLengthSum += captureLength;
                     continue;
                 }
 
-                // check to see if key already exist
-                // if the already key exists, update the current position with the capture length + 1.
-                // otherwise, add the modification to the dict.
-                if (modDict.ContainsKey(positionToAddToDict))
-                {
-                    modDict[positionToAddToDict].Add(val);
-                }
-                else
+                if (rawModString.StartsWith("-"))
                 {
-                    modDict.Add(positionToAddToDict, modList);
+                    positionToAddToDict++;
                 }
-                captureLengthSum += captureLength;
-            }
 
-            if (splitAtCTerminus.Length > 1 && !ignoreTerminusMod)
-            {
-                positionToAddToDict = regex.Replace(fullSeq, "").Length+1;
-                var cTerminusModMatches = regex.Matches(splitAtCTerminus[1]);
-
-                modDict.Add(positionToAddToDict, cTerminusModMatches.Select(x => x.Groups[1].Value).ToList());
+                modDict.Add(positionToAddToDict, mod);
+                captureLengthSum += captureLength;
             }
             return modDict;
         }
diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
index a0b40abfa..0f5e19af2 100644
--- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
+++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
@@ -97,7 +97,7 @@ public static string RemoveParentheses(string baseSequence)
         /// </summary>
         /// <param name="fullSeq"> Full sequence of the peptide in question</param>
         /// <returns> Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod</returns>
-        public static Dictionary<int, List<string>> ParseModifications(string fullSeq, bool ignoreTerminusMod = false)
+        public static Dictionary<int, string> ParseModifications(string fullSeq, bool ignoreTerminusMod = false)
         {
             return fullSeq.ParseModifications(ignoreTerminusMod);
         }
diff --git a/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs b/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
index 2018158b1..c00df6018 100644
--- a/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
+++ b/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
@@ -180,20 +180,17 @@ public static void TestParseModification()
             modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(twoMods.FullSequence);
             Assert.That(modDict.Count == 2);
             Assert.That(modDict.ContainsKey(0) && modDict.ContainsKey(104));
-            Assert.That(modDict[0].Count == 1);
-            Assert.That(modDict[0].Contains("UniProt:N-acetylserine on S"));
-            Assert.That(modDict[104].Count == 1);
-            Assert.That(modDict[104].Contains("UniProt:N5-methylglutamine on Q"));
-
+            Assert.That(modDict[0] == "UniProt:N-acetylserine on S");
+            Assert.That(modDict[104] == "UniProt:N5-methylglutamine on Q");
 
+            // Test below commented out because method input updated to not handle two mods on the same position. 
             // psm with two mods on the same amino acid
-            string fullSeq = "[Common Fixed:Carbamidomethyl on C]|[UniProt:N-acetylserine on S]KPRKIEEIKDFLLTARRKDAKSVKIKKNKDNVKFK";
-            modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(fullSeq);
-            Assert.That(modDict.Count == 1);
-            Assert.That(modDict.ContainsKey(0));
-            Assert.That(modDict[0].Count == 2);
-            Assert.That(modDict[0].Contains("Common Fixed:Carbamidomethyl on C"));
-            Assert.That(modDict[0].Contains("UniProt:N-acetylserine on S"));
+            //string fullSeq = "[Common Fixed:Carbamidomethyl on C]|[UniProt:N-acetylserine on S]KPRKIEEIKDFLLTARRKDAKSVKIKKNKDNVKFK";
+            //modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(fullSeq);
+            //Assert.That(modDict.Count == 1);
+            //Assert.That(modDict.ContainsKey(0));
+            //Assert.That(modDict[0] == "Common Fixed:Carbamidomethyl on C");
+            //Assert.That(modDict[0] == "UniProt:N-acetylserine on S");
         }
 
         [Test]
diff --git a/mzLib/Test/TestMzLibUtil.cs b/mzLib/Test/TestMzLibUtil.cs
index 05919d210..ccda2810a 100644
--- a/mzLib/Test/TestMzLibUtil.cs
+++ b/mzLib/Test/TestMzLibUtil.cs
@@ -40,71 +40,35 @@ public static void TestParseModificationsSideChainModOnly()
             var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
             Assert.That(mods.Count == 1);
             Assert.That(mods.ContainsKey(2));
-            Assert.That(mods[2].Count == 1);
-            Assert.That(mods[2].Contains("Common Variable:Oxidation on M"));
+            Assert.That(mods[2] == ("Common Variable:Oxidation on M"));
         }
 
         [Test]
-        public static void TestParseModificationsSideChainAndCTerminusMods()
+        public static void TestParseModificationsSideChainAndTerminusMods()
         {
-            string fullSeq = "DM[Common Variable:Oxidation on M]MELVQPSISGVDLDK-[Test Mod: ModName on K C-Terminus]";
+            string fullSeq = "[UniProt:N-acetylglutamate on E]EDM[Common Variable:Oxidation on M]MELVQPSISGVDLDK[Test Mod2: ModName2 on K]-[Test Mod: ModName on K C-Terminus]";
             var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
-            Assert.That(mods.Count == 2);
-            Assert.That(mods.ContainsKey(2));
-            Assert.That(mods.ContainsKey(18));
-            Assert.That(mods[2].Count == 1);
-            Assert.That(mods[18].Count == 1);
-            Assert.That(mods[2].Contains("Common Variable:Oxidation on M"));
-            Assert.That(mods[18].Contains("Test Mod: ModName on K C-Terminus"));
-        }
-
-        [Test]
-        public static void TestParseModificationsSideChainAndNTerminusMods()
-        {
-            // sequence with two terminal mods
-            string fullSeq = "[UniProt:N-acetylglutamate on E]EEEIAALVIDNGSGMC[Common Fixed:Carbamidomethyl on C]";
-            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
-            Assert.That(mods.Count == 2);
+            Assert.That(mods.Count == 4);
             Assert.That(mods.ContainsKey(0));
-            Assert.That(mods.ContainsKey(16));
-            Assert.That(mods[0].Count == 1);
-            Assert.That(mods[16].Count == 1);
-            Assert.That(mods[0].Contains("UniProt:N-acetylglutamate on E"));
-            Assert.That(mods[16].Contains("Common Fixed:Carbamidomethyl on C"));
-        }
-
-        [Test]
-        public static void TestParseModificationsTwoModsSamePosition()
-        {
-            // sequence with two mods on same terminus
-            string fullSeq = "[UniProt:N-acetylglutamate on E]|[Common Artifact:Water Loss on E]EEEIAALVID[Metal:Calcium on D]NGSGMC";
-            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
-            Assert.That(mods.Count == 2);
-            Assert.That(mods.ContainsKey(0));
-            Assert.That(mods.ContainsKey(10));
-            Assert.That(mods[0].Count == 2);
-            Assert.That(mods[10].Count == 1);
-            Assert.That(mods[0].Contains("UniProt:N-acetylglutamate on E"));
-            Assert.That(mods[0].Contains("Common Artifact:Water Loss on E"));
-            Assert.That(mods[10].Contains("Metal:Calcium on D"));
+            Assert.That(mods.ContainsKey(3));
+            Assert.That(mods.ContainsKey(18));
+            Assert.That(mods.ContainsKey(19));
+            Assert.That(mods[0] == "UniProt:N-acetylglutamate on E");
+            Assert.That(mods[3] == "Common Variable:Oxidation on M");
+            Assert.That(mods[18] == "Test Mod2: ModName2 on K");
+            Assert.That(mods[19] == "Test Mod: ModName on K C-Terminus");
         }
 
         [Test]
         public static void TestParseModificationsIgnoreTerminusMod()
         {
-            // sequence with mod on both termini and mod on first amino acid side chain
-            string fullSeq = "[UniProt:N-acetylglutamate on E]|[Common Artifact:Water Loss on E]E[Metal:Sodium[I] on E]EEIAALVID[Metal:Calcium[II] on D]NGSGMC[Common Fixed:Carbamidomethyl on C]";
+            string fullSeq = "[UniProt:N-acetylglutamate on E]EDM[Common Variable:Oxidation on M]MELVQPSISGVDLDK[Test Mod2: ModName2 on K]-[Test Mod: ModName on K C-Terminus]";
             var mods = fullSeq.ParseModifications(ignoreTerminusMod: true);
-            Assert.That(mods.Count == 3);
-            Assert.That(mods.ContainsKey(1));
-            Assert.That(mods.ContainsKey(10));
-            Assert.That(mods.ContainsKey(16));
-            Assert.That(mods[1].Count == 1);
-            Assert.That(mods[10].Count == 1);
-            Assert.That(mods[16].Count == 1);
-            Assert.That(mods[1].Contains("Metal:Sodium[I] on E"));
-            Assert.That(mods[10].Contains("Metal:Calcium[II] on D"));
-            Assert.That(mods[16].Contains("Common Fixed:Carbamidomethyl on C"));
+            Assert.That(mods.Count == 2);
+            Assert.That(mods.ContainsKey(3));
+            Assert.That(mods.ContainsKey(18));
+            Assert.That(mods[3] == "Common Variable:Oxidation on M");
+            Assert.That(mods[18] == "Test Mod2: ModName2 on K");
         }
         [Test]
         public static void TestToEnum()

From 1d82ffd1283176a25b49493fb004b155cfea6019 Mon Sep 17 00:00:00 2001
From: Peter Cruz Parrilla <peter.cruzparrilla@gmail.com>
Date: Mon, 17 Mar 2025 13:40:49 -0500
Subject: [PATCH 3/7] Updated the remaining tests that were failing.

---
 mzLib/Test/DatabaseTests/fullSequences.txt  |  4 +--
 mzLib/Test/Transcriptomics/TestDigestion.cs | 40 ++++++++++-----------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/mzLib/Test/DatabaseTests/fullSequences.txt b/mzLib/Test/DatabaseTests/fullSequences.txt
index 84851f13f..b5a08dc3c 100644
--- a/mzLib/Test/DatabaseTests/fullSequences.txt
+++ b/mzLib/Test/DatabaseTests/fullSequences.txt
@@ -128,7 +128,7 @@ V[UniProt:N-methylvaline on V]V[UniProt:N-methylvaline on V]D[UniProt:N-methylas
 V[UniProt:N-methylvaline on V]V[UniProt:N-methylvaline on V]D[UniProt:N-methylaspartate on D]L[UniProt:N-methylleucine on L]M[UniProt:S-methylmethionine on M]A[UniProt:N-methylalanine on A]H[UniProt:N-linked (Glc) (glycation) histidine on H]M[UniProt:S-methylmethionine on M]A[UniProt:N-methylalanine on A]S[UniProt:O-linked (HexNAc) serine on S]K[UniProt:N6,N6-dimethyllysine on K]
 V[UniProt:N-methylvaline on V]V[UniProt:N-methylvaline on V]D[UniProt:N-methylaspartate on D]L[UniProt:N-methylleucine on L]M[UniProt:S-methylmethionine on M]A[UniProt:N-methylalanine on A]H[UniProt:N-linked (Glc) (glycation) histidine on H]M[UniProt:S-methylmethionine on M]A[UniProt:N-methylalanine on A]S[UniProt:Phosphoserine on S]K[UniProt:O-linked (Hex) hydroxylysine on K]
 V[UniProt:N-methylvaline on V]V[UniProt:N-methylvaline on V]D[UniProt:N-methylaspartate on D]L[UniProt:N-methylleucine on L]M[UniProt:S-methylmethionine on M]A[UniProt:N-methylalanine on A]H[UniProt:N-linked (Glc) (glycation) histidine on H]M[UniProt:S-methylmethionine on M]A[UniProt:N-methylalanine on A]S[UniProt:Phosphoserine on S]K[UniProt:N6,N6-dimethyllysine on K]
-E[UniProt:Glutamate methyl ester (Glu) on E][UniProt:Glutamic acid 1-amide on E]
-E[UniProt:Glutamate methyl ester (Glu) on E][UniProt:5-glutamyl 2-aminoadipic acid on E]
+E[UniProt:Glutamate methyl ester (Glu) on E]-[UniProt:Glutamic acid 1-amide on E]
+E[UniProt:Glutamate methyl ester (Glu) on E]-[UniProt:5-glutamyl 2-aminoadipic acid on E]
 [UniProt:N-palmitoyl glycine on G]G[UniProt:N-methylglycine on G]K[UniProt:O-linked (Hex) hydroxylysine on K]
 [UniProt:N-acetylglycine on G]G[UniProt:N-methylglycine on G]K[UniProt:O-linked (Hex) hydroxylysine on K]
diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index dc577a6d3..1645d5530 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -373,7 +373,7 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
             Assert.That(digestionProducts[0].SequenceWithChemicalFormulas, Is.EqualTo("UAGUCGUUGAUAG"));
             Assert.That(digestionProducts[0].FullSequenceWithMassShift(), Is.EqualTo("UAGUCGUUGAUAG"));
             
-            Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG[Digestion Termini:Cyclic Phosphate on X]"));
+            Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG-[Digestion Termini:Cyclic Phosphate on X]"));
             Assert.That(digestionProducts[1].SequenceWithChemicalFormulas, Is.EqualTo("UAGUCGUUGAUAG[H-2O-1]"));
             Assert.That(digestionProducts[1].FullSequenceWithMassShift(), Is.EqualTo("UAGUCGUUGAUAG[-18.010565]"));
 
@@ -383,7 +383,7 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
                 .Select(p => (OligoWithSetMods)p).ToList();
             Assert.That(digestionProducts.Count, Is.EqualTo(2));
             Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
-            Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG[Digestion Termini:Cyclic Phosphate on X]"));
+            Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG-[Digestion Termini:Cyclic Phosphate on X]"));
 
             // RNase T1 digestion, 3' terminal modification
             digestionParams = new RnaDigestionParams("RNase T1");
@@ -393,7 +393,7 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
             Assert.That(digestionProducts.Count, Is.EqualTo(5));
             var expected = new List<string>()
             {
-                "UAG", "UCG", "UUG", "AUAG", "AUAG[Digestion Termini:Cyclic Phosphate on X]"
+                "UAG", "UCG", "UUG", "AUAG", "AUAG-[Digestion Termini:Cyclic Phosphate on X]"
             };
             for (int i = 0; i < expected.Count; i++)
             {
@@ -407,10 +407,10 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
             Assert.That(digestionProducts.Count, Is.EqualTo(8));
             expected = new List<string>()
             {
-                "UAG", "UAG[Digestion Termini:Cyclic Phosphate on X]",
-                "UCG", "UCG[Digestion Termini:Cyclic Phosphate on X]",
-                "UUG", "UUG[Digestion Termini:Cyclic Phosphate on X]",
-                "AUAG","AUAG[Digestion Termini:Cyclic Phosphate on X]"
+                "UAG", "UAG-[Digestion Termini:Cyclic Phosphate on X]",
+                "UCG", "UCG-[Digestion Termini:Cyclic Phosphate on X]",
+                "UUG", "UUG-[Digestion Termini:Cyclic Phosphate on X]",
+                "AUAG","AUAG-[Digestion Termini:Cyclic Phosphate on X]"
             };
 
             for (int i = 0; i < expected.Count; i++)
@@ -1018,11 +1018,11 @@ public static void TestDatabaseAnnotatedMods_TerminalMods()
                 Assert.That(precursors.Any(p => p.NumVariableMods == 1));
                 Assert.That(fullSequences.Contains("GUACUG"));
                 Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG"));
-                Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
+                Assert.That(fullSequences.Contains("GUACUG-[Metal:Sodium on G]"));
 
                 if (rnaDigestionParams.MaxMods != 2) continue;
                 Assert.That(precursors.Any(p => p.NumVariableMods == 2));
-                Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Sodium on G]"));
+                Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG-[Metal:Sodium on G]"));
             }
         }
 
@@ -1079,17 +1079,17 @@ public static void TestDatabaseAnnotatedMods_TerminalMods_WithFirstResidueDataba
                     Assert.That(fullSequences.Contains("GUACUG"));
                     Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG"));
                     Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG"));
-                    Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("GUACUG-[Metal:Sodium on G]"));
                 }
                 else if (rnaDigestionParams.MaxMods >= 2)
                 {
-                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
-                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG-[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG-[Metal:Sodium on G]"));
                     Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG"));
                 }
                 else if (rnaDigestionParams.MaxMods >= 3)
                 {
-                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG-[Metal:Sodium on G]"));
                 }
             }
         }
@@ -1147,25 +1147,25 @@ public static void TestDatabaseAnnotatedMods_TerminalMods_WithFirstResidueVariab
                     Assert.That(fullSequences.Contains("GUACUG"));
                     Assert.That(fullSequences.Contains("GUACUG[Metal:Potassium on G]"));
                     Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG"));
-                    Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("GUACUG-[Metal:Sodium on G]"));
                     Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG"));
                 }
                 else if (rnaDigestionParams.MaxMods >= 2)
                 {
                     Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
-                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
-                    Assert.That(fullSequences.Contains("GUACUG[Metal:Potassium on G][Metal:Sodium on G]"));
-                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG-[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("GUACUG[Metal:Potassium on G]-[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG-[Metal:Sodium on G]"));
                     Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG"));
                     Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Potassium on G]"));
                 }
                 else if (rnaDigestionParams.MaxMods >= 3)
                 {
-                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG-[Metal:Sodium on G]"));
                     Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
 
-                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G][Metal:Sodium on G]"));
-                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Potassium on G][Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]-[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Potassium on G]-[Metal:Sodium on G]"));
                 }
             }
         }

From e6dd7bcfc5ffa1de2532b12472cf48dac6904d5e Mon Sep 17 00:00:00 2001
From: Peter Cruz Parrilla <peter.cruzparrilla@gmail.com>
Date: Tue, 18 Mar 2025 10:03:34 -0500
Subject: [PATCH 4/7] Removed two unused lines from ParseModifications

---
 mzLib/MzLibUtil/ClassExtensions.cs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index 7ab198a02..c92c45833 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -39,10 +39,6 @@ public static Dictionary<int, string> ParseModifications(this string fullSequenc
             string modPattern = @"-?\[(.+?)\](?<!\[I+\])"; //The "look-behind" condition prevents matching ] for metal ion modifications
             Regex modRegex = new(modPattern);
 
-            // use a regex to find C-terminus modification
-            var cTerminusPattern = @"(?<=[A-Z\]])-(?=\[)";
-            Regex cTerminusRegex = new(cTerminusPattern);
-
             var fullSeq = fullSequence;
             Dictionary<int, string> modDict = new();
 

From e37615f82d5b24b1aabaaf9f9f74681b1d02d771 Mon Sep 17 00:00:00 2001
From: pcruzparri <peter.cruzparrilla@gmail.com>
Date: Mon, 24 Mar 2025 15:02:00 -0500
Subject: [PATCH 5/7] removing RemoveSpecialCharacters method from 
 SpectrumMatchFromTsv

---
 mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs | 13 -------------
 mzLib/Test/FileReadingTests/TestPsmFromTsv.cs     |  8 ++++----
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
index 9795e2022..0cba10ec5 100644
--- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
+++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
@@ -102,19 +102,6 @@ public static Dictionary<int, string> ParseModifications(string fullSeq, bool ig
             return fullSeq.ParseModifications(ignoreTerminusMod);
         }
 
-        /// <summary>
-        /// Fixes an issue where the | appears and throws off the numbering if there are multiple mods on a single amino acid.
-        /// </summary>
-        /// <param name="fullSeq"></param>
-        /// <param name="replacement"></param>
-        /// <param name="specialCharacter"></param>
-        /// <returns></returns>
-        public static void RemoveSpecialCharacters(ref string fullSeq, string replacement = @"", string specialCharacter = @"\|")
-        {
-            MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref fullSeq, replacement, specialCharacter);
-        }
-
-
         protected static List<MatchedFragmentIon> ReadFragmentIonsFromString(string matchedMzString, string matchedIntensityString, string peptideBaseSequence, string matchedMassErrorDaString = null)
         {
             List<MatchedFragmentIon> matchedIons = new List<MatchedFragmentIon>();
diff --git a/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs b/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
index 9868f378f..48aa4b7c7 100644
--- a/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
+++ b/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
@@ -193,23 +193,23 @@ public static void TestRemoveSpecialCharacters()
             // successful removal of the default character
             string toRemove = "ANDVHAO|CNVASDF|ABVCUAE";
             int length = toRemove.Length;
-            SpectrumMatchFromTsv.RemoveSpecialCharacters(ref toRemove);
+            MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref toRemove);
             Assert.That(toRemove.Length == length - 2);
             Assert.That(toRemove.Equals("ANDVHAOCNVASDFABVCUAE"));
 
             // does not remove default character when prompted otherwise
             toRemove = "ANDVHAO|CNVASDF|ABVCUAE";
-            SpectrumMatchFromTsv.RemoveSpecialCharacters(ref toRemove, specialCharacter: @"\[");
+            MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref toRemove, specialCharacter: @"\[");
             Assert.That(toRemove.Length == length);
             Assert.That(toRemove.Equals("ANDVHAO|CNVASDF|ABVCUAE"));
 
             // replaces default symbol when prompted
-            SpectrumMatchFromTsv.RemoveSpecialCharacters(ref toRemove, replacement: @"%");
+            MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref toRemove, replacement: @"%");
             Assert.That(toRemove.Length == length);
             Assert.That(toRemove.Equals("ANDVHAO%CNVASDF%ABVCUAE"));
 
             // replaces inputted symbol with non-default symbol
-            SpectrumMatchFromTsv.RemoveSpecialCharacters(ref toRemove, replacement: @"=", specialCharacter: @"%");
+            MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref toRemove, replacement: @"=", specialCharacter: @"%");
             Assert.That(toRemove.Length == length);
             Assert.That(toRemove.Equals("ANDVHAO=CNVASDF=ABVCUAE"));
         }

From 9d3e0334d0828e78f69a0d68ae405124f7bcd5ef Mon Sep 17 00:00:00 2001
From: pcruzparri <peter.cruzparrilla@gmail.com>
Date: Thu, 27 Mar 2025 18:26:02 -0500
Subject: [PATCH 6/7] extra test for ParseModifications

---
 mzLib/MzLibUtil/ClassExtensions.cs            | 97 +++++++++++++++++++
 ...dFullSequencesAndModificationsExamples.txt | 45 +++++++++
 mzLib/Test/Test.csproj                        | 24 +++++
 mzLib/Test/TestMzLibUtil.cs                   | 63 ++++++++++++
 4 files changed, 229 insertions(+)
 create mode 100644 mzLib/Test/ModificationTests/ModifiedFullSequencesAndModificationsExamples.txt

diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index e5b8ce7bc..945df8cdc 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -19,12 +19,109 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Runtime.ConstrainedExecution;
 using System.Text.RegularExpressions;
 
 namespace MzLibUtil
 {
     public static class ClassExtensions
     {
+        // Cached pattern: optional '-' (C-terminus marker), then a bracketed mod. The look-behind rejects a closing
+        // bracket that ends a metal-ion charge such as [I], [II] or [III], so the match runs on to the mod's real end.
+        private static readonly Regex FullSequenceModRegex = new(@"-?\[(.+?)\](?<!\[I+\])");
+
+        /// <summary>
+        /// Parses the full sequence to identify mods. Note: This method does NOT handle ambiguous mods on a given position
+        /// (e.g. M[modA]|[modB]). If ambiguity exists, generate a separate full sequence for each mod and parse each separately.
+        /// </summary>
+        /// <param name="fullSequence"> Full sequence of the peptide in question.</param>
+        /// <param name="ignoreTerminusMod"> If true, mods at position 0 (N-terminus) and mods prefixed with '-' (C-terminus) are skipped.</param>
+        /// <returns> Dictionary from position to mod: the key is the number of residues preceding the mod, plus one for a '-'-prefixed mod.</returns>
+        /// <exception cref="ArgumentException"> Two mods resolve to the same position (e.g. adjacent brackets with no '-' separator).</exception>
+        public static Dictionary<int, string> ParseModifications(this string fullSequence, bool ignoreTerminusMod = false)
+        {
+            Dictionary<int, string> modDict = new();
+            int modCharsConsumed = 0; // total length of all mod annotations (brackets and dash included) seen so far
+
+            foreach (Match match in FullSequenceModRegex.Matches(fullSequence))
+            {
+                string mod = match.Groups[1].Value;
+
+                // The match index minus the annotation characters already consumed is the number of
+                // non-modification characters (residues) that precede this mod in the sequence.
+                int position = match.Index - modCharsConsumed;
+                modCharsConsumed += match.Length;
+
+                // Ordinal comparison: the dash is a format token, not linguistic text.
+                bool isCTerminal = match.Value.StartsWith("-", StringComparison.Ordinal);
+
+                if (ignoreTerminusMod && (position == 0 || isCTerminal))
+                {
+                    continue;
+                }
+
+                if (isCTerminal)
+                {
+                    position++; // C-terminal mods are stored one past the last residue
+                }
+
+                if (!modDict.TryAdd(position, mod))
+                {
+                    throw new ArgumentException(
+                        $"More than one modification maps to position {position} in '{fullSequence}'; parse ambiguous sequences separately.",
+                        nameof(fullSequence));
+                }
+            }
+
+            return modDict;
+        }
+
+        /// <summary>
+        /// Splits the input on '|' into alternative full sequences, parses each one with ParseModifications, and merges
+        /// the results: when alternatives put different mods on the same position, the mods are joined with '|'.
+        /// </summary>
+        /// <remarks> WIP: not currently used, and may be removed depending on how/if ambiguity ends up being handled here.</remarks>
+        /// <param name="ambiguousFullSequences"> One or more full sequences separated by '|'.</param>
+        /// <param name="ignoreTerminusMod"> Forwarded to ParseModifications for every alternative.</param>
+        /// <returns> Dictionary from position to a single mod, or to the '|'-joined distinct mods seen at that position.</returns>
+        public static Dictionary<int, string> ParseModificationsWithAmbiguity(this string ambiguousFullSequences, bool ignoreTerminusMod = false)
+        {
+            List<Dictionary<int, string>> parsed = ambiguousFullSequences
+                .Split('|')
+                .Select(alternative => alternative.ParseModifications(ignoreTerminusMod))
+                .ToList();
+
+            // The first alternative's dictionary doubles as the accumulator; with a single alternative the loop is a no-op.
+            Dictionary<int, string> merged = parsed[0];
+            foreach (KeyValuePair<int, string> entry in parsed.Skip(1).SelectMany(alternativeMods => alternativeMods))
+            {
+                if (!merged.TryGetValue(entry.Key, out var existing))
+                {
+                    merged.Add(entry.Key, entry.Value);
+                }
+                else if (!existing.Split('|').Contains(entry.Value))
+                {
+                    merged[entry.Key] = existing + "|" + entry.Value;
+                }
+            }
+            return merged;
+        }
+
+        /// <summary>
+        /// Replaces every match of <paramref name="specialCharacter"/> in the sequence, in place. With the defaults this strips
+        /// the '|' that appears when there are multiple mods on a single amino acid and would otherwise throw off the numbering.
+        /// </summary>
+        /// <param name="fullSequence"> Sequence to clean; the result is written back through the ref.</param>
+        /// <param name="replacement"> Regex replacement text substituted for each match (default: empty, i.e. removal).</param>
+        /// <param name="specialCharacter"> Regular-expression pattern to search for (default: an escaped '|').</param>
+        public static void RemoveSpecialCharacters(ref string fullSequence, string replacement = @"", string specialCharacter = @"\|")
+        {
+            // specialCharacter is interpreted as a regex pattern, so callers must escape metacharacters themselves.
+            // Typical use: multiple modifications reported on a missed-cleavage Lysine (K).
+            fullSequence = Regex.Replace(fullSequence, specialCharacter, replacement);
+        }
+
         public static double[] BoxCarSmooth(this double[] data, int points)
         {
             // Force to be odd
diff --git a/mzLib/Test/ModificationTests/ModifiedFullSequencesAndModificationsExamples.txt b/mzLib/Test/ModificationTests/ModifiedFullSequencesAndModificationsExamples.txt
new file mode 100644
index 000000000..74bc84ded
--- /dev/null
+++ b/mzLib/Test/ModificationTests/ModifiedFullSequencesAndModificationsExamples.txt
@@ -0,0 +1,45 @@
+﻿Base Sequence	Full Sequence	Mods
+CEDCGKPLSIEADDNGCFPLDGHVLCR	[Common Artifact:Ammonia loss on C]C[Common Fixed:Carbamidomethyl on C]EDC[Common Fixed:Carbamidomethyl on C]GKPLSIEADDNGC[Common Fixed:Carbamidomethyl on C]FPLDGHVLC[Common Fixed:Carbamidomethyl on C]R	Ammonia loss on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C
+HTGPGILSMANAGPNTNGSQFFICTAK	HTGP[Less Common:Proline pyrrole to pyrrolidine six member ring on P]GILSMANAGPNTN[Common Artifact:Deamidation on N]GSQFFIC[Common Fixed:Carbamidomethyl on C]TAK	Carbamidomethyl on C Deamidation on N Proline pyrrole to pyrrolidine six member ring on P
+GTGRGGGGGGGGGAPR	GTGR[UniProt:Asymmetric dimethylarginine on R]GGGGGGGGGAPR[UniProt:Omega-N-methylarginine on R]	Asymmetric dimethylarginine on R Omega-N-methylarginine on R
+YPIEHGIVTNWDDMEK	YPIEH[UniProt:Tele-methylhistidine on H]GIVTNWDD[Less Common:Water loss on D]MEK	Tele-methylhistidine on H Water loss on D
+PHSEAGTAFIQTQQLHAAMADTFLEHMCR	P[Less Common:Proline pyrrole to pyrrolidine six member ring on P]HSEAGTAFIQTQQLHAAMADTFLEHMC[Common Fixed:Carbamidomethyl on C]R|[Less Common:Formylation on X]PHS[Less Common:Reduction on S]EAGTAFIQTQQLHAAMADTFLEHMC[Common Fixed:Carbamidomethyl on C]R	Carbamidomethyl on C Proline pyrrole to pyrrolidine six member ring on P|Carbamidomethyl on C Formylation on X Reduction on S
+AATDAQDANQCCTSCEDNAPATSYCVECSEPLCETCVEAHQR	AATDAQDANQC[Common Fixed:Carbamidomethyl on C]C[Common Fixed:Carbamidomethyl on C]TSC[Common Fixed:Carbamidomethyl on C]EDNAPATSYC[Common Fixed:Carbamidomethyl on C]VEC[Common Fixed:Carbamidomethyl on C]SEPLC[Common Fixed:Carbamidomethyl on C]ETC[Common Fixed:Carbamidomethyl on C]VEAHQR	Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C
+YPIEHGIVTNWDDMEK	YPIEH[UniProt:Tele-methylhistidine on H]GIVTNWD[Less Common:Water loss on D]DMEK	Tele-methylhistidine on H Water loss on D
+CSVCPDYDLCSVCEGK	[Common Artifact:Ammonia loss on C]C[Common Fixed:Carbamidomethyl on C]SVC[Common Fixed:Carbamidomethyl on C]PDYDLC[Common Fixed:Carbamidomethyl on C]SVC[Common Fixed:Carbamidomethyl on C]EGK	Ammonia loss on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C Carbamidomethyl on C
+GVAMNPVEHPFGGGNHQHIGK	GVAMNPVEHPFGGGNH[UniProt:(3S)-3-hydroxyhistidine on H]QHIGK	(3S)-3-hydroxyhistidine on H
+GVAMNPVEHPFGGGNHQHIGKPSTIR	GVAMNPVEHPFGGGNH[UniProt:(3S)-3-hydroxyhistidine on H]QHIGKPSTIR	(3S)-3-hydroxyhistidine on H
+SVEMHHEALSEALPGDNVGFNVK	SVEMHHEALSE[UniProt:5-glutamyl glycerylphosphorylethanolamine on E]ALPGDNVGFNVK	5-glutamyl glycerylphosphorylethanolamine on E
+TCNCETEDYGEK	TC[Common Fixed:Carbamidomethyl on C]NC[Common Fixed:Carbamidomethyl on C]E[Metal:Fe[III] on E]TEDYGEK	Carbamidomethyl on C Carbamidomethyl on C Fe[III] on E
+VAKFCYADKSLLNK	VAK[UniProt:N6-acetyllysine on K]FC[Common Fixed:Carbamidomethyl on C]YADK[UniProt:N6-succinyllysine on K]SLLNK	Carbamidomethyl on C N6-acetyllysine on K N6-succinyllysine on K
+GEKGELGPPGLLGPTGPKGDIGNK	GEKGELGPPGLLGPTGPK[UniProt:5-hydroxylysine on K]GDIGNK	5-hydroxylysine on K
+VGGTAPASKR	VGGTAPAS[UniProt:ADP-ribosylserine on S]K[UniProt:N6,N6,N6-trimethyllysine on K]R	ADP-ribosylserine on S N6,N6,N6-trimethyllysine on K
+PVICATQMLESMIK	P[Less Common:Proline pyrrole to pyrrolidine six member ring on P]VIC[Common Fixed:Carbamidomethyl on C]ATQM[Less Common:Oxidation and then loss on oxidized M side chain]LESMIK	Carbamidomethyl on C Oxidation and then loss on oxidized M side chain Proline pyrrole to pyrrolidine six member ring on P
+KGMNMYLTK	KGMNMY[UniProt:3'-nitrotyrosine on Y]LTK	3'-nitrotyrosine on Y
+AGGKGLGKGGK	AGGK[UniProt:N6-(beta-hydroxybutyryl)lysine on K]GLGK[UniProt:N6-(beta-hydroxybutyryl)lysine on K]GGK	N6-(beta-hydroxybutyryl)lysine on K N6-(beta-hydroxybutyryl)lysine on K
+RPPSGFFLFCSEFR	R[Common Biological:Citrullination on R]PPSGFFLFC[UniProt:Cysteine sulfonic acid (-SO3H) on C]SEFR	Citrullination on R Cysteine sulfonic acid (-SO3H) on C
+PGAQGEPGPKGDK	PGAQGEPGPK[UniProt:5-hydroxylysine on K]GDK	5-hydroxylysine on K
+KHPGGRGNAGGLHHHR	KHPGGRGNAGGLH[UniProt:(3S)-3-hydroxyhistidine on H]HHR	(3S)-3-hydroxyhistidine on H
+VGGTAPASKRAVK	VGGTAPAS[UniProt:ADP-ribosylserine on S]KRAVK[UniProt:N6-(beta-hydroxybutyryl)lysine on K]	ADP-ribosylserine on S N6-(beta-hydroxybutyryl)lysine on K
+EGAPGKPGAVGDAGPFGR	EGAPGK[UniProt:5-hydroxylysine on K]PGAVGDAGPFGR	5-hydroxylysine on K
+TILCCNICRSGIR	TILC[UniProt:S-farnesyl cysteine on C]C[Common Fixed:Carbamidomethyl on C]NIC[Common Fixed:Carbamidomethyl on C]RSGIR	Carbamidomethyl on C Carbamidomethyl on C S-farnesyl cysteine on C
+GLGLSKAYVGQKSSFTVDCSK	GLGLSK[UniProt:N6-succinyllysine on K]AYVGQK[UniProt:N6-acetyllysine on K]SSFTVDC[Common Fixed:Carbamidomethyl on C]SK	Carbamidomethyl on C N6-acetyllysine on K N6-succinyllysine on K
+EGPLGALGDFGRDGK	EGPLGALGDFGRDGK[UniProt:5-hydroxylysine on K]	5-hydroxylysine on K
+KATAWQAPR	K[UniProt:N6-(2-hydroxyisobutyryl)lysine on K]ATAWQ[Common Artifact:Deamidation on Q]APR	Deamidation on Q N6-(2-hydroxyisobutyryl)lysine on K
+IGGTPPARKGAAK	IGGTPPARK[UniProt:N6-(beta-hydroxybutyryl)lysine on K]GAAK[UniProt:N6-(beta-hydroxybutyryl)lysine on K]	N6-(beta-hydroxybutyryl)lysine on K N6-(beta-hydroxybutyryl)lysine on K
+VPDESLFLNSGGDSLK	VPDESLFLNSGGDS[UniProt:O-(pantetheine 4'-phosphoryl)serine on S]LK	O-(pantetheine 4'-phosphoryl)serine on S
+HIGFVYHPTK	HIGFVY[UniProt:3'-nitrotyrosine on Y]HPTK	3'-nitrotyrosine on Y
+GQTDRCNVNDPSSLKK	GQTDRC[Common Fixed:Carbamidomethyl on C]N[UniProt:(3S)-3-hydroxyasparagine on N]VNDPSSLKK	(3S)-3-hydroxyasparagine on N Carbamidomethyl on C
+QTARKSTGGK	QTARK[UniProt:N6,N6,N6-trimethyllysine on K]S[UniProt:ADP-ribosylserine on S]TGGK	ADP-ribosylserine on S N6,N6,N6-trimethyllysine on K
+DYKRGYPITIK	DYKRGY[UniProt:3'-nitrotyrosine on Y]PITIK	3'-nitrotyrosine on Y
+RTGVIHEKQTAVSVENFIAELLPDK	RTGVIHEKQTAVSVEN[Common Artifact:Ammonia loss on N]FIAELLP[Less Common:Proline pyrrole to pyrrolidine six member ring on P]DK	Ammonia loss on N Proline pyrrole to pyrrolidine six member ring on P
+FAELKEK	FAE[UniProt:5-glutamyl glycerylphosphorylethanolamine on E]LKEK	5-glutamyl glycerylphosphorylethanolamine on E
+GIPVMGHSEGICHMYVDSEASVDK	GIPVM[Less Common:Oxidation and then loss on oxidized M side chain]GHSEGIC[Common Fixed:Carbamidomethyl on C]HM[Less Common:Oxidation and then loss on oxidized M side chain]YVDSEASVDK	Carbamidomethyl on C Oxidation and then loss on oxidized M side chain Oxidation and then loss on oxidized M side chain
+GPPGAKGNK	GPPGAKGNK[UniProt:5-hydroxylysine on K]	5-hydroxylysine on K
+MGCTLSAEERAALERSK	M[Common Variable:Oxidation on M]GC[UniProt:S-palmitoyl cysteine on C]TLSAEERAALERSK	Oxidation on M S-palmitoyl cysteine on C
+SPCCMPTTVFANIFHAGGQEMIR	SPC[Common Fixed:Carbamidomethyl on C]C[UniProt:3-oxoalanine (Cys) on C]M[Common Variable:Oxidation on M]PTTVFANIFHAGGQEMIR	3-oxoalanine (Cys) on C Carbamidomethyl on C Oxidation on M
+DGTSGEKGER	DGTSGEK[UniProt:5-hydroxylysine on K]GER	5-hydroxylysine on K
+GRGGPMGRGGYGGGGSGGGGR	GRGGPMGR[UniProt:Asymmetric dimethylarginine on R]GGYGGGGSGGGGR[UniProt:Omega-N-methylarginine on R]	Asymmetric dimethylarginine on R Omega-N-methylarginine on R
+TILCCNICR	TILC[UniProt:S-farnesyl cysteine on C]C[Common Fixed:Carbamidomethyl on C]NIC[Common Fixed:Carbamidomethyl on C]R	Carbamidomethyl on C Carbamidomethyl on C S-farnesyl cysteine on C
+KATNEACSGMHIKNYVDTLGDK	K[UniProt:N6-succinyllysine on K]ATNEAC[Common Fixed:Carbamidomethyl on C]SGMHIK[UniProt:N6-acetyllysine on K]NYVDTLGDK	Carbamidomethyl on C N6-acetyllysine on K N6-succinyllysine on K
+GFPGADGVAGPKGPAGER	GFPGADGVAGPK[UniProt:5-hydroxylysine on K]GPAGER	5-hydroxylysine on K
diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj
index 81b9ab12d..3290e940f 100644
--- a/mzLib/Test/Test.csproj
+++ b/mzLib/Test/Test.csproj
@@ -486,6 +486,9 @@
     <None Update="ModificationTests\CommonBiological.txt">
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </None>
+    <None Update="ModificationTests\ModifiedFullSequencesAndModificationsExamples.txt">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
     <None Update="ModificationTests\ModsWithComments.txt">
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </None>
@@ -545,6 +548,27 @@
     </None>
     <None Update="Transcriptomics\TestData\ModomicsUnmodifiedTrimmed.fasta.gz">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\20100604_Velos1_TaGe_SA_A549_3_first_noRt.mzML">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\20100604_Velos1_TaGe_SA_A549_3_second_noRt.mzML">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\AllPSMs.psmtsv">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\AllPSMs_IsoID.psmtsv">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\AllPSMs_IsoID_Combined.psmtsv">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\AllPSMs_IsoID_Mixture.psmtsv">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="XICData\AllPSMs_IsoID_MonoIsotopicmassTolerance.psmtsv">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </None>
 	<None Update="DataFiles\centroid_1x_MS1_4x_autoMS2.d\**">
 		  <CopyToOutputDirectory>Always</CopyToOutputDirectory>
diff --git a/mzLib/Test/TestMzLibUtil.cs b/mzLib/Test/TestMzLibUtil.cs
index 73fbdda41..444b0a29c 100644
--- a/mzLib/Test/TestMzLibUtil.cs
+++ b/mzLib/Test/TestMzLibUtil.cs
@@ -2,6 +2,9 @@
 using Assert = NUnit.Framework.Legacy.ClassicAssert;
 using MzLibUtil;
 using Readers;
+using System.IO;
+using System.Linq;
+using System.Text.RegularExpressions;
 
 namespace Test
 {
@@ -33,6 +36,66 @@ public static void TestPeriodTolerantFilenameWithoutExtension(string filenameAnd
             Assert.AreEqual(expectedResult, result);
             Assert.AreEqual(expectedResult, extensionResult);
         }
+        [Test]
+        public static void TestParseModificationsSideChainModOnly()
+        {
+            string fullSeq = "DM[Common Variable:Oxidation on M]MELVQPSISGVDLDK"; // Single side-chain mod, no terminal mods
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
+            Assert.That(mods.Count, Is.EqualTo(1)); // Constraint asserts report expected vs. actual on failure
+            Assert.That(mods, Does.ContainKey(2)); // The modified M is the second residue
+            Assert.That(mods[2], Is.EqualTo("Common Variable:Oxidation on M"));
+        }
+
+        [Test]
+        public static void TestParseModificationsSideChainAndTerminusMods()
+        {
+            string fullSeq = "[UniProt:N-acetylglutamate on E]EDM[Common Variable:Oxidation on M]MELVQPSISGVDLDK[Test Mod2: ModName2 on K]-[Test Mod: ModName on K C-Terminus]";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: false);
+            Assert.That(mods.Count, Is.EqualTo(4)); // Constraint asserts report expected vs. actual on failure
+            Assert.That(mods, Does.ContainKey(0)); // Mod written before the first residue
+            Assert.That(mods, Does.ContainKey(3)); // Side-chain mod on the third residue (M)
+            Assert.That(mods, Does.ContainKey(18)); // Side-chain mod on the last of the 18 residues (K)
+            Assert.That(mods, Does.ContainKey(19)); // Mod written after the '-' separator, one past the last residue
+            Assert.That(mods[0], Is.EqualTo("UniProt:N-acetylglutamate on E"));
+            Assert.That(mods[3], Is.EqualTo("Common Variable:Oxidation on M"));
+            Assert.That(mods[18], Is.EqualTo("Test Mod2: ModName2 on K"));
+            Assert.That(mods[19], Is.EqualTo("Test Mod: ModName on K C-Terminus"));
+        }
+
+        [Test]
+        public static void TestParseModificationsIgnoreTerminusMod()
+        {
+            string fullSeq = "[UniProt:N-acetylglutamate on E]EDM[Common Variable:Oxidation on M]MELVQPSISGVDLDK[Test Mod2: ModName2 on K]-[Test Mod: ModName on K C-Terminus]";
+            var mods = fullSeq.ParseModifications(ignoreTerminusMod: true);
+            Assert.That(mods.Count, Is.EqualTo(2)); // Only the two side-chain mods are expected; keys 0 and 19 must be absent
+            Assert.That(mods, Does.ContainKey(3));
+            Assert.That(mods, Does.ContainKey(18));
+            Assert.That(mods[3], Is.EqualTo("Common Variable:Oxidation on M"));
+            Assert.That(mods[18], Is.EqualTo("Test Mod2: ModName2 on K"));
+        }
+
+        [Test]
+        public static void TestParseModificationsWithTsvExamples()
+        {

+            var path = Path.Combine(TestContext.CurrentContext.TestDirectory, "ModificationTests", "ModifiedFullSequencesAndModificationsExamples.txt"); // Separator- and cwd-independent
+            var lines = File.ReadAllLines(path);
+            Regex expectedModsPattern = new(@"(?<=on [A-Z])\s(?=[A-Z])"); // Built once; matches whitespace after "on <letter>" and before an uppercase letter
+            foreach (var line in lines.Skip(1)) // The first row is the header
+            {
+                if (!line.Contains('|')) // Skip any ambiguous sequences
+                {
+                    var parts = line.Split('\t');
+                    var fullSeq = parts[1];
+                    // Sort both the expected and the found mods so the comparison is independent of listing order
+                    var expectedMods = string.Join(' ', expectedModsPattern.Split(parts[2]).Order());
+                    var mods = fullSeq.ParseModifications();
+                    var foundMods = string.Join(' ', mods.Values.Select(x => x.Split(':')[1]).Order());

+                    Assert.AreEqual(expectedMods, foundMods, $"Unexpected modifications parsed from: {fullSeq}");
+                }
+            }
+        }
 
         [Test]
         public static void TestToEnum()

From 6678f5413edce3e85b195d2aee776c0944a7dba9 Mon Sep 17 00:00:00 2001
From: Peter Cruz Parrilla <peter.cruzparrilla@gmail.com>
Date: Fri, 28 Mar 2025 11:08:48 -0500
Subject: [PATCH 7/7] Removed the simple method I considered for parsing mods
 from ambiguous sequences, since it covers most cases but misses many
 interesting ones. Best to remove it to maintain code coverage. I will add
 some notes on the issue to the PR for future reference.

---
 mzLib/MzLibUtil/ClassExtensions.cs | 33 ------------------------------
 1 file changed, 33 deletions(-)

diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index 945df8cdc..95d8c7845 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -19,8 +19,6 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
-using System.Runtime.CompilerServices;
-using System.Runtime.ConstrainedExecution;
 using System.Text.RegularExpressions;
 
 namespace MzLibUtil
@@ -77,37 +75,6 @@ public static Dictionary<int, string> ParseModifications(this string fullSequenc
             return modDict;
         }
 
-        // This method is a WIP. It is not currently used, and may be removed in the future depending on how/if we want to handle ambiguity here.
-        public static Dictionary<int, string> ParseModificationsWithAmbiguity(this string ambiguousFullSequences, bool ignoreTerminusMod = false)
-        {
-            var modDicts = ambiguousFullSequences.Split('|').Select(fullSeq => fullSeq.ParseModifications(ignoreTerminusMod)).ToList();
-
-            if (modDicts.Count == 1) { return modDicts[0]; }
-            else
-            {
-                var modDict = modDicts.First();
-
-                foreach (var md in modDicts.Skip(1))
-                {
-                    foreach (var mod in md)
-                    {
-                        if (modDict.ContainsKey(mod.Key))
-                        {
-                            if (!modDict[mod.Key].Split('|').Contains(mod.Value))
-                            {
-                                modDict[mod.Key] += "|" + mod.Value;
-                            }
-                        }
-                        else
-                        {
-                            modDict.Add(mod.Key, mod.Value);
-                        }
-                    }
-                }
-                return modDict;
-            }
-        }
-
         /// <summary>
         /// Fixes an issue where the | appears and throws off the numbering if there are multiple mods on a single amino acid.
         /// </summary>