From 67ef7a4a56e78e40a2ed62f866df2ad2598a73e6 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 12:17:37 +0200 Subject: [PATCH 01/10] Add InstaNovo identification support --- .../experiment/identification/Advocate.java | 14 +- .../InstaNovoCsvIdfileReader.java | 596 ++++++++++++++++++ .../idfilereaders/InstaNovoIdfileReader.java | 33 + .../InstaNovoPlusIdfileReader.java | 33 + .../InstaNovoRefinedIdfileReader.java | 33 + .../identification/idfilereaders/package.html | 12 +- .../search/SearchParameters.java | 14 + .../tool_specific/InstaNovoParameters.java | 217 +++++++ .../InstaNovoPlusParameters.java | 28 + .../identification/tool_specific/package.html | 11 +- ....experiment.io.identification.IdfileReader | 7 +- .../TestInstaNovoIdfileReader.java | 447 +++++++++++++ 12 files changed, 1432 insertions(+), 13 deletions(-) create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java create mode 100644 src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java create mode 100644 src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java create mode 100644 src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java diff --git a/src/main/java/com/compomics/util/experiment/identification/Advocate.java b/src/main/java/com/compomics/util/experiment/identification/Advocate.java index 778afdedb1..da66bf7066 100644 --- a/src/main/java/com/compomics/util/experiment/identification/Advocate.java +++ 
b/src/main/java/com/compomics/util/experiment/identification/Advocate.java @@ -182,6 +182,14 @@ public enum AdvocateType { * The MSFragger search engine. */ public static final Advocate msFragger = new Advocate(37, "MSFragger", AdvocateType.search_engine, new java.awt.Color(128, 128, 0)); + /** + * The InstaNovo de novo sequencing algorithm. + */ + public static final Advocate instanovo = new Advocate(38, "InstaNovo", AdvocateType.sequencing_algorithm, new Color(95, 158, 160)); + /** + * The InstaNovo+ de novo sequencing algorithm. + */ + public static final Advocate instanovoPlus = new Advocate(39, "InstaNovo+", AdvocateType.sequencing_algorithm, new Color(123, 104, 238)); /** * Advocate type for mzId files where no software is annotated. */ @@ -311,7 +319,7 @@ public String toString() { * @return the implemented advocates in an array */ public static Advocate[] values() { - Advocate[] result = new Advocate[40 + userAdvocates.size()]; + Advocate[] result = new Advocate[42 + userAdvocates.size()]; int i = 0; result[i] = peptideShaker; result[++i] = onyaseEngine; @@ -353,6 +361,8 @@ public static Advocate[] values() { result[++i] = coss; result[++i] = sage; result[++i] = msFragger; + result[++i] = instanovo; + result[++i] = instanovoPlus; for (Advocate advocate : userAdvocates.values()) { result[++i] = advocate; @@ -489,6 +499,8 @@ public String getPmid() { return "37819886"; } else if (this == msFragger) { return "28394336"; + } else if (this == instanovo || this == instanovoPlus) { + return null; } else { return null; } diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java new file mode 100644 index 0000000000..b1b96cc7d3 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -0,0 +1,596 @@ +package 
com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.Util; +import com.compomics.util.experiment.biology.proteins.Peptide; +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.identification.matches.ModificationMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; +import com.compomics.util.experiment.io.identification.IdfileReader; +import com.compomics.util.experiment.mass_spectrometry.SpectrumProvider; +import com.compomics.util.io.IoUtil; +import com.compomics.util.io.flat.SimpleFileReader; +import com.compomics.util.parameters.identification.advanced.SequenceMatchingParameters; +import com.compomics.util.parameters.identification.search.SearchParameters; +import com.compomics.util.waiting.WaitingHandler; +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import javax.xml.bind.JAXBException; + +/** + * Shared parser for InstaNovo v1.2.2 normalized CSV predictions. + * + * @author CompOmics + */ +abstract class InstaNovoCsvIdfileReader implements IdfileReader { + + /** + * The supported InstaNovo version. + */ + private static final String SOFTWARE_VERSION = "1.2.2"; + /** + * The CSV file. + */ + private final File csvFile; + /** + * The advocate used for peptide assumptions. + */ + private final Advocate advocate; + /** + * The extension this reader is registered for. + */ + private final String extension; + + /** + * Constructor. 
+     *
+     * @param csvFile the CSV file; the subclasses' default constructors pass
+     * null here for service loading
+     * @param advocate the advocate the peptide assumptions are reported under
+     * @param extension the registered extension
+     */
+    protected InstaNovoCsvIdfileReader(File csvFile, Advocate advocate, String extension) {
+        this.csvFile = csvFile;
+        this.advocate = advocate;
+        this.extension = extension;
+    }
+
+    @Override
+    public String getExtension() {
+        return extension;
+    }
+
+    /**
+     * Reads all spectrum matches by delegating to the five-argument overload
+     * with null sequence matching parameters and the expansion flag set.
+     */
+    @Override
+    public ArrayList<SpectrumMatch> getAllSpectrumMatches(
+            SpectrumProvider spectrumProvider,
+            WaitingHandler waitingHandler,
+            SearchParameters searchParameters
+    )
+            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
+
+        return getAllSpectrumMatches(
+                spectrumProvider,
+                waitingHandler,
+                searchParameters,
+                null,
+                true
+        );
+    }
+
+    /**
+     * Parses the CSV file into spectrum matches. Every row with a non-empty
+     * prediction yields one peptide assumption; rows resolving to the same
+     * spectrum file and title share one spectrum match, and matches are
+     * returned in order of first appearance. The log probability and its
+     * negation are passed as the two score arguments of the assumption. The
+     * waiting handler, search parameters, sequence matching parameters and
+     * expansion flag are not used by this implementation.
+     *
+     * @throws IllegalArgumentException if the spectrum provider is null, the
+     * file is empty, mandatory columns are missing, or a row cannot be parsed
+     * or matched to a spectrum
+     */
+    @Override
+    public ArrayList<SpectrumMatch> getAllSpectrumMatches(
+            SpectrumProvider spectrumProvider,
+            WaitingHandler waitingHandler,
+            SearchParameters searchParameters,
+            SequenceMatchingParameters sequenceMatchingPreferences,
+            boolean expandAaCombinations
+    )
+            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
+
+        if (spectrumProvider == null) {
+            throw new IllegalArgumentException("A spectrum provider is required to import InstaNovo results.");
+        }
+
+        ArrayList<SpectrumMatch> result = new ArrayList<>();
+        // Keyed by "<file>\n<title>" so that rows for one spectrum share a match.
+        HashMap<String, SpectrumMatch> matches = new HashMap<>();
+
+        try (SimpleFileReader reader = SimpleFileReader.getFileReader(csvFile)) {
+
+            String line = reader.readLine();
+
+            if (line == null) {
+                throw new IllegalArgumentException("The InstaNovo csv file is empty.");
+            }
+
+            ArrayList<String> headers = parseCsvLine(line);
+            HashMap<String, Integer> columnIndexes = getColumnIndexes(headers);
+
+            int experimentIndex = getOptionalColumn(columnIndexes, "experiment_name");
+            int spectrumIdIndex = getOptionalColumn(columnIndexes, "spectrum_id", "spectrum");
+            int scanNumberIndex = getOptionalColumn(columnIndexes, "scan_number", "scan");
+            int chargeIndex = getRequiredColumn(columnIndexes, "precursor_charge", "charge", "z");
+            int predictionIndex = getRequiredColumn(columnIndexes, "predictions", "prediction", "sequence");
+            int scoreIndex = getRequiredColumn(columnIndexes, "log_probs", "prediction_log_probability", "predictions_log_probability");
+
+            if (experimentIndex < 0 && spectrumIdIndex < 0 && scanNumberIndex < 0) {
+                throw new IllegalArgumentException("Mandatory spectrum identification columns are missing in the InstaNovo csv file.");
+            }
+
+            // The same for every row, so resolved once.
+            String csvFileName = IoUtil.getFileName(csvFile);
+
+            // The header is line 1; the counter tracks physical lines.
+            int lineNumber = 1;
+            while ((line = reader.readLine()) != null) {
+
+                lineNumber++;
+
+                if (line.trim().isEmpty()) {
+                    continue;
+                }
+
+                ArrayList<String> values = parseCsvLine(line);
+
+                String prediction = getValue(values, predictionIndex).trim();
+
+                // Rows without a prediction carry no identification.
+                if (prediction.isEmpty()) {
+                    continue;
+                }
+
+                String experimentName = experimentIndex >= 0 ? getValue(values, experimentIndex).trim() : "";
+                String spectrumId = spectrumIdIndex >= 0 ? getValue(values, spectrumIdIndex).trim() : "";
+                String scanNumber = scanNumberIndex >= 0 ? getValue(values, scanNumberIndex).trim() : "";
+                String spectrumFileName = getSpectrumFileName(spectrumProvider, experimentName, spectrumId);
+                String spectrumTitle = getSpectrumTitle(spectrumProvider, spectrumFileName, spectrumId, scanNumber);
+
+                int charge = parseCharge(getValue(values, chargeIndex), lineNumber);
+                double logProbability = Util.readDoubleAsString(getValue(values, scoreIndex));
+                double score = -logProbability;
+
+                ParsedPeptide parsedPeptide = parsePeptide(prediction, lineNumber);
+                Peptide peptide = new Peptide(parsedPeptide.sequence, parsedPeptide.modificationMatches);
+                PeptideAssumption peptideAssumption = new PeptideAssumption(
+                        peptide,
+                        1,
+                        advocate.getIndex(),
+                        charge,
+                        logProbability,
+                        score,
+                        csvFileName
+                );
+
+                String matchKey = spectrumFileName + "\n" + spectrumTitle;
+                SpectrumMatch spectrumMatch = matches.get(matchKey);
+
+                if (spectrumMatch == null) {
+                    spectrumMatch = new SpectrumMatch(spectrumFileName, spectrumTitle);
+                    matches.put(matchKey, spectrumMatch);
+                    result.add(spectrumMatch);
+                }
+
+                spectrumMatch.addPeptideAssumption(advocate.getIndex(), peptideAssumption);
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Parses a precursor charge. Surrounding whitespace is ignored and an
+     * integral floating point value such as "2.0" is accepted.
+     *
+     * @param value the raw cell value
+     * @param lineNumber the line number, used in the error message
+     *
+     * @return the charge
+     *
+     * @throws IllegalArgumentException if the value is not an integral number
+     */
+    private int parseCharge(String value, int lineNumber) {
+
+        String trimmed = value.trim();
+
+        try {
+            return Integer.parseInt(trimmed);
+        } catch (NumberFormatException notAnInteger) {
+            // Fall through to the floating point form below.
+        }
+
+        try {
+            double asDouble = Double.parseDouble(trimmed);
+            if (asDouble == Math.rint(asDouble) && Math.abs(asDouble) <= Integer.MAX_VALUE) {
+                return (int) asDouble;
+            }
+        } catch (NumberFormatException notANumber) {
+            // Reported below together with the line number.
+        }
+
+        throw new IllegalArgumentException("Invalid precursor charge '" + value + "' in InstaNovo csv file at line " + lineNumber + ".");
+    }
+
+    @Override
+    public void close() throws IOException {
+        // Nothing to close.
+    }
+
+    /**
+     * Returns the supported version for this reader's advocate. For the
+     * InstaNovo+ advocate with an extension containing "refined", the same
+     * version is also listed under the InstaNovo advocate name.
+     */
+    @Override
+    public HashMap<String, ArrayList<String>> getSoftwareVersions() {
+
+        HashMap<String, ArrayList<String>> result = new HashMap<>();
+        ArrayList<String> versions = new ArrayList<>();
+        versions.add(SOFTWARE_VERSION);
+        result.put(advocate.getName(), versions);
+
+        if (advocate == Advocate.instanovoPlus && getExtension().contains("refined")) {
+
+            ArrayList<String> instaNovoVersions = new ArrayList<>();
+            instaNovoVersions.add(SOFTWARE_VERSION);
+            result.put(Advocate.instanovo.getName(), instaNovoVersions);
+        }
+
+        return result;
+    }
+
+    @Override
+    public boolean hasDeNovoTags() {
+        return false;
+    }
+
+    /**
+     * Returns the spectrum file name without extension. The experiment name
+     * is used when available, then the part of the spectrum id before its
+     * first ':', then the only file known to the spectrum provider.
+     *
+     * @param spectrumProvider the spectrum provider
+     * @param experimentName the experiment name
+     * @param spectrumId the spectrum id
+     *
+     * @return the spectrum file name without extension
+     *
+     * @throws IllegalArgumentException if no file name can be inferred
+     */
+    private String getSpectrumFileName(SpectrumProvider spectrumProvider, String experimentName, String spectrumId) {
+
+        String fileName = experimentName;
+
+        if (fileName == null || fileName.isEmpty()) {
+            int separatorIndex = spectrumId.indexOf(':');
+            if (separatorIndex > 0) {
+                fileName = spectrumId.substring(0, separatorIndex);
+            }
+        }
+
+        if (fileName == null || fileName.isEmpty()) {
+
+            String[] fileNames = spectrumProvider.getOrderedFileNamesWithoutExtensions();
+
+            // Only unambiguous when the provider holds exactly one file.
+            if (fileNames != null && fileNames.length == 1) {
+                fileName = fileNames[0];
+            }
+        }
+
+        if (fileName == null || fileName.isEmpty()) {
+            throw new IllegalArgumentException("Unable to infer the spectrum file name from the InstaNovo csv file.");
+        }
+
+        return IoUtil.removeExtension(fileName);
+    }
+
+    /**
+     * Resolves the spectrum title.
+     *
+     * Candidates are tried in this order: the complete spectrum id, the part
+     * of the spectrum id after its first ':', and the scan number. For each
+     * candidate an exact title is preferred over a title differing only in
+     * case. If nothing matches and the scan number is an integer within the
+     * bounds of the provider's title array, it is used as a zero-based index
+     * into that array.
+     *
+     * @param spectrumProvider the spectrum provider
+     * @param spectrumFileName the spectrum file name without extension
+     * @param spectrumId the spectrum id
+     * @param scanNumber the scan number
+     *
+     * @return the spectrum title
+     *
+     * @throws IllegalArgumentException if the file has no spectra or no
+     * title can be resolved
+     */
+    private String getSpectrumTitle(SpectrumProvider spectrumProvider, String spectrumFileName, String spectrumId, String scanNumber) {
+
+        TitleIndex titleIndex = getTitleIndex(spectrumProvider, spectrumFileName);
+
+        ArrayList<String> candidates = new ArrayList<>(3);
+
+        if (spectrumId != null && !spectrumId.isEmpty()) {
+            candidates.add(spectrumId);
+            int separatorIndex = spectrumId.indexOf(':');
+            if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) {
+                candidates.add(spectrumId.substring(separatorIndex + 1));
+            }
+        }
+
+        if (scanNumber != null && !scanNumber.isEmpty()) {
+            candidates.add(scanNumber);
+        }
+
+        for (String candidate : candidates) {
+
+            String title = titleIndex.exactTitles.get(candidate);
+
+            if (title == null) {
+                title = titleIndex.lowerCaseTitles.get(candidate.toLowerCase(java.util.Locale.ROOT));
+            }
+
+            if (title != null) {
+                return title;
+            }
+        }
+
+        if (scanNumber != null && !scanNumber.isEmpty()) {
+            try {
+                int scanIndex = Integer.parseInt(scanNumber);
+                if (scanIndex >= 0 && scanIndex < titleIndex.titles.length) {
+                    return titleIndex.titles[scanIndex];
+                }
+            } catch (NumberFormatException e) {
+                // Ignore and report the missing title below.
+            }
+        }
+
+        throw new IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'.");
+    }
+
+    /**
+     * The spectrum provider the cached title indexes were built from.
+     */
+    private SpectrumProvider titleIndexProvider = null;
+    /**
+     * Title indexes per spectrum file name, so that the titles of a file are
+     * fetched and indexed once instead of once per CSV row. Not synchronized.
+     */
+    private final HashMap<String, TitleIndex> titleIndexes = new HashMap<>();
+
+    /**
+     * Returns the title index of a spectrum file, building it on first use.
+     * The cache is dropped when a different spectrum provider is passed.
+     *
+     * @param spectrumProvider the spectrum provider
+     * @param spectrumFileName the spectrum file name without extension
+     *
+     * @return the title index
+     *
+     * @throws IllegalArgumentException if the provider has no titles for the
+     * file
+     */
+    private TitleIndex getTitleIndex(SpectrumProvider spectrumProvider, String spectrumFileName) {
+
+        if (titleIndexProvider != spectrumProvider) {
+            titleIndexes.clear();
+            titleIndexProvider = spectrumProvider;
+        }
+
+        TitleIndex titleIndex = titleIndexes.get(spectrumFileName);
+
+        if (titleIndex == null) {
+
+            String[] titles = spectrumProvider.getSpectrumTitles(spectrumFileName);
+
+            if (titles == null || titles.length == 0) {
+                throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'.");
+            }
+
+            titleIndex = new TitleIndex(titles);
+            titleIndexes.put(spectrumFileName, titleIndex);
+        }
+
+        return titleIndex;
+    }
+
+    /**
+     * Lookup tables over the spectrum titles of one file.
+     */
+    private static class TitleIndex {
+
+        /**
+         * The titles in provider order.
+         */
+        private final String[] titles;
+        /**
+         * Every title mapped to itself.
+         */
+        private final HashMap<String, String> exactTitles = new HashMap<>();
+        /**
+         * Titles lower-cased with the root locale, mapped to the first title
+         * in provider order having that lower-case form.
+         */
+        private final HashMap<String, String> lowerCaseTitles = new HashMap<>();
+
+        /**
+         * Constructor.
+         *
+         * @param titles the titles in provider order
+         */
+        private TitleIndex(String[] titles) {
+
+            this.titles = titles;
+
+            for (String title : titles) {
+
+                if (title == null) {
+                    continue;
+                }
+
+                exactTitles.put(title, title);
+
+                String lowerCaseTitle = title.toLowerCase(java.util.Locale.ROOT);
+
+                // The first title wins, as in a linear scan in provider order.
+                if (!lowerCaseTitles.containsKey(lowerCaseTitle)) {
+                    lowerCaseTitles.put(lowerCaseTitle, title);
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses a peptide sequence with optional UniMod annotations.
+     *
+     * Letters are upper-cased and appended to the bare sequence. Every
+     * bracketed annotation is mapped using the residue preceding it, or the
+     * next residue when it stands before the first residue. Annotations
+     * without a mapping and all other characters are skipped.
+     *
+     * @param prediction the prediction
+     * @param lineNumber the line number
+     *
+     * @return the parsed peptide
+     *
+     * @throws IllegalArgumentException if an annotation is not closed or the
+     * prediction contains no residue
+     */
+    private ParsedPeptide parsePeptide(String prediction, int lineNumber) {
+
+        StringBuilder sequence = new StringBuilder(prediction.length());
+        ArrayList<ModificationMatch> modifications = new ArrayList<>();
+
+        for (int i = 0; i < prediction.length(); i++) {
+
+            char currentChar = prediction.charAt(i);
+
+            if (currentChar == '[') {
+
+                int endIndex = prediction.indexOf(']', i);
+
+                if (endIndex < 0) {
+                    throw new IllegalArgumentException("Invalid UniMod annotation in InstaNovo csv file at line " + lineNumber + ".");
+                }
+
+                String annotation = prediction.substring(i + 1, endIndex);
+
+                // One-based site of the residue the annotation follows; zero
+                // while no residue has been read yet.
+                int site = sequence.length();
+                Character previousResidue = site > 0 ? Character.valueOf(sequence.charAt(site - 1)) : null;
+                Character nextResidue = previousResidue == null ? getNextResidue(prediction, endIndex + 1) : null;
+                UtilitiesModification modification = getUtilitiesModification(annotation, previousResidue, nextResidue, site);
+
+                // NOTE(review): an annotation without a mapping is dropped
+                // silently, leaving the residue unmodified; confirm that this
+                // is intended.
+                if (modification != null) {
+                    modifications.add(new ModificationMatch(modification.name, modification.site));
+                }
+
+                // Resume after the closing bracket.
+                i = endIndex;
+
+            } else if (Character.isLetter(currentChar)) {
+
+                sequence.append(Character.toUpperCase(currentChar));
+            }
+        }
+
+        if (sequence.length() == 0) {
+            throw new IllegalArgumentException("No peptide sequence found in InstaNovo csv file at line " + lineNumber + ".");
+        }
+
+        return new ParsedPeptide(sequence.toString(), modifications.toArray(new ModificationMatch[0]));
+    }
+
+    /**
+     * Maps InstaNovo UniMod annotations to Utilities modification names.
+     *
+     * The annotation must start with "UNIMOD:", compared case-insensitively
+     * and independently of the default locale. The accession after the
+     * prefix is mapped only for the residue and terminus combinations listed
+     * in the method body.
+     *
+     * @param annotation the annotation
+     * @param previousResidue the preceding residue, null for N-terminal
+     * annotations
+     * @param nextResidue the next residue, null when unavailable
+     * @param site the preceding residue site
+     *
+     * @return the Utilities modification, or null if unsupported
+     */
+    private UtilitiesModification getUtilitiesModification(String annotation, Character previousResidue, Character nextResidue, int site) {
+
+        String prefix = "UNIMOD:";
+
+        // regionMatches compares character by character; toUpperCase() would
+        // use the default locale, where "unimod:" does not always become
+        // "UNIMOD:".
+        if (!annotation.regionMatches(true, 0, prefix, 0, prefix.length())) {
+            return null;
+        }
+
+        String accession = annotation.substring(prefix.length()).trim();
+        boolean nTerminal = previousResidue == null;
+        // '\0' never equals a residue letter, so N-terminal annotations fall
+        // through every residue test below.
+        char residue = nTerminal ? '\0' : previousResidue;
+
+        switch (accession) {
+
+            case "1":
+                return nTerminal ? new UtilitiesModification("Acetylation of peptide N-term", 0) : null;
+
+            case "4":
+                return residue == 'C' ? new UtilitiesModification("Carbamidomethylation of C", site) : null;
+
+            case "5":
+                // NOTE(review): mapped to the protein N-term modification
+                // although the annotation sits at the peptide N-term; confirm.
+                return nTerminal ? new UtilitiesModification("Carbamilation of protein N-term", 0) : null;
+
+            case "7":
+                if (residue == 'N' || residue == 'Q') {
+                    return new UtilitiesModification("Deamidation of " + residue, site);
+                } else if (residue == 'R') {
+                    return new UtilitiesModification("Citrullination of R", site);
+                }
+                return null;
+
+            case "35":
+                if ("MPKCN".indexOf(residue) >= 0) {
+                    return new UtilitiesModification("Oxidation of " + residue, site);
+                }
+                return null;
+
+            case "21":
+                if ("STY".indexOf(residue) >= 0) {
+                    return new UtilitiesModification("Phosphorylation of " + residue, site);
+                }
+                return null;
+
+            case "385":
+                if (residue == 'N' && site > 0) {
+                    return new UtilitiesModification("Ammonia loss from N", site);
+                } else if (residue == 'C' && site == 1) {
+                    return new UtilitiesModification("Pyrolidone from carbamidomethylated C", site);
+                } else if (nTerminal && nextResidue != null) {
+                    // Written before the first residue: applies to residue 1.
+                    if (nextResidue == 'N') {
+                        return new UtilitiesModification("Ammonia loss from N", 1);
+                    } else if (nextResidue == 'C') {
+                        return new UtilitiesModification("Pyrolidone from carbamidomethylated C", 1);
+                    }
+                }
+                return null;
+
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Returns the next residue in the prediction. Bracketed annotations are
+     * skipped, so the letters inside them are never taken for a residue.
+     *
+     * @param prediction the prediction
+     * @param startIndex the start index
+     *
+     * @return the next residue, or null
+     */
+    private Character getNextResidue(String prediction, int startIndex) {
+
+        int i = startIndex;
+
+        while (i < prediction.length()) {
+
+            char currentChar = prediction.charAt(i);
+
+            if (currentChar == '[') {
+
+                int endIndex = prediction.indexOf(']', i);
+
+                // An unclosed annotation leaves no residue to return.
+                if (endIndex < 0) {
+                    return null;
+                }
+
+                i = endIndex + 1;
+
+            } else if (Character.isLetter(currentChar)) {
+
+                return Character.toUpperCase(currentChar);
+
+            } else {
+
+                i++;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns a value from a parsed CSV row.
+     *
+     * @param values the values
+     * @param index the index
+     *
+     * @return the value, or an empty string if the row has no such column
+     */
+    private String getValue(ArrayList<String> values, int index) {
+        return index >= 0 && index < values.size() ? values.get(index) : "";
+    }
+
+    /**
+     * Returns indexes by lowercase header.
+     *
+     * Headers are trimmed and lower-cased with the root locale, so the
+     * lookup does not depend on the default locale. When a header occurs
+     * more than once, the last occurrence is kept.
+     *
+     * @param headers the headers
+     *
+     * @return the indexes
+     */
+    private HashMap<String, Integer> getColumnIndexes(ArrayList<String> headers) {
+
+        HashMap<String, Integer> result = new HashMap<>();
+
+        for (int i = 0; i < headers.size(); i++) {
+            result.put(headers.get(i).trim().toLowerCase(java.util.Locale.ROOT), i);
+        }
+
+        return result;
+    }
+
+    /**
+     * Returns an optional column. The names are tried in the given order and
+     * the first one present wins.
+     *
+     * @param columnIndexes the column indexes
+     * @param columnNames the column names
+     *
+     * @return the column index, or -1
+     */
+    private int getOptionalColumn(HashMap<String, Integer> columnIndexes, String... columnNames) {
+
+        for (String columnName : columnNames) {
+
+            Integer columnIndex = columnIndexes.get(columnName.toLowerCase(java.util.Locale.ROOT));
+
+            if (columnIndex != null) {
+                return columnIndex;
+            }
+        }
+
+        return -1;
+    }
+
+    /**
+     * Returns a required column.
+     *
+     * @param columnIndexes the column indexes
+     * @param columnNames the column names
+     *
+     * @return the column index
+     *
+     * @throws IllegalArgumentException if none of the names is present
+     */
+    private int getRequiredColumn(HashMap<String, Integer> columnIndexes, String... columnNames) {
+
+        int columnIndex = getOptionalColumn(columnIndexes, columnNames);
+
+        if (columnIndex < 0) {
+            throw new IllegalArgumentException("Mandatory columns are missing in the InstaNovo csv file.");
+        }
+
+        return columnIndex;
+    }
+
+    /**
+     * Parses a CSV line.
+     *
+     * Commas outside double quotes separate values. A double quote toggles
+     * the quoted state and is not part of the value, except that two
+     * consecutive double quotes inside a quoted section yield one literal
+     * double quote. Values are not trimmed. Quoted values spanning several
+     * lines are not supported because the input is a single line.
+     *
+     * @param line the line
+     *
+     * @return the values
+     */
+    private ArrayList<String> parseCsvLine(String line) {
+
+        ArrayList<String> values = new ArrayList<>();
+        StringBuilder currentValue = new StringBuilder();
+        boolean inQuotes = false;
+        int length = line.length();
+
+        for (int i = 0; i < length; i++) {
+
+            char currentChar = line.charAt(i);
+
+            if (currentChar == '"') {
+
+                boolean escapedQuote = inQuotes && i + 1 < length && line.charAt(i + 1) == '"';
+
+                if (escapedQuote) {
+                    currentValue.append('"');
+                    // Skip the second quote of the pair.
+                    i++;
+                } else {
+                    inQuotes = !inQuotes;
+                }
+
+            } else if (currentChar == ',' && !inQuotes) {
+
+                values.add(currentValue.toString());
+                currentValue.setLength(0);
+
+            } else {
+
+                currentValue.append(currentChar);
+            }
+        }
+
+        // The last value has no trailing comma.
+        values.add(currentValue.toString());
+
+        return values;
+    }
+
+    /**
+     * Parsed peptide values.
+     */
+    private static class ParsedPeptide {
+
+        /**
+         * The bare sequence.
+         */
+        private final String sequence;
+        /**
+         * The variable modifications.
+         */
+        private final ModificationMatch[] modificationMatches;
+
+        /**
+         * Constructor.
+         *
+         * @param sequence the sequence
+         * @param modificationMatches the modification matches
+         */
+        private ParsedPeptide(String sequence, ModificationMatch[] modificationMatches) {
+            this.sequence = sequence;
+            this.modificationMatches = modificationMatches;
+        }
+    }
+
+    /**
+     * Utilities modification mapping.
+     */
+    private static class UtilitiesModification {
+
+        /**
+         * The modification name.
+         */
+        private final String name;
+        /**
+         * The modification site.
+         */
+        private final int site;
+
+        /**
+         * Constructor.
+         *
+         * @param name the Utilities modification name
+         * @param site the modification site, 0 for the N-terminal mappings
+         */
+        private UtilitiesModification(String name, int site) {
+            this.name = name;
+            this.site = site;
+        }
+    }
+}
diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java
new file mode 100644
index 0000000000..13f556e233
--- /dev/null
+++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java
@@ -0,0 +1,33 @@
+package com.compomics.util.experiment.io.identification.idfilereaders;
+
+import com.compomics.util.experiment.identification.Advocate;
+import java.io.File;
+
+/**
+ * Reader for InstaNovo transformer-only CSV output, reported under the InstaNovo advocate.
+ *
+ * @author CompOmics
+ */
+public class InstaNovoIdfileReader extends InstaNovoCsvIdfileReader {
+
+    /**
+     * The file name suffix this reader is registered for.
+     */
+    public static final String EXTENSION = ".instanovo.csv";
+
+    /**
+     * Default constructor for service loading; the CSV file is left null.
+     */
+    public InstaNovoIdfileReader() {
+        this(null);
+    }
+
+    /**
+     * Creates a reader using the InstaNovo advocate and {@link #EXTENSION}.
+     *
+     * @param csvFile the InstaNovo CSV file to read
+     */
+    public InstaNovoIdfileReader(File csvFile) {
+        super(csvFile, Advocate.instanovo, EXTENSION);
+    }
+}
diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java
new file mode 100644
index 0000000000..cca7062c56
--- /dev/null
+++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java
@@ -0,0 +1,33 @@
+package com.compomics.util.experiment.io.identification.idfilereaders;
+
+import com.compomics.util.experiment.identification.Advocate;
+import java.io.File;
+
+/**
+ * Reader for standalone InstaNovo+ CSV output.
+ *
+ * @author CompOmics
+ */
+public class InstaNovoPlusIdfileReader extends InstaNovoCsvIdfileReader {
+
+    /**
+     * The file name suffix this reader is registered for.
+     */
+    public static final String EXTENSION = ".instanovoplus.csv";
+
+    /**
+     * Default constructor for service loading; the CSV file is left null.
+     */
+    public InstaNovoPlusIdfileReader() {
+        this(null);
+    }
+
+    /**
+     * Creates a reader using the InstaNovo+ advocate and {@link #EXTENSION}.
+     *
+     * @param csvFile the InstaNovo+ CSV file to read
+     */
+    public InstaNovoPlusIdfileReader(File csvFile) {
+        super(csvFile, Advocate.instanovoPlus, EXTENSION);
+    }
+}
diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java
new file mode 100644
index 0000000000..3c41332b81
--- /dev/null
+++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java
@@ -0,0 +1,33 @@
+package com.compomics.util.experiment.io.identification.idfilereaders;
+
+import com.compomics.util.experiment.identification.Advocate;
+import java.io.File;
+
+/**
+ * Reader for InstaNovo predictions refined by InstaNovo+, reported under the InstaNovo+ advocate.
+ *
+ * @author CompOmics
+ */
+public class InstaNovoRefinedIdfileReader extends InstaNovoCsvIdfileReader {
+
+    /**
+     * The file name suffix this reader is registered for.
+     */
+    public static final String EXTENSION = ".instanovo.refined.csv";
+
+    /**
+     * Default constructor for service loading; the CSV file is left null.
+     */
+    public InstaNovoRefinedIdfileReader() {
+        this(null);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param csvFile the refined InstaNovo CSV file to read
+     */
+    public InstaNovoRefinedIdfileReader(File csvFile) {
+        super(csvFile, Advocate.instanovoPlus, EXTENSION);
+    }
+}
diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html
index 37c3d14ace..c13031b805 100644
--- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html
+++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html
@@ -1,5 +1,7 @@
-
-
-    Experiment classes related to reading search engine files.
-
-
+
+
+    Experiment classes related to reading search engine files, including
+    InstaNovo, InstaNovo+, and InstaNovo with InstaNovo+ refinement CSV
+    prediction files.
+
+
diff --git a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java
index 3241b8073f..2064ccd550 100644
--- a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java
+++ b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java
@@ -26,6 +26,8 @@
 import static com.compomics.util.parameters.identification.IdentificationParameters.CURRENT_VERSION;
 import com.compomics.util.parameters.identification.tool_specific.MetaMorpheusParameters;
 import com.compomics.util.parameters.identification.tool_specific.SageParameters;
+import com.compomics.util.parameters.identification.tool_specific.InstaNovoParameters;
+import com.compomics.util.parameters.identification.tool_specific.InstaNovoPlusParameters;
 import java.io.*;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -303,6 +305,18 @@ public void setDefaultAdvancedSettings(SearchParameters searchParameters) {
             setIdentificationAlgorithmParameter(Advocate.novor.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.novor.getIndex()));
         }
 
+        if (searchParameters == null || searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovo.getIndex()) == null) {
+            setIdentificationAlgorithmParameter(Advocate.instanovo.getIndex(), new InstaNovoParameters());
+        } else {
+            setIdentificationAlgorithmParameter(Advocate.instanovo.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovo.getIndex()));
+        }
+
+        if (searchParameters == null || searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex()) == null) {
+            setIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex(), new InstaNovoPlusParameters());
+        } else {
+            setIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex()));
+        }
+
     }
 
     /**
diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java
new file mode 100644
index 0000000000..e17be34aae
--- /dev/null
+++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java
@@ -0,0 +1,217 @@
+package com.compomics.util.parameters.identification.tool_specific;
+
+import com.compomics.util.experiment.identification.Advocate;
+import com.compomics.util.experiment.personalization.ExperimentObject;
+import com.compomics.util.gui.parameters.identification.IdentificationAlgorithmParameter;
+
+/**
+ * InstaNovo specific parameters: models, configuration file, beams, batch size and CPU flag.
+ *
+ * @author CompOmics
+ */
+public class InstaNovoParameters extends ExperimentObject implements IdentificationAlgorithmParameter {
+
+    /**
+     * Version number for deserialization.
+     */
+    static final long serialVersionUID = -2295564912139753378L;
+    /**
+     * Default InstaNovo model identifier for v1.2.2.
+     */
+    public static final String DEFAULT_INSTANOVO_MODEL = "instanovo-v1.2.0";
+    /**
+     * Default InstaNovo+ model identifier for v1.2.2 refinement.
+     */
+    public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0";
+    /**
+     * The selected InstaNovo model id or path.
+     */
+    private String instaNovoModel = DEFAULT_INSTANOVO_MODEL;
+    /**
+     * The selected InstaNovo+ model id or path used for refinement.
+     */
+    private String instaNovoPlusModel = DEFAULT_INSTANOVO_PLUS_MODEL;
+    /**
+     * The optional inference configuration path.
+     */
+    private String configFile = null;
+    /**
+     * The number of beams.
+     */
+    private int numberOfBeams = 5;
+    /**
+     * The prediction batch size. A value below one lets InstaNovo use its
+     * configuration default.
+     */
+    private int batchSize = -1;
+    /**
+     * Whether to force CPU execution.
+     */
+    private boolean forceCpu = false;
+
+    @Override
+    public Advocate getAlgorithm() {
+        return Advocate.instanovo;
+    }
+
+    /**
+     * Indicates whether another parameter object holds the same settings.
+     * Only objects of exactly the same class are considered equal, which
+     * keeps the comparison symmetric between this class and its subclasses.
+     *
+     * @param identificationAlgorithmParameter the parameters to compare to
+     *
+     * @return true if the class and all settings are identical
+     */
+    @Override
+    public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmParameter) {
+
+        if (identificationAlgorithmParameter == null
+                || getClass() != identificationAlgorithmParameter.getClass()) {
+            return false;
+        }
+
+        InstaNovoParameters other = (InstaNovoParameters) identificationAlgorithmParameter;
+
+        return safeEquals(instaNovoModel, other.getInstaNovoModel())
+                && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel())
+                && safeEquals(configFile, other.getConfigFile())
+                && numberOfBeams == other.getNumberOfBeams()
+                && batchSize == other.getBatchSize()
+                && forceCpu == other.isForceCpu();
+    }
+
+    /**
+     * Returns the parameters as KEY=value lines below a header naming the
+     * advocate. A null configuration file is written as an empty value.
+     *
+     * @param html if true, lines are separated by an HTML line break instead
+     * of the system line separator
+     *
+     * @return the parameters as text
+     */
+    @Override
+    public String toString(boolean html) {
+
+        String newLine = html ? "<br>" : System.getProperty("line.separator");
+        StringBuilder output = new StringBuilder();
+        Advocate advocate = getAlgorithm();
+        output.append("# ------------------------------------------------------------------");
+        output.append(newLine);
+        output.append("# ").append(advocate.getName()).append(" Specific Parameters");
+        output.append(newLine);
+        output.append("# ------------------------------------------------------------------");
+        output.append(newLine);
+        output.append(newLine);
+        output.append("INSTANOVO_MODEL=").append(instaNovoModel).append(newLine);
+        output.append("INSTANOVO_PLUS_MODEL=").append(instaNovoPlusModel).append(newLine);
+        output.append("CONFIG_FILE=").append(configFile == null ? "" : configFile).append(newLine);
+        output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine);
+        output.append("BATCH_SIZE=").append(batchSize).append(newLine);
+        output.append("FORCE_CPU=").append(forceCpu).append(newLine);
+
+        return output.toString();
+    }
+
+    /**
+     * Returns the selected InstaNovo model.
+     *
+     * @return the selected InstaNovo model
+     */
+    public String getInstaNovoModel() {
+        return instaNovoModel;
+    }
+
+    /**
+     * Sets the selected InstaNovo model.
+     *
+     * @param instaNovoModel the selected InstaNovo model
+     */
+    public void setInstaNovoModel(String instaNovoModel) {
+        this.instaNovoModel = instaNovoModel;
+    }
+
+    /**
+     * Returns the selected InstaNovo+ model.
+     *
+     * @return the selected InstaNovo+ model
+     */
+    public String getInstaNovoPlusModel() {
+        return instaNovoPlusModel;
+    }
+
+    /**
+     * Sets the selected InstaNovo+ model.
+     *
+     * @param instaNovoPlusModel the selected InstaNovo+ model
+     */
+    public void setInstaNovoPlusModel(String instaNovoPlusModel) {
+        this.instaNovoPlusModel = instaNovoPlusModel;
+    }
+
+    /**
+     * Returns the optional configuration file.
+     *
+     * @return the optional configuration file
+     */
+    public String getConfigFile() {
+        return configFile;
+    }
+
+    /**
+     * Sets the optional configuration file.
+ * + * @param configFile the optional configuration file + */ + public void setConfigFile(String configFile) { + this.configFile = configFile; + } + + /** + * Returns the number of beams. + * + * @return the number of beams + */ + public int getNumberOfBeams() { + return numberOfBeams; + } + + /** + * Sets the number of beams. + * + * @param numberOfBeams the number of beams + */ + public void setNumberOfBeams(int numberOfBeams) { + this.numberOfBeams = numberOfBeams; + } + + /** + * Returns the batch size. + * + * @return the batch size + */ + public int getBatchSize() { + return batchSize; + } + + /** + * Sets the batch size. + * + * @param batchSize the batch size + */ + public void setBatchSize(int batchSize) { + this.batchSize = batchSize; + } + + /** + * Returns whether CPU execution is forced. + * + * @return whether CPU execution is forced + */ + public boolean isForceCpu() { + return forceCpu; + } + + /** + * Sets whether CPU execution is forced. + * + * @param forceCpu whether CPU execution is forced + */ + public void setForceCpu(boolean forceCpu) { + this.forceCpu = forceCpu; + } + + /** + * Null-safe string comparison. + * + * @param a the first value + * @param b the second value + * + * @return true if the two values are equal + */ + protected boolean safeEquals(String a, String b) { + return a == null ? 
b == null : a.equals(b); + } +} diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java new file mode 100644 index 0000000000..a80f637a2d --- /dev/null +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java @@ -0,0 +1,28 @@ +package com.compomics.util.parameters.identification.tool_specific; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.gui.parameters.identification.IdentificationAlgorithmParameter; + +/** + * InstaNovo+ specific parameters. + * + * @author CompOmics + */ +public class InstaNovoPlusParameters extends InstaNovoParameters { + + /** + * Version number for deserialization. + */ + static final long serialVersionUID = -7586968643672811482L; + + @Override + public Advocate getAlgorithm() { + return Advocate.instanovoPlus; + } + + @Override + public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmParameter) { + return identificationAlgorithmParameter instanceof InstaNovoPlusParameters + && super.equals(identificationAlgorithmParameter); + } +} diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html b/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html index bcfa8b1b9e..d7f7901398 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html @@ -1,5 +1,6 @@ - - - Parameters settings for the search algorithms. - - + + + Parameter settings for the search and de novo sequencing algorithms, + including InstaNovo and InstaNovo+. 
+ + diff --git a/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader b/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader index 3e823496b9..4bf7accc6b 100644 --- a/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader +++ b/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader @@ -10,5 +10,8 @@ com.compomics.util.experiment.io.identification.idfilereaders.TideIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.NovorIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.OnyaseIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.XTandemIdfileReader -com.compomics.util.experiment.io.identification.idfilereaders.CossIdfileReader -com.compomics.util.experiment.io.identification.idfilereaders.SageIdfileReader \ No newline at end of file +com.compomics.util.experiment.io.identification.idfilereaders.CossIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.SageIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoPlusIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoRefinedIdfileReader diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java new file mode 100644 index 0000000000..8d4c9d0718 --- /dev/null +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -0,0 +1,447 @@ +package com.compomics.util.test.experiment.io.identifications; + +import com.compomics.util.experiment.identification.Advocate; +import 
com.compomics.util.experiment.identification.matches.ModificationMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; +import com.compomics.util.experiment.io.identification.IdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoIdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoPlusIdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoRefinedIdfileReader; +import com.compomics.util.experiment.mass_spectrometry.SpectrumProvider; +import com.compomics.util.experiment.mass_spectrometry.spectra.Precursor; +import com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum; +import com.compomics.util.parameters.identification.search.SearchParameters; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.TreeMap; +import junit.framework.TestCase; +import org.junit.Assert; + +/** + * Tests for InstaNovo v1.2.2 CSV readers. + * + * @author CompOmics + */ +public class TestInstaNovoIdfileReader extends TestCase { + + /** + * Derived from the first row of the InstaNovo v1.2.2 transformer normalized + * Zenodo sample file. 
+ */ + private static final String INSTANOVO_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-1147.98681640625,\"[-0.015801219269633293, -1.1395305395126343, -2.2013168334960938, -1.3749353885650635, -1.4705305099487305, -0.5675679445266724]\",no_group,\"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K\",58846.475981092575\n"; + + /** + * Derived from the first row of the InstaNovo+ v1.2.2 no-refinement + * normalized Zenodo sample file. + */ + private static final String INSTANOVOPLUS_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,MC[UNIMOD:4]IPDQPM[UNIMOD:35]EVDNEDDAPLPPPEAR,-3.6934256553649902,,no_group,\"M, C[UNIMOD:4], I, P, D, Q, P, M[UNIMOD:35], E, V, D, N, E, D, D, A, P, L, P, P, P, E, A, R\",2282970.310323359\n"; + + /** + * Derived from the first row of the InstaNovo v1.2.2 combined refined + * Zenodo sample file. 
+ */ + private static final String INSTANOVO_COMBINED_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,instanovo_predictions,instanovo_prediction_log_probability,instanovo_prediction_token_log_probabilities,instanovo_predictions_beam_0,instanovo_predictions_log_probability_beam_0,instanovo_predictions_token_log_probabilities_beam_0,instanovo_predictions_beam_1,instanovo_predictions_log_probability_beam_1,instanovo_predictions_token_log_probabilities_beam_1,instanovo_predictions_beam_2,instanovo_predictions_log_probability_beam_2,instanovo_predictions_token_log_probabilities_beam_2,instanovo_predictions_beam_3,instanovo_predictions_log_probability_beam_3,instanovo_predictions_token_log_probabilities_beam_3,instanovo_predictions_beam_4,instanovo_predictions_log_probability_beam_4,instanovo_predictions_token_log_probabilities_beam_4,instanovoplus_predictions,instanovoplus_prediction_log_probability,instanovoplus_prediction_token_log_probabilities,instanovoplus_unrefined_predictions,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,LIRPLLK,-0.6334811449050903,,no_group,\"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']\",-10.102036476135254,\"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",LKGDS[UNIMOD:21]PK,-10.102036476135254,\"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",VKGDS[UNIMOD:21]PK,-11.082494735717773,\"[-2.8237648010253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",SKGDS[UNIMOD:21]PK,-11.430251121520996,\"[-2.7461280822753906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, 
-1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",AKGDS[UNIMOD:21]PK,-11.492465019226074,\"[-3.1643409729003906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",PKGDS[UNIMOD:21]PK,-11.968438148498535,\"[-2.6694679260253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",\"['L', 'I', 'R', 'P', 'L', 'L', 'K']\",-0.6334811449050903,,\"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']\",\"L, I, R, P, L, L, K\",17862.82765389216\n"; + + /** + * Tests registration and parsing of the three supported InstaNovo modes. + * + * @throws Exception if an exception occurs + */ + public void testInstaNovoReaders() throws Exception { + + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo")); + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo+")); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider(); + SearchParameters searchParameters = new SearchParameters(); + + assertReader("test.instanovo.csv", Advocate.instanovo.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovoplus.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovo.refined.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + } + + /** + * Tests service registration for the three InstaNovo readers. 
+ * + * @throws Exception if an exception occurs + */ + public void testInstaNovoReaderServiceRegistration() throws Exception { + + InputStream serviceStream = getClass().getClassLoader().getResourceAsStream( + "META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader" + ); + + Assert.assertNotNull(serviceStream); + + byte[] bytes = new byte[serviceStream.available()]; + serviceStream.read(bytes); + + String serviceFile = new String(bytes, StandardCharsets.UTF_8); + + Assert.assertTrue(serviceFile.contains(InstaNovoIdfileReader.class.getName())); + Assert.assertTrue(serviceFile.contains(InstaNovoPlusIdfileReader.class.getName())); + Assert.assertTrue(serviceFile.contains(InstaNovoRefinedIdfileReader.class.getName())); + } + + /** + * Tests invalid headers. + * + * @throws Exception if an exception occurs + */ + public void testMissingColumns() throws Exception { + + File csvFile = writeCsv("missing.instanovo.csv", "experiment_name,scan_number,predictions\nexample,0,PEPTIDE\n"); + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + + try { + idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + Assert.fail("Expected invalid InstaNovo CSV columns to fail."); + } catch (IllegalArgumentException e) { + Assert.assertTrue(e.getMessage().contains("Mandatory")); + } + } + + /** + * Tests parsing rows derived from the InstaNovo v1.2.2 sample files. 
+ * + * @throws Exception if an exception occurs + */ + public void testInstaNovoVersion122SampleRows() throws Exception { + + assertSampleReader( + new InstaNovoIdfileReader(writeCsv("sample.instanovo.csv", INSTANOVO_V1_2_2)), + Advocate.instanovo.getIndex(), + "DMNSPK", + 2 + ); + + assertSampleReader( + new InstaNovoPlusIdfileReader(writeCsv("sample.instanovoplus.csv", INSTANOVOPLUS_V1_2_2)), + Advocate.instanovoPlus.getIndex(), + "MCIPDQPMEVDNEDDAPLPPPEAR", + 2 + ); + + assertSampleReader( + new InstaNovoRefinedIdfileReader(writeCsv("sample.instanovo.refined.csv", INSTANOVO_COMBINED_V1_2_2)), + Advocate.instanovoPlus.getIndex(), + "LIRPLLK", + 0 + ); + } + + /** + * Tests all UniMod annotations from the InstaNovo v1.2.2 default residue + * configuration. + * + * @throws Exception if an exception occurs + */ + public void testDefaultInstaNovoModifications() throws Exception { + + String header = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n"; + File csvFile = writeCsv( + "default-modifications.instanovo.csv", + header + + "sample,0,sample:0,419.314971923828,2,0,M[UNIMOD:35]C[UNIMOD:4]N[UNIMOD:7]Q[UNIMOD:7]R[UNIMOD:7]P[UNIMOD:35]S[UNIMOD:21]T[UNIMOD:21]Y[UNIMOD:21],-1.0,,no_group,,0.0\n" + + "sample,1,sample:1,419.314971923828,2,0,[UNIMOD:1]ACD,-1.0,,no_group,,0.0\n" + + "sample,2,sample:2,419.314971923828,2,0,[UNIMOD:5]ACD,-1.0,,no_group,,0.0\n" + + "sample,3,sample:3,419.314971923828,2,0,[UNIMOD:385]CPEP,-1.0,,no_group,,0.0\n" + + "sample,4,sample:4,419.314971923828,2,0,[UNIMOD:385]NPEP,-1.0,,no_group,,0.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(5, spectrumMatches.size()); + + PeptideAssumption residueModifiedAssumption = getFirstAssumption(spectrumMatches, 
"0", Advocate.instanovo.getIndex()); + + Assert.assertEquals("MCNQRPSTY", residueModifiedAssumption.getPeptide().getSequence()); + assertModification(residueModifiedAssumption, "Oxidation of M", 1); + assertModification(residueModifiedAssumption, "Carbamidomethylation of C", 2); + assertModification(residueModifiedAssumption, "Deamidation of N", 3); + assertModification(residueModifiedAssumption, "Deamidation of Q", 4); + assertModification(residueModifiedAssumption, "Citrullination of R", 5); + assertModification(residueModifiedAssumption, "Oxidation of P", 6); + assertModification(residueModifiedAssumption, "Phosphorylation of S", 7); + assertModification(residueModifiedAssumption, "Phosphorylation of T", 8); + assertModification(residueModifiedAssumption, "Phosphorylation of Y", 9); + + assertModification(getFirstAssumption(spectrumMatches, "1", Advocate.instanovo.getIndex()), "Acetylation of peptide N-term", 0); + assertModification(getFirstAssumption(spectrumMatches, "2", Advocate.instanovo.getIndex()), "Carbamilation of protein N-term", 0); + assertModification(getFirstAssumption(spectrumMatches, "3", Advocate.instanovo.getIndex()), "Pyrolidone from carbamidomethylated C", 1); + assertModification(getFirstAssumption(spectrumMatches, "4", Advocate.instanovo.getIndex()), "Ammonia loss from N", 1); + } + + /** + * Asserts one reader. 
+ * + * @param fileName the file name + * @param advocateIndex the expected advocate index + * @param spectrumProvider the spectrum provider + * @param searchParameters the search parameters + * + * @throws Exception if an exception occurs + */ + private void assertReader( + String fileName, + int advocateIndex, + SpectrumProvider spectrumProvider, + SearchParameters searchParameters + ) throws Exception { + + File csvFile = writeCsv( + fileName, + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "example,0,example:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-10.0,\"[-1.0]\",no_group,\"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K\",0.0\n" + ); + + IdfileReader idfileReader; + if (fileName.endsWith(InstaNovoPlusIdfileReader.EXTENSION)) { + idfileReader = new InstaNovoPlusIdfileReader(csvFile); + } else if (fileName.endsWith(InstaNovoRefinedIdfileReader.EXTENSION)) { + idfileReader = new InstaNovoRefinedIdfileReader(csvFile); + } else { + idfileReader = new InstaNovoIdfileReader(csvFile); + } + + Assert.assertNotNull(idfileReader); + + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, searchParameters); + Assert.assertEquals(1, spectrumMatches.size()); + SpectrumMatch spectrumMatch = spectrumMatches.get(0); + Assert.assertEquals("example", spectrumMatch.getSpectrumFile()); + Assert.assertEquals("0", spectrumMatch.getSpectrumTitle()); + + TreeMap> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + Assert.assertNotNull(assumptions); + PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); + Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); + Assert.assertEquals(2, peptideAssumption.getPeptide().getVariableModifications().length); + 
Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + } + + /** + * Asserts a reader using sample v1.2.2 CSV content. + * + * @param idfileReader the reader + * @param advocateIndex the expected advocate index + * @param expectedSequence the expected peptide sequence + * @param expectedVariableModifications the expected number of variable + * modifications + * + * @throws Exception if an exception occurs + */ + private void assertSampleReader( + IdfileReader idfileReader, + int advocateIndex, + String expectedSequence, + int expectedVariableModifications + ) throws Exception { + + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + + SpectrumMatch spectrumMatch = spectrumMatches.get(0); + + Assert.assertEquals("SF_200217_U2OS_TiO2_HCD_OT_rep1", spectrumMatch.getSpectrumFile()); + Assert.assertEquals("0", spectrumMatch.getSpectrumTitle()); + + TreeMap> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + + Assert.assertNotNull(assumptions); + + PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); + + Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); + Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + } + + /** + * Returns the first assumption for a spectrum title. 
+ * + * @param spectrumMatches the spectrum matches + * @param spectrumTitle the spectrum title + * @param advocateIndex the advocate index + * + * @return the first peptide assumption + */ + private PeptideAssumption getFirstAssumption(ArrayList spectrumMatches, String spectrumTitle, int advocateIndex) { + + for (SpectrumMatch spectrumMatch : spectrumMatches) { + + if (spectrumMatch.getSpectrumTitle().equals(spectrumTitle)) { + + TreeMap> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + + Assert.assertNotNull(assumptions); + + return assumptions.firstEntry().getValue().get(0); + } + } + + Assert.fail("No spectrum match found for title " + spectrumTitle + "."); + + return null; + } + + /** + * Asserts a modification match. + * + * @param peptideAssumption the peptide assumption + * @param modification the modification name + * @param site the modification site + */ + private void assertModification(PeptideAssumption peptideAssumption, String modification, int site) { + + for (ModificationMatch modificationMatch : peptideAssumption.getPeptide().getVariableModifications()) { + + if (modificationMatch.getModification().equals(modification) && modificationMatch.getSite() == site) { + return; + } + } + + Assert.fail("Modification " + modification + " at site " + site + " not found."); + } + + /** + * Writes a temporary CSV file. + * + * @param fileName the file name + * @param content the content + * + * @return the CSV file + * + * @throws IOException if an IOException occurs + */ + private File writeCsv(String fileName, String content) throws IOException { + + File file = File.createTempFile(fileName, ""); + file.deleteOnExit(); + + try (FileWriter writer = new FileWriter(file)) { + writer.write(content); + } + + return file; + } + + /** + * Simple spectrum provider for tests. 
+ */ + private static class SimpleSpectrumProvider implements SpectrumProvider { + + @Override + public Spectrum getSpectrum(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public Precursor getPrecursor(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public ArrayList getPostcursorSpectrumTitles(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public double getPrecursorMz(String fileNameWithoutExtension, String spectrumTitle) { + return 0; + } + + @Override + public double getPrecursorRt(String fileNameWithoutExtension, String spectrumTitle) { + return 0; + } + + @Override + public int getSpectrumLevel(String fileNameWithoutExtension, String spectrumTitle) { + return 2; + } + + @Override + public double[][] getPeaks(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public double getMinPrecMz(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecMz(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecInt(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecRT(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMinPrecMz() { + return 0; + } + + @Override + public double getMaxPrecMz() { + return 0; + } + + @Override + public double getMaxPrecInt() { + return 0; + } + + @Override + public double getMaxPrecRT() { + return 0; + } + + @Override + public String[] getOrderedFileNamesWithoutExtensions() { + return new String[]{"example"}; + } + + @Override + public String[] getSpectrumTitles(String fileNameWithoutExtension) { + return new String[]{"0", "1", "2", "3", "4"}; + } + + @Override + public HashMap getFilePaths() { + return new HashMap<>(); + } + + @Override + public HashMap getCmsFilePaths() { + return new HashMap<>(); + } + + @Override + public void 
close() { + // Nothing to close. + } + } +} From 12f6d2fdb64964621adb83ba103f25f2bf5fed0a Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 13:56:30 +0200 Subject: [PATCH 02/10] Add InstaNovo advanced inference parameters --- .../tool_specific/InstaNovoParameters.java | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index e17be34aae..5d7d2a67b1 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -44,6 +44,14 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat * configuration default. */ private int batchSize = -1; + /** + * Whether to use knapsack beam search. + */ + private boolean useKnapsack = false; + /** + * Whether to save all beam search predictions. + */ + private boolean saveAllPredictions = true; /** * Whether to force CPU execution. */ @@ -66,6 +74,8 @@ && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel()) && safeEquals(configFile, other.getConfigFile()) && numberOfBeams == other.getNumberOfBeams() && batchSize == other.getBatchSize() + && useKnapsack == other.isUseKnapsack() + && saveAllPredictions == other.isSaveAllPredictions() && forceCpu == other.isForceCpu(); } @@ -90,6 +100,8 @@ public String toString(boolean html) { output.append("CONFIG_FILE=").append(configFile == null ? 
"" : configFile).append(newLine); output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine); output.append("BATCH_SIZE=").append(batchSize).append(newLine); + output.append("USE_KNAPSACK=").append(useKnapsack).append(newLine); + output.append("SAVE_ALL_PREDICTIONS=").append(saveAllPredictions).append(newLine); output.append("FORCE_CPU=").append(forceCpu).append(newLine); return output.toString(); @@ -185,6 +197,42 @@ public void setBatchSize(int batchSize) { this.batchSize = batchSize; } + /** + * Returns whether knapsack beam search is used. + * + * @return whether knapsack beam search is used + */ + public boolean isUseKnapsack() { + return useKnapsack; + } + + /** + * Sets whether knapsack beam search is used. + * + * @param useKnapsack whether knapsack beam search is used + */ + public void setUseKnapsack(boolean useKnapsack) { + this.useKnapsack = useKnapsack; + } + + /** + * Returns whether all beam search predictions are saved. + * + * @return whether all beam search predictions are saved + */ + public boolean isSaveAllPredictions() { + return saveAllPredictions; + } + + /** + * Sets whether all beam search predictions are saved. + * + * @param saveAllPredictions whether all beam search predictions are saved + */ + public void setSaveAllPredictions(boolean saveAllPredictions) { + this.saveAllPredictions = saveAllPredictions; + } + /** * Returns whether CPU execution is forced. 
* From b4a030db96a60d4842cab2962621cca54cd5b460 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 15:00:16 +0200 Subject: [PATCH 03/10] Use desktop InstaNovo batch size by default --- .../tool_specific/InstaNovoParameters.java | 6 ++++- .../TestInstaNovoParameters.java | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index 5d7d2a67b1..5a13f83da5 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -23,6 +23,10 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat * Default InstaNovo+ model identifier for v1.2.2 refinement. */ public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0"; + /** + * Default prediction batch size for desktop SearchGUI runs. + */ + public static final int DEFAULT_BATCH_SIZE = 16; /** * The selected InstaNovo model id or path. */ @@ -43,7 +47,7 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat * The prediction batch size. A value below one lets InstaNovo use its * configuration default. */ - private int batchSize = -1; + private int batchSize = DEFAULT_BATCH_SIZE; /** * Whether to use knapsack beam search. 
*/ diff --git a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java new file mode 100644 index 0000000000..9a57b623c4 --- /dev/null +++ b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java @@ -0,0 +1,24 @@ +package com.compomics.util.test.parameters.identification.tool_specific; + +import com.compomics.util.parameters.identification.tool_specific.InstaNovoParameters; +import junit.framework.TestCase; +import org.junit.Assert; + +/** + * Tests for InstaNovo specific parameters. + * + * @author CompOmics + */ +public class TestInstaNovoParameters extends TestCase { + + /** + * Tests the desktop-oriented default batch size. + */ + public void testDefaultBatchSize() { + + InstaNovoParameters parameters = new InstaNovoParameters(); + + Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); + Assert.assertTrue(parameters.toString(false).contains("BATCH_SIZE=" + InstaNovoParameters.DEFAULT_BATCH_SIZE)); + } +} From 0ee155cd01785e09e95da9d1bd1e144a7fb74376 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 15:05:34 +0200 Subject: [PATCH 04/10] Normalize legacy InstaNovo batch sizes --- .../tool_specific/InstaNovoParameters.java | 11 +++++------ .../tool_specific/TestInstaNovoParameters.java | 4 ++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index 5a13f83da5..368a4368a5 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -44,8 
+44,7 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat */ private int numberOfBeams = 5; /** - * The prediction batch size. A value below one lets InstaNovo use its - * configuration default. + * The prediction batch size. */ private int batchSize = DEFAULT_BATCH_SIZE; /** @@ -77,7 +76,7 @@ public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmPa && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel()) && safeEquals(configFile, other.getConfigFile()) && numberOfBeams == other.getNumberOfBeams() - && batchSize == other.getBatchSize() + && getBatchSize() == other.getBatchSize() && useKnapsack == other.isUseKnapsack() && saveAllPredictions == other.isSaveAllPredictions() && forceCpu == other.isForceCpu(); @@ -103,7 +102,7 @@ public String toString(boolean html) { output.append("INSTANOVO_PLUS_MODEL=").append(instaNovoPlusModel).append(newLine); output.append("CONFIG_FILE=").append(configFile == null ? "" : configFile).append(newLine); output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine); - output.append("BATCH_SIZE=").append(batchSize).append(newLine); + output.append("BATCH_SIZE=").append(getBatchSize()).append(newLine); output.append("USE_KNAPSACK=").append(useKnapsack).append(newLine); output.append("SAVE_ALL_PREDICTIONS=").append(saveAllPredictions).append(newLine); output.append("FORCE_CPU=").append(forceCpu).append(newLine); @@ -189,7 +188,7 @@ public void setNumberOfBeams(int numberOfBeams) { * @return the batch size */ public int getBatchSize() { - return batchSize; + return batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; } /** @@ -198,7 +197,7 @@ public int getBatchSize() { * @param batchSize the batch size */ public void setBatchSize(int batchSize) { - this.batchSize = batchSize; + this.batchSize = batchSize > 0 ? 
batchSize : DEFAULT_BATCH_SIZE; } /** diff --git a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java index 9a57b623c4..f3619643e7 100644 --- a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java +++ b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java @@ -20,5 +20,9 @@ public void testDefaultBatchSize() { Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); Assert.assertTrue(parameters.toString(false).contains("BATCH_SIZE=" + InstaNovoParameters.DEFAULT_BATCH_SIZE)); + + parameters.setBatchSize(-1); + + Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); } } From 619fa87753b2b813fdaf794b0af07f206e3dc7ea Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 16:07:58 +0200 Subject: [PATCH 05/10] Harden InstaNovo refined CSV import --- .../experiment/identification/Advocate.java | 9 +- .../InstaNovoCsvIdfileReader.java | 254 ++++++++++++++++-- .../InstaNovoRefinedIdfileReader.java | 2 +- .../tool_specific/InstaNovoParameters.java | 4 +- .../TestInstaNovoIdfileReader.java | 92 ++++++- 5 files changed, 325 insertions(+), 36 deletions(-) diff --git a/src/main/java/com/compomics/util/experiment/identification/Advocate.java b/src/main/java/com/compomics/util/experiment/identification/Advocate.java index da66bf7066..7c306105f0 100644 --- a/src/main/java/com/compomics/util/experiment/identification/Advocate.java +++ b/src/main/java/com/compomics/util/experiment/identification/Advocate.java @@ -190,6 +190,10 @@ public enum AdvocateType { * The InstaNovo+ de novo sequencing algorithm. 
*/ public static final Advocate instanovoPlus = new Advocate(39, "InstaNovo+", AdvocateType.sequencing_algorithm, new Color(123, 104, 238)); + /** + * The InstaNovo predictions refined with InstaNovo+ de novo sequencing algorithm. + */ + public static final Advocate instanovoRefined = new Advocate(40, "InstaNovo with refinement", AdvocateType.sequencing_algorithm, new Color(72, 209, 204)); /** * Advocate type for mzId files where no software is annotated. */ @@ -319,7 +323,7 @@ public String toString() { * @return the implemented advocates in an array */ public static Advocate[] values() { - Advocate[] result = new Advocate[42 + userAdvocates.size()]; + Advocate[] result = new Advocate[43 + userAdvocates.size()]; int i = 0; result[i] = peptideShaker; result[++i] = onyaseEngine; @@ -363,6 +367,7 @@ public static Advocate[] values() { result[++i] = msFragger; result[++i] = instanovo; result[++i] = instanovoPlus; + result[++i] = instanovoRefined; for (Advocate advocate : userAdvocates.values()) { result[++i] = advocate; @@ -499,7 +504,7 @@ public String getPmid() { return "37819886"; } else if (this == msFragger) { return "28394336"; - } else if (this == instanovo || this == instanovoPlus) { + } else if (this == instanovo || this == instanovoPlus || this == instanovoRefined) { return null; } else { return null; diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index b1b96cc7d3..56a614b6c7 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -18,6 +18,8 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import 
javax.xml.bind.JAXBException; /** @@ -31,6 +33,10 @@ abstract class InstaNovoCsvIdfileReader implements IdfileReader { * The supported InstaNovo version. */ private static final String SOFTWARE_VERSION = "1.2.2"; + /** + * Pattern matching common scan or index tokens in spectrum titles. + */ + private static final Pattern TITLE_NUMBER_PATTERN = Pattern.compile("(?i)(?:scan|index|scan_number)\\s*[=: ]\\s*(\\d+)"); /** * The CSV file. */ @@ -95,6 +101,7 @@ public ArrayList getAllSpectrumMatches( ArrayList result = new ArrayList<>(); HashMap matches = new HashMap<>(); + HashMap spectrumTitleLookups = new HashMap<>(); try (SimpleFileReader reader = SimpleFileReader.getFileReader(csvFile)) { @@ -138,10 +145,22 @@ public ArrayList getAllSpectrumMatches( String experimentName = experimentIndex >= 0 ? getValue(values, experimentIndex).trim() : ""; String spectrumId = spectrumIdIndex >= 0 ? getValue(values, spectrumIdIndex).trim() : ""; String scanNumber = scanNumberIndex >= 0 ? getValue(values, scanNumberIndex).trim() : ""; + Integer charge = getCharge(getValue(values, chargeIndex), lineNumber, waitingHandler); + + if (charge == null) { + continue; + } + String spectrumFileName = getSpectrumFileName(spectrumProvider, experimentName, spectrumId); - String spectrumTitle = getSpectrumTitle(spectrumProvider, spectrumFileName, spectrumId, scanNumber); + SpectrumTitleLookup spectrumTitleLookup = spectrumTitleLookups.get(spectrumFileName); + + if (spectrumTitleLookup == null) { + spectrumTitleLookup = new SpectrumTitleLookup(spectrumProvider, spectrumFileName); + spectrumTitleLookups.put(spectrumFileName, spectrumTitleLookup); + } + + String spectrumTitle = getSpectrumTitle(spectrumTitleLookup, spectrumFileName, spectrumId, scanNumber); - int charge = Integer.parseInt(getValue(values, chargeIndex)); double logProbability = Util.readDoubleAsString(getValue(values, scoreIndex)); double score = -logProbability; @@ -186,11 +205,15 @@ public HashMap> getSoftwareVersions() { 
versions.add(SOFTWARE_VERSION); result.put(advocate.getName(), versions); - if (advocate == Advocate.instanovoPlus && getExtension().contains("refined")) { + if (advocate == Advocate.instanovoRefined) { ArrayList instaNovoVersions = new ArrayList<>(); instaNovoVersions.add(SOFTWARE_VERSION); result.put(Advocate.instanovo.getName(), instaNovoVersions); + + ArrayList instaNovoPlusVersions = new ArrayList<>(); + instaNovoPlusVersions.add(SOFTWARE_VERSION); + result.put(Advocate.instanovoPlus.getName(), instaNovoPlusVersions); } return result; @@ -247,50 +270,74 @@ private String getSpectrumFileName(SpectrumProvider spectrumProvider, String exp * * @return the spectrum title */ - private String getSpectrumTitle(SpectrumProvider spectrumProvider, String spectrumFileName, String spectrumId, String scanNumber) { + private String getSpectrumTitle(SpectrumTitleLookup spectrumTitleLookup, String spectrumFileName, String spectrumId, String scanNumber) { - String[] titles = spectrumProvider.getSpectrumTitles(spectrumFileName); + String title = spectrumTitleLookup.getTitle(spectrumId); - if (titles == null || titles.length == 0) { - throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); + if (title != null) { + return title; } - ArrayList candidates = new ArrayList<>(); - - if (spectrumId != null && !spectrumId.isEmpty()) { - candidates.add(spectrumId); + if (spectrumId != null) { int separatorIndex = spectrumId.indexOf(':'); + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { - candidates.add(spectrumId.substring(separatorIndex + 1)); - } - } - if (scanNumber != null && !scanNumber.isEmpty()) { - candidates.add(scanNumber); - } + title = spectrumTitleLookup.getTitle(spectrumId.substring(separatorIndex + 1)); - for (String candidate : candidates) { - for (String title : titles) { - if (title.equals(candidate) || title.equalsIgnoreCase(candidate)) { + if (title != null) { return title; } } } if (scanNumber != null && 
!scanNumber.isEmpty()) { - try { - int scanIndex = Integer.parseInt(scanNumber); - if (scanIndex >= 0 && scanIndex < titles.length) { - return titles[scanIndex]; - } - } catch (NumberFormatException e) { - // Ignore and report the missing title below. + + title = spectrumTitleLookup.getTitle(scanNumber); + + if (title != null) { + return title; + } + + title = spectrumTitleLookup.getTitleForNumber(scanNumber); + + if (title != null) { + return title; } } throw new IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'."); } + /** + * Returns the precursor charge. + * + * @param value the charge column value + * @param lineNumber the line number + * @param waitingHandler the waiting handler + * + * @return the charge, or null if the row should be skipped + */ + private Integer getCharge(String value, int lineNumber, WaitingHandler waitingHandler) { + + String charge = value == null ? "" : value.trim(); + + try { + return Integer.parseInt(charge); + } catch (NumberFormatException e) { + + if (waitingHandler != null) { + waitingHandler.appendReport( + "Skipping InstaNovo csv line " + lineNumber + ": invalid precursor charge '" + charge + "'.", + true, + true + ); + } + + return null; + } + } + /** * Parses a peptide sequence with optional UniMod annotations. * @@ -542,6 +589,159 @@ private ArrayList parseCsvLine(String line) { return values; } + /** + * Spectrum title lookup cache for one spectrum file. + */ + private static class SpectrumTitleLookup { + + /** + * Titles indexed by exact and lower-case title. + */ + private final HashMap titles = new HashMap<>(); + /** + * Titles indexed by scan or index number tokens parsed from the title. + */ + private final HashMap titleByNumber = new HashMap<>(); + + /** + * Constructor. 
+ * + * @param spectrumProvider the spectrum provider + * @param spectrumFileName the spectrum file name without extension + */ + private SpectrumTitleLookup(SpectrumProvider spectrumProvider, String spectrumFileName) { + + String[] spectrumTitles = spectrumProvider.getSpectrumTitles(spectrumFileName); + + if (spectrumTitles == null || spectrumTitles.length == 0) { + throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); + } + + for (String title : spectrumTitles) { + addTitle(title); + } + } + + /** + * Adds a title. + * + * @param title the title + */ + private void addTitle(String title) { + + if (title == null) { + return; + } + + titles.put(title, title); + titles.put(title.toLowerCase(), title); + + Matcher matcher = TITLE_NUMBER_PATTERN.matcher(title); + + while (matcher.find()) { + addNumber(matcher.group(1), title); + } + } + + /** + * Adds a scan or index number. + * + * @param number the number + * @param title the spectrum title + */ + private void addNumber(String number, String title) { + + String normalizedNumber = normalizeNumber(number); + + if (normalizedNumber == null) { + return; + } + + if (titleByNumber.containsKey(normalizedNumber) + && !title.equals(titleByNumber.get(normalizedNumber))) { + titleByNumber.put(normalizedNumber, null); + } else { + titleByNumber.put(normalizedNumber, title); + } + } + + /** + * Returns a title matching the given title candidate. + * + * @param candidate the candidate + * + * @return the title, or null if not found + */ + private String getTitle(String candidate) { + + if (candidate == null) { + return null; + } + + String trimmedCandidate = candidate.trim(); + + if (trimmedCandidate.isEmpty()) { + return null; + } + + String result = titles.get(trimmedCandidate); + + if (result != null) { + return result; + } + + return titles.get(trimmedCandidate.toLowerCase()); + } + + /** + * Returns a title matching the given scan or index number. 
+ * + * @param candidate the candidate + * + * @return the title, or null if not found + */ + private String getTitleForNumber(String candidate) { + + String normalizedNumber = normalizeNumber(candidate); + + return normalizedNumber == null ? null : titleByNumber.get(normalizedNumber); + } + + /** + * Normalizes a positive integer string. + * + * @param number the number + * + * @return the normalized number + */ + private String normalizeNumber(String number) { + + if (number == null) { + return null; + } + + String trimmedNumber = number.trim(); + + if (trimmedNumber.isEmpty()) { + return null; + } + + for (int i = 0; i < trimmedNumber.length(); i++) { + if (!Character.isDigit(trimmedNumber.charAt(i))) { + return null; + } + } + + int startIndex = 0; + + while (startIndex < trimmedNumber.length() - 1 && trimmedNumber.charAt(startIndex) == '0') { + startIndex++; + } + + return trimmedNumber.substring(startIndex); + } + } + /** * Parsed peptide values. */ diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java index 3c41332b81..d9776bbd24 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java @@ -28,6 +28,6 @@ public InstaNovoRefinedIdfileReader() { * @param csvFile the CSV file */ public InstaNovoRefinedIdfileReader(File csvFile) { - super(csvFile, Advocate.instanovoPlus, EXTENSION); + super(csvFile, Advocate.instanovoRefined, EXTENSION); } } diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index 368a4368a5..f341271ccb 100644 --- 
a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -16,11 +16,11 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat */ static final long serialVersionUID = -2295564912139753378L; /** - * Default InstaNovo model identifier for v1.2.2. + * Default InstaNovo transformer model identifier used by InstaNovo v1.2.2. */ public static final String DEFAULT_INSTANOVO_MODEL = "instanovo-v1.2.0"; /** - * Default InstaNovo+ model identifier for v1.2.2 refinement. + * Default InstaNovo+ diffusion model identifier used by InstaNovo v1.2.2. */ public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0"; /** diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index 8d4c9d0718..b14f489d8e 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -63,12 +63,13 @@ public void testInstaNovoReaders() throws Exception { Assert.assertNotNull(Advocate.getAdvocate("InstaNovo")); Assert.assertNotNull(Advocate.getAdvocate("InstaNovo+")); + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo with refinement")); SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider(); SearchParameters searchParameters = new SearchParameters(); assertReader("test.instanovo.csv", Advocate.instanovo.getIndex(), spectrumProvider, searchParameters); assertReader("test.instanovoplus.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); - assertReader("test.instanovo.refined.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + 
assertReader("test.instanovo.refined.csv", Advocate.instanovoRefined.getIndex(), spectrumProvider, searchParameters); } /** @@ -135,12 +136,58 @@ public void testInstaNovoVersion122SampleRows() throws Exception { assertSampleReader( new InstaNovoRefinedIdfileReader(writeCsv("sample.instanovo.refined.csv", INSTANOVO_COMBINED_V1_2_2)), - Advocate.instanovoPlus.getIndex(), + Advocate.instanovoRefined.getIndex(), "LIRPLLK", 0 ); } + /** + * Tests matching realistic spectrum titles by scan tokens without positional + * scan-number fallback. + * + * @throws Exception if an exception occurs + */ + public void testSpectrumTitleLookupWithRealisticTitles() throws Exception { + + File csvFile = writeCsv( + "realistic-titles.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,1,example:1,419.314971923828,2,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider( + new String[]{"example"}, + new String[]{"controllerType=0 controllerNumber=1 scan=1", "controllerType=0 controllerNumber=1 scan=2"} + ); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("controllerType=0 controllerNumber=1 scan=1", spectrumMatches.get(0).getSpectrumTitle()); + } + + /** + * Tests charge parsing robustness. 
+ * + * @throws Exception if an exception occurs + */ + public void testChargeParsingSkipsInvalidRows() throws Exception { + + File csvFile = writeCsv( + "charges.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,0,example:0,419.314971923828,not-a-charge,0,PEPTIDE,-1.0\n" + + "example,1,example:1,419.314971923828, 2 ,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("1", spectrumMatches.get(0).getSpectrumTitle()); + } + /** * Tests all UniMod annotations from the InstaNovo v1.2.2 default residue * configuration. @@ -230,6 +277,11 @@ private void assertReader( Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(2, peptideAssumption.getPeptide().getVariableModifications().length); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + + if (advocateIndex == Advocate.instanovoRefined.getIndex()) { + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovo.getName())); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovoPlus.getName())); + } } /** @@ -268,6 +320,11 @@ private void assertSampleReader( Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + + if (advocateIndex == Advocate.instanovoRefined.getIndex()) { + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovo.getName())); + 
Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovoPlus.getName())); + } } /** @@ -344,6 +401,33 @@ private File writeCsv(String fileName, String content) throws IOException { */ private static class SimpleSpectrumProvider implements SpectrumProvider { + /** + * File names without extensions. + */ + private final String[] fileNames; + /** + * Spectrum titles. + */ + private final String[] titles; + + /** + * Default constructor. + */ + private SimpleSpectrumProvider() { + this(new String[]{"example"}, new String[]{"0", "1", "2", "3", "4"}); + } + + /** + * Constructor. + * + * @param fileNames the file names + * @param titles the spectrum titles + */ + private SimpleSpectrumProvider(String[] fileNames, String[] titles) { + this.fileNames = fileNames; + this.titles = titles; + } + @Override public Spectrum getSpectrum(String fileNameWithoutExtension, String spectrumTitle) { return null; @@ -421,12 +505,12 @@ public double getMaxPrecRT() { @Override public String[] getOrderedFileNamesWithoutExtensions() { - return new String[]{"example"}; + return fileNames; } @Override public String[] getSpectrumTitles(String fileNameWithoutExtension) { - return new String[]{"0", "1", "2", "3", "4"}; + return titles; } @Override From 5dc6e7f98b2a714f307c527461c7d5d3041392c9 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 22:36:01 +0200 Subject: [PATCH 06/10] Suppress benign Nimbus look and feel exception On recent JDKs the Nimbus look and feel can throw a benign ClassCastException (ColorUIResource cannot be cast to Boolean in NimbusStyle.isOpaque) while building chart popup menus. The exception is still logged but no longer shown to the user, as it does not affect functionality. 
--- .../util/exceptions/ExceptionHandler.java | 43 ++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java b/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java index 783c21e9c7..de6f589cf7 100644 --- a/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java +++ b/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java @@ -32,14 +32,53 @@ public ExceptionHandler() { public synchronized void catchException(Exception e) { if (!ignoreExceptions && !exceptionCaught.contains(getExceptionType(e))) { - + e.printStackTrace(); exceptionCaught.add(getExceptionType(e)); + + // @TODO: remove once the underlying Nimbus look and feel bug is fixed. + // On recent JDKs the Nimbus look and feel can throw a benign + // ClassCastException ("ColorUIResource cannot be cast to Boolean" in + // NimbusStyle.isOpaque) while building chart popup menus. It does not + // affect functionality, so it is logged above but not shown to the user. + if (isBenignLookAndFeelException(e)) { + return; + } + notifyUser(e); - + } } + /** + * Indicates whether the given exception is the known benign look and feel + * ClassCastException thrown while rendering (e.g. "ColorUIResource cannot be + * cast to Boolean" in NimbusStyle). Such exceptions do not affect + * functionality and should not be reported to the user. 
+ * + * @param e the exception to inspect + * + * @return true if the exception is a benign look and feel rendering exception + */ + private static boolean isBenignLookAndFeelException(Exception e) { + + if (!(e instanceof ClassCastException)) { + return false; + } + + for (StackTraceElement element : e.getStackTrace()) { + + String className = element.getClassName(); + + if (className.startsWith("javax.swing.plaf.nimbus.") + || className.startsWith("javax.swing.plaf.synth.")) { + return true; + } + } + + return false; + } + /** * Notifies the user that an exception was caught. * From a8c9cc9d5e7437e2763127793204880a9fb136bb Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 22:36:01 +0200 Subject: [PATCH 07/10] Use .mzML extension for ThermoRawFileParser mzML output ThermoRawFileParser writes mzML files with the canonical .mzML extension. Declaring the format ending as .mzML lets consumers find the converted file on case-sensitive file systems. --- .../ThermoRawFileParserOutputFormat.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java b/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java index 50d954e99b..d3fe971d89 100644 --- a/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java +++ b/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java @@ -14,11 +14,11 @@ public enum ThermoRawFileParserOutputFormat { /** * mzML generic PSI format. */ - mzML(1, "mzML", "mzML generic PSI format", ".mzml"), + mzML(1, "mzML", "mzML generic PSI format", ".mzML"), /** * Indexed mzML generic PSI format. 
*/ - mzMLIndexed(2, "mzML (indexed)", "mzML generic PSI format", ".mzml"); + mzMLIndexed(2, "mzML (indexed)", "mzML generic PSI format", ".mzML"); /** * The index of the format. From 6c63405d0d9753e36d0732fe7b1627850d3a328a Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 22:36:01 +0200 Subject: [PATCH 08/10] Align log4j-api version with log4j-core log4j-core was bumped to 2.25.4 while log4j-api stayed at 2.23.1, causing a NoSuchFieldError at runtime. Bump log4j-api to 2.25.4 to match. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 290e12a5cd..7ad512b49a 100644 --- a/pom.xml +++ b/pom.xml @@ -444,7 +444,7 @@ org.apache.logging.log4j log4j-api - 2.23.1 + 2.25.4 From a98a27739427c022eab9ca0193ed08672f363c9f Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Tue, 23 Jun 2026 02:01:11 +0200 Subject: [PATCH 09/10] Resolve positional InstaNovo spectrum ids --- .../InstaNovoCsvIdfileReader.java | 45 +++++++++++++++++++ .../TestInstaNovoIdfileReader.java | 25 +++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index 56a614b6c7..881491a4dc 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -306,6 +306,19 @@ private String getSpectrumTitle(SpectrumTitleLookup spectrumTitleLookup, String } } + if (spectrumId != null) { + int separatorIndex = spectrumId.indexOf(':'); + + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { + + title = spectrumTitleLookup.getTitleAtIndex(spectrumId.substring(separatorIndex + 1)); + + if (title != null) { + return title; + } + } + } + throw new 
IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'."); } @@ -602,6 +615,10 @@ private static class SpectrumTitleLookup { * Titles indexed by scan or index number tokens parsed from the title. */ private final HashMap titleByNumber = new HashMap<>(); + /** + * Titles in spectrum file order. + */ + private final String[] orderedTitles; /** * Constructor. @@ -617,6 +634,8 @@ private SpectrumTitleLookup(SpectrumProvider spectrumProvider, String spectrumFi throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); } + orderedTitles = spectrumTitles; + for (String title : spectrumTitles) { addTitle(title); } @@ -707,6 +726,32 @@ private String getTitleForNumber(String candidate) { return normalizedNumber == null ? null : titleByNumber.get(normalizedNumber); } + /** + * Returns a title by zero-based spectrum position. + * + * @param candidate the candidate index + * + * @return the title, or null if not found + */ + private String getTitleAtIndex(String candidate) { + + String normalizedNumber = normalizeNumber(candidate); + + if (normalizedNumber == null) { + return null; + } + + int index; + + try { + index = Integer.parseInt(normalizedNumber); + } catch (NumberFormatException e) { + return null; + } + + return index >= 0 && index < orderedTitles.length ? orderedTitles[index] : null; + } + /** * Normalizes a positive integer string. 
* diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index b14f489d8e..24f3df07fd 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -167,6 +167,31 @@ public void testSpectrumTitleLookupWithRealisticTitles() throws Exception { Assert.assertEquals("controllerType=0 controllerNumber=1 scan=1", spectrumMatches.get(0).getSpectrumTitle()); } + /** + * Tests matching InstaNovo positional spectrum ids to descriptive MGF + * titles. + * + * @throws Exception if an exception occurs + */ + public void testSpectrumTitleLookupWithPositionalSpectrumId() throws Exception { + + File csvFile = writeCsv( + "positional-titles.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,0,example:0,419.314971923828,2,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider( + new String[]{"example"}, + new String[]{"Cmpd 3543, +MSn(450.6095), 22.5 min", "Cmpd 3544, +MSn(697.8400), 22.5 min"} + ); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("Cmpd 3543, +MSn(450.6095), 22.5 min", spectrumMatches.get(0).getSpectrumTitle()); + } + /** * Tests charge parsing robustness. 
* From 315d0a63ef5d081882c5cac362efd79843bdbb17 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Tue, 23 Jun 2026 02:07:59 +0200 Subject: [PATCH 10/10] Estimate InstaNovo peptide masses --- .../idfilereaders/InstaNovoCsvIdfileReader.java | 5 +++++ .../io/identifications/TestInstaNovoIdfileReader.java | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index 881491a4dc..2d754bb779 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -166,6 +166,11 @@ public ArrayList getAllSpectrumMatches( ParsedPeptide parsedPeptide = parsePeptide(prediction, lineNumber); Peptide peptide = new Peptide(parsedPeptide.sequence, parsedPeptide.modificationMatches); + peptide.estimateTheoreticMass( + searchParameters.getModificationParameters(), + null, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING + ); PeptideAssumption peptideAssumption = new PeptideAssumption( peptide, 1, diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index 24f3df07fd..7c684aa5e9 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -301,6 +301,8 @@ private void assertReader( PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(2, 
peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); + Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); if (advocateIndex == Advocate.instanovoRefined.getIndex()) { @@ -344,6 +346,8 @@ private void assertSampleReader( Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); + Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); if (advocateIndex == Advocate.instanovoRefined.getIndex()) {