From 67ef7a4a56e78e40a2ed62f866df2ad2598a73e6 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 12:17:37 +0200 Subject: [PATCH 01/12] Add InstaNovo identification support --- .../experiment/identification/Advocate.java | 14 +- .../InstaNovoCsvIdfileReader.java | 596 ++++++++++++++++++ .../idfilereaders/InstaNovoIdfileReader.java | 33 + .../InstaNovoPlusIdfileReader.java | 33 + .../InstaNovoRefinedIdfileReader.java | 33 + .../identification/idfilereaders/package.html | 12 +- .../search/SearchParameters.java | 14 + .../tool_specific/InstaNovoParameters.java | 217 +++++++ .../InstaNovoPlusParameters.java | 28 + .../identification/tool_specific/package.html | 11 +- ....experiment.io.identification.IdfileReader | 7 +- .../TestInstaNovoIdfileReader.java | 447 +++++++++++++ 12 files changed, 1432 insertions(+), 13 deletions(-) create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java create mode 100644 src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java create mode 100644 src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java create mode 100644 src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java create mode 100644 src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java diff --git a/src/main/java/com/compomics/util/experiment/identification/Advocate.java b/src/main/java/com/compomics/util/experiment/identification/Advocate.java index 778afdedb1..da66bf7066 100644 --- a/src/main/java/com/compomics/util/experiment/identification/Advocate.java +++ b/src/main/java/com/compomics/util/experiment/identification/Advocate.java @@ -182,6 +182,14 @@ public enum AdvocateType { * The MSFragger search engine. */ public static final Advocate msFragger = new Advocate(37, "MSFragger", AdvocateType.search_engine, new java.awt.Color(128, 128, 0)); + /** + * The InstaNovo de novo sequencing algorithm. + */ + public static final Advocate instanovo = new Advocate(38, "InstaNovo", AdvocateType.sequencing_algorithm, new Color(95, 158, 160)); + /** + * The InstaNovo+ de novo sequencing algorithm. + */ + public static final Advocate instanovoPlus = new Advocate(39, "InstaNovo+", AdvocateType.sequencing_algorithm, new Color(123, 104, 238)); /** * Advocate type for mzId files where no software is annotated. */ @@ -311,7 +319,7 @@ public String toString() { * @return the implemented advocates in an array */ public static Advocate[] values() { - Advocate[] result = new Advocate[40 + userAdvocates.size()]; + Advocate[] result = new Advocate[42 + userAdvocates.size()]; int i = 0; result[i] = peptideShaker; result[++i] = onyaseEngine; @@ -353,6 +361,8 @@ public static Advocate[] values() { result[++i] = coss; result[++i] = sage; result[++i] = msFragger; + result[++i] = instanovo; + result[++i] = instanovoPlus; for (Advocate advocate : userAdvocates.values()) { result[++i] = advocate; @@ -489,6 +499,8 @@ public String getPmid() { return "37819886"; } else if (this == msFragger) { return "28394336"; + } else if (this == instanovo || this == instanovoPlus) { + return null; } else { return null; } diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java new file mode 100644 index 0000000000..b1b96cc7d3 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -0,0 +1,596 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.Util; +import com.compomics.util.experiment.biology.proteins.Peptide; +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.identification.matches.ModificationMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; +import com.compomics.util.experiment.io.identification.IdfileReader; +import com.compomics.util.experiment.mass_spectrometry.SpectrumProvider; +import com.compomics.util.io.IoUtil; +import com.compomics.util.io.flat.SimpleFileReader; +import com.compomics.util.parameters.identification.advanced.SequenceMatchingParameters; +import com.compomics.util.parameters.identification.search.SearchParameters; +import com.compomics.util.waiting.WaitingHandler; +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import javax.xml.bind.JAXBException; + +/** + * Shared parser for InstaNovo v1.2.2 normalized CSV predictions. + * + * @author CompOmics + */ +abstract class InstaNovoCsvIdfileReader implements IdfileReader { + + /** + * The supported InstaNovo version. + */ + private static final String SOFTWARE_VERSION = "1.2.2"; + /** + * The CSV file. + */ + private final File csvFile; + /** + * The advocate used for peptide assumptions. + */ + private final Advocate advocate; + /** + * The extension this reader is registered for. + */ + private final String extension; + + /** + * Constructor. + * + * @param csvFile the CSV file + * @param advocate the advocate + * @param extension the registered extension + */ + protected InstaNovoCsvIdfileReader(File csvFile, Advocate advocate, String extension) { + this.csvFile = csvFile; + this.advocate = advocate; + this.extension = extension; + } + + @Override + public String getExtension() { + return extension; + } + + @Override + public ArrayList getAllSpectrumMatches( + SpectrumProvider spectrumProvider, + WaitingHandler waitingHandler, + SearchParameters searchParameters + ) + throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException { + + return getAllSpectrumMatches( + spectrumProvider, + waitingHandler, + searchParameters, + null, + true + ); + } + + @Override + public ArrayList getAllSpectrumMatches( + SpectrumProvider spectrumProvider, + WaitingHandler waitingHandler, + SearchParameters searchParameters, + SequenceMatchingParameters sequenceMatchingPreferences, + boolean expandAaCombinations + ) + throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException { + + if (spectrumProvider == null) { + throw new IllegalArgumentException("A spectrum provider is required to import InstaNovo results."); + } + + ArrayList result = new ArrayList<>(); + HashMap matches = new HashMap<>(); + + try (SimpleFileReader reader = SimpleFileReader.getFileReader(csvFile)) { + + String line = reader.readLine(); + + if (line == null) { + throw new IllegalArgumentException("The InstaNovo csv file is empty."); + } + + ArrayList headers = parseCsvLine(line); + HashMap columnIndexes = getColumnIndexes(headers); + + int experimentIndex = getOptionalColumn(columnIndexes, "experiment_name"); + int spectrumIdIndex = getOptionalColumn(columnIndexes, "spectrum_id", "spectrum"); + int scanNumberIndex = getOptionalColumn(columnIndexes, "scan_number", "scan"); + int chargeIndex = getRequiredColumn(columnIndexes, "precursor_charge", "charge", "z"); + int predictionIndex = getRequiredColumn(columnIndexes, "predictions", "prediction", "sequence"); + int scoreIndex = getRequiredColumn(columnIndexes, "log_probs", "prediction_log_probability", "predictions_log_probability"); + + if (experimentIndex < 0 && spectrumIdIndex < 0 && scanNumberIndex < 0) { + throw new IllegalArgumentException("Mandatory spectrum identification columns are missing in the InstaNovo csv file."); + } + + int lineNumber = 1; + while ((line = reader.readLine()) != null) { + + lineNumber++; + + if (line.trim().isEmpty()) { + continue; + } + + ArrayList values = parseCsvLine(line); + + String prediction = getValue(values, predictionIndex).trim(); + + if (prediction.isEmpty()) { + continue; + } + + String experimentName = experimentIndex >= 0 ? getValue(values, experimentIndex).trim() : ""; + String spectrumId = spectrumIdIndex >= 0 ? getValue(values, spectrumIdIndex).trim() : ""; + String scanNumber = scanNumberIndex >= 0 ? getValue(values, scanNumberIndex).trim() : ""; + String spectrumFileName = getSpectrumFileName(spectrumProvider, experimentName, spectrumId); + String spectrumTitle = getSpectrumTitle(spectrumProvider, spectrumFileName, spectrumId, scanNumber); + + int charge = Integer.parseInt(getValue(values, chargeIndex)); + double logProbability = Util.readDoubleAsString(getValue(values, scoreIndex)); + double score = -logProbability; + + ParsedPeptide parsedPeptide = parsePeptide(prediction, lineNumber); + Peptide peptide = new Peptide(parsedPeptide.sequence, parsedPeptide.modificationMatches); + PeptideAssumption peptideAssumption = new PeptideAssumption( + peptide, + 1, + advocate.getIndex(), + charge, + logProbability, + score, + IoUtil.getFileName(csvFile) + ); + + String matchKey = spectrumFileName + "\n" + spectrumTitle; + SpectrumMatch spectrumMatch = matches.get(matchKey); + + if (spectrumMatch == null) { + spectrumMatch = new SpectrumMatch(spectrumFileName, spectrumTitle); + matches.put(matchKey, spectrumMatch); + result.add(spectrumMatch); + } + + spectrumMatch.addPeptideAssumption(advocate.getIndex(), peptideAssumption); + } + } + + return result; + } + + @Override + public void close() throws IOException { + // Nothing to close. + } + + @Override + public HashMap> getSoftwareVersions() { + + HashMap> result = new HashMap<>(); + ArrayList versions = new ArrayList<>(); + versions.add(SOFTWARE_VERSION); + result.put(advocate.getName(), versions); + + if (advocate == Advocate.instanovoPlus && getExtension().contains("refined")) { + + ArrayList instaNovoVersions = new ArrayList<>(); + instaNovoVersions.add(SOFTWARE_VERSION); + result.put(Advocate.instanovo.getName(), instaNovoVersions); + } + + return result; + } + + @Override + public boolean hasDeNovoTags() { + return false; + } + + /** + * Returns the spectrum file name without extension. + * + * @param spectrumProvider the spectrum provider + * @param experimentName the experiment name + * @param spectrumId the spectrum id + * + * @return the spectrum file name without extension + */ + private String getSpectrumFileName(SpectrumProvider spectrumProvider, String experimentName, String spectrumId) { + + String fileName = experimentName; + + if (fileName == null || fileName.isEmpty()) { + int separatorIndex = spectrumId.indexOf(':'); + if (separatorIndex > 0) { + fileName = spectrumId.substring(0, separatorIndex); + } + } + + if (fileName == null || fileName.isEmpty()) { + + String[] fileNames = spectrumProvider.getOrderedFileNamesWithoutExtensions(); + + if (fileNames != null && fileNames.length == 1) { + fileName = fileNames[0]; + } + } + + if (fileName == null || fileName.isEmpty()) { + throw new IllegalArgumentException("Unable to infer the spectrum file name from the InstaNovo csv file."); + } + + return IoUtil.removeExtension(fileName); + } + + /** + * Resolves the spectrum title. + * + * @param spectrumProvider the spectrum provider + * @param spectrumFileName the spectrum file name without extension + * @param spectrumId the spectrum id + * @param scanNumber the scan number + * + * @return the spectrum title + */ + private String getSpectrumTitle(SpectrumProvider spectrumProvider, String spectrumFileName, String spectrumId, String scanNumber) { + + String[] titles = spectrumProvider.getSpectrumTitles(spectrumFileName); + + if (titles == null || titles.length == 0) { + throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); + } + + ArrayList candidates = new ArrayList<>(); + + if (spectrumId != null && !spectrumId.isEmpty()) { + candidates.add(spectrumId); + int separatorIndex = spectrumId.indexOf(':'); + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { + candidates.add(spectrumId.substring(separatorIndex + 1)); + } + } + + if (scanNumber != null && !scanNumber.isEmpty()) { + candidates.add(scanNumber); + } + + for (String candidate : candidates) { + for (String title : titles) { + if (title.equals(candidate) || title.equalsIgnoreCase(candidate)) { + return title; + } + } + } + + if (scanNumber != null && !scanNumber.isEmpty()) { + try { + int scanIndex = Integer.parseInt(scanNumber); + if (scanIndex >= 0 && scanIndex < titles.length) { + return titles[scanIndex]; + } + } catch (NumberFormatException e) { + // Ignore and report the missing title below. + } + } + + throw new IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'."); + } + + /** + * Parses a peptide sequence with optional UniMod annotations. + * + * @param prediction the prediction + * @param lineNumber the line number + * + * @return the parsed peptide + */ + private ParsedPeptide parsePeptide(String prediction, int lineNumber) { + + StringBuilder sequence = new StringBuilder(); + ArrayList modifications = new ArrayList<>(); + int lastResidueSite = 0; + + for (int i = 0; i < prediction.length(); i++) { + + char currentChar = prediction.charAt(i); + + if (currentChar == '[') { + + int endIndex = prediction.indexOf(']', i); + + if (endIndex < 0) { + throw new IllegalArgumentException("Invalid UniMod annotation in InstaNovo csv file at line " + lineNumber + "."); + } + + String annotation = prediction.substring(i + 1, endIndex); + Character previousResidue = lastResidueSite > 0 ? sequence.charAt(lastResidueSite - 1) : null; + Character nextResidue = previousResidue == null ? getNextResidue(prediction, endIndex + 1) : null; + UtilitiesModification modification = getUtilitiesModification(annotation, previousResidue, nextResidue, lastResidueSite); + + if (modification != null) { + modifications.add(new ModificationMatch(modification.name, modification.site)); + } + + i = endIndex; + + } else if (Character.isLetter(currentChar)) { + + sequence.append(Character.toUpperCase(currentChar)); + lastResidueSite = sequence.length(); + } + } + + if (sequence.length() == 0) { + throw new IllegalArgumentException("No peptide sequence found in InstaNovo csv file at line " + lineNumber + "."); + } + + return new ParsedPeptide(sequence.toString(), modifications.toArray(new ModificationMatch[modifications.size()])); + } + + /** + * Maps InstaNovo UniMod annotations to Utilities modification names. + * + * @param annotation the annotation + * @param previousResidue the preceding residue, null for N-terminal + * annotations + * @param nextResidue the next residue, null when unavailable + * @param site the preceding residue site + * + * @return the Utilities modification, or null if unsupported + */ + private UtilitiesModification getUtilitiesModification(String annotation, Character previousResidue, Character nextResidue, int site) { + + if (!annotation.toUpperCase().startsWith("UNIMOD:")) { + return null; + } + + String accession = annotation.substring("UNIMOD:".length()); + + if ("1".equals(accession) && previousResidue == null) { + return new UtilitiesModification("Acetylation of peptide N-term", 0); + } else if ("4".equals(accession) && previousResidue != null && previousResidue == 'C') { + return new UtilitiesModification("Carbamidomethylation of C", site); + } else if ("5".equals(accession) && previousResidue == null) { + return new UtilitiesModification("Carbamilation of protein N-term", 0); + } else if ("7".equals(accession) && previousResidue != null) { + if (previousResidue == 'N') { + return new UtilitiesModification("Deamidation of N", site); + } else if (previousResidue == 'Q') { + return new UtilitiesModification("Deamidation of Q", site); + } else if (previousResidue == 'R') { + return new UtilitiesModification("Citrullination of R", site); + } + } else if ("35".equals(accession) && previousResidue != null) { + if (previousResidue == 'M') { + return new UtilitiesModification("Oxidation of M", site); + } else if (previousResidue == 'P') { + return new UtilitiesModification("Oxidation of P", site); + } else if (previousResidue == 'K') { + return new UtilitiesModification("Oxidation of K", site); + } else if (previousResidue == 'C') { + return new UtilitiesModification("Oxidation of C", site); + } else if (previousResidue == 'N') { + return new UtilitiesModification("Oxidation of N", site); + } + } else if ("21".equals(accession) && previousResidue != null) { + if (previousResidue == 'S') { + return new UtilitiesModification("Phosphorylation of S", site); + } else if (previousResidue == 'T') { + return new UtilitiesModification("Phosphorylation of T", site); + } else if (previousResidue == 'Y') { + return new UtilitiesModification("Phosphorylation of Y", site); + } + } else if ("385".equals(accession)) { + if (previousResidue != null && previousResidue == 'N' && site > 0) { + return new UtilitiesModification("Ammonia loss from N", site); + } else if (previousResidue != null && previousResidue == 'C' && site == 1) { + return new UtilitiesModification("Pyrolidone from carbamidomethylated C", site); + } else if (previousResidue == null && nextResidue != null) { + if (nextResidue == 'N') { + return new UtilitiesModification("Ammonia loss from N", 1); + } else if (nextResidue == 'C') { + return new UtilitiesModification("Pyrolidone from carbamidomethylated C", 1); + } + } + } + + return null; + } + + /** + * Returns the next residue in the prediction. + * + * @param prediction the prediction + * @param startIndex the start index + * + * @return the next residue, or null + */ + private Character getNextResidue(String prediction, int startIndex) { + + for (int i = startIndex; i < prediction.length(); i++) { + + char currentChar = prediction.charAt(i); + + if (Character.isLetter(currentChar)) { + return Character.toUpperCase(currentChar); + } + } + + return null; + } + + /** + * Returns a value from a parsed CSV row. + * + * @param values the values + * @param index the index + * + * @return the value + */ + private String getValue(ArrayList values, int index) { + return index < values.size() ? values.get(index) : ""; + } + + /** + * Returns indexes by lowercase header. + * + * @param headers the headers + * + * @return the indexes + */ + private HashMap getColumnIndexes(ArrayList headers) { + + HashMap result = new HashMap<>(); + + for (int i = 0; i < headers.size(); i++) { + result.put(headers.get(i).trim().toLowerCase(), i); + } + + return result; + } + + /** + * Returns an optional column. + * + * @param columnIndexes the column indexes + * @param columnNames the column names + * + * @return the column index, or -1 + */ + private int getOptionalColumn(HashMap columnIndexes, String... columnNames) { + + for (String columnName : columnNames) { + + Integer columnIndex = columnIndexes.get(columnName.toLowerCase()); + + if (columnIndex != null) { + return columnIndex; + } + } + + return -1; + } + + /** + * Returns a required column. + * + * @param columnIndexes the column indexes + * @param columnNames the column names + * + * @return the column index + */ + private int getRequiredColumn(HashMap columnIndexes, String... columnNames) { + + int columnIndex = getOptionalColumn(columnIndexes, columnNames); + + if (columnIndex < 0) { + throw new IllegalArgumentException("Mandatory columns are missing in the InstaNovo csv file."); + } + + return columnIndex; + } + + /** + * Parses a CSV line. + * + * @param line the line + * + * @return the values + */ + private ArrayList parseCsvLine(String line) { + + ArrayList values = new ArrayList<>(); + StringBuilder currentValue = new StringBuilder(); + boolean inQuotes = false; + + for (int i = 0; i < line.length(); i++) { + + char currentChar = line.charAt(i); + + if (currentChar == '"') { + if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') { + currentValue.append('"'); + i++; + } else { + inQuotes = !inQuotes; + } + } else if (currentChar == ',' && !inQuotes) { + values.add(currentValue.toString()); + currentValue.setLength(0); + } else { + currentValue.append(currentChar); + } + } + + values.add(currentValue.toString()); + + return values; + } + + /** + * Parsed peptide values. + */ + private static class ParsedPeptide { + + /** + * The bare sequence. + */ + private final String sequence; + /** + * The variable modifications. + */ + private final ModificationMatch[] modificationMatches; + + /** + * Constructor. + * + * @param sequence the sequence + * @param modificationMatches the modification matches + */ + private ParsedPeptide(String sequence, ModificationMatch[] modificationMatches) { + this.sequence = sequence; + this.modificationMatches = modificationMatches; + } + } + + /** + * Utilities modification mapping. + */ + private static class UtilitiesModification { + + /** + * The modification name. + */ + private final String name; + /** + * The modification site. + */ + private final int site; + + /** + * Constructor. + * + * @param name the modification name + * @param site the modification site + */ + private UtilitiesModification(String name, int site) { + this.name = name; + this.site = site; + } + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java new file mode 100644 index 0000000000..13f556e233 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java @@ -0,0 +1,33 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.experiment.identification.Advocate; +import java.io.File; + +/** + * Reader for InstaNovo transformer-only CSV output. + * + * @author CompOmics + */ +public class InstaNovoIdfileReader extends InstaNovoCsvIdfileReader { + + /** + * The supported extension. + */ + public static final String EXTENSION = ".instanovo.csv"; + + /** + * Default constructor for service loading. + */ + public InstaNovoIdfileReader() { + this(null); + } + + /** + * Constructor. + * + * @param csvFile the CSV file + */ + public InstaNovoIdfileReader(File csvFile) { + super(csvFile, Advocate.instanovo, EXTENSION); + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java new file mode 100644 index 0000000000..cca7062c56 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java @@ -0,0 +1,33 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.experiment.identification.Advocate; +import java.io.File; + +/** + * Reader for standalone InstaNovo+ CSV output. + * + * @author CompOmics + */ +public class InstaNovoPlusIdfileReader extends InstaNovoCsvIdfileReader { + + /** + * The supported extension. + */ + public static final String EXTENSION = ".instanovoplus.csv"; + + /** + * Default constructor for service loading. + */ + public InstaNovoPlusIdfileReader() { + this(null); + } + + /** + * Constructor. + * + * @param csvFile the CSV file + */ + public InstaNovoPlusIdfileReader(File csvFile) { + super(csvFile, Advocate.instanovoPlus, EXTENSION); + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java new file mode 100644 index 0000000000..3c41332b81 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java @@ -0,0 +1,33 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.experiment.identification.Advocate; +import java.io.File; + +/** + * Reader for InstaNovo predictions refined by InstaNovo+. + * + * @author CompOmics + */ +public class InstaNovoRefinedIdfileReader extends InstaNovoCsvIdfileReader { + + /** + * The supported extension. + */ + public static final String EXTENSION = ".instanovo.refined.csv"; + + /** + * Default constructor for service loading. + */ + public InstaNovoRefinedIdfileReader() { + this(null); + } + + /** + * Constructor. + * + * @param csvFile the CSV file + */ + public InstaNovoRefinedIdfileReader(File csvFile) { + super(csvFile, Advocate.instanovoPlus, EXTENSION); + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html index 37c3d14ace..c13031b805 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html @@ -1,5 +1,7 @@ - - - Experiment classes related to reading search engine files. - - + + + Experiment classes related to reading search engine files, including + InstaNovo, InstaNovo+, and InstaNovo with InstaNovo+ refinement CSV + prediction files. + + diff --git a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java index 3241b8073f..2064ccd550 100644 --- a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java @@ -26,6 +26,8 @@ import static com.compomics.util.parameters.identification.IdentificationParameters.CURRENT_VERSION; import com.compomics.util.parameters.identification.tool_specific.MetaMorpheusParameters; import com.compomics.util.parameters.identification.tool_specific.SageParameters; +import com.compomics.util.parameters.identification.tool_specific.InstaNovoParameters; +import com.compomics.util.parameters.identification.tool_specific.InstaNovoPlusParameters; import java.io.*; import java.util.ArrayList; import java.util.HashMap; @@ -303,6 +305,18 @@ public void setDefaultAdvancedSettings(SearchParameters searchParameters) { setIdentificationAlgorithmParameter(Advocate.novor.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.novor.getIndex())); } + if (searchParameters == null || searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovo.getIndex()) == null) { + setIdentificationAlgorithmParameter(Advocate.instanovo.getIndex(), new InstaNovoParameters()); + } else { + setIdentificationAlgorithmParameter(Advocate.instanovo.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovo.getIndex())); + } + + if (searchParameters == null || searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex()) == null) { + setIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex(), new InstaNovoPlusParameters()); + } else { + setIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex())); + } + } /** diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java new file mode 100644 index 0000000000..e17be34aae --- /dev/null +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -0,0 +1,217 @@ +package com.compomics.util.parameters.identification.tool_specific; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.personalization.ExperimentObject; +import com.compomics.util.gui.parameters.identification.IdentificationAlgorithmParameter; + +/** + * InstaNovo specific parameters. + * + * @author CompOmics + */ +public class InstaNovoParameters extends ExperimentObject implements IdentificationAlgorithmParameter { + + /** + * Version number for deserialization. + */ + static final long serialVersionUID = -2295564912139753378L; + /** + * Default InstaNovo model identifier for v1.2.2. + */ + public static final String DEFAULT_INSTANOVO_MODEL = "instanovo-v1.2.0"; + /** + * Default InstaNovo+ model identifier for v1.2.2 refinement. + */ + public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0"; + /** + * The selected InstaNovo model id or path. + */ + private String instaNovoModel = DEFAULT_INSTANOVO_MODEL; + /** + * The selected InstaNovo+ model id or path used for refinement. + */ + private String instaNovoPlusModel = DEFAULT_INSTANOVO_PLUS_MODEL; + /** + * The optional inference configuration path. + */ + private String configFile = null; + /** + * The number of beams. + */ + private int numberOfBeams = 5; + /** + * The prediction batch size. A value below one lets InstaNovo use its + * configuration default. + */ + private int batchSize = -1; + /** + * Whether to force CPU execution. + */ + private boolean forceCpu = false; + + @Override + public Advocate getAlgorithm() { + return Advocate.instanovo; + } + + @Override + public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmParameter) { + + if (identificationAlgorithmParameter instanceof InstaNovoParameters) { + + InstaNovoParameters other = (InstaNovoParameters) identificationAlgorithmParameter; + + return safeEquals(instaNovoModel, other.getInstaNovoModel()) + && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel()) + && safeEquals(configFile, other.getConfigFile()) + && numberOfBeams == other.getNumberOfBeams() + && batchSize == other.getBatchSize() + && forceCpu == other.isForceCpu(); + } + + return false; + } + + @Override + public String toString(boolean html) { + + String newLine = html ? "
" : System.getProperty("line.separator"); + StringBuilder output = new StringBuilder(); + Advocate advocate = getAlgorithm(); + output.append("# ------------------------------------------------------------------"); + output.append(newLine); + output.append("# ").append(advocate.getName()).append(" Specific Parameters"); + output.append(newLine); + output.append("# ------------------------------------------------------------------"); + output.append(newLine); + output.append(newLine); + output.append("INSTANOVO_MODEL=").append(instaNovoModel).append(newLine); + output.append("INSTANOVO_PLUS_MODEL=").append(instaNovoPlusModel).append(newLine); + output.append("CONFIG_FILE=").append(configFile == null ? "" : configFile).append(newLine); + output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine); + output.append("BATCH_SIZE=").append(batchSize).append(newLine); + output.append("FORCE_CPU=").append(forceCpu).append(newLine); + + return output.toString(); + } + + /** + * Returns the selected InstaNovo model. + * + * @return the selected InstaNovo model + */ + public String getInstaNovoModel() { + return instaNovoModel; + } + + /** + * Sets the selected InstaNovo model. + * + * @param instaNovoModel the selected InstaNovo model + */ + public void setInstaNovoModel(String instaNovoModel) { + this.instaNovoModel = instaNovoModel; + } + + /** + * Returns the selected InstaNovo+ model. + * + * @return the selected InstaNovo+ model + */ + public String getInstaNovoPlusModel() { + return instaNovoPlusModel; + } + + /** + * Sets the selected InstaNovo+ model. + * + * @param instaNovoPlusModel the selected InstaNovo+ model + */ + public void setInstaNovoPlusModel(String instaNovoPlusModel) { + this.instaNovoPlusModel = instaNovoPlusModel; + } + + /** + * Returns the optional configuration file. + * + * @return the optional configuration file + */ + public String getConfigFile() { + return configFile; + } + + /** + * Sets the optional configuration file. + * + * @param configFile the optional configuration file + */ + public void setConfigFile(String configFile) { + this.configFile = configFile; + } + + /** + * Returns the number of beams. + * + * @return the number of beams + */ + public int getNumberOfBeams() { + return numberOfBeams; + } + + /** + * Sets the number of beams. + * + * @param numberOfBeams the number of beams + */ + public void setNumberOfBeams(int numberOfBeams) { + this.numberOfBeams = numberOfBeams; + } + + /** + * Returns the batch size. + * + * @return the batch size + */ + public int getBatchSize() { + return batchSize; + } + + /** + * Sets the batch size. + * + * @param batchSize the batch size + */ + public void setBatchSize(int batchSize) { + this.batchSize = batchSize; + } + + /** + * Returns whether CPU execution is forced. + * + * @return whether CPU execution is forced + */ + public boolean isForceCpu() { + return forceCpu; + } + + /** + * Sets whether CPU execution is forced. + * + * @param forceCpu whether CPU execution is forced + */ + public void setForceCpu(boolean forceCpu) { + this.forceCpu = forceCpu; + } + + /** + * Null-safe string comparison. + * + * @param a the first value + * @param b the second value + * + * @return true if the two values are equal + */ + protected boolean safeEquals(String a, String b) { + return a == null ? b == null : a.equals(b); + } +} diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java new file mode 100644 index 0000000000..a80f637a2d --- /dev/null +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java @@ -0,0 +1,28 @@ +package com.compomics.util.parameters.identification.tool_specific; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.gui.parameters.identification.IdentificationAlgorithmParameter; + +/** + * InstaNovo+ specific parameters. + * + * @author CompOmics + */ +public class InstaNovoPlusParameters extends InstaNovoParameters { + + /** + * Version number for deserialization. + */ + static final long serialVersionUID = -7586968643672811482L; + + @Override + public Advocate getAlgorithm() { + return Advocate.instanovoPlus; + } + + @Override + public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmParameter) { + return identificationAlgorithmParameter instanceof InstaNovoPlusParameters + && super.equals(identificationAlgorithmParameter); + } +} diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html b/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html index bcfa8b1b9e..d7f7901398 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html @@ -1,5 +1,6 @@ - - - Parameters settings for the search algorithms. - - + + + Parameter settings for the search and de novo sequencing algorithms, + including InstaNovo and InstaNovo+. + + diff --git a/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader b/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader index 3e823496b9..4bf7accc6b 100644 --- a/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader +++ b/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader @@ -10,5 +10,8 @@ com.compomics.util.experiment.io.identification.idfilereaders.TideIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.NovorIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.OnyaseIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.XTandemIdfileReader -com.compomics.util.experiment.io.identification.idfilereaders.CossIdfileReader -com.compomics.util.experiment.io.identification.idfilereaders.SageIdfileReader \ No newline at end of file +com.compomics.util.experiment.io.identification.idfilereaders.CossIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.SageIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoPlusIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoRefinedIdfileReader diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java new file mode 100644 index 0000000000..8d4c9d0718 --- /dev/null +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -0,0 +1,447 @@ +package com.compomics.util.test.experiment.io.identifications; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.identification.matches.ModificationMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; +import com.compomics.util.experiment.io.identification.IdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoIdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoPlusIdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoRefinedIdfileReader; +import com.compomics.util.experiment.mass_spectrometry.SpectrumProvider; +import com.compomics.util.experiment.mass_spectrometry.spectra.Precursor; +import com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum; +import com.compomics.util.parameters.identification.search.SearchParameters; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.TreeMap; +import junit.framework.TestCase; +import org.junit.Assert; + +/** + * Tests for InstaNovo v1.2.2 CSV readers. + * + * @author CompOmics + */ +public class TestInstaNovoIdfileReader extends TestCase { + + /** + * Derived from the first row of the InstaNovo v1.2.2 transformer normalized + * Zenodo sample file. + */ + private static final String INSTANOVO_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-1147.98681640625,\"[-0.015801219269633293, -1.1395305395126343, -2.2013168334960938, -1.3749353885650635, -1.4705305099487305, -0.5675679445266724]\",no_group,\"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K\",58846.475981092575\n"; + + /** + * Derived from the first row of the InstaNovo+ v1.2.2 no-refinement + * normalized Zenodo sample file. + */ + private static final String INSTANOVOPLUS_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,MC[UNIMOD:4]IPDQPM[UNIMOD:35]EVDNEDDAPLPPPEAR,-3.6934256553649902,,no_group,\"M, C[UNIMOD:4], I, P, D, Q, P, M[UNIMOD:35], E, V, D, N, E, D, D, A, P, L, P, P, P, E, A, R\",2282970.310323359\n"; + + /** + * Derived from the first row of the InstaNovo v1.2.2 combined refined + * Zenodo sample file. + */ + private static final String INSTANOVO_COMBINED_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,instanovo_predictions,instanovo_prediction_log_probability,instanovo_prediction_token_log_probabilities,instanovo_predictions_beam_0,instanovo_predictions_log_probability_beam_0,instanovo_predictions_token_log_probabilities_beam_0,instanovo_predictions_beam_1,instanovo_predictions_log_probability_beam_1,instanovo_predictions_token_log_probabilities_beam_1,instanovo_predictions_beam_2,instanovo_predictions_log_probability_beam_2,instanovo_predictions_token_log_probabilities_beam_2,instanovo_predictions_beam_3,instanovo_predictions_log_probability_beam_3,instanovo_predictions_token_log_probabilities_beam_3,instanovo_predictions_beam_4,instanovo_predictions_log_probability_beam_4,instanovo_predictions_token_log_probabilities_beam_4,instanovoplus_predictions,instanovoplus_prediction_log_probability,instanovoplus_prediction_token_log_probabilities,instanovoplus_unrefined_predictions,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,LIRPLLK,-0.6334811449050903,,no_group,\"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']\",-10.102036476135254,\"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",LKGDS[UNIMOD:21]PK,-10.102036476135254,\"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",VKGDS[UNIMOD:21]PK,-11.082494735717773,\"[-2.8237648010253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",SKGDS[UNIMOD:21]PK,-11.430251121520996,\"[-2.7461280822753906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",AKGDS[UNIMOD:21]PK,-11.492465019226074,\"[-3.1643409729003906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",PKGDS[UNIMOD:21]PK,-11.968438148498535,\"[-2.6694679260253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",\"['L', 'I', 'R', 'P', 'L', 'L', 'K']\",-0.6334811449050903,,\"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']\",\"L, I, R, P, L, L, K\",17862.82765389216\n"; + + /** + * Tests registration and parsing of the three supported InstaNovo modes. + * + * @throws Exception if an exception occurs + */ + public void testInstaNovoReaders() throws Exception { + + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo")); + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo+")); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider(); + SearchParameters searchParameters = new SearchParameters(); + + assertReader("test.instanovo.csv", Advocate.instanovo.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovoplus.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovo.refined.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + } + + /** + * Tests service registration for the three InstaNovo readers. + * + * @throws Exception if an exception occurs + */ + public void testInstaNovoReaderServiceRegistration() throws Exception { + + InputStream serviceStream = getClass().getClassLoader().getResourceAsStream( + "META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader" + ); + + Assert.assertNotNull(serviceStream); + + byte[] bytes = new byte[serviceStream.available()]; + serviceStream.read(bytes); + + String serviceFile = new String(bytes, StandardCharsets.UTF_8); + + Assert.assertTrue(serviceFile.contains(InstaNovoIdfileReader.class.getName())); + Assert.assertTrue(serviceFile.contains(InstaNovoPlusIdfileReader.class.getName())); + Assert.assertTrue(serviceFile.contains(InstaNovoRefinedIdfileReader.class.getName())); + } + + /** + * Tests invalid headers. + * + * @throws Exception if an exception occurs + */ + public void testMissingColumns() throws Exception { + + File csvFile = writeCsv("missing.instanovo.csv", "experiment_name,scan_number,predictions\nexample,0,PEPTIDE\n"); + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + + try { + idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + Assert.fail("Expected invalid InstaNovo CSV columns to fail."); + } catch (IllegalArgumentException e) { + Assert.assertTrue(e.getMessage().contains("Mandatory")); + } + } + + /** + * Tests parsing rows derived from the InstaNovo v1.2.2 sample files. + * + * @throws Exception if an exception occurs + */ + public void testInstaNovoVersion122SampleRows() throws Exception { + + assertSampleReader( + new InstaNovoIdfileReader(writeCsv("sample.instanovo.csv", INSTANOVO_V1_2_2)), + Advocate.instanovo.getIndex(), + "DMNSPK", + 2 + ); + + assertSampleReader( + new InstaNovoPlusIdfileReader(writeCsv("sample.instanovoplus.csv", INSTANOVOPLUS_V1_2_2)), + Advocate.instanovoPlus.getIndex(), + "MCIPDQPMEVDNEDDAPLPPPEAR", + 2 + ); + + assertSampleReader( + new InstaNovoRefinedIdfileReader(writeCsv("sample.instanovo.refined.csv", INSTANOVO_COMBINED_V1_2_2)), + Advocate.instanovoPlus.getIndex(), + "LIRPLLK", + 0 + ); + } + + /** + * Tests all UniMod annotations from the InstaNovo v1.2.2 default residue + * configuration. + * + * @throws Exception if an exception occurs + */ + public void testDefaultInstaNovoModifications() throws Exception { + + String header = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n"; + File csvFile = writeCsv( + "default-modifications.instanovo.csv", + header + + "sample,0,sample:0,419.314971923828,2,0,M[UNIMOD:35]C[UNIMOD:4]N[UNIMOD:7]Q[UNIMOD:7]R[UNIMOD:7]P[UNIMOD:35]S[UNIMOD:21]T[UNIMOD:21]Y[UNIMOD:21],-1.0,,no_group,,0.0\n" + + "sample,1,sample:1,419.314971923828,2,0,[UNIMOD:1]ACD,-1.0,,no_group,,0.0\n" + + "sample,2,sample:2,419.314971923828,2,0,[UNIMOD:5]ACD,-1.0,,no_group,,0.0\n" + + "sample,3,sample:3,419.314971923828,2,0,[UNIMOD:385]CPEP,-1.0,,no_group,,0.0\n" + + "sample,4,sample:4,419.314971923828,2,0,[UNIMOD:385]NPEP,-1.0,,no_group,,0.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(5, spectrumMatches.size()); + + PeptideAssumption residueModifiedAssumption = getFirstAssumption(spectrumMatches, "0", Advocate.instanovo.getIndex()); + + Assert.assertEquals("MCNQRPSTY", residueModifiedAssumption.getPeptide().getSequence()); + assertModification(residueModifiedAssumption, "Oxidation of M", 1); + assertModification(residueModifiedAssumption, "Carbamidomethylation of C", 2); + assertModification(residueModifiedAssumption, "Deamidation of N", 3); + assertModification(residueModifiedAssumption, "Deamidation of Q", 4); + assertModification(residueModifiedAssumption, "Citrullination of R", 5); + assertModification(residueModifiedAssumption, "Oxidation of P", 6); + assertModification(residueModifiedAssumption, "Phosphorylation of S", 7); + assertModification(residueModifiedAssumption, "Phosphorylation of T", 8); + assertModification(residueModifiedAssumption, "Phosphorylation of Y", 9); + + assertModification(getFirstAssumption(spectrumMatches, "1", Advocate.instanovo.getIndex()), "Acetylation of peptide N-term", 0); + assertModification(getFirstAssumption(spectrumMatches, "2", Advocate.instanovo.getIndex()), "Carbamilation of protein N-term", 0); + assertModification(getFirstAssumption(spectrumMatches, "3", Advocate.instanovo.getIndex()), "Pyrolidone from carbamidomethylated C", 1); + assertModification(getFirstAssumption(spectrumMatches, "4", Advocate.instanovo.getIndex()), "Ammonia loss from N", 1); + } + + /** + * Asserts one reader. + * + * @param fileName the file name + * @param advocateIndex the expected advocate index + * @param spectrumProvider the spectrum provider + * @param searchParameters the search parameters + * + * @throws Exception if an exception occurs + */ + private void assertReader( + String fileName, + int advocateIndex, + SpectrumProvider spectrumProvider, + SearchParameters searchParameters + ) throws Exception { + + File csvFile = writeCsv( + fileName, + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "example,0,example:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-10.0,\"[-1.0]\",no_group,\"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K\",0.0\n" + ); + + IdfileReader idfileReader; + if (fileName.endsWith(InstaNovoPlusIdfileReader.EXTENSION)) { + idfileReader = new InstaNovoPlusIdfileReader(csvFile); + } else if (fileName.endsWith(InstaNovoRefinedIdfileReader.EXTENSION)) { + idfileReader = new InstaNovoRefinedIdfileReader(csvFile); + } else { + idfileReader = new InstaNovoIdfileReader(csvFile); + } + + Assert.assertNotNull(idfileReader); + + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, searchParameters); + Assert.assertEquals(1, spectrumMatches.size()); + SpectrumMatch spectrumMatch = spectrumMatches.get(0); + Assert.assertEquals("example", spectrumMatch.getSpectrumFile()); + Assert.assertEquals("0", spectrumMatch.getSpectrumTitle()); + + TreeMap> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + Assert.assertNotNull(assumptions); + PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); + Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); + Assert.assertEquals(2, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + } + + /** + * Asserts a reader using sample v1.2.2 CSV content. + * + * @param idfileReader the reader + * @param advocateIndex the expected advocate index + * @param expectedSequence the expected peptide sequence + * @param expectedVariableModifications the expected number of variable + * modifications + * + * @throws Exception if an exception occurs + */ + private void assertSampleReader( + IdfileReader idfileReader, + int advocateIndex, + String expectedSequence, + int expectedVariableModifications + ) throws Exception { + + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + + SpectrumMatch spectrumMatch = spectrumMatches.get(0); + + Assert.assertEquals("SF_200217_U2OS_TiO2_HCD_OT_rep1", spectrumMatch.getSpectrumFile()); + Assert.assertEquals("0", spectrumMatch.getSpectrumTitle()); + + TreeMap> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + + Assert.assertNotNull(assumptions); + + PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); + + Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); + Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + } + + /** + * Returns the first assumption for a spectrum title. + * + * @param spectrumMatches the spectrum matches + * @param spectrumTitle the spectrum title + * @param advocateIndex the advocate index + * + * @return the first peptide assumption + */ + private PeptideAssumption getFirstAssumption(ArrayList spectrumMatches, String spectrumTitle, int advocateIndex) { + + for (SpectrumMatch spectrumMatch : spectrumMatches) { + + if (spectrumMatch.getSpectrumTitle().equals(spectrumTitle)) { + + TreeMap> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + + Assert.assertNotNull(assumptions); + + return assumptions.firstEntry().getValue().get(0); + } + } + + Assert.fail("No spectrum match found for title " + spectrumTitle + "."); + + return null; + } + + /** + * Asserts a modification match. + * + * @param peptideAssumption the peptide assumption + * @param modification the modification name + * @param site the modification site + */ + private void assertModification(PeptideAssumption peptideAssumption, String modification, int site) { + + for (ModificationMatch modificationMatch : peptideAssumption.getPeptide().getVariableModifications()) { + + if (modificationMatch.getModification().equals(modification) && modificationMatch.getSite() == site) { + return; + } + } + + Assert.fail("Modification " + modification + " at site " + site + " not found."); + } + + /** + * Writes a temporary CSV file. + * + * @param fileName the file name + * @param content the content + * + * @return the CSV file + * + * @throws IOException if an IOException occurs + */ + private File writeCsv(String fileName, String content) throws IOException { + + File file = File.createTempFile(fileName, ""); + file.deleteOnExit(); + + try (FileWriter writer = new FileWriter(file)) { + writer.write(content); + } + + return file; + } + + /** + * Simple spectrum provider for tests. + */ + private static class SimpleSpectrumProvider implements SpectrumProvider { + + @Override + public Spectrum getSpectrum(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public Precursor getPrecursor(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public ArrayList getPostcursorSpectrumTitles(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public double getPrecursorMz(String fileNameWithoutExtension, String spectrumTitle) { + return 0; + } + + @Override + public double getPrecursorRt(String fileNameWithoutExtension, String spectrumTitle) { + return 0; + } + + @Override + public int getSpectrumLevel(String fileNameWithoutExtension, String spectrumTitle) { + return 2; + } + + @Override + public double[][] getPeaks(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public double getMinPrecMz(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecMz(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecInt(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecRT(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMinPrecMz() { + return 0; + } + + @Override + public double getMaxPrecMz() { + return 0; + } + + @Override + public double getMaxPrecInt() { + return 0; + } + + @Override + public double getMaxPrecRT() { + return 0; + } + + @Override + public String[] getOrderedFileNamesWithoutExtensions() { + return new String[]{"example"}; + } + + @Override + public String[] getSpectrumTitles(String fileNameWithoutExtension) { + return new String[]{"0", "1", "2", "3", "4"}; + } + + @Override + public HashMap getFilePaths() { + return new HashMap<>(); + } + + @Override + public HashMap getCmsFilePaths() { + return new HashMap<>(); + } + + @Override + public void close() { + // Nothing to close. + } + } +} From 12f6d2fdb64964621adb83ba103f25f2bf5fed0a Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 13:56:30 +0200 Subject: [PATCH 02/12] Add InstaNovo advanced inference parameters --- .../tool_specific/InstaNovoParameters.java | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index e17be34aae..5d7d2a67b1 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -44,6 +44,14 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat * configuration default. */ private int batchSize = -1; + /** + * Whether to use knapsack beam search. + */ + private boolean useKnapsack = false; + /** + * Whether to save all beam search predictions. + */ + private boolean saveAllPredictions = true; /** * Whether to force CPU execution. */ @@ -66,6 +74,8 @@ && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel()) && safeEquals(configFile, other.getConfigFile()) && numberOfBeams == other.getNumberOfBeams() && batchSize == other.getBatchSize() + && useKnapsack == other.isUseKnapsack() + && saveAllPredictions == other.isSaveAllPredictions() && forceCpu == other.isForceCpu(); } @@ -90,6 +100,8 @@ public String toString(boolean html) { output.append("CONFIG_FILE=").append(configFile == null ? "" : configFile).append(newLine); output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine); output.append("BATCH_SIZE=").append(batchSize).append(newLine); + output.append("USE_KNAPSACK=").append(useKnapsack).append(newLine); + output.append("SAVE_ALL_PREDICTIONS=").append(saveAllPredictions).append(newLine); output.append("FORCE_CPU=").append(forceCpu).append(newLine); return output.toString(); @@ -185,6 +197,42 @@ public void setBatchSize(int batchSize) { this.batchSize = batchSize; } + /** + * Returns whether knapsack beam search is used. + * + * @return whether knapsack beam search is used + */ + public boolean isUseKnapsack() { + return useKnapsack; + } + + /** + * Sets whether knapsack beam search is used. + * + * @param useKnapsack whether knapsack beam search is used + */ + public void setUseKnapsack(boolean useKnapsack) { + this.useKnapsack = useKnapsack; + } + + /** + * Returns whether all beam search predictions are saved. + * + * @return whether all beam search predictions are saved + */ + public boolean isSaveAllPredictions() { + return saveAllPredictions; + } + + /** + * Sets whether all beam search predictions are saved. + * + * @param saveAllPredictions whether all beam search predictions are saved + */ + public void setSaveAllPredictions(boolean saveAllPredictions) { + this.saveAllPredictions = saveAllPredictions; + } + /** * Returns whether CPU execution is forced. * From b4a030db96a60d4842cab2962621cca54cd5b460 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 15:00:16 +0200 Subject: [PATCH 03/12] Use desktop InstaNovo batch size by default --- .../tool_specific/InstaNovoParameters.java | 6 ++++- .../TestInstaNovoParameters.java | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index 5d7d2a67b1..5a13f83da5 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -23,6 +23,10 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat * Default InstaNovo+ model identifier for v1.2.2 refinement. */ public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0"; + /** + * Default prediction batch size for desktop SearchGUI runs. + */ + public static final int DEFAULT_BATCH_SIZE = 16; /** * The selected InstaNovo model id or path. */ @@ -43,7 +47,7 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat * The prediction batch size. A value below one lets InstaNovo use its * configuration default. */ - private int batchSize = -1; + private int batchSize = DEFAULT_BATCH_SIZE; /** * Whether to use knapsack beam search. */ diff --git a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java new file mode 100644 index 0000000000..9a57b623c4 --- /dev/null +++ b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java @@ -0,0 +1,24 @@ +package com.compomics.util.test.parameters.identification.tool_specific; + +import com.compomics.util.parameters.identification.tool_specific.InstaNovoParameters; +import junit.framework.TestCase; +import org.junit.Assert; + +/** + * Tests for InstaNovo specific parameters. + * + * @author CompOmics + */ +public class TestInstaNovoParameters extends TestCase { + + /** + * Tests the desktop-oriented default batch size. + */ + public void testDefaultBatchSize() { + + InstaNovoParameters parameters = new InstaNovoParameters(); + + Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); + Assert.assertTrue(parameters.toString(false).contains("BATCH_SIZE=" + InstaNovoParameters.DEFAULT_BATCH_SIZE)); + } +} From 0ee155cd01785e09e95da9d1bd1e144a7fb74376 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 15:05:34 +0200 Subject: [PATCH 04/12] Normalize legacy InstaNovo batch sizes --- .../tool_specific/InstaNovoParameters.java | 11 +++++------ .../tool_specific/TestInstaNovoParameters.java | 4 ++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index 5a13f83da5..368a4368a5 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -44,8 +44,7 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat */ private int numberOfBeams = 5; /** - * The prediction batch size. A value below one lets InstaNovo use its - * configuration default. + * The prediction batch size. */ private int batchSize = DEFAULT_BATCH_SIZE; /** @@ -77,7 +76,7 @@ public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmPa && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel()) && safeEquals(configFile, other.getConfigFile()) && numberOfBeams == other.getNumberOfBeams() - && batchSize == other.getBatchSize() + && getBatchSize() == other.getBatchSize() && useKnapsack == other.isUseKnapsack() && saveAllPredictions == other.isSaveAllPredictions() && forceCpu == other.isForceCpu(); @@ -103,7 +102,7 @@ public String toString(boolean html) { output.append("INSTANOVO_PLUS_MODEL=").append(instaNovoPlusModel).append(newLine); output.append("CONFIG_FILE=").append(configFile == null ? "" : configFile).append(newLine); output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine); - output.append("BATCH_SIZE=").append(batchSize).append(newLine); + output.append("BATCH_SIZE=").append(getBatchSize()).append(newLine); output.append("USE_KNAPSACK=").append(useKnapsack).append(newLine); output.append("SAVE_ALL_PREDICTIONS=").append(saveAllPredictions).append(newLine); output.append("FORCE_CPU=").append(forceCpu).append(newLine); @@ -189,7 +188,7 @@ public void setNumberOfBeams(int numberOfBeams) { * @return the batch size */ public int getBatchSize() { - return batchSize; + return batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; } /** @@ -198,7 +197,7 @@ public int getBatchSize() { * @param batchSize the batch size */ public void setBatchSize(int batchSize) { - this.batchSize = batchSize; + this.batchSize = batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; } /** diff --git a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java index 9a57b623c4..f3619643e7 100644 --- a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java +++ b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java @@ -20,5 +20,9 @@ public void testDefaultBatchSize() { Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); Assert.assertTrue(parameters.toString(false).contains("BATCH_SIZE=" + InstaNovoParameters.DEFAULT_BATCH_SIZE)); + + parameters.setBatchSize(-1); + + Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); } } From 619fa87753b2b813fdaf794b0af07f206e3dc7ea Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 16:07:58 +0200 Subject: [PATCH 05/12] Harden InstaNovo refined CSV import --- .../experiment/identification/Advocate.java | 9 +- .../InstaNovoCsvIdfileReader.java | 254 ++++++++++++++++-- .../InstaNovoRefinedIdfileReader.java | 2 +- .../tool_specific/InstaNovoParameters.java | 4 +- .../TestInstaNovoIdfileReader.java | 92 ++++++- 5 files changed, 325 insertions(+), 36 deletions(-) diff --git a/src/main/java/com/compomics/util/experiment/identification/Advocate.java b/src/main/java/com/compomics/util/experiment/identification/Advocate.java index da66bf7066..7c306105f0 100644 --- a/src/main/java/com/compomics/util/experiment/identification/Advocate.java +++ b/src/main/java/com/compomics/util/experiment/identification/Advocate.java @@ -190,6 +190,10 @@ public enum AdvocateType { * The InstaNovo+ de novo sequencing algorithm. */ public static final Advocate instanovoPlus = new Advocate(39, "InstaNovo+", AdvocateType.sequencing_algorithm, new Color(123, 104, 238)); + /** + * The InstaNovo predictions refined with InstaNovo+ de novo sequencing algorithm. + */ + public static final Advocate instanovoRefined = new Advocate(40, "InstaNovo with refinement", AdvocateType.sequencing_algorithm, new Color(72, 209, 204)); /** * Advocate type for mzId files where no software is annotated. */ @@ -319,7 +323,7 @@ public String toString() { * @return the implemented advocates in an array */ public static Advocate[] values() { - Advocate[] result = new Advocate[42 + userAdvocates.size()]; + Advocate[] result = new Advocate[43 + userAdvocates.size()]; int i = 0; result[i] = peptideShaker; result[++i] = onyaseEngine; @@ -363,6 +367,7 @@ public static Advocate[] values() { result[++i] = msFragger; result[++i] = instanovo; result[++i] = instanovoPlus; + result[++i] = instanovoRefined; for (Advocate advocate : userAdvocates.values()) { result[++i] = advocate; @@ -499,7 +504,7 @@ public String getPmid() { return "37819886"; } else if (this == msFragger) { return "28394336"; - } else if (this == instanovo || this == instanovoPlus) { + } else if (this == instanovo || this == instanovoPlus || this == instanovoRefined) { return null; } else { return null; diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index b1b96cc7d3..56a614b6c7 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -18,6 +18,8 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.xml.bind.JAXBException; /** @@ -31,6 +33,10 @@ abstract class InstaNovoCsvIdfileReader implements IdfileReader { * The supported InstaNovo version. */ private static final String SOFTWARE_VERSION = "1.2.2"; + /** + * Pattern matching common scan or index tokens in spectrum titles. + */ + private static final Pattern TITLE_NUMBER_PATTERN = Pattern.compile("(?i)(?:scan|index|scan_number)\\s*[=: ]\\s*(\\d+)"); /** * The CSV file. */ @@ -95,6 +101,7 @@ public ArrayList getAllSpectrumMatches( ArrayList result = new ArrayList<>(); HashMap matches = new HashMap<>(); + HashMap spectrumTitleLookups = new HashMap<>(); try (SimpleFileReader reader = SimpleFileReader.getFileReader(csvFile)) { @@ -138,10 +145,22 @@ public ArrayList getAllSpectrumMatches( String experimentName = experimentIndex >= 0 ? getValue(values, experimentIndex).trim() : ""; String spectrumId = spectrumIdIndex >= 0 ? getValue(values, spectrumIdIndex).trim() : ""; String scanNumber = scanNumberIndex >= 0 ? getValue(values, scanNumberIndex).trim() : ""; + Integer charge = getCharge(getValue(values, chargeIndex), lineNumber, waitingHandler); + + if (charge == null) { + continue; + } + String spectrumFileName = getSpectrumFileName(spectrumProvider, experimentName, spectrumId); - String spectrumTitle = getSpectrumTitle(spectrumProvider, spectrumFileName, spectrumId, scanNumber); + SpectrumTitleLookup spectrumTitleLookup = spectrumTitleLookups.get(spectrumFileName); + + if (spectrumTitleLookup == null) { + spectrumTitleLookup = new SpectrumTitleLookup(spectrumProvider, spectrumFileName); + spectrumTitleLookups.put(spectrumFileName, spectrumTitleLookup); + } + + String spectrumTitle = getSpectrumTitle(spectrumTitleLookup, spectrumFileName, spectrumId, scanNumber); - int charge = Integer.parseInt(getValue(values, chargeIndex)); double logProbability = Util.readDoubleAsString(getValue(values, scoreIndex)); double score = -logProbability; @@ -186,11 +205,15 @@ public HashMap> getSoftwareVersions() { versions.add(SOFTWARE_VERSION); result.put(advocate.getName(), versions); - if (advocate == Advocate.instanovoPlus && getExtension().contains("refined")) { + if (advocate == Advocate.instanovoRefined) { ArrayList instaNovoVersions = new ArrayList<>(); instaNovoVersions.add(SOFTWARE_VERSION); result.put(Advocate.instanovo.getName(), instaNovoVersions); + + ArrayList instaNovoPlusVersions = new ArrayList<>(); + instaNovoPlusVersions.add(SOFTWARE_VERSION); + result.put(Advocate.instanovoPlus.getName(), instaNovoPlusVersions); } return result; @@ -247,50 +270,74 @@ private String getSpectrumFileName(SpectrumProvider spectrumProvider, String exp * * @return the spectrum title */ - private String getSpectrumTitle(SpectrumProvider spectrumProvider, String spectrumFileName, String spectrumId, String scanNumber) { + private String getSpectrumTitle(SpectrumTitleLookup spectrumTitleLookup, String spectrumFileName, String spectrumId, String scanNumber) { - String[] titles = spectrumProvider.getSpectrumTitles(spectrumFileName); + String title = spectrumTitleLookup.getTitle(spectrumId); - if (titles == null || titles.length == 0) { - throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); + if (title != null) { + return title; } - ArrayList candidates = new ArrayList<>(); - - if (spectrumId != null && !spectrumId.isEmpty()) { - candidates.add(spectrumId); + if (spectrumId != null) { int separatorIndex = spectrumId.indexOf(':'); + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { - candidates.add(spectrumId.substring(separatorIndex + 1)); - } - } - if (scanNumber != null && !scanNumber.isEmpty()) { - candidates.add(scanNumber); - } + title = spectrumTitleLookup.getTitle(spectrumId.substring(separatorIndex + 1)); - for (String candidate : candidates) { - for (String title : titles) { - if (title.equals(candidate) || title.equalsIgnoreCase(candidate)) { + if (title != null) { return title; } } } if (scanNumber != null && !scanNumber.isEmpty()) { - try { - int scanIndex = Integer.parseInt(scanNumber); - if (scanIndex >= 0 && scanIndex < titles.length) { - return titles[scanIndex]; - } - } catch (NumberFormatException e) { - // Ignore and report the missing title below. + + title = spectrumTitleLookup.getTitle(scanNumber); + + if (title != null) { + return title; + } + + title = spectrumTitleLookup.getTitleForNumber(scanNumber); + + if (title != null) { + return title; } } throw new IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'."); } + /** + * Returns the precursor charge. + * + * @param value the charge column value + * @param lineNumber the line number + * @param waitingHandler the waiting handler + * + * @return the charge, or null if the row should be skipped + */ + private Integer getCharge(String value, int lineNumber, WaitingHandler waitingHandler) { + + String charge = value == null ? "" : value.trim(); + + try { + return Integer.parseInt(charge); + } catch (NumberFormatException e) { + + if (waitingHandler != null) { + waitingHandler.appendReport( + "Skipping InstaNovo csv line " + lineNumber + ": invalid precursor charge '" + charge + "'.", + true, + true + ); + } + + return null; + } + } + /** * Parses a peptide sequence with optional UniMod annotations. * @@ -542,6 +589,159 @@ private ArrayList parseCsvLine(String line) { return values; } + /** + * Spectrum title lookup cache for one spectrum file. + */ + private static class SpectrumTitleLookup { + + /** + * Titles indexed by exact and lower-case title. + */ + private final HashMap titles = new HashMap<>(); + /** + * Titles indexed by scan or index number tokens parsed from the title. + */ + private final HashMap titleByNumber = new HashMap<>(); + + /** + * Constructor. + * + * @param spectrumProvider the spectrum provider + * @param spectrumFileName the spectrum file name without extension + */ + private SpectrumTitleLookup(SpectrumProvider spectrumProvider, String spectrumFileName) { + + String[] spectrumTitles = spectrumProvider.getSpectrumTitles(spectrumFileName); + + if (spectrumTitles == null || spectrumTitles.length == 0) { + throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); + } + + for (String title : spectrumTitles) { + addTitle(title); + } + } + + /** + * Adds a title. + * + * @param title the title + */ + private void addTitle(String title) { + + if (title == null) { + return; + } + + titles.put(title, title); + titles.put(title.toLowerCase(), title); + + Matcher matcher = TITLE_NUMBER_PATTERN.matcher(title); + + while (matcher.find()) { + addNumber(matcher.group(1), title); + } + } + + /** + * Adds a scan or index number. + * + * @param number the number + * @param title the spectrum title + */ + private void addNumber(String number, String title) { + + String normalizedNumber = normalizeNumber(number); + + if (normalizedNumber == null) { + return; + } + + if (titleByNumber.containsKey(normalizedNumber) + && !title.equals(titleByNumber.get(normalizedNumber))) { + titleByNumber.put(normalizedNumber, null); + } else { + titleByNumber.put(normalizedNumber, title); + } + } + + /** + * Returns a title matching the given title candidate. + * + * @param candidate the candidate + * + * @return the title, or null if not found + */ + private String getTitle(String candidate) { + + if (candidate == null) { + return null; + } + + String trimmedCandidate = candidate.trim(); + + if (trimmedCandidate.isEmpty()) { + return null; + } + + String result = titles.get(trimmedCandidate); + + if (result != null) { + return result; + } + + return titles.get(trimmedCandidate.toLowerCase()); + } + + /** + * Returns a title matching the given scan or index number. + * + * @param candidate the candidate + * + * @return the title, or null if not found + */ + private String getTitleForNumber(String candidate) { + + String normalizedNumber = normalizeNumber(candidate); + + return normalizedNumber == null ? null : titleByNumber.get(normalizedNumber); + } + + /** + * Normalizes a positive integer string. + * + * @param number the number + * + * @return the normalized number + */ + private String normalizeNumber(String number) { + + if (number == null) { + return null; + } + + String trimmedNumber = number.trim(); + + if (trimmedNumber.isEmpty()) { + return null; + } + + for (int i = 0; i < trimmedNumber.length(); i++) { + if (!Character.isDigit(trimmedNumber.charAt(i))) { + return null; + } + } + + int startIndex = 0; + + while (startIndex < trimmedNumber.length() - 1 && trimmedNumber.charAt(startIndex) == '0') { + startIndex++; + } + + return trimmedNumber.substring(startIndex); + } + } + /** * Parsed peptide values. */ diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java index 3c41332b81..d9776bbd24 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java @@ -28,6 +28,6 @@ public InstaNovoRefinedIdfileReader() { * @param csvFile the CSV file */ public InstaNovoRefinedIdfileReader(File csvFile) { - super(csvFile, Advocate.instanovoPlus, EXTENSION); + super(csvFile, Advocate.instanovoRefined, EXTENSION); } } diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java index 368a4368a5..f341271ccb 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -16,11 +16,11 @@ public class InstaNovoParameters extends ExperimentObject implements Identificat */ static final long serialVersionUID = -2295564912139753378L; /** - * Default InstaNovo model identifier for v1.2.2. + * Default InstaNovo transformer model identifier used by InstaNovo v1.2.2. */ public static final String DEFAULT_INSTANOVO_MODEL = "instanovo-v1.2.0"; /** - * Default InstaNovo+ model identifier for v1.2.2 refinement. + * Default InstaNovo+ diffusion model identifier used by InstaNovo v1.2.2. */ public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0"; /** diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index 8d4c9d0718..b14f489d8e 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -63,12 +63,13 @@ public void testInstaNovoReaders() throws Exception { Assert.assertNotNull(Advocate.getAdvocate("InstaNovo")); Assert.assertNotNull(Advocate.getAdvocate("InstaNovo+")); + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo with refinement")); SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider(); SearchParameters searchParameters = new SearchParameters(); assertReader("test.instanovo.csv", Advocate.instanovo.getIndex(), spectrumProvider, searchParameters); assertReader("test.instanovoplus.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); - assertReader("test.instanovo.refined.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovo.refined.csv", Advocate.instanovoRefined.getIndex(), spectrumProvider, searchParameters); } /** @@ -135,12 +136,58 @@ public void testInstaNovoVersion122SampleRows() throws Exception { assertSampleReader( new InstaNovoRefinedIdfileReader(writeCsv("sample.instanovo.refined.csv", INSTANOVO_COMBINED_V1_2_2)), - Advocate.instanovoPlus.getIndex(), + Advocate.instanovoRefined.getIndex(), "LIRPLLK", 0 ); } + /** + * Tests matching realistic spectrum titles by scan tokens without positional + * scan-number fallback. + * + * @throws Exception if an exception occurs + */ + public void testSpectrumTitleLookupWithRealisticTitles() throws Exception { + + File csvFile = writeCsv( + "realistic-titles.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,1,example:1,419.314971923828,2,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider( + new String[]{"example"}, + new String[]{"controllerType=0 controllerNumber=1 scan=1", "controllerType=0 controllerNumber=1 scan=2"} + ); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("controllerType=0 controllerNumber=1 scan=1", spectrumMatches.get(0).getSpectrumTitle()); + } + + /** + * Tests charge parsing robustness. + * + * @throws Exception if an exception occurs + */ + public void testChargeParsingSkipsInvalidRows() throws Exception { + + File csvFile = writeCsv( + "charges.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,0,example:0,419.314971923828,not-a-charge,0,PEPTIDE,-1.0\n" + + "example,1,example:1,419.314971923828, 2 ,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("1", spectrumMatches.get(0).getSpectrumTitle()); + } + /** * Tests all UniMod annotations from the InstaNovo v1.2.2 default residue * configuration. @@ -230,6 +277,11 @@ private void assertReader( Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(2, peptideAssumption.getPeptide().getVariableModifications().length); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + + if (advocateIndex == Advocate.instanovoRefined.getIndex()) { + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovo.getName())); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovoPlus.getName())); + } } /** @@ -268,6 +320,11 @@ private void assertSampleReader( Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + + if (advocateIndex == Advocate.instanovoRefined.getIndex()) { + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovo.getName())); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovoPlus.getName())); + } } /** @@ -344,6 +401,33 @@ private File writeCsv(String fileName, String content) throws IOException { */ private static class SimpleSpectrumProvider implements SpectrumProvider { + /** + * File names without extensions. + */ + private final String[] fileNames; + /** + * Spectrum titles. + */ + private final String[] titles; + + /** + * Default constructor. + */ + private SimpleSpectrumProvider() { + this(new String[]{"example"}, new String[]{"0", "1", "2", "3", "4"}); + } + + /** + * Constructor. + * + * @param fileNames the file names + * @param titles the spectrum titles + */ + private SimpleSpectrumProvider(String[] fileNames, String[] titles) { + this.fileNames = fileNames; + this.titles = titles; + } + @Override public Spectrum getSpectrum(String fileNameWithoutExtension, String spectrumTitle) { return null; @@ -421,12 +505,12 @@ public double getMaxPrecRT() { @Override public String[] getOrderedFileNamesWithoutExtensions() { - return new String[]{"example"}; + return fileNames; } @Override public String[] getSpectrumTitles(String fileNameWithoutExtension) { - return new String[]{"0", "1", "2", "3", "4"}; + return titles; } @Override From 5dc6e7f98b2a714f307c527461c7d5d3041392c9 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 22:36:01 +0200 Subject: [PATCH 06/12] Suppress benign Nimbus look and feel exception On recent JDKs the Nimbus look and feel can throw a benign ClassCastException (ColorUIResource cannot be cast to Boolean in NimbusStyle.isOpaque) while building chart popup menus. The exception is still logged but no longer shown to the user, as it does not affect functionality. --- .../util/exceptions/ExceptionHandler.java | 43 ++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java b/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java index 783c21e9c7..de6f589cf7 100644 --- a/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java +++ b/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java @@ -32,14 +32,53 @@ public ExceptionHandler() { public synchronized void catchException(Exception e) { if (!ignoreExceptions && !exceptionCaught.contains(getExceptionType(e))) { - + e.printStackTrace(); exceptionCaught.add(getExceptionType(e)); + + // @TODO: remove once the underlying Nimbus look and feel bug is fixed. + // On recent JDKs the Nimbus look and feel can throw a benign + // ClassCastException ("ColorUIResource cannot be cast to Boolean" in + // NimbusStyle.isOpaque) while building chart popup menus. It does not + // affect functionality, so it is logged above but not shown to the user. + if (isBenignLookAndFeelException(e)) { + return; + } + notifyUser(e); - + } } + /** + * Indicates whether the given exception is the known benign look and feel + * ClassCastException thrown while rendering (e.g. "ColorUIResource cannot be + * cast to Boolean" in NimbusStyle). Such exceptions do not affect + * functionality and should not be reported to the user. + * + * @param e the exception to inspect + * + * @return true if the exception is a benign look and feel rendering exception + */ + private static boolean isBenignLookAndFeelException(Exception e) { + + if (!(e instanceof ClassCastException)) { + return false; + } + + for (StackTraceElement element : e.getStackTrace()) { + + String className = element.getClassName(); + + if (className.startsWith("javax.swing.plaf.nimbus.") + || className.startsWith("javax.swing.plaf.synth.")) { + return true; + } + } + + return false; + } + /** * Notifies the user that an exception was caught. * From a8c9cc9d5e7437e2763127793204880a9fb136bb Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 22:36:01 +0200 Subject: [PATCH 07/12] Use .mzML extension for ThermoRawFileParser mzML output ThermoRawFileParser writes mzML files with the canonical .mzML extension. Declaring the format ending as .mzML lets consumers find the converted file on case-sensitive file systems. --- .../ThermoRawFileParserOutputFormat.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java b/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java index 50d954e99b..d3fe971d89 100644 --- a/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java +++ b/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java @@ -14,11 +14,11 @@ public enum ThermoRawFileParserOutputFormat { /** * mzML generic PSI format. */ - mzML(1, "mzML", "mzML generic PSI format", ".mzml"), + mzML(1, "mzML", "mzML generic PSI format", ".mzML"), /** * Indexed mzML generic PSI format. */ - mzMLIndexed(2, "mzML (indexed)", "mzML generic PSI format", ".mzml"); + mzMLIndexed(2, "mzML (indexed)", "mzML generic PSI format", ".mzML"); /** * The index of the format. From 6c63405d0d9753e36d0732fe7b1627850d3a328a Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Mon, 22 Jun 2026 22:36:01 +0200 Subject: [PATCH 08/12] Align log4j-api version with log4j-core log4j-core was bumped to 2.25.4 while log4j-api stayed at 2.23.1, causing a NoSuchFieldError at runtime. Bump log4j-api to 2.25.4 to match. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 290e12a5cd..7ad512b49a 100644 --- a/pom.xml +++ b/pom.xml @@ -444,7 +444,7 @@ org.apache.logging.log4j log4j-api - 2.23.1 + 2.25.4 From b07ddec0c9dce1709d66923560a33da190fa3053 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Tue, 23 Jun 2026 01:37:57 +0200 Subject: [PATCH 09/12] Add DeNovoGUI Utilities compatibility APIs --- .../com/compomics/software/ToolFactory.java | 3 + src/main/java/com/compomics/util/Util.java | 45 ++++ .../util/db/object/ObjectsCache.java | 25 +++ .../aminoacids/sequence/AminoAcidPattern.java | 9 + .../sequence/AminoAcidSequence.java | 42 ++++ .../util/experiment/biology/ions/Charge.java | 14 ++ .../biology/modifications/Modification.java | 22 ++ .../modifications/ModificationFactory.java | 20 ++ .../experiment/biology/proteins/Peptide.java | 46 ++++ .../identification/Identification.java | 197 ++++++++++++++++-- .../SpectrumIdentificationAssumption.java | 9 + .../identification/amino_acid_tags/Tag.java | 41 +++- .../matches/ModificationMatch.java | 20 ++ .../identification/matches/SpectrumMatch.java | 122 +++++++++++ .../matches_iterators/PsmIterator.java | 41 ++++ .../AnnotationParameters.java | 21 ++ .../mass_spectrometry/spectra/Precursor.java | 33 ++- .../mass_spectrometry/spectra/Spectrum.java | 107 ++++++++++ .../util/gui/spectrum/SpectrumPanel.java | 63 ++++++ .../advanced/SequenceMatchingParameters.java | 4 +- .../search/DigestionParameters.java | 14 ++ .../search/ModificationParameters.java | 5 + .../search/SearchParameters.java | 10 + .../tool_specific/DirecTagParameters.java | 10 +- 24 files changed, 901 insertions(+), 22 deletions(-) create mode 100644 src/main/java/com/compomics/util/experiment/identification/matches_iterators/PsmIterator.java diff --git a/src/main/java/com/compomics/software/ToolFactory.java b/src/main/java/com/compomics/software/ToolFactory.java index 863585378f..eb5eb04500 100644 --- a/src/main/java/com/compomics/software/ToolFactory.java +++ b/src/main/java/com/compomics/software/ToolFactory.java @@ -44,6 +44,7 @@ public class ToolFactory { * The command line argument for mgf files for SearchGUI. */ public static final String SEARCHGUI_SPECTRUM_FILE_OPTION = "-mgf"; + public static final String searchGuiSpectrumFileOption = SEARCHGUI_SPECTRUM_FILE_OPTION; /** * The command line argument for FASTA file for SearchGUI. */ @@ -56,10 +57,12 @@ public class ToolFactory { * The command line argument for a parameters file for SearchGUI. */ public static final String SEARCHGUI_PARAMETERS_FILE_OPTION = "-identification_parameters"; + public static final String searchGuiParametersFileOption = SEARCHGUI_PARAMETERS_FILE_OPTION; /** * The command line argument for an output folder. */ public static final String OUTPUT_FOLDER_OPTION = "-output_folder"; + public static final String outputFolderOption = OUTPUT_FOLDER_OPTION; /** * The command line argument for the species. */ diff --git a/src/main/java/com/compomics/util/Util.java b/src/main/java/com/compomics/util/Util.java index 03dcd8ca9d..cb08321b87 100644 --- a/src/main/java/com/compomics/util/Util.java +++ b/src/main/java/com/compomics/util/Util.java @@ -1,7 +1,9 @@ package com.compomics.util; import com.compomics.util.gui.waiting.waitinghandlers.ProgressDialogX; +import com.compomics.util.gui.file_handling.FileChooserUtil; import java.awt.Color; +import java.awt.Component; import java.io.*; import java.math.BigDecimal; import java.math.RoundingMode; @@ -695,4 +697,47 @@ public static String keyValueToString( return sb.toString(); } + + public static File getUserSelectedFile( + Component parent, + String fileEnding, + String fileFormatDescription, + String dialogTitle, + String lastSelectedFolder, + String suggestedFileName, + boolean openDialog + ) { + return FileChooserUtil.getUserSelectedFile( + parent, + fileEnding, + fileFormatDescription, + dialogTitle, + lastSelectedFolder, + suggestedFileName, + openDialog + ); + } + + public static boolean deleteDir(File directory) { + if (directory == null || !directory.exists()) { + return true; + } + File[] files = directory.listFiles(); + if (files != null) { + for (File file : files) { + if (file.isDirectory()) { + deleteDir(file); + } else if (!file.delete()) { + return false; + } + } + } + return directory.delete(); + } + + public static String getFileName(File file) { + String name = file.getName(); + int extensionIndex = name.lastIndexOf('.'); + return extensionIndex > 0 ? name.substring(0, extensionIndex) : name; + } } diff --git a/src/main/java/com/compomics/util/db/object/ObjectsCache.java b/src/main/java/com/compomics/util/db/object/ObjectsCache.java index 75b05b00ba..1985c0db81 100644 --- a/src/main/java/com/compomics/util/db/object/ObjectsCache.java +++ b/src/main/java/com/compomics/util/db/object/ObjectsCache.java @@ -49,6 +49,12 @@ public class ObjectsCache { */ private final int keepObjectsThreshold = 10000; + /** + * Constructor. + */ + public ObjectsCache() { + } + /** * Constructor. * @@ -466,6 +472,25 @@ public boolean isEmpty() { return isEmpty; } + /** + * Legacy compatibility method. Automated memory management is handled by + * the current cache implementation. + * + * @param automatedMemoryManagement ignored + */ + public void setAutomatedMemoryManagement(boolean automatedMemoryManagement) { + } + + /** + * Legacy compatibility method. Delegates to the current memory share. + * + * @param memoryShare the memory share + * @param waitingHandler the waiting handler + */ + public void reduceMemoryConsumption(double memoryShare, WaitingHandler waitingHandler) { + setMemoryShare(memoryShare); + } + /** * Clears the cache. */ diff --git a/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidPattern.java b/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidPattern.java index 44c802b2bd..da01e4108c 100644 --- a/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidPattern.java +++ b/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidPattern.java @@ -1,6 +1,7 @@ package com.compomics.util.experiment.biology.aminoacids.sequence; import com.compomics.util.experiment.biology.aminoacids.AminoAcid; +import com.compomics.util.experiment.identification.matches.ModificationMatch; import com.compomics.util.experiment.personalization.ExperimentObject; import com.compomics.util.parameters.identification.advanced.SequenceMatchingParameters; import com.compomics.util.parameters.identification.advanced.SequenceMatchingParameters.MatchingType; @@ -146,6 +147,14 @@ public HashMap> getAaTargeted() { return residueTargeted; } + public ArrayList getModificationIndexes() { + return new ArrayList<>(); + } + + public ArrayList getModificationsAt(int site) { + return new ArrayList<>(); + } + /** * Convenience constructor giving a list of targeted residues as input. For * instance (S, T, Y) diff --git a/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidSequence.java b/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidSequence.java index 4fca5496f8..ff1b0d6f1e 100644 --- a/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidSequence.java +++ b/src/main/java/com/compomics/util/experiment/biology/aminoacids/sequence/AminoAcidSequence.java @@ -443,6 +443,48 @@ public ModificationMatch[] getVariableModifications() { } + public ArrayList getModificationIndexes() { + ArrayList result = new ArrayList<>(); + for (ModificationMatch modificationMatch : getVariableModifications()) { + if (!result.contains(modificationMatch.getSite())) { + result.add(modificationMatch.getSite()); + } + } + return result; + } + + public ArrayList getModificationsAt(int site) { + ArrayList result = new ArrayList<>(); + for (ModificationMatch modificationMatch : getVariableModifications()) { + if (modificationMatch.getSite() == site) { + result.add(modificationMatch); + } + } + return result; + } + + public static String getTaggedResidue( + char aminoAcid, + String modificationName, + ModificationParameters modificationParameters, + int modificationType, + boolean useHtmlColorCoding, + boolean useShortName + ) { + String[] variableModifications = new String[3]; + variableModifications[1] = modificationName; + return ModificationUtils.getTaggedModifiedSequence( + modificationParameters, + Character.toString(aminoAcid), + variableModifications, + null, + null, + new String[3], + useHtmlColorCoding, + useShortName + ); + } + /** * Returns the variable modifications indexed by site. Modifications are * indexed by site as follows: N-term modifications are at index 0, C-term diff --git a/src/main/java/com/compomics/util/experiment/biology/ions/Charge.java b/src/main/java/com/compomics/util/experiment/biology/ions/Charge.java index 39d8ff0d37..335c6e631f 100644 --- a/src/main/java/com/compomics/util/experiment/biology/ions/Charge.java +++ b/src/main/java/com/compomics/util/experiment/biology/ions/Charge.java @@ -8,12 +8,26 @@ */ public class Charge { + /** + * Legacy charge value field. + */ + public int value; + /** * Empty default constructor. */ public Charge() { } + /** + * Constructor with a charge value. + * + * @param value the charge value + */ + public Charge(int value) { + this.value = value; + } + /** * Returns a string representing the charge. For example 2+. * diff --git a/src/main/java/com/compomics/util/experiment/biology/modifications/Modification.java b/src/main/java/com/compomics/util/experiment/biology/modifications/Modification.java index e514ea7a5c..3e7562c3e5 100644 --- a/src/main/java/com/compomics/util/experiment/biology/modifications/Modification.java +++ b/src/main/java/com/compomics/util/experiment/biology/modifications/Modification.java @@ -18,6 +18,16 @@ */ public class Modification extends ExperimentObject { + public static final int MODAA = ModificationType.modaa.index; + public static final int MODN = ModificationType.modn_protein.index; + public static final int MODNAA = ModificationType.modnaa_protein.index; + public static final int MODC = ModificationType.modc_protein.index; + public static final int MODCAA = ModificationType.modcaa_protein.index; + public static final int MODNP = ModificationType.modn_peptide.index; + public static final int MODNPAA = ModificationType.modnaa_peptide.index; + public static final int MODCP = ModificationType.modc_peptide.index; + public static final int MODCPAA = ModificationType.modcaa_peptide.index; + /** * The version UID for backward compatibility. */ @@ -171,6 +181,18 @@ public ModificationType getModificationType() { return modificationType; } + public int getType() { + return modificationType.index; + } + + public boolean isNTerm() { + return modificationType.isNTerm(); + } + + public boolean isCTerm() { + return modificationType.isCTerm(); + } + /** * Getter for the modification name. * diff --git a/src/main/java/com/compomics/util/experiment/biology/modifications/ModificationFactory.java b/src/main/java/com/compomics/util/experiment/biology/modifications/ModificationFactory.java index cdc1e33998..a2dc0f69fb 100644 --- a/src/main/java/com/compomics/util/experiment/biology/modifications/ModificationFactory.java +++ b/src/main/java/com/compomics/util/experiment/biology/modifications/ModificationFactory.java @@ -281,6 +281,10 @@ public boolean containsModification(String name) { return modificationMap.containsKey(name); } + public boolean containsPTM(String name) { + return containsModification(name); + } + /** * Returns the names of the default modifications. * @@ -337,6 +341,10 @@ public ArrayList getModifications() { return new ArrayList<>(modificationMap.keySet()); } + public ArrayList getPTMs() { + return getModifications(); + } + /** * Convenience method returning a boolean indicating whether a Modification * is user defined or default. @@ -407,6 +415,18 @@ public void setColor(String expectedModification, int color) { userColors.put(expectedModification, color); } + public void setColor(String expectedModification, Color color) { + setColor(expectedModification, color.getRGB()); + } + + public void addFixedModifications( + ModificationParameters modificationParameters, + com.compomics.util.experiment.identification.amino_acid_tags.Tag tag, + com.compomics.util.parameters.identification.advanced.SequenceMatchingParameters sequenceMatchingParameters + ) { + // Fixed modifications are resolved on demand in Utilities 5. + } + /** * Returns a default color based on the modification name. * diff --git a/src/main/java/com/compomics/util/experiment/biology/proteins/Peptide.java b/src/main/java/com/compomics/util/experiment/biology/proteins/Peptide.java index 49d5fbd055..52f4a7a3d0 100644 --- a/src/main/java/com/compomics/util/experiment/biology/proteins/Peptide.java +++ b/src/main/java/com/compomics/util/experiment/biology/proteins/Peptide.java @@ -292,6 +292,10 @@ public TreeMap getProteinMapping() { return proteinMapping; } + public ArrayList getParentProteinsNoRemapping() { + return proteinMapping == null ? null : new ArrayList<>(proteinMapping.keySet()); + } + /** * Sets the protein mapping as a map of 0-based indexes for every protein * accession. @@ -383,6 +387,22 @@ public ModificationMatch[] getVariableModifications() { return variableModifications == null ? ModificationMatch.NO_MOD : variableModifications; } + public ModificationMatch[] getModificationMatches() { + return getVariableModifications(); + } + + public boolean isModified() { + return getVariableModifications().length > 0; + } + + public int getNModifications() { + return getVariableModifications().length; + } + + public static String getPeptideModificationsAsString(Peptide peptide, boolean variableOnly) { + return PeptideUtils.getVariableModificationsAsString(peptide); + } + /** * Returns the variable modifications indexed by site. Modifications are * indexed by site as follows: N-term modifications are at index 0, C-term @@ -1315,6 +1335,32 @@ public String getTaggedModifiedSequence( ); } + public String getTaggedModifiedSequence( + ModificationParameters modificationProfile, + boolean useHtmlColorCoding, + boolean includeHtmlStartEndTags, + boolean useShortName, + boolean excludeAllFixedPtms + ) { + ModificationParameters displayedModifications = excludeAllFixedPtms + ? new ModificationParameters(modificationProfile) + : modificationProfile; + + if (excludeAllFixedPtms) { + displayedModifications.getFixedModifications().clear(); + } + + return getTaggedModifiedSequence( + displayedModifications, + null, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING, + useHtmlColorCoding, + includeHtmlStartEndTags, + useShortName, + null + ); + } + /** * Estimates the theoretic mass of the peptide. The previous version is * silently overwritten. diff --git a/src/main/java/com/compomics/util/experiment/identification/Identification.java b/src/main/java/com/compomics/util/experiment/identification/Identification.java index 4fe6aa9c1c..af5eb40431 100644 --- a/src/main/java/com/compomics/util/experiment/identification/Identification.java +++ b/src/main/java/com/compomics/util/experiment/identification/Identification.java @@ -1,14 +1,17 @@ package com.compomics.util.experiment.identification; -import com.compomics.util.db.object.ObjectsDB; -import com.compomics.util.experiment.identification.matches.PeptideMatch; -import com.compomics.util.experiment.identification.matches.ProteinMatch; -import com.compomics.util.experiment.identification.matches.SpectrumMatch; -import com.compomics.util.experiment.identification.matches_iterators.PeptideMatchesIterator; -import com.compomics.util.experiment.identification.matches_iterators.ProteinMatchesIterator; -import com.compomics.util.experiment.identification.matches_iterators.SpectrumMatchesIterator; -import com.compomics.util.experiment.personalization.ExperimentObject; -import com.compomics.util.waiting.WaitingHandler; +import com.compomics.util.db.object.ObjectsDB; +import com.compomics.util.db.object.ObjectsCache; +import com.compomics.util.experiment.identification.matches.PeptideMatch; +import com.compomics.util.experiment.identification.matches.ProteinMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.matches_iterators.PeptideMatchesIterator; +import com.compomics.util.experiment.identification.matches_iterators.ProteinMatchesIterator; +import com.compomics.util.experiment.identification.matches_iterators.PsmIterator; +import com.compomics.util.experiment.identification.matches_iterators.SpectrumMatchesIterator; +import com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum; +import com.compomics.util.experiment.personalization.ExperimentObject; +import com.compomics.util.waiting.WaitingHandler; import java.util.ArrayList; import java.util.Collection; @@ -95,9 +98,166 @@ public void setIdentificationKeys( * * @return a map of the spectrum matches keys indexed by spectrum file name */ - public HashMap> getSpectrumIdentification() { - return identificationKeys.spectrumIdentification; - } + public HashMap> getSpectrumIdentification() { + return identificationKeys.spectrumIdentification; + } + + /** + * Returns the spectrum files containing spectrum identifications. + * + * @return the spectrum files containing spectrum identifications + */ + public ArrayList getSpectrumFiles() { + return new ArrayList<>(identificationKeys.spectrumIdentification.keySet()); + } + + /** + * Returns the spectrum files containing spectrum identifications. + * + * @return the spectrum files containing spectrum identifications + */ + public ArrayList getOrderedSpectrumFileNames() { + return getSpectrumFiles(); + } + + /** + * Returns the spectrum identification keys for the given spectrum file as + * legacy string spectrum keys. + * + * @param spectrumFile the spectrum file + * + * @return the spectrum identification keys for the given spectrum file + */ + public HashSet getSpectrumIdentification(String spectrumFile) { + + HashSet longKeys = identificationKeys.spectrumIdentification.get(spectrumFile); + HashSet result = new HashSet<>(); + + if (longKeys != null) { + + for (Long key : longKeys) { + + SpectrumMatch spectrumMatch = getSpectrumMatch(key); + + if (spectrumMatch != null) { + result.add(Spectrum.getSpectrumKey(spectrumMatch.getSpectrumFile(), spectrumMatch.getSpectrumTitle())); + } + } + } + + return result; + } + + /** + * Returns whether the given legacy spectrum key exists. + * + * @param spectrumKey the spectrum key + * + * @return true if a match exists + */ + public boolean matchExists(String spectrumKey) { + return contains(getLongSpectrumKey(spectrumKey)); + } + + /** + * Returns the assumptions for the given legacy spectrum key. + * + * @param spectrumKey the spectrum key + * + * @return the assumptions map + */ + public HashMap>> getAssumptions(String spectrumKey) { + + SpectrumMatch spectrumMatch = getSpectrumMatch(getLongSpectrumKey(spectrumKey)); + + return spectrumMatch == null ? null : spectrumMatch.getAssumptionsMap(); + } + + /** + * Returns the spectrum match for the given legacy spectrum key. + * + * @param spectrumKey the spectrum key + * + * @return the spectrum match + */ + public SpectrumMatch getSpectrumMatch(String spectrumKey) { + return getSpectrumMatch(getLongSpectrumKey(spectrumKey)); + } + + /** + * Adds spectrum matches using their own keys. + * + * @param spectrumMatches the spectrum matches + */ + public void addSpectrumMatches(Iterable spectrumMatches) { + + HashMap matches = new HashMap<>(); + + for (SpectrumMatch spectrumMatch : spectrumMatches) { + matches.put(spectrumMatch.getKey(), spectrumMatch); + } + + addSpectrumMatches(matches, null, false); + } + + /** + * Returns a PSM iterator for the given spectrum file. + * + * @param spectrumFile the spectrum file + * @param displayProgress display progress + * @param waitingHandler the waiting handler + * + * @return a PSM iterator + */ + public PsmIterator getPsmIterator(String spectrumFile, boolean displayProgress, WaitingHandler waitingHandler) { + + HashSet keys = identificationKeys.spectrumIdentification.get(spectrumFile); + long[] keyArray = keys == null ? new long[0] : keys.stream().mapToLong(Long::longValue).toArray(); + + return new PsmIterator(new SpectrumMatchesIterator(keyArray, this, waitingHandler, displayProgress)); + } + + /** + * Returns a default identification reference. + * + * @param projectReference the project reference + * @param sampleReference the sample reference + * @param replicateNumber the replicate number + * + * @return the default identification reference + */ + public static String getDefaultReference(String projectReference, String sampleReference, int replicateNumber) { + return projectReference + "_" + sampleReference + "_" + replicateNumber; + } + + /** + * Legacy compatibility method. The current database is provided at + * construction time. + * + * @param dbFolder the database folder + * @param overwrite overwrite existing database + * @param objectsCache the objects cache + */ + public void establishConnection(String dbFolder, boolean overwrite, ObjectsCache objectsCache) { + if (objectsDB != null && objectsCache != null) { + objectsDB.setObjectCache(objectsCache); + } + } + + /** + * Resolves a legacy string spectrum key to the current long key. + * + * @param spectrumKey the legacy spectrum key + * + * @return the long key + */ + private long getLongSpectrumKey(String spectrumKey) { + + String spectrumFile = Spectrum.getSpectrumFile(spectrumKey); + String spectrumTitle = Spectrum.getSpectrumTitle(spectrumKey); + + return SpectrumMatch.getKey(spectrumFile, spectrumTitle); + } /** * Returns the keys of all the spectrum matches in the db. @@ -655,9 +815,16 @@ public HashMap> getProteinMap() { * * @param saveCache save the cache */ - public void close(boolean saveCache) { - objectsDB.close(saveCache); - } + public void close(boolean saveCache) { + objectsDB.close(saveCache); + } + + /** + * Closes the database connection and saves the cache. + */ + public void close() { + close(true); + } /** * Indicates whether the connection to the DB is active. diff --git a/src/main/java/com/compomics/util/experiment/identification/SpectrumIdentificationAssumption.java b/src/main/java/com/compomics/util/experiment/identification/SpectrumIdentificationAssumption.java index 851bb48fbc..36adf46315 100644 --- a/src/main/java/com/compomics/util/experiment/identification/SpectrumIdentificationAssumption.java +++ b/src/main/java/com/compomics/util/experiment/identification/SpectrumIdentificationAssumption.java @@ -207,6 +207,15 @@ public double getDeltaMz( ); } + public double getDeltaMass( + double measuredMZ, + boolean ppm, + int minIsotope, + int maxIsotope + ) { + return getDeltaMz(measuredMZ, ppm, minIsotope, maxIsotope); + } + /** * Returns the precursor isotope number according to the number of protons. * diff --git a/src/main/java/com/compomics/util/experiment/identification/amino_acid_tags/Tag.java b/src/main/java/com/compomics/util/experiment/identification/amino_acid_tags/Tag.java index c48fb953d0..895e8ddecf 100644 --- a/src/main/java/com/compomics/util/experiment/identification/amino_acid_tags/Tag.java +++ b/src/main/java/com/compomics/util/experiment/identification/amino_acid_tags/Tag.java @@ -350,6 +350,43 @@ public String getTaggedModifiedSequence( } + public String getTaggedModifiedSequence( + ModificationParameters modificationProfile, + boolean useHtmlColorCoding, + boolean includeHtmlStartEndTags, + boolean useShortName, + boolean includeTerminalGaps + ) { + return getTaggedModifiedSequence( + modificationProfile, + useHtmlColorCoding, + includeHtmlStartEndTags, + useShortName, + includeTerminalGaps, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING, + null + ); + } + + public String getTaggedModifiedSequence( + ModificationParameters modificationProfile, + boolean useHtmlColorCoding, + boolean includeHtmlStartEndTags, + boolean useShortName, + boolean includeTerminalGaps, + boolean excludeAllFixedPtms + ) { + return getTaggedModifiedSequence( + modificationProfile, + useHtmlColorCoding, + includeHtmlStartEndTags, + useShortName, + includeTerminalGaps, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING, + null + ); + } + /** * Returns the modified sequence as an tagged string with potential * modification sites color coded or with modification tags, e.g, @@ -402,7 +439,7 @@ public static String getTaggedModifiedSequence( // remove the hidden modifications for (int j = 0; j < variableModifications.length; j++) { // @TODO: possible to do this with streams? String tempMod = variableModifications[j]; - if (tempMod != null && !displayedModifications.contains(tempMod)) { + if (tempMod != null && displayedModifications != null && !displayedModifications.contains(tempMod)) { variableModifications[j] = null; } } @@ -412,7 +449,7 @@ public static String getTaggedModifiedSequence( // remove the hidden modifications for (int j = 0; j < fixedModifications.length; j++) { // @TODO: possible to do this with streams? String tempMod = fixedModifications[j]; - if (tempMod != null && !displayedModifications.contains(tempMod)) { + if (tempMod != null && displayedModifications != null && !displayedModifications.contains(tempMod)) { fixedModifications[j] = null; } } diff --git a/src/main/java/com/compomics/util/experiment/identification/matches/ModificationMatch.java b/src/main/java/com/compomics/util/experiment/identification/matches/ModificationMatch.java index 2de4d0c6ff..578e831fee 100644 --- a/src/main/java/com/compomics/util/experiment/identification/matches/ModificationMatch.java +++ b/src/main/java/com/compomics/util/experiment/identification/matches/ModificationMatch.java @@ -71,6 +71,10 @@ public String getModification() { return modification; } + public String getTheoreticPtm() { + return getModification(); + } + /** * Sets the theoretic PTM. * @@ -81,6 +85,10 @@ public void setModification(String modName) { this.modification = modName; } + public void setTheoreticPtm(String modName) { + setModification(modName); + } + /** * Getter for the modification site. N-term modifications * are at index 0, C-term at index sequence length + 1, and other @@ -94,6 +102,10 @@ public int getSite() { return modifiedSite; } + public int getModificationSite() { + return getSite(); + } + /** * Setter for the modification site, 1 is the first amino acid. * @@ -116,6 +128,14 @@ public boolean getConfident() { return confident; } + public boolean isConfident() { + return getConfident(); + } + + public boolean isVariable() { + return true; + } + /** * Sets whether the modification is confidently localized on the sequence. * diff --git a/src/main/java/com/compomics/util/experiment/identification/matches/SpectrumMatch.java b/src/main/java/com/compomics/util/experiment/identification/matches/SpectrumMatch.java index ba7227500d..9443c04671 100644 --- a/src/main/java/com/compomics/util/experiment/identification/matches/SpectrumMatch.java +++ b/src/main/java/com/compomics/util/experiment/identification/matches/SpectrumMatch.java @@ -1,9 +1,11 @@ package com.compomics.util.experiment.identification.matches; import com.compomics.util.experiment.identification.IdentificationMatch; +import com.compomics.util.experiment.identification.SpectrumIdentificationAssumption; import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; import com.compomics.util.experiment.identification.spectrum_assumptions.TagAssumption; import com.compomics.util.experiment.personalization.ExperimentObject; +import com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum; import com.compomics.util.io.IoUtil; import java.util.ArrayList; @@ -58,6 +60,17 @@ public class SpectrumMatch extends IdentificationMatch { public SpectrumMatch() { } + /** + * Constructor using a legacy string spectrum key. + * + * @param spectrumKey the spectrum key + */ + public SpectrumMatch( + String spectrumKey + ) { + this(Spectrum.getSpectrumFile(spectrumKey), Spectrum.getSpectrumTitle(spectrumKey)); + } + /** * Sets the peptide assumption map. * @@ -198,6 +211,115 @@ public long getKey() { return key; } + /** + * Sets the key using a legacy string spectrum key. + * + * @param spectrumKey the spectrum key + */ + public void setKey(String spectrumKey) { + setSpectrumFile(Spectrum.getSpectrumFile(spectrumKey)); + setSpectrumTitle(Spectrum.getSpectrumTitle(spectrumKey)); + } + + /** + * Returns a legacy-compatible assumptions map. + * + * @return the assumptions map + */ + public HashMap>> getAssumptionsMap() { + + HashMap>> result = new HashMap<>(); + + peptideAssumptionsMap.forEach((advocate, scoreMap) -> { + HashMap> advocateMap = result.computeIfAbsent(advocate, key -> new HashMap<>()); + scoreMap.forEach((score, assumptions) -> advocateMap.put(score, new ArrayList<>(assumptions))); + }); + + tagAssumptionsMap.forEach((advocate, scoreMap) -> { + HashMap> advocateMap = result.computeIfAbsent(advocate, key -> new HashMap<>()); + scoreMap.forEach((score, assumptions) -> advocateMap.put(score, new ArrayList<>(assumptions))); + }); + + return result; + } + + /** + * Returns all assumptions for the specified advocate. + * + * @param advocateId the advocate index + * + * @return all assumptions indexed by score + */ + public HashMap> getAllAssumptions(int advocateId) { + return getAssumptionsMap().get(advocateId); + } + + /** + * Returns all assumptions. + * + * @return all assumptions + */ + public ArrayList getAllAssumptions() { + + ArrayList result = new ArrayList<>(); + + peptideAssumptionsMap.values().forEach(scoreMap -> scoreMap.values().forEach(result::addAll)); + tagAssumptionsMap.values().forEach(scoreMap -> scoreMap.values().forEach(result::addAll)); + + return result; + } + + /** + * Adds an identification assumption. + * + * @param advocateId the advocate index + * @param assumption the assumption + * @param updateBestAssumption ignored, best assumptions are handled by the + * caller when needed + */ + public void addHit( + int advocateId, + SpectrumIdentificationAssumption assumption, + boolean updateBestAssumption + ) { + + if (assumption instanceof PeptideAssumption) { + addPeptideAssumption(advocateId, (PeptideAssumption) assumption); + } else if (assumption instanceof TagAssumption) { + addTagAssumption(advocateId, (TagAssumption) assumption); + } else { + throw new IllegalArgumentException("Unsupported assumption type: " + assumption.getClass().getName()); + } + } + + /** + * Removes all assumptions. + */ + public void removeAssumptions() { + peptideAssumptionsMap.clear(); + tagAssumptionsMap.clear(); + } + + /** + * Indicates whether the match contains assumptions. + * + * @return true if assumptions are present + */ + public boolean hasAssumption() { + return hasPeptideAssumption() || hasTagAssumption(); + } + + /** + * Indicates whether the match contains assumptions for the given advocate. + * + * @param advocateId the advocate index + * + * @return true if assumptions are present + */ + public boolean hasAssumption(int advocateId) { + return hasPeptideAssumption(advocateId) || hasTagAssumption(advocateId); + } + /** * Returns the advocates supporting hits for this spectrum. * diff --git a/src/main/java/com/compomics/util/experiment/identification/matches_iterators/PsmIterator.java b/src/main/java/com/compomics/util/experiment/identification/matches_iterators/PsmIterator.java new file mode 100644 index 0000000000..e7cf76c197 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/identification/matches_iterators/PsmIterator.java @@ -0,0 +1,41 @@ +package com.compomics.util.experiment.identification.matches_iterators; + +import com.compomics.util.experiment.identification.matches.SpectrumMatch; + +/** + * Legacy facade for iterating over spectrum matches. + */ +public class PsmIterator { + + /** + * The wrapped spectrum matches iterator. + */ + private final SpectrumMatchesIterator spectrumMatchesIterator; + + /** + * Constructor. + * + * @param spectrumMatchesIterator the spectrum matches iterator + */ + public PsmIterator(SpectrumMatchesIterator spectrumMatchesIterator) { + this.spectrumMatchesIterator = spectrumMatchesIterator; + } + + /** + * Returns the next spectrum match. + * + * @return the next spectrum match + */ + public SpectrumMatch next() { + return spectrumMatchesIterator.next(); + } + + /** + * Returns the next spectrum match. + * + * @return the next spectrum match + */ + public SpectrumMatch nextObject() { + return next(); + } +} diff --git a/src/main/java/com/compomics/util/experiment/identification/spectrum_annotation/AnnotationParameters.java b/src/main/java/com/compomics/util/experiment/identification/spectrum_annotation/AnnotationParameters.java index b7c4d74cf1..840df1a0db 100644 --- a/src/main/java/com/compomics/util/experiment/identification/spectrum_annotation/AnnotationParameters.java +++ b/src/main/java/com/compomics/util/experiment/identification/spectrum_annotation/AnnotationParameters.java @@ -233,6 +233,23 @@ public SpecificAnnotationParameters getSpecificAnnotationParameters( } + public SpecificAnnotationParameters getSpecificAnnotationPreferences( + String spectrumKey, + SpectrumIdentificationAssumption spectrumIdentificationAssumption, + SequenceMatchingParameters modificationSequenceMatchingParameters, + SequenceMatchingParameters sequenceMatchingParameters + ) { + return getSpecificAnnotationParameters( + com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum.getSpectrumFile(spectrumKey), + com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum.getSpectrumTitle(spectrumKey), + spectrumIdentificationAssumption, + new ModificationParameters(), + null, + modificationSequenceMatchingParameters, + null + ); + } + /** * Constructor setting preferences from search parameters. * @@ -300,6 +317,10 @@ public void setParametersFromSearchParameters( } } + public void setPreferencesFromSearchParameters(SearchParameters searchParameters) { + setParametersFromSearchParameters(searchParameters); + } + /** * Returns whether neutral losses are considered only for amino acids of * interest or not. diff --git a/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Precursor.java b/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Precursor.java index 7766a10f6e..89c28c4f53 100644 --- a/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Precursor.java +++ b/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Precursor.java @@ -3,6 +3,7 @@ import com.compomics.util.experiment.biology.ions.Charge; import com.compomics.util.experiment.biology.ions.impl.ElementaryIon; import com.compomics.util.experiment.personalization.ExperimentObject; +import java.util.ArrayList; import java.util.Arrays; import java.util.stream.Collectors; @@ -125,6 +126,36 @@ public double getRtInMinutes() { } + /** + * Returns the retention time in seconds. + * + * @return the retention time in seconds + */ + public double getRt() { + return rt; + } + + /** + * Returns the measured precursor m/z. + * + * @return the measured precursor m/z + */ + public double getMz() { + return mz; + } + + public double getIntensity() { + return intensity; + } + + public ArrayList getPossibleCharges() { + ArrayList result = new ArrayList<>(possibleCharges.length); + for (int possibleCharge : possibleCharges) { + result.add(new Charge(possibleCharge)); + } + return result; + } + /** * Returns the possible charges as a string. * @@ -209,4 +240,4 @@ public boolean isSameAs( return true; } -} \ No newline at end of file +} diff --git a/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Spectrum.java b/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Spectrum.java index c19765a8eb..6773bf48f1 100644 --- a/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Spectrum.java +++ b/src/main/java/com/compomics/util/experiment/mass_spectrometry/spectra/Spectrum.java @@ -15,6 +15,11 @@ */ public class Spectrum extends ExperimentObject { + /** + * Separator used by legacy string spectrum keys. + */ + public static final String SPECTRUM_KEY_SPLITTER = "_cus_"; + /** * The precursor if any. */ @@ -69,6 +74,40 @@ public Spectrum( } + /** + * Returns a legacy string key for the given spectrum. + * + * @param spectrumFile the spectrum file name + * @param spectrumTitle the spectrum title + * + * @return the spectrum key + */ + public static String getSpectrumKey(String spectrumFile, String spectrumTitle) { + return spectrumFile + SPECTRUM_KEY_SPLITTER + spectrumTitle; + } + + /** + * Returns the spectrum file from a legacy string spectrum key. + * + * @param spectrumKey the spectrum key + * + * @return the spectrum file + */ + public static String getSpectrumFile(String spectrumKey) { + return spectrumKey.substring(0, spectrumKey.indexOf(SPECTRUM_KEY_SPLITTER)); + } + + /** + * Returns the spectrum title from a legacy string spectrum key. + * + * @param spectrumKey the spectrum key + * + * @return the spectrum title + */ + public static String getSpectrumTitle(String spectrumKey) { + return spectrumKey.substring(spectrumKey.indexOf(SPECTRUM_KEY_SPLITTER) + SPECTRUM_KEY_SPLITTER.length()); + } + /** * Returns the peak list as an array list formatted as text, e.g. * [[303.17334 3181.14],[318.14542 37971.93], ... ]. @@ -144,6 +183,74 @@ public Precursor getPrecursor() { } + /** + * Returns the m/z values as an array. + * + * @return the m/z values as an array + */ + public double[] getMzValuesAsArray() { + return mz; + } + + /** + * Returns the intensity values as an array. + * + * @return the intensity values as an array + */ + public double[] getIntensityValuesAsArray() { + return intensity; + } + + /** + * Returns the intensity values normalized to 100. + * + * @return the normalized intensity values + */ + public double[] getIntensityValuesNormalizedAsArray() { + + double maxIntensity = getMaxIntensity(); + double[] result = new double[intensity.length]; + + if (maxIntensity <= 0.0) { + return result; + } + + for (int i = 0; i < intensity.length; i++) { + result[i] = intensity[i] / maxIntensity * 100.0; + } + + return result; + } + + /** + * Returns the spectrum in MGF text format. + * + * @return the spectrum in MGF text format + */ + public String asMgf() { + + StringBuilder mgf = new StringBuilder(); + mgf.append("BEGIN IONS").append(System.lineSeparator()); + + if (precursor != null) { + mgf.append("PEPMASS=").append(precursor.mz).append(System.lineSeparator()); + if (precursor.possibleCharges.length > 0) { + mgf.append("CHARGE=").append(precursor.getPossibleChargesAsString()).append(System.lineSeparator()); + } + if (!Double.isNaN(precursor.rt)) { + mgf.append("RTINSECONDS=").append(precursor.rt).append(System.lineSeparator()); + } + } + + for (int i = 0; i < mz.length; i++) { + mgf.append(mz[i]).append(' ').append(intensity[i]).append(System.lineSeparator()); + } + + mgf.append("END IONS").append(System.lineSeparator()); + + return mgf.toString(); + } + /** * Returns the number of peaks. * diff --git a/src/main/java/com/compomics/util/gui/spectrum/SpectrumPanel.java b/src/main/java/com/compomics/util/gui/spectrum/SpectrumPanel.java index 036e8381b0..4d3899980d 100644 --- a/src/main/java/com/compomics/util/gui/spectrum/SpectrumPanel.java +++ b/src/main/java/com/compomics/util/gui/spectrum/SpectrumPanel.java @@ -1672,6 +1672,39 @@ public void addAutomaticDeNovoSequencing( ); } + public void addAutomaticDeNovoSequencing( + Peptide currentPeptide, + ArrayList annotations, + int aForwardIon, + int aRewindIon, + int aDeNovoCharge, + boolean showForwardTags, + boolean showRewindTags, + double forwardIonPercentHeight, + double rewindIonPercentHeight, + ArrayList alphaLevels, + boolean excludeFixedModifications, + boolean mirrored + ) { + addAutomaticDeNovoSequencing( + currentPeptide, + annotations.toArray(new IonMatch[0]), + aForwardIon, + aRewindIon, + aDeNovoCharge, + showForwardTags, + showRewindTags, + forwardIonPercentHeight, + rewindIonPercentHeight, + alphaLevels, + excludeFixedModifications, + mirrored, + new ModificationParameters(), + null, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING + ); + } + /** * Add reference areas annotating the de novo tags, using default alpha * levels of 0.2. Fixed modifications are not annotated. @@ -1850,6 +1883,36 @@ public void addAutomaticDeNovoSequencing( ); } + public void addAutomaticDeNovoSequencing( + Tag tag, + ArrayList annotations, + int aForwardIon, + int aRewindIon, + int aDeNovoCharge, + boolean showForwardTags, + boolean showRewindTags, + double forwardIonPercentHeight, + double rewindIonPercentHeight, + ArrayList alphaLevels, + boolean excludeFixedModifications, + boolean mirrored + ) { + addAutomaticDeNovoSequencing( + tag, + annotations.toArray(new IonMatch[0]), + aForwardIon, + aRewindIon, + aDeNovoCharge, + showForwardTags, + showRewindTags, + forwardIonPercentHeight, + rewindIonPercentHeight, + alphaLevels, + excludeFixedModifications, + mirrored + ); + } + /** * Add reference areas annotating the de novo tags, using default alpha * levels of 0.2. Fixed modifications are not annotated. diff --git a/src/main/java/com/compomics/util/parameters/identification/advanced/SequenceMatchingParameters.java b/src/main/java/com/compomics/util/parameters/identification/advanced/SequenceMatchingParameters.java index b499ebaa2d..103df3662b 100644 --- a/src/main/java/com/compomics/util/parameters/identification/advanced/SequenceMatchingParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/advanced/SequenceMatchingParameters.java @@ -126,7 +126,9 @@ public static MatchingType getMatchingType(int index) { /** * Default string matching. */ - public static final SequenceMatchingParameters DEFAULT_STRING_MATCHING = getStringMatching(); + public static final SequenceMatchingParameters DEFAULT_STRING_MATCHING = getStringMatching(); + + public static final SequenceMatchingParameters defaultStringMatching = DEFAULT_STRING_MATCHING; /** * Constructor for empty preferences. diff --git a/src/main/java/com/compomics/util/parameters/identification/search/DigestionParameters.java b/src/main/java/com/compomics/util/parameters/identification/search/DigestionParameters.java index 9f9fa9e7a7..a28f9953e4 100644 --- a/src/main/java/com/compomics/util/parameters/identification/search/DigestionParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/search/DigestionParameters.java @@ -107,6 +107,10 @@ public String toString() { } } + public static DigestionParameters getDefaultPreferences() { + return getDefaultParameters(); + } + /** * Enum for the different types of digestion. */ @@ -193,6 +197,12 @@ public String toString() { } } + public enum CleavagePreference { + enzyme, + unSpecific, + wholeProtein + } + /** * Boolean indicating whether the sample was not digested. */ @@ -417,6 +427,10 @@ public CleavageParameter getCleavageParameter() { } + public CleavagePreference getCleavagePreference() { + return CleavagePreference.valueOf(cleavageParameter.name()); + } + /** * Sets the cleavage parameters. * diff --git a/src/main/java/com/compomics/util/parameters/identification/search/ModificationParameters.java b/src/main/java/com/compomics/util/parameters/identification/search/ModificationParameters.java index cd12bd2989..f9a234e35c 100644 --- a/src/main/java/com/compomics/util/parameters/identification/search/ModificationParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/search/ModificationParameters.java @@ -3,6 +3,7 @@ import com.compomics.util.experiment.personalization.ExperimentObject; import com.compomics.util.experiment.biology.modifications.Modification; import com.compomics.util.experiment.biology.modifications.ModificationFactory; +import java.awt.Color; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -253,6 +254,10 @@ public void setColor(String expectedModification, int color) { colors.put(expectedModification, color); } + public void setColor(String expectedModification, Color color) { + setColor(expectedModification, color.getRGB()); + } + /** * Returns the color used to code the given modification. * diff --git a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java index 2064ccd550..e9af904105 100644 --- a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java @@ -355,6 +355,10 @@ public void setModificationParameters(ModificationParameters modificationParamet this.modificationParameters = modificationParameters; } + public void setPtmSettings(ModificationParameters modificationParameters) { + setModificationParameters(modificationParameters); + } + /** * Returns the MS2 ion m/z tolerance. * @@ -501,6 +505,12 @@ public double getPrecursorAccuracy() { return precursorTolerance; } + public double getPrecursorAccuracyDalton() { + return precursorAccuracyType == MassAccuracyType.DA + ? precursorTolerance + : precursorTolerance * refMass / 1000000.0; + } + /** * Sets the precursor tolerance. * diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/DirecTagParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/DirecTagParameters.java index f30999c3c1..4eefcbac19 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/DirecTagParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/DirecTagParameters.java @@ -559,9 +559,13 @@ public void setPrecursorAdjustmentStep(double PrecursorAdjustmentStep) { * * @param variableModifications list of the names of the searched variable modifications */ - public void setModifications(ArrayList variableModifications) { - this.variableModifications = variableModifications; - } + public void setModifications(ArrayList variableModifications) { + this.variableModifications = variableModifications; + } + + public void setPtms(ArrayList variableModifications) { + setModifications(variableModifications); + } /** * Returns the name of the modification indexed by the given index. From 0ee35ae492fdfc42d99b09757b6a186565d1b63f Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Tue, 23 Jun 2026 02:01:11 +0200 Subject: [PATCH 10/12] Resolve positional InstaNovo spectrum ids --- .../InstaNovoCsvIdfileReader.java | 45 +++++++++++++++++++ .../TestInstaNovoIdfileReader.java | 25 +++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index 56a614b6c7..881491a4dc 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -306,6 +306,19 @@ private String getSpectrumTitle(SpectrumTitleLookup spectrumTitleLookup, String } } + if (spectrumId != null) { + int separatorIndex = spectrumId.indexOf(':'); + + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { + + title = spectrumTitleLookup.getTitleAtIndex(spectrumId.substring(separatorIndex + 1)); + + if (title != null) { + return title; + } + } + } + throw new IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'."); } @@ -602,6 +615,10 @@ private static class SpectrumTitleLookup { * Titles indexed by scan or index number tokens parsed from the title. */ private final HashMap titleByNumber = new HashMap<>(); + /** + * Titles in spectrum file order. + */ + private final String[] orderedTitles; /** * Constructor. @@ -617,6 +634,8 @@ private SpectrumTitleLookup(SpectrumProvider spectrumProvider, String spectrumFi throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); } + orderedTitles = spectrumTitles; + for (String title : spectrumTitles) { addTitle(title); } @@ -707,6 +726,32 @@ private String getTitleForNumber(String candidate) { return normalizedNumber == null ? null : titleByNumber.get(normalizedNumber); } + /** + * Returns a title by zero-based spectrum position. + * + * @param candidate the candidate index + * + * @return the title, or null if not found + */ + private String getTitleAtIndex(String candidate) { + + String normalizedNumber = normalizeNumber(candidate); + + if (normalizedNumber == null) { + return null; + } + + int index; + + try { + index = Integer.parseInt(normalizedNumber); + } catch (NumberFormatException e) { + return null; + } + + return index >= 0 && index < orderedTitles.length ? orderedTitles[index] : null; + } + /** * Normalizes a positive integer string. * diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index b14f489d8e..24f3df07fd 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -167,6 +167,31 @@ public void testSpectrumTitleLookupWithRealisticTitles() throws Exception { Assert.assertEquals("controllerType=0 controllerNumber=1 scan=1", spectrumMatches.get(0).getSpectrumTitle()); } + /** + * Tests matching InstaNovo positional spectrum ids to descriptive MGF + * titles. + * + * @throws Exception if an exception occurs + */ + public void testSpectrumTitleLookupWithPositionalSpectrumId() throws Exception { + + File csvFile = writeCsv( + "positional-titles.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,0,example:0,419.314971923828,2,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider( + new String[]{"example"}, + new String[]{"Cmpd 3543, +MSn(450.6095), 22.5 min", "Cmpd 3544, +MSn(697.8400), 22.5 min"} + ); + ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("Cmpd 3543, +MSn(450.6095), 22.5 min", spectrumMatches.get(0).getSpectrumTitle()); + } + /** * Tests charge parsing robustness. * From 7fdb9768b150ef468743f4f7ee60b5995a4a9e3a Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Tue, 23 Jun 2026 02:07:59 +0200 Subject: [PATCH 11/12] Estimate InstaNovo peptide masses --- .../idfilereaders/InstaNovoCsvIdfileReader.java | 5 +++++ .../io/identifications/TestInstaNovoIdfileReader.java | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index 881491a4dc..2d754bb779 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -166,6 +166,11 @@ public ArrayList getAllSpectrumMatches( ParsedPeptide parsedPeptide = parsePeptide(prediction, lineNumber); Peptide peptide = new Peptide(parsedPeptide.sequence, parsedPeptide.modificationMatches); + peptide.estimateTheoreticMass( + searchParameters.getModificationParameters(), + null, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING + ); PeptideAssumption peptideAssumption = new PeptideAssumption( peptide, 1, diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index 24f3df07fd..7c684aa5e9 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -301,6 +301,8 @@ private void assertReader( PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(2, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); + Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); if (advocateIndex == Advocate.instanovoRefined.getIndex()) { @@ -344,6 +346,8 @@ private void assertSampleReader( Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); + Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); if (advocateIndex == Advocate.instanovoRefined.getIndex()) { From 65e901ae03d190d2a2b7f3c3ea92e9a09229e464 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Tue, 23 Jun 2026 04:13:06 +0200 Subject: [PATCH 12/12] Preserve InstaNovo log probability scores --- .../idfilereaders/InstaNovoCsvIdfileReader.java | 2 +- .../identifications/TestInstaNovoIdfileReader.java | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java index 2d754bb779..0658e13db1 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -162,7 +162,7 @@ public ArrayList getAllSpectrumMatches( String spectrumTitle = getSpectrumTitle(spectrumTitleLookup, spectrumFileName, spectrumId, scanNumber); double logProbability = Util.readDoubleAsString(getValue(values, scoreIndex)); - double score = -logProbability; + double score = logProbability; ParsedPeptide parsedPeptide = parsePeptide(prediction, lineNumber); Peptide peptide = new Peptide(parsedPeptide.sequence, parsedPeptide.modificationMatches); diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java index 7c684aa5e9..5d3f0f03c3 100644 --- a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -124,21 +124,24 @@ public void testInstaNovoVersion122SampleRows() throws Exception { new InstaNovoIdfileReader(writeCsv("sample.instanovo.csv", INSTANOVO_V1_2_2)), Advocate.instanovo.getIndex(), "DMNSPK", - 2 + 2, + -1147.98681640625 ); assertSampleReader( new InstaNovoPlusIdfileReader(writeCsv("sample.instanovoplus.csv", INSTANOVOPLUS_V1_2_2)), Advocate.instanovoPlus.getIndex(), "MCIPDQPMEVDNEDDAPLPPPEAR", - 2 + 2, + -3.6934256553649902 ); assertSampleReader( new InstaNovoRefinedIdfileReader(writeCsv("sample.instanovo.refined.csv", INSTANOVO_COMBINED_V1_2_2)), Advocate.instanovoRefined.getIndex(), "LIRPLLK", - 0 + 0, + -0.6334811449050903 ); } @@ -326,7 +329,8 @@ private void assertSampleReader( IdfileReader idfileReader, int advocateIndex, String expectedSequence, - int expectedVariableModifications + int expectedVariableModifications, + double expectedLogProbability ) throws Exception { ArrayList spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); @@ -346,6 +350,8 @@ private void assertSampleReader( Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertEquals(expectedLogProbability, peptideAssumption.getRawScore(), 0.0); + Assert.assertEquals(expectedLogProbability, peptideAssumption.getScore(), 0.0); Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName()));