diff --git a/pom.xml b/pom.xml index 290e12a5c..7ad512b49 100644 --- a/pom.xml +++ b/pom.xml @@ -444,7 +444,7 @@ org.apache.logging.log4j log4j-api - 2.23.1 + 2.25.4 diff --git a/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java b/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java index 783c21e9c..de6f589cf 100644 --- a/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java +++ b/src/main/java/com/compomics/util/exceptions/ExceptionHandler.java @@ -32,14 +32,53 @@ public ExceptionHandler() { public synchronized void catchException(Exception e) { if (!ignoreExceptions && !exceptionCaught.contains(getExceptionType(e))) { - + e.printStackTrace(); exceptionCaught.add(getExceptionType(e)); + + // @TODO: remove once the underlying Nimbus look and feel bug is fixed. + // On recent JDKs the Nimbus look and feel can throw a benign + // ClassCastException ("ColorUIResource cannot be cast to Boolean" in + // NimbusStyle.isOpaque) while building chart popup menus. It does not + // affect functionality, so it is logged above but not shown to the user. + if (isBenignLookAndFeelException(e)) { + return; + } + notifyUser(e); - + } } + /** + * Indicates whether the given exception is the known benign look and feel + * ClassCastException thrown while rendering (e.g. "ColorUIResource cannot be + * cast to Boolean" in NimbusStyle). Such exceptions do not affect + * functionality and should not be reported to the user. 
+ * + * @param e the exception to inspect + * + * @return true if the exception is a benign look and feel rendering exception + */ + private static boolean isBenignLookAndFeelException(Exception e) { + + if (!(e instanceof ClassCastException)) { + return false; + } + + for (StackTraceElement element : e.getStackTrace()) { + + String className = element.getClassName(); + + if (className.startsWith("javax.swing.plaf.nimbus.") + || className.startsWith("javax.swing.plaf.synth.")) { + return true; + } + } + + return false; + } + /** * Notifies the user that an exception was caught. * diff --git a/src/main/java/com/compomics/util/experiment/identification/Advocate.java b/src/main/java/com/compomics/util/experiment/identification/Advocate.java index 778afdedb..7c306105f 100644 --- a/src/main/java/com/compomics/util/experiment/identification/Advocate.java +++ b/src/main/java/com/compomics/util/experiment/identification/Advocate.java @@ -182,6 +182,18 @@ public enum AdvocateType { * The MSFragger search engine. */ public static final Advocate msFragger = new Advocate(37, "MSFragger", AdvocateType.search_engine, new java.awt.Color(128, 128, 0)); + /** + * The InstaNovo de novo sequencing algorithm. + */ + public static final Advocate instanovo = new Advocate(38, "InstaNovo", AdvocateType.sequencing_algorithm, new Color(95, 158, 160)); + /** + * The InstaNovo+ de novo sequencing algorithm. + */ + public static final Advocate instanovoPlus = new Advocate(39, "InstaNovo+", AdvocateType.sequencing_algorithm, new Color(123, 104, 238)); + /** + * The InstaNovo predictions refined with InstaNovo+ de novo sequencing algorithm. + */ + public static final Advocate instanovoRefined = new Advocate(40, "InstaNovo with refinement", AdvocateType.sequencing_algorithm, new Color(72, 209, 204)); /** * Advocate type for mzId files where no software is annotated. 
*/ @@ -311,7 +323,7 @@ public String toString() { * @return the implemented advocates in an array */ public static Advocate[] values() { - Advocate[] result = new Advocate[40 + userAdvocates.size()]; + Advocate[] result = new Advocate[43 + userAdvocates.size()]; int i = 0; result[i] = peptideShaker; result[++i] = onyaseEngine; @@ -353,6 +365,9 @@ public static Advocate[] values() { result[++i] = coss; result[++i] = sage; result[++i] = msFragger; + result[++i] = instanovo; + result[++i] = instanovoPlus; + result[++i] = instanovoRefined; for (Advocate advocate : userAdvocates.values()) { result[++i] = advocate; @@ -489,6 +504,8 @@ public String getPmid() { return "37819886"; } else if (this == msFragger) { return "28394336"; + } else if (this == instanovo || this == instanovoPlus || this == instanovoRefined) { + return null; } else { return null; } diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java new file mode 100644 index 000000000..2d754bb77 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoCsvIdfileReader.java @@ -0,0 +1,846 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.Util; +import com.compomics.util.experiment.biology.proteins.Peptide; +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.identification.matches.ModificationMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; +import com.compomics.util.experiment.io.identification.IdfileReader; +import com.compomics.util.experiment.mass_spectrometry.SpectrumProvider; +import com.compomics.util.io.IoUtil; +import com.compomics.util.io.flat.SimpleFileReader; 
+import com.compomics.util.parameters.identification.advanced.SequenceMatchingParameters; +import com.compomics.util.parameters.identification.search.SearchParameters; +import com.compomics.util.waiting.WaitingHandler; +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.xml.bind.JAXBException; + +/** + * Shared parser for InstaNovo v1.2.2 normalized CSV predictions. + * + * @author CompOmics + */ +abstract class InstaNovoCsvIdfileReader implements IdfileReader { + + /** + * The supported InstaNovo version. + */ + private static final String SOFTWARE_VERSION = "1.2.2"; + /** + * Pattern matching common scan or index tokens in spectrum titles. + */ + private static final Pattern TITLE_NUMBER_PATTERN = Pattern.compile("(?i)(?:scan|index|scan_number)\\s*[=: ]\\s*(\\d+)"); + /** + * The CSV file. + */ + private final File csvFile; + /** + * The advocate used for peptide assumptions. + */ + private final Advocate advocate; + /** + * The extension this reader is registered for. + */ + private final String extension; + + /** + * Constructor. 
+ * + * @param csvFile the CSV file + * @param advocate the advocate + * @param extension the registered extension + */ + protected InstaNovoCsvIdfileReader(File csvFile, Advocate advocate, String extension) { + this.csvFile = csvFile; + this.advocate = advocate; + this.extension = extension; + } + + @Override + public String getExtension() { + return extension; + } + + @Override + public ArrayList getAllSpectrumMatches( + SpectrumProvider spectrumProvider, + WaitingHandler waitingHandler, + SearchParameters searchParameters + ) + throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException { + + return getAllSpectrumMatches( + spectrumProvider, + waitingHandler, + searchParameters, + null, + true + ); + } + + @Override + public ArrayList getAllSpectrumMatches( + SpectrumProvider spectrumProvider, + WaitingHandler waitingHandler, + SearchParameters searchParameters, + SequenceMatchingParameters sequenceMatchingPreferences, + boolean expandAaCombinations + ) + throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException { + + if (spectrumProvider == null) { + throw new IllegalArgumentException("A spectrum provider is required to import InstaNovo results."); + } + + ArrayList result = new ArrayList<>(); + HashMap matches = new HashMap<>(); + HashMap spectrumTitleLookups = new HashMap<>(); + + try (SimpleFileReader reader = SimpleFileReader.getFileReader(csvFile)) { + + String line = reader.readLine(); + + if (line == null) { + throw new IllegalArgumentException("The InstaNovo csv file is empty."); + } + + ArrayList headers = parseCsvLine(line); + HashMap columnIndexes = getColumnIndexes(headers); + + int experimentIndex = getOptionalColumn(columnIndexes, "experiment_name"); + int spectrumIdIndex = getOptionalColumn(columnIndexes, "spectrum_id", "spectrum"); + int scanNumberIndex = getOptionalColumn(columnIndexes, "scan_number", "scan"); + int 
chargeIndex = getRequiredColumn(columnIndexes, "precursor_charge", "charge", "z"); + int predictionIndex = getRequiredColumn(columnIndexes, "predictions", "prediction", "sequence"); + int scoreIndex = getRequiredColumn(columnIndexes, "log_probs", "prediction_log_probability", "predictions_log_probability"); + + if (experimentIndex < 0 && spectrumIdIndex < 0 && scanNumberIndex < 0) { + throw new IllegalArgumentException("Mandatory spectrum identification columns are missing in the InstaNovo csv file."); + } + + int lineNumber = 1; + while ((line = reader.readLine()) != null) { + + lineNumber++; + + if (line.trim().isEmpty()) { + continue; + } + + ArrayList values = parseCsvLine(line); + + String prediction = getValue(values, predictionIndex).trim(); + + if (prediction.isEmpty()) { + continue; + } + + String experimentName = experimentIndex >= 0 ? getValue(values, experimentIndex).trim() : ""; + String spectrumId = spectrumIdIndex >= 0 ? getValue(values, spectrumIdIndex).trim() : ""; + String scanNumber = scanNumberIndex >= 0 ? 
getValue(values, scanNumberIndex).trim() : ""; + Integer charge = getCharge(getValue(values, chargeIndex), lineNumber, waitingHandler); + + if (charge == null) { + continue; + } + + String spectrumFileName = getSpectrumFileName(spectrumProvider, experimentName, spectrumId); + SpectrumTitleLookup spectrumTitleLookup = spectrumTitleLookups.get(spectrumFileName); + + if (spectrumTitleLookup == null) { + spectrumTitleLookup = new SpectrumTitleLookup(spectrumProvider, spectrumFileName); + spectrumTitleLookups.put(spectrumFileName, spectrumTitleLookup); + } + + String spectrumTitle = getSpectrumTitle(spectrumTitleLookup, spectrumFileName, spectrumId, scanNumber); + + double logProbability = Util.readDoubleAsString(getValue(values, scoreIndex)); + double score = -logProbability; + + ParsedPeptide parsedPeptide = parsePeptide(prediction, lineNumber); + Peptide peptide = new Peptide(parsedPeptide.sequence, parsedPeptide.modificationMatches); + peptide.estimateTheoreticMass( + searchParameters.getModificationParameters(), + null, + SequenceMatchingParameters.DEFAULT_STRING_MATCHING + ); + PeptideAssumption peptideAssumption = new PeptideAssumption( + peptide, + 1, + advocate.getIndex(), + charge, + logProbability, + score, + IoUtil.getFileName(csvFile) + ); + + String matchKey = spectrumFileName + "\n" + spectrumTitle; + SpectrumMatch spectrumMatch = matches.get(matchKey); + + if (spectrumMatch == null) { + spectrumMatch = new SpectrumMatch(spectrumFileName, spectrumTitle); + matches.put(matchKey, spectrumMatch); + result.add(spectrumMatch); + } + + spectrumMatch.addPeptideAssumption(advocate.getIndex(), peptideAssumption); + } + } + + return result; + } + + @Override + public void close() throws IOException { + // Nothing to close. 
+ } + + @Override + public HashMap> getSoftwareVersions() { + + HashMap> result = new HashMap<>(); + ArrayList versions = new ArrayList<>(); + versions.add(SOFTWARE_VERSION); + result.put(advocate.getName(), versions); + + if (advocate == Advocate.instanovoRefined) { + + ArrayList instaNovoVersions = new ArrayList<>(); + instaNovoVersions.add(SOFTWARE_VERSION); + result.put(Advocate.instanovo.getName(), instaNovoVersions); + + ArrayList instaNovoPlusVersions = new ArrayList<>(); + instaNovoPlusVersions.add(SOFTWARE_VERSION); + result.put(Advocate.instanovoPlus.getName(), instaNovoPlusVersions); + } + + return result; + } + + @Override + public boolean hasDeNovoTags() { + return false; + } + + /** + * Returns the spectrum file name without extension, taken from the experiment name, else from the spectrum id prefix before ':', else the only file known to the spectrum provider. + * + * @param spectrumProvider the spectrum provider + * @param experimentName the experiment name + * @param spectrumId the spectrum id + * + * @return the spectrum file name without extension + */ + private String getSpectrumFileName(SpectrumProvider spectrumProvider, String experimentName, String spectrumId) { + + String fileName = experimentName; + + if (fileName == null || fileName.isEmpty()) { + int separatorIndex = spectrumId.indexOf(':'); + if (separatorIndex > 0) { + fileName = spectrumId.substring(0, separatorIndex); + } + } + + if (fileName == null || fileName.isEmpty()) { + + String[] fileNames = spectrumProvider.getOrderedFileNamesWithoutExtensions(); + + if (fileNames != null && fileNames.length == 1) { + fileName = fileNames[0]; + } + } + + if (fileName == null || fileName.isEmpty()) { + throw new IllegalArgumentException("Unable to infer the spectrum file name from the InstaNovo csv file."); + } + + return IoUtil.removeExtension(fileName); + } + + /** + * Resolves the spectrum title by trying, in order, the spectrum id, the id part after ':', the scan number as a title, the scan number among the numbers parsed from titles, and the id part after ':' as a zero-based position. 
+ * + * @param spectrumTitleLookup the title lookup of the spectrum file + * @param spectrumFileName the spectrum file name without extension, used in the error message + * @param spectrumId the spectrum id + * @param scanNumber the scan number + * + * @return the spectrum title, never null (an IllegalArgumentException is thrown if no title matches) + */ + private String getSpectrumTitle(SpectrumTitleLookup spectrumTitleLookup, String spectrumFileName, String spectrumId, String scanNumber) { + + String title = spectrumTitleLookup.getTitle(spectrumId); + + if (title != null) { + return title; + } + + if (spectrumId != null) { + int separatorIndex = spectrumId.indexOf(':'); + + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { + + title = spectrumTitleLookup.getTitle(spectrumId.substring(separatorIndex + 1)); + + if (title != null) { + return title; + } + } + } + + if (scanNumber != null && !scanNumber.isEmpty()) { + + title = spectrumTitleLookup.getTitle(scanNumber); + + if (title != null) { + return title; + } + + title = spectrumTitleLookup.getTitleForNumber(scanNumber); + + if (title != null) { + return title; + } + } + + if (spectrumId != null) { + int separatorIndex = spectrumId.indexOf(':'); + + if (separatorIndex >= 0 && separatorIndex < spectrumId.length() - 1) { + + title = spectrumTitleLookup.getTitleAtIndex(spectrumId.substring(separatorIndex + 1)); + + if (title != null) { + return title; + } + } + } + + throw new IllegalArgumentException("Unable to match InstaNovo spectrum id '" + spectrumId + "' to a spectrum title in file '" + spectrumFileName + "'."); + } + + /** + * Returns the precursor charge. + * + * @param value the charge column value + * @param lineNumber the line number + * @param waitingHandler the waiting handler receiving the skip report, can be null + * + * @return the charge, or null if the row should be skipped + */ + private Integer getCharge(String value, int lineNumber, WaitingHandler waitingHandler) { + + String charge = value == null ? 
"" : value.trim(); + + try { + return Integer.parseInt(charge); + } catch (NumberFormatException e) { + + if (waitingHandler != null) { + waitingHandler.appendReport( + "Skipping InstaNovo csv line " + lineNumber + ": invalid precursor charge '" + charge + "'.", + true, + true + ); + } + + return null; + } + } + + /** + * Parses a peptide sequence with optional UniMod annotations. + * + * @param prediction the prediction + * @param lineNumber the line number + * + * @return the parsed peptide + */ + private ParsedPeptide parsePeptide(String prediction, int lineNumber) { + + StringBuilder sequence = new StringBuilder(); + ArrayList modifications = new ArrayList<>(); + int lastResidueSite = 0; + + for (int i = 0; i < prediction.length(); i++) { + + char currentChar = prediction.charAt(i); + + if (currentChar == '[') { + + int endIndex = prediction.indexOf(']', i); + + if (endIndex < 0) { + throw new IllegalArgumentException("Invalid UniMod annotation in InstaNovo csv file at line " + lineNumber + "."); + } + + String annotation = prediction.substring(i + 1, endIndex); + Character previousResidue = lastResidueSite > 0 ? sequence.charAt(lastResidueSite - 1) : null; + Character nextResidue = previousResidue == null ? 
getNextResidue(prediction, endIndex + 1) : null; + UtilitiesModification modification = getUtilitiesModification(annotation, previousResidue, nextResidue, lastResidueSite); + + if (modification != null) { + modifications.add(new ModificationMatch(modification.name, modification.site)); + } + + i = endIndex; + + } else if (Character.isLetter(currentChar)) { + + sequence.append(Character.toUpperCase(currentChar)); + lastResidueSite = sequence.length(); + } + } + + if (sequence.length() == 0) { + throw new IllegalArgumentException("No peptide sequence found in InstaNovo csv file at line " + lineNumber + "."); + } + + return new ParsedPeptide(sequence.toString(), modifications.toArray(new ModificationMatch[modifications.size()])); + } + + /** + * Maps InstaNovo UniMod annotations to Utilities modification names. + * + * @param annotation the annotation + * @param previousResidue the preceding residue, null for N-terminal + * annotations + * @param nextResidue the next residue, null when unavailable + * @param site the preceding residue site + * + * @return the Utilities modification, or null if unsupported + */ + private UtilitiesModification getUtilitiesModification(String annotation, Character previousResidue, Character nextResidue, int site) { + + if (!annotation.toUpperCase().startsWith("UNIMOD:")) { + return null; + } + + String accession = annotation.substring("UNIMOD:".length()); + + if ("1".equals(accession) && previousResidue == null) { + return new UtilitiesModification("Acetylation of peptide N-term", 0); + } else if ("4".equals(accession) && previousResidue != null && previousResidue == 'C') { + return new UtilitiesModification("Carbamidomethylation of C", site); + } else if ("5".equals(accession) && previousResidue == null) { + return new UtilitiesModification("Carbamilation of protein N-term", 0); + } else if ("7".equals(accession) && previousResidue != null) { + if (previousResidue == 'N') { + return new UtilitiesModification("Deamidation of N", site); + 
} else if (previousResidue == 'Q') { + return new UtilitiesModification("Deamidation of Q", site); + } else if (previousResidue == 'R') { + return new UtilitiesModification("Citrullination of R", site); + } + } else if ("35".equals(accession) && previousResidue != null) { + if (previousResidue == 'M') { + return new UtilitiesModification("Oxidation of M", site); + } else if (previousResidue == 'P') { + return new UtilitiesModification("Oxidation of P", site); + } else if (previousResidue == 'K') { + return new UtilitiesModification("Oxidation of K", site); + } else if (previousResidue == 'C') { + return new UtilitiesModification("Oxidation of C", site); + } else if (previousResidue == 'N') { + return new UtilitiesModification("Oxidation of N", site); + } + } else if ("21".equals(accession) && previousResidue != null) { + if (previousResidue == 'S') { + return new UtilitiesModification("Phosphorylation of S", site); + } else if (previousResidue == 'T') { + return new UtilitiesModification("Phosphorylation of T", site); + } else if (previousResidue == 'Y') { + return new UtilitiesModification("Phosphorylation of Y", site); + } + } else if ("385".equals(accession)) { + if (previousResidue != null && previousResidue == 'N' && site > 0) { + return new UtilitiesModification("Ammonia loss from N", site); + } else if (previousResidue != null && previousResidue == 'C' && site == 1) { + return new UtilitiesModification("Pyrolidone from carbamidomethylated C", site); + } else if (previousResidue == null && nextResidue != null) { + if (nextResidue == 'N') { + return new UtilitiesModification("Ammonia loss from N", 1); + } else if (nextResidue == 'C') { + return new UtilitiesModification("Pyrolidone from carbamidomethylated C", 1); + } + } + } + + return null; + } + + /** + * Returns the next residue in the prediction. 
+ * + * @param prediction the prediction + * @param startIndex the start index + * + * @return the next residue, or null + */ + private Character getNextResidue(String prediction, int startIndex) { + + for (int i = startIndex; i < prediction.length(); i++) { + + char currentChar = prediction.charAt(i); + + if (Character.isLetter(currentChar)) { + return Character.toUpperCase(currentChar); + } + } + + return null; + } + + /** + * Returns a value from a parsed CSV row. + * + * @param values the values + * @param index the index + * + * @return the value + */ + private String getValue(ArrayList values, int index) { + return index < values.size() ? values.get(index) : ""; + } + + /** + * Returns indexes by lowercase header. + * + * @param headers the headers + * + * @return the indexes + */ + private HashMap getColumnIndexes(ArrayList headers) { + + HashMap result = new HashMap<>(); + + for (int i = 0; i < headers.size(); i++) { + result.put(headers.get(i).trim().toLowerCase(), i); + } + + return result; + } + + /** + * Returns an optional column. + * + * @param columnIndexes the column indexes + * @param columnNames the column names + * + * @return the column index, or -1 + */ + private int getOptionalColumn(HashMap columnIndexes, String... columnNames) { + + for (String columnName : columnNames) { + + Integer columnIndex = columnIndexes.get(columnName.toLowerCase()); + + if (columnIndex != null) { + return columnIndex; + } + } + + return -1; + } + + /** + * Returns a required column. + * + * @param columnIndexes the column indexes + * @param columnNames the column names + * + * @return the column index + */ + private int getRequiredColumn(HashMap columnIndexes, String... columnNames) { + + int columnIndex = getOptionalColumn(columnIndexes, columnNames); + + if (columnIndex < 0) { + throw new IllegalArgumentException("Mandatory columns are missing in the InstaNovo csv file."); + } + + return columnIndex; + } + + /** + * Parses a CSV line. 
+ * + * @param line the line + * + * @return the values + */ + private ArrayList parseCsvLine(String line) { + + ArrayList values = new ArrayList<>(); + StringBuilder currentValue = new StringBuilder(); + boolean inQuotes = false; + + for (int i = 0; i < line.length(); i++) { + + char currentChar = line.charAt(i); + + if (currentChar == '"') { + if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') { + currentValue.append('"'); + i++; + } else { + inQuotes = !inQuotes; + } + } else if (currentChar == ',' && !inQuotes) { + values.add(currentValue.toString()); + currentValue.setLength(0); + } else { + currentValue.append(currentChar); + } + } + + values.add(currentValue.toString()); + + return values; + } + + /** + * Spectrum title lookup cache for one spectrum file. + */ + private static class SpectrumTitleLookup { + + /** + * Titles indexed by exact and lower-case title. + */ + private final HashMap titles = new HashMap<>(); + /** + * Titles indexed by scan or index number tokens parsed from the title. + */ + private final HashMap titleByNumber = new HashMap<>(); + /** + * Titles in spectrum file order. + */ + private final String[] orderedTitles; + + /** + * Constructor. + * + * @param spectrumProvider the spectrum provider + * @param spectrumFileName the spectrum file name without extension + */ + private SpectrumTitleLookup(SpectrumProvider spectrumProvider, String spectrumFileName) { + + String[] spectrumTitles = spectrumProvider.getSpectrumTitles(spectrumFileName); + + if (spectrumTitles == null || spectrumTitles.length == 0) { + throw new IllegalArgumentException("No spectra found for file '" + spectrumFileName + "'."); + } + + orderedTitles = spectrumTitles; + + for (String title : spectrumTitles) { + addTitle(title); + } + } + + /** + * Adds a title. 
+ * + * @param title the title + */ + private void addTitle(String title) { + + if (title == null) { + return; + } + + titles.put(title, title); + titles.put(title.toLowerCase(), title); + + Matcher matcher = TITLE_NUMBER_PATTERN.matcher(title); + + while (matcher.find()) { + addNumber(matcher.group(1), title); + } + } + + /** + * Adds a scan or index number. + * + * @param number the number + * @param title the spectrum title + */ + private void addNumber(String number, String title) { + + String normalizedNumber = normalizeNumber(number); + + if (normalizedNumber == null) { + return; + } + + if (titleByNumber.containsKey(normalizedNumber) + && !title.equals(titleByNumber.get(normalizedNumber))) { + titleByNumber.put(normalizedNumber, null); + } else { + titleByNumber.put(normalizedNumber, title); + } + } + + /** + * Returns a title matching the given title candidate. + * + * @param candidate the candidate + * + * @return the title, or null if not found + */ + private String getTitle(String candidate) { + + if (candidate == null) { + return null; + } + + String trimmedCandidate = candidate.trim(); + + if (trimmedCandidate.isEmpty()) { + return null; + } + + String result = titles.get(trimmedCandidate); + + if (result != null) { + return result; + } + + return titles.get(trimmedCandidate.toLowerCase()); + } + + /** + * Returns a title matching the given scan or index number. + * + * @param candidate the candidate + * + * @return the title, or null if not found + */ + private String getTitleForNumber(String candidate) { + + String normalizedNumber = normalizeNumber(candidate); + + return normalizedNumber == null ? null : titleByNumber.get(normalizedNumber); + } + + /** + * Returns a title by zero-based spectrum position. 
+ * + * @param candidate the candidate index + * + * @return the title, or null if not found + */ + private String getTitleAtIndex(String candidate) { + + String normalizedNumber = normalizeNumber(candidate); + + if (normalizedNumber == null) { + return null; + } + + int index; + + try { + index = Integer.parseInt(normalizedNumber); + } catch (NumberFormatException e) { + return null; + } + + return index >= 0 && index < orderedTitles.length ? orderedTitles[index] : null; + } + + /** + * Normalizes a non-negative integer string by trimming it and stripping leading zeros (a lone zero is kept). + * + * @param number the number + * + * @return the normalized number, or null if the input is null, empty or contains a non-digit character + */ + private String normalizeNumber(String number) { + + if (number == null) { + return null; + } + + String trimmedNumber = number.trim(); + + if (trimmedNumber.isEmpty()) { + return null; + } + + for (int i = 0; i < trimmedNumber.length(); i++) { + if (!Character.isDigit(trimmedNumber.charAt(i))) { + return null; + } + } + + int startIndex = 0; + + while (startIndex < trimmedNumber.length() - 1 && trimmedNumber.charAt(startIndex) == '0') { + startIndex++; + } + + return trimmedNumber.substring(startIndex); + } + } + + /** + * Parsed peptide values. + */ + private static class ParsedPeptide { + + /** + * The bare sequence. + */ + private final String sequence; + /** + * The variable modifications. + */ + private final ModificationMatch[] modificationMatches; + + /** + * Constructor. + * + * @param sequence the sequence + * @param modificationMatches the modification matches + */ + private ParsedPeptide(String sequence, ModificationMatch[] modificationMatches) { + this.sequence = sequence; + this.modificationMatches = modificationMatches; + } + } + + /** + * Utilities modification mapping. + */ + private static class UtilitiesModification { + + /** + * The modification name. + */ + private final String name; + /** + * The modification site: 0 for the N-terminus, otherwise the 1-based residue position. + */ + private final int site; + + /** + * Constructor. 
+ * + * @param name the modification name + * @param site the modification site + */ + private UtilitiesModification(String name, int site) { + this.name = name; + this.site = site; + } + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java new file mode 100644 index 000000000..13f556e23 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoIdfileReader.java @@ -0,0 +1,33 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.experiment.identification.Advocate; +import java.io.File; + +/** + * Reader for InstaNovo transformer-only CSV output. + * + * @author CompOmics + */ +public class InstaNovoIdfileReader extends InstaNovoCsvIdfileReader { + + /** + * The supported extension. + */ + public static final String EXTENSION = ".instanovo.csv"; + + /** + * Default constructor for service loading. + */ + public InstaNovoIdfileReader() { + this(null); + } + + /** + * Constructor. + * + * @param csvFile the CSV file + */ + public InstaNovoIdfileReader(File csvFile) { + super(csvFile, Advocate.instanovo, EXTENSION); + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java new file mode 100644 index 000000000..cca7062c5 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoPlusIdfileReader.java @@ -0,0 +1,33 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.experiment.identification.Advocate; +import java.io.File; + +/** + * Reader for standalone InstaNovo+ CSV output. 
+ * + * @author CompOmics + */ +public class InstaNovoPlusIdfileReader extends InstaNovoCsvIdfileReader { + + /** + * The supported extension. + */ + public static final String EXTENSION = ".instanovoplus.csv"; + + /** + * Default constructor for service loading. + */ + public InstaNovoPlusIdfileReader() { + this(null); + } + + /** + * Constructor. + * + * @param csvFile the CSV file + */ + public InstaNovoPlusIdfileReader(File csvFile) { + super(csvFile, Advocate.instanovoPlus, EXTENSION); + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java new file mode 100644 index 000000000..d9776bbd2 --- /dev/null +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/InstaNovoRefinedIdfileReader.java @@ -0,0 +1,33 @@ +package com.compomics.util.experiment.io.identification.idfilereaders; + +import com.compomics.util.experiment.identification.Advocate; +import java.io.File; + +/** + * Reader for InstaNovo predictions refined by InstaNovo+. + * + * @author CompOmics + */ +public class InstaNovoRefinedIdfileReader extends InstaNovoCsvIdfileReader { + + /** + * The supported extension. + */ + public static final String EXTENSION = ".instanovo.refined.csv"; + + /** + * Default constructor for service loading. + */ + public InstaNovoRefinedIdfileReader() { + this(null); + } + + /** + * Constructor. 
+ * + * @param csvFile the CSV file + */ + public InstaNovoRefinedIdfileReader(File csvFile) { + super(csvFile, Advocate.instanovoRefined, EXTENSION); + } +} diff --git a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html index 37c3d14ac..c13031b80 100644 --- a/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html +++ b/src/main/java/com/compomics/util/experiment/io/identification/idfilereaders/package.html @@ -1,5 +1,7 @@ - - - Experiment classes related to reading search engine files. - - + + + Experiment classes related to reading search engine files, including + InstaNovo, InstaNovo+, and InstaNovo with InstaNovo+ refinement CSV + prediction files. + + diff --git a/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java b/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java index 50d954e99..d3fe971d8 100644 --- a/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java +++ b/src/main/java/com/compomics/util/experiment/mass_spectrometry/thermo_raw_file_parser/ThermoRawFileParserOutputFormat.java @@ -14,11 +14,11 @@ public enum ThermoRawFileParserOutputFormat { /** * mzML generic PSI format. */ - mzML(1, "mzML", "mzML generic PSI format", ".mzml"), + mzML(1, "mzML", "mzML generic PSI format", ".mzML"), /** * Indexed mzML generic PSI format. */ - mzMLIndexed(2, "mzML (indexed)", "mzML generic PSI format", ".mzml"); + mzMLIndexed(2, "mzML (indexed)", "mzML generic PSI format", ".mzML"); /** * The index of the format. 
diff --git a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java index 3241b8073..2064ccd55 100644 --- a/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java +++ b/src/main/java/com/compomics/util/parameters/identification/search/SearchParameters.java @@ -26,6 +26,8 @@ import static com.compomics.util.parameters.identification.IdentificationParameters.CURRENT_VERSION; import com.compomics.util.parameters.identification.tool_specific.MetaMorpheusParameters; import com.compomics.util.parameters.identification.tool_specific.SageParameters; +import com.compomics.util.parameters.identification.tool_specific.InstaNovoParameters; +import com.compomics.util.parameters.identification.tool_specific.InstaNovoPlusParameters; import java.io.*; import java.util.ArrayList; import java.util.HashMap; @@ -303,6 +305,18 @@ public void setDefaultAdvancedSettings(SearchParameters searchParameters) { setIdentificationAlgorithmParameter(Advocate.novor.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.novor.getIndex())); } + if (searchParameters == null || searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovo.getIndex()) == null) { + setIdentificationAlgorithmParameter(Advocate.instanovo.getIndex(), new InstaNovoParameters()); + } else { + setIdentificationAlgorithmParameter(Advocate.instanovo.getIndex(), searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovo.getIndex())); + } + + if (searchParameters == null || searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex()) == null) { + setIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex(), new InstaNovoPlusParameters()); + } else { + setIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex(), 
searchParameters.getIdentificationAlgorithmParameter(Advocate.instanovoPlus.getIndex())); + } + } /** diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java new file mode 100644 index 000000000..f341271cc --- /dev/null +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoParameters.java @@ -0,0 +1,268 @@ +package com.compomics.util.parameters.identification.tool_specific; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.personalization.ExperimentObject; +import com.compomics.util.gui.parameters.identification.IdentificationAlgorithmParameter; + +/** + * InstaNovo specific parameters. + * + * @author CompOmics + */ +public class InstaNovoParameters extends ExperimentObject implements IdentificationAlgorithmParameter { + + /** + * Version number for deserialization. + */ + static final long serialVersionUID = -2295564912139753378L; + /** + * Default InstaNovo transformer model identifier used by InstaNovo v1.2.2. + */ + public static final String DEFAULT_INSTANOVO_MODEL = "instanovo-v1.2.0"; + /** + * Default InstaNovo+ diffusion model identifier used by InstaNovo v1.2.2. + */ + public static final String DEFAULT_INSTANOVO_PLUS_MODEL = "instanovoplus-v1.1.0"; + /** + * Default prediction batch size for desktop SearchGUI runs. + */ + public static final int DEFAULT_BATCH_SIZE = 16; + /** + * The selected InstaNovo model id or path. + */ + private String instaNovoModel = DEFAULT_INSTANOVO_MODEL; + /** + * The selected InstaNovo+ model id or path used for refinement. + */ + private String instaNovoPlusModel = DEFAULT_INSTANOVO_PLUS_MODEL; + /** + * The optional inference configuration path. + */ + private String configFile = null; + /** + * The number of beams. 
+ */ + private int numberOfBeams = 5; + /** + * The prediction batch size. + */ + private int batchSize = DEFAULT_BATCH_SIZE; + /** + * Whether to use knapsack beam search. + */ + private boolean useKnapsack = false; + /** + * Whether to save all beam search predictions. + */ + private boolean saveAllPredictions = true; + /** + * Whether to force CPU execution. + */ + private boolean forceCpu = false; + + @Override + public Advocate getAlgorithm() { + return Advocate.instanovo; + } + + @Override + public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmParameter) { + + if (identificationAlgorithmParameter instanceof InstaNovoParameters) { + + InstaNovoParameters other = (InstaNovoParameters) identificationAlgorithmParameter; + + return safeEquals(instaNovoModel, other.getInstaNovoModel()) + && safeEquals(instaNovoPlusModel, other.getInstaNovoPlusModel()) + && safeEquals(configFile, other.getConfigFile()) + && numberOfBeams == other.getNumberOfBeams() + && getBatchSize() == other.getBatchSize() + && useKnapsack == other.isUseKnapsack() + && saveAllPredictions == other.isSaveAllPredictions() + && forceCpu == other.isForceCpu(); + } + + return false; + } + + @Override + public String toString(boolean html) { + + String newLine = html ? "<br>" : System.getProperty("line.separator"); + StringBuilder output = new StringBuilder(); + Advocate advocate = getAlgorithm(); + output.append("# ------------------------------------------------------------------"); + output.append(newLine); + output.append("# ").append(advocate.getName()).append(" Specific Parameters"); + output.append(newLine); + output.append("# ------------------------------------------------------------------"); + output.append(newLine); + output.append(newLine); + output.append("INSTANOVO_MODEL=").append(instaNovoModel).append(newLine); + output.append("INSTANOVO_PLUS_MODEL=").append(instaNovoPlusModel).append(newLine); + output.append("CONFIG_FILE=").append(configFile == null ? "" : configFile).append(newLine); + output.append("NUMBER_OF_BEAMS=").append(numberOfBeams).append(newLine); + output.append("BATCH_SIZE=").append(getBatchSize()).append(newLine); + output.append("USE_KNAPSACK=").append(useKnapsack).append(newLine); + output.append("SAVE_ALL_PREDICTIONS=").append(saveAllPredictions).append(newLine); + output.append("FORCE_CPU=").append(forceCpu).append(newLine); + + return output.toString(); + } + + /** + * Returns the selected InstaNovo model. + * + * @return the selected InstaNovo model + */ + public String getInstaNovoModel() { + return instaNovoModel; + } + + /** + * Sets the selected InstaNovo model. + * + * @param instaNovoModel the selected InstaNovo model + */ + public void setInstaNovoModel(String instaNovoModel) { + this.instaNovoModel = instaNovoModel; + } + + /** + * Returns the selected InstaNovo+ model. + * + * @return the selected InstaNovo+ model + */ + public String getInstaNovoPlusModel() { + return instaNovoPlusModel; + } + + /** + * Sets the selected InstaNovo+ model. + * + * @param instaNovoPlusModel the selected InstaNovo+ model + */ + public void setInstaNovoPlusModel(String instaNovoPlusModel) { + this.instaNovoPlusModel = instaNovoPlusModel; + } + + /** + * Returns the optional configuration file.
+ * + * @return the optional configuration file + */ + public String getConfigFile() { + return configFile; + } + + /** + * Sets the optional configuration file. + * + * @param configFile the optional configuration file + */ + public void setConfigFile(String configFile) { + this.configFile = configFile; + } + + /** + * Returns the number of beams. + * + * @return the number of beams + */ + public int getNumberOfBeams() { + return numberOfBeams; + } + + /** + * Sets the number of beams. + * + * @param numberOfBeams the number of beams + */ + public void setNumberOfBeams(int numberOfBeams) { + this.numberOfBeams = numberOfBeams; + } + + /** + * Returns the effective batch size: the stored value when it is positive, otherwise {@link #DEFAULT_BATCH_SIZE}. + * + * @return the effective batch size, always greater than zero + */ + public int getBatchSize() { + return batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; + } + + /** + * Sets the batch size; values of zero or less are replaced by {@link #DEFAULT_BATCH_SIZE}. + * + * @param batchSize the requested batch size + */ + public void setBatchSize(int batchSize) { + this.batchSize = batchSize > 0 ? batchSize : DEFAULT_BATCH_SIZE; + } + + /** + * Returns whether knapsack beam search is used. + * + * @return whether knapsack beam search is used + */ + public boolean isUseKnapsack() { + return useKnapsack; + } + + /** + * Sets whether knapsack beam search is used. + * + * @param useKnapsack whether knapsack beam search is used + */ + public void setUseKnapsack(boolean useKnapsack) { + this.useKnapsack = useKnapsack; + } + + /** + * Returns whether all beam search predictions are saved. + * + * @return whether all beam search predictions are saved + */ + public boolean isSaveAllPredictions() { + return saveAllPredictions; + } + + /** + * Sets whether all beam search predictions are saved. + * + * @param saveAllPredictions whether all beam search predictions are saved + */ + public void setSaveAllPredictions(boolean saveAllPredictions) { + this.saveAllPredictions = saveAllPredictions; + } + + /** + * Returns whether CPU execution is forced.
+ * + * @return whether CPU execution is forced + */ + public boolean isForceCpu() { + return forceCpu; + } + + /** + * Sets whether CPU execution is forced. + * + * @param forceCpu whether CPU execution is forced + */ + public void setForceCpu(boolean forceCpu) { + this.forceCpu = forceCpu; + } + + /** + * Null-safe string comparison. + * + * @param a the first value + * @param b the second value + * + * @return true if the two values are equal + */ + protected boolean safeEquals(String a, String b) { + return a == null ? b == null : a.equals(b); + } +} diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java new file mode 100644 index 000000000..a80f637a2 --- /dev/null +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/InstaNovoPlusParameters.java @@ -0,0 +1,28 @@ +package com.compomics.util.parameters.identification.tool_specific; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.gui.parameters.identification.IdentificationAlgorithmParameter; + +/** + * InstaNovo+ specific parameters. + * + * @author CompOmics + */ +public class InstaNovoPlusParameters extends InstaNovoParameters { + + /** + * Version number for deserialization. 
+ */ + static final long serialVersionUID = -7586968643672811482L; + + @Override + public Advocate getAlgorithm() { + return Advocate.instanovoPlus; + } + + @Override + public boolean equals(IdentificationAlgorithmParameter identificationAlgorithmParameter) { + return identificationAlgorithmParameter instanceof InstaNovoPlusParameters + && super.equals(identificationAlgorithmParameter); + } +} diff --git a/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html b/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html index bcfa8b1b9..d7f790139 100644 --- a/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html +++ b/src/main/java/com/compomics/util/parameters/identification/tool_specific/package.html @@ -1,5 +1,6 @@ - - - Parameters settings for the search algorithms. - - + + + Parameter settings for the search and de novo sequencing algorithms, + including InstaNovo and InstaNovo+. + + diff --git a/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader b/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader index 3e823496b..4bf7accc6 100644 --- a/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader +++ b/src/main/resources/META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader @@ -10,5 +10,8 @@ com.compomics.util.experiment.io.identification.idfilereaders.TideIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.NovorIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.OnyaseIdfileReader com.compomics.util.experiment.io.identification.idfilereaders.XTandemIdfileReader -com.compomics.util.experiment.io.identification.idfilereaders.CossIdfileReader -com.compomics.util.experiment.io.identification.idfilereaders.SageIdfileReader \ No newline at end of file 
+com.compomics.util.experiment.io.identification.idfilereaders.CossIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.SageIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoPlusIdfileReader +com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoRefinedIdfileReader diff --git a/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java new file mode 100644 index 000000000..7c684aa5e --- /dev/null +++ b/src/test/java/com/compomics/util/test/experiment/io/identifications/TestInstaNovoIdfileReader.java @@ -0,0 +1,560 @@ +package com.compomics.util.test.experiment.io.identifications; + +import com.compomics.util.experiment.identification.Advocate; +import com.compomics.util.experiment.identification.matches.ModificationMatch; +import com.compomics.util.experiment.identification.matches.SpectrumMatch; +import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; +import com.compomics.util.experiment.io.identification.IdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoIdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoPlusIdfileReader; +import com.compomics.util.experiment.io.identification.idfilereaders.InstaNovoRefinedIdfileReader; +import com.compomics.util.experiment.mass_spectrometry.SpectrumProvider; +import com.compomics.util.experiment.mass_spectrometry.spectra.Precursor; +import com.compomics.util.experiment.mass_spectrometry.spectra.Spectrum; +import com.compomics.util.parameters.identification.search.SearchParameters; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import 
java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.TreeMap; +import junit.framework.TestCase; +import org.junit.Assert; + +/** + * Tests for InstaNovo v1.2.2 CSV readers. + * + * @author CompOmics + */ +public class TestInstaNovoIdfileReader extends TestCase { + + /** + * Derived from the first row of the InstaNovo v1.2.2 transformer normalized + * Zenodo sample file. + */ + private static final String INSTANOVO_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-1147.98681640625,\"[-0.015801219269633293, -1.1395305395126343, -2.2013168334960938, -1.3749353885650635, -1.4705305099487305, -0.5675679445266724]\",no_group,\"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K\",58846.475981092575\n"; + + /** + * Derived from the first row of the InstaNovo+ v1.2.2 no-refinement + * normalized Zenodo sample file. + */ + private static final String INSTANOVOPLUS_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,MC[UNIMOD:4]IPDQPM[UNIMOD:35]EVDNEDDAPLPPPEAR,-3.6934256553649902,,no_group,\"M, C[UNIMOD:4], I, P, D, Q, P, M[UNIMOD:35], E, V, D, N, E, D, D, A, P, L, P, P, P, E, A, R\",2282970.310323359\n"; + + /** + * Derived from the first row of the InstaNovo v1.2.2 combined refined + * Zenodo sample file. 
+ */ + private static final String INSTANOVO_COMBINED_V1_2_2 + = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,instanovo_predictions,instanovo_prediction_log_probability,instanovo_prediction_token_log_probabilities,instanovo_predictions_beam_0,instanovo_predictions_log_probability_beam_0,instanovo_predictions_token_log_probabilities_beam_0,instanovo_predictions_beam_1,instanovo_predictions_log_probability_beam_1,instanovo_predictions_token_log_probabilities_beam_1,instanovo_predictions_beam_2,instanovo_predictions_log_probability_beam_2,instanovo_predictions_token_log_probabilities_beam_2,instanovo_predictions_beam_3,instanovo_predictions_log_probability_beam_3,instanovo_predictions_token_log_probabilities_beam_3,instanovo_predictions_beam_4,instanovo_predictions_log_probability_beam_4,instanovo_predictions_token_log_probabilities_beam_4,instanovoplus_predictions,instanovoplus_prediction_log_probability,instanovoplus_prediction_token_log_probabilities,instanovoplus_unrefined_predictions,predictions_tokenised,delta_mass_ppm\n" + + "SF_200217_U2OS_TiO2_HCD_OT_rep1,0,SF_200217_U2OS_TiO2_HCD_OT_rep1:0,419.314971923828,2,0,LIRPLLK,-0.6334811449050903,,no_group,\"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']\",-10.102036476135254,\"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",LKGDS[UNIMOD:21]PK,-10.102036476135254,\"[-1.716342806816101, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",VKGDS[UNIMOD:21]PK,-11.082494735717773,\"[-2.8237648010253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",SKGDS[UNIMOD:21]PK,-11.430251121520996,\"[-2.7461280822753906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, 
-1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",AKGDS[UNIMOD:21]PK,-11.492465019226074,\"[-3.1643409729003906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",PKGDS[UNIMOD:21]PK,-11.968438148498535,\"[-2.6694679260253906, -1.0499515533447266, -1.1343414783477783, -2.570066452026367, -1.3749353885650635, -1.4704134464263916, -0.5675679445266724]\",\"['L', 'I', 'R', 'P', 'L', 'L', 'K']\",-0.6334811449050903,,\"['L', 'K', 'G', 'D', 'S[UNIMOD:21]', 'P', 'K']\",\"L, I, R, P, L, L, K\",17862.82765389216\n"; + + /** + * Tests registration and parsing of the three supported InstaNovo modes. + * + * @throws Exception if an exception occurs + */ + public void testInstaNovoReaders() throws Exception { + + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo")); + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo+")); + Assert.assertNotNull(Advocate.getAdvocate("InstaNovo with refinement")); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider(); + SearchParameters searchParameters = new SearchParameters(); + + assertReader("test.instanovo.csv", Advocate.instanovo.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovoplus.csv", Advocate.instanovoPlus.getIndex(), spectrumProvider, searchParameters); + assertReader("test.instanovo.refined.csv", Advocate.instanovoRefined.getIndex(), spectrumProvider, searchParameters); + } + + /** + * Tests service registration for the three InstaNovo readers. 
+ * + * @throws Exception if an exception occurs + */ + public void testInstaNovoReaderServiceRegistration() throws Exception { + + String serviceFile; + + /* InputStream.available() is only an estimate and a single read() may return fewer bytes than requested, so the resource is drained in a loop. The try-with-resources statement closes the stream; a null resource is skipped on close and reported by assertNotNull. */ + try (InputStream serviceStream = getClass().getClassLoader().getResourceAsStream( + "META-INF/services/com.compomics.util.experiment.io.identification.IdfileReader" + )) { + + Assert.assertNotNull(serviceStream); + + java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream(); + byte[] chunk = new byte[4096]; + int bytesRead; + + while ((bytesRead = serviceStream.read(chunk)) != -1) { + buffer.write(chunk, 0, bytesRead); + } + + serviceFile = new String(buffer.toByteArray(), StandardCharsets.UTF_8); + } + + Assert.assertTrue(serviceFile.contains(InstaNovoIdfileReader.class.getName())); + Assert.assertTrue(serviceFile.contains(InstaNovoPlusIdfileReader.class.getName())); + Assert.assertTrue(serviceFile.contains(InstaNovoRefinedIdfileReader.class.getName())); + } + + /** + * Tests that a CSV file lacking mandatory columns is rejected with an + * IllegalArgumentException whose message contains "Mandatory". + * + * @throws Exception if an exception occurs + */ + public void testMissingColumns() throws Exception { + + File csvFile = writeCsv("missing.instanovo.csv", "experiment_name,scan_number,predictions\nexample,0,PEPTIDE\n"); + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + + try { + idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + Assert.fail("Expected invalid InstaNovo CSV columns to fail."); + } catch (IllegalArgumentException e) { + Assert.assertTrue(e.getMessage().contains("Mandatory")); + } + } + + /** + * Tests parsing rows derived from the InstaNovo v1.2.2 sample files.
+ * + * @throws Exception if an exception occurs + */ + public void testInstaNovoVersion122SampleRows() throws Exception { + + assertSampleReader( + new InstaNovoIdfileReader(writeCsv("sample.instanovo.csv", INSTANOVO_V1_2_2)), + Advocate.instanovo.getIndex(), + "DMNSPK", + 2 + ); + + assertSampleReader( + new InstaNovoPlusIdfileReader(writeCsv("sample.instanovoplus.csv", INSTANOVOPLUS_V1_2_2)), + Advocate.instanovoPlus.getIndex(), + "MCIPDQPMEVDNEDDAPLPPPEAR", + 2 + ); + + assertSampleReader( + new InstaNovoRefinedIdfileReader(writeCsv("sample.instanovo.refined.csv", INSTANOVO_COMBINED_V1_2_2)), + Advocate.instanovoRefined.getIndex(), + "LIRPLLK", + 0 + ); + } + + /** + * Tests matching realistic spectrum titles by scan tokens without positional + * scan-number fallback. + * + * @throws Exception if an exception occurs + */ + public void testSpectrumTitleLookupWithRealisticTitles() throws Exception { + + File csvFile = writeCsv( + "realistic-titles.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,1,example:1,419.314971923828,2,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider( + new String[]{"example"}, + new String[]{"controllerType=0 controllerNumber=1 scan=1", "controllerType=0 controllerNumber=1 scan=2"} + ); + ArrayList<SpectrumMatch> spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("controllerType=0 controllerNumber=1 scan=1", spectrumMatches.get(0).getSpectrumTitle()); + } + + /** + * Tests matching InstaNovo positional spectrum ids to descriptive MGF + * titles.
+ * + * @throws Exception if an exception occurs + */ + public void testSpectrumTitleLookupWithPositionalSpectrumId() throws Exception { + + File csvFile = writeCsv( + "positional-titles.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,0,example:0,419.314971923828,2,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + SimpleSpectrumProvider spectrumProvider = new SimpleSpectrumProvider( + new String[]{"example"}, + new String[]{"Cmpd 3543, +MSn(450.6095), 22.5 min", "Cmpd 3544, +MSn(697.8400), 22.5 min"} + ); + ArrayList<SpectrumMatch> spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("Cmpd 3543, +MSn(450.6095), 22.5 min", spectrumMatches.get(0).getSpectrumTitle()); + } + + /** + * Tests charge parsing robustness. + * + * @throws Exception if an exception occurs + */ + public void testChargeParsingSkipsInvalidRows() throws Exception { + + File csvFile = writeCsv( + "charges.instanovo.csv", + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs\n" + + "example,0,example:0,419.314971923828,not-a-charge,0,PEPTIDE,-1.0\n" + + "example,1,example:1,419.314971923828, 2 ,0,PEPTIDE,-1.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + ArrayList<SpectrumMatch> spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + Assert.assertEquals("1", spectrumMatches.get(0).getSpectrumTitle()); + } + + /** + * Tests all UniMod annotations from the InstaNovo v1.2.2 default residue + * configuration.
+ * + * @throws Exception if an exception occurs + */ + public void testDefaultInstaNovoModifications() throws Exception { + + String header = "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n"; + File csvFile = writeCsv( + "default-modifications.instanovo.csv", + header + + "sample,0,sample:0,419.314971923828,2,0,M[UNIMOD:35]C[UNIMOD:4]N[UNIMOD:7]Q[UNIMOD:7]R[UNIMOD:7]P[UNIMOD:35]S[UNIMOD:21]T[UNIMOD:21]Y[UNIMOD:21],-1.0,,no_group,,0.0\n" + + "sample,1,sample:1,419.314971923828,2,0,[UNIMOD:1]ACD,-1.0,,no_group,,0.0\n" + + "sample,2,sample:2,419.314971923828,2,0,[UNIMOD:5]ACD,-1.0,,no_group,,0.0\n" + + "sample,3,sample:3,419.314971923828,2,0,[UNIMOD:385]CPEP,-1.0,,no_group,,0.0\n" + + "sample,4,sample:4,419.314971923828,2,0,[UNIMOD:385]NPEP,-1.0,,no_group,,0.0\n" + ); + + IdfileReader idfileReader = new InstaNovoIdfileReader(csvFile); + ArrayList<SpectrumMatch> spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(5, spectrumMatches.size()); + + PeptideAssumption residueModifiedAssumption = getFirstAssumption(spectrumMatches, "0", Advocate.instanovo.getIndex()); + + Assert.assertEquals("MCNQRPSTY", residueModifiedAssumption.getPeptide().getSequence()); + assertModification(residueModifiedAssumption, "Oxidation of M", 1); + assertModification(residueModifiedAssumption, "Carbamidomethylation of C", 2); + assertModification(residueModifiedAssumption, "Deamidation of N", 3); + assertModification(residueModifiedAssumption, "Deamidation of Q", 4); + assertModification(residueModifiedAssumption, "Citrullination of R", 5); + assertModification(residueModifiedAssumption, "Oxidation of P", 6); + assertModification(residueModifiedAssumption, "Phosphorylation of S", 7); + assertModification(residueModifiedAssumption, "Phosphorylation of T", 8); + assertModification(residueModifiedAssumption,
"Phosphorylation of Y", 9); + + assertModification(getFirstAssumption(spectrumMatches, "1", Advocate.instanovo.getIndex()), "Acetylation of peptide N-term", 0); + assertModification(getFirstAssumption(spectrumMatches, "2", Advocate.instanovo.getIndex()), "Carbamilation of protein N-term", 0); + assertModification(getFirstAssumption(spectrumMatches, "3", Advocate.instanovo.getIndex()), "Pyrolidone from carbamidomethylated C", 1); + assertModification(getFirstAssumption(spectrumMatches, "4", Advocate.instanovo.getIndex()), "Ammonia loss from N", 1); + } + + /** + * Asserts one reader. + * + * @param fileName the file name + * @param advocateIndex the expected advocate index + * @param spectrumProvider the spectrum provider + * @param searchParameters the search parameters + * + * @throws Exception if an exception occurs + */ + private void assertReader( + String fileName, + int advocateIndex, + SpectrumProvider spectrumProvider, + SearchParameters searchParameters + ) throws Exception { + + File csvFile = writeCsv( + fileName, + "experiment_name,scan_number,spectrum_id,precursor_mz,precursor_charge,prediction_id,predictions,log_probs,token_log_probs,group,predictions_tokenised,delta_mass_ppm\n" + + "example,0,example:0,419.314971923828,2,0,DM[UNIMOD:35]NS[UNIMOD:21]PK,-10.0,\"[-1.0]\",no_group,\"D, M[UNIMOD:35], N, S[UNIMOD:21], P, K\",0.0\n" + ); + + IdfileReader idfileReader; + if (fileName.endsWith(InstaNovoPlusIdfileReader.EXTENSION)) { + idfileReader = new InstaNovoPlusIdfileReader(csvFile); + } else if (fileName.endsWith(InstaNovoRefinedIdfileReader.EXTENSION)) { + idfileReader = new InstaNovoRefinedIdfileReader(csvFile); + } else { + idfileReader = new InstaNovoIdfileReader(csvFile); + } + + Assert.assertNotNull(idfileReader); + + ArrayList<SpectrumMatch> spectrumMatches = idfileReader.getAllSpectrumMatches(spectrumProvider, null, searchParameters); + Assert.assertEquals(1, spectrumMatches.size()); + SpectrumMatch spectrumMatch = spectrumMatches.get(0); +
Assert.assertEquals("example", spectrumMatch.getSpectrumFile()); + Assert.assertEquals("0", spectrumMatch.getSpectrumTitle()); + + TreeMap<Double, ArrayList<PeptideAssumption>> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + Assert.assertNotNull(assumptions); + PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); + Assert.assertEquals("DMNSPK", peptideAssumption.getPeptide().getSequence()); + Assert.assertEquals(2, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); + Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + + if (advocateIndex == Advocate.instanovoRefined.getIndex()) { + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovo.getName())); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovoPlus.getName())); + } + } + + /** + * Asserts a reader using sample v1.2.2 CSV content.
+ * + * @param idfileReader the reader + * @param advocateIndex the expected advocate index + * @param expectedSequence the expected peptide sequence + * @param expectedVariableModifications the expected number of variable + * modifications + * + * @throws Exception if an exception occurs + */ + private void assertSampleReader( + IdfileReader idfileReader, + int advocateIndex, + String expectedSequence, + int expectedVariableModifications + ) throws Exception { + + ArrayList<SpectrumMatch> spectrumMatches = idfileReader.getAllSpectrumMatches(new SimpleSpectrumProvider(), null, new SearchParameters()); + + Assert.assertEquals(1, spectrumMatches.size()); + + SpectrumMatch spectrumMatch = spectrumMatches.get(0); + + Assert.assertEquals("SF_200217_U2OS_TiO2_HCD_OT_rep1", spectrumMatch.getSpectrumFile()); + Assert.assertEquals("0", spectrumMatch.getSpectrumTitle()); + + TreeMap<Double, ArrayList<PeptideAssumption>> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + + Assert.assertNotNull(assumptions); + + PeptideAssumption peptideAssumption = assumptions.firstEntry().getValue().get(0); + + Assert.assertEquals(expectedSequence, peptideAssumption.getPeptide().getSequence()); + Assert.assertEquals(expectedVariableModifications, peptideAssumption.getPeptide().getVariableModifications().length); + Assert.assertTrue(peptideAssumption.getPeptide().getMass() > 0.0); + Assert.assertTrue(peptideAssumption.getTheoreticMz() > 0.0); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.getAdvocate(advocateIndex).getName())); + + if (advocateIndex == Advocate.instanovoRefined.getIndex()) { + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovo.getName())); + Assert.assertTrue(idfileReader.getSoftwareVersions().containsKey(Advocate.instanovoPlus.getName())); + } + } + + /** + * Returns the first assumption for a spectrum title.
+ * + * @param spectrumMatches the spectrum matches + * @param spectrumTitle the spectrum title + * @param advocateIndex the advocate index + * + * @return the first peptide assumption + */ + private PeptideAssumption getFirstAssumption(ArrayList<SpectrumMatch> spectrumMatches, String spectrumTitle, int advocateIndex) { + + for (SpectrumMatch spectrumMatch : spectrumMatches) { + + if (spectrumMatch.getSpectrumTitle().equals(spectrumTitle)) { + + TreeMap<Double, ArrayList<PeptideAssumption>> assumptions = spectrumMatch.getAllPeptideAssumptions(advocateIndex); + + Assert.assertNotNull(assumptions); + + return assumptions.firstEntry().getValue().get(0); + } + } + + Assert.fail("No spectrum match found for title " + spectrumTitle + "."); + + return null; + } + + /** + * Asserts a modification match. + * + * @param peptideAssumption the peptide assumption + * @param modification the modification name + * @param site the modification site + */ + private void assertModification(PeptideAssumption peptideAssumption, String modification, int site) { + + for (ModificationMatch modificationMatch : peptideAssumption.getPeptide().getVariableModifications()) { + + if (modificationMatch.getModification().equals(modification) && modificationMatch.getSite() == site) { + return; + } + } + + Assert.fail("Modification " + modification + " at site " + site + " not found."); + } + + /** + * Writes a temporary CSV file. + * + * @param fileName the file name + * @param content the content + * + * @return the CSV file + * + * @throws IOException if an IOException occurs + */ + private File writeCsv(String fileName, String content) throws IOException { + + File file = File.createTempFile(fileName, ""); + file.deleteOnExit(); + + try (FileWriter writer = new FileWriter(file)) { + writer.write(content); + } + + return file; + } + + /** + * Simple spectrum provider for tests. + */ + private static class SimpleSpectrumProvider implements SpectrumProvider { + + /** + * File names without extensions.
+ */ + private final String[] fileNames; + /** + * Spectrum titles. + */ + private final String[] titles; + + /** + * Default constructor. + */ + private SimpleSpectrumProvider() { + this(new String[]{"example"}, new String[]{"0", "1", "2", "3", "4"}); + } + + /** + * Constructor. + * + * @param fileNames the file names + * @param titles the spectrum titles + */ + private SimpleSpectrumProvider(String[] fileNames, String[] titles) { + this.fileNames = fileNames; + this.titles = titles; + } + + @Override + public Spectrum getSpectrum(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public Precursor getPrecursor(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public ArrayList getPostcursorSpectrumTitles(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public double getPrecursorMz(String fileNameWithoutExtension, String spectrumTitle) { + return 0; + } + + @Override + public double getPrecursorRt(String fileNameWithoutExtension, String spectrumTitle) { + return 0; + } + + @Override + public int getSpectrumLevel(String fileNameWithoutExtension, String spectrumTitle) { + return 2; + } + + @Override + public double[][] getPeaks(String fileNameWithoutExtension, String spectrumTitle) { + return null; + } + + @Override + public double getMinPrecMz(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecMz(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecInt(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMaxPrecRT(String fileNameWithoutExtension) { + return 0; + } + + @Override + public double getMinPrecMz() { + return 0; + } + + @Override + public double getMaxPrecMz() { + return 0; + } + + @Override + public double getMaxPrecInt() { + return 0; + } + + @Override + public double getMaxPrecRT() { + return 0; + } + + @Override + public 
String[] getOrderedFileNamesWithoutExtensions() { + return fileNames; + } + + @Override + public String[] getSpectrumTitles(String fileNameWithoutExtension) { + return titles; + } + + @Override + public HashMap getFilePaths() { + return new HashMap<>(); + } + + @Override + public HashMap getCmsFilePaths() { + return new HashMap<>(); + } + + @Override + public void close() { + // Nothing to close. + } + } +} diff --git a/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java new file mode 100644 index 000000000..f3619643e --- /dev/null +++ b/src/test/java/com/compomics/util/test/parameters/identification/tool_specific/TestInstaNovoParameters.java @@ -0,0 +1,28 @@ +package com.compomics.util.test.parameters.identification.tool_specific; + +import com.compomics.util.parameters.identification.tool_specific.InstaNovoParameters; +import junit.framework.TestCase; +import org.junit.Assert; + +/** + * Tests for InstaNovo specific parameters. + * + * @author CompOmics + */ +public class TestInstaNovoParameters extends TestCase { + + /** + * Tests the desktop-oriented default batch size. + */ + public void testDefaultBatchSize() { + + InstaNovoParameters parameters = new InstaNovoParameters(); + + Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); + Assert.assertTrue(parameters.toString(false).contains("BATCH_SIZE=" + InstaNovoParameters.DEFAULT_BATCH_SIZE)); + + parameters.setBatchSize(-1); + + Assert.assertEquals(InstaNovoParameters.DEFAULT_BATCH_SIZE, parameters.getBatchSize()); + } +}