Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/predictor_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ jobs:
pip cache purge
pip install git+https://github.com/griffithlab/bigmhc.git#egg=bigmhc
pip install git+https://github.com/griffithlab/deepimmuno.git#egg=deepimmuno
pip install git+https://github.com/griffithlab/ImmuScope.git#egg=ImmuScope
pip install -e .
mhcflurry-downloads fetch
immuscope-download-weights
- name: Install MixMHCpred
uses: actions/checkout@master
with:
Expand Down
31 changes: 31 additions & 0 deletions docs/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,37 @@ You can check that DeepImmuno was installed successfully by running:

This should show information about the DeepImmuno installation.

Installing ImmuScope
---------------------

If you wish to run the ImmuScope_IM prediction algorithm, you will need to
install ImmuScope on your system. This package is not a direct dependency of
the ``pvactools`` package and needs to be installed manually by running:

.. code-block:: none

pip install git+https://github.com/griffithlab/ImmuScope.git#egg=ImmuScope

In addition to installing the python package, you will also need to download the
ImmuScope model weights:

.. code-block:: none

immuscope-download-weights

.. note::

   ImmuScope needs to be installed in the same Python 3
   environment as the ``pvactools`` package.

You can check that ImmuScope was installed successfully by running:

.. code-block:: none

pip show immuscope

This should show information about the ImmuScope installation.

.. _blast:

Installing BLAST
Expand Down
15 changes: 15 additions & 0 deletions predictor_tests/test_call_iedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,5 +410,20 @@ def test_mixmhc2pred_method_generates_expected_files(self):
actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[0,13,14])
pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False)

def test_immuscope_im_method_generates_expected_files(self):
call_iedb_output_file = tempfile.NamedTemporaryFile()

pvactools.lib.call_iedb.main([
self.input_file,
call_iedb_output_file.name,
'ImmuScope_IM',
'DRB1*01:01',
'-l', '15',
])
expected_output_file = os.path.join(self.test_data_dir, 'output_immuscope_im.tsv')
expected_df = pd.read_csv(expected_output_file, sep="\t", index_col=[1,5,6])
actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[1,5,6])
pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False)

if __name__ == '__main__':
unittest.main()
2,974 changes: 2,974 additions & 0 deletions predictor_tests/test_data/output_immuscope_im.tsv

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pvactools/lib/aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def determine_used_binding_percentile_algorithms(self):

def determine_used_immunogenicity_score_algorithms(self):
headers = pd.read_csv(self.input_file, delimiter="\t", nrows=0).columns.tolist()
potential_algorithms = ["BigMHC_IM", "DeepImmuno", "PRIME"]
potential_algorithms = ["BigMHC_IM", "DeepImmuno", "PRIME", "ImmuScope_IM"]
prediction_algorithms = []
for algorithm in potential_algorithms:
if "{} MT Immunogenicity Score".format(algorithm) in headers or "{} Immunogenicity Score".format(algorithm) in headers:
Expand All @@ -170,7 +170,7 @@ def determine_used_immunogenicity_score_algorithms(self):

def determine_used_immunogenicity_percentile_algorithms(self):
headers = pd.read_csv(self.input_file, delimiter="\t", nrows=0).columns.tolist()
potential_algorithms = ["BigMHC_IM", "DeepImmuno", "PRIME"]
potential_algorithms = ["BigMHC_IM", "DeepImmuno", "PRIME", "ImmuScope_IM"]
prediction_algorithms = []
for algorithm in potential_algorithms:
if "{} MT Percentile".format(algorithm) in headers or "{} Percentile".format(algorithm) in headers:
Expand Down
15 changes: 11 additions & 4 deletions pvactools/lib/output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,13 @@ def get_scores(self, line, method):
line.get('BigMHC_IM'), method,
percentile_keys=None, percentile_fallback='NA', is_reversed=True
)

if m == 'immuscope_im':
return self._make_score_entry(
line, 'ImmuScope_IM', 'immunogenicity',
line.get('ImmuScope_IM'), method,
percentile_keys=None, percentile_fallback='NA', is_reversed=True
)

if m == 'netmhcpan_el':
presentation = line.get('score')
Expand Down Expand Up @@ -888,7 +895,7 @@ def output_headers(self):
elif method in ['BigMHC_EL', 'netmhciipan_el', 'netmhcpan_el', 'MixMHC2pred']:
headers.append("%s WT Presentation Score" % pretty_method)
headers.append("%s MT Presentation Score" % pretty_method)
elif method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
elif method in ['BigMHC_IM', 'DeepImmuno', 'PRIME', 'ImmuScope_IM']:
headers.append("%s WT Immunogenicity Score" % pretty_method)
headers.append("%s MT Immunogenicity Score" % pretty_method)
else:
Expand Down Expand Up @@ -949,7 +956,7 @@ def add_prediction_scores(self, row, mt_scores, wt_scores):
elif pretty_method in ['BigMHC_EL', 'NetMHCIIpanEL', 'NetMHCpanEL', 'MixMHC2pred']:
row[f'{pretty_method} MT Presentation Score'] = self.score_or_na(mt_scores, pretty_method, 'presentation')
row[f'{pretty_method} WT Presentation Score'] = self.score_or_na(wt_scores, pretty_method, 'presentation')
elif pretty_method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
elif pretty_method in ['BigMHC_IM', 'DeepImmuno', 'PRIME', 'ImmuScope_IM']:
row[f'{pretty_method} MT Immunogenicity Score'] = self.score_or_na(mt_scores, pretty_method, 'immunogenicity')
row[f'{pretty_method} WT Immunogenicity Score'] = self.score_or_na(wt_scores, pretty_method, 'immunogenicity')
else:
Expand Down Expand Up @@ -1256,7 +1263,7 @@ def output_headers(self):
headers.append("%s Binding Score" % pretty_method)
elif method in ['BigMHC_EL', 'netmhciipan_el', 'netmhcpan_el', 'MixMHC2pred']:
headers.append("%s Presentation Score" % pretty_method)
elif method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
elif method in ['BigMHC_IM', 'DeepImmuno', 'PRIME', 'ImmuScope_IM']:
headers.append("%s Immunogenicity Score" % pretty_method)
else:
headers.append("%s IC50 Score" % pretty_method)
Expand Down Expand Up @@ -1288,7 +1295,7 @@ def add_prediction_scores(self, row, mt_scores):
row[f'{pretty_method} Binding Score'] = self.score_or_na(mt_scores, pretty_method, 'binding_score')
elif pretty_method in ['BigMHC_EL', 'NetMHCIIpanEL', 'NetMHCpanEL', 'MixMHC2pred']:
row[f'{pretty_method} Presentation Score'] = self.score_or_na(mt_scores, pretty_method, 'presentation')
elif pretty_method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
elif pretty_method in ['BigMHC_IM', 'DeepImmuno', 'PRIME', 'ImmuScope_IM']:
row[f'{pretty_method} Immunogenicity Score'] = self.score_or_na(mt_scores, pretty_method, 'immunogenicity')
else:
row[f'{pretty_method} IC50 Score'] = self.score_or_na(mt_scores, pretty_method, 'ic50')
Expand Down
2 changes: 1 addition & 1 deletion pvactools/lib/post_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_flurry_state(self):
def is_el(self, algorithm):
if algorithm == 'MHCflurry' and self.flurry_state == 'EL_only':
return True
if algorithm in ['NetMHCIIpanEL', 'NetMHCpanEL', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno', 'MixMHCpred', 'PRIME']:
if algorithm in ['NetMHCIIpanEL', 'NetMHCpanEL', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno', 'MixMHCpred', 'PRIME', 'ImmuScope_IM']:
return True
return False

Expand Down
147 changes: 147 additions & 0 deletions pvactools/lib/prediction_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,153 @@ def iedb_executable_params(self, iedb_executable_path, method, allele, input_fil
class NetMHCIIVersion:
netmhciipan_version = None

class ImmuScope_IM(MHCII):
immuscope_score_col = 'ImmuScope_IM'
immuscope_weights_dir = '/opt/ImmuScope/weights'

def resolved_immuscope_weights_dir(self):
xdg_data_home = os.environ.get('XDG_DATA_HOME')
if xdg_data_home:
user_dir = os.path.join(xdg_data_home, 'ImmuScope', 'weights')
else:
user_dir = os.path.join(os.path.expanduser('~/.local/share'), 'ImmuScope', 'weights')

if os.path.isdir(os.path.join(user_dir, 'IM')):
return user_dir

return self.immuscope_weights_dir

def valid_allele_names(self):
"""Return allele names supported by ImmuScope.

Allele support is maintained as a static list in
`tools/pvacseq/iedb_alleles/class_ii/Immuscope.txt`.
"""

base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
alleles_dir = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_ii')
alleles_file_name = os.path.join(alleles_dir, 'Immuscope.txt')
with open(alleles_file_name, 'r') as fh:
return list(filter(None, (line.strip() for line in fh)))

def valid_lengths_for_allele(self, allele):
return [11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]

def check_length_valid_for_allele(self, length, allele):
if length not in self.valid_lengths_for_allele(allele):
sys.exit(
"Epitope length %s not supported for method %s. Valid lengths are: %s" % (
length,
self.__class__.__name__,
','.join(map(str, self.valid_lengths_for_allele(allele)))
)
)

def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
weights_dir = self.resolved_immuscope_weights_dir()
if not os.path.isdir(os.path.join(weights_dir, 'IM')):
raise Exception(
"ImmuScope weights directory not found at {}. "
"Install weights (e.g. run `immuscope-download-weights`) or set IMMU_SCOPE_WEIGHTS_DIR.".format(weights_dir)
)

results = pd.DataFrame()

metadata_rows = []
unique_pairs = set()
for record in SeqIO.parse(input_file, "fasta"):
seq_num = record.id
peptide = str(record.seq)
epitopes = pvactools.lib.run_utils.determine_neoepitopes(peptide, epitope_length)
for start, epitope in epitopes.items():
metadata_rows.append({
'allele': allele,
'peptide': epitope,
'seq_num': seq_num,
'start': start,
})
unique_pairs.add((allele, epitope))

all_epitopes = [peptide for _, peptide in unique_pairs]

if len(all_epitopes) == 0:
return (results, 'pandas')

tmp_input_file = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False, newline='')
writer = csv.writer(tmp_input_file, delimiter='\t', lineterminator='\n')
writer.writerow(["allele", "peptide", "seq_num", "start"])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it required by the predictor to add the seq_num and start to the input file? If not I think these columns can be removed.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, they are currently required by the predictor/wrapper interface as implemented. If you would rather them be excluded, I can update the fork to make these optional

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. I think we could generate the file with these columns filled in by creating it in the same block of code above where we read in the fasta file (line 878+). The determine_neoepitopes method returns a hash with the start position as the key and the epitope as the value. The fasta sequence header can be used as the seq_num.

I assume that the output includes these two columns as well so that would then save us from having to map back each epitope to it's seq num and start position (line 934+). This would be at the expense of potentially having duplicate epitopes in that file if there are repetitive regions etc which could make ImmunoScope slower (not sure if they accounted for this).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I pushed a commit that keeps the deduped peptide set for scoring, but captures seq_num and start during the initial FASTA parsing and then merges them back onto the ImmuScope output. This lets us drop the remapping loop while still preserving those fields cleanly.

The performance of ImmuScope would be impacted if we passed every epitope occurrence directly to the wrapper with seq_num/start filled in, since it would score duplicates instead of just unique peptides. This approach avoids that by keeping the input deduplicated and only expanding back afterward.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure if Immuscope was being smart and deduplicates epitopes on their end.

for epitope in all_epitopes:
writer.writerow([allele, epitope, "", ""])
tmp_input_file.close()

tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
tmp_output_file.close()

arguments = [
'immuscope-wrapper',
'--input', tmp_input_file.name,
'--output', tmp_output_file.name,
'--allele-col', 'allele',
'--peptide-col', 'peptide',
'--seq-num-col', 'seq_num',
'--start-col', 'start',
]
stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
try:
run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
except:
stderr_fh.close()
with open(stderr_fh.name, 'r') as fh:
err = fh.read()
os.unlink(stderr_fh.name)
os.unlink(tmp_input_file.name)
if os.path.exists(tmp_output_file.name):
os.unlink(tmp_output_file.name)
raise Exception("An error occurred while calling ImmuScope:\n{}".format(err))
stderr_fh.close()
os.unlink(stderr_fh.name)
os.unlink(tmp_input_file.name)

df = pd.read_csv(tmp_output_file.name, sep='\t')
os.unlink(tmp_output_file.name)

# Wrapper emits: allele peptide tgt len ImmuScope_IM seq_num start
if self.immuscope_score_col not in df.columns:
raise Exception(
"ImmuScope wrapper output missing expected score column '{}'. Found columns: {}".format(
self.immuscope_score_col,
','.join(df.columns),
)
)

required_columns = {'allele', 'peptide', self.immuscope_score_col}
missing_columns = required_columns.difference(df.columns)
if missing_columns:
raise Exception(
"ImmuScope wrapper output missing expected columns {}. Found columns: {}".format(
','.join(sorted(missing_columns)),
','.join(df.columns),
)
)

metadata_df = pd.DataFrame(metadata_rows)
score_df = df.drop(columns=[col for col in ['seq_num', 'start'] if col in df.columns])
results = metadata_df.merge(
score_df,
on=['allele', 'peptide'],
how='left',
validate='many_to_one',
)

ordered_columns = [
col for col in ['allele', 'peptide', 'tgt', 'len', self.immuscope_score_col, 'seq_num', 'start']
if col in results.columns
]
remaining_columns = [col for col in results.columns if col not in ordered_columns]
results = results[ordered_columns + remaining_columns]

return (results, 'pandas')

class NetMHCIIpan(IEDBMHCII):
@property
def iedb_prediction_method(self):
Expand Down
Loading
Loading