diff --git a/setup.py b/setup.py index ce475d6..f71eb08 100644 --- a/setup.py +++ b/setup.py @@ -21,13 +21,13 @@ def run(self): if not self.dry_run: try: for tool in ['samtools', 'htslib', 'bcftools']: - if not os.path.exists(f'build/{tool}'): - if not os.path.exists(f'{tool}.tar.bz2'): - _run(['wget', - f'https://github.com/samtools/{tool}/releases/download/{SAMTOOLS_VERSION}/{tool}-{SAMTOOLS_VERSION}.tar.bz2', - '-O', f'{tool}.tar.bz2']) - os.makedirs(f'build/{tool}', exist_ok=True) - _run(["tar", "-vxjf", f"{tool}.tar.bz2", f"--directory=build/{tool}", "--strip-components=1"]) + # if not os.path.exists(f'build/{tool}'): + # if not os.path.exists(f'{tool}.tar.bz2'): + _run(['wget', + f'https://github.com/samtools/{tool}/releases/download/{SAMTOOLS_VERSION}/{tool}-{SAMTOOLS_VERSION}.tar.bz2', + '-O', f'{tool}.tar.bz2']) + os.makedirs(f'build/{tool}', exist_ok=True) + _run(["tar", "-vxjf", f"{tool}.tar.bz2", f"--directory=build/{tool}", "--strip-components=1"]) for tool in ['samtools', 'htslib', 'bcftools']: _run(["./configure"], cwd=f"build/{tool}") diff --git a/xsamtools/cli/__init__.py b/xsamtools/cli/__init__.py index e2b5b6d..a06cc55 100644 --- a/xsamtools/cli/__init__.py +++ b/xsamtools/cli/__init__.py @@ -3,6 +3,7 @@ from textwrap import dedent from xsamtools import samtools +from xsamtools.cli.view import samtools_view from xsamtools.cli.cram import view from xsamtools.cli.vcf import merge, subsample, stats @@ -11,25 +12,30 @@ def add_cram_subparser(subparsers): cram_parser = subparsers.add_parser('cram') # The api here is somewhat redundant, cram only has a single sub-command/parser cram_subparsers = cram_parser.add_subparsers() - view_parser = cram_subparsers.add_parser('view', description='A limited wrapper around "samtools view", but with ' - 'functions to operate on google cloud bucket keys.') - view_parser.add_argument("--cram", type=str, required=True, - help="Input cram file. 
This can be a Google Storage object if prefixed with 'gs://'.") - view_parser.add_argument("--crai", type=str, required=False, - help="Input crai file. This can be a Google Storage file (e.g. gs://bucket/key) or a " + cram_view_parser = cram_subparsers.add_parser('view', + description='A limited wrapper around "samtools view", but with ' + 'functions to operate on google cloud bucket keys.') + cram_view_parser.add_argument("--cram", + type=str, + required=True, + help="Input cram file. This can be a Google Storage object if prefixed with 'gs://'.") + cram_view_parser.add_argument("--crai", + type=str, + required=False, + help="Input crai file. This can be a Google Storage file (e.g. gs://bucket/key) or a " "local file. If not specified, one will be generated for you (this may take a long " "time).") # TODO: add an argument to intake a BED file. - view_parser.add_argument("--regions", type=str, required=False, default=None, + cram_view_parser.add_argument("--regions", type=str, required=False, default=None, help="A comma-delimited list of regions of sequence in the input cram file to subset as " "the output CRAM. For example, something like: 'ch1,ch2' or " "'chromsome_1:10000,chromosome2'.") - view_parser.add_argument("-C", action='store_true', required=False, + cram_view_parser.add_argument("-C", action='store_true', required=False, help="Write the output file in CRAM format.") # TODO: Allow this to be a google key. 
- view_parser.add_argument("--output", type=str, required=False, default=None, + cram_view_parser.add_argument("--output", type=str, required=False, default=None, help="A local output file path for the generated cram file.") - view_parser.set_defaults(func=view) + cram_view_parser.set_defaults(func=view) def merge_options(): @@ -40,6 +46,15 @@ def merge_options(): return "bcftools merge arguments:" + options +def add_view_subparser(subparsers): + view_parser = subparsers.add_parser( + 'view', + description='A wrapper around samtools that accepts gs:// and drs:// input file paths.', + formatter_class=argparse.RawDescriptionHelpFormatter + ) + view_parser.set_defaults(func=samtools_view) + + def add_vcf_subparser(subparsers): vcf_parser = subparsers.add_parser('vcf') vcf_subparsers = vcf_parser.add_subparsers() @@ -84,5 +99,6 @@ def main(args): subparsers = parser.add_subparsers() add_cram_subparser(subparsers) add_vcf_subparser(subparsers) + add_view_subparser(subparsers) args, extra_args = parser.parse_known_args(args) args.func(args, extra_args) diff --git a/xsamtools/cli/view.py b/xsamtools/cli/view.py new file mode 100644 index 0000000..2e2649b --- /dev/null +++ b/xsamtools/cli/view.py @@ -0,0 +1,14 @@ +""" +Command-line handler for the top-level "view" sub-command. +""" +import argparse +from typing import Sequence + +from xsamtools import view + + +def samtools_view(args: argparse.Namespace, extra_args: Sequence[str]): + """ + A limited wrapper around "samtools view", but with functions to operate on drs and google cloud bucket keys. 
+    """
+    view.samtools_view(extra_args)
diff --git a/xsamtools/cram.py b/xsamtools/cram.py
old mode 100755
new mode 100644
diff --git a/xsamtools/samtools.py b/xsamtools/samtools.py
index 0c0acab..e125044 100644
--- a/xsamtools/samtools.py
+++ b/xsamtools/samtools.py
@@ -1,9 +1,7 @@
 import os
-import sys
 import typing
 import warnings
 import subprocess
-from typing import Optional
 
 import xsamtools
 
@@ -26,11 +24,12 @@ def _samtools_binary_path(name):
                  htsfile=os.path.join(xsamtools.__path__[0], "..", "build", "htslib", "htsfile"),
                  samtools=os.path.join(xsamtools.__path__[0], "..", "build", "samtools", "samtools"))
     path = paths[name]
+    print(path)
     try:
         _run([path, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         return path
     except (FileNotFoundError, subprocess.CalledProcessError):
-        pass
+        raise
     return None
 
 for name in paths:
diff --git a/xsamtools/utils.py b/xsamtools/utils.py
index ed18a72..a981efb 100644
--- a/xsamtools/utils.py
+++ b/xsamtools/utils.py
@@ -1,8 +1,31 @@
 import subprocess
+import terra_notebook_utils as tnu
 
 from typing import Any
 
 
+def substitute_drs_and_gs_uris_for_http(args):
+    """
+    Return a copy of args in which drs:// and gs:// URIs are replaced by what tnu resolves them to.
+
+    An argument is a bare value or a "-key=value" option; only the value is replaced and the key is
+    kept. The resolver receives the URI alone, without the key prefix or surrounding quotes.
+    """
+    new_args = []
+    for arg in args:
+        if arg.startswith('-') and '=' in arg:
+            key_arg, value_arg = arg.split('=', 1)
+        else:
+            key_arg, value_arg = None, arg
+        uri = value_arg.strip('"').strip("'")
+        if uri.startswith('drs://'):
+            value_arg = tnu.drs.access(uri)
+        elif uri.startswith('gs://'):
+            value_arg = tnu.gs.get_signed_url(uri)
+        new_args.append(f'{key_arg}={value_arg}' if key_arg else value_arg)
+    return new_args
+
+
 def run(cmd: Any, check: bool = True, **kwargs) -> subprocess.CompletedProcess:
     """
     Subprocess.run() that will default to printing stderr when raising on a non-zero error code.
@@ -20,5 +43,6 @@ def run(cmd: Any, check: bool = True, **kwargs) -> subprocess.CompletedProcess:
         raise subprocess.CalledProcessError(process.returncode, cmd, process.stdout, process.stderr)
     return process
 
+
 class XSamtoolsCalledProcessError(Exception):
     pass
diff --git a/xsamtools/version.py b/xsamtools/version.py
index d07e93f..ef7eb44 100644
--- a/xsamtools/version.py
+++ b/xsamtools/version.py
@@ -1,2 +1 @@
-__version__ = "0.6.0"
-
+__version__ = '0.6.0'
diff --git a/xsamtools/view.py b/xsamtools/view.py
new file mode 100644
index 0000000..40de9a3
--- /dev/null
+++ b/xsamtools/view.py
@@ -0,0 +1,34 @@
+"""
+A limited wrapper around "samtools view", but with functions to operate on drs and google cloud bucket keys.
+"""
+import sys
+import subprocess
+import logging
+
+from xsamtools.utils import substitute_drs_and_gs_uris_for_http
+from xsamtools import samtools
+
+
+log = logging.getLogger(__name__)
+
+
+def samtools_view(preset_args):
+    """
+    Run "samtools view" with the given arguments, echoing its output to sys.stdout.
+
+    The arguments are first passed through substitute_drs_and_gs_uris_for_http().
+    The child's stdout and stderr are merged and relayed line by line, decoded as UTF-8.
+
+    Raises subprocess.CalledProcessError if samtools exits with a non-zero return code.
+    """
+    preset_args = substitute_drs_and_gs_uris_for_http(preset_args)
+    cmd = [samtools.paths['samtools'], 'view'] + preset_args
+    # The context manager closes the pipe and waits for the child on exit; without a wait,
+    # Popen.returncode stays None and a failed command would go unnoticed below.
+    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as process:
+        for line in process.stdout:
+            sys.stdout.write(line.decode('utf-8'))
+    if process.returncode:
+        sys.stdout.write(f'Command: "{cmd}" failed with return code: {process.returncode}')
+        # Everything the child printed has already been relayed, so no output is attached here.
+        raise subprocess.CalledProcessError(process.returncode, cmd)