Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ def run(self):
if not self.dry_run:
try:
for tool in ['samtools', 'htslib', 'bcftools']:
if not os.path.exists(f'build/{tool}'):
if not os.path.exists(f'{tool}.tar.bz2'):
_run(['wget',
f'https://github.com/samtools/{tool}/releases/download/{SAMTOOLS_VERSION}/{tool}-{SAMTOOLS_VERSION}.tar.bz2',
'-O', f'{tool}.tar.bz2'])
os.makedirs(f'build/{tool}', exist_ok=True)
_run(["tar", "-vxjf", f"{tool}.tar.bz2", f"--directory=build/{tool}", "--strip-components=1"])
# if not os.path.exists(f'build/{tool}'):
# if not os.path.exists(f'{tool}.tar.bz2'):
_run(['wget',
f'https://github.com/samtools/{tool}/releases/download/{SAMTOOLS_VERSION}/{tool}-{SAMTOOLS_VERSION}.tar.bz2',
'-O', f'{tool}.tar.bz2'])
os.makedirs(f'build/{tool}', exist_ok=True)
_run(["tar", "-vxjf", f"{tool}.tar.bz2", f"--directory=build/{tool}", "--strip-components=1"])

for tool in ['samtools', 'htslib', 'bcftools']:
_run(["./configure"], cwd=f"build/{tool}")
Expand Down
36 changes: 26 additions & 10 deletions xsamtools/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from textwrap import dedent

from xsamtools import samtools
from xsamtools.cli.view import samtools_view
from xsamtools.cli.cram import view
from xsamtools.cli.vcf import merge, subsample, stats

Expand All @@ -11,25 +12,30 @@ def add_cram_subparser(subparsers):
cram_parser = subparsers.add_parser('cram')
# The api here is somewhat redundant, cram only has a single sub-command/parser
cram_subparsers = cram_parser.add_subparsers()
view_parser = cram_subparsers.add_parser('view', description='A limited wrapper around "samtools view", but with '
'functions to operate on google cloud bucket keys.')
view_parser.add_argument("--cram", type=str, required=True,
help="Input cram file. This can be a Google Storage object if prefixed with 'gs://'.")
view_parser.add_argument("--crai", type=str, required=False,
help="Input crai file. This can be a Google Storage file (e.g. gs://bucket/key) or a "
cram_view_parser = cram_subparsers.add_parser('view',
description='A limited wrapper around "samtools view", but with '
'functions to operate on google cloud bucket keys.')
cram_view_parser.add_argument("--cram",
type=str,
required=True,
help="Input cram file. This can be a Google Storage object if prefixed with 'gs://'.")
cram_view_parser.add_argument("--crai",
type=str,
required=False,
help="Input crai file. This can be a Google Storage file (e.g. gs://bucket/key) or a "
"local file. If not specified, one will be generated for you (this may take a long "
"time).")
# TODO: add an argument to intake a BED file.
view_parser.add_argument("--regions", type=str, required=False, default=None,
cram_view_parser.add_argument("--regions", type=str, required=False, default=None,
help="A comma-delimited list of regions of sequence in the input cram file to subset as "
"the output CRAM. For example, something like: 'ch1,ch2' or "
"'chromsome_1:10000,chromosome2'.")
view_parser.add_argument("-C", action='store_true', required=False,
cram_view_parser.add_argument("-C", action='store_true', required=False,
help="Write the output file in CRAM format.")
# TODO: Allow this to be a google key.
view_parser.add_argument("--output", type=str, required=False, default=None,
cram_view_parser.add_argument("--output", type=str, required=False, default=None,
help="A local output file path for the generated cram file.")
view_parser.set_defaults(func=view)
cram_view_parser.set_defaults(func=view)


def merge_options():
Expand All @@ -40,6 +46,15 @@ def merge_options():
return "bcftools merge arguments:" + options


def add_view_subparser(subparsers):
    """
    Register the top-level ``view`` sub-command on ``subparsers``.

    The sub-command declares no options of its own; it only installs
    ``samtools_view`` as the ``func`` default of the parsed namespace.
    """
    parser_options = dict(
        description='A wrapper around samtools that accepts gs:// and drs:// input file paths.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers.add_parser('view', **parser_options).set_defaults(func=samtools_view)


def add_vcf_subparser(subparsers):
vcf_parser = subparsers.add_parser('vcf')
vcf_subparsers = vcf_parser.add_subparsers()
Expand Down Expand Up @@ -84,5 +99,6 @@ def main(args):
subparsers = parser.add_subparsers()
add_cram_subparser(subparsers)
add_vcf_subparser(subparsers)
add_view_subparser(subparsers)
args, extra_args = parser.parse_known_args(args)
args.func(args, extra_args)
14 changes: 14 additions & 0 deletions xsamtools/cli/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Command-line handler for the "samtools view" wrapper sub-command.
"""
import argparse
from typing import Sequence

from xsamtools import view


def samtools_view(args: argparse.Namespace, extra_args: Sequence[str]):
    """
    CLI handler for the ``view`` sub-command.

    Hands ``extra_args`` to ``view.samtools_view``; ``args`` is accepted to match the
    handler call signature but is not used here.
    """
    view.samtools_view(extra_args)
Empty file modified xsamtools/cram.py
100755 → 100644
Empty file.
5 changes: 2 additions & 3 deletions xsamtools/samtools.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import os
import sys
import typing
import warnings
import subprocess
from typing import Optional

import xsamtools

Expand All @@ -26,11 +24,12 @@ def _samtools_binary_path(name):
htsfile=os.path.join(xsamtools.__path__[0], "..", "build", "htslib", "htsfile"),
samtools=os.path.join(xsamtools.__path__[0], "..", "build", "samtools", "samtools"))
path = paths[name]
print(path)
try:
_run([path, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return path
except (FileNotFoundError, subprocess.CalledProcessError):
pass
raise
return None

for name in paths:
Expand Down
24 changes: 24 additions & 0 deletions xsamtools/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
import subprocess
import terra_notebook_utils as tnu

from typing import Any


def substitute_drs_and_gs_uris_for_http(args):
    """
    Replace ``drs://`` and ``gs://`` URIs in a list of command-line arguments.

    Each argument is treated either as a bare value or as an ``-option=value`` pair
    (split on the first ``=``). When the value, ignoring surrounding quote characters,
    starts with ``drs://`` it is replaced by the result of ``tnu.drs.access``; when it
    starts with ``gs://`` it is replaced by the result of ``tnu.gs.get_signed_url``.
    All other arguments pass through unchanged.

    :param args: iterable of argument strings.
    :return: a new list of argument strings, in the same order as ``args``.
    """
    new_args = []
    for arg in args:
        if arg.startswith('-') and '=' in arg:
            key_arg, value_arg = arg.split('=', 1)
        else:
            key_arg, value_arg = None, arg

        # Quotes are removed only to inspect and resolve the URI; values that are not
        # URIs keep their original text, quotes included.
        uri = value_arg.strip('"').strip("'")
        if uri.startswith('drs://'):
            # Resolve the URI itself, not the whole "-option=uri" argument.
            value_arg = tnu.drs.access(uri)
        elif uri.startswith('gs://'):
            value_arg = tnu.gs.get_signed_url(uri)

        new_args.append(value_arg if key_arg is None else f'{key_arg}={value_arg}')
    return new_args


def run(cmd: Any, check: bool = True, **kwargs) -> subprocess.CompletedProcess:
"""
Subprocess.run() that will default to printing stderr when raising on a non-zero error code.
Expand All @@ -20,5 +43,6 @@ def run(cmd: Any, check: bool = True, **kwargs) -> subprocess.CompletedProcess:
raise subprocess.CalledProcessError(process.returncode, cmd, process.stdout, process.stderr)
return process


class XSamtoolsCalledProcessError(Exception):
    """Exception type defined by xsamtools; adds nothing to ``Exception``."""
3 changes: 1 addition & 2 deletions xsamtools/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
__version__ = "0.6.0"

__version__ = '0.6.0'
26 changes: 26 additions & 0 deletions xsamtools/view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
A limited wrapper around "samtools view", but with functions to operate on drs and google cloud bucket keys.
"""
import sys
import subprocess
import logging

from xsamtools.utils import substitute_drs_and_gs_uris_for_http
from xsamtools import samtools


log = logging.getLogger(__name__)


def samtools_view(preset_args):
    """
    Run "samtools view" with the given arguments, echoing its output to stdout.

    Arguments are first passed through ``substitute_drs_and_gs_uris_for_http``. The
    child's stderr is merged into its stdout, and the combined stream is written to
    ``sys.stdout`` line by line as it arrives.

    :param preset_args: list of arguments appended after "samtools view".
    :raises subprocess.CalledProcessError: if the command exits with a non-zero return code.
    """
    preset_args = substitute_drs_and_gs_uris_for_http(preset_args)
    cmd = [samtools.paths['samtools'], 'view'] + preset_args
    # Leaving the ``with`` block closes the pipe and waits for the child. Without a
    # wait, ``returncode`` stays None and a failed command would go unnoticed.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as process:
        # NOTE(review): each line is decoded as UTF-8, so non-text output would raise
        # UnicodeDecodeError here - confirm binary output is not expected on this path.
        for line in process.stdout:
            sys.stdout.write(line.decode('utf-8'))
    if process.returncode:
        sys.stdout.write(f'Command: "{cmd}" failed with return code: {process.returncode}')
        # The output was already streamed above, so nothing captured is attached.
        raise subprocess.CalledProcessError(process.returncode, cmd)