diff --git a/Snakefile b/Snakefile index 1ac3c23..33e77ff 100644 --- a/Snakefile +++ b/Snakefile @@ -9,11 +9,14 @@ min_version("8.0.0") configfile: "config/configfile.yaml" +include: "shared/vendored/snakemake/config.smk" +include: "workflow/snakemake_rules/config.smk" wildcard_constraints: a_or_b=r"a|b", build_name="|".join(config.get("builds_to_run", ["genome"])), resolution="|".join(config.get("resolutions_to_run", ["all-time"])), + gene="G|F", build_dir = "results" diff --git a/config/auspice_config.json b/config/auspice_config.json index b41aa67..b641b53 100644 --- a/config/auspice_config.json +++ b/config/auspice_config.json @@ -3,7 +3,7 @@ "maintainers": [ {"name": "Nextstrain team", "url": "http://nextstrain.org"}, {"name": "Richard Neher", "url": "http://nextstrain.org"}, - {"name": "Bloom lab (antibody escape annotations)", "url": "https://jbloomlab.org/"} + {"name": "Bloom lab (antibody escape annotations)", "url": "https://www.biorxiv.org/content/10.64898/2026.02.12.705519"} ], "data_provenance": [ { diff --git a/config/configfile.yaml b/config/configfile.yaml index 87271ce..8d463b0 100644 --- a/config/configfile.yaml +++ b/config/configfile.yaml @@ -51,9 +51,6 @@ filter: 3y: min_date: 3Y background_min_date: 12Y - 1y: - min_date: 1Y - background_min_date: 12Y subsample_max_sequences: genome: 3000 @@ -61,6 +58,12 @@ filter: F: 3000 F-antibody-escape: 2000 + exclude_where: + recent: ["qc.overallStatus=bad"] + background: ["qc.overallStatus=bad", "qc.overallStatus=mediocre"] + + missing_data_threshold: 1000 + files: auspice_config: "config/auspice_config.json" auspice_config_additional_colorings: "config/auspice_config_additional_colorings.json" @@ -84,6 +87,15 @@ cds: traits: columns: "country region" +frequencies: + resolutions: + all-time: + min_date: 1975-01-01 + 6y: + min_date: 6Y + 3y: + min_date: 3Y + nextclade_attributes: a: name: "RSV-A NextClade using real-time tree" diff --git a/config/description.md b/config/description.md index 00f7cf6..a0a00f0 100644 --- a/config/description.md +++ b/config/description.md @@ -8,7 +8,7 @@ The second is ['rsv/a/G'](https://nextstrain.org/rsv/a/G) and ['rsv/b/G'](https: The third is ['rsv/a/F'](https://nextstrain.org/rsv/a/F) and ['rsv/b/F'](https://nextstrain.org/rsv/b/F), which show evolution of only the F gene. -The fourth is ['rsv/a/F-antibody-escape'](https://nextstrain.org/rsv/a/F-antibody-escape) and ['rsv/b/F-antibody-escape'](https://nextstrain.org/rsv/b/F-antibody-escape), which show evolution of only the F gene but with the available sequences subsampled so as to enrich for sequences that have escape mutations to key monoclonal antibodies as assessed by the deep mutational scanning reported in [Simonich et al]() **ADD CITATION**. Note that these trees as well as the other ones can be colored by the escape from key monoclonal antibodies as computed under an additive model of the mutation effects measured in the deep mutational scanning, with annotations for either *Total Escape* (sum of effects of all mutations) and *Max Escape* (the maximum escape caused by any mutation in the strain). +The fourth is ['rsv/a/F-antibody-escape'](https://nextstrain.org/rsv/a/F-antibody-escape) and ['rsv/b/F-antibody-escape'](https://nextstrain.org/rsv/b/F-antibody-escape), which show evolution of only the F gene but with the available sequences subsampled so as to enrich for sequences that have escape mutations to key monoclonal antibodies as assessed by the deep mutational scanning reported in [Simonich et al (2026)](https://www.biorxiv.org/content/10.64898/2026.02.12.705519). Note that these trees as well as the other ones can be colored by the escape from key monoclonal antibodies as computed under an additive model of the mutation effects measured in the deep mutational scanning, with annotations for either *Total Escape* (sum of effects of all mutations) and *Max Escape* (the maximum escape caused by any mutation in the strain). #### Analysis @@ -22,7 +22,7 @@ Our bioinformatic processing workflow can be found at [github.com/nextstrain/rsv [RSV-A](https://raw.githubusercontent.com/rsv-lineages/lineage-designation-A/main/.auto-generated/lineage.tsv) [RSV-B](https://raw.githubusercontent.com/rsv-lineages/lineage-designation-A/main/.auto-generated/lineage.tsv) These clade definitions are based on the [nomenclature proposal by the RSV Genotyping Consensus Consortium](https://wwwnc.cdc.gov/eid/article/30/8/24-0209_article). -- annotation of antibody escape is done based on an additive model of mutational effects measured in the deep mutational scanning of [Simonich et al]() **ADD CITATION**, and additionally the *F-antibody-escape* builds have the sequences subsampled to enrich for those with escape mutations. +- annotation of antibody escape is done based on an additive model of mutational effects measured in the deep mutational scanning of [Simonich et al](https://www.biorxiv.org/content/10.64898/2026.02.12.705519), and additionally the *F-antibody-escape* builds have the sequences subsampled to enrich for those with escape mutations. #### Underlying sequence data diff --git a/dms-data/README.md b/dms-data/README.md index e9c1af7..f7f0462 100644 --- a/dms-data/README.md +++ b/dms-data/README.md @@ -1,5 +1,5 @@ # Deep mutational scanning data -Data from [Bloom lab](https://jbloomlab.org/) pseudovirus deep mutational scanning of RSV F. +Data from [Bloom lab](https://jbloomlab.org/) pseudovirus deep mutational scanning of RSV F, see [Simonich et al (2026)](https://www.biorxiv.org/content/10.64898/2026.02.12.705519). [all_antibodyes.csv](all_antibodies.csv) taken from [https://github.com/dms-vep/RSV_Long_F_DMS/blob/main/results/summaries/all_antibodies.csv](https://github.com/dms-vep/RSV_Long_F_DMS/blob/main/results/summaries/all_antibodies.csv) and then antibody columns renamed like *Nirsevimab-IgG escape* -> *Nirsevimab-IgG*. diff --git a/logs/traits_rsv_rsv.txt b/logs/traits_rsv_rsv.txt deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/newreference.py b/scripts/newreference.py index a89c819..00355da 100644 --- a/scripts/newreference.py +++ b/scripts/newreference.py @@ -1,7 +1,6 @@ from Bio import SeqIO from Bio.SeqRecord import SeqRecord -from Bio.SeqFeature import SeqFeature, FeatureLocation, Seq -import shutil +from Bio.SeqFeature import SeqFeature, FeatureLocation import argparse import sys @@ -20,7 +19,7 @@ def new_reference(referencefile, outgenbank, outfasta, gene): # If user provides a --gene 'some name' that is not found, error out as this may indicate that # the gene name is misspelled or the user may be using the wrong GenBank file. - if(gene is not None and startofgene is None and endofgene is None): + if(startofgene is None and endofgene is None): print(f"ERROR: No '{gene}' was found under 'gene' or 'CDS' features in the GenBank file.", file=sys.stderr) sys.exit(1) @@ -36,18 +35,14 @@ def new_reference(referencefile, outgenbank, outfasta, gene): if __name__ == '__main__': parser = argparse.ArgumentParser( - description="make new reference depending on whether the entire genome or only part is to be used for the tree", + description="make new reference based on a gene", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument("--reference", required=True, help="GenBank file with reference sequences") - parser.add_argument("--output-fasta", required=True, help="GenBank new reference file") + parser.add_argument("--output-fasta", required=True, help="FASTA new reference file") parser.add_argument("--output-genbank", required=True, help="GenBank new reference file") - parser.add_argument("--gene", help="gene name or genome for entire genome") + parser.add_argument("--gene", required=True, help="gene name") args = parser.parse_args() - if args.gene=='genome': - shutil.copy(args.reference, args.output_genbank) - SeqIO.write(SeqIO.read(args.reference, 'genbank'), args.output_fasta, 'fasta') - else: - new_reference(args.reference, args.output_genbank, args.output_fasta, args.gene) + new_reference(args.reference, args.output_genbank, args.output_fasta, args.gene) diff --git a/shared/vendored/.github/workflows/ci.yaml b/shared/vendored/.github/workflows/ci.yaml index 94d3054..6dba82b 100644 --- a/shared/vendored/.github/workflows/ci.yaml +++ b/shared/vendored/.github/workflows/ci.yaml @@ -11,5 +11,5 @@ jobs: shellcheck: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: nextstrain/.github/actions/shellcheck@master diff --git a/shared/vendored/.github/workflows/pre-commit.yaml b/shared/vendored/.github/workflows/pre-commit.yaml index bea15f6..c63b890 100644 --- a/shared/vendored/.github/workflows/pre-commit.yaml +++ b/shared/vendored/.github/workflows/pre-commit.yaml @@ -7,7 +7,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.12" diff --git a/shared/vendored/.gitrepo b/shared/vendored/.gitrepo index c561adb..3723d3d 100644 --- a/shared/vendored/.gitrepo +++ b/shared/vendored/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/nextstrain/shared branch = main - commit = bfbbb6875b084e22920712d89704ccb259f8950e - parent = 401279d49d2c703f5f79a584c4530dec736fbaad + commit = 37cf39c2a4f4c42474046e70c856acb6a2031e2d + parent = f1f17cec6fe39fa2dd06429a44bcd925a441e47f method = merge - cmdver = 0.4.6 + cmdver = 0.4.9 diff --git a/shared/vendored/snakemake/config.smk b/shared/vendored/snakemake/config.smk index 15727b0..2f0df30 100644 --- a/shared/vendored/snakemake/config.smk +++ b/shared/vendored/snakemake/config.smk @@ -11,6 +11,45 @@ from typing import Optional from textwrap import dedent, indent +# Set search paths for Augur +if "AUGUR_SEARCH_PATHS" in os.environ: + print(dedent(f"""\ + Using existing search paths in AUGUR_SEARCH_PATHS: + + {os.environ["AUGUR_SEARCH_PATHS"]!r} + """), file=sys.stderr) +else: + # Note that this differs from the search paths used in + # resolve_config_path(). + # This is the preferred default moving forwards, and the plan is to + # eventually update resolve_config_path() to use AUGUR_SEARCH_PATHS. + search_paths = [ + # User analysis directory + Path.cwd(), + + # Workflow defaults folder + Path(workflow.basedir) / "defaults", + + # Workflow root (contains Snakefile) + Path(workflow.basedir), + ] + + # This should work for majority of workflows, but we could consider doing a + # more thorough search for the nextstrain-pathogen.yaml. This would likely + # replicate how CLI searches for the root.¹ + # ¹ + repo_root = Path(workflow.basedir) / ".." + if (repo_root / "nextstrain-pathogen.yaml").is_file(): + search_paths.extend([ + # Pathogen repo root + repo_root, + ]) + + search_paths = [path.resolve() for path in search_paths if path.is_dir()] + + os.environ["AUGUR_SEARCH_PATHS"] = ":".join(map(str, search_paths)) + + class InvalidConfigError(Exception): pass @@ -147,15 +186,40 @@ def resolve_config_path(path: str, defaults_dir: Optional[str] = None) -> Callab return _resolve_config_path -def write_config(path): +def write_config(path, section=None): """ - Write Snakemake's 'config' variable to a file. + Write Snakemake's 'config' variable, or a section of it, to a file. + + *section* is an optional list of keys to navigate to a specific section of + config. If provided, only that section will be written. """ global config os.makedirs(os.path.dirname(path), exist_ok=True) + data = config + section_str = "config" + + if section: + # Navigate to the specified section + for key in section: + # Error if key doesn't exist + if key not in data: + raise Exception(f"ERROR: Key {key!r} not found in {section_str!r}.") + + data = data[key] + section_str += f".{key}" + + # Error if value is not a mapping + if not isinstance(data, dict): + raise Exception(f"ERROR: {section_str!r} is not a mapping of key/value pairs.") + with open(path, 'w') as f: - yaml.dump(config, f, sort_keys=False) + yaml.dump(data, f, sort_keys=False, Dumper=NoAliasDumper) + + print(f"Saved {section_str!r} to {path!r}.", file=sys.stderr) + - print(f"Saved current run config to {path!r}.", file=sys.stderr) +class NoAliasDumper(yaml.SafeDumper): + def ignore_aliases(self, data): + return True diff --git a/workflow/snakemake_rules/chores.smk b/workflow/snakemake_rules/chores.smk index eae7d72..27578ea 100644 --- a/workflow/snakemake_rules/chores.smk +++ b/workflow/snakemake_rules/chores.smk @@ -6,10 +6,8 @@ rule update_example_data_wildcards: The subset of data is generated by an augur filter call which: - sets the subsampling size to 50 - - applies the grouping from the config + - groups by year and country """ - message: - "Update example data" input: sequences = "results/{a_or_b}/sequences.fasta", metadata = "results/{a_or_b}/metadata.tsv", @@ -18,14 +16,13 @@ rule update_example_data_wildcards: metadata = "example_data/{a_or_b}/metadata.tsv", params: strain_id=config["strain_id_field"], - group_by=config["filter"]["group_by"], shell: """ augur filter \ --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ --sequences {input.sequences} \ - --group-by {params.group_by} \ + --group-by year country \ --subsample-max-sequences 50 \ --subsample-seed 0 \ --output-metadata {output.metadata} \ diff --git a/workflow/snakemake_rules/clades.smk b/workflow/snakemake_rules/clades.smk index 51935df..53aee03 100644 --- a/workflow/snakemake_rules/clades.smk +++ b/workflow/snakemake_rules/clades.smk @@ -1,6 +1,7 @@ rule clades_genome: - message: - "adding clades based on the entire genome" + """ + adding clades based on the entire genome + """ input: tree = rules.refine.output.tree, aa_muts = rules.translate.output.node_data, @@ -9,20 +10,26 @@ rule clades_genome: output: node_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/clades_genome.json" log: - "logs/{a_or_b}/clades_genome_{build_name}_{resolution}.txt" + "logs/clades_genome_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/clades_genome_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + augur clades --tree {input.tree} \ --mutations {input.nuc_muts} {input.aa_muts} \ --clades {input.clades} \ --membership-name genome_clade \ --label-name genome_clade \ - --output-node-data {output.node_data} 2>&1 | tee {log} + --output-node-data {output.node_data} """ rule clades_Goya: - message: "Adding internal clade labels" + """ + Adding internal clade labels + """ input: tree = rules.refine.output.tree, aa_muts = rules.translate.output.node_data, @@ -31,19 +38,25 @@ rule clades_Goya: output: node_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/clades_G.json" log: - "logs/{a_or_b}/clades_{build_name}_{resolution}.txt" + "logs/clades_Goya_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/clades_Goya_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + augur clades --tree {input.tree} \ --mutations {input.nuc_muts} {input.aa_muts} \ --clades {input.clades} \ --membership-name G_clade \ --label-name G_clade \ - --output-node-data {output.node_data} 2>&1 | tee {log} + --output-node-data {output.node_data} """ rule clades_consortium: - message: "Adding internal clade labels" + """ + Adding internal clade labels + """ input: tree = rules.refine.output.tree, aa_muts = rules.translate.output.node_data, @@ -52,21 +65,31 @@ rule clades_consortium: output: node_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/clades_consortium.json" log: - "logs/{a_or_b}/clades_{build_name}_{resolution}.txt" + "logs/clades_consortium_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/clades_consortium_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + augur clades --tree {input.tree} \ --mutations {input.nuc_muts} {input.aa_muts} \ --clades {input.clades} \ - --output-node-data {output.node_data} 2>&1 | tee {log} + --output-node-data {output.node_data} """ rule download_clades: output: clades = "results/clades_consortium_{a_or_b}.tsv" + log: + "logs/download_clades_{a_or_b}.txt" + benchmark: + "benchmarks/download_clades_{a_or_b}.txt" params: url = lambda w: f"https://raw.githubusercontent.com/rsv-lineages/lineage-designation-{w.a_or_b.upper()}/main/.auto-generated/lineages.tsv" shell: - """ + r""" + exec &> >(tee {log:q}) + curl {params.url} --output {output.clades} """ diff --git a/workflow/snakemake_rules/config.smk b/workflow/snakemake_rules/config.smk new file mode 100644 index 0000000..199b4a9 --- /dev/null +++ b/workflow/snakemake_rules/config.smk @@ -0,0 +1,8 @@ +""" +This part of the workflow deals with configuration. + +OUTPUTS: + + results/run_config.yaml +""" +write_config("results/run_config.yaml") diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index 7f936cf..260afd8 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -7,17 +7,22 @@ This part of the workflow expects input files rule index_sequences: - message: - """ - Creating an index of sequence composition for filtering. - """ + """ + Creating an index of sequence composition for filtering. + """ input: sequences="results/{a_or_b}/sequences.fasta", output: sequence_index=build_dir + "/{a_or_b}/sequence_index.tsv", + log: + "logs/index_sequences_{a_or_b}.txt" + benchmark: + "benchmarks/index_sequences_{a_or_b}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + augur index \ --sequences {input.sequences} \ --output {output.sequence_index} @@ -25,10 +30,9 @@ rule index_sequences: rule newreference: - message: - """ - Making new reference - """ + """ + Making new reference + """ input: oldreference="config/{a_or_b}reference.gbk", output: @@ -36,32 +40,38 @@ rule newreference: + "/{a_or_b}/{gene}_reference.gbk", newreferencefasta=build_dir + "/{a_or_b}/{gene}_reference.fasta", - params: - gene=lambda w: w.gene.split("-")[0], + log: + "logs/newreference_{a_or_b}_{gene}.txt" + benchmark: + "benchmarks/newreference_{a_or_b}_{gene}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/newreference.py \ --reference {input.oldreference} \ --output-genbank {output.newreferencegbk} \ --output-fasta {output.newreferencefasta} \ - --gene {params.gene} + --gene {wildcards.gene} """ rule filter_recent: - message: - """ - filtering sequences - """ + """ + filtering sequences + """ input: sequences="results/{a_or_b}/sequences.fasta", - reference="config/{a_or_b}reference.gbk", metadata="results/{a_or_b}/metadata.tsv", sequence_index=rules.index_sequences.output, exclude=config["exclude"], output: sequences=build_dir + "/{a_or_b}/{build_name}/{resolution}/filtered_recent.fasta", + log: + "logs/filter_recent_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/filter_recent_{a_or_b}_{build_name}_{resolution}.txt" params: group_by=config["filter"]["group_by"], min_coverage=lambda w: f'{w.build_name.split("-")[0]}_coverage>{config["filter"]["min_coverage"][w.build_name]}', @@ -71,41 +81,47 @@ rule filter_recent: ][w.build_name], strain_id=config["strain_id_field"], min_date=lambda w: config["filter"]["resolutions"][w.resolution]["min_date"], + exclude_where=config["filter"]["exclude_where"]["recent"], + missing_data_threshold=config["filter"]["missing_data_threshold"], shell: - """ + r""" + exec &> >(tee {log:q}) + augur filter \ --sequences {input.sequences} \ --sequence-index {input.sequence_index} \ --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ --exclude {input.exclude} \ - --exclude-where 'qc.overallStatus=bad' \ + --exclude-where {params.exclude_where:q} \ --min-date {params.min_date} \ --min-length {params.min_length} \ --output {output.sequences} \ --group-by {params.group_by} \ --subsample-max-sequences {params.subsample_max_sequences} \ - --query '({params.min_coverage}) & missing_data<1000' + --query '({params.min_coverage}) & missing_data<{params.missing_data_threshold}' """ rule filter_background: - message: - """ - filtering sequences - """ + """ + filtering sequences + """ input: sequences="results/{a_or_b}/sequences.fasta", - reference="config/{a_or_b}reference.gbk", metadata="results/{a_or_b}/metadata.tsv", sequence_index=rules.index_sequences.output, include="config/include_{a_or_b}.txt", exclude=config["exclude"], output: sequences=build_dir - + "/{a_or_b}/{build_name}/{resolution}/filtered_background_pre.fasta", + + "/{a_or_b}/{build_name}/{resolution}/filtered_background.fasta", metadata=build_dir + "/{a_or_b}/{build_name}/{resolution}/filtered_background_metadata.tsv", + log: + "logs/filter_background_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/filter_background_{a_or_b}_{build_name}_{resolution}.txt" params: group_by=config["filter"]["group_by"], min_coverage=lambda w: f'{w.build_name.split("-")[0]}_coverage>{config["filter"]["min_coverage"][w.build_name]}', @@ -119,8 +135,13 @@ rule filter_background: min_date=lambda w: config["filter"]["resolutions"][w.resolution][ "background_min_date" ], + exclude_where=config["filter"]["exclude_where"]["background"], + missing_data_threshold=config["filter"]["missing_data_threshold"], + clade_prefix=lambda w: f"{w.a_or_b.upper()}.D", shell: - """ + r""" + exec &> >(tee {log:q}) + augur filter \ --sequences {input.sequences} \ --sequence-index {input.sequence_index} \ @@ -128,7 +149,7 @@ rule filter_background: --metadata-id-columns {params.strain_id} \ --include {input.include} \ --exclude {input.exclude} \ - --exclude-where 'qc.overallStatus=bad' 'qc.overallStatus=mediocre' \ + --exclude-where {params.exclude_where:q} \ --min-date {params.min_date} \ --max-date {params.max_date} \ --min-length {params.min_length} \ @@ -136,37 +157,9 @@ rule filter_background: --output-metadata {output.metadata} \ --group-by {params.group_by} \ --subsample-max-sequences {params.subsample_max_sequences} \ - --query '({params.min_coverage}) & missing_data<1000' + --query '({params.min_coverage}) & missing_data<{params.missing_data_threshold} & clade.str.startswith("{params.clade_prefix}", na=False)' """ -rule exclude_preduplication: - message: - """ - excluding sequences predate the duplication starting with A.D or B.D as lineage names - """ - input: - sequences=rules.filter_background.output.sequences, - metadata=rules.filter_background.output.metadata, - output: - sequences=build_dir - + "/{a_or_b}/{build_name}/{resolution}/filtered_background.fasta", - run: - import pandas as pd - import Bio.SeqIO as SeqIO - - desired_prefix = wildcards.a_or_b.upper() + ".D" - - metadata_df = pd.read_csv(input.metadata, sep="\t") - ids_to_keep = set(metadata_df[ - metadata_df["clade"].str.startswith(desired_prefix, na=False) - ]["accession"].tolist()) - - with open(output.sequences, "w") as seq_out: - for seq in SeqIO.parse(input.sequences, "fasta"): - if seq.id in ids_to_keep: - SeqIO.write(seq, seq_out, "fasta") - - rule combine_samples: input: subsamples=lambda w: ( @@ -191,19 +184,28 @@ rule combine_samples: ), output: sequences=build_dir + "/{a_or_b}/{build_name}/{resolution}/filtered.fasta", + log: + "logs/combine_samples_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/combine_samples_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + cat {input.subsamples} | seqkit rmdup > {output} """ rule get_nextclade_dataset: - message: - """ - fetching nextclade dataset - """ + """ + fetching nextclade dataset + """ output: dataset="results/nextclade_rsv-{a_or_b}.zip", + log: + "logs/get_nextclade_dataset_{a_or_b}.txt" + benchmark: + "benchmarks/get_nextclade_dataset_{a_or_b}.txt" params: ds_name=lambda w: ( "nextstrain/rsv/a/EPI_ISL_412866" @@ -211,29 +213,36 @@ rule get_nextclade_dataset: else "nextstrain/rsv/b/EPI_ISL_1653999" ), shell: - """ + r""" + exec &> >(tee {log:q}) + nextclade3 dataset get -n {params.ds_name} --output-zip {output.dataset} """ rule filter_for_pre_subsample_alignment: - message: - """ - Do the quality filtering applied to each sequence set before subsampling - """ + """ + Do the quality filtering applied to each sequence set before subsampling + """ input: sequences="results/{a_or_b}/sequences.fasta", metadata="results/{a_or_b}/metadata.tsv", exclude=config["exclude"], output: sequences=build_dir + "/{a_or_b}/{build_name}/{resolution}/pre_subsample/filtered_for_alignment.fasta", + log: + "logs/filter_for_pre_subsample_alignment_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/filter_for_pre_subsample_alignment_{a_or_b}_{build_name}_{resolution}.txt" params: min_coverage=lambda w: f'{w.build_name.split("-")[0]}_coverage>{config["filter"]["min_coverage"][w.build_name]}', min_length=lambda w: config["filter"]["min_length"][w.build_name], strain_id=config["strain_id_field"], min_date=lambda w: config["filter"]["resolutions"][w.resolution]["min_date"], shell: - """ + r""" + exec &> >(tee {log:q}) + augur filter \ --sequences {input.sequences} \ --metadata {input.metadata} \ @@ -248,10 +257,9 @@ rule filter_for_pre_subsample_alignment: rule align_pre_subsample_sequences: - message: - """ - Aligning all pre-subsampled quality-filtered sequences - """ + """ + Aligning all pre-subsampled quality-filtered sequences + """ input: sequences=rules.filter_for_pre_subsample_alignment.output.sequences, dataset=rules.get_nextclade_dataset.output.dataset, @@ -263,32 +271,43 @@ rule align_pre_subsample_sequences: genes=lambda w: config["cds"][w.build_name], threads: 8 log: - "logs/align_all_{a_or_b}_{build_name}_{resolution}.txt", + "logs/align_pre_subsample_sequences_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/align_pre_subsample_sequences_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + nextclade3 run -j {threads}\ {input.sequences} \ -D {input.dataset} \ --output-fasta {output.alignment} \ --cds-selection {params.genes} \ - --output-translations "{output.translations}/{{cds}}.fasta" 2>&1 | tee {log} && touch {output.translations_done} + --output-translations "{output.translations}/{{cds}}.fasta" && touch {output.translations_done} """ rule score_pre_subsample_f_proteins: - message: - "Computing F protein DMS scores for pre-subsampled sequences" + """ + Computing F protein DMS scores for pre-subsampled sequences + """ input: translations_done=rules.align_pre_subsample_sequences.output.translations_done, dms_scores=config["f_dms_data"], output: scores=build_dir + "/{a_or_b}/{build_name}/{resolution}/pre_subsample/f_protein_scores.tsv", + log: + "logs/score_pre_subsample_f_proteins_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/score_pre_subsample_f_proteins_{a_or_b}_{build_name}_{resolution}.txt" params: f_sequences=lambda w: build_dir + f"/{w.a_or_b}/{w.build_name}/{w.resolution}/pre_subsample/translations/F.fasta", dms_antibodies=lambda w: " ".join(shlex.quote(ab) for ab in config["f_dms_antibodies"]), only_positive_escape=config["dms_only_positive_escape"], shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/score_f_sequences.py fasta \ --sequences {params.f_sequences} \ --dms-scores {input.dms_scores} \ @@ -299,17 +318,24 @@ rule score_pre_subsample_f_proteins: rule add_f_scores_to_pre_subsample_metadata: - message: - "Adding F protein scores to pre-subsampled metadata" + """ + Adding F protein scores to pre-subsampled metadata + """ input: original_metadata="results/{a_or_b}/metadata.tsv", f_scores=rules.score_pre_subsample_f_proteins.output.scores, output: enhanced_metadata=build_dir + "/{a_or_b}/{build_name}/{resolution}/pre_subsample/metadata_with_scores.tsv", + log: + "logs/add_f_scores_to_pre_subsample_metadata_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/add_f_scores_to_pre_subsample_metadata_{a_or_b}_{build_name}_{resolution}.txt" params: strain_id=config["strain_id_field"], shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/merge_f_scores.py \ --metadata {input.original_metadata} \ --scores {input.f_scores} \ @@ -319,8 +345,9 @@ rule add_f_scores_to_pre_subsample_metadata: rule enrich_antibody_escape: - message: - "Get sequences with high antibody escape to add to tree via custom filtering rule." + """ + Get sequences with high antibody escape to add to tree via custom filtering rule. + """ wildcard_constraints: antibody="|".join(re.escape(antibody) for antibody in config["f_dms_antibodies"]), scoretype="total_escape|max_escape", @@ -329,6 +356,10 @@ rule enrich_antibody_escape: metadata=rules.add_f_scores_to_pre_subsample_metadata.output.enhanced_metadata, output: sequences=build_dir + "/{a_or_b}/{build_name}/{resolution}/filtered_{antibody}_{scoretype}.fasta" + log: + "logs/enrich_antibody_escape_{a_or_b}_{build_name}_{resolution}_{antibody}_{scoretype}.txt" + benchmark: + "benchmarks/enrich_antibody_escape_{a_or_b}_{build_name}_{resolution}_{antibody}_{scoretype}.txt" params: strain_id=config["strain_id_field"], escape_col=lambda w: f"{w.antibody}_{w.scoretype}", @@ -337,7 +368,9 @@ rule enrich_antibody_escape: max_identical_f_prot_muts=lambda w: config["enrich_antibody_escape"][w.build_name]["max_identical_f_prot_muts"], max_identical_max_escape_mut=lambda w: config["enrich_antibody_escape"][w.build_name]["max_identical_max_escape_mut"], shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/enrich_antibody_escape.py \ --input-sequences {input.sequences} \ --metadata {input.metadata} \ @@ -352,10 +385,9 @@ rule enrich_antibody_escape: rule genome_align: - message: - """ - Aligning sequences to the reference - """ + """ + Aligning sequences to the reference + """ input: sequences=rules.combine_samples.output.sequences, dataset=rules.get_nextclade_dataset.output.dataset, @@ -366,15 +398,19 @@ rule genome_align: genes=lambda w: config["cds"][w.build_name], threads: 4 log: - "logs/align_{a_or_b}_{build_name}_{resolution}.txt", + "logs/genome_align_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/genome_align_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + nextclade3 run -j {threads}\ {input.sequences} \ -D {input.dataset} \ --output-fasta {output.alignment} \ --cds-selection {params.genes} \ - --output-translations "{output.translations}/{{cds}}.fasta" 2>&1 | tee {log} \ + --output-translations "{output.translations}/{{cds}}.fasta" """ @@ -386,8 +422,14 @@ rule cut: output: slicedalignment=build_dir + "/{a_or_b}/{build_name}/{resolution}/{gene}_slicedalignment.fasta", + log: + "logs/cut_{a_or_b}_{build_name}_{resolution}_{gene}.txt" + benchmark: + "benchmarks/cut_{a_or_b}_{build_name}_{resolution}_{gene}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/cut.py \ --oldalignment {input.oldalignment} \ --slicedalignment {output.slicedalignment} \ @@ -404,9 +446,15 @@ rule realign: + "/{a_or_b}/{gene}_reference.fasta", output: realigned=build_dir + "/{a_or_b}/{build_name}/{resolution}/{gene}_aligned.fasta", + log: + "logs/realign_{a_or_b}_{build_name}_{resolution}_{gene}.txt" + benchmark: + "benchmarks/realign_{a_or_b}_{build_name}_{resolution}_{gene}.txt" threads: 4 shell: - """ + r""" + exec &> >(tee {log:q}) + augur align --nthreads {threads} \ --sequences {input.slicedalignment} \ --reference-sequence {input.reference} \ @@ -422,8 +470,14 @@ rule hybrid_align: output: hybrid_alignment=build_dir + "/{a_or_b}/{build_name}/{resolution}/hybrid_alignment.fasta", + log: + "logs/hybrid_align_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/hybrid_align_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/align_for_tree.py \ --realign {input.G_alignment} \ --original {input.original} \ @@ -444,16 +498,31 @@ def get_alignment(w): ) +def get_reference(w): + if w.build_name == "genome": + return f"config/{w.a_or_b}reference.gbk" + else: + gene = config["cds"][w.build_name] + return build_dir + f"/{w.a_or_b}/{gene}_reference.gbk" + + rule tree: - message: - "Building tree" + """ + Building tree + """ input: alignment=get_alignment, output: tree=build_dir + "/{a_or_b}/{build_name}/{resolution}/tree_raw.nwk", + log: + "logs/tree_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/tree_{a_or_b}_{build_name}_{resolution}.txt" threads: 4 shell: - """ + r""" + exec &> >(tee {log:q}) + augur tree \ --alignment {input.alignment} \ --output {output.tree} \ @@ -463,13 +532,12 @@ rule tree: rule refine: - message: - """ - Refining tree - - estimate timetree - - use {params.coalescent} coalescent timescale - - estimate {params.date_inference} node dates - """ + """ + Refining tree + - estimate timetree + - use {params.coalescent} coalescent timescale + - estimate {params.date_inference} node dates + """ input: tree=rules.tree.output.tree, alignment=get_alignment, @@ -477,13 +545,19 @@ rule refine: output: tree=build_dir + "/{a_or_b}/{build_name}/{resolution}/tree.nwk", node_data=build_dir + "/{a_or_b}/{build_name}/{resolution}/branch_lengths.json", + log: + "logs/refine_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/refine_{a_or_b}_{build_name}_{resolution}.txt" params: coalescent=config["refine"]["coalescent"], clock_filter_iqd=config["refine"]["clock_filter_iqd"], date_inference=config["refine"]["date_inference"], strain_id=config["strain_id_field"], shell: - """ + r""" + exec &> >(tee {log:q}) + augur refine \ --tree {input.tree} \ --alignment {input.alignment} \ @@ -547,12 +621,16 @@ rule distances: comparisons=_get_distance_comparisons_by_lineage_and_segment, attribute_names=_get_distance_attributes_by_lineage_and_segment, log: - "logs/distances_{a_or_b}_{build_name}_{resolution}.txt", + "logs/distances_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/distances_{a_or_b}_{build_name}_{resolution}.txt" resources: mem_mb=8000, time="00:30:00", shell: - """ + r""" + exec &> >(tee {log:q}) + augur distance \ --alignment {params.alignments} \ --tree {input.tree} \ @@ -560,21 +638,20 @@ rule distances: --compare-to {params.comparisons} \ --attribute-name {params.attribute_names} \ --map {input.distance_maps} \ - --output {output.distances} 2>&1 | tee {log} + --output {output.distances} """ rule ancestral: - message: - """ - Reconstructing ancestral sequences and mutations - - inferring ambiguous mutations - """ + """ + Reconstructing ancestral sequences and mutations + - inferring ambiguous mutations + """ input: tree=rules.refine.output.tree, alignment=get_alignment, translations=rules.genome_align.output.translations, - root_sequence=build_dir + "/{a_or_b}/{build_name}_reference.gbk", + root_sequence=get_reference, output: node_data=build_dir + "/{a_or_b}/{build_name}/{resolution}/nt_muts.json", translations_done=build_dir + "/{a_or_b}/{build_name}/{resolution}/translations.done", @@ -584,9 +661,13 @@ rule ancestral: output_translations=lambda w: build_dir + f"/{w.a_or_b}/{w.build_name}/{w.resolution}/translations/%GENE_withInternalNodes.fasta", input_translations=lambda w: build_dir + f"/{w.a_or_b}/{w.build_name}/{w.resolution}/translations/%GENE.fasta", log: - "logs/ancestral_{a_or_b}_{build_name}_{resolution}.txt", + "logs/ancestral_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/ancestral_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + augur ancestral \ --tree {input.tree} \ --alignment {input.alignment} \ @@ -596,23 +677,30 @@ rule ancestral: --genes {params.genes} \ --translations "{params.input_translations}" \ --output-translations "{params.output_translations}" \ - --inference {params.inference} 2>&1 | tee {log} && touch {output.translations_done} + --inference {params.inference} && touch {output.translations_done} """ rule translate: - message: - "Translating amino acid sequences" + """ + Translating amino acid sequences + """ input: tree=rules.refine.output.tree, node_data=rules.ancestral.output.node_data, - reference=build_dir + "/{a_or_b}/{build_name}_reference.gbk", + reference=get_reference, output: node_data=build_dir + "/{a_or_b}/{build_name}/{resolution}/aa_muts.json", + log: + "logs/translate_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/translate_{a_or_b}_{build_name}_{resolution}.txt" params: alignment_file_mask=build_dir + "/{a_or_b}/{build_name}/{resolution}/aligned_%GENE.fasta", shell: - """ + r""" + exec &> >(tee {log:q}) + augur translate \ --tree {input.tree} \ --ancestral-sequences {input.node_data} \ @@ -623,10 +711,9 @@ rule translate: rule compute_f_scores_node_data: - message: - """ - Computing F protein antibody escape scores for all tree nodes - """ + """ + Computing F protein antibody escape scores for all tree nodes + """ input: tree_newick=rules.refine.output.tree, aa_muts=rules.translate.output.node_data, @@ -638,9 +725,13 @@ rule compute_f_scores_node_data: f_antibodies=lambda w: " ".join(shlex.quote(ab) for ab in config["f_dms_antibodies"]), only_positive_escape=config["dms_only_positive_escape"], log: - "logs/compute_f_scores_node_data_{a_or_b}_{build_name}_{resolution}.txt", + "logs/compute_f_scores_node_data_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/compute_f_scores_node_data_{a_or_b}_{build_name}_{resolution}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/score_f_sequences.py tree \ --tree-newick {input.tree_newick} \ --aa-muts {input.aa_muts} \ @@ -648,7 +739,7 @@ rule compute_f_scores_node_data: --dms-scores {input.f_scores} \ --dms-antibodies {params.f_antibodies} \ --only-positive-escape {params.only_positive_escape} \ - --output {output.f_scores_node_data} &> {log} + --output {output.f_scores_node_data} """ @@ -659,12 +750,16 @@ rule traits: output: node_data=build_dir + "/{a_or_b}/{build_name}/{resolution}/traits.json", log: - "logs/{a_or_b}/traits_{build_name}_{resolution}_rsv.txt", + "logs/traits_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/traits_{a_or_b}_{build_name}_{resolution}.txt" params: columns=config["traits"]["columns"], strain_id=config["strain_id_field"], shell: - """ + r""" + exec &> >(tee {log:q}) + augur traits \ --tree {input.tree} \ --metadata {input.metadata} \ @@ -680,10 +775,16 @@ rule frequencies: metadata = "results/{a_or_b}/metadata.tsv" output: frequencies = build_dir + "/{a_or_b}/{build_name}/{resolution}/frequencies.json" + log: + "logs/frequencies_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/frequencies_{a_or_b}_{build_name}_{resolution}.txt" params: - min_date_arg = lambda w: f"--min-date {config['filter']['resolutions'][w.resolution]['min_date']}", + min_date_arg = lambda w: f"--min-date {config['frequencies']['resolutions'][w.resolution]['min_date']}", shell: - """ + r""" + exec &> >(tee {log:q}) + augur frequencies \ --tree {input.tree} \ --method kde \ diff --git a/workflow/snakemake_rules/export.smk b/workflow/snakemake_rules/export.smk index 660e980..38d3b8b 100644 --- a/workflow/snakemake_rules/export.smk +++ b/workflow/snakemake_rules/export.smk @@ -14,11 +14,17 @@ def get_node_data(w): return node_data rule generate_f_dms_antibody_auspice_config: - message: "Generating auspice config for F DMS antibody colorings" + """ + Generating auspice config for F DMS antibody colorings + """ input: f_scores_node_data = rules.compute_f_scores_node_data.output.f_scores_node_data output: auspice_config = "results/{a_or_b}/{build_name}/{resolution}/auspice_config_f_dms_antibodies.json" + log: + "logs/generate_f_dms_antibody_auspice_config_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/generate_f_dms_antibody_auspice_config_{a_or_b}_{build_name}_{resolution}.txt" params: antibodies = config["f_dms_antibodies"], continuous_scale = " ".join( @@ -27,7 +33,9 @@ rule generate_f_dms_antibody_auspice_config: ["#440154", "#472d7b", "#3b528b", "#2c728e", "#21918c", "#28ae80", "#5ec962", "#addc30", "#fde725"] ) shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/generate_f_dms_antibody_auspice_config.py \ --antibodies {params.antibodies} \ --continuous-scale {params.continuous_scale} \ @@ -42,8 +50,14 @@ rule colors: metadata = "results/{a_or_b}/metadata.tsv", output: colors = "results/{a_or_b}/colors.tsv" + log: + "logs/colors_{a_or_b}.txt" + benchmark: + "benchmarks/colors_{a_or_b}.txt" shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/assign-colors.py \ --color-schemes {input.color_schemes} \ --ordering {input.color_orderings} \ @@ -72,7 +86,9 @@ def auspice_configs(wildcards): return configs rule export: - message: "Exporting data files for auspice" + """ + Exporting data files for auspice + """ input: tree = rules.refine.output.tree, metadata = "results/{a_or_b}/metadata.tsv", @@ -82,11 +98,17 @@ rule export: description = config["description"] output: auspice_json = build_dir + "/{a_or_b}/{build_name}/{resolution}/tree.json" + log: + "logs/export_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/export_{a_or_b}_{build_name}_{resolution}.txt" params: title = lambda w: f"RSV-{w.a_or_b.upper()} phylogeny", strain_id=config["strain_id_field"], shell: - """ + r""" + exec &> >(tee {log:q}) + augur export v2 \ --tree {input.tree} \ --metadata {input.metadata} \ @@ -110,11 +132,17 @@ rule final_strain_name: output: auspice_json=build_dir + "/{a_or_b}/{build_name}/{resolution}/tree_renamed.json", freq_json= "auspice/rsv_{a_or_b}_{build_name}_{resolution}_tip-frequencies.json" + log: + "logs/final_strain_name_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/final_strain_name_{a_or_b}_{build_name}_{resolution}.txt" params: strain_id=config["strain_id_field"], display_strain_field=config["display_strain_field"], shell: - """ + r""" + exec &> >(tee {log:q}) + python3 scripts/set_final_strain_name.py --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ --input-auspice-json {input.auspice_json} \ @@ -131,12 +159,18 @@ rule rename_and_ready_for_nextclade: pathogen_json= "nextclade/config/pathogen.json", output: auspice_json= "auspice/rsv_{a_or_b}_{build_name}_{resolution}.json", + log: + "logs/rename_and_ready_for_nextclade_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/rename_and_ready_for_nextclade_{a_or_b}_{build_name}_{resolution}.txt" params: accession= lambda w: config["nextclade_attributes"][w.a_or_b]["accession"], name= lambda w: config["nextclade_attributes"][w.a_or_b]["name"], ref_name= lambda w: config["nextclade_attributes"][w.a_or_b]["reference_name"] shell: - """ + r""" + exec &> >(tee {log:q}) + python3 scripts/rename_and_nextclade.py \ --input-auspice-json {input.auspice_json} \ --pathogen-json {input.pathogen_json} \ diff --git a/workflow/snakemake_rules/glycosylation.smk b/workflow/snakemake_rules/glycosylation.smk index 599e20a..f7138a2 100644 --- a/workflow/snakemake_rules/glycosylation.smk +++ b/workflow/snakemake_rules/glycosylation.smk @@ -4,10 +4,16 @@ rule glycosylation: translations = rules.translate.output.node_data output: glycosylations = build_dir + "/{a_or_b}/{build_name}/{resolution}/glyc.json" + log: + "logs/glycosylation_{a_or_b}_{build_name}_{resolution}.txt" + benchmark: + "benchmarks/glycosylation_{a_or_b}_{build_name}_{resolution}.txt" params: aa_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/aligned_{build_name}.fasta" shell: - """ + r""" + exec &> >(tee {log:q}) + python scripts/glycosylation.py \ --alignment {params.aa_data} \ --tree {input.tree} \ diff --git a/workflow/snakemake_rules/merge_inputs.smk b/workflow/snakemake_rules/merge_inputs.smk index fb335af..7d66df0 100644 --- a/workflow/snakemake_rules/merge_inputs.smk +++ b/workflow/snakemake_rules/merge_inputs.smk @@ -70,9 +70,9 @@ if len(_input_metadata) == 1: output: metadata = "results/{a_or_b}/metadata.tsv", log: - "logs/{a_or_b}/decompress_metadata.txt", + "logs/decompress_metadata_{a_or_b}.txt" benchmark: - "benchmarks/{a_or_b}/decompress_metadata.txt", + "benchmarks/decompress_metadata_{a_or_b}.txt" shell: r""" exec &> >(tee {log:q}) @@ -95,9 +95,9 @@ else: output: metadata = "results/{a_or_b}/metadata.tsv" log: - "logs/{a_or_b}/merge_metadata.txt", + "logs/merge_metadata_{a_or_b}.txt" benchmark: - "benchmarks/{a_or_b}/merge_metadata.txt" + "benchmarks/merge_metadata_{a_or_b}.txt" shell: r""" exec &> >(tee {log:q}) @@ -122,9 +122,9 @@ if len(_input_sequences) == 1: output: sequences = "results/{a_or_b}/sequences.fasta", log: - "logs/{a_or_b}/decompress_sequences.txt", + "logs/decompress_sequences_{a_or_b}.txt" benchmark: - "benchmarks/{a_or_b}/decompress_sequences.txt", + "benchmarks/decompress_sequences_{a_or_b}.txt" shell: r""" exec &> >(tee {log:q}) @@ -144,9 +144,9 @@ else: output: sequences = "results/{a_or_b}/sequences.fasta", log: - "logs/{a_or_b}/merge_sequences.txt", + "logs/merge_sequences_{a_or_b}.txt" benchmark: - "benchmarks/{a_or_b}/merge_sequences.txt" + "benchmarks/merge_sequences_{a_or_b}.txt" shell: r""" exec &> >(tee {log:q})