NW-PaGe · DOH-LAF2303 · Apr 9, 2026 · Feb 13, 2026 · Feb 28, 2026 · Mar 1, 2026
diff --git a/Snakefile b/Snakefile
@@ -9,11 +9,14 @@ min_version("8.0.0")
 
 configfile: "config/configfile.yaml"
 
+include: "shared/vendored/snakemake/config.smk"
+include: "workflow/snakemake_rules/config.smk"
 
 wildcard_constraints:
     a_or_b=r"a|b",
     build_name="|".join(config.get("builds_to_run", ["genome"])),
     resolution="|".join(config.get("resolutions_to_run", ["all-time"])),
+    gene="G|F",
 
 
 build_dir = "results"

diff --git a/config/auspice_config.json b/config/auspice_config.json
@@ -3,7 +3,7 @@
   "maintainers": [
     {"name": "Nextstrain team", "url": "http://nextstrain.org"},
     {"name": "Richard Neher", "url": "http://nextstrain.org"},
-    {"name": "Bloom lab (antibody escape annotations)", "url": "https://jbloomlab.org/"}
+    {"name": "Bloom lab (antibody escape annotations)", "url": "https://www.biorxiv.org/content/10.64898/2026.02.12.705519"}
   ],
   "data_provenance": [
     {

diff --git a/config/configfile.yaml b/config/configfile.yaml
@@ -51,16 +51,19 @@ filter:
     3y:
       min_date: 3Y
       background_min_date: 12Y
-    1y:
-      min_date: 1Y
-      background_min_date: 12Y
 
   subsample_max_sequences:
     genome: 3000
     G: 3000
     F: 3000
     F-antibody-escape: 2000
 
+  exclude_where:
+    recent: ["qc.overallStatus=bad"]
+    background: ["qc.overallStatus=bad", "qc.overallStatus=mediocre"]
+
+  missing_data_threshold: 1000
+
 files:
   auspice_config: "config/auspice_config.json"
   auspice_config_additional_colorings: "config/auspice_config_additional_colorings.json"
@@ -84,6 +87,15 @@ cds:
 traits:
   columns: "country region"
 
+frequencies:
+  resolutions:
+    all-time:
+      min_date: 1975-01-01
+    6y:
+      min_date: 6Y
+    3y:
+      min_date: 3Y
+
 nextclade_attributes:
   a:
     name: "RSV-A NextClade using real-time tree"

diff --git a/config/description.md b/config/description.md
@@ -8,7 +8,7 @@ The second is ['rsv/a/G'](https://nextstrain.org/rsv/a/G) and ['rsv/b/G'](https:
 
 The third is ['rsv/a/F'](https://nextstrain.org/rsv/a/F) and ['rsv/b/F'](https://nextstrain.org/rsv/b/F), which show evolution of only the F gene.
 
-The fourth is ['rsv/a/F-antibody-escape'](https://nextstrain.org/rsv/a/F-antibody-escape) and ['rsv/b/F-antibody-escape'](https://nextstrain.org/rsv/b/F-antibody-escape), which show evolution of only the F gene but with the available sequences subsampled so as to enrich for sequences that have escape mutations to key monoclonal antibodies as assessed by the deep mutational scanning reported in [Simonich et al]() **ADD CITATION**. Note that these trees as well as the other ones can be colored by the escape from key monoclonal antibodies as computed under an additive model of the mutation effects measured in the deep mutational scanning, with annotations for either *Total Escape* (sum of effects of all mutations) and *Max Escape* (the maximum escape caused by any mutation in the strain).
+The fourth is ['rsv/a/F-antibody-escape'](https://nextstrain.org/rsv/a/F-antibody-escape) and ['rsv/b/F-antibody-escape'](https://nextstrain.org/rsv/b/F-antibody-escape), which show evolution of only the F gene but with the available sequences subsampled so as to enrich for sequences that have escape mutations to key monoclonal antibodies as assessed by the deep mutational scanning reported in [Simonich et al (2026)](https://www.biorxiv.org/content/10.64898/2026.02.12.705519). Note that these trees as well as the other ones can be colored by the escape from key monoclonal antibodies as computed under an additive model of the mutation effects measured in the deep mutational scanning, with annotations for either *Total Escape* (sum of effects of all mutations) or *Max Escape* (the maximum escape caused by any mutation in the strain).
 
 #### Analysis
 
@@ -22,7 +22,7 @@ Our bioinformatic processing workflow can be found at [github.com/nextstrain/rsv
   [RSV-A](https://raw.githubusercontent.com/rsv-lineages/lineage-designation-A/main/.auto-generated/lineage.tsv)
   [RSV-B](https://raw.githubusercontent.com/rsv-lineages/lineage-designation-A/main/.auto-generated/lineage.tsv)
   These clade definitions are based on the [nomenclature proposal by the RSV Genotyping Consensus Consortium](https://wwwnc.cdc.gov/eid/article/30/8/24-0209_article).
-- annotation of antibody escape is done based on an additive model of mutational effects measured in the deep mutational scanning of [Simonich et al]() **ADD CITATION**, and additionally the *F-antibody-escape* builds have the sequences subsampled to enrich for those with escape mutations.
+- annotation of antibody escape is done based on an additive model of mutational effects measured in the deep mutational scanning of [Simonich et al (2026)](https://www.biorxiv.org/content/10.64898/2026.02.12.705519), and additionally the *F-antibody-escape* builds have the sequences subsampled to enrich for those with escape mutations.
 
 #### Underlying sequence data
 

diff --git a/dms-data/README.md b/dms-data/README.md
@@ -1,5 +1,5 @@
 # Deep mutational scanning data
 
-Data from [Bloom lab](https://jbloomlab.org/) pseudovirus deep mutational scanning of RSV F.
+Data from [Bloom lab](https://jbloomlab.org/) pseudovirus deep mutational scanning of RSV F; see [Simonich et al (2026)](https://www.biorxiv.org/content/10.64898/2026.02.12.705519).
 
 [all_antibodyes.csv](all_antibodies.csv) taken from [https://github.com/dms-vep/RSV_Long_F_DMS/blob/main/results/summaries/all_antibodies.csv](https://github.com/dms-vep/RSV_Long_F_DMS/blob/main/results/summaries/all_antibodies.csv) and then antibody columns renamed like *Nirsevimab-IgG escape* -> *Nirsevimab-IgG*.
diff --git a/logs/traits_rsv_rsv.txt b/logs/traits_rsv_rsv.txt
diff --git a/scripts/newreference.py b/scripts/newreference.py
@@ -1,7 +1,6 @@
 from Bio import SeqIO
 from Bio.SeqRecord import SeqRecord
-from Bio.SeqFeature import SeqFeature, FeatureLocation, Seq
-import shutil
+from Bio.SeqFeature import SeqFeature, FeatureLocation
 import argparse
 import sys
 
@@ -20,7 +19,7 @@ def new_reference(referencefile, outgenbank, outfasta, gene):
 
     # If user provides a --gene 'some name' that is not found, error out as this may indicate that
     # the gene name is misspelled or the user may be using the wrong GenBank file.
-    if(gene is not None and startofgene is None and endofgene is None):
+    if(startofgene is None and endofgene is None):
         print(f"ERROR: No '{gene}' was found under 'gene' or 'CDS' features in the GenBank file.", file=sys.stderr)
         sys.exit(1)
 
@@ -36,18 +35,14 @@ def new_reference(referencefile, outgenbank, outfasta, gene):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
-        description="make new reference depending on whether the entire genome or only part is to be used for the tree",
+        description="make new reference based on a gene",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
     parser.add_argument("--reference", required=True, help="GenBank file with reference sequences")
-    parser.add_argument("--output-fasta", required=True, help="GenBank new reference file")
+    parser.add_argument("--output-fasta", required=True, help="FASTA new reference file")
     parser.add_argument("--output-genbank", required=True, help="GenBank new reference file")
-    parser.add_argument("--gene", help="gene name or genome for entire genome")
+    parser.add_argument("--gene", required=True, help="gene name")
     args = parser.parse_args()
 
-    if args.gene=='genome':
-        shutil.copy(args.reference, args.output_genbank)
-        SeqIO.write(SeqIO.read(args.reference, 'genbank'), args.output_fasta, 'fasta')
-    else:
-        new_reference(args.reference, args.output_genbank, args.output_fasta, args.gene)
+    new_reference(args.reference, args.output_genbank, args.output_fasta, args.gene)
 
diff --git a/shared/vendored/.github/workflows/ci.yaml b/shared/vendored/.github/workflows/ci.yaml
@@ -11,5 +11,5 @@ jobs:
   shellcheck:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
       - uses: nextstrain/.github/actions/shellcheck@master
diff --git a/shared/vendored/.github/workflows/pre-commit.yaml b/shared/vendored/.github/workflows/pre-commit.yaml
@@ -7,7 +7,7 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
       - uses: actions/setup-python@v6
         with:
           python-version: "3.12"

diff --git a/shared/vendored/.gitrepo b/shared/vendored/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = https://github.com/nextstrain/shared
 	branch = main
-	commit = bfbbb6875b084e22920712d89704ccb259f8950e
-	parent = 401279d49d2c703f5f79a584c4530dec736fbaad                                        
+	commit = 37cf39c2a4f4c42474046e70c856acb6a2031e2d
+	parent = f1f17cec6fe39fa2dd06429a44bcd925a441e47f
 	method = merge
-	cmdver = 0.4.6
+	cmdver = 0.4.9
diff --git a/shared/vendored/snakemake/config.smk b/shared/vendored/snakemake/config.smk
@@ -11,6 +11,45 @@ from typing import Optional
 from textwrap import dedent, indent
 
 
+# Set search paths for Augur
+if "AUGUR_SEARCH_PATHS" in os.environ:
+    print(dedent(f"""\
+        Using existing search paths in AUGUR_SEARCH_PATHS:
+
+            {os.environ["AUGUR_SEARCH_PATHS"]!r}
+        """), file=sys.stderr)
+else:
+    # Note that this differs from the search paths used in
+    # resolve_config_path().
+    # This is the preferred default moving forwards, and the plan is to
+    # eventually update resolve_config_path() to use AUGUR_SEARCH_PATHS.
+    search_paths = [
+        # User analysis directory
+        Path.cwd(),
+
+        # Workflow defaults folder
+        Path(workflow.basedir) / "defaults",
+
+        # Workflow root (contains Snakefile)
+        Path(workflow.basedir),
+    ]
+
+    # This should work for majority of workflows, but we could consider doing a
+    # more thorough search for the nextstrain-pathogen.yaml. This would likely
+    # replicate how CLI searches for the root.¹
+    # ¹ <https://github.com/nextstrain/cli/blob/d5e184c5/nextstrain/cli/command/build.py#L413-L420>
+    repo_root = Path(workflow.basedir) / ".."
+    if (repo_root / "nextstrain-pathogen.yaml").is_file():
+        search_paths.extend([
+            # Pathogen repo root
+            repo_root,
+        ])
+
+    search_paths = [path.resolve() for path in search_paths if path.is_dir()]
+
+    os.environ["AUGUR_SEARCH_PATHS"] = ":".join(map(str, search_paths))
+
+
 class InvalidConfigError(Exception):
     pass
 
@@ -147,15 +186,40 @@ def resolve_config_path(path: str, defaults_dir: Optional[str] = None) -> Callab
     return _resolve_config_path
 
 
-def write_config(path):
+def write_config(path, section=None):
     """
-    Write Snakemake's 'config' variable to a file.
+    Write Snakemake's 'config' variable, or a section of it, to a file.
+
+    *section* is an optional list of keys to navigate to a specific section of
+    config. If provided, only that section will be written.
     """
     global config
 
     os.makedirs(os.path.dirname(path), exist_ok=True)
 
+    data = config
+    section_str = "config"
+
+    if section:
+        # Navigate to the specified section
+        for key in section:
+            # Error if key doesn't exist
+            if key not in data:
+                raise Exception(f"ERROR: Key {key!r} not found in {section_str!r}.")
+
+            data = data[key]
+            section_str += f".{key}"
+
+            # Error if value is not a mapping
+            if not isinstance(data, dict):
+                raise Exception(f"ERROR: {section_str!r} is not a mapping of key/value pairs.")
+
     with open(path, 'w') as f:
-        yaml.dump(config, f, sort_keys=False)
+        yaml.dump(data, f, sort_keys=False, Dumper=NoAliasDumper)
+
+    print(f"Saved {section_str!r} to {path!r}.", file=sys.stderr)
+
 
-    print(f"Saved current run config to {path!r}.", file=sys.stderr)
+class NoAliasDumper(yaml.SafeDumper):
+    def ignore_aliases(self, data):
+        return True
diff --git a/workflow/snakemake_rules/chores.smk b/workflow/snakemake_rules/chores.smk
@@ -6,10 +6,8 @@ rule update_example_data_wildcards:
 
     The subset of data is generated by an augur filter call which:
     - sets the subsampling size to 50
-    - applies the grouping from the config
+    - groups by year and country
     """
-    message:
-        "Update example data"
     input:
         sequences = "results/{a_or_b}/sequences.fasta",
         metadata = "results/{a_or_b}/metadata.tsv",
@@ -18,14 +16,13 @@ rule update_example_data_wildcards:
         metadata = "example_data/{a_or_b}/metadata.tsv",
     params:
         strain_id=config["strain_id_field"],
-        group_by=config["filter"]["group_by"],
     shell:
         """
         augur filter \
             --metadata {input.metadata} \
             --metadata-id-columns {params.strain_id} \
             --sequences {input.sequences} \
-            --group-by {params.group_by} \
+            --group-by year country \
             --subsample-max-sequences 50 \
             --subsample-seed 0 \
             --output-metadata {output.metadata} \

diff --git a/workflow/snakemake_rules/clades.smk b/workflow/snakemake_rules/clades.smk
@@ -1,6 +1,7 @@
 rule clades_genome:
-    message:
-        "adding clades based on the entire genome"
+    """
+    adding clades based on the entire genome
+    """
     input:
         tree = rules.refine.output.tree,
         aa_muts = rules.translate.output.node_data,
@@ -9,20 +10,26 @@ rule clades_genome:
     output:
         node_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/clades_genome.json"
     log:
-        "logs/{a_or_b}/clades_genome_{build_name}_{resolution}.txt"
+        "logs/clades_genome_{a_or_b}_{build_name}_{resolution}.txt"
+    benchmark:
+        "benchmarks/clades_genome_{a_or_b}_{build_name}_{resolution}.txt"
     shell:
-        """
+        r"""
+        exec &> >(tee {log:q})
+
         augur clades --tree {input.tree} \
             --mutations {input.nuc_muts} {input.aa_muts} \
             --clades {input.clades} \
             --membership-name genome_clade \
             --label-name genome_clade \
-            --output-node-data  {output.node_data} 2>&1 | tee {log}
+            --output-node-data  {output.node_data}
         """
 
 
 rule clades_Goya:
-    message: "Adding internal clade labels"
+    """
+    Adding internal clade labels
+    """
     input:
         tree = rules.refine.output.tree,
         aa_muts = rules.translate.output.node_data,
@@ -31,19 +38,25 @@ rule clades_Goya:
     output:
         node_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/clades_G.json"
     log:
-        "logs/{a_or_b}/clades_{build_name}_{resolution}.txt"
+        "logs/clades_Goya_{a_or_b}_{build_name}_{resolution}.txt"
+    benchmark:
+        "benchmarks/clades_Goya_{a_or_b}_{build_name}_{resolution}.txt"
     shell:
-        """
+        r"""
+        exec &> >(tee {log:q})
+
         augur clades --tree {input.tree} \
             --mutations {input.nuc_muts} {input.aa_muts} \
             --clades {input.clades} \
             --membership-name G_clade \
             --label-name G_clade \
-            --output-node-data {output.node_data} 2>&1 | tee {log}
+            --output-node-data {output.node_data}
         """
 
 rule clades_consortium:
-    message: "Adding internal clade labels"
+    """
+    Adding internal clade labels
+    """
     input:
         tree = rules.refine.output.tree,
         aa_muts = rules.translate.output.node_data,
@@ -52,21 +65,31 @@ rule clades_consortium:
     output:
         node_data = build_dir + "/{a_or_b}/{build_name}/{resolution}/clades_consortium.json"
     log:
-        "logs/{a_or_b}/clades_{build_name}_{resolution}.txt"
+        "logs/clades_consortium_{a_or_b}_{build_name}_{resolution}.txt"
+    benchmark:
+        "benchmarks/clades_consortium_{a_or_b}_{build_name}_{resolution}.txt"
     shell:
-        """
+        r"""
+        exec &> >(tee {log:q})
+
         augur clades --tree {input.tree} \
             --mutations {input.nuc_muts} {input.aa_muts} \
             --clades {input.clades} \
-            --output-node-data {output.node_data} 2>&1 | tee {log}
+            --output-node-data {output.node_data}
         """
 
 rule download_clades:
     output:
         clades = "results/clades_consortium_{a_or_b}.tsv"
+    log:
+        "logs/download_clades_{a_or_b}.txt"
+    benchmark:
+        "benchmarks/download_clades_{a_or_b}.txt"
     params:
         url = lambda w: f"https://raw.githubusercontent.com/rsv-lineages/lineage-designation-{w.a_or_b.upper()}/main/.auto-generated/lineages.tsv"
     shell:
-        """
+        r"""
+        exec &> >(tee {log:q})
+
         curl {params.url} --output {output.clades}
         """
diff --git a/workflow/snakemake_rules/config.smk b/workflow/snakemake_rules/config.smk
@@ -0,0 +1,8 @@
+"""
+This part of the workflow deals with configuration.
+
+OUTPUTS:
+
+    results/run_config.yaml
+"""
+write_config("results/run_config.yaml")