diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..95522b8 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,149 @@ +# AGENTS.md — AI Coding Guidelines for plotnado + +## What is plotnado? + +plotnado is a lightweight Python package for creating genome browser-style plots from genomic data files (BigWig, BED, NarrowPeak, etc.). It provides two complementary interfaces: + +1. **Python API** — fluent builder for programmatic use +2. **CLI + YAML templates** — file-driven workflow for non-programmers + +## Architecture + +``` +plotnado/ +├── figure.py # GenomicFigure — the core plotting API +├── template.py # Pydantic models for YAML templates (Template, TrackSpec, ...) +├── render.py # TemplateCompiler → RenderPlan (template-to-figure bridge) +├── theme.py # Theme / BuiltinTheme +├── tracks/ # Track implementations (BigWigTrack, BedTrack, ...) +│ ├── enums.py # Internal enums incl. TrackType (figure-layer) +│ └── ... +└── cli/ + ├── cli.py # Typer app entry point + ├── init.py # `plotnado init` — infer template from files + ├── plot.py # `plotnado plot` — render template for regions + ├── validate.py # `plotnado validate` — check template validity + ├── inference.py # Heuristics: infer track type/title from filename + └── grouping.py # Grouping strategies for init command +``` + +## Key Design Decisions + +### Python API (GenomicFigure) + +The fluent builder pattern allows chaining: + +```python +fig = ( + GenomicFigure() + .bigwig("signal.bw", title="H3K27ac") + .narrowpeak("peaks.narrowpeak") + .genes("hg38") + .axis() + .scalebar() +) +fig.save("out.png", region="chr1:1000000-2000000") +``` + +- Each method (`.bigwig()`, `.bed()`, etc.) 
appends a track and returns `self` +- `from_template(path)` builds a figure from a YAML template + +### Template / CLI layer + +YAML templates are human-readable, version-controllable, and editable: + +```yaml +genome: hg38 +tracks: + - path: signal.bw + type: bigwig + title: H3K27ac + group: sample1 +guides: + genes: true + axis: true + scalebar: true +``` + +`TemplateCompiler.compile(template)` → `RenderPlan` → `GenomicFigure` calls. + +## TrackType vs TemplateTrackType + +There are two separate enums. Do not confuse them: + +| Enum | Location | Purpose | +|---|---|---| +| `TrackType` | `plotnado/tracks/enums.py` | Internal figure enum; values match `GenomicFigure` method names | +| `TemplateTrackType` | `plotnado/template.py` | User-facing YAML vocabulary; values appear in template files | + +`TemplateTrackType` has more values (for example `annotation` and `unknown`) that map to existing figure methods. The mapping is defined in `RenderPlan.get_track_by_method()` in `render.py`. + +`TrackType = TemplateTrackType` alias exists in `template.py` for backward compatibility. + +## Adding a New Track Type + +1. Create `plotnado/tracks/mytrack.py` with a class extending `Track` +2. Add aesthetics class if needed and register field names +3. Add a method to `GenomicFigure` in `figure.py` (for example `.mytrack(data, **kwargs)`) +4. Add the alias in `GenomicFigure._alias_map()` +5. Add a `TemplateTrackType.MYTRACK = "mytrack"` value in `template.py` +6. Add the mapping in `RenderPlan.get_track_by_method()` in `render.py` +7. Export from `plotnado/tracks/__init__.py` and `plotnado/__init__.py` +8. 
Write tests + +## Method Map + +`RenderPlan.get_track_by_method()` maps `TemplateTrackType` values to `GenomicFigure` method names: + +| TemplateTrackType | GenomicFigure method | Notes | +|---|---|---| +| `bigwig` | `bigwig` | | +| `bedgraph` | `bigwig` | BigWigTrack handles bedgraph natively | +| `bed` | `bed` | | +| `narrowpeak` | `narrowpeak` | | +| `gene` | `genes` | | +| `links` | `links` | | +| `annotation` | `bed` | BED interval track with annotation semantics | +| `overlay` | `overlay` | | +| `unknown` | `bed` | Fallback | + +## Template Compilation Rules + +- `TemplateCompiler.compile()` must never mutate the `Template` argument +- Resolved group indices go into `RenderPlan.resolved_group_indices`, not back into the template +- Group references are resolved case-insensitively against track `name` or `title` fields + +## Common Pitfalls + +- Width override: always use `width if width is not None else plan.width`, never `width or plan.width` +- Bedgraph method: there is no `GenomicFigure.bedgraph()`; bedgraph files use `.bigwig()` +- `TemplateTrackType` vs `TrackType`: import the right enum for the layer you are working in +- CLI shim: `plotnado/cli/render.py` re-exports from `plotnado.render`; import from `plotnado.render` in new code + +## Testing + +Run: `uv run pytest tests/` + +| Test file | Coverage | +|---|---| +| `test_template.py` | Template round-trip, YAML serialization | +| `test_render.py` | TemplateCompiler, no-mutation guarantee, autocolor | +| `test_inference.py` | Track type/title inference heuristics | +| `test_grouping.py` | Grouping strategies | +| `test_cli.py` | CLI integration via `typer.testing.CliRunner` | + +Guidelines: +- Use `tmp_path` pytest fixture for file-based tests +- Do not mock `GenomicFigure.plot()` or `.save()` in unit tests +- The no-mutation guarantee on `TemplateCompiler.compile()` must always have a regression test + +## Dev Setup + +```bash +uv venv +source .venv/bin/activate +uv pip install -e ".[dev]" +uv run 
pytest tests/ +``` + +Entry point: `plotnado = "plotnado.cli.cli:main"` (defined in `pyproject.toml`) diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..44472d8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,141 @@ +# CLAUDE.md — AI Coding Guidelines for plotnado + +## What is plotnado? + +plotnado is a lightweight Python package for creating genome browser-style plots from genomic data files (BigWig, BED, NarrowPeak, etc.). It provides two complementary interfaces: + +1. **Python API** — fluent builder for programmatic use +2. **CLI + YAML templates** — file-driven workflow for non-programmers + +## Architecture + +``` +plotnado/ +├── figure.py # GenomicFigure — the core plotting API +├── template.py # Pydantic models for YAML templates (Template, TrackSpec, ...) +├── render.py # TemplateCompiler → RenderPlan (template-to-figure bridge) +├── theme.py # Theme / BuiltinTheme +├── tracks/ # Track implementations (BigWigTrack, BedTrack, ...) +│ ├── enums.py # Internal enums incl. TrackType (figure-layer) +│ └── ... +└── cli/ + ├── cli.py # Typer app entry point + ├── init.py # `plotnado init` — infer template from files + ├── plot.py # `plotnado plot` — render template for regions + ├── validate.py # `plotnado validate` — check template validity + ├── inference.py # Heuristics: infer track type/title from filename + └── grouping.py # Grouping strategies for init command +``` + +## Key Design Decisions + +### Python API (GenomicFigure) + +The fluent builder pattern allows chaining: + +```python +fig = ( + GenomicFigure() + .bigwig("signal.bw", title="H3K27ac") + .narrowpeak("peaks.narrowpeak") + .genes("hg38") + .axis() + .scalebar() +) +fig.save("out.png", region="chr1:1000000-2000000") +``` + +- Each method (`.bigwig()`, `.bed()`, etc.) 
appends a track and returns `self` +- `from_template(path)` builds a figure from a YAML template + +### Template / CLI layer + +YAML templates are human-readable, version-controllable, and editable: + +```yaml +genome: hg38 +tracks: + - path: signal.bw + type: bigwig + title: H3K27ac + group: sample1 +guides: + genes: true + axis: true + scalebar: true +``` + +`TemplateCompiler.compile(template)` → `RenderPlan` → `GenomicFigure` calls. + +## Adding a New Track Type + +1. Create `plotnado/tracks/mytrack.py` with a class extending `Track` +2. If it has a single primary color: `class MyAesthetics(BaseAesthetics)` (from `tracks/aesthetics.py`) + If it renders multiple series: `class MyAesthetics(BaseMultiColorAesthetics)` +3. Register the track using the decorator (it references the `TrackType.MYTRACK` member added in step 4): + ```python + from .registry import registry + from .enums import TrackType + + @registry.register(TrackType.MYTRACK, aliases=["my_alias"]) + class MyTrack(Track): ... + ``` +4. Add `MYTRACK = "mytrack"` to `TrackType` in `tracks/enums.py` +5. Add a builder method to `GenomicFigure` in `figure_methods.py`: + ```python + def mytrack(self, data: Any, /, **kwargs) -> Self: + return self.add_track(TrackType.MYTRACK, data=data, **kwargs) + ``` +6. Add a `MytrackKwargs` TypedDict entry and regenerate `_kwargs.py`: + ```bash + python scripts/generate_kwargs.py + ``` +7. Export from `plotnado/tracks/__init__.py` and `plotnado/__init__.py` +8. Write tests + +## Method Map (render.py) + +`TemplateCompiler` and `GenomicFigure` no longer maintain a separate method map. +Track lookup is centralized in `plotnado/tracks/registry.py`, and aliases such as +`bedgraph`, `annotation`, `unknown`, `bigwig_overlay`, and `scale` resolve there. 
+ +## Template Compilation Rules + +- `TemplateCompiler.compile()` must **never mutate** the `Template` argument +- Resolved group indices go into `RenderPlan.resolved_group_indices`, not back into the template +- Group references are resolved case-insensitively against track `name` or `title` fields + +## Common Pitfalls + +- **Width override**: always use `width if width is not None else plan.width`, never `width or plan.width` (breaks when width=0.0) +- **TrackType source of truth**: use `plotnado.tracks.enums.TrackType` for both Python and template codepaths +- **Registry lookup**: if you need alias → class resolution, use `plotnado.tracks.registry.registry`, not a hard-coded method map +- **CLI shim**: `plotnado/cli/render.py` re-exports from `plotnado.render` — always import from `plotnado.render` in new code + +## Testing + +Run: `uv run pytest tests/` + +| Test file | Coverage | +|---|---| +| `test_template.py` | Template round-trip, YAML serialization | +| `test_render.py` | TemplateCompiler, no-mutation guarantee, autocolor | +| `test_inference.py` | Track type/title inference heuristics | +| `test_grouping.py` | Grouping strategies | +| `test_cli.py` | CLI integration via `typer.testing.CliRunner` | + +Guidelines: +- Use `tmp_path` pytest fixture for file-based tests +- Do not mock `GenomicFigure.plot()` or `.save()` in unit tests +- The no-mutation guarantee on `TemplateCompiler.compile()` must always have a regression test + +## Dev Setup + +```bash +uv venv +source .venv/bin/activate +uv pip install -e ".[dev]" +uv run pytest tests/ +``` + +Entry point: `plotnado = "plotnado.cli.cli:main"` (defined in pyproject.toml) diff --git a/README.md b/README.md index 2c945bb..02ae9fa 100644 --- a/README.md +++ b/README.md @@ -2,48 +2,59 @@ [![Tests](https://github.com/alsmith151/plotnado/actions/workflows/run_tests.yml/badge.svg)](https://github.com/alsmith151/plotnado/actions/workflows/run_tests.yml) -PlotNado is a lightweight Python package for building 
genome browser-style figures with a modern, chainable API. +PlotNado is a lightweight Python package for building genome browser-style figures. + +It supports two complementary workflows: + +- A chainable Python API built around `GenomicFigure` +- A template-driven CLI for generating, validating, and rendering YAML specs + +Use the Python API if you want plotting in notebooks or scripts. +Use the CLI if you want an editable YAML template and a file-driven workflow. -> [!NOTE] -> This version of PlotNado is independent and does not require CoolBox. ## Install +`uv` is the recommended way to work with PlotNado. + +For a project-local environment: + ```bash -pip install plotnado +uv venv +source .venv/bin/activate +uv pip install plotnado ``` -For development from source: +For a global CLI install: ```bash -git clone https://github.com/alsmith151/plotnado -cd plotnado -pip install -e .[dev,docs] - -# required developer setup: run local quality hooks before every commit -pip install pre-commit -pre-commit install +uv tool install plotnado ``` -## Developer workflow (important) - -- Do not edit autogenerated typing surfaces directly: - - `plotnado/figure.pyi` - - auto-generated overload blocks in `plotnado/figure.py` (`# BEGIN/END AUTO-GENERATED OVERLOAD`) -- Make API changes in the source Pydantic models first (track models/aesthetics/labels), then regenerate artifacts: +If you prefer standard Python tooling, `pip` also works: ```bash -python scripts/generate_figure_overloads.py -python scripts/generate_figure_stub.py +python -m venv .venv +source .venv/bin/activate +pip install plotnado ``` -- Always run pre-commit before pushing: +If you use Conda: ```bash -pre-commit run --all-files +conda create -n plotnado python=3.12 +conda activate plotnado +pip install plotnado ``` -## Quick Start +## Start Here + +Choose the workflow that matches how you want to work: + +- Python API: build figures directly in code +- CLI + YAML template: infer a template from files, edit it, 
then render plots + +## Python Quick Start ```python from plotnado import GenomicFigure @@ -66,7 +77,75 @@ fig.bigwig(signal, title="Synthetic signal", style="fill", color="#1f77b4") fig.save("quickstart.png", "chr1:1,010,000-1,080,000") ``` -`GenomicFigure()` now uses publication-style defaults automatically. Use `GenomicFigure(theme=None)` to opt out. +`GenomicFigure()` uses publication-style defaults automatically. Use `GenomicFigure(theme=None)` to opt out. + +## CLI Quick Start + +Generate a template from your files: + +```bash +plotnado init sample1.bw sample2.bw peaks.narrowpeak --auto --output template.yaml +``` + +Validate it: + +```bash +plotnado validate template.yaml +``` + +Render a region: + +```bash +plotnado plot template.yaml --region chr1:1,000,000-1,100,000 --output browser_view.png +``` + +You can also plot by gene name when the template defines `genome`: + +```bash +plotnado plot template.yaml --region GNAQ +``` + +If you want to return to Python after generating a template: + +```python +from plotnado import GenomicFigure + +fig = GenomicFigure.from_template("template.yaml") +fig.save("browser_view.png", region="chr1:1,000,000-1,100,000") +``` + +## What The Template Looks Like + +The generated YAML is meant to be human-editable: + +```yaml +genome: hg38 +guides: + genes: true +tracks: + - path: sample1.bw + type: bigwig + title: sample1 + - path: peaks.narrowpeak + type: narrowpeak + title: peaks +``` + +## Key Features + +- Chainable `GenomicFigure` API for fast composition. +- Template-driven CLI built around `init`, `validate`, and `plot`. +- Alias-based track creation (`fig.add_track("bigwig", ...)`). +- Built-in themes, autocolor, autoscale, and highlight overlays. +- Broad track support: BigWig, BED, narrowPeak, genes, axis, scalebar, links, `OverlayTrack`, cooler-based matrix tracks. 
+ +## Learn More + +- Docs home: `docs/index.md` +- Quick start: `docs/quickstart.md` +- CLI guide: `docs/cli.md` +- Track catalog + options: `docs/track_catalog.md` +- API and generated references: `docs/api_reference.md` ## Examples @@ -80,14 +159,6 @@ fig.save("quickstart.png", "chr1:1,010,000-1,080,000") All scripts write outputs to `examples/output/`. -## Key Features - -- Chainable `GenomicFigure` API for fast composition. -- Alias-based track creation (`fig.add_track("bigwig", ...)`). -- Track option introspection at runtime. -- Built-in themes, autocolor, autoscale, and highlight overlays. -- Broad track support: BigWig, BED, narrowPeak, genes, axis, scalebar, links, `OverlayTrack`, cooler-based matrix tracks. - ## Discover Track Options ```python @@ -99,25 +170,43 @@ GenomicFigure.track_options_markdown("bigwig") BigWigTrack.options_markdown() ``` -## Documentation +## Build Docs -- Docs home: `docs/index.md` -- Quick start: `docs/quickstart.md` -- Track catalog + options: `docs/track_catalog.md` -- API and generated references: `docs/api_reference.md` +```bash +uv sync --extra docs +uv run mkdocs build --strict +uv run mkdocs serve +``` + +## Development -Build docs locally: +For development from source: ```bash -pip install -e .[docs] -mkdocs build --strict -mkdocs serve +git clone https://github.com/alsmith151/plotnado +cd plotnado +uv venv +source .venv/bin/activate +uv sync --extra dev --extra docs + +# required developer setup: run local quality hooks before every commit +uv run pre-commit install ``` -## CLI +Developer workflow: -```python -plotnado plot chr1:1,000,000-1,100,000 -o browser_view.png -plotnado track-options bigwig -plotnado track-options --all --output-format json +- Do not edit autogenerated typing surfaces directly: + - `plotnado/figure.pyi` + - auto-generated overload blocks in `plotnado/figure.py` (`# BEGIN/END AUTO-GENERATED OVERLOAD`) +- Make API changes in the source Pydantic models first (track models/aesthetics/labels), then 
regenerate artifacts: + +```bash +uv run python scripts/generate_figure_overloads.py +uv run python scripts/generate_figure_stub.py +``` + +- Always run pre-commit before pushing: + +```bash +uv run pre-commit run --all-files ``` diff --git a/docs/api_reference.md b/docs/api_reference.md index 86d83c9..05f78f3 100644 --- a/docs/api_reference.md +++ b/docs/api_reference.md @@ -58,7 +58,9 @@ gf.bigwig("sampleA.bw", title="A signal", color_group="sampleA") ## Common entry points -- `plotnado.GenomicFigure`: high-level composition (`add_track`, `plot`, `plot_regions`, `plot_gene`, `to_toml`, `from_toml`). +- `plotnado.GenomicFigure`: high-level composition (`add_track`, `plot`, `plot_regions`, `plot_gene`, `from_template`, `to_toml`, `from_toml`). +- `plotnado.Template`: YAML model used by the CLI and `GenomicFigure.from_template()`. +- `plotnado.TemplateCompiler`: converts a `Template` into a reusable render plan. - `plotnado.Theme`: built-in or custom visual defaults. - `plotnado.tracks.*`: concrete track classes when you want explicit model construction. diff --git a/docs/best_practices.md b/docs/best_practices.md index c284273..869782a 100644 --- a/docs/best_practices.md +++ b/docs/best_practices.md @@ -60,8 +60,8 @@ GenomicFigure.track_options_markdown("genes") - Run tests and docs build before release: ```bash -pytest tests/ -v -mkdocs build --strict +uv run pytest tests/ -v +uv run mkdocs build --strict ``` - Prefer example scripts in `examples/` as regression fixtures for plotting behavior. diff --git a/docs/cli.md b/docs/cli.md index 66c698a..595f43d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1,21 +1,102 @@ # CLI -PlotNado includes a lightweight CLI. +PlotNado includes a template-driven CLI for users who prefer a file-based workflow. -## Plot a region +The standard flow is: + +1. `plotnado init` to infer a YAML template from track files +2. `plotnado validate` to catch missing files or bad group references +3. 
`plotnado plot` to render one or more regions from that template + +## `plotnado init` + +Generate a starting template from track files: + +```bash +plotnado init sample1.bw sample2.bw peaks.narrowpeak --auto --output template.yaml +``` + +Useful options: + +- `--output` / `-o`: where to write the YAML file +- `--genome` / `-g`: set a default genome such as `hg38` or `mm10` +- `--group-by`: group tracks by a predefined strategy or regex +- `--auto`: skip interactive prompts and use inferred defaults +- `--no-genes`: do not add a genes guide track by default + +Examples: ```bash -plotnado plot chr1:1,000,000-1,100,000 -o output.png +plotnado init *.bw --auto +plotnado init sample1_H3K27ac.bw sample1_H3K4me3.bw sample2_H3K27ac.bw --group-by sample +plotnado init control_r1.bw control_r2.bw treat_r1.bw treat_r2.bw --group-by '([^_]+)_r[0-9]' +``` + +The generated template is plain YAML and intended to be edited. A typical file looks like: + +```yaml +genome: hg38 +guides: + genes: true +tracks: + - path: sample1.bw + type: bigwig + title: sample1 + group: sample + - path: peaks.narrowpeak + type: narrowpeak + title: peaks ``` -## Discover track options +## `plotnado validate` + +Validate a template before rendering: ```bash -plotnado track-options -plotnado track-options bigwig -plotnado track-options bigwig --section aesthetics -plotnado track-options --all --output-format json -plotnado track-options genes --output-format markdown +plotnado validate template.yaml ``` -The CLI option metadata is generated from the same Pydantic models used by the Python API. 
+Validation checks: + +- template can be loaded as YAML +- local track files exist +- group references resolve against track `name` or `title` +- the template compiles into a render plan cleanly + +## `plotnado plot` + +Render a template for one or more regions: + +```bash +plotnado plot template.yaml --region chr1:1,000,000-1,100,000 --output output.png +``` + +Useful options: + +- `--region` / `-r`: genomic region or gene name; repeat for multiple outputs +- `--output` / `-o`: explicit output path for a single region +- `--format` / `-f`: output format such as `png`, `pdf`, `svg`, or `jpg` +- `--width` / `-w`: override template width in inches +- `--dpi`: output resolution + +Examples: + +```bash +plotnado plot template.yaml --region chr1:1M-2M +plotnado plot template.yaml --region GNAQ +plotnado plot template.yaml --region chr1:1M-2M --region chr2:5M-6M +plotnado plot template.yaml --region chr1:1,000,000-2,000,000 --output plot.pdf --dpi 300 +``` + +Gene-name resolution requires `genome` to be defined in the template. + +## Python bridge + +The CLI template format is not separate from the Python API. The same template can be loaded directly in Python: + +```python +from plotnado import GenomicFigure + +fig = GenomicFigure.from_template("template.yaml") +fig.save("output.png", region="chr1:1,000,000-1,100,000") +``` diff --git a/docs/index.md b/docs/index.md index 4e4d220..b2e3469 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,12 +2,19 @@ ![PlotNado logo](assets/plotnado-logo.svg) -PlotNado is a Python library for clean, publication-ready genome browser figures with a fast, chainable API. +PlotNado is a Python library for clean, publication-ready genome browser figures. 
+ +It exposes two parallel interfaces: + +- A fluent Python API for programmatic figure construction +- A CLI that turns genomic files into editable YAML templates and rendered plots ## Install ```bash -pip install plotnado +uv venv +source .venv/bin/activate +uv pip install plotnado ``` ## Preferred workflow style @@ -27,6 +34,16 @@ gf.axis() gf.plot_gene("GNAQ") ``` +## Template-driven workflow + +```bash +plotnado init *.bw peaks/*.narrowpeak --auto --output template.yaml +plotnado validate template.yaml +plotnado plot template.yaml --region chr1:1,000,000-1,100,000 --output region.png +``` + +Templates can also be consumed from Python with `GenomicFigure.from_template("template.yaml")`. + ## Read by task - New environment setup: [Installation](installation.md) @@ -34,4 +51,5 @@ gf.plot_gene("GNAQ") - Ways to add tracks: [Track Construction](quickstart_tracks.md) - Production guidance: [Best Practices](best_practices.md) - Track families and support matrix: [Track Catalog](track_catalog.md) -- CLI and runtime option discovery: [CLI](cli.md) and [API Reference](api_reference.md) +- Template workflow and command reference: [CLI](cli.md) +- API details and runtime option discovery: [API Reference](api_reference.md) diff --git a/docs/installation.md b/docs/installation.md index 4a02827..808f081 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -3,33 +3,107 @@ ## Requirements - Python 3.12+ -- `pip` +- `uv` recommended, `pip` supported -## Install from PyPI +## Install `uv` + +If you do not already have `uv`, install it first: + +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +On macOS you can also use: + +```bash +brew install uv +``` + +## Install PlotNado with `uv` + +For a local virtual environment: + +```bash +uv venv +source .venv/bin/activate +uv pip install plotnado +``` + +For a global CLI install: + +```bash +uv tool install plotnado +``` + +## Install PlotNado with `pip` + +If you prefer the standard Python packaging 
workflow: + +```bash +python -m venv .venv +source .venv/bin/activate +pip install plotnado +``` + +## Install PlotNado in a Conda environment + +If you already use Conda or Miniforge for scientific Python work: ```bash +conda create -n plotnado python=3.12 +conda activate plotnado pip install plotnado ``` -## Development install +For development in a Conda environment: ```bash git clone https://github.com/alsmith151/plotnado cd plotnado +conda create -n plotnado-dev python=3.12 +conda activate plotnado-dev pip install -e .[dev,docs] pre-commit install ``` +## Development install with `uv` + +```bash +git clone https://github.com/alsmith151/plotnado +cd plotnado +uv venv +source .venv/bin/activate +uv sync --extra dev --extra docs +uv run pre-commit install +``` + ## Verify installation ```bash -python -c "import plotnado; print(plotnado.__version__)" -plotnado track-options bigwig +uv run python -c "import plotnado; print(plotnado.__version__)" +uv run plotnado validate --help ``` ## Build docs locally ```bash -mkdocs build --strict -mkdocs serve +uv run mkdocs build --strict +uv run mkdocs serve +``` + +## Run tests + +```bash +uv run pytest tests/ +``` + +## Development install with `pip` + +If you prefer not to use `uv`, the traditional workflow still works: + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -e ".[dev,docs]" +pre-commit install ``` diff --git a/docs/quickstart.md b/docs/quickstart.md index 7ba2a2a..d2da95b 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -2,6 +2,11 @@ This guide gets you from installation to a first saved figure quickly. +PlotNado has two common entry points: + +- Use the Python API when you want figure construction in code. +- Use the CLI when you want an editable YAML template and a file-driven workflow. 
+ ## 1) Build a minimal figure ```python @@ -33,8 +38,28 @@ gf.save("quickstart.png", "chr1:1,010,000-1,080,000") python examples/quickstart/01_first_plot.py ``` -## 3) Next steps +## 3) Generate a template with the CLI + +```bash +plotnado init sample1.bw sample2.bw peaks.narrowpeak --auto --output template.yaml +plotnado validate template.yaml +plotnado plot template.yaml --region chr1:1,000,000-1,100,000 --output quickstart-cli.png +``` + +`plotnado plot` also accepts gene symbols such as `--region GNAQ` when the template has a `genome` value. + +If you want to stay in Python after generating a template: + +```python +from plotnado import GenomicFigure + +gf = GenomicFigure.from_template("template.yaml") +gf.save("quickstart-from-template.png", region="chr1:1,000,000-1,100,000") +``` + +## 4) Next steps - Learn all track-add patterns: [Track Construction](quickstart_tracks.md) +- Learn the template workflow: [CLI](cli.md) - Review practical defaults: [Best Practices](best_practices.md) - Explore track families: [Track Catalog](track_catalog.md) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index ecabbeb..864d5dd 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -19,8 +19,8 @@ GenomicFigure.available_track_aliases() PlotNado requires Python 3.12+. 
```bash -python --version -mkdocs build --strict +uv run python --version +uv run mkdocs build --strict ``` ## TOML export/import errors @@ -28,5 +28,5 @@ mkdocs build --strict Install optional dependencies: ```bash -pip install -e .[docs] +uv sync --extra docs ``` diff --git a/plotnado/__init__.py b/plotnado/__init__.py index d3cd838..9685dcc 100644 --- a/plotnado/__init__.py +++ b/plotnado/__init__.py @@ -7,6 +7,8 @@ from .figure import GenomicFigure from .theme import Theme +from .template import Template, TrackSpec, GuideSpec, GroupSpec +from .render import TemplateCompiler, RenderPlan, ResolvedTrack from .tracks import ( # Enums CollectionStyle, @@ -77,6 +79,15 @@ __all__ = [ "GenomicFigure", "Theme", + # Template system + "Template", + "TrackSpec", + "GuideSpec", + "GroupSpec", + # Render pipeline + "TemplateCompiler", + "RenderPlan", + "ResolvedTrack", # Enums "DisplayMode", "CollectionStyle", diff --git a/plotnado/_kwargs.py b/plotnado/_kwargs.py index 795103d..1c4e167 100644 --- a/plotnado/_kwargs.py +++ b/plotnado/_kwargs.py @@ -1,7 +1,12 @@ +# AUTO-GENERATED - do not edit manually. +# Re-generate with: python scripts/generate_kwargs.py +# +# This file provides TypedDict definitions for GenomicFigure builder +# methods, enabling IDE autocompletion and type checking. 
from __future__ import annotations from pathlib import Path -from typing import Any, TypedDict +from typing import Any, Literal, TypedDict import pandas as pd @@ -18,9 +23,6 @@ PlotStyle, Position, ) -from .tracks.region import GenomicRegion -from .tracks.base import Track - class BigwigKwargs(TypedDict, total=False): @@ -30,11 +32,11 @@ class BigwigKwargs(TypedDict, total=False): color_group: str | None y_min: float | None y_max: float | None - style: PlotStyle color: str - fill: bool alpha: float linewidth: float + style: PlotStyle + fill: bool scatter_point_size: float show_baseline: bool baseline_color: str @@ -67,17 +69,18 @@ class BigwigKwargs(TypedDict, total=False): class GenesKwargs(TypedDict, total=False): title: str | None - data: Path | str | DataFrame | None + data: Path | str | pd.DataFrame | None height: float autoscale_group: str | None color_group: str | None row_scale: float small_relative: float gene_count: int - style: PlotStyle color: str - fill: bool alpha: float + linewidth: float + style: PlotStyle + fill: bool display: DisplayMode minimum_gene_length: int max_number_of_rows: int @@ -135,6 +138,8 @@ class AxisKwargs(TypedDict, total=False): color_group: str | None show_chromosome: bool color: str + alpha: float + linewidth: float font_size: int num_ticks: int use_human_readable_labels: bool @@ -169,8 +174,10 @@ class ScalebarKwargs(TypedDict, total=False): height: float autoscale_group: str | None color_group: str | None - style: PlotStyle color: str + alpha: float + linewidth: float + style: PlotStyle position: Position scale_distance: float | None font_size: int @@ -229,8 +236,9 @@ class BedKwargs(TypedDict, total=False): autoscale_group: str | None color_group: str | None color: str - edge_color: str alpha: float + linewidth: float + edge_color: str interval_height: float display: DisplayMode max_rows: int @@ -291,6 +299,72 @@ class CoolerKwargs(TypedDict, total=False): scale_font: str scale_weight: FontWeight +class 
CapcruncherKwargs(TypedDict, total=False): + title: str | None + data: Any | None + height: float + autoscale_group: str | None + color_group: str | None + resolution: int | None + balance: bool + transform: CoolerTransform + viewpoint: str | None + normalisation: str | None + cmap: str + min_value: float | None + max_value: float | None + plot_title: bool + plot_scale: bool + label_on_track: bool + data_range_style: DataRangeStyle + label_box_enabled: bool + label_box_alpha: float + title_location: Position + title_height: float + title_size: int + title_color: str + title_font: str + title_weight: FontWeight + scale_location: Position + scale_height: float + scale_precision: int + scale_size: int + scale_color: str + scale_font: str + scale_weight: FontWeight + +class CoolerAverageKwargs(TypedDict, total=False): + title: str | None + data: Any | None + height: float + autoscale_group: str | None + color_group: str | None + resolution: int | None + balance: bool + transform: CoolerTransform + cmap: str + min_value: float | None + max_value: float | None + plot_title: bool + plot_scale: bool + label_on_track: bool + data_range_style: DataRangeStyle + label_box_enabled: bool + label_box_alpha: float + title_location: Position + title_height: float + title_size: int + title_color: str + title_font: str + title_weight: FontWeight + scale_location: Position + scale_height: float + scale_precision: int + scale_size: int + scale_color: str + scale_font: str + scale_weight: FontWeight + class BigwigCollectionKwargs(TypedDict, total=False): title: str | None data: Any | None @@ -298,8 +372,9 @@ class BigwigCollectionKwargs(TypedDict, total=False): autoscale_group: str | None color_group: str | None colors: list[str] | None - labels: list[str] | None alpha: float + linewidth: float + labels: list[str] | None style: CollectionStyle plot_title: bool plot_scale: bool @@ -328,9 +403,11 @@ class BigwigDiffKwargs(TypedDict, total=False): autoscale_group: str | None color_group: 
str | None method: BigWigDiffMethod + color: str + alpha: float + linewidth: float positive_color: str negative_color: str - linewidth: float bar_alpha: float zero_line_color: str zero_line_width: float @@ -363,6 +440,7 @@ class BigwigOverlayKwargs(TypedDict, total=False): color_group: str | None colors: list[str] | None alpha: float + linewidth: float show_labels: bool min_value: float | None max_value: float | None @@ -394,6 +472,7 @@ class OverlayKwargs(TypedDict, total=False): color_group: str | None colors: list[str] | None alpha: float + linewidth: float show_labels: bool min_value: float | None max_value: float | None @@ -423,8 +502,9 @@ class NarrowpeakKwargs(TypedDict, total=False): autoscale_group: str | None color_group: str | None color: str - edge_color: str alpha: float + linewidth: float + edge_color: str interval_height: float display: DisplayMode max_rows: int @@ -466,9 +546,9 @@ class LinksKwargs(TypedDict, total=False): autoscale_group: str | None color_group: str | None color: str - edge_color: str | None alpha: float linewidth: float + edge_color: str | None cmap: str max_height: float color_by_score: bool @@ -502,8 +582,8 @@ class HighlightsKwargs(TypedDict, total=False): color_group: str | None color: str alpha: float - edge_color: str | None linewidth: float + edge_color: str | None plot_title: bool plot_scale: bool label_on_track: bool @@ -531,9 +611,9 @@ class HlineKwargs(TypedDict, total=False): autoscale_group: str | None color_group: str | None color: str - linestyle: str - linewidth: float alpha: float + linewidth: float + linestyle: str zorder: int plot_title: bool plot_scale: bool @@ -562,9 +642,9 @@ class VlineKwargs(TypedDict, total=False): autoscale_group: str | None color_group: str | None color: str - linestyle: str - linewidth: float alpha: float + linewidth: float + linestyle: str zorder: int plot_title: bool plot_scale: bool @@ -586,72 +666,6 @@ class VlineKwargs(TypedDict, total=False): scale_font: str scale_weight: 
FontWeight -class CapcruncherKwargs(TypedDict, total=False): - title: str | None - data: Any | None - height: float - autoscale_group: str | None - color_group: str | None - resolution: int | None - balance: bool - transform: CoolerTransform - viewpoint: str | None - normalisation: str | None - cmap: str - min_value: float | None - max_value: float | None - plot_title: bool - plot_scale: bool - label_on_track: bool - data_range_style: DataRangeStyle - label_box_enabled: bool - label_box_alpha: float - title_location: Position - title_height: float - title_size: int - title_color: str - title_font: str - title_weight: FontWeight - scale_location: Position - scale_height: float - scale_precision: int - scale_size: int - scale_color: str - scale_font: str - scale_weight: FontWeight - -class CoolerAverageKwargs(TypedDict, total=False): - title: str | None - data: Any | None - height: float - autoscale_group: str | None - color_group: str | None - resolution: int | None - balance: bool - transform: CoolerTransform - cmap: str - min_value: float | None - max_value: float | None - plot_title: bool - plot_scale: bool - label_on_track: bool - data_range_style: DataRangeStyle - label_box_enabled: bool - label_box_alpha: float - title_location: Position - title_height: float - title_size: int - title_color: str - title_font: str - title_weight: FontWeight - scale_location: Position - scale_height: float - scale_precision: int - scale_size: int - scale_color: str - scale_font: str - scale_weight: FontWeight - class QuantnadoCoverageKwargs(TypedDict, total=False): title: str | None data: Any | None diff --git a/plotnado/cli/cli.py b/plotnado/cli/cli.py index 2be71fe..7912318 100644 --- a/plotnado/cli/cli.py +++ b/plotnado/cli/cli.py @@ -1,197 +1,29 @@ """ Plotnado command-line interface. -""" -import json -import pathlib -from enum import Enum +Primary workflow: + 1. plotnado init --output template.yaml + Generates a template from track files using inference heuristics + + 2. 
Edit template.yaml as needed (optional) + + 3. plotnado plot template.yaml --region chr:start-end [--output out.png] + Renders the template for the specified region + + 4. plotnado validate template.yaml + Validates and explains the template +""" import typer -from typing_extensions import Annotated - -import plotnado as pn - - -class OutputFormat(str, Enum): - """Supported output formats.""" - - png = "png" - svg = "svg" - pdf = "pdf" - jpg = "jpg" - jpeg = "jpeg" - tiff = "tiff" - - -app = typer.Typer(help="Plotnado - Simple genomic track visualization") - - -def _markdown_cell(value: object) -> str: - return str(value).replace("|", "\\|") - - -def _emit_options_table(track_alias: str, options: dict[str, dict], section: str | None) -> None: - sections = [section] if section else ["track", "aesthetics", "label"] - typer.echo(f"\n[{track_alias}]") - for section_name in sections: - section_options = options.get(section_name, {}) - typer.echo(f" {section_name}:") - if not section_options: - typer.echo(" (none)") - continue - for field_name, meta in section_options.items(): - choices = meta.get("choices") or [] - choices_text = ",".join(str(choice) for choice in choices) if choices else "—" - typer.echo( - " " - f"{field_name}: type={meta['type']}, default={meta['default']}, " - f"choices={choices_text}, required={meta['required']}" - ) - - -@app.command("track-options") -def track_options( - track: Annotated[ - str | None, - typer.Argument( - help="Track alias to inspect (e.g. bigwig, genes, axis). Omit to list aliases." 
- ), - ] = None, - all_tracks: Annotated[ - bool, - typer.Option("--all", help="Show options for all track aliases"), - ] = False, - output_format: Annotated[ - str, - typer.Option("--output-format", "-f", help="Output format: table, markdown, or json"), - ] = "table", - section: Annotated[ - str | None, - typer.Option("--section", help="Optional section filter: track, aesthetics, or label"), - ] = None, -): - """Inspect available kwargs for each track alias. - - Examples: - plotnado track-options - plotnado track-options bigwig - plotnado track-options bigwig --section aesthetics - plotnado track-options bigwig -f markdown - plotnado track-options --all -f json - """ - aliases = pn.GenomicFigure.available_track_aliases() - valid_sections = {"track", "aesthetics", "label"} - valid_formats = {"table", "markdown", "json"} - - if output_format not in valid_formats: - raise typer.BadParameter( - f"--output-format must be one of: {', '.join(sorted(valid_formats))}" - ) - if section is not None and section not in valid_sections: - raise typer.BadParameter(f"--section must be one of: {', '.join(sorted(valid_sections))}") - if track and all_tracks: - raise typer.BadParameter("Use either a track alias argument or --all, not both") - - if track is None and not all_tracks: - typer.echo("Available track aliases:") - for alias, class_name in sorted(aliases.items()): - typer.echo(f" {alias:18} -> {class_name}") - typer.echo("\nUse `plotnado track-options ` for full option details.") - return - - requested_aliases = [track] if track else sorted(aliases.keys()) - normalized_aliases = [alias.lower() for alias in requested_aliases] - unknown_aliases = [alias for alias in normalized_aliases if alias not in aliases] - if unknown_aliases: - raise typer.BadParameter( - f"Unknown alias(es): {', '.join(unknown_aliases)}. 
" - f"Available: {', '.join(sorted(aliases.keys()))}" - ) - - if output_format == "json": - payload = { - alias: pn.GenomicFigure.track_options(alias) - for alias in normalized_aliases - } - if section: - payload = { - alias: {section: data.get(section, {})} - for alias, data in payload.items() - } - typer.echo(json.dumps(payload, indent=2)) - return - - if output_format == "markdown": - for index, alias in enumerate(normalized_aliases): - if section: - options = pn.GenomicFigure.track_options(alias) - typer.echo(f"## {alias}\n") - typer.echo(f"### {section.title()} fields\n") - typer.echo("| Name | Type | Default | Choices | Required | Description |") - typer.echo("|---|---|---|---|---|---|") - for field_name, meta in options.get(section, {}).items(): - choices = meta.get("choices") or [] - choices_text = _markdown_cell( - ", ".join(str(choice) for choice in choices) if choices else "—" - ) - type_cell = _markdown_cell(meta["type"]) - default_cell = _markdown_cell(meta["default"]) - required_cell = _markdown_cell(meta["required"]) - description_cell = _markdown_cell(meta.get("description") or "—") - typer.echo( - f"| {field_name} | {type_cell} | {default_cell} | {choices_text} | {required_cell} | {description_cell} |" - ) - else: - typer.echo(pn.GenomicFigure.track_options_markdown(alias)) - if index != len(normalized_aliases) - 1: - typer.echo("\n") - return - - for alias in normalized_aliases: - _emit_options_table(alias, pn.GenomicFigure.track_options(alias), section) - - -@app.command() -def plot( - coordinates: Annotated[ - str, - typer.Argument(help="Coordinates to plot in format: CHR:START-END"), - ], - output: Annotated[ - pathlib.Path | None, - typer.Option("--output", "-o", help="Output file path"), - ] = None, - output_format: Annotated[ - OutputFormat, - typer.Option("--format", "-f", help="Output format"), - ] = OutputFormat.png, - width: Annotated[ - float, - typer.Option("--width", "-w", help="Figure width in inches"), - ] = 12.0, - dpi: Annotated[ 
- int, - typer.Option("--dpi", help="Resolution in dots per inch"), - ] = 600, -): - """ - Create a simple genome browser plot. - - Example: - plotnado chr1:1000000-2000000 -o output.png - """ - # Create a basic figure with scale and genes - figure = pn.GenomicFigure(width=width) - figure.add_track("scalebar") - figure.add_track("genes", genome="hg38") - # Determine output path - if output is None: - output = pathlib.Path(f"{coordinates.replace(':', '_')}.{output_format.value}") +# Initialize the main CLI app +app = typer.Typer( + help="PlotNado - Heuristic templates for genomic track visualization", + no_args_is_help=True, +) - # Save the plot - figure.save(output, coordinates, dpi=dpi) - typer.echo(f"Saved plot to {output}") +# Import commands - this registers them with the app +from . import init, plot, validate # noqa: F401 def main(): diff --git a/plotnado/cli/grouping.py b/plotnado/cli/grouping.py new file mode 100644 index 0000000..1242654 --- /dev/null +++ b/plotnado/cli/grouping.py @@ -0,0 +1,277 @@ +""" +Grouping strategies for organizing tracks into shared scaling/coloring groups. + +Supports both predefined strategies (seqnado-aware) and custom regex patterns. +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from typing import Optional +import re + +from plotnado.cli.inference import SeqnadoPattern + + +@dataclass +class GroupingResult: + """Result of applying a grouping strategy.""" + + groups: dict[str, list[int]] # group_name -> list of track indices + strategy_name: str # Name of the strategy used + explanation: str # Human-readable explanation of the grouping + + +class GroupingStrategy(ABC): + """Abstract base class for grouping strategies.""" + + @abstractmethod + def apply(self, paths: list[str]) -> Optional[GroupingResult]: + """ + Apply the strategy to a list of file paths. 
+ + Returns: + GroupingResult if strategy applies, None if not applicable + """ + pass + + +class HistoneMarkStrategy(GroupingStrategy): + """Group by histone mark (H3K*, H4K*, etc.) detected in filenames.""" + + # Common histone marks pattern: H3K4me3, H3K27ac, H4K20me1, H2AK119ub, etc. + HISTONE_MARK_PATTERN = re.compile( + r'(H[0-9]K[0-9]+(?:me|ac|ph|ub)[0-9]*)', + re.IGNORECASE + ) + + def apply(self, paths: list[str]) -> Optional[GroupingResult]: + """Group files by histone mark if detected in filenames.""" + marks: dict[str, list[int]] = {} + + for i, path in enumerate(paths): + filename = Path(path).name + match = self.HISTONE_MARK_PATTERN.search(filename) + if match: + mark = match.group(1) # e.g., "H3K4me3" + if mark not in marks: + marks[mark] = [] + marks[mark].append(i) + + # Only return if we found marks in all files + if len(marks) == 0 or sum(len(v) for v in marks.values()) != len(paths): + return None + + # Only group marks that appear multiple times + multi_mark_groups = {k: v for k, v in marks.items() if len(v) > 1} + single_mark_groups = {k: v for k, v in marks.items() if len(v) == 1} + + # Combine: multi-track groups first, then single tracks + all_groups = {**multi_mark_groups, **single_mark_groups} + + if not all_groups: + return None + + return GroupingResult( + groups={f"{name.lower()}_group": indices for name, indices in all_groups.items()}, + strategy_name="histone-mark", + explanation=f"Grouped by histone marks: {', '.join(sorted(all_groups.keys()))}", + ) + + +class SeqnadoSampleStrategy(GroupingStrategy): + """Group by sample name (seqnado SAMPLE_ANTIBODY.bw pattern).""" + + def apply(self, paths: list[str]) -> Optional[GroupingResult]: + """Group seqnado files by sample name.""" + seqnado_files = [SeqnadoPattern.parse(Path(p).name) for p in paths] + all_seqnado = all(s is not None for s in seqnado_files) + + if not all_seqnado: + return None + + samples: dict[str, list[int]] = {} + for i, (sample, antibody) in enumerate(seqnado_files): 
+ if sample not in samples: + samples[sample] = [] + samples[sample].append(i) + + # Only return groups with multiple members + groups = {k: v for k, v in samples.items() if len(v) > 1} + + if not groups: + return None + + return GroupingResult( + groups={f"{name}_autoscale": indices for name, indices in groups.items()}, + strategy_name="seqnado-sample", + explanation="Grouped by sample name (seqnado pattern: SAMPLE_ANTIBODY.bw)", + ) + + +class SeqnadoAntibodyStrategy(GroupingStrategy): + """Group by antibody name (seqnado SAMPLE_ANTIBODY.bw pattern).""" + + def apply(self, paths: list[str]) -> Optional[GroupingResult]: + """Group seqnado files by antibody name.""" + seqnado_files = [SeqnadoPattern.parse(Path(p).name) for p in paths] + all_seqnado = all(s is not None for s in seqnado_files) + + if not all_seqnado: + return None + + antibodies: dict[str, list[int]] = {} + for i, (sample, antibody) in enumerate(seqnado_files): + if antibody not in antibodies: + antibodies[antibody] = [] + antibodies[antibody].append(i) + + # Only return groups with multiple members + groups = {k: v for k, v in antibodies.items() if len(v) > 1} + + if not groups: + return None + + return GroupingResult( + groups={f"{name}_autoscale": indices for name, indices in groups.items()}, + strategy_name="seqnado-antibody", + explanation="Grouped by antibody name (seqnado pattern: SAMPLE_ANTIBODY.bw)", + ) + + +class RegexGroupingStrategy(GroupingStrategy): + """Group files by regex pattern matching.""" + + def __init__(self, pattern: str): + """ + Initialize with a regex pattern. + + The pattern should contain a capturing group for the group name. 
+ Examples: + r'([^_]+)_rep[0-9]+' # Matches: control_rep1, control_rep2 -> groups as "control" + r'(.*?)_[0-9]{8}$' # Matches: sample_20260312 -> group as "sample" + r'([ACGT]+)\\.bw' # Matches: ACGTNN.bw -> group as "ACGTNN" + """ + self.pattern = re.compile(pattern, re.IGNORECASE) + + def apply(self, paths: list[str]) -> Optional[GroupingResult]: + """Group files by regex pattern.""" + # Extract filenames + filenames = [Path(p).name for p in paths] + + # Try to extract group names from filenames + groups: dict[str, list[int]] = {} + matched_any = False + + for i, filename in enumerate(filenames): + match = self.pattern.search(filename) + if match: + matched_any = True + if match.groups(): + # Use the first capturing group as the group name + group_name = match.group(1) + if group_name not in groups: + groups[group_name] = [] + groups[group_name].append(i) + else: + # If no capturing group, use the entire match + group_name = match.group(0) + if group_name not in groups: + groups[group_name] = [] + groups[group_name].append(i) + + if not matched_any: + return None + + # Only return groups with multiple members + groups = {k: v for k, v in groups.items() if len(v) > 1} + + if not groups: + return None + + return GroupingResult( + groups={f"{name}_group": indices for name, indices in groups.items()}, + strategy_name=f"regex: {self.pattern.pattern}", + explanation=f"Grouped by regex pattern: {self.pattern.pattern}", + ) + + +class PredefinedGroupingStrategies: + """Registry of predefined grouping strategies.""" + + STRATEGIES = { + "sample": SeqnadoSampleStrategy(), + "antibody": SeqnadoAntibodyStrategy(), + } + + @classmethod + def get(cls, name: str) -> Optional[GroupingStrategy]: + """Get a predefined strategy by name.""" + return cls.STRATEGIES.get(name) + + @classmethod + def list_names(cls) -> list[str]: + """List all predefined strategy names.""" + return list(cls.STRATEGIES.keys()) + + @classmethod + def parse_group_by(cls, group_by_arg: str) -> 
Optional[GroupingStrategy]: + """ + Parse a --group-by argument. + + Can be: + - A predefined strategy name (e.g., "sample", "antibody") + - A regex pattern (detected by presence of regex metacharacters) + + Returns: + GroupingStrategy instance or None if invalid + """ + if not group_by_arg: + return None + + # Try predefined strategies first + strategy = cls.get(group_by_arg) + if strategy: + return strategy + + # Try as regex pattern + try: + return RegexGroupingStrategy(group_by_arg) + except re.error as e: + raise ValueError(f"Invalid regex pattern: {e}") + + +def apply_grouping_strategy( + paths: list[str], + strategy: GroupingStrategy, +) -> Optional[GroupingResult]: + """ + Apply a grouping strategy to a list of file paths. + + Returns: + GroupingResult if applicable, None if strategy doesn't apply + """ + return strategy.apply(paths) + + +def detect_and_apply_grouping(paths: list[str]) -> Optional[GroupingResult]: + """ + Automatically detect and apply the best grouping strategy. + + Tries strategies in order of preference (most specific first): + 1. Seqnado sample grouping (SAMPLE_ANTIBODY.bw) + 2. Seqnado antibody grouping + 3. Histone mark grouping (H3K*, H4K*, etc.) + """ + strategies_to_try = [ + SeqnadoSampleStrategy(), + SeqnadoAntibodyStrategy(), + HistoneMarkStrategy(), + ] + + for strategy in strategies_to_try: + result = strategy.apply(paths) + if result: + return result + + return None diff --git a/plotnado/cli/inference.py b/plotnado/cli/inference.py new file mode 100644 index 0000000..9d31ff8 --- /dev/null +++ b/plotnado/cli/inference.py @@ -0,0 +1,441 @@ +""" +Inference engine for track type detection and template generation heuristics. 
+ +This module provides: +- Track type classification from file paths and URLs +- Title inference from filenames +- Grouping heuristics for replicates and shared analysis +- Seqnado pipeline pattern detection (SAMPLE_ANTIBODY.bw) +- Confidence scoring for inference decisions +""" + +import re +from pathlib import Path +from typing import Optional + +from plotnado.tracks.enums import TrackType + + +class SeqnadoPattern: + """Detection and parsing for seqnado pipeline outputs.""" + + # Standard seqnado output: SAMPLE-NAME_ANTIBODY.bigwig (without explicit bw/bigwig in name) + # Examples: control_H3K27ac.bw, sample1_Input.bigwig + # Won't match: THP1H3K4me1_bigWig.bigWig (has bigwig in filename before extension) + PATTERN = re.compile( + r'([A-Za-z0-9\-_]+)_([A-Za-z0-9]+)\.(?:bw|bigwig)$', + re.IGNORECASE + ) + + # File type suffixes to exclude from antibody name + EXCLUDED_ANTIBODIES = {'bigwig', 'bw', 'bigbed', 'bed', 'sorted', 'bam'} + + @staticmethod + def is_seqnado(filename: str) -> bool: + """Check if a filename matches seqnado pattern.""" + match = SeqnadoPattern.PATTERN.match(Path(filename).name) + if not match: + return False + # Reject if antibody looks like a file type suffix + _, antibody = match.groups() + return antibody.lower() not in SeqnadoPattern.EXCLUDED_ANTIBODIES + + @staticmethod + def parse(filename: str) -> Optional[tuple[str, str]]: + """ + Parse a seqnado filename into (sample_name, antibody). 
+ + Returns: + Tuple of (sample_name, antibody) or None if not a seqnado file + """ + match = SeqnadoPattern.PATTERN.search(Path(filename).name) + if not match: + return None + + sample, antibody = match.groups() + + # Reject if antibody is a file type + if antibody.lower() in SeqnadoPattern.EXCLUDED_ANTIBODIES: + return None + + return sample, antibody + + +class TrackClassifier: + """Classifies data sources into track types.""" + + # File extension to track type mappings + EXTENSION_MAP = { + '.bw': TrackType.BIGWIG, + '.bigwig': TrackType.BIGWIG, + '.bedgraph': TrackType.BEDGRAPH, + '.bed': TrackType.BED, + '.bigbed': TrackType.BED, # BigBed is a BED format variant + '.narrowpeak': TrackType.NARROWPEAK, + '.broadpeak': TrackType.NARROWPEAK, # Similar to narrowpeak + '.bedpe': TrackType.LINKS, + '.links': TrackType.LINKS, + } + + # URL pattern mappings + URL_PATTERNS = { + r'\.bw$': TrackType.BIGWIG, + r'\.bigwig$': TrackType.BIGWIG, + r'\.bigbed$': TrackType.BED, # BigBed format + r'\.bed': TrackType.BED, + r'\.narrowpeak': TrackType.NARROWPEAK, + r'\.bedgraph': TrackType.BEDGRAPH, + } + + @staticmethod + def classify(path: str) -> tuple[TrackType, float]: + """ + Classify a data source into a track type. 
+ + Returns: + Tuple of (track_type, confidence) where confidence is 0-1 + """ + # Check for URLs + if path.startswith(('http://', 'https://', 's3://', 'ftp://')): + for pattern, track_type in TrackClassifier.URL_PATTERNS.items(): + if re.search(pattern, path, re.IGNORECASE): + return track_type, 0.9 # High confidence for URL patterns + return TrackType.UNKNOWN, 0.1 + + # Check file extension + path_lower = path.lower() + for ext, track_type in TrackClassifier.EXTENSION_MAP.items(): + if path_lower.endswith(ext): + return track_type, 0.95 # Very high confidence for direct extension match + + # Check Path object extension + try: + p = Path(path) + suffix = p.suffix.lower() + if suffix in TrackClassifier.EXTENSION_MAP: + return TrackClassifier.EXTENSION_MAP[suffix], 0.95 + except (TypeError, ValueError): + pass + + return TrackType.UNKNOWN, 0.0 + + +class TitleInference: + """Infers meaningful titles from file paths and filenames.""" + + @staticmethod + def infer(path: str, track_type: Optional[TrackType] = None) -> tuple[str, bool]: + """ + Infer a title from a file path or URL. 
+ + Args: + path: File path or URL + track_type: Optional track type to add appropriate suffix + + Returns: + Tuple of (title, was_inferred) + """ + try: + # Extract filename from path or URL + if '://' in path: # URL + path = path.split('/')[-1] + + filename = Path(path).stem # Remove extension + + # Check for seqnado pattern first + seqnado = SeqnadoPattern.parse(Path(path).name) + if seqnado: + sample, antibody = seqnado + # Format as "Antibody (Sample)" for clarity + title = f"{antibody} ({sample})" + return title, True + + # Clean up common filename patterns + title = TitleInference._clean_filename(filename) + + # Add track type suffix for clarity + if title and track_type == TrackType.BED: + title = f"{title} peaks" + + if title: + return title, True + return filename or "Unknown", False + except (TypeError, ValueError): + return "Unknown", False + + @staticmethod + def _clean_filename(name: str) -> str: + """ + Clean up common filename patterns while preserving scientific notation. + + Examples: + "THP1H3K4me3_bigBed" → "THP1 H3K4me3" + "control_rep1_Input" → "Control Input" + "sample1_H3K27ac_sorted" → "Sample1 H3K27ac" + """ + # Remove common file type suffixes (bigWig, bigBed, sorted, etc.) + name = re.sub(r'[-_](bigwig|bigbed|sorted|bam|fastq|fq|txt)$', '', name, flags=re.IGNORECASE) + + # Separate camelCase words (e.g., "THP1H3K4me3" → "THP1 H3K4me3") + # Insert space before uppercase letter that follows lowercase + name = re.sub(r'([a-z])([A-Z])', r'\1 \2', name) + + # Insert space before histone marks (H3K, H4K, etc.) 
+ name = re.sub(r'([A-Za-z0-9])([H][0-9])', r'\1 \2', name) + + # Remove leading numbers and common control keywords + name = re.sub(r'^[0-9]+[-_.]', '', name) + name = re.sub(r'\b(rep|replicate|sample|input|control)[-_]?', '', name, flags=re.IGNORECASE) + + # Replace remaining underscores and hyphens with spaces + name = re.sub(r'[_-]+', ' ', name) + + # Clean up multiple spaces + name = re.sub(r'\s+', ' ', name) + + # Capitalize first letter of each word, but preserve existing capitals + # (e.g., "H3K4me3" stays, "control" → "Control") + words = name.split() + capitalized = [] + for word in words: + # If word starts with capital, keep original case + if word and word[0].isupper(): + capitalized.append(word) + else: + # Otherwise capitalize first letter + capitalized.append(word.capitalize() if word else '') + + name = ' '.join(capitalized) + + return name.strip() + + +class GroupingHeuristic: + """Infers grouping for shared autoscaling and coloring.""" + + @staticmethod + def group_by_patterns(paths: list[str]) -> dict[str, list[int]]: + """ + Group track indices by filename patterns. + + Looks for seqnado patterns first, then common prefixes, tokens, and replication patterns. + Returns mapping of group_id -> list of track indices. 
+ """ + if not paths: + return {} + + # Check if all are seqnado files + seqnado_files = [SeqnadoPattern.parse(Path(p).name) for p in paths] + all_seqnado = all(s is not None for s in seqnado_files) + + groups: dict[str, list[int]] = {} + + if all_seqnado: + # Group by sample name (each antibody for the same sample shares scaling) + samples: dict[str, list[int]] = {} + for i, (sample, antibody) in enumerate(seqnado_files): + if sample not in samples: + samples[sample] = [] + samples[sample].append(i) + + # Create groups for samples with multiple antibodies + for sample, indices in samples.items(): + if len(indices) > 1: + group_id = f"{sample}_autoscale" + groups[group_id] = sorted(indices) + + return groups + + # Fall back to generic pattern matching + stems = [Path(p).stem.lower() for p in paths] + + # Look for common patterns + for i, stem1 in enumerate(stems): + # Check if stem appears to be a replicate + matches = [] + for j, stem2 in enumerate(stems): + if i != j and GroupingHeuristic._are_replicates(stem1, stem2): + matches.append(j) + + if matches: + # Found replicates - create a group + group_id = GroupingHeuristic._extract_group_id(stem1) + if group_id: + group_members = sorted([i] + matches) + group_key = f"{group_id}_group" + if group_key not in groups: + groups[group_key] = group_members + + return groups + + @staticmethod + def group_by_sample(paths: list[str]) -> Optional[dict[str, list[int]]]: + """ + Group by sample name (seqnado pattern only). + + Returns mapping of sample_name -> list of track indices, + or None if files don't match seqnado pattern. 
+ """ + seqnado_files = [SeqnadoPattern.parse(Path(p).name) for p in paths] + all_seqnado = all(s is not None for s in seqnado_files) + + if not all_seqnado: + return None + + samples: dict[str, list[int]] = {} + for i, (sample, antibody) in enumerate(seqnado_files): + if sample not in samples: + samples[sample] = [] + samples[sample].append(i) + + # Only return groups with multiple members (worth grouping) + return {k: v for k, v in samples.items() if len(v) > 1} + + @staticmethod + def group_by_antibody(paths: list[str]) -> Optional[dict[str, list[int]]]: + """ + Group by antibody (seqnado pattern only). + + Returns mapping of antibody_name -> list of track indices, + or None if files don't match seqnado pattern. + """ + seqnado_files = [SeqnadoPattern.parse(Path(p).name) for p in paths] + all_seqnado = all(s is not None for s in seqnado_files) + + if not all_seqnado: + return None + + antibodies: dict[str, list[int]] = {} + for i, (sample, antibody) in enumerate(seqnado_files): + if antibody not in antibodies: + antibodies[antibody] = [] + antibodies[antibody].append(i) + + # Only return groups with multiple members (worth grouping) + return {k: v for k, v in antibodies.items() if len(v) > 1} + + @staticmethod + def _are_replicates(stem1: str, stem2: str) -> bool: + """Check if two stems appear to be replicates.""" + # Remove numeric suffixes (rep1, rep2, r1, r2, _1, _2) + base1 = re.sub(r'[_-]?r(ep)?[_-]?[0-9]+', '', stem1) + base2 = re.sub(r'[_-]?r(ep)?[_-]?[0-9]+', '', stem2) + + # Check if bases are similar enough (at least 70% match) + common_length = sum(1 for a, b in zip(base1, base2) if a == b) + min_length = min(len(base1), len(base2)) + + return min_length > 0 and common_length / min_length >= 0.7 + + @staticmethod + def _extract_group_id(stem: str) -> str: + """Extract group identifier (remove rep/replicate numbers).""" + cleaned = re.sub(r'[_-]?r(ep)?[_-]?[0-9]+', '', stem) + return cleaned or stem + + +# Curated palette for BigWig/coverage tracks — 
# Curated palette for BigWig/coverage tracks — visually distinct, colorblind-friendly
_BIGWIG_PALETTE = [
    "#4ECDC4",  # Teal
    "#E8A4D0",  # Rose
    "#95E1D3",  # Mint
    "#F38181",  # Coral
    "#A8D8EA",  # Sky blue
    "#AA96DA",  # Lavender
    "#FCBAD3",  # Pink
    "#FFFFD2",  # Light yellow
]

# Fixed color for annotation-style tracks (narrowpeak, bed)
_ANNOTATION_COLOR = "#FF6B6B"


def _palette_color_for_group(group_name: str) -> str:
    """Map a group name to a palette color that is stable across runs.

    The palette index comes from a CRC-32 of the UTF-8 encoded name rather
    than the built-in ``hash()``. String hashes are salted per interpreter
    process (see ``PYTHONHASHSEED``), so ``hash()`` would give the same
    group a different color on each invocation.

    Args:
        group_name: Key to colorize (group, antibody, or path string).

    Returns:
        A hex color string taken from ``_BIGWIG_PALETTE``.
    """
    # Imported locally so this helper does not depend on the module's import block.
    import zlib

    digest = zlib.crc32(str(group_name).encode("utf-8"))
    return _BIGWIG_PALETTE[digest % len(_BIGWIG_PALETTE)]
+ + Args: + path: File path or URL + known_group: Pre-determined group (overrides heuristic grouping) + + Returns: + InferenceResult with type, title, group, and confidence + """ + result = InferenceResult() + + # Check for seqnado pattern + seqnado = SeqnadoPattern.parse(Path(path).name) + if seqnado: + result.is_seqnado = True + result.seqnado_sample, result.seqnado_antibody = seqnado + result.notes.append( + f"Seqnado pattern detected: sample='{result.seqnado_sample}', " + f"antibody='{result.seqnado_antibody}'" + ) + + # Classify track type + track_type, type_conf = TrackClassifier.classify(path) + result.track_type = track_type + result.type_confidence = type_conf + + if type_conf < 0.5: + result.issues.append(f"Could not confidently determine track type for {path}") + result.notes.append(f"Using default type: {track_type.value}") + else: + result.notes.append(f"Detected track type: {track_type.value} ({type_conf:.0%} confidence)") + + # Infer title + title, was_inferred = TitleInference.infer(path, track_type) + result.title = title + result.title_inferred = was_inferred + result.notes.append(f"Title: {title}" + (" (inferred)" if was_inferred else " (explicit)")) + + # Handle grouping + if known_group: + result.group = known_group + result.group_confidence = 1.0 + result.notes.append(f"Assigned to group: {known_group}") + + # Assign suggested color + if result.track_type in (TrackType.BIGWIG, TrackType.BEDGRAPH): + # Use antibody name for seqnado files (consistent color per antibody across samples) + color_key = result.seqnado_antibody or result.group or path + result.suggested_color = _palette_color_for_group(color_key) + elif result.track_type in (TrackType.NARROWPEAK, TrackType.BED, TrackType.ANNOTATION): + result.suggested_color = _ANNOTATION_COLOR + # UNKNOWN, GENE, LINKS, OVERLAY: no color suggestion + + return result diff --git a/plotnado/cli/init.py b/plotnado/cli/init.py new file mode 100644 index 0000000..97d591d --- /dev/null +++ 
def _sort_tracks(tracks: list[TrackSpec]) -> list[TrackSpec]:
    """Return a new list of tracks arranged by their ``_TYPE_ORDER`` rank.

    Each track is ranked by looking up ``str(track.type)`` in
    ``_TYPE_ORDER``; types missing from that table get rank 8. Tracks that
    share a rank keep their relative input order, and the input list is
    left unmodified.
    """

    def _rank(track: TrackSpec) -> int:
        return _TYPE_ORDER.get(str(track.type), 8)

    ordered = list(tracks)
    ordered.sort(key=_rank)
    return ordered
+ file_str, + ) + console.print(table) + + +@cli.app.command("init") +def init_command( + tracks: Annotated[ + list[str], + typer.Argument(help="Path or URL to track files (BigWig, BED, etc.)"), + ], + output: Annotated[ + str, + typer.Option( + "--output", + "-o", + help="Output YAML template file path", + ), + ] = "template.yaml", + genome: Annotated[ + Optional[str], + typer.Option( + "--genome", + "-g", + help="Default genome (e.g., hg38, mm10)", + ), + ] = None, + group_by: Annotated[ + Optional[str], + typer.Option( + "--group-by", + help=( + "Grouping strategy: predefined name (sample, antibody) or regex pattern. " + "Examples: --group-by sample, --group-by '([^_]+)_rep[0-9]'" + ), + ), + ] = None, + auto: Annotated[ + bool, + typer.Option( + "--auto", + help="Generate template automatically without prompting", + ), + ] = False, + no_genes: Annotated[ + bool, + typer.Option( + "--no-genes", + help="Do not include gene track by default", + ), + ] = False, +): + """ + Generate a template from track files using inference heuristics. + + The init command analyzes your track files, infers track types and grouping, + then generates an editable YAML template for rendering plots. 
+ + Supports flexible grouping strategies: + - Predefined: sample, antibody (for seqnado SAMPLE_ANTIBODY.bw patterns) + - Regex: custom patterns like '([^_]+)_rep[0-9]' to group by filename prefix + + Examples: + plotnado init sample1.bw sample2.bw peaks.bed + plotnado init sample1_H3K27ac.bw sample1_H3K4me3.bw sample2_H3K27ac.bw \\ + --group-by sample + plotnado init control_r1.bw control_r2.bw treat_r1.bw treat_r2.bw \\ + --group-by '([^_]+)_r[0-9]' + plotnado init --auto *.bw + """ + + if not tracks: + console.print("[red]Error: At least one track file is required[/red]") + raise typer.Exit(code=1) + + console.print(f"\n[bold]PlotNado Template Generator[/bold]") + console.print(f"Analyzing {len(tracks)} track file(s)...\n") + + # Run inference on all tracks + template = Template() + template.genome = genome + template.guides = GuideSpec( + genes=not no_genes, + axis=True, + scalebar=True, + ) + + inferences = [] + for track_path in tracks: + result = infer_track(track_path) + inferences.append((track_path, result)) + + track_spec = TrackSpec( + path=track_path, + type=result.track_type, + title=result.title, + color=result.suggested_color, + ) + template.tracks.append(track_spec) + + # Detect seqnado pattern + seqnado_results = [infer.is_seqnado for _, infer in inferences] + all_seqnado = all(seqnado_results) + + # Handle grouping + grouping_result = None + + if group_by: + # User provided explicit grouping strategy + try: + strategy = PredefinedGroupingStrategies.parse_group_by(group_by) + if strategy: + grouping_result = apply_grouping_strategy(tracks, strategy) + if not grouping_result: + console.print( + f"[yellow]⚠ Grouping strategy '{group_by}' " + f"did not match any tracks[/yellow]" + ) + else: + console.print(f"[red]Error: Unknown grouping strategy '{group_by}'[/red]") + raise typer.Exit(code=1) + except ValueError as e: + console.print(f"[red]Error: {e}[/red]") + raise typer.Exit(code=1) + + elif not auto: + # Interactive mode + if all_seqnado: + # 
Seqnado files detected - offer seqnado strategies + console.print("[bold cyan]Seqnado Pipeline Detected[/bold cyan]") + + samples = set() + antibodies = set() + for _, infer in inferences: + if infer.is_seqnado: + samples.add(infer.seqnado_sample) + antibodies.add(infer.seqnado_antibody) + + console.print(f" Samples: {', '.join(sorted(samples))}") + console.print(f" Antibodies: {', '.join(sorted(antibodies))}") + + console.print("\n[bold]How would you like to group tracks?[/bold]") + console.print(" 1. by sample (each antibody for same sample shares scaling)") + console.print(" 2. by antibody (each sample for same antibody shares scaling)") + console.print(" 3. no grouping") + console.print(" 4. custom regex pattern") + + choice = typer.prompt("Select option", type=int, default=1) + + if choice == 1: + strategy = PredefinedGroupingStrategies.get("sample") + grouping_result = apply_grouping_strategy(tracks, strategy) + elif choice == 2: + strategy = PredefinedGroupingStrategies.get("antibody") + grouping_result = apply_grouping_strategy(tracks, strategy) + elif choice == 4: + pattern = typer.prompt( + "Enter regex pattern (e.g., '([^_]+)_rep[0-9]')" + ) + try: + strategy = PredefinedGroupingStrategies.parse_group_by(pattern) + grouping_result = apply_grouping_strategy(tracks, strategy) + except ValueError as e: + console.print(f"[red]Error: {e}[/red]") + raise typer.Exit(code=1) + + else: + # Non-seqnado interactive wizard + # 1. Genome prompt + if not genome: + genome_input = typer.prompt( + "Genome assembly", + default="hg38", + prompt_suffix=" [hg38/mm10/none]: ", + show_default=False, + ) + if genome_input and genome_input.lower() != "none": + template.genome = genome_input + + # 2. Gene track prompt (only if genome set) + if template.genome and not no_genes: + include_genes = typer.confirm( + "Include gene annotation track?", default=True + ) + template.guides.genes = include_genes + else: + template.guides.genes = False + + # 3. 
Grouping + auto_result = detect_and_apply_grouping(tracks) + if auto_result: + console.print(f"\n[bold cyan]Detected grouping:[/bold cyan] {auto_result.explanation}") + apply = typer.confirm("Apply this grouping?", default=True) + if apply: + grouping_result = auto_result + else: + apply_manual = typer.confirm( + "\nGroup any tracks together for shared autoscaling?", default=False + ) + if apply_manual: + # Show numbered list + console.print("\nTracks:") + for i, track in enumerate(template.tracks, 1): + console.print(f" {i}. {track.title} ({track.type.value})") + + # Collect groups interactively + manual_groups: dict[str, list[int]] = {} + while True: + indices_str = typer.prompt( + "Enter track numbers to group (e.g. 1,3) or leave empty to finish", + default="", + ) + if not indices_str.strip(): + break + try: + indices = [int(x.strip()) - 1 for x in indices_str.split(",")] + if any(i < 0 or i >= len(template.tracks) for i in indices): + console.print("[yellow]⚠ Invalid track numbers, try again[/yellow]") + continue + group_name = typer.prompt("Group name", default=f"group{len(manual_groups) + 1}") + manual_groups[group_name] = indices + except ValueError: + console.print("[yellow]⚠ Enter comma-separated numbers[/yellow]") + + if manual_groups: + from plotnado.cli.grouping import GroupingResult + grouping_result = GroupingResult( + groups=manual_groups, + strategy_name="manual", + explanation=f"Manually grouped {len(manual_groups)} group(s)", + ) + + # 4. 
BigWig style + bw_tracks = [t for t in template.tracks if str(t.type) in (TrackType.BIGWIG.value, TrackType.BEDGRAPH.value)] + if bw_tracks: + style_input = typer.prompt( + "BigWig display style", + default="fill", + prompt_suffix=" [fill/line]: ", + show_default=False, + ) + if style_input in ("fill", "line"): + for track in bw_tracks: + track.style = style_input + + else: + # Auto mode: auto-detect the best grouping + grouping_result = detect_and_apply_grouping(tracks) + + # Apply grouping result to template + if grouping_result: + console.print(f"\n[green]✓ {grouping_result.explanation}[/green]") + + for group_name, indices in grouping_result.groups.items(): + group_spec = GroupSpec( + name=group_name, + tracks=[template.tracks[i].title for i in indices], + autoscale=True, + autocolor=True, + ) + template.groups.append(group_spec) + + # Set group on individual tracks + for idx in indices: + template.tracks[idx].group = group_name + + for group in template.groups: + console.print(f" {group.name}: {', '.join(str(t) for t in group.tracks)}") + + # Sort tracks: signal first, then peaks, then annotations + template.tracks = _sort_tracks(template.tracks) + + # Show preview table + _show_preview_table(template.tracks) + + # Save template with annotated header + output_path = Path(output) + args_str = " ".join(Path(t).name for t in tracks) + template.save(output_path, header_args=args_str) + + console.print(f"[green]✓ Template saved to:[/green] [bold]{output_path}[/bold]\n") + console.print("[bold]Next steps:[/bold]") + console.print(f" 1. Review: cat {output_path}") + console.print(" 2. Edit as needed (optional)") + console.print(f" 3. Plot: plotnado plot {output_path} --region chr1:1000-2000") diff --git a/plotnado/cli/plot.py b/plotnado/cli/plot.py new file mode 100644 index 0000000..7ecc878 --- /dev/null +++ b/plotnado/cli/plot.py @@ -0,0 +1,230 @@ +""" +CLI command: plotnado plot + +Renders a template for specified genomic regions. 
def resolve_gene_region(gene_name: str, genome: Optional[str] = None) -> GenomicRegion:
    """
    Resolve a gene name to a genomic region.

    Looks the gene up (case-insensitively) in the BED annotation bundled
    under ``plotnado.data.gene_bed_files`` for the requested genome. When
    several rows carry the same name, the first one is used.

    Args:
        gene_name: Gene symbol to look up (e.g., 'GNAQ')
        genome: Optional genome assembly (e.g., 'hg38', 'mm10')

    Returns:
        GenomicRegion corresponding to the gene

    Raises:
        ValueError: If no genome is given, the bundled annotations cannot be
            loaded, the genome has no annotation file, or the gene is not found.
    """
    if not genome:
        raise ValueError("Cannot resolve gene name without genome specification. Ensure template has genome defined.")

    # Load the genome -> BED file index shipped with the package. Use the
    # Traversable API instead of open() so non-filesystem installs also work.
    try:
        bed_prefix = importlib.resources.files("plotnado.data.gene_bed_files")
        mapping = json.loads((bed_prefix / "genes.json").read_text(encoding="utf-8"))
    except Exception as e:
        raise ValueError(f"Failed to load gene annotations: {e}") from e

    # Checked outside the try blocks so this message is not re-wrapped as a
    # generic load failure.
    if genome not in mapping:
        raise ValueError(f"Gene annotations not available for genome '{genome}'")

    try:
        # as_file() yields a real filesystem path, so pandas can still infer
        # compression from the file name.
        with importlib.resources.as_file(bed_prefix / mapping[genome]) as gene_path:
            genes_df = pd.read_csv(gene_path, sep="\t", header=None)
    except Exception as e:
        raise ValueError(f"Failed to load gene annotations: {e}") from e

    if genes_df.shape[1] < 4:
        raise ValueError(
            f"Gene annotation file for '{genome}' has fewer than 4 columns; "
            "expected BED with chrom, start, end, name"
        )

    # Parse BED format (chrom, start, end, name, ...)
    genes_df.columns = [
        "chrom",
        "start",
        "end",
        "name",
        *[f"field_{i}" for i in range(genes_df.shape[1] - 4)],
    ]

    # Match gene name (case-insensitive)
    match = genes_df.loc[genes_df["name"].astype(str).str.upper() == gene_name.upper()]

    if match.empty:
        raise ValueError(f"Gene '{gene_name}' not found in {genome} annotations")

    row = match.iloc[0]
    return GenomicRegion(
        chromosome=row["chrom"],
        start=int(row["start"]),
        end=int(row["end"]),
    )
+ + Examples: + plotnado plot template.yaml --region chr1:1000-2000 + plotnado plot template.yaml --region GNAQ + plotnado plot template.yaml -r chr1:1M-2M -r chr2:5M-6M + plotnado plot template.yaml -r chr1:1,000,000-2,000,000 -o plot.pdf + plotnado plot template.yaml --region chr1:start-end --format svg --width 15 + """ + + if not region: + console.print("[red]Error: At least one --region is required[/red]") + raise typer.Exit(code=1) + + if output and len(region) > 1: + console.print("[red]Error: --output can only be used with a single --region[/red]") + raise typer.Exit(code=1) + + # Load template + try: + template = Template.load(template_file) + console.print(f"[green]✓ Loaded template:[/green] {template_file}") + except FileNotFoundError: + console.print(f"[red]Error: Template file not found:[/red] {template_file}") + raise typer.Exit(code=1) + except Exception as e: + console.print(f"[red]Error loading template:[/red] {e}") + raise typer.Exit(code=1) + + # Compile template once (region-independent) + try: + plan = TemplateCompiler.compile(template) + console.print(f"[cyan]Compiled render plan:[/cyan] {len(plan.tracks)} tracks") + except Exception as e: + console.print(f"[red]Error compiling template:[/red] {e}") + raise typer.Exit(code=1) + + for region_str in region: + # Parse region - try gene name first if it doesn't look like a genomic coordinate + gr = None + + if ":" not in region_str: + try: + gr = resolve_gene_region(region_str, genome=template.genome) + console.print(f"[cyan]Resolved gene:[/cyan] {region_str} → {gr}") + except ValueError as e: + console.print(f"[yellow]Could not resolve as gene name:[/yellow] {e}") + console.print("[yellow]Attempting to parse as genomic region...[/yellow]") + + if gr is None: + try: + gr = GenomicRegion.from_str(region_str) + console.print(f"[cyan]Region:[/cyan] {gr}") + except Exception as e: + console.print(f"[red]Error parsing region '{region_str}':[/red] {e}") + console.print("Expected format: chr:start-end 
(e.g., chr1:1000-2000) or gene name (e.g., GNAQ)") + raise typer.Exit(code=1) + + # Create figure from render plan + try: + fig = pn.GenomicFigure(width=width if width is not None else plan.width, track_height=plan.track_height) + + if plan.add_scalebar: + fig.scalebar() + if plan.add_axis: + fig.axis() + if plan.add_genes and plan.genome: + fig.genes(plan.genome) + + for resolved_track in plan.tracks: + kwargs = resolved_track.to_figure_kwargs() + data = resolved_track.get_data() + track_type = str(resolved_track.track_spec.type) + if data: + fig.add_track( + track_type, + **{resolved_track.source_kwarg_name(): data}, + **kwargs, + ) + else: + fig.add_track(track_type, **kwargs) + except Exception as e: + console.print(f"[red]Error creating figure for {gr}:[/red] {e}") + raise typer.Exit(code=1) + + # Determine output path + if output: + out_path = Path(output) + else: + safe_region = str(gr).replace(":", "_").replace("-", "_").replace("(", "_").replace(")", "_") + out_path = Path(f"{Path(template_file).stem}_{safe_region}.{format}") + + try: + fig.save(out_path, region=str(gr), dpi=dpi) + console.print(f"[green]✓ Saved plot:[/green] [bold]{out_path}[/bold]") + except Exception as e: + console.print(f"[red]Error saving figure:[/red] {e}") + raise typer.Exit(code=1) diff --git a/plotnado/cli/validate.py b/plotnado/cli/validate.py new file mode 100644 index 0000000..6f56f95 --- /dev/null +++ b/plotnado/cli/validate.py @@ -0,0 +1,127 @@ +""" +CLI command: plotnado validate + +Validates and explains a template. +""" + +from pathlib import Path + +import typer +from typing_extensions import Annotated +from rich.console import Console +from rich.table import Table + +from plotnado.template import Template +from plotnado.render import TemplateCompiler + +from . 
import cli + +console = Console() + + +@cli.app.command("validate") +def validate_command( + template_file: Annotated[ + str, + typer.Argument(help="Path to YAML template file"), + ], +): + """ + Validate a template and show its configuration. + + Checks for missing files, group reference errors, and other issues + before you attempt to plot. + + Examples: + plotnado validate template.yaml + """ + + # Load template + try: + template = Template.load(template_file) + console.print(f"[green]✓ Template loaded:[/green] {template_file}\n") + except FileNotFoundError: + console.print(f"[red]Error: Template file not found:[/red] {template_file}") + raise typer.Exit(code=1) + except Exception as e: + console.print(f"[red]Error loading template:[/red] {e}") + raise typer.Exit(code=1) + + # Show metadata + console.print("[bold cyan]Metadata[/bold cyan]") + if template.genome: + console.print(f" genome: {template.genome}") + console.print(f" width: {template.width} inches") + console.print(f" track_height: {template.track_height}") + + # Show guides + console.print("\n[bold cyan]Guides[/bold cyan]") + console.print(f" genes: {template.guides.genes}") + console.print(f" axis: {template.guides.axis}") + console.print(f" scalebar: {template.guides.scalebar}") + + # Show tracks + console.print(f"\n[bold cyan]Tracks ({len(template.tracks)})[/bold cyan]") + + table = Table(show_header=True, header_style="bold") + table.add_column("Index", style="dim") + table.add_column("Title") + table.add_column("Type") + table.add_column("Group") + table.add_column("File") + + missing_files: list[str] = [] + + for i, track in enumerate(template.tracks, 1): + group_str = track.group or "—" + file_str = Path(track.path).name if track.path else "—" + + # Check file existence for local paths + if track.path and not track.path.startswith(("http://", "https://", "s3://", "ftp://")): + if not Path(track.path).exists(): + missing_files.append(track.path) + file_str = f"[red]{file_str} ✗[/red]" + + 
table.add_row( + str(i), + track.title or "—", + str(track.type), + group_str, + file_str, + ) + + console.print(table) + + # Show groups + if template.groups: + console.print(f"\n[bold cyan]Groups ({len(template.groups)})[/bold cyan]") + for group in template.groups: + console.print(f" {group.name}") + if group.tracks: + console.print(f" tracks: {group.tracks}") + console.print(f" autoscale: {group.autoscale}") + console.print(f" autocolor: {group.autocolor}") + + # Report missing files + errors = False + if missing_files: + errors = True + console.print(f"\n[red]✗ {len(missing_files)} file(s) not found:[/red]") + for f in missing_files: + console.print(f" {f}") + + # Dry-compile to catch group reference errors + try: + TemplateCompiler.compile(template) + console.print("\n[green]✓ Group references resolved successfully[/green]") + except ValueError as e: + errors = True + console.print(f"\n[red]✗ Group reference error:[/red] {e}") + console.print("[dim]Tip: Check that track titles in your groups section match track titles exactly (case-insensitive).[/dim]") + + if errors: + console.print("\n[red]Validation failed — fix the issues above before plotting.[/red]") + raise typer.Exit(code=1) + + console.print("\n[green]✓ Validation complete[/green]") + console.print("Next: plotnado plot