Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 46 additions & 13 deletions src/nemo_runspec/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@

import types
import typing
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar

if TYPE_CHECKING:
from pydantic_settings import BaseSettings
from typing import Any, ClassVar

from rich import box
from rich.panel import Panel
Expand All @@ -42,6 +40,15 @@
except ImportError:
import tomli as tomllib

@dataclass(frozen=True)
class LazyConfigModel:
    """Config model loader used when importing the model has optional deps.

    Wraps a zero-argument callable that returns the config model class, so
    the import of that class is deferred until ``load`` is actually invoked
    rather than happening when the wrapper is constructed. Instances are
    frozen, so ``load`` cannot be reassigned after construction.
    """

    # Zero-argument callable returning the config model class.
    load: typing.Callable[[], type[Any]]


ConfigModelProvider = type[Any] | LazyConfigModel


def _format_annotation(annotation: Any) -> str:
"""Format a type annotation for display in help text."""
Expand Down Expand Up @@ -121,7 +128,7 @@ class RecipeCommand(TyperCommand):

artifact_overrides: ClassVar[dict[str, str]] = {}
config_dir: ClassVar[str | None] = None
config_model: ClassVar[type[BaseSettings] | None] = None
config_model: ClassVar[ConfigModelProvider | None] = None

def format_help(self, ctx, formatter):
"""Format help with custom recipe options section."""
Expand All @@ -135,6 +142,7 @@ def format_help(self, ctx, formatter):
# Then add our custom panels
console = rich_utils._get_rich_console()
cmd_name = ctx.info_name
config_model = self._resolve_config_model(console)

# Global options table
options_table = Table(
Expand Down Expand Up @@ -175,8 +183,8 @@ def format_help(self, ctx, formatter):
)

# Config options from Pydantic model
if self.config_model is not None:
self._format_config_options(console, cmd_name)
if config_model is not None:
self._format_config_options(console, cmd_name, config_model)

# Artifact overrides (if any defined for this command)
if self.artifact_overrides:
Expand Down Expand Up @@ -241,8 +249,8 @@ def format_help(self, ctx, formatter):

# Dotlist override examples
example_override = "key.path=value"
if self.config_model is not None:
fields = list(self.config_model.model_fields.keys())
if config_model is not None:
fields = list(config_model.model_fields.keys())
if fields:
example_override = f"{fields[0]}=..."

Expand All @@ -268,8 +276,32 @@ def format_help(self, ctx, formatter):
)
)

def _resolve_config_model(self, console) -> type[Any] | None:
    """Resolve the command's config model, loading it lazily when required.

    Args:
        console: Rich console used to report a failed lazy import.

    Returns:
        The config model class, or ``None`` when no model is configured,
        the configured value is neither a class nor a ``LazyConfigModel``,
        or the lazy loader raised ``ImportError``.
    """
    config_model = type(self).config_model
    if config_model is None:
        return None
    if isinstance(config_model, type):
        # Already a concrete class (either declared eagerly or cached below).
        return config_model
    if not isinstance(config_model, LazyConfigModel):
        return None
    try:
        resolved = config_model.load()
    except ImportError as exc:
        # The panel body is parsed as Rich markup. Escape "[" so text such as
        # "pip install pkg[extra]" in the error message is rendered literally
        # instead of being interpreted (and swallowed) as a markup tag.
        reason = str(exc).replace("[", "\\[")
        console.print(
            Panel(
                f"{reason}\n\n[dim]The command can still run locally because the recipe "
                "script resolves its PEP 723 dependencies with `uv run --no-project`.[/]",
                title="[bold]Config Options Unavailable[/]",
                title_align="left",
                border_style=rich_utils.STYLE_OPTIONS_PANEL_BORDER,
            )
        )
        return None
    # Cache the loaded class on the command class so later help renders
    # take the fast ``isinstance(config_model, type)`` path above.
    type(self).config_model = resolved
    return resolved

def _format_config_options(self, console, cmd_name: str) -> None:
def _format_config_options(self, console, cmd_name: str, config_model: type[Any]) -> None:
"""Render config options panel from Pydantic model_fields."""
from pydantic_core import PydanticUndefined

Expand All @@ -285,7 +317,7 @@ def _format_config_options(self, console, cmd_name: str) -> None:
config_table.add_column("Default", no_wrap=True, max_width=35)
config_table.add_column("Description")

for name, field_info in self.config_model.model_fields.items():
for name, field_info in config_model.model_fields.items():
type_str = _format_annotation(field_info.annotation).replace("[", "\\[")

if field_info.default is not PydanticUndefined:
Expand Down Expand Up @@ -317,15 +349,16 @@ def _format_config_options(self, console, cmd_name: str) -> None:
def make_recipe_command(
artifact_overrides: dict[str, str] | None = None,
config_dir: str | None = None,
config_model: type[BaseSettings] | None = None,
config_model: ConfigModelProvider | None = None,
):
"""Factory function to create a RecipeCommand subclass with custom options.

Args:
artifact_overrides: Dict mapping artifact names to descriptions.
Example: {"data": "Data artifact", "model": "Model checkpoint"}
config_dir: Path to config directory (relative to repo root).
config_model: Pydantic BaseSettings subclass for config option introspection.
config_model: Pydantic BaseSettings subclass, or a LazyConfigModel
used for config option introspection.

Returns:
A RecipeCommand subclass with the specified options.
Expand Down
13 changes: 5 additions & 8 deletions src/nemo_runspec/recipe_typer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,10 @@

from collections.abc import Callable
from dataclasses import dataclass, field
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pydantic_settings import BaseSettings

import typer

from nemo_runspec.help import make_recipe_command
from nemo_runspec.help import ConfigModelProvider, make_recipe_command


@dataclass(frozen=True)
Expand All @@ -50,6 +46,7 @@ class RecipeMeta:
- default_config: Default config name
- input_artifacts: What data the recipe consumes
- output_artifacts: What the recipe produces
- config_model: Pydantic config class or LazyConfigModel for rich help

Example:
META = RecipeMeta(
Expand All @@ -68,7 +65,7 @@ class RecipeMeta:
default_config: str = "default"
input_artifacts: dict[str, str] = field(default_factory=dict)
output_artifacts: dict[str, str] = field(default_factory=dict)
config_model: type[BaseSettings] | None = None
config_model: ConfigModelProvider | None = None


class RecipeTyper(typer.Typer):
Expand Down Expand Up @@ -97,7 +94,7 @@ def recipe_command(
config_dir: str | None = None,
input_artifacts: dict[str, str] | None = None,
output_artifacts: dict[str, str] | None = None,
config_model: type[BaseSettings] | None = None,
config_model: ConfigModelProvider | None = None,
rich_help_panel: str | None = None,
name: str | None = None,
) -> Callable[[Callable], Callable]:
Expand Down Expand Up @@ -150,7 +147,7 @@ def add_recipe_command(
config_dir: str | None = None,
input_artifacts: dict[str, str] | None = None,
output_artifacts: dict[str, str] | None = None,
config_model: type[BaseSettings] | None = None,
config_model: ConfigModelProvider | None = None,
rich_help_panel: str | None = None,
name: str | None = None,
) -> None:
Expand Down
42 changes: 27 additions & 15 deletions src/nemotron/cli/commands/data/sdg/long_document/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@
clients) are resolved at runtime from each script's PEP 723 inline
``dependencies`` list.

Pydantic config classes are loaded from the scripts via importlib so the
rich ``--help`` panel can introspect every config field — the scripts cannot
be imported by their normal dotted path because their parent directory uses
a dash and filenames begin with digits.
Pydantic config classes are loaded lazily from the scripts via importlib so
the rich per-stage ``--help`` panel can introspect every config field when
the optional recipe deps are installed — the scripts cannot be imported by
their normal dotted path because their parent directory uses a dash and
filenames begin with digits.

Producer stages (``ocr``, ``text-qa``, ...) optionally accept ``--serve``,
which composes a multi-task ``nemo_run.Experiment``: a serve task brings vLLM
Expand All @@ -43,6 +44,7 @@
import subprocess
import time
from pathlib import Path
from typing import Any

import typer

Expand All @@ -51,9 +53,9 @@
from nemo_runspec.display import display_job_config, display_job_submission
from nemo_runspec.env import parse_env
from nemo_runspec.execution import build_env_vars
from nemo_runspec.help import LazyConfigModel
from nemo_runspec.recipe_config import RecipeConfig, parse_recipe_config
from nemo_runspec.recipe_typer import RecipeMeta

from nemotron.cli.commands.data.sdg.long_document._config_loader import load_config_class
from nemotron.cli.commands.data.sdg.long_document._deployment import (
STAGE_DEFAULT_DEPLOYMENT,
Expand All @@ -69,16 +71,26 @@
# Each stage is described by:
# - SCRIPT_*: repo-relative path to the recipe script (used by CodePackager).
# - SPEC_* : runspec parsed from the script's PEP 723 [tool.runspec] block.
# - *_CFG : Pydantic config class (loaded via importlib for rich --help).
# - *_CFG : Lazy Pydantic config-class loader for rich per-command --help.
# - META_* : RecipeMeta wired into Typer for help rendering.
# --------------------------------------------------------------------------- #

_RECIPES_ROOT = "src/nemotron/recipes/data/sdg/long-document"


def _lazy_config_class(script_path: Path, class_name: str, module_alias: str) -> LazyConfigModel:
    """Wrap ``load_config_class`` so the recipe import runs only on demand.

    The three arguments are captured and forwarded unchanged to
    ``load_config_class`` when the returned wrapper's ``load`` is called.
    """
    return LazyConfigModel(
        load=lambda: load_config_class(script_path, class_name, module_alias),
    )


# Stage 01: seed -----------------------------------------------------------------
SCRIPT_SEED = f"{_RECIPES_ROOT}/01-seed-dataset-preparation.py"
SPEC_SEED = parse_runspec(SCRIPT_SEED)
SEED_CFG = load_config_class(SPEC_SEED.script_path, "SeedConfig", "_long_doc_seed_module")
SEED_CFG = _lazy_config_class(SPEC_SEED.script_path, "SeedConfig", "_long_doc_seed_module")
META_SEED = RecipeMeta(
name=SPEC_SEED.name,
script_path=SCRIPT_SEED,
Expand All @@ -92,7 +104,7 @@
# Stage 02: ocr -------------------------------------------------------------------
SCRIPT_OCR = f"{_RECIPES_ROOT}/02-nemotron-parse-ocr-sdg.py"
SPEC_OCR = parse_runspec(SCRIPT_OCR)
OCR_CFG = load_config_class(SPEC_OCR.script_path, "OcrConfig", "_long_doc_ocr_module")
OCR_CFG = _lazy_config_class(SPEC_OCR.script_path, "OcrConfig", "_long_doc_ocr_module")
META_OCR = RecipeMeta(
name=SPEC_OCR.name,
script_path=SCRIPT_OCR,
Expand All @@ -106,7 +118,7 @@
# Stage 03: text-qa ---------------------------------------------------------------
SCRIPT_TEXT_QA = f"{_RECIPES_ROOT}/03-text-qa-sdg.py"
SPEC_TEXT_QA = parse_runspec(SCRIPT_TEXT_QA)
TEXT_QA_CFG = load_config_class(SPEC_TEXT_QA.script_path, "TextQAConfig", "_long_doc_text_qa_module")
TEXT_QA_CFG = _lazy_config_class(SPEC_TEXT_QA.script_path, "TextQAConfig", "_long_doc_text_qa_module")
META_TEXT_QA = RecipeMeta(
name=SPEC_TEXT_QA.name,
script_path=SCRIPT_TEXT_QA,
Expand All @@ -120,7 +132,7 @@
# Stage 04: page-classification ---------------------------------------------------
SCRIPT_PAGE_CLASSIFICATION = f"{_RECIPES_ROOT}/04-page-classification-sdg.py"
SPEC_PAGE_CLASSIFICATION = parse_runspec(SCRIPT_PAGE_CLASSIFICATION)
PAGE_CLASSIFICATION_CFG = load_config_class(
PAGE_CLASSIFICATION_CFG = _lazy_config_class(
SPEC_PAGE_CLASSIFICATION.script_path,
"PageClassificationConfig",
"_long_doc_page_classification_module",
Expand All @@ -138,7 +150,7 @@
# Stage 05: visual-qa -------------------------------------------------------------
SCRIPT_VISUAL_QA = f"{_RECIPES_ROOT}/05-visual-qa-sdg.py"
SPEC_VISUAL_QA = parse_runspec(SCRIPT_VISUAL_QA)
VISUAL_QA_CFG = load_config_class(SPEC_VISUAL_QA.script_path, "VisualQAConfig", "_long_doc_visual_qa_module")
VISUAL_QA_CFG = _lazy_config_class(SPEC_VISUAL_QA.script_path, "VisualQAConfig", "_long_doc_visual_qa_module")
META_VISUAL_QA = RecipeMeta(
name=SPEC_VISUAL_QA.name,
script_path=SCRIPT_VISUAL_QA,
Expand All @@ -152,7 +164,7 @@
# Stage 06: single-page-qa --------------------------------------------------------
SCRIPT_SINGLE_PAGE_QA = f"{_RECIPES_ROOT}/06-single-page-qa-sdg.py"
SPEC_SINGLE_PAGE_QA = parse_runspec(SCRIPT_SINGLE_PAGE_QA)
SINGLE_PAGE_QA_CFG = load_config_class(
SINGLE_PAGE_QA_CFG = _lazy_config_class(
SPEC_SINGLE_PAGE_QA.script_path,
"SinglePageQAConfig",
"_long_doc_single_page_qa_module",
Expand All @@ -170,7 +182,7 @@
# Stage 07: windowed-qa -----------------------------------------------------------
SCRIPT_WINDOWED_QA = f"{_RECIPES_ROOT}/07-multi-page-windowed-qa-sdg.py"
SPEC_WINDOWED_QA = parse_runspec(SCRIPT_WINDOWED_QA)
WINDOWED_QA_CFG = load_config_class(
WINDOWED_QA_CFG = _lazy_config_class(
SPEC_WINDOWED_QA.script_path,
"WindowedQAConfig",
"_long_doc_windowed_qa_module",
Expand All @@ -188,7 +200,7 @@
# Stage 08: whole-document-qa -----------------------------------------------------
SCRIPT_WHOLE_DOCUMENT_QA = f"{_RECIPES_ROOT}/08-whole-document-qa-sdg.py"
SPEC_WHOLE_DOCUMENT_QA = parse_runspec(SCRIPT_WHOLE_DOCUMENT_QA)
WHOLE_DOCUMENT_QA_CFG = load_config_class(
WHOLE_DOCUMENT_QA_CFG = _lazy_config_class(
SPEC_WHOLE_DOCUMENT_QA.script_path,
"WholeDocumentQAConfig",
"_long_doc_whole_document_qa_module",
Expand All @@ -206,7 +218,7 @@
# Stage 09: judge -----------------------------------------------------------------
SCRIPT_JUDGE = f"{_RECIPES_ROOT}/09-frontier-judge-sdg.py"
SPEC_JUDGE = parse_runspec(SCRIPT_JUDGE)
JUDGE_CFG = load_config_class(SPEC_JUDGE.script_path, "JudgeConfig", "_long_doc_judge_module")
JUDGE_CFG = _lazy_config_class(SPEC_JUDGE.script_path, "JudgeConfig", "_long_doc_judge_module")
META_JUDGE = RecipeMeta(
name=SPEC_JUDGE.name,
script_path=SCRIPT_JUDGE,
Expand Down
29 changes: 29 additions & 0 deletions tests/recipes/test_data_sdg_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Regression tests for the data SDG CLI import surface."""

from __future__ import annotations

from typer.testing import CliRunner

from nemotron.cli.bin.nemotron import app

runner = CliRunner()


def test_root_help_succeeds_without_data_sdg_extra():
    """Root ``--help`` exits with code 0 and its output mentions ``data``."""
    argv = ["--help"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    assert "data" in outcome.output


def test_long_document_group_help_succeeds_without_data_sdg_extra():
    """Group ``--help`` exits with code 0 and mentions ``ocr`` and ``text-qa``."""
    argv = ["data", "sdg", "long-document", "--help"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    assert "ocr" in outcome.output
    assert "text-qa" in outcome.output


def test_long_document_stage_help_succeeds_without_data_sdg_extra():
    """``ocr --help`` exits with code 0 and shows the summary and global options."""
    argv = ["data", "sdg", "long-document", "ocr", "--help"]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    assert "Run Nemotron-Parse OCR" in outcome.output
    assert "Global Options" in outcome.output
Loading