diff --git a/src/nemo_runspec/help.py b/src/nemo_runspec/help.py index 0ba8ae711..c394d0931 100644 --- a/src/nemo_runspec/help.py +++ b/src/nemo_runspec/help.py @@ -25,11 +25,9 @@ import types import typing +from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, ClassVar - -if TYPE_CHECKING: - from pydantic_settings import BaseSettings +from typing import Any, ClassVar from rich import box from rich.panel import Panel @@ -42,6 +40,15 @@ except ImportError: import tomli as tomllib +@dataclass(frozen=True) +class LazyConfigModel: + """Config model loader used when importing the model has optional deps.""" + + load: typing.Callable[[], type[Any]] + + +ConfigModelProvider = type[Any] | LazyConfigModel + def _format_annotation(annotation: Any) -> str: """Format a type annotation for display in help text.""" @@ -121,7 +128,7 @@ class RecipeCommand(TyperCommand): artifact_overrides: ClassVar[dict[str, str]] = {} config_dir: ClassVar[str | None] = None - config_model: ClassVar[type[BaseSettings] | None] = None + config_model: ClassVar[ConfigModelProvider | None] = None def format_help(self, ctx, formatter): """Format help with custom recipe options section.""" @@ -135,6 +142,7 @@ def format_help(self, ctx, formatter): # Then add our custom panels console = rich_utils._get_rich_console() cmd_name = ctx.info_name + config_model = self._resolve_config_model(console) # Global options table options_table = Table( @@ -175,8 +183,8 @@ def format_help(self, ctx, formatter): ) # Config options from Pydantic model - if self.config_model is not None: - self._format_config_options(console, cmd_name) + if config_model is not None: + self._format_config_options(console, cmd_name, config_model) # Artifact overrides (if any defined for this command) if self.artifact_overrides: @@ -241,8 +249,8 @@ def format_help(self, ctx, formatter): # Dotlist override examples example_override = "key.path=value" - if self.config_model is not None: - fields = list(self.config_model.model_fields.keys()) + if config_model is not None: + fields = list(config_model.model_fields.keys()) if fields: example_override = f"{fields[0]}=..." @@ -268,8 +276,32 @@ def format_help(self, ctx, formatter): ) ) + def _resolve_config_model(self, console) -> type[Any] | None: + """Resolve an optional lazy config model for command help.""" + config_model = type(self).config_model + if config_model is None: + return None + if isinstance(config_model, type): + return config_model + if not isinstance(config_model, LazyConfigModel): + return None + try: + resolved = config_model.load() + except ImportError as exc: + console.print( + Panel( + f"{exc}\n\n[dim]The command can still run locally because the recipe " + "script resolves its PEP 723 dependencies with `uv run --no-project`.[/]", + title="[bold]Config Options Unavailable[/]", + title_align="left", + border_style=rich_utils.STYLE_OPTIONS_PANEL_BORDER, + ) + ) + return None + type(self).config_model = resolved + return resolved - def _format_config_options(self, console, cmd_name: str) -> None: + def _format_config_options(self, console, cmd_name: str, config_model: type[Any]) -> None: """Render config options panel from Pydantic model_fields.""" from pydantic_core import PydanticUndefined @@ -285,7 +317,7 @@ def _format_config_options(self, console, cmd_name: str) -> None: config_table.add_column("Default", no_wrap=True, max_width=35) config_table.add_column("Description") - for name, field_info in self.config_model.model_fields.items(): + for name, field_info in config_model.model_fields.items(): type_str = _format_annotation(field_info.annotation).replace("[", "\\[") if field_info.default is not PydanticUndefined: @@ -317,7 +349,7 @@ def _format_config_options(self, console, cmd_name: str) -> None: def make_recipe_command( artifact_overrides: dict[str, str] | None = None, config_dir: str | None = None, - config_model: type[BaseSettings] | None = None, + config_model: ConfigModelProvider | None = None, ): """Factory function to create a RecipeCommand subclass with custom options. @@ -325,7 +357,8 @@ def make_recipe_command( artifact_overrides: Dict mapping artifact names to descriptions. Example: {"data": "Data artifact", "model": "Model checkpoint"} config_dir: Path to config directory (relative to repo root). - config_model: Pydantic BaseSettings subclass for config option introspection. + config_model: Pydantic BaseSettings subclass, or a LazyConfigModel + used for config option introspection. Returns: A RecipeCommand subclass with the specified options. diff --git a/src/nemo_runspec/recipe_typer.py b/src/nemo_runspec/recipe_typer.py index 7fb37a024..eda277098 100644 --- a/src/nemo_runspec/recipe_typer.py +++ b/src/nemo_runspec/recipe_typer.py @@ -29,14 +29,10 @@ from collections.abc import Callable from dataclasses import dataclass, field -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from pydantic_settings import BaseSettings import typer -from nemo_runspec.help import make_recipe_command +from nemo_runspec.help import ConfigModelProvider, make_recipe_command @dataclass(frozen=True) @@ -50,6 +46,7 @@ class RecipeMeta: - default_config: Default config name - input_artifacts: What data the recipe consumes - output_artifacts: What the recipe produces + - config_model: Pydantic config class or LazyConfigModel for rich help Example: META = RecipeMeta( @@ -68,7 +65,7 @@ class RecipeMeta: default_config: str = "default" input_artifacts: dict[str, str] = field(default_factory=dict) output_artifacts: dict[str, str] = field(default_factory=dict) - config_model: type[BaseSettings] | None = None + config_model: ConfigModelProvider | None = None class RecipeTyper(typer.Typer): @@ -97,7 +94,7 @@ def recipe_command( config_dir: str | None = None, input_artifacts: dict[str, str] | None = None, output_artifacts: dict[str, str] | None = None, - config_model: type[BaseSettings] | None = None, + config_model: ConfigModelProvider | None = None, rich_help_panel: str | None = None, name: str | None = None, ) -> Callable[[Callable], Callable]: @@ -150,7 +147,7 @@ def add_recipe_command( config_dir: str | None = None, input_artifacts: dict[str, str] | None = None, output_artifacts: dict[str, str] | None = None, - config_model: type[BaseSettings] | None = None, + config_model: ConfigModelProvider | None = None, rich_help_panel: str | None = None, name: str | None = None, ) -> None: diff --git a/src/nemotron/cli/commands/data/sdg/long_document/commands.py b/src/nemotron/cli/commands/data/sdg/long_document/commands.py index b3bd3b9d1..42fb16d5e 100644 --- a/src/nemotron/cli/commands/data/sdg/long_document/commands.py +++ b/src/nemotron/cli/commands/data/sdg/long_document/commands.py @@ -20,10 +20,11 @@ clients) are resolved at runtime from each script's PEP 723 inline ``dependencies`` list. -Pydantic config classes are loaded from the scripts via importlib so the -rich ``--help`` panel can introspect every config field — the scripts cannot -be imported by their normal dotted path because their parent directory uses -a dash and filenames begin with digits. +Pydantic config classes are loaded lazily from the scripts via importlib so +the rich per-stage ``--help`` panel can introspect every config field when +the optional recipe deps are installed — the scripts cannot be imported by +their normal dotted path because their parent directory uses a dash and +filenames begin with digits. Producer stages (``ocr``, ``text-qa``, ...) optionally accept ``--serve``, which composes a multi-task ``nemo_run.Experiment``: a serve task brings vLLM @@ -43,6 +44,7 @@ import subprocess import time from pathlib import Path +from typing import Any import typer @@ -51,9 +53,9 @@ from nemo_runspec.display import display_job_config, display_job_submission from nemo_runspec.env import parse_env from nemo_runspec.execution import build_env_vars +from nemo_runspec.help import LazyConfigModel from nemo_runspec.recipe_config import RecipeConfig, parse_recipe_config from nemo_runspec.recipe_typer import RecipeMeta - from nemotron.cli.commands.data.sdg.long_document._config_loader import load_config_class from nemotron.cli.commands.data.sdg.long_document._deployment import ( STAGE_DEFAULT_DEPLOYMENT, @@ -69,16 +71,26 @@ # Each stage is described by: # - SCRIPT_*: repo-relative path to the recipe script (used by CodePackager). # - SPEC_* : runspec parsed from the script's PEP 723 [tool.runspec] block. -# - *_CFG : Pydantic config class (loaded via importlib for rich --help). +# - *_CFG : Lazy Pydantic config-class loader for rich per-command --help. # - META_* : RecipeMeta wired into Typer for help rendering. # --------------------------------------------------------------------------- # _RECIPES_ROOT = "src/nemotron/recipes/data/sdg/long-document" + +def _lazy_config_class(script_path: Path, class_name: str, module_alias: str) -> LazyConfigModel: + """Defer optional recipe imports until a stage command renders help.""" + + def _load_config_class() -> type[Any]: + return load_config_class(script_path, class_name, module_alias) + + return LazyConfigModel(load=_load_config_class) + + # Stage 01: seed ----------------------------------------------------------------- SCRIPT_SEED = f"{_RECIPES_ROOT}/01-seed-dataset-preparation.py" SPEC_SEED = parse_runspec(SCRIPT_SEED) -SEED_CFG = load_config_class(SPEC_SEED.script_path, "SeedConfig", "_long_doc_seed_module") +SEED_CFG = _lazy_config_class(SPEC_SEED.script_path, "SeedConfig", "_long_doc_seed_module") META_SEED = RecipeMeta( name=SPEC_SEED.name, script_path=SCRIPT_SEED, @@ -92,7 +104,7 @@ # Stage 02: ocr ------------------------------------------------------------------- SCRIPT_OCR = f"{_RECIPES_ROOT}/02-nemotron-parse-ocr-sdg.py" SPEC_OCR = parse_runspec(SCRIPT_OCR) -OCR_CFG = load_config_class(SPEC_OCR.script_path, "OcrConfig", "_long_doc_ocr_module") +OCR_CFG = _lazy_config_class(SPEC_OCR.script_path, "OcrConfig", "_long_doc_ocr_module") META_OCR = RecipeMeta( name=SPEC_OCR.name, script_path=SCRIPT_OCR, @@ -106,7 +118,7 @@ # Stage 03: text-qa --------------------------------------------------------------- SCRIPT_TEXT_QA = f"{_RECIPES_ROOT}/03-text-qa-sdg.py" SPEC_TEXT_QA = parse_runspec(SCRIPT_TEXT_QA) -TEXT_QA_CFG = load_config_class(SPEC_TEXT_QA.script_path, "TextQAConfig", "_long_doc_text_qa_module") +TEXT_QA_CFG = _lazy_config_class(SPEC_TEXT_QA.script_path, "TextQAConfig", "_long_doc_text_qa_module") META_TEXT_QA = RecipeMeta( name=SPEC_TEXT_QA.name, script_path=SCRIPT_TEXT_QA, @@ -120,7 +132,7 @@ # Stage 04: page-classification --------------------------------------------------- SCRIPT_PAGE_CLASSIFICATION = f"{_RECIPES_ROOT}/04-page-classification-sdg.py" SPEC_PAGE_CLASSIFICATION = parse_runspec(SCRIPT_PAGE_CLASSIFICATION) -PAGE_CLASSIFICATION_CFG = load_config_class( +PAGE_CLASSIFICATION_CFG = _lazy_config_class( SPEC_PAGE_CLASSIFICATION.script_path, "PageClassificationConfig", "_long_doc_page_classification_module", @@ -138,7 +150,7 @@ # Stage 05: visual-qa ------------------------------------------------------------- SCRIPT_VISUAL_QA = f"{_RECIPES_ROOT}/05-visual-qa-sdg.py" SPEC_VISUAL_QA = parse_runspec(SCRIPT_VISUAL_QA) -VISUAL_QA_CFG = load_config_class(SPEC_VISUAL_QA.script_path, "VisualQAConfig", "_long_doc_visual_qa_module") +VISUAL_QA_CFG = _lazy_config_class(SPEC_VISUAL_QA.script_path, "VisualQAConfig", "_long_doc_visual_qa_module") META_VISUAL_QA = RecipeMeta( name=SPEC_VISUAL_QA.name, script_path=SCRIPT_VISUAL_QA, @@ -152,7 +164,7 @@ # Stage 06: single-page-qa -------------------------------------------------------- SCRIPT_SINGLE_PAGE_QA = f"{_RECIPES_ROOT}/06-single-page-qa-sdg.py" SPEC_SINGLE_PAGE_QA = parse_runspec(SCRIPT_SINGLE_PAGE_QA) -SINGLE_PAGE_QA_CFG = load_config_class( +SINGLE_PAGE_QA_CFG = _lazy_config_class( SPEC_SINGLE_PAGE_QA.script_path, "SinglePageQAConfig", "_long_doc_single_page_qa_module", @@ -170,7 +182,7 @@ # Stage 07: windowed-qa ----------------------------------------------------------- SCRIPT_WINDOWED_QA = f"{_RECIPES_ROOT}/07-multi-page-windowed-qa-sdg.py" SPEC_WINDOWED_QA = parse_runspec(SCRIPT_WINDOWED_QA) -WINDOWED_QA_CFG = load_config_class( +WINDOWED_QA_CFG = _lazy_config_class( SPEC_WINDOWED_QA.script_path, "WindowedQAConfig", "_long_doc_windowed_qa_module", @@ -188,7 +200,7 @@ # Stage 08: whole-document-qa ----------------------------------------------------- SCRIPT_WHOLE_DOCUMENT_QA = f"{_RECIPES_ROOT}/08-whole-document-qa-sdg.py" SPEC_WHOLE_DOCUMENT_QA = parse_runspec(SCRIPT_WHOLE_DOCUMENT_QA) -WHOLE_DOCUMENT_QA_CFG = load_config_class( +WHOLE_DOCUMENT_QA_CFG = _lazy_config_class( SPEC_WHOLE_DOCUMENT_QA.script_path, "WholeDocumentQAConfig", "_long_doc_whole_document_qa_module", @@ -206,7 +218,7 @@ # Stage 09: judge ----------------------------------------------------------------- SCRIPT_JUDGE = f"{_RECIPES_ROOT}/09-frontier-judge-sdg.py" SPEC_JUDGE = parse_runspec(SCRIPT_JUDGE) -JUDGE_CFG = load_config_class(SPEC_JUDGE.script_path, "JudgeConfig", "_long_doc_judge_module") +JUDGE_CFG = _lazy_config_class(SPEC_JUDGE.script_path, "JudgeConfig", "_long_doc_judge_module") META_JUDGE = RecipeMeta( name=SPEC_JUDGE.name, script_path=SCRIPT_JUDGE, diff --git a/tests/recipes/test_data_sdg_cli.py b/tests/recipes/test_data_sdg_cli.py new file mode 100644 index 000000000..85e50d622 --- /dev/null +++ b/tests/recipes/test_data_sdg_cli.py @@ -0,0 +1,29 @@ +"""Regression tests for the data SDG CLI import surface.""" + +from __future__ import annotations + +from typer.testing import CliRunner + +from nemotron.cli.bin.nemotron import app + +runner = CliRunner() + + +def test_root_help_succeeds_without_data_sdg_extra(): + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + assert "data" in result.output + + +def test_long_document_group_help_succeeds_without_data_sdg_extra(): + result = runner.invoke(app, ["data", "sdg", "long-document", "--help"]) + assert result.exit_code == 0 + assert "ocr" in result.output + assert "text-qa" in result.output + + +def test_long_document_stage_help_succeeds_without_data_sdg_extra(): + result = runner.invoke(app, ["data", "sdg", "long-document", "ocr", "--help"]) + assert result.exit_code == 0 + assert "Run Nemotron-Parse OCR" in result.output + assert "Global Options" in result.output