From ca23d4f5f43e17cc06d22dd12bf3eeac6b2aaab3 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 15 Dec 2025 18:19:22 -0800 Subject: [PATCH 1/5] save --- eval_protocol/cli.py | 15 + eval_protocol/cli_commands/export_docs.py | 466 ++++++++++++++++++++++ 2 files changed, 481 insertions(+) create mode 100644 eval_protocol/cli_commands/export_docs.py diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index e8125390..2a360c29 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -494,6 +494,17 @@ def parse_args(args=None): # help="Run an evaluation using a Hydra configuration. All arguments after 'run' are passed to Hydra.", # ) + # Hidden command: export-docs (for generating CLI reference documentation) + export_docs_parser = subparsers.add_parser( + "export-docs", + help=argparse.SUPPRESS, # Hidden from help output + ) + export_docs_parser.add_argument( + "--output-dir", + default="./docs/cli-reference", + help="Directory to write markdown files to (default: ./docs/cli-reference)", + ) + # Use parse_known_args to allow Hydra to handle its own arguments return parser.parse_known_args(args) @@ -623,6 +634,10 @@ def _extract_flag_value(argv_list, flag_name): from .cli_commands.local_test import local_test_command return local_test_command(args) + elif args.command == "export-docs": + from .cli_commands.export_docs import export_docs_command + + return export_docs_command(args) # elif args.command == "run": # # For the 'run' command, Hydra takes over argument parsing. # diff --git a/eval_protocol/cli_commands/export_docs.py b/eval_protocol/cli_commands/export_docs.py new file mode 100644 index 00000000..8ad7ade5 --- /dev/null +++ b/eval_protocol/cli_commands/export_docs.py @@ -0,0 +1,466 @@ +""" +Export CLI reference documentation as markdown files. + +This module provides functionality to introspect the argparse-based CLI +and generate markdown documentation for each command. 
+""" + +import argparse +import logging +import os +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +def _get_parser_info(parser: argparse.ArgumentParser) -> Dict: + """Extract information from an ArgumentParser.""" + info = { + "prog": parser.prog, + "description": parser.description or "", + "epilog": parser.epilog or "", + "arguments": [], + "subparsers": {}, + } + + # Extract arguments + for action in parser._actions: + if isinstance(action, argparse._SubParsersAction): + # Handle subparsers + for name, subparser in action.choices.items(): + info["subparsers"][name] = _get_parser_info(subparser) + elif isinstance(action, argparse._HelpAction): + # Skip help action, it's always present + continue + else: + arg_info = { + "option_strings": action.option_strings, + "dest": action.dest, + "help": action.help or "", + "default": action.default, + "required": getattr(action, "required", False), + "type": getattr(action, "type", None), + "choices": getattr(action, "choices", None), + "nargs": getattr(action, "nargs", None), + "metavar": getattr(action, "metavar", None), + } + # Check if help is suppressed + if action.help != argparse.SUPPRESS: + info["arguments"].append(arg_info) + + return info + + +def _format_argument_row(arg: Dict) -> str: + """Format a single argument as a markdown table row.""" + # Build the flag/argument name + if arg["option_strings"]: + name = ", ".join(f"`{opt}`" for opt in arg["option_strings"]) + else: + name = f"`{arg['dest']}`" + + # Build type info + type_str = "" + if arg["type"]: + type_str = getattr(arg["type"], "__name__", str(arg["type"])) + if arg["choices"]: + type_str = f"choices: {arg['choices']}" + + # Format default value + default = arg["default"] + if default is None: + default_str = "-" + elif default == argparse.SUPPRESS: + default_str = "-" + elif isinstance(default, bool): + default_str = str(default).lower() + else: + default_str = f"`{default}`" + + 
# Help text (escape pipe characters for markdown tables) + help_text = (arg["help"] or "-").replace("|", "\\|") + + # Required indicator + required = "Yes" if arg["required"] else "No" + + return f"| {name} | {type_str} | {default_str} | {required} | {help_text} |" + + +def _generate_command_markdown( + name: str, + info: Dict, + parent_command: str = "", + level: int = 1, +) -> str: + """Generate markdown documentation for a single command.""" + lines = [] + + # Command title + full_command = f"{parent_command} {name}".strip() if parent_command else name + heading = "#" * min(level, 4) + lines.append(f"{heading} `{full_command}`") + lines.append("") + + # Description + if info["description"]: + lines.append(info["description"]) + lines.append("") + + # Arguments table + if info["arguments"]: + lines.append("**Options:**") + lines.append("") + lines.append("| Option | Type | Default | Required | Description |") + lines.append("|--------|------|---------|----------|-------------|") + for arg in info["arguments"]: + lines.append(_format_argument_row(arg)) + lines.append("") + + # Epilog + if info["epilog"]: + lines.append(info["epilog"]) + lines.append("") + + return "\n".join(lines) + + +def _generate_subcommand_docs( + subparsers: Dict, + parent_command: str, + level: int, +) -> List[Tuple[str, str]]: + """Generate markdown docs for all subcommands, returns list of (filename, content).""" + docs = [] + + for name, info in subparsers.items(): + full_command = f"{parent_command} {name}".strip() + + # Generate this command's doc + content_lines = [] + content_lines.append(f"# `{full_command}`") + content_lines.append("") + + if info["description"]: + content_lines.append(info["description"]) + content_lines.append("") + + # Arguments table + if info["arguments"]: + content_lines.append("## Options") + content_lines.append("") + content_lines.append("| Option | Type | Default | Required | Description |") + 
content_lines.append("|--------|------|---------|----------|-------------|") + for arg in info["arguments"]: + content_lines.append(_format_argument_row(arg)) + content_lines.append("") + + # Handle nested subparsers + if info["subparsers"]: + content_lines.append("## Subcommands") + content_lines.append("") + for subname in info["subparsers"].keys(): + sub_full = f"{full_command} {subname}" + content_lines.append(f"- [`{sub_full}`]({name}-{subname}.md)") + content_lines.append("") + + # Recursively generate docs for nested subcommands + nested_docs = _generate_subcommand_docs( + info["subparsers"], + full_command, + level + 1, + ) + for nested_filename, nested_content in nested_docs: + docs.append((f"{name}-{nested_filename}", nested_content)) + + if info["epilog"]: + content_lines.append(info["epilog"]) + content_lines.append("") + + filename = name.replace(" ", "-") + ".md" + docs.append((filename, "\n".join(content_lines))) + + return docs + + +def generate_cli_docs(parser: argparse.ArgumentParser, output_dir: str) -> int: + """ + Generate markdown documentation from an ArgumentParser. + + Args: + parser: The root ArgumentParser instance. + output_dir: Directory to write markdown files to. + + Returns: + 0 on success, 1 on failure. 
+ """ + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + # Extract parser info + info = _get_parser_info(parser) + + # Generate index/overview page + index_lines = [] + index_lines.append("# CLI Reference") + index_lines.append("") + index_lines.append(f"**{info['prog']}** - {info['description']}") + index_lines.append("") + + # Global options + if info["arguments"]: + index_lines.append("## Global Options") + index_lines.append("") + index_lines.append("| Option | Type | Default | Required | Description |") + index_lines.append("|--------|------|---------|----------|-------------|") + for arg in info["arguments"]: + index_lines.append(_format_argument_row(arg)) + index_lines.append("") + + # Commands section + if info["subparsers"]: + index_lines.append("## Commands") + index_lines.append("") + for name, subinfo in info["subparsers"].items(): + description = subinfo["description"] or "" + # Truncate long descriptions for the index + if len(description) > 100: + description = description[:97] + "..." + index_lines.append(f"- [`{name}`]({name}.md) - {description}") + index_lines.append("") + + # Write index file + index_path = output_path / "index.md" + index_path.write_text("\n".join(index_lines), encoding="utf-8") + logger.info(f"Generated: {index_path}") + + # Generate individual command docs + if info["subparsers"]: + docs = _generate_subcommand_docs(info["subparsers"], info["prog"], 1) + for filename, content in docs: + file_path = output_path / filename + file_path.write_text(content, encoding="utf-8") + logger.info(f"Generated: {file_path}") + + logger.info(f"CLI documentation exported to: {output_path}") + return 0 + + +def export_docs_command(args: argparse.Namespace) -> int: + """ + Export CLI documentation to markdown files. + + This command introspects the CLI parser and generates markdown documentation. 
+ """ + # Import here to avoid circular imports + from eval_protocol.cli import parse_args + + # Create a fresh parser by calling parse_args with empty args + # We need to access the parser directly + parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling") + parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging") + parser.add_argument( + "--profile", + help="Fireworks profile to use (reads ~/.fireworks/profiles//auth.ini and settings.ini)", + ) + parser.add_argument( + "--server", + help="Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai)", + ) + + subparsers = parser.add_subparsers(dest="command", help="Command to run") + + # Logs command + logs_parser = subparsers.add_parser("logs", help="Serve logs with file watching and real-time updates") + logs_parser.add_argument("--port", type=int, default=8000, help="Port to bind to (default: 8000)") + logs_parser.add_argument("--debug", action="store_true", help="Enable debug mode") + logs_parser.add_argument("--disable-elasticsearch-setup", action="store_true", help="Disable Elasticsearch setup") + logs_parser.add_argument( + "--use-env-elasticsearch-config", + action="store_true", + help="Use env vars for Elasticsearch config (requires ELASTICSEARCH_URL, ELASTICSEARCH_API_KEY, ELASTICSEARCH_INDEX_NAME)", + ) + logs_parser.add_argument( + "--use-fireworks", + action="store_true", + help="Force Fireworks tracing backend for logs UI (overrides env auto-detection)", + ) + logs_parser.add_argument( + "--use-elasticsearch", + action="store_true", + help="Force Elasticsearch backend for logs UI (overrides env auto-detection)", + ) + + # Upload command + upload_parser = subparsers.add_parser( + "upload", + help="Scan for evaluation tests, select, and upload as Fireworks evaluators", + ) + upload_parser.add_argument( + "--path", + default=".", + help="Path to search for evaluation tests (default: 
current directory)", + ) + upload_parser.add_argument( + "--entry", + help="Entrypoint of evaluation test to upload (module:function or path::function). For multiple, separate by commas.", + ) + upload_parser.add_argument( + "--id", + help="Evaluator ID to use (if multiple selections, a numeric suffix is appended)", + ) + upload_parser.add_argument( + "--display-name", + help="Display name for evaluator (defaults to ID)", + ) + upload_parser.add_argument( + "--description", + help="Description for evaluator", + ) + upload_parser.add_argument( + "--force", + action="store_true", + help="Overwrite existing evaluator with the same ID", + ) + upload_parser.add_argument( + "--yes", + "-y", + action="store_true", + help="Non-interactive: upload all discovered evaluation tests", + ) + upload_parser.add_argument( + "--env-file", + help="Path to .env file containing secrets to upload (default: .env in current directory)", + ) + + # Create command group + create_parser = subparsers.add_parser( + "create", + help="Resource creation commands", + ) + create_subparsers = create_parser.add_subparsers(dest="create_command") + rft_parser = create_subparsers.add_parser( + "rft", + help="Create a Reinforcement Fine-tuning Job on Fireworks", + ) + rft_parser.add_argument( + "--evaluator", + help="Evaluator ID or fully-qualified resource (accounts/{acct}/evaluators/{id}); if omitted, derive from local tests", + ) + rft_parser.add_argument( + "--dataset", + help="Use existing dataset (ID or resource 'accounts/{acct}/datasets/{id}') to skip local materialization", + ) + rft_parser.add_argument( + "--dataset-jsonl", + help="Path to JSONL to upload as a new Fireworks dataset", + ) + rft_parser.add_argument( + "--dataset-builder", + help="Explicit dataset builder spec (module::function or path::function)", + ) + rft_parser.add_argument( + "--dataset-display-name", + help="Display name for dataset on Fireworks (defaults to dataset id)", + ) + rft_parser.add_argument("--base-model", 
help="Base model resource id") + rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from") + rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)") + rft_parser.add_argument("--epochs", type=int, default=1, help="Number of training epochs") + rft_parser.add_argument("--batch-size", type=int, default=128000, help="Training batch size") + rft_parser.add_argument("--learning-rate", type=float, default=3e-5, help="Learning rate") + rft_parser.add_argument("--max-context-length", type=int, default=65536, help="Maximum context length") + rft_parser.add_argument("--lora-rank", type=int, default=16, help="LoRA rank") + rft_parser.add_argument("--gradient-accumulation-steps", type=int, help="Number of gradient accumulation steps") + rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of LR warmup steps") + rft_parser.add_argument("--accelerator-count", type=int, help="Number of accelerators") + rft_parser.add_argument("--region", help="Fireworks region enum value") + rft_parser.add_argument("--display-name", help="RFT job display name") + rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id") + rft_parser.add_argument( + "--eval-auto-carveout", + dest="eval_auto_carveout", + action="store_true", + default=True, + help="Enable auto carveout for evaluation (default: true)", + ) + rft_parser.add_argument( + "--no-eval-auto-carveout", + dest="eval_auto_carveout", + action="store_false", + help="Disable auto carveout for evaluation", + ) + rft_parser.add_argument("--chunk-size", type=int, default=100, help="Data chunk size for rollout batching") + rft_parser.add_argument("--temperature", type=float, help="Sampling temperature") + rft_parser.add_argument("--top-p", type=float, help="Top-p sampling parameter") + rft_parser.add_argument("--top-k", type=int, help="Top-k sampling parameter") + rft_parser.add_argument("--max-output-tokens", type=int, 
default=32768, help="Maximum output tokens") + rft_parser.add_argument("--response-candidates-count", type=int, default=8, help="Number of response candidates") + rft_parser.add_argument("--extra-body", help="JSON string for extra inference params") + rft_parser.add_argument( + "--mcp-server", + help="The MCP server resource name to use for the reinforcement fine-tuning job.", + ) + rft_parser.add_argument("--wandb-enabled", action="store_true", help="Enable Weights & Biases logging") + rft_parser.add_argument("--wandb-project", help="Weights & Biases project name") + rft_parser.add_argument("--wandb-entity", help="Weights & Biases entity") + rft_parser.add_argument("--wandb-run-id", help="Weights & Biases run ID") + rft_parser.add_argument("--wandb-api-key", help="Weights & Biases API key") + rft_parser.add_argument("--job-id", help="Specify an explicit RFT job id") + rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode") + rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending") + rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID") + rft_parser.add_argument( + "--skip-validation", + action="store_true", + help="Skip local dataset and evaluator validation before creating the RFT job", + ) + rft_parser.add_argument( + "--ignore-docker", + action="store_true", + help="Ignore Dockerfile even if present; run pytest on host during evaluator validation", + ) + rft_parser.add_argument( + "--docker-build-extra", + default="", + help="Extra flags to pass to 'docker build' when validating evaluator (quoted string)", + ) + rft_parser.add_argument( + "--docker-run-extra", + default="", + help="Extra flags to pass to 'docker run' when validating evaluator (quoted string)", + ) + + # Local test command + local_test_parser = subparsers.add_parser( + "local-test", + help="Select an evaluation test and run it locally. 
If a Dockerfile exists, build and run via Docker; otherwise run on host.", + ) + local_test_parser.add_argument( + "--entry", + help="Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes).", + ) + local_test_parser.add_argument( + "--ignore-docker", + action="store_true", + help="Ignore Dockerfile even if present; run pytest on host", + ) + local_test_parser.add_argument( + "--yes", + "-y", + action="store_true", + help="Non-interactive: if multiple tests exist and no --entry, fails with guidance", + ) + local_test_parser.add_argument( + "--docker-build-extra", + default="", + help="Extra flags to pass to 'docker build' (quoted string)", + ) + local_test_parser.add_argument( + "--docker-run-extra", + default="", + help="Extra flags to pass to 'docker run' (quoted string)", + ) + + output_dir = args.output_dir + return generate_cli_docs(parser, output_dir) From 3ff6f8cdeafe6683e012ca50f3b479c980b0dbf1 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 15 Dec 2025 21:54:22 -0800 Subject: [PATCH 2/5] update --- eval_protocol/cli.py | 22 +- eval_protocol/cli_commands/export_docs.py | 392 ++++------------------ 2 files changed, 79 insertions(+), 335 deletions(-) diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index 2a360c29..59c0392c 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -32,9 +32,14 @@ preview_command = None # type: ignore[assignment] -def parse_args(args=None): - """Parse command line arguments""" +def build_parser() -> argparse.ArgumentParser: + """Build and return the argument parser for the CLI.""" parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling") + return _configure_parser(parser) + + +def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser: + """Configure all arguments and subparsers on the given parser.""" parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging") 
parser.add_argument( "--profile", @@ -500,11 +505,18 @@ def parse_args(args=None): help=argparse.SUPPRESS, # Hidden from help output ) export_docs_parser.add_argument( - "--output-dir", - default="./docs/cli-reference", - help="Directory to write markdown files to (default: ./docs/cli-reference)", + "--output", + "-o", + default="./docs/cli-reference.md", + help="Output markdown file path (default: ./docs/cli-reference.md)", ) + return parser + + +def parse_args(args=None): + """Parse command line arguments.""" + parser = build_parser() # Use parse_known_args to allow Hydra to handle its own arguments return parser.parse_known_args(args) diff --git a/eval_protocol/cli_commands/export_docs.py b/eval_protocol/cli_commands/export_docs.py index 8ad7ade5..5b39742c 100644 --- a/eval_protocol/cli_commands/export_docs.py +++ b/eval_protocol/cli_commands/export_docs.py @@ -7,9 +7,8 @@ import argparse import logging -import os from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, List logger = logging.getLogger(__name__) @@ -87,380 +86,113 @@ def _format_argument_row(arg: Dict) -> str: return f"| {name} | {type_str} | {default_str} | {required} | {help_text} |" -def _generate_command_markdown( +def _generate_command_section( name: str, info: Dict, - parent_command: str = "", - level: int = 1, -) -> str: - """Generate markdown documentation for a single command.""" + parent_command: str, + heading_level: int = 2, +) -> List[str]: + """Generate markdown section for a single command.""" lines = [] + full_command = f"{parent_command} {name}".strip() + heading = "#" * heading_level - # Command title - full_command = f"{parent_command} {name}".strip() if parent_command else name - heading = "#" * min(level, 4) lines.append(f"{heading} `{full_command}`") lines.append("") - # Description if info["description"]: lines.append(info["description"]) lines.append("") # Arguments table if info["arguments"]: - lines.append("**Options:**") - 
lines.append("") lines.append("| Option | Type | Default | Required | Description |") lines.append("|--------|------|---------|----------|-------------|") for arg in info["arguments"]: lines.append(_format_argument_row(arg)) lines.append("") - # Epilog + # Handle nested subparsers recursively + if info["subparsers"]: + for subname, subinfo in info["subparsers"].items(): + lines.extend( + _generate_command_section( + subname, + subinfo, + full_command, + heading_level + 1, + ) + ) + if info["epilog"]: lines.append(info["epilog"]) lines.append("") - return "\n".join(lines) - - -def _generate_subcommand_docs( - subparsers: Dict, - parent_command: str, - level: int, -) -> List[Tuple[str, str]]: - """Generate markdown docs for all subcommands, returns list of (filename, content).""" - docs = [] - - for name, info in subparsers.items(): - full_command = f"{parent_command} {name}".strip() - - # Generate this command's doc - content_lines = [] - content_lines.append(f"# `{full_command}`") - content_lines.append("") - - if info["description"]: - content_lines.append(info["description"]) - content_lines.append("") - - # Arguments table - if info["arguments"]: - content_lines.append("## Options") - content_lines.append("") - content_lines.append("| Option | Type | Default | Required | Description |") - content_lines.append("|--------|------|---------|----------|-------------|") - for arg in info["arguments"]: - content_lines.append(_format_argument_row(arg)) - content_lines.append("") - - # Handle nested subparsers - if info["subparsers"]: - content_lines.append("## Subcommands") - content_lines.append("") - for subname in info["subparsers"].keys(): - sub_full = f"{full_command} {subname}" - content_lines.append(f"- [`{sub_full}`]({name}-{subname}.md)") - content_lines.append("") - - # Recursively generate docs for nested subcommands - nested_docs = _generate_subcommand_docs( - info["subparsers"], - full_command, - level + 1, - ) - for nested_filename, nested_content in 
nested_docs: - docs.append((f"{name}-{nested_filename}", nested_content)) - - if info["epilog"]: - content_lines.append(info["epilog"]) - content_lines.append("") - - filename = name.replace(" ", "-") + ".md" - docs.append((filename, "\n".join(content_lines))) + return lines - return docs - -def generate_cli_docs(parser: argparse.ArgumentParser, output_dir: str) -> int: +def generate_cli_docs(parser: argparse.ArgumentParser, output_path: str) -> int: """ - Generate markdown documentation from an ArgumentParser. + Generate markdown documentation from an ArgumentParser to a single file. Args: - parser: The root ArgumentParser instance. - output_dir: Directory to write markdown files to. + parser: The root ArgumentParser instance. + output_path: Path to write the markdown file to. Returns: - 0 on success, 1 on failure. + 0 on success, 1 on failure. """ - output_path = Path(output_dir) - output_path.mkdir(parents=True, exist_ok=True) - # Extract parser info info = _get_parser_info(parser) - # Generate index/overview page - index_lines = [] - index_lines.append("# CLI Reference") - index_lines.append("") - index_lines.append(f"**{info['prog']}** - {info['description']}") - index_lines.append("") + # Filter out hidden commands (like export-docs itself) + visible_subparsers = { + name: subinfo + for name, subinfo in info["subparsers"].items() + if name != "export-docs" # Don't document the hidden command + } + + # Generate single page + lines = [] + lines.append("# CLI Reference") + lines.append("") + lines.append(f"**{info['prog']}** - {info['description']}") + lines.append("") # Global options if info["arguments"]: - index_lines.append("## Global Options") - index_lines.append("") - index_lines.append("| Option | Type | Default | Required | Description |") - index_lines.append("|--------|------|---------|----------|-------------|") + lines.append("## Global Options") + lines.append("") + lines.append("| Option | Type | Default | Required | Description |") + 
lines.append("|--------|------|---------|----------|-------------|") for arg in info["arguments"]: - index_lines.append(_format_argument_row(arg)) - index_lines.append("") + lines.append(_format_argument_row(arg)) + lines.append("") # Commands section - if info["subparsers"]: - index_lines.append("## Commands") - index_lines.append("") - for name, subinfo in info["subparsers"].items(): - description = subinfo["description"] or "" - # Truncate long descriptions for the index - if len(description) > 100: - description = description[:97] + "..." - index_lines.append(f"- [`{name}`]({name}.md) - {description}") - index_lines.append("") - - # Write index file - index_path = output_path / "index.md" - index_path.write_text("\n".join(index_lines), encoding="utf-8") - logger.info(f"Generated: {index_path}") - - # Generate individual command docs - if info["subparsers"]: - docs = _generate_subcommand_docs(info["subparsers"], info["prog"], 1) - for filename, content in docs: - file_path = output_path / filename - file_path.write_text(content, encoding="utf-8") - logger.info(f"Generated: {file_path}") + if visible_subparsers: + lines.append("## Commands") + lines.append("") + for name, subinfo in visible_subparsers.items(): + lines.extend(_generate_command_section(name, subinfo, info["prog"], heading_level=3)) + + # Write single file + out = Path(output_path) + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text("\n".join(lines), encoding="utf-8") + logger.info(f"Generated: {out}") - logger.info(f"CLI documentation exported to: {output_path}") return 0 def export_docs_command(args: argparse.Namespace) -> int: """ - Export CLI documentation to markdown files. + Export CLI documentation to a single markdown file. This command introspects the CLI parser and generates markdown documentation. 
""" - # Import here to avoid circular imports - from eval_protocol.cli import parse_args - - # Create a fresh parser by calling parse_args with empty args - # We need to access the parser directly - parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling") - parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging") - parser.add_argument( - "--profile", - help="Fireworks profile to use (reads ~/.fireworks/profiles//auth.ini and settings.ini)", - ) - parser.add_argument( - "--server", - help="Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai)", - ) - - subparsers = parser.add_subparsers(dest="command", help="Command to run") - - # Logs command - logs_parser = subparsers.add_parser("logs", help="Serve logs with file watching and real-time updates") - logs_parser.add_argument("--port", type=int, default=8000, help="Port to bind to (default: 8000)") - logs_parser.add_argument("--debug", action="store_true", help="Enable debug mode") - logs_parser.add_argument("--disable-elasticsearch-setup", action="store_true", help="Disable Elasticsearch setup") - logs_parser.add_argument( - "--use-env-elasticsearch-config", - action="store_true", - help="Use env vars for Elasticsearch config (requires ELASTICSEARCH_URL, ELASTICSEARCH_API_KEY, ELASTICSEARCH_INDEX_NAME)", - ) - logs_parser.add_argument( - "--use-fireworks", - action="store_true", - help="Force Fireworks tracing backend for logs UI (overrides env auto-detection)", - ) - logs_parser.add_argument( - "--use-elasticsearch", - action="store_true", - help="Force Elasticsearch backend for logs UI (overrides env auto-detection)", - ) - - # Upload command - upload_parser = subparsers.add_parser( - "upload", - help="Scan for evaluation tests, select, and upload as Fireworks evaluators", - ) - upload_parser.add_argument( - "--path", - default=".", - help="Path to search for evaluation tests (default: 
current directory)", - ) - upload_parser.add_argument( - "--entry", - help="Entrypoint of evaluation test to upload (module:function or path::function). For multiple, separate by commas.", - ) - upload_parser.add_argument( - "--id", - help="Evaluator ID to use (if multiple selections, a numeric suffix is appended)", - ) - upload_parser.add_argument( - "--display-name", - help="Display name for evaluator (defaults to ID)", - ) - upload_parser.add_argument( - "--description", - help="Description for evaluator", - ) - upload_parser.add_argument( - "--force", - action="store_true", - help="Overwrite existing evaluator with the same ID", - ) - upload_parser.add_argument( - "--yes", - "-y", - action="store_true", - help="Non-interactive: upload all discovered evaluation tests", - ) - upload_parser.add_argument( - "--env-file", - help="Path to .env file containing secrets to upload (default: .env in current directory)", - ) - - # Create command group - create_parser = subparsers.add_parser( - "create", - help="Resource creation commands", - ) - create_subparsers = create_parser.add_subparsers(dest="create_command") - rft_parser = create_subparsers.add_parser( - "rft", - help="Create a Reinforcement Fine-tuning Job on Fireworks", - ) - rft_parser.add_argument( - "--evaluator", - help="Evaluator ID or fully-qualified resource (accounts/{acct}/evaluators/{id}); if omitted, derive from local tests", - ) - rft_parser.add_argument( - "--dataset", - help="Use existing dataset (ID or resource 'accounts/{acct}/datasets/{id}') to skip local materialization", - ) - rft_parser.add_argument( - "--dataset-jsonl", - help="Path to JSONL to upload as a new Fireworks dataset", - ) - rft_parser.add_argument( - "--dataset-builder", - help="Explicit dataset builder spec (module::function or path::function)", - ) - rft_parser.add_argument( - "--dataset-display-name", - help="Display name for dataset on Fireworks (defaults to dataset id)", - ) - rft_parser.add_argument("--base-model", 
help="Base model resource id") - rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from") - rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)") - rft_parser.add_argument("--epochs", type=int, default=1, help="Number of training epochs") - rft_parser.add_argument("--batch-size", type=int, default=128000, help="Training batch size") - rft_parser.add_argument("--learning-rate", type=float, default=3e-5, help="Learning rate") - rft_parser.add_argument("--max-context-length", type=int, default=65536, help="Maximum context length") - rft_parser.add_argument("--lora-rank", type=int, default=16, help="LoRA rank") - rft_parser.add_argument("--gradient-accumulation-steps", type=int, help="Number of gradient accumulation steps") - rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of LR warmup steps") - rft_parser.add_argument("--accelerator-count", type=int, help="Number of accelerators") - rft_parser.add_argument("--region", help="Fireworks region enum value") - rft_parser.add_argument("--display-name", help="RFT job display name") - rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id") - rft_parser.add_argument( - "--eval-auto-carveout", - dest="eval_auto_carveout", - action="store_true", - default=True, - help="Enable auto carveout for evaluation (default: true)", - ) - rft_parser.add_argument( - "--no-eval-auto-carveout", - dest="eval_auto_carveout", - action="store_false", - help="Disable auto carveout for evaluation", - ) - rft_parser.add_argument("--chunk-size", type=int, default=100, help="Data chunk size for rollout batching") - rft_parser.add_argument("--temperature", type=float, help="Sampling temperature") - rft_parser.add_argument("--top-p", type=float, help="Top-p sampling parameter") - rft_parser.add_argument("--top-k", type=int, help="Top-k sampling parameter") - rft_parser.add_argument("--max-output-tokens", type=int, 
default=32768, help="Maximum output tokens") - rft_parser.add_argument("--response-candidates-count", type=int, default=8, help="Number of response candidates") - rft_parser.add_argument("--extra-body", help="JSON string for extra inference params") - rft_parser.add_argument( - "--mcp-server", - help="The MCP server resource name to use for the reinforcement fine-tuning job.", - ) - rft_parser.add_argument("--wandb-enabled", action="store_true", help="Enable Weights & Biases logging") - rft_parser.add_argument("--wandb-project", help="Weights & Biases project name") - rft_parser.add_argument("--wandb-entity", help="Weights & Biases entity") - rft_parser.add_argument("--wandb-run-id", help="Weights & Biases run ID") - rft_parser.add_argument("--wandb-api-key", help="Weights & Biases API key") - rft_parser.add_argument("--job-id", help="Specify an explicit RFT job id") - rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode") - rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending") - rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID") - rft_parser.add_argument( - "--skip-validation", - action="store_true", - help="Skip local dataset and evaluator validation before creating the RFT job", - ) - rft_parser.add_argument( - "--ignore-docker", - action="store_true", - help="Ignore Dockerfile even if present; run pytest on host during evaluator validation", - ) - rft_parser.add_argument( - "--docker-build-extra", - default="", - help="Extra flags to pass to 'docker build' when validating evaluator (quoted string)", - ) - rft_parser.add_argument( - "--docker-run-extra", - default="", - help="Extra flags to pass to 'docker run' when validating evaluator (quoted string)", - ) - - # Local test command - local_test_parser = subparsers.add_parser( - "local-test", - help="Select an evaluation test and run it locally. 
If a Dockerfile exists, build and run via Docker; otherwise run on host.", - ) - local_test_parser.add_argument( - "--entry", - help="Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes).", - ) - local_test_parser.add_argument( - "--ignore-docker", - action="store_true", - help="Ignore Dockerfile even if present; run pytest on host", - ) - local_test_parser.add_argument( - "--yes", - "-y", - action="store_true", - help="Non-interactive: if multiple tests exist and no --entry, fails with guidance", - ) - local_test_parser.add_argument( - "--docker-build-extra", - default="", - help="Extra flags to pass to 'docker build' (quoted string)", - ) - local_test_parser.add_argument( - "--docker-run-extra", - default="", - help="Extra flags to pass to 'docker run' (quoted string)", - ) - - output_dir = args.output_dir - return generate_cli_docs(parser, output_dir) + # Import the parser builder from cli.py to get the actual parser + from eval_protocol.cli import build_parser + + parser = build_parser() + return generate_cli_docs(parser, args.output) From 5e194935f73f51b171405e6d7158b8992be124e3 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 15 Dec 2025 22:02:12 -0800 Subject: [PATCH 3/5] Enhance CLI documentation generation by updating subparser help extraction. Introduce a method to hide suppressed commands from help output and ensure accurate help text is included for subparsers. 
--- docs/cli-reference.md | 106 ++++++++++++++++++++++ eval_protocol/cli.py | 22 ++++- eval_protocol/cli_commands/export_docs.py | 20 +++- 3 files changed, 139 insertions(+), 9 deletions(-) create mode 100644 docs/cli-reference.md diff --git a/docs/cli-reference.md b/docs/cli-reference.md new file mode 100644 index 00000000..73de1862 --- /dev/null +++ b/docs/cli-reference.md @@ -0,0 +1,106 @@ +# CLI Reference + +**ep** - eval-protocol: Tools for evaluation and reward modeling + +## Global Options + +| Option | Type | Default | Required | Description | +|--------|------|---------|----------|-------------| +| `--verbose`, `-v` | | false | No | Enable verbose logging | +| `--profile` | | - | No | Fireworks profile to use (reads ~/.fireworks/profiles//auth.ini and settings.ini) | +| `--server` | | - | No | Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai) | + +## Commands + +### `ep logs` + +Serve logs with file watching and real-time updates + +| Option | Type | Default | Required | Description | +|--------|------|---------|----------|-------------| +| `--port` | int | `8000` | No | Port to bind to (default: 8000) | +| `--debug` | | false | No | Enable debug mode | +| `--disable-elasticsearch-setup` | | false | No | Disable Elasticsearch setup | +| `--use-env-elasticsearch-config` | | false | No | Use env vars for Elasticsearch config (requires ELASTICSEARCH_URL, ELASTICSEARCH_API_KEY, ELASTICSEARCH_INDEX_NAME) | +| `--use-fireworks` | | false | No | Force Fireworks tracing backend for logs UI (overrides env auto-detection) | +| `--use-elasticsearch` | | false | No | Force Elasticsearch backend for logs UI (overrides env auto-detection) | + +### `ep upload` + +Scan for evaluation tests, select, and upload as Fireworks evaluators + +| Option | Type | Default | Required | Description | +|--------|------|---------|----------|-------------| +| `--path` | | `.` | No | Path to search for evaluation tests (default: current 
directory) | +| `--entry` | | - | No | Entrypoint of evaluation test to upload (module:function or path::function). For multiple, separate by commas. | +| `--id` | | - | No | Evaluator ID to use (if multiple selections, a numeric suffix is appended) | +| `--display-name` | | - | No | Display name for evaluator (defaults to ID) | +| `--description` | | - | No | Description for evaluator | +| `--force` | | false | No | Overwrite existing evaluator with the same ID | +| `--yes`, `-y` | | false | No | Non-interactive: upload all discovered evaluation tests | +| `--env-file` | | - | No | Path to .env file containing secrets to upload (default: .env in current directory) | + +### `ep create` + +Resource creation commands + +#### `ep create rft` + +Create a Reinforcement Fine-tuning Job on Fireworks + +| Option | Type | Default | Required | Description | +|--------|------|---------|----------|-------------| +| `--evaluator` | | - | No | Evaluator ID or fully-qualified resource (accounts/{acct}/evaluators/{id}); if omitted, derive from local tests | +| `--dataset` | | - | No | Use existing dataset (ID or resource 'accounts/{acct}/datasets/{id}') to skip local materialization | +| `--dataset-jsonl` | | - | No | Path to JSONL to upload as a new Fireworks dataset | +| `--dataset-builder` | | - | No | Explicit dataset builder spec (module::function or path::function) | +| `--dataset-display-name` | | - | No | Display name for dataset on Fireworks (defaults to dataset id) | +| `--base-model` | | - | No | Base model resource id | +| `--warm-start-from` | | - | No | Addon model to warm start from | +| `--output-model` | | - | No | Output model id (defaults from evaluator) | +| `--epochs` | int | `1` | No | - | +| `--batch-size` | int | `128000` | No | - | +| `--learning-rate` | float | `3e-05` | No | - | +| `--max-context-length` | int | `65536` | No | - | +| `--lora-rank` | int | `16` | No | - | +| `--gradient-accumulation-steps` | int | - | No | Number of gradient accumulation 
steps | +| `--learning-rate-warmup-steps` | int | - | No | Number of LR warmup steps | +| `--accelerator-count` | int | - | No | - | +| `--region` | | - | No | Fireworks region enum value | +| `--display-name` | | - | No | RFT job display name | +| `--evaluation-dataset` | | - | No | Optional separate eval dataset id | +| `--eval-auto-carveout` | | true | No | - | +| `--no-eval-auto-carveout` | | true | No | - | +| `--chunk-size` | int | `100` | No | Data chunk size for rollout batching | +| `--temperature` | float | - | No | - | +| `--top-p` | float | - | No | - | +| `--top-k` | int | - | No | - | +| `--max-output-tokens` | int | `32768` | No | - | +| `--response-candidates-count` | int | `8` | No | - | +| `--extra-body` | | - | No | JSON string for extra inference params | +| `--mcp-server` | | - | No | The MCP server resource name to use for the reinforcement fine-tuning job. | +| `--wandb-enabled` | | false | No | - | +| `--wandb-project` | | - | No | - | +| `--wandb-entity` | | - | No | - | +| `--wandb-run-id` | | - | No | - | +| `--wandb-api-key` | | - | No | - | +| `--job-id` | | - | No | Specify an explicit RFT job id | +| `--yes`, `-y` | | false | No | Non-interactive mode | +| `--dry-run` | | false | No | Print planned REST calls without sending | +| `--force` | | false | No | Overwrite existing evaluator with the same ID | +| `--skip-validation` | | false | No | Skip local dataset and evaluator validation before creating the RFT job | +| `--ignore-docker` | | false | No | Ignore Dockerfile even if present; run pytest on host during evaluator validation | +| `--docker-build-extra` | | `` | No | Extra flags to pass to 'docker build' when validating evaluator (quoted string, e.g. "--no-cache --pull --progress=plain") | +| `--docker-run-extra` | | `` | No | Extra flags to pass to 'docker run' when validating evaluator (quoted string, e.g. "--env-file .env --memory=8g") | + +### `ep local-test` + +Select an evaluation test and run it locally. 
If a Dockerfile exists, build and run via Docker; otherwise run on host. + +| Option | Type | Default | Required | Description | +|--------|------|---------|----------|-------------| +| `--entry` | | - | No | Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes). | +| `--ignore-docker` | | false | No | Ignore Dockerfile even if present; run pytest on host | +| `--yes`, `-y` | | false | No | Non-interactive: if multiple tests exist and no --entry, fails with guidance | +| `--docker-build-extra` | | `` | No | Extra flags to pass to 'docker build' (quoted string, e.g. "--no-cache --pull --progress=plain") | +| `--docker-run-extra` | | `` | No | Extra flags to pass to 'docker run' (quoted string, e.g. "--env-file .env --memory=8g") | diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index 59c0392c..3bb455d7 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -500,10 +500,7 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse # ) # Hidden command: export-docs (for generating CLI reference documentation) - export_docs_parser = subparsers.add_parser( - "export-docs", - help=argparse.SUPPRESS, # Hidden from help output - ) + export_docs_parser = subparsers.add_parser("export-docs", help=argparse.SUPPRESS) export_docs_parser.add_argument( "--output", "-o", @@ -511,9 +508,26 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse help="Output markdown file path (default: ./docs/cli-reference.md)", ) + # Update metavar to only show visible commands (exclude those with SUPPRESS) + _hide_suppressed_subparsers(parser) + return parser +def _hide_suppressed_subparsers(parser: argparse.ArgumentParser) -> None: + """Update subparsers to exclude commands with help=SUPPRESS from help output.""" + for action in parser._actions: + if isinstance(action, argparse._SubParsersAction): + # Filter _choices_actions to only visible commands + choices_actions = 
getattr(action, "_choices_actions", []) + visible_actions = [a for a in choices_actions if a.help != argparse.SUPPRESS] + action._choices_actions = visible_actions + # Update metavar to match + visible_names = [a.dest for a in visible_actions] + if visible_names: + action.metavar = "{" + ",".join(visible_names) + "}" + + def parse_args(args=None): """Parse command line arguments.""" parser = build_parser() diff --git a/eval_protocol/cli_commands/export_docs.py b/eval_protocol/cli_commands/export_docs.py index 5b39742c..feb083b0 100644 --- a/eval_protocol/cli_commands/export_docs.py +++ b/eval_protocol/cli_commands/export_docs.py @@ -13,11 +13,12 @@ logger = logging.getLogger(__name__) -def _get_parser_info(parser: argparse.ArgumentParser) -> Dict: +def _get_parser_info(parser: argparse.ArgumentParser, subparser_help: str = "") -> Dict: """Extract information from an ArgumentParser.""" info = { "prog": parser.prog, "description": parser.description or "", + "help": subparser_help, # The help text from add_parser() "epilog": parser.epilog or "", "arguments": [], "subparsers": {}, @@ -26,9 +27,16 @@ def _get_parser_info(parser: argparse.ArgumentParser) -> Dict: # Extract arguments for action in parser._actions: if isinstance(action, argparse._SubParsersAction): - # Handle subparsers + # Handle subparsers - also extract the help text for each for name, subparser in action.choices.items(): - info["subparsers"][name] = _get_parser_info(subparser) + # Get the help text from the subparser action's _parser_class + subparser_help_text = "" + if hasattr(action, "_choices_actions"): + for choice_action in action._choices_actions: + if choice_action.dest == name: + subparser_help_text = choice_action.help or "" + break + info["subparsers"][name] = _get_parser_info(subparser, subparser_help_text) elif isinstance(action, argparse._HelpAction): # Skip help action, it's always present continue @@ -100,8 +108,10 @@ def _generate_command_section( lines.append(f"{heading} 
`{full_command}`") lines.append("") - if info["description"]: - lines.append(info["description"]) + # Use help text (from add_parser) or description (from ArgumentParser) + description = info.get("help") or info.get("description") or "" + if description and description != argparse.SUPPRESS: + lines.append(description) lines.append("") # Arguments table From a1d4cd5ed52c23e88125ae6b23e564ccb2496d28 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 15 Dec 2025 22:03:25 -0800 Subject: [PATCH 4/5] remove generated cli-reference --- docs/cli-reference.md | 106 ------------------------------------------ 1 file changed, 106 deletions(-) delete mode 100644 docs/cli-reference.md diff --git a/docs/cli-reference.md b/docs/cli-reference.md deleted file mode 100644 index 73de1862..00000000 --- a/docs/cli-reference.md +++ /dev/null @@ -1,106 +0,0 @@ -# CLI Reference - -**ep** - eval-protocol: Tools for evaluation and reward modeling - -## Global Options - -| Option | Type | Default | Required | Description | -|--------|------|---------|----------|-------------| -| `--verbose`, `-v` | | false | No | Enable verbose logging | -| `--profile` | | - | No | Fireworks profile to use (reads ~/.fireworks/profiles//auth.ini and settings.ini) | -| `--server` | | - | No | Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai) | - -## Commands - -### `ep logs` - -Serve logs with file watching and real-time updates - -| Option | Type | Default | Required | Description | -|--------|------|---------|----------|-------------| -| `--port` | int | `8000` | No | Port to bind to (default: 8000) | -| `--debug` | | false | No | Enable debug mode | -| `--disable-elasticsearch-setup` | | false | No | Disable Elasticsearch setup | -| `--use-env-elasticsearch-config` | | false | No | Use env vars for Elasticsearch config (requires ELASTICSEARCH_URL, ELASTICSEARCH_API_KEY, ELASTICSEARCH_INDEX_NAME) | -| `--use-fireworks` | | false | No | Force Fireworks tracing 
backend for logs UI (overrides env auto-detection) | -| `--use-elasticsearch` | | false | No | Force Elasticsearch backend for logs UI (overrides env auto-detection) | - -### `ep upload` - -Scan for evaluation tests, select, and upload as Fireworks evaluators - -| Option | Type | Default | Required | Description | -|--------|------|---------|----------|-------------| -| `--path` | | `.` | No | Path to search for evaluation tests (default: current directory) | -| `--entry` | | - | No | Entrypoint of evaluation test to upload (module:function or path::function). For multiple, separate by commas. | -| `--id` | | - | No | Evaluator ID to use (if multiple selections, a numeric suffix is appended) | -| `--display-name` | | - | No | Display name for evaluator (defaults to ID) | -| `--description` | | - | No | Description for evaluator | -| `--force` | | false | No | Overwrite existing evaluator with the same ID | -| `--yes`, `-y` | | false | No | Non-interactive: upload all discovered evaluation tests | -| `--env-file` | | - | No | Path to .env file containing secrets to upload (default: .env in current directory) | - -### `ep create` - -Resource creation commands - -#### `ep create rft` - -Create a Reinforcement Fine-tuning Job on Fireworks - -| Option | Type | Default | Required | Description | -|--------|------|---------|----------|-------------| -| `--evaluator` | | - | No | Evaluator ID or fully-qualified resource (accounts/{acct}/evaluators/{id}); if omitted, derive from local tests | -| `--dataset` | | - | No | Use existing dataset (ID or resource 'accounts/{acct}/datasets/{id}') to skip local materialization | -| `--dataset-jsonl` | | - | No | Path to JSONL to upload as a new Fireworks dataset | -| `--dataset-builder` | | - | No | Explicit dataset builder spec (module::function or path::function) | -| `--dataset-display-name` | | - | No | Display name for dataset on Fireworks (defaults to dataset id) | -| `--base-model` | | - | No | Base model resource id | -| 
`--warm-start-from` | | - | No | Addon model to warm start from | -| `--output-model` | | - | No | Output model id (defaults from evaluator) | -| `--epochs` | int | `1` | No | - | -| `--batch-size` | int | `128000` | No | - | -| `--learning-rate` | float | `3e-05` | No | - | -| `--max-context-length` | int | `65536` | No | - | -| `--lora-rank` | int | `16` | No | - | -| `--gradient-accumulation-steps` | int | - | No | Number of gradient accumulation steps | -| `--learning-rate-warmup-steps` | int | - | No | Number of LR warmup steps | -| `--accelerator-count` | int | - | No | - | -| `--region` | | - | No | Fireworks region enum value | -| `--display-name` | | - | No | RFT job display name | -| `--evaluation-dataset` | | - | No | Optional separate eval dataset id | -| `--eval-auto-carveout` | | true | No | - | -| `--no-eval-auto-carveout` | | true | No | - | -| `--chunk-size` | int | `100` | No | Data chunk size for rollout batching | -| `--temperature` | float | - | No | - | -| `--top-p` | float | - | No | - | -| `--top-k` | int | - | No | - | -| `--max-output-tokens` | int | `32768` | No | - | -| `--response-candidates-count` | int | `8` | No | - | -| `--extra-body` | | - | No | JSON string for extra inference params | -| `--mcp-server` | | - | No | The MCP server resource name to use for the reinforcement fine-tuning job. 
| -| `--wandb-enabled` | | false | No | - | -| `--wandb-project` | | - | No | - | -| `--wandb-entity` | | - | No | - | -| `--wandb-run-id` | | - | No | - | -| `--wandb-api-key` | | - | No | - | -| `--job-id` | | - | No | Specify an explicit RFT job id | -| `--yes`, `-y` | | false | No | Non-interactive mode | -| `--dry-run` | | false | No | Print planned REST calls without sending | -| `--force` | | false | No | Overwrite existing evaluator with the same ID | -| `--skip-validation` | | false | No | Skip local dataset and evaluator validation before creating the RFT job | -| `--ignore-docker` | | false | No | Ignore Dockerfile even if present; run pytest on host during evaluator validation | -| `--docker-build-extra` | | `` | No | Extra flags to pass to 'docker build' when validating evaluator (quoted string, e.g. "--no-cache --pull --progress=plain") | -| `--docker-run-extra` | | `` | No | Extra flags to pass to 'docker run' when validating evaluator (quoted string, e.g. "--env-file .env --memory=8g") | - -### `ep local-test` - -Select an evaluation test and run it locally. If a Dockerfile exists, build and run via Docker; otherwise run on host. - -| Option | Type | Default | Required | Description | -|--------|------|---------|----------|-------------| -| `--entry` | | - | No | Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes). | -| `--ignore-docker` | | false | No | Ignore Dockerfile even if present; run pytest on host | -| `--yes`, `-y` | | false | No | Non-interactive: if multiple tests exist and no --entry, fails with guidance | -| `--docker-build-extra` | | `` | No | Extra flags to pass to 'docker build' (quoted string, e.g. "--no-cache --pull --progress=plain") | -| `--docker-run-extra` | | `` | No | Extra flags to pass to 'docker run' (quoted string, e.g. 
"--env-file .env --memory=8g") | From cefc4618d91022b60cd70835f716c120609e5f85 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 15 Dec 2025 22:43:45 -0800 Subject: [PATCH 5/5] update --- eval_protocol/cli.py | 63 ++++++---- eval_protocol/cli_commands/export_docs.py | 136 ++++++++++++++++------ 2 files changed, 139 insertions(+), 60 deletions(-) diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py index 3bb455d7..471a5bae 100644 --- a/eval_protocol/cli.py +++ b/eval_protocol/cli.py @@ -34,7 +34,9 @@ def build_parser() -> argparse.ArgumentParser: """Build and return the argument parser for the CLI.""" - parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling") + parser = argparse.ArgumentParser( + description="Inspect evaluation runs locally, upload evaluators, and create reinforcement fine-tuning jobs on Fireworks" + ) return _configure_parser(parser) @@ -401,39 +403,52 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse rft_parser.add_argument("--base-model", help="Base model resource id") rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from") rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)") - rft_parser.add_argument("--epochs", type=int, default=1) - rft_parser.add_argument("--batch-size", type=int, default=128000) - rft_parser.add_argument("--learning-rate", type=float, default=3e-5) - rft_parser.add_argument("--max-context-length", type=int, default=65536) - rft_parser.add_argument("--lora-rank", type=int, default=16) + rft_parser.add_argument("--epochs", type=int, default=1, help="Number of training epochs") + rft_parser.add_argument("--batch-size", type=int, default=128000, help="Training batch size in tokens") + rft_parser.add_argument("--learning-rate", type=float, default=3e-5, help="Learning rate for training") + rft_parser.add_argument("--max-context-length", type=int, default=65536, help="Maximum 
context length in tokens") + rft_parser.add_argument("--lora-rank", type=int, default=16, help="LoRA rank for fine-tuning") rft_parser.add_argument("--gradient-accumulation-steps", type=int, help="Number of gradient accumulation steps") - rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of LR warmup steps") - rft_parser.add_argument("--accelerator-count", type=int) - rft_parser.add_argument("--region", help="Fireworks region enum value") - rft_parser.add_argument("--display-name", help="RFT job display name") - rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id") - rft_parser.add_argument("--eval-auto-carveout", dest="eval_auto_carveout", action="store_true", default=True) - rft_parser.add_argument("--no-eval-auto-carveout", dest="eval_auto_carveout", action="store_false") + rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of learning rate warmup steps") + rft_parser.add_argument("--accelerator-count", type=int, help="Number of accelerators (GPUs) to use") + rft_parser.add_argument("--region", help="Fireworks region for training") + rft_parser.add_argument("--display-name", help="Display name for the RFT job") + rft_parser.add_argument("--evaluation-dataset", help="Separate dataset id for evaluation") + rft_parser.add_argument( + "--eval-auto-carveout", + dest="eval_auto_carveout", + action="store_true", + default=True, + help="Automatically carve out evaluation data from training set", + ) + rft_parser.add_argument( + "--no-eval-auto-carveout", + dest="eval_auto_carveout", + action="store_false", + help="Disable automatic evaluation data carveout", + ) # Rollout chunking rft_parser.add_argument("--chunk-size", type=int, default=100, help="Data chunk size for rollout batching") # Inference params - rft_parser.add_argument("--temperature", type=float) - rft_parser.add_argument("--top-p", type=float) - rft_parser.add_argument("--top-k", type=int) - 
rft_parser.add_argument("--max-output-tokens", type=int, default=32768) - rft_parser.add_argument("--response-candidates-count", type=int, default=8) + rft_parser.add_argument("--temperature", type=float, help="Sampling temperature for rollouts") + rft_parser.add_argument("--top-p", type=float, help="Top-p (nucleus) sampling parameter") + rft_parser.add_argument("--top-k", type=int, help="Top-k sampling parameter") + rft_parser.add_argument("--max-output-tokens", type=int, default=32768, help="Maximum output tokens per rollout") + rft_parser.add_argument( + "--response-candidates-count", type=int, default=8, help="Number of response candidates per prompt" + ) rft_parser.add_argument("--extra-body", help="JSON string for extra inference params") # MCP server (optional) rft_parser.add_argument( "--mcp-server", - help="The MCP server resource name to use for the reinforcement fine-tuning job.", + help="MCP server resource name for agentic rollouts", ) # Wandb - rft_parser.add_argument("--wandb-enabled", action="store_true") - rft_parser.add_argument("--wandb-project") - rft_parser.add_argument("--wandb-entity") - rft_parser.add_argument("--wandb-run-id") - rft_parser.add_argument("--wandb-api-key") + rft_parser.add_argument("--wandb-enabled", action="store_true", help="Enable Weights & Biases logging") + rft_parser.add_argument("--wandb-project", help="Weights & Biases project name") + rft_parser.add_argument("--wandb-entity", help="Weights & Biases entity (username or team)") + rft_parser.add_argument("--wandb-run-id", help="Weights & Biases run id for resuming") + rft_parser.add_argument("--wandb-api-key", help="Weights & Biases API key") # Misc rft_parser.add_argument("--job-id", help="Specify an explicit RFT job id") rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode") diff --git a/eval_protocol/cli_commands/export_docs.py b/eval_protocol/cli_commands/export_docs.py index feb083b0..4240b5c3 100644 --- 
a/eval_protocol/cli_commands/export_docs.py +++ b/eval_protocol/cli_commands/export_docs.py @@ -59,39 +59,83 @@ def _get_parser_info(parser: argparse.ArgumentParser, subparser_help: str = "") return info -def _format_argument_row(arg: Dict) -> str: - """Format a single argument as a markdown table row.""" - # Build the flag/argument name +def _format_argument_item(arg: Dict) -> List[str]: + """Format a single argument as a Mintlify ParamField component.""" + lines = [] + + # Build the flag name if arg["option_strings"]: - name = ", ".join(f"`{opt}`" for opt in arg["option_strings"]) + long_opts = [o for o in arg["option_strings"] if o.startswith("--")] + short_opts = [o for o in arg["option_strings"] if not o.startswith("--")] + primary = long_opts[0] if long_opts else arg["option_strings"][0] else: - name = f"`{arg['dest']}`" + primary = arg["dest"] + short_opts = [] - # Build type info + # Map Python types to ParamField types type_str = "" if arg["type"]: - type_str = getattr(arg["type"], "__name__", str(arg["type"])) - if arg["choices"]: - type_str = f"choices: {arg['choices']}" - - # Format default value + python_type = getattr(arg["type"], "__name__", str(arg["type"])) + type_map = {"int": "number", "float": "number", "str": "string", "bool": "boolean"} + type_str = type_map.get(python_type, python_type) + elif arg["default"] is not None: + # Infer type from default + if isinstance(arg["default"], bool): + type_str = "boolean" + elif isinstance(arg["default"], int): + type_str = "number" + elif isinstance(arg["default"], float): + type_str = "number" + elif isinstance(arg["default"], str): + type_str = "string" + + # Build ParamField attributes + attrs = [f'path="{primary}"'] + + if type_str: + attrs.append(f'type="{type_str}"') + + # Default value default = arg["default"] - if default is None: - default_str = "-" - elif default == argparse.SUPPRESS: - default_str = "-" - elif isinstance(default, bool): - default_str = str(default).lower() - else: - 
default_str = f"`{default}`"
+    if default is not None and default != argparse.SUPPRESS:
+        if isinstance(default, bool):
+            default_str = str(default).lower()
+        elif isinstance(default, str):
+            # Escape quotes in string defaults
+            default_str = default.replace('"', '\\"')
+        else:
+            default_str = str(default)
+        attrs.append(f'default="{default_str}"')
+
+    if arg["required"]:
+        attrs.append("required")
+
+    # Build description with short alias mention
+    help_text = (arg["help"] or "").replace("<", "&lt;").replace(">", "&gt;")
+    if short_opts:
+        alias_note = f"Short: `{short_opts[0]}`"
+        if help_text:
+            help_text = f"{help_text} ({alias_note})"
+        else:
+            help_text = alias_note
 
-    # Help text (escape pipe characters for markdown tables)
-    help_text = (arg["help"] or "-").replace("|", "\\|")
+    # Add choices info to description
+    if arg["choices"]:
+        choices_str = ", ".join(f"`{c}`" for c in arg["choices"])
+        choices_note = f"Choices: {choices_str}"
+        if help_text:
+            help_text = f"{help_text}. {choices_note}"
+        else:
+            help_text = choices_note
 
-    # Required indicator
-    required = "Yes" if arg["required"] else "No"
+    # Generate ParamField
+    lines.append(f"<ParamField {' '.join(attrs)}>")
+    if help_text:
+        lines.append(f"  {help_text}")
+    lines.append("</ParamField>")
+    lines.append("")
 
-    return f"| {name} | {type_str} | {default_str} | {required} | {help_text} |"
+    return lines
 
 
 def _generate_command_section(
@@ -105,6 +149,21 @@ def _generate_command_section(
     full_command = f"{parent_command} {name}".strip()
     heading = "#" * heading_level
 
+    # Skip commands that have no arguments and only subparsers (like "ep create")
+    # Instead, just render the subcommands directly at the same level
+    if not info["arguments"] and info["subparsers"]:
+        # Skip this level, render subcommands directly
+        for subname, subinfo in info["subparsers"].items():
+            lines.extend(
+                _generate_command_section(
+                    subname,
+                    subinfo,
+                    full_command,
+                    heading_level,  # Keep same heading level
+                )
+            )
+        return lines
+
     lines.append(f"{heading} `{full_command}`")
lines.append("") @@ -114,13 +173,10 @@ def _generate_command_section( lines.append(description) lines.append("") - # Arguments table + # Arguments (no extra heading to keep TOC clean) if info["arguments"]: - lines.append("| Option | Type | Default | Required | Description |") - lines.append("|--------|------|---------|----------|-------------|") for arg in info["arguments"]: - lines.append(_format_argument_row(arg)) - lines.append("") + lines.extend(_format_argument_item(arg)) # Handle nested subparsers recursively if info["subparsers"]: @@ -162,22 +218,30 @@ def generate_cli_docs(parser: argparse.ArgumentParser, output_path: str) -> int: if name != "export-docs" # Don't document the hidden command } - # Generate single page + # Generate single page with Mintlify frontmatter lines = [] - lines.append("# CLI Reference") + lines.append("---") + lines.append("title: CLI") + lines.append("icon: terminal") + lines.append("---") lines.append("") - lines.append(f"**{info['prog']}** - {info['description']}") + lines.append( + f"The `{info['prog']}` command-line interface can {info['description'][0].lower()}{info['description'][1:]}." + ) + lines.append("") + lines.append("```bash") + lines.append(f"{info['prog']} [global options] [command options]") + lines.append("```") lines.append("") # Global options if info["arguments"]: lines.append("## Global Options") lines.append("") - lines.append("| Option | Type | Default | Required | Description |") - lines.append("|--------|------|---------|----------|-------------|") - for arg in info["arguments"]: - lines.append(_format_argument_row(arg)) + lines.append("These options can be used with any command:") lines.append("") + for arg in info["arguments"]: + lines.extend(_format_argument_item(arg)) # Commands section if visible_subparsers: