diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
index e8125390..471a5bae 100644
--- a/eval_protocol/cli.py
+++ b/eval_protocol/cli.py
@@ -32,9 +32,16 @@
preview_command = None # type: ignore[assignment]
-def parse_args(args=None):
- """Parse command line arguments"""
- parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling")
def build_parser() -> argparse.ArgumentParser:
    """Create the root CLI parser and return it fully configured.

    Returns:
        The top-level ``ArgumentParser`` after ``_configure_parser`` has
        registered the global options and sub-commands on it.
    """
    description = (
        "Inspect evaluation runs locally, upload evaluators, and create reinforcement fine-tuning jobs on Fireworks"
    )
    root_parser = argparse.ArgumentParser(description=description)
    return _configure_parser(root_parser)
+
+
+def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
+ """Configure all arguments and subparsers on the given parser."""
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
parser.add_argument(
"--profile",
@@ -396,39 +403,52 @@ def parse_args(args=None):
rft_parser.add_argument("--base-model", help="Base model resource id")
rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from")
rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)")
- rft_parser.add_argument("--epochs", type=int, default=1)
- rft_parser.add_argument("--batch-size", type=int, default=128000)
- rft_parser.add_argument("--learning-rate", type=float, default=3e-5)
- rft_parser.add_argument("--max-context-length", type=int, default=65536)
- rft_parser.add_argument("--lora-rank", type=int, default=16)
+ rft_parser.add_argument("--epochs", type=int, default=1, help="Number of training epochs")
+ rft_parser.add_argument("--batch-size", type=int, default=128000, help="Training batch size in tokens")
+ rft_parser.add_argument("--learning-rate", type=float, default=3e-5, help="Learning rate for training")
+ rft_parser.add_argument("--max-context-length", type=int, default=65536, help="Maximum context length in tokens")
+ rft_parser.add_argument("--lora-rank", type=int, default=16, help="LoRA rank for fine-tuning")
rft_parser.add_argument("--gradient-accumulation-steps", type=int, help="Number of gradient accumulation steps")
- rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of LR warmup steps")
- rft_parser.add_argument("--accelerator-count", type=int)
- rft_parser.add_argument("--region", help="Fireworks region enum value")
- rft_parser.add_argument("--display-name", help="RFT job display name")
- rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id")
- rft_parser.add_argument("--eval-auto-carveout", dest="eval_auto_carveout", action="store_true", default=True)
- rft_parser.add_argument("--no-eval-auto-carveout", dest="eval_auto_carveout", action="store_false")
+ rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of learning rate warmup steps")
+ rft_parser.add_argument("--accelerator-count", type=int, help="Number of accelerators (GPUs) to use")
+ rft_parser.add_argument("--region", help="Fireworks region for training")
+ rft_parser.add_argument("--display-name", help="Display name for the RFT job")
+ rft_parser.add_argument("--evaluation-dataset", help="Separate dataset id for evaluation")
+ rft_parser.add_argument(
+ "--eval-auto-carveout",
+ dest="eval_auto_carveout",
+ action="store_true",
+ default=True,
+ help="Automatically carve out evaluation data from training set",
+ )
+ rft_parser.add_argument(
+ "--no-eval-auto-carveout",
+ dest="eval_auto_carveout",
+ action="store_false",
+ help="Disable automatic evaluation data carveout",
+ )
# Rollout chunking
rft_parser.add_argument("--chunk-size", type=int, default=100, help="Data chunk size for rollout batching")
# Inference params
- rft_parser.add_argument("--temperature", type=float)
- rft_parser.add_argument("--top-p", type=float)
- rft_parser.add_argument("--top-k", type=int)
- rft_parser.add_argument("--max-output-tokens", type=int, default=32768)
- rft_parser.add_argument("--response-candidates-count", type=int, default=8)
+ rft_parser.add_argument("--temperature", type=float, help="Sampling temperature for rollouts")
+ rft_parser.add_argument("--top-p", type=float, help="Top-p (nucleus) sampling parameter")
+ rft_parser.add_argument("--top-k", type=int, help="Top-k sampling parameter")
+ rft_parser.add_argument("--max-output-tokens", type=int, default=32768, help="Maximum output tokens per rollout")
+ rft_parser.add_argument(
+ "--response-candidates-count", type=int, default=8, help="Number of response candidates per prompt"
+ )
rft_parser.add_argument("--extra-body", help="JSON string for extra inference params")
# MCP server (optional)
rft_parser.add_argument(
"--mcp-server",
- help="The MCP server resource name to use for the reinforcement fine-tuning job.",
+ help="MCP server resource name for agentic rollouts",
)
# Wandb
- rft_parser.add_argument("--wandb-enabled", action="store_true")
- rft_parser.add_argument("--wandb-project")
- rft_parser.add_argument("--wandb-entity")
- rft_parser.add_argument("--wandb-run-id")
- rft_parser.add_argument("--wandb-api-key")
+ rft_parser.add_argument("--wandb-enabled", action="store_true", help="Enable Weights & Biases logging")
+ rft_parser.add_argument("--wandb-project", help="Weights & Biases project name")
+ rft_parser.add_argument("--wandb-entity", help="Weights & Biases entity (username or team)")
+ rft_parser.add_argument("--wandb-run-id", help="Weights & Biases run id for resuming")
+ rft_parser.add_argument("--wandb-api-key", help="Weights & Biases API key")
# Misc
rft_parser.add_argument("--job-id", help="Specify an explicit RFT job id")
rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
@@ -494,6 +514,38 @@ def parse_args(args=None):
# help="Run an evaluation using a Hydra configuration. All arguments after 'run' are passed to Hydra.",
# )
+ # Hidden command: export-docs (for generating CLI reference documentation)
+ export_docs_parser = subparsers.add_parser("export-docs", help=argparse.SUPPRESS)
+ export_docs_parser.add_argument(
+ "--output",
+ "-o",
+ default="./docs/cli-reference.md",
+ help="Output markdown file path (default: ./docs/cli-reference.md)",
+ )
+
+ # Update metavar to only show visible commands (exclude those with SUPPRESS)
+ _hide_suppressed_subparsers(parser)
+
+ return parser
+
+
def _hide_suppressed_subparsers(parser: argparse.ArgumentParser) -> None:
    """Hide sub-commands registered with ``help=argparse.SUPPRESS`` from help output.

    argparse keeps suppressed sub-commands in both the help listing and the
    ``{a,b,c}`` usage metavar. This removes them from the listing
    (``_choices_actions``) and rebuilds the metavar from the commands that
    remain visible. The commands stay in ``action.choices``, so they can still
    be invoked.

    Args:
        parser: Parser whose sub-parser actions are updated in place.
    """
    for action in parser._actions:
        if not isinstance(action, argparse._SubParsersAction):
            continue

        choices_actions = getattr(action, "_choices_actions", [])

        # Identify hidden parsers by object identity so that aliases of a
        # suppressed command are hidden together with it.
        hidden_parsers = {
            id(action.choices[pseudo.dest])
            for pseudo in choices_actions
            if pseudo.help == argparse.SUPPRESS and pseudo.dest in action.choices
        }
        action._choices_actions = [pseudo for pseudo in choices_actions if pseudo.help != argparse.SUPPRESS]

        # Build the metavar from ``choices`` rather than ``_choices_actions``:
        # argparse only creates a choices action when ``help=`` was passed, so
        # a visible command without help text would otherwise vanish from the
        # usage line. Only the first key per parser is kept, which skips aliases.
        visible_names = []
        seen_parsers = set()
        for name, subparser in action.choices.items():
            key = id(subparser)
            if key in hidden_parsers or key in seen_parsers:
                continue
            seen_parsers.add(key)
            visible_names.append(name)

        if visible_names:
            action.metavar = "{" + ",".join(visible_names) + "}"
+
def parse_args(args=None):
    """Parse command line arguments.

    Args:
        args: Optional argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The ``(namespace, remaining_args)`` tuple produced by
        ``ArgumentParser.parse_known_args``.
    """
    # parse_known_args (not parse_args) so unrecognized arguments are returned
    # instead of rejected, allowing Hydra to handle its own arguments.
    return build_parser().parse_known_args(args)
@@ -623,6 +675,10 @@ def _extract_flag_value(argv_list, flag_name):
from .cli_commands.local_test import local_test_command
return local_test_command(args)
+ elif args.command == "export-docs":
+ from .cli_commands.export_docs import export_docs_command
+
+ return export_docs_command(args)
# elif args.command == "run":
# # For the 'run' command, Hydra takes over argument parsing.
#
diff --git a/eval_protocol/cli_commands/export_docs.py b/eval_protocol/cli_commands/export_docs.py
new file mode 100644
index 00000000..4240b5c3
--- /dev/null
+++ b/eval_protocol/cli_commands/export_docs.py
@@ -0,0 +1,272 @@
+"""
+Export CLI reference documentation as markdown files.
+
+This module provides functionality to introspect the argparse-based CLI
+and generate markdown documentation for each command.
+"""
+
+import argparse
+import logging
+from pathlib import Path
+from typing import Dict, List
+
+logger = logging.getLogger(__name__)
+
+
def _get_parser_info(parser: argparse.ArgumentParser, subparser_help: str = "") -> Dict:
    """Collect a nested, plain-dict description of *parser*.

    Args:
        parser: The parser to introspect.
        subparser_help: Help text that was passed to ``add_parser()`` for this
            parser, if it is a sub-command; stored under the ``"help"`` key.

    Returns:
        A dict with ``prog``, ``description``, ``help``, ``epilog``,
        ``arguments`` (one dict per non-help, non-suppressed action) and
        ``subparsers`` (sub-command name -> the same structure, recursively).
    """
    arguments: List[Dict] = []
    subcommands: Dict[str, Dict] = {}

    for act in parser._actions:
        if isinstance(act, argparse._SubParsersAction):
            # add_parser() help text lives on the pseudo-actions in
            # _choices_actions, keyed by dest; the first entry for a name wins.
            help_by_name: Dict[str, str] = {}
            for pseudo in getattr(act, "_choices_actions", []):
                help_by_name.setdefault(pseudo.dest, pseudo.help or "")
            for cmd_name, cmd_parser in act.choices.items():
                subcommands[cmd_name] = _get_parser_info(cmd_parser, help_by_name.get(cmd_name, ""))
            continue

        # -h/--help is always present, and suppressed options are hidden on purpose.
        if isinstance(act, argparse._HelpAction) or act.help == argparse.SUPPRESS:
            continue

        arguments.append(
            {
                "option_strings": act.option_strings,
                "dest": act.dest,
                "help": act.help or "",
                "default": act.default,
                "required": getattr(act, "required", False),
                "type": getattr(act, "type", None),
                "choices": getattr(act, "choices", None),
                "nargs": getattr(act, "nargs", None),
                "metavar": getattr(act, "metavar", None),
            }
        )

    return {
        "prog": parser.prog,
        "description": parser.description or "",
        "help": subparser_help,
        "epilog": parser.epilog or "",
        "arguments": arguments,
        "subparsers": subcommands,
    }
+
+
def _format_argument_item(arg: Dict) -> List[str]:
    """Format a single argument as a Mintlify ParamField component.

    Args:
        arg: Argument description as produced by ``_get_parser_info`` (keys
            ``option_strings``, ``dest``, ``help``, ``default``, ``required``,
            ``type``, ``choices``).

    Returns:
        Markdown lines: the opening ``<ParamField ...>`` tag carrying the
        path/type/default/required attributes, an optional indented
        description line, the closing tag, and a trailing blank line.
    """
    # Prefer the first long flag as the displayed name; positionals use dest.
    option_strings = arg["option_strings"]
    if option_strings:
        long_opts = [o for o in option_strings if o.startswith("--")]
        primary = long_opts[0] if long_opts else option_strings[0]
        # Exclude the primary flag so a short-only option is not listed as its own alias.
        short_opts = [o for o in option_strings if not o.startswith("--") and o != primary]
    else:
        primary = arg["dest"]
        short_opts = []

    # Map Python types to ParamField types, falling back to the default's type.
    type_str = ""
    default = arg["default"]
    if arg["type"]:
        python_type = getattr(arg["type"], "__name__", str(arg["type"]))
        type_map = {"int": "number", "float": "number", "str": "string", "bool": "boolean"}
        type_str = type_map.get(python_type, python_type)
    elif default is not None:
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(default, bool):
            type_str = "boolean"
        elif isinstance(default, (int, float)):
            type_str = "number"
        elif isinstance(default, str):
            type_str = "string"

    attrs = [f'path="{primary}"']
    if type_str:
        attrs.append(f'type="{type_str}"')

    if default is not None and default != argparse.SUPPRESS:
        if isinstance(default, bool):
            default_str = str(default).lower()
        elif isinstance(default, str):
            # Escape quotes in string defaults
            default_str = default.replace('"', '\\"')
        else:
            default_str = str(default)
        attrs.append(f'default="{default_str}"')

    if arg["required"]:
        attrs.append("required")

    # Angle brackets in help text (e.g. "<id>") would otherwise be parsed as tags.
    help_text = (arg["help"] or "").replace("<", "&lt;").replace(">", "&gt;")
    if short_opts:
        alias_note = f"Short: `{short_opts[0]}`"
        help_text = f"{help_text} ({alias_note})" if help_text else alias_note

    if arg["choices"]:
        choices_note = "Choices: " + ", ".join(f"`{c}`" for c in arg["choices"])
        help_text = f"{help_text}. {choices_note}" if help_text else choices_note

    # Emit the component itself; without these tags the attributes built above
    # would never reach the generated document.
    lines = [f"<ParamField {' '.join(attrs)}>"]
    if help_text:
        lines.append(f"  {help_text}")
    lines.append("</ParamField>")
    lines.append("")

    return lines
+
+
def _generate_command_section(
    name: str,
    info: Dict,
    parent_command: str,
    heading_level: int = 2,
) -> List[str]:
    """Render the markdown lines documenting one command and its sub-commands.

    Args:
        name: Name of this command.
        info: Parser description as produced by ``_get_parser_info``.
        parent_command: Full command path of the parent, prepended to *name*.
        heading_level: Number of ``#`` characters used for this command's heading.

    Returns:
        The markdown lines for this command followed by its nested sub-commands.
    """
    full_command = f"{parent_command} {name}".strip()
    arguments = info["arguments"]
    children = info["subparsers"]

    # A pure grouping command (no arguments of its own, e.g. "ep create") gets
    # no section; its sub-commands are rendered in its place at the same level.
    if children and not arguments:
        flattened: List[str] = []
        for child_name, child_info in children.items():
            flattened += _generate_command_section(child_name, child_info, full_command, heading_level)
        return flattened

    section: List[str] = ["#" * heading_level + f" `{full_command}`", ""]

    # Prefer the add_parser() help text; fall back to the parser description.
    summary = info.get("help") or info.get("description") or ""
    if summary and summary != argparse.SUPPRESS:
        section += [summary, ""]

    # Arguments follow directly, without a heading of their own, to keep the TOC clean.
    for arg in arguments or ():
        section.extend(_format_argument_item(arg))

    # Nested sub-commands are rendered one heading level deeper.
    for child_name, child_info in (children or {}).items():
        section.extend(_generate_command_section(child_name, child_info, full_command, heading_level + 1))

    epilog = info["epilog"]
    if epilog:
        section += [epilog, ""]

    return section
+
+
def generate_cli_docs(parser: argparse.ArgumentParser, output_path: str) -> int:
    """
    Generate markdown documentation from an ArgumentParser to a single file.

    The page consists of Mintlify frontmatter, an intro sentence derived from
    the parser description, a usage snippet, the global options, and one
    section per visible command.

    Args:
        parser: The root ArgumentParser instance.
        output_path: Path to write the markdown file to. Missing parent
            directories are created.

    Returns:
        0 on success, 1 if the output file could not be written.
    """
    info = _get_parser_info(parser)
    prog = info["prog"]

    # Leave out hidden commands: export-docs itself, plus any sub-command
    # whose help is still marked SUPPRESS.
    visible_subparsers = {
        name: subinfo
        for name, subinfo in info["subparsers"].items()
        if name != "export-docs" and subinfo.get("help") != argparse.SUPPRESS
    }

    # The description is spliced into a sentence, so its first letter is
    # lower-cased. Guard against an empty description, where indexing [0]
    # would raise IndexError.
    description = info["description"]
    if description:
        intro = f"The `{prog}` command-line interface can {description[0].lower()}{description[1:]}."
    else:
        intro = f"Reference for the `{prog}` command-line interface."

    # Single page with Mintlify frontmatter
    lines = [
        "---",
        "title: CLI",
        "icon: terminal",
        "---",
        "",
        intro,
        "",
        "```bash",
        f"{prog} [global options] [command options]",
        "```",
        "",
    ]

    # Global options
    if info["arguments"]:
        lines.append("## Global Options")
        lines.append("")
        lines.append("These options can be used with any command:")
        lines.append("")
        for arg in info["arguments"]:
            lines.extend(_format_argument_item(arg))

    # Commands section
    if visible_subparsers:
        lines.append("## Commands")
        lines.append("")
        for name, subinfo in visible_subparsers.items():
            lines.extend(_generate_command_section(name, subinfo, prog, heading_level=3))

    # Write single file; report filesystem errors through the documented
    # return code instead of an uncaught exception.
    out = Path(output_path)
    try:
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text("\n".join(lines), encoding="utf-8")
    except OSError as exc:
        logger.error("Failed to write CLI docs to %s: %s", out, exc)
        return 1

    logger.info("Generated: %s", out)
    return 0
+
+
def export_docs_command(args: argparse.Namespace) -> int:
    """
    Export CLI documentation to a single markdown file.

    Builds the real CLI parser and hands it to ``generate_cli_docs``.

    Args:
        args: Parsed CLI namespace; ``args.output`` is the destination path.

    Returns:
        The exit code returned by ``generate_cli_docs``.
    """
    # Imported at call time rather than at module load; cli.py likewise
    # imports this module only when the command is dispatched.
    from eval_protocol.cli import build_parser

    return generate_cli_docs(build_parser(), args.output)