def build_parser() -> argparse.ArgumentParser:
    """Create the root CLI parser and register all arguments and subcommands on it.

    Returns:
        The parser returned by ``_configure_parser`` for a freshly created
        root ``ArgumentParser``.
    """
    summary = (
        "Inspect evaluation runs locally, upload evaluators, and create reinforcement fine-tuning jobs on Fireworks"
    )
    root = argparse.ArgumentParser(description=summary)
    return _configure_parser(root)
context length in tokens") + rft_parser.add_argument("--lora-rank", type=int, default=16, help="LoRA rank for fine-tuning") rft_parser.add_argument("--gradient-accumulation-steps", type=int, help="Number of gradient accumulation steps") - rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of LR warmup steps") - rft_parser.add_argument("--accelerator-count", type=int) - rft_parser.add_argument("--region", help="Fireworks region enum value") - rft_parser.add_argument("--display-name", help="RFT job display name") - rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id") - rft_parser.add_argument("--eval-auto-carveout", dest="eval_auto_carveout", action="store_true", default=True) - rft_parser.add_argument("--no-eval-auto-carveout", dest="eval_auto_carveout", action="store_false") + rft_parser.add_argument("--learning-rate-warmup-steps", type=int, help="Number of learning rate warmup steps") + rft_parser.add_argument("--accelerator-count", type=int, help="Number of accelerators (GPUs) to use") + rft_parser.add_argument("--region", help="Fireworks region for training") + rft_parser.add_argument("--display-name", help="Display name for the RFT job") + rft_parser.add_argument("--evaluation-dataset", help="Separate dataset id for evaluation") + rft_parser.add_argument( + "--eval-auto-carveout", + dest="eval_auto_carveout", + action="store_true", + default=True, + help="Automatically carve out evaluation data from training set", + ) + rft_parser.add_argument( + "--no-eval-auto-carveout", + dest="eval_auto_carveout", + action="store_false", + help="Disable automatic evaluation data carveout", + ) # Rollout chunking rft_parser.add_argument("--chunk-size", type=int, default=100, help="Data chunk size for rollout batching") # Inference params - rft_parser.add_argument("--temperature", type=float) - rft_parser.add_argument("--top-p", type=float) - rft_parser.add_argument("--top-k", type=int) - 
rft_parser.add_argument("--max-output-tokens", type=int, default=32768) - rft_parser.add_argument("--response-candidates-count", type=int, default=8) + rft_parser.add_argument("--temperature", type=float, help="Sampling temperature for rollouts") + rft_parser.add_argument("--top-p", type=float, help="Top-p (nucleus) sampling parameter") + rft_parser.add_argument("--top-k", type=int, help="Top-k sampling parameter") + rft_parser.add_argument("--max-output-tokens", type=int, default=32768, help="Maximum output tokens per rollout") + rft_parser.add_argument( + "--response-candidates-count", type=int, default=8, help="Number of response candidates per prompt" + ) rft_parser.add_argument("--extra-body", help="JSON string for extra inference params") # MCP server (optional) rft_parser.add_argument( "--mcp-server", - help="The MCP server resource name to use for the reinforcement fine-tuning job.", + help="MCP server resource name for agentic rollouts", ) # Wandb - rft_parser.add_argument("--wandb-enabled", action="store_true") - rft_parser.add_argument("--wandb-project") - rft_parser.add_argument("--wandb-entity") - rft_parser.add_argument("--wandb-run-id") - rft_parser.add_argument("--wandb-api-key") + rft_parser.add_argument("--wandb-enabled", action="store_true", help="Enable Weights & Biases logging") + rft_parser.add_argument("--wandb-project", help="Weights & Biases project name") + rft_parser.add_argument("--wandb-entity", help="Weights & Biases entity (username or team)") + rft_parser.add_argument("--wandb-run-id", help="Weights & Biases run id for resuming") + rft_parser.add_argument("--wandb-api-key", help="Weights & Biases API key") # Misc rft_parser.add_argument("--job-id", help="Specify an explicit RFT job id") rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode") @@ -494,6 +514,38 @@ def parse_args(args=None): # help="Run an evaluation using a Hydra configuration. 
def _hide_suppressed_subparsers(parser: argparse.ArgumentParser) -> None:
    """Hide subcommands registered with ``help=argparse.SUPPRESS`` from help output.

    For every subparsers action on ``parser`` this removes the suppressed
    entries from the per-command help listing and rewrites the ``{a,b,c}``
    metavar shown in the usage line so it no longer names them. Hidden
    commands stay registered in ``action.choices`` and remain invocable.

    Args:
        parser: Parser whose subparsers actions are updated in place.
    """
    for action in parser._actions:
        if not isinstance(action, argparse._SubParsersAction):
            continue

        choice_actions = getattr(action, "_choices_actions", [])
        # Identify hidden commands by parser object so that aliases of a
        # hidden command (which map to the same parser) are hidden as well.
        hidden_parsers = {
            id(action.choices[entry.dest])
            for entry in choice_actions
            if entry.help == argparse.SUPPRESS and entry.dest in action.choices
        }
        if not hidden_parsers:
            # Nothing to hide (or already hidden by an earlier call): leave
            # the listing and the metavar exactly as they are.
            continue

        action._choices_actions = [entry for entry in choice_actions if entry.help != argparse.SUPPRESS]

        # Build the metavar from action.choices rather than _choices_actions:
        # argparse only records a _choices_actions entry for commands added
        # with help=..., so commands without help text would otherwise vanish
        # from the usage line even though they are not suppressed.
        seen = set(hidden_parsers)
        visible_names = []
        for name, subparser in action.choices.items():
            if id(subparser) in seen:
                continue  # hidden command, or an alias of one already listed
            seen.add(id(subparser))
            visible_names.append(name)

        if visible_names:
            action.metavar = "{" + ",".join(visible_names) + "}"
"""
Export CLI reference documentation as a single markdown file.

This module introspects the argparse-based CLI and renders one Mintlify
markdown page describing the global options and every visible command.
"""

import argparse
import html
import logging
from pathlib import Path
from typing import Dict, List

logger = logging.getLogger(__name__)

# Python type names mapped to the type labels shown in ParamField components.
_PARAM_TYPE_NAMES = {"int": "number", "float": "number", "str": "string", "bool": "boolean"}

# The exporter's own (hidden) command; it is never included in the output.
_EXPORT_DOCS_COMMAND = "export-docs"


def _get_parser_info(parser: argparse.ArgumentParser, subparser_help: str = "") -> Dict:
    """Extract a plain-dict description of a parser and, recursively, its subparsers.

    Args:
        parser: The parser to introspect.
        subparser_help: Help text that was passed to ``add_parser()`` for
            this parser when it is a subcommand.

    Returns:
        A dict with the keys ``prog``, ``description``, ``help``, ``epilog``,
        ``arguments`` and ``subparsers``. ``arguments`` is a list of
        per-argument dicts; the automatic help action and arguments whose help
        is ``argparse.SUPPRESS`` are left out. ``subparsers`` maps each
        subcommand name to a nested dict of the same shape.
    """
    info: Dict = {
        "prog": parser.prog,
        "description": parser.description or "",
        "help": subparser_help,  # The help text from add_parser()
        "epilog": parser.epilog or "",
        "arguments": [],
        "subparsers": {},
    }

    for action in parser._actions:
        if isinstance(action, argparse._SubParsersAction):
            # add_parser(help=...) stores the help on a pseudo-action kept in
            # _choices_actions, not on the subparser itself. Index it once
            # instead of rescanning the list for every subcommand.
            help_by_name: Dict[str, str] = {}
            for choice_action in getattr(action, "_choices_actions", []):
                help_by_name.setdefault(choice_action.dest, choice_action.help or "")
            for name, subparser in action.choices.items():
                info["subparsers"][name] = _get_parser_info(subparser, help_by_name.get(name, ""))
            continue

        # The help action is always present; suppressed arguments are hidden.
        if isinstance(action, argparse._HelpAction) or action.help == argparse.SUPPRESS:
            continue

        info["arguments"].append(
            {
                "option_strings": action.option_strings,
                "dest": action.dest,
                "help": action.help or "",
                "default": action.default,
                "required": getattr(action, "required", False),
                "type": getattr(action, "type", None),
                "choices": getattr(action, "choices", None),
                "nargs": getattr(action, "nargs", None),
                "metavar": getattr(action, "metavar", None),
            }
        )

    return info


def _format_argument_item(arg: Dict) -> List[str]:
    """Format a single argument as a Mintlify ParamField component.

    Args:
        arg: One entry of the ``arguments`` list built by ``_get_parser_info``.

    Returns:
        The markdown lines for the component: the opening tag carrying the
        ``path``/``type``/``default``/``required`` attributes, an optional
        description line, the closing tag and a trailing blank line.
    """
    option_strings = arg["option_strings"]
    if option_strings:
        long_opts = [o for o in option_strings if o.startswith("--")]
        short_opts = [o for o in option_strings if not o.startswith("--")]
        primary = long_opts[0] if long_opts else option_strings[0]
    else:
        # Positional argument: it has no flags, so it is named by its dest.
        primary = arg["dest"]
        short_opts = []

    default = arg["default"]

    # Prefer the declared type; otherwise infer one from the default value.
    type_str = ""
    if arg["type"]:
        python_type = getattr(arg["type"], "__name__", str(arg["type"]))
        type_str = _PARAM_TYPE_NAMES.get(python_type, python_type)
    elif isinstance(default, bool):  # bool first: it is a subclass of int
        type_str = "boolean"
    elif isinstance(default, (int, float)):
        type_str = "number"
    elif isinstance(default, str):
        type_str = "string"

    attrs = [f'path="{primary}"']
    if type_str:
        attrs.append(f'type="{type_str}"')

    if default is not None and default != argparse.SUPPRESS:
        if isinstance(default, bool):
            default_str = str(default).lower()
        elif isinstance(default, str):
            # Escape quotes in string defaults
            default_str = default.replace('"', '\\"')
        else:
            default_str = str(default)
        attrs.append(f'default="{default_str}"')

    if arg["required"]:
        attrs.append("required")

    # Help text is emitted inside a component body, so markup characters in it
    # must be escaped or they would be parsed as tags.
    help_text = html.escape(arg["help"] or "", quote=False)
    if short_opts:
        alias_note = f"Short: `{short_opts[0]}`"
        help_text = f"{help_text} ({alias_note})" if help_text else alias_note

    if arg["choices"]:
        choices_str = ", ".join(f"`{c}`" for c in arg["choices"])
        choices_note = f"Choices: {choices_str}"
        help_text = f"{help_text}. {choices_note}" if help_text else choices_note

    lines = [f"<ParamField {' '.join(attrs)}>"]
    if help_text:
        lines.append(f"  {help_text}")
    lines.append("</ParamField>")
    lines.append("")
    return lines


def _generate_command_section(
    name: str,
    info: Dict,
    parent_command: str,
    heading_level: int = 2,
) -> List[str]:
    """Generate the markdown section for a single command and its subcommands.

    Args:
        name: The command name.
        info: The command's parser info as built by ``_get_parser_info``.
        parent_command: The full command line of the parent (e.g. the program name).
        heading_level: Number of ``#`` characters used for this command's heading.

    Returns:
        The markdown lines for the command, including nested subcommands.
    """
    full_command = f"{parent_command} {name}".strip()
    subparsers = info["subparsers"]

    # A pure grouping command (no arguments of its own, e.g. "ep create") gets
    # no section; its subcommands are rendered at the same heading level.
    if not info["arguments"] and subparsers:
        grouped: List[str] = []
        for subname, subinfo in subparsers.items():
            grouped.extend(_generate_command_section(subname, subinfo, full_command, heading_level))
        return grouped

    lines = [f"{'#' * heading_level} `{full_command}`", ""]

    # Use help text (from add_parser) or description (from ArgumentParser)
    description = info.get("help") or info.get("description") or ""
    if description and description != argparse.SUPPRESS:
        lines.extend([description, ""])

    # Arguments are listed without an extra heading to keep the TOC clean.
    for arg in info["arguments"]:
        lines.extend(_format_argument_item(arg))

    # Nested subcommands go one heading level deeper.
    for subname, subinfo in subparsers.items():
        lines.extend(_generate_command_section(subname, subinfo, full_command, heading_level + 1))

    if info["epilog"]:
        lines.extend([info["epilog"], ""])

    return lines


def generate_cli_docs(parser: argparse.ArgumentParser, output_path: str) -> int:
    """
    Generate markdown documentation from an ArgumentParser to a single file.

    Args:
        parser: The root ArgumentParser instance.
        output_path: Path to write the markdown file to. Missing parent
            directories are created.

    Returns:
        0 on success, 1 if the output file or its directory could not be written.
    """
    info = _get_parser_info(parser)
    prog = info["prog"]

    # Leave out hidden commands: the exporter itself and anything whose help
    # is suppressed.
    visible_subparsers = {
        name: subinfo
        for name, subinfo in info["subparsers"].items()
        if name != _EXPORT_DOCS_COMMAND and subinfo["help"] != argparse.SUPPRESS
    }

    # The intro sentence continues with the parser description, so it needs a
    # fallback for parsers created without one (indexing "" would raise).
    description = info["description"].strip()
    if description:
        intro = f"The `{prog}` command-line interface can {description[0].lower()}{description[1:]}"
        if not intro.endswith("."):
            intro += "."
    else:
        intro = f"Reference for the `{prog}` command-line interface."

    # Single page with Mintlify frontmatter
    lines = [
        "---",
        "title: CLI",
        "icon: terminal",
        "---",
        "",
        intro,
        "",
        "```bash",
        f"{prog} [global options] <command> [command options]",
        "```",
        "",
    ]

    if info["arguments"]:
        lines.extend(["## Global Options", "", "These options can be used with any command:", ""])
        for arg in info["arguments"]:
            lines.extend(_format_argument_item(arg))

    if visible_subparsers:
        lines.extend(["## Commands", ""])
        for name, subinfo in visible_subparsers.items():
            lines.extend(_generate_command_section(name, subinfo, prog, heading_level=3))

    out = Path(output_path)
    try:
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text("\n".join(lines), encoding="utf-8")
    except OSError as err:
        # Report the failure through the documented return code.
        logger.error("Failed to write CLI docs to %s: %s", out, err)
        return 1

    logger.info("Generated: %s", out)
    return 0


def export_docs_command(args: argparse.Namespace) -> int:
    """
    Export CLI documentation to a single markdown file.

    This command introspects the CLI parser and generates markdown documentation.

    Args:
        args: Parsed CLI arguments; ``args.output`` is the destination path.

    Returns:
        The return code of ``generate_cli_docs``.
    """
    # Imported here, not at module level, to get the actual parser from cli.py
    # at call time.
    from eval_protocol.cli import build_parser

    parser = build_parser()
    return generate_cli_docs(parser, args.output)