diff --git a/docs/bundles.md b/docs/bundles.md
index 93ca36ae..3d90b8ac 100644
--- a/docs/bundles.md
+++ b/docs/bundles.md
@@ -14,26 +14,6 @@ AgentOps ships five predefined bundles covering the most common evaluation scena
 | `agent_workflow_baseline` | Agent workflow | TaskCompletionEvaluator, ToolCallAccuracyEvaluator, IntentResolutionEvaluator, TaskAdherenceEvaluator, ToolSelectionEvaluator, ToolInputAccuracyEvaluator, avg\_latency\_seconds | Agents with tool calling |
 | `safe_agent_baseline` | Safety | ViolenceEvaluator, SexualEvaluator, SelfHarmEvaluator, HateUnfairnessEvaluator, ProtectedMaterialEvaluator, avg\_latency\_seconds | Content safety and responsible AI |
 
-## Tuning Defaults for Your Backend
-
-The shipped bundles target a **Foundry cloud-evaluation** baseline. Two thresholds are
-worth reviewing for your environment:
-
-- **`avg_latency_seconds`** — measures the *full pipeline* (agent invocation **plus**
-  evaluator execution), not just the agent. For Foundry cloud evaluation this is
-  typically 15–25 s/row including the judge model. Defaults are set conservatively
-  (30 s for most bundles, 45 s for `agent_workflow_baseline`). For HTTP or
-  local-adapter backends with light evaluators you can usually tighten this to
-  5–10 s.
-
-- **Tool-related evaluators** (`agent_workflow_baseline`) — `ToolCallAccuracyEvaluator`,
-  `ToolSelectionEvaluator`, and `ToolInputAccuracyEvaluator` only produce meaningful
-  scores when the target agent **actually exposes tool definitions** matching the
-  `tool_definitions` field in your dataset rows. Running this bundle against an
-  agent without registered tools will silently produce near-zero scores (the
-  evaluators see no tool calls to grade); use `conversational_agent_baseline`
-  instead for tool-less agents.
-
 ## Bundle YAML Structure
 
 ```yaml
diff --git a/docs/how-it-works.md b/docs/how-it-works.md
index bcf01e07..0910d359 100644
--- a/docs/how-it-works.md
+++ b/docs/how-it-works.md
@@ -320,7 +320,7 @@ Run configs use `version: 1`.
 - `kind` — `foundry_agent` or `http`
 
 Foundry agent endpoint fields:
-- `agent_id` — Agent identifier, e.g. `my-agent:3` (name:version). Omitting the `:version` suffix is allowed and resolves to the **latest** version at run time, but is not deterministic across runs — pin a version for CI / baseline comparisons.
+- `agent_id` — Agent identifier, e.g. `my-agent:3` (name:version)
 - `project_endpoint` — Foundry project URL (inline value)
 - `project_endpoint_env` — Env var name holding the project URL (default: `AZURE_AI_FOUNDRY_PROJECT_ENDPOINT`)
 - `api_version` — Agent Service API version
diff --git a/docs/tutorial-agent-workflow.md b/docs/tutorial-agent-workflow.md
index 5b43306e..1cc00cb3 100644
--- a/docs/tutorial-agent-workflow.md
+++ b/docs/tutorial-agent-workflow.md
@@ -249,7 +249,7 @@ The `agent_workflow_baseline` bundle enforces:
 | TaskAdherenceEvaluator | ≥ | 3.0 |
 | ToolSelectionEvaluator | ≥ | 3.0 |
 | ToolInputAccuracyEvaluator | ≥ | 3.0 |
-| avg_latency_seconds | ≤ | 45.0 |
+| avg_latency_seconds | ≤ | 15.0 |
 
 Scores range from 1 to 5. Adjust thresholds in `.agentops/bundles/agent_workflow_baseline.yaml`.
 
diff --git a/docs/tutorial-basic-foundry-agent.md b/docs/tutorial-basic-foundry-agent.md
index e8adab2a..22f7ec8e 100644
--- a/docs/tutorial-basic-foundry-agent.md
+++ b/docs/tutorial-basic-foundry-agent.md
@@ -76,13 +76,6 @@ After saving, you need the agent's identifier for the run config. There are two
 
 AgentOps handles both. Named agents use the Foundry Responses API; legacy agents use the Threads API.
 
-> **Versioning tip.** When you omit the `:version` suffix on a named agent
-> (e.g., `agent_id: my-agent`), Foundry resolves to the **latest** version of
-> that agent at run time. This is convenient for local iteration but can hurt
-> reproducibility — a later run could pick up a different agent revision.
-> For CI / baseline runs, pin an explicit version (e.g., `my-agent:3`) so
-> the same `results.json` can be re-generated deterministically.
-
 ## Part 2: Set up AgentOps
 
 ### 1) Azure login
diff --git a/docs/tutorial-conversational-agent.md b/docs/tutorial-conversational-agent.md
index b1aec399..5b49583f 100644
--- a/docs/tutorial-conversational-agent.md
+++ b/docs/tutorial-conversational-agent.md
@@ -233,7 +233,7 @@ The `conversational_agent_baseline` bundle enforces:
 | FluencyEvaluator | ≥ | 3.0 |
 | RelevanceEvaluator | ≥ | 3.0 |
 | SimilarityEvaluator | ≥ | 3.0 |
-| avg_latency_seconds | ≤ | 30.0 |
+| avg_latency_seconds | ≤ | 10.0 |
 
 Scores range from 1 to 5. Adjust thresholds in `.agentops/bundles/conversational_agent_baseline.yaml` for your quality bar.
 
diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py
index e9dd1717..478536c7 100644
--- a/src/agentops/cli/app.py
+++ b/src/agentops/cli/app.py
@@ -380,44 +380,9 @@ def cmd_dataset_import() -> None:
 
 
 @config_app.command("validate")
-def cmd_config_validate(
-    config: Path = typer.Option(
-        Path(".agentops/run.yaml"),
-        "-c",
-        "--config",
-        help="Path to a run.yaml file to validate.",
-    ),
-) -> None:
-    """Validate a run.yaml configuration file against the AgentOps schema.
-
-    Loads the file with the same Pydantic models the runner uses, prints a
-    summary on success, and prints a clear error message and exits with
-    code 1 on validation failure.
-    """
-    from agentops.core.config_loader import load_run_config
-
-    if not config.exists():
-        typer.echo(f"Error: config file not found: {config}", err=True)
-        raise typer.Exit(code=1)
-
-    try:
-        run_cfg = load_run_config(config)
-    except ValueError as exc:
-        typer.echo(f"❌ {config} is invalid:\n{exc}", err=True)
-        raise typer.Exit(code=1) from exc
-    except Exception as exc:  # pragma: no cover - unexpected I/O / parser error
-        typer.echo(f"Error reading {config}: {exc}", err=True)
-        raise typer.Exit(code=1) from exc
-
-    target = run_cfg.target
-    bundle_ref = run_cfg.bundle.name or run_cfg.bundle.path
-    dataset_ref = run_cfg.dataset.name or run_cfg.dataset.path
-    typer.echo(
-        f"✅ {config} is valid "
-        f"(version={run_cfg.version}, target.type={target.type}, "
-        f"target.hosting={target.hosting}, target.execution_mode={target.execution_mode}, "
-        f"bundle={bundle_ref}, dataset={dataset_ref})"
-    )
+def cmd_config_validate() -> None:
+    """Validate configuration files (planned)."""
+    _planned_command("agentops config validate")
 
 
 @config_app.command("show")
diff --git a/src/agentops/templates/bundles/agent_workflow_baseline.yaml b/src/agentops/templates/bundles/agent_workflow_baseline.yaml
index 3eb916af..ea6e015b 100644
--- a/src/agentops/templates/bundles/agent_workflow_baseline.yaml
+++ b/src/agentops/templates/bundles/agent_workflow_baseline.yaml
@@ -106,7 +106,7 @@ thresholds:
     value: 3
   - evaluator: avg_latency_seconds
     criteria: "<="
-    value: 45.0
+    value: 15.0
 metadata:
   category: agent-workflow
   scenario: agent_with_tools
diff --git a/src/agentops/templates/bundles/conversational_agent_baseline.yaml b/src/agentops/templates/bundles/conversational_agent_baseline.yaml
index 8cd3e129..2126df44 100644
--- a/src/agentops/templates/bundles/conversational_agent_baseline.yaml
+++ b/src/agentops/templates/bundles/conversational_agent_baseline.yaml
@@ -64,7 +64,7 @@ thresholds:
     value: 3
   - evaluator: avg_latency_seconds
     criteria: "<="
-    value: 30.0
+    value: 10.0
 metadata:
   category: conversational
   scenario: conversational_agent
diff --git a/src/agentops/templates/bundles/model_quality_baseline.yaml b/src/agentops/templates/bundles/model_quality_baseline.yaml
index 27fe1abd..9b2f2583 100644
--- a/src/agentops/templates/bundles/model_quality_baseline.yaml
+++ b/src/agentops/templates/bundles/model_quality_baseline.yaml
@@ -64,7 +64,7 @@ thresholds:
     value: 0.4
   - evaluator: avg_latency_seconds
     criteria: "<="
-    value: 30.0
+    value: 10.0
 metadata:
   category: model-quality
   scenario: model_direct
diff --git a/src/agentops/templates/bundles/rag_quality_baseline.yaml b/src/agentops/templates/bundles/rag_quality_baseline.yaml
index 4b51c36b..feb8f258 100644
--- a/src/agentops/templates/bundles/rag_quality_baseline.yaml
+++ b/src/agentops/templates/bundles/rag_quality_baseline.yaml
@@ -79,7 +79,7 @@ thresholds:
     value: 3
   - evaluator: avg_latency_seconds
     criteria: "<="
-    value: 30.0
+    value: 10.0
 metadata:
   category: rag-quality
   scenario: rag_retrieval
diff --git a/src/agentops/templates/bundles/safe_agent_baseline.yaml b/src/agentops/templates/bundles/safe_agent_baseline.yaml
index 06814927..36108d7f 100644
--- a/src/agentops/templates/bundles/safe_agent_baseline.yaml
+++ b/src/agentops/templates/bundles/safe_agent_baseline.yaml
@@ -78,7 +78,7 @@ thresholds:
     value: 2
   - evaluator: avg_latency_seconds
     criteria: "<="
-    value: 30.0
+    value: 10.0
 metadata:
   category: safety
   scenario: content_safety
diff --git a/tests/unit/test_cli_commands.py b/tests/unit/test_cli_commands.py
index 7c6060f3..cf77e6ce 100644
--- a/tests/unit/test_cli_commands.py
+++ b/tests/unit/test_cli_commands.py
@@ -52,47 +52,3 @@ def test_report_help_exposes_available_and_planned_commands() -> None:
     assert "generate" in stripped
     assert "show" in stripped
     assert "export" in stripped
-
-
-def test_config_validate_accepts_valid_run_yaml(tmp_path) -> None:
-    cfg = tmp_path / "run.yaml"
-    cfg.write_text(
-        """\
-version: 1
-target:
-  type: agent
-  hosting: local
-  execution_mode: local
-  local:
-    callable: my_module:my_fn
-bundle:
-  name: conversational_agent_baseline
-dataset:
-  name: smoke-conversational
-""",
-        encoding="utf-8",
-    )
-
-    result = runner.invoke(app, ["config", "validate", "-c", str(cfg)])
-
-    assert result.exit_code == 0, result.stdout + (result.stderr or "")
-    assert "is valid" in result.stdout
-
-
-def test_config_validate_reports_missing_file() -> None:
-    result = runner.invoke(app, ["config", "validate", "-c", "/no/such/file.yaml"])
-
-    assert result.exit_code == 1
-    combined = result.stdout + (result.stderr or "")
-    assert "not found" in combined.lower()
-
-
-def test_config_validate_reports_invalid_schema(tmp_path) -> None:
-    cfg = tmp_path / "run.yaml"
-    cfg.write_text("version: 1\nbackend: legacy\n", encoding="utf-8")
-
-    result = runner.invoke(app, ["config", "validate", "-c", str(cfg)])
-
-    assert result.exit_code == 1
-    combined = result.stdout + (result.stderr or "")
-    assert "invalid" in combined.lower() or "backend" in combined.lower()