diff --git a/src/agentops/backends/eval_engine.py b/src/agentops/backends/eval_engine.py
index 544bef3e..fbe5e80c 100644
--- a/src/agentops/backends/eval_engine.py
+++ b/src/agentops/backends/eval_engine.py
@@ -23,6 +23,44 @@
 
 logger = logging.getLogger(__name__)
 
+
+# ---------------------------------------------------------------------------
+# Suppress noisy SDK warnings for single-turn evaluation inputs
+# ---------------------------------------------------------------------------
+
+class _ConversationHistoryFilter(logging.Filter):
+    """Drop azure-ai-evaluation's 'Conversation history could not be parsed' warning.
+
+    The SDK emits it for each single-turn row, where the input is plain text
+    rather than a conversation list; there it is expected and harmless.
+    """
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        return record.getMessage().find("Conversation history could not be parsed") < 0
+
+
+# Attach the filter to every SDK logger that can emit the warning: each
+# evaluator module passes its own logger to reformat_conversation_history().
+for _sdk_logger_name in (
+    "azure.ai.evaluation._common.utils",
+    "azure.ai.evaluation._evaluators._task_adherence._task_adherence",
+    "azure.ai.evaluation._evaluators._intent_resolution._intent_resolution",
+    "azure.ai.evaluation._evaluators._task_completion._task_completion",
+    "azure.ai.evaluation._evaluators._tool_call_accuracy._tool_call_accuracy",
+    "azure.ai.evaluation",  # NOTE: a logger's filter is not applied to records from its child loggers
+):
+    logging.getLogger(_sdk_logger_name).addFilter(_ConversationHistoryFilter())
+
+
+# ---------------------------------------------------------------------------
+# Cloud-only evaluator sentinel
+# ---------------------------------------------------------------------------
+
+
+class _CloudOnlyEvaluatorError(Exception):
+    """Raised when an evaluator class is missing from the local SDK (assumed cloud-only)."""
+
+
 # ---------------------------------------------------------------------------
 # Credential help (shared by _default_credential and _acquire_token)
 # ---------------------------------------------------------------------------
@@ -609,8 +647,12 @@ def _load_foundry_evaluator_callable(
                 "Install with: pip install azure-ai-evaluation"
             ) from exc
         except AttributeError as exc:
-            raise ValueError(
-                f"Unknown built-in Foundry evaluator class: {class_name}"
+            raise _CloudOnlyEvaluatorError(
+                f"Evaluator '{class_name}' is not available in the local "
+                f"azure-ai-evaluation SDK. It may only be available via "
+                f"Foundry Cloud Evaluation (builtin.{_to_builtin_evaluator_name(class_name)}). "
+                f"Use 'hosting: foundry' with 'execution_mode: remote' to "
+                f"run this evaluator, or disable it for local runs."
             ) from exc
 
         return _instantiate_evaluator_symbol(
@@ -691,12 +733,23 @@ def _build_foundry_evaluator_runtimes(
                 )
             score_keys = score_keys_raw
 
-        evaluator_callable = _load_foundry_evaluator_callable(
-            evaluator_name=evaluator.name,
-            evaluator_config=config,
-            fallback_endpoint=fallback_endpoint,
-            fallback_deployment=fallback_deployment,
-        )
+        try:
+            evaluator_callable = _load_foundry_evaluator_callable(
+                evaluator_name=evaluator.name,
+                evaluator_config=config,
+                fallback_endpoint=fallback_endpoint,
+                fallback_deployment=fallback_deployment,
+            )
+        except _CloudOnlyEvaluatorError:
+            logger.warning(
+                "Skipping evaluator '%s' — not available in the local "
+                "azure-ai-evaluation SDK. This evaluator is only supported "
+                "via Foundry Cloud Evaluation (hosting: foundry, "
+                "execution_mode: remote). It will be ignored for this "
+                "local run.",
+                evaluator.name,
+            )
+            continue
 
         runtimes.append(
             FoundryEvaluatorRuntime(
diff --git a/src/agentops/backends/local_adapter_backend.py b/src/agentops/backends/local_adapter_backend.py
index 9eae1b62..1636e97a 100644
--- a/src/agentops/backends/local_adapter_backend.py
+++ b/src/agentops/backends/local_adapter_backend.py
@@ -222,6 +222,7 @@ def execute(self, context: BackendRunContext) -> BackendExecutionResult:
                             prediction_text = _normalize_text(
                                 result.get("response", "")
                             )
+                            returned_tool_calls = result.get("tool_calls")
                             set_agent_invoke_result(invoke_span)
                     except Exception as exc:  # noqa: BLE001
                         stderr_lines.append(f"row={index} error={exc!s}")
@@ -230,6 +231,7 @@ def execute(self, context: BackendRunContext) -> BackendExecutionResult:
                         continue
                 else:
                     # --- Subprocess mode ---
+                    assert adapter_command is not None
                     adapter_input = json.dumps(
                         {"input": prompt_text, "expected": expected_text, **row}
                     )
@@ -267,6 +269,7 @@ def execute(self, context: BackendRunContext) -> BackendExecutionResult:
                             prediction_text = _normalize_text(
                                 adapter_output.get("response", "")
                             )
+                            returned_tool_calls = adapter_output.get("tool_calls")
                             set_agent_invoke_result(invoke_span)
                     except subprocess.TimeoutExpired:
                         stderr_lines.append(f"row={index} error=adapter timeout")
@@ -333,6 +336,7 @@ def execute(self, context: BackendRunContext) -> BackendExecutionResult:
                         "input": prompt_text,
                         "response": prediction_text,
                         "context": row.get("context"),
+                        "tool_calls": returned_tool_calls,
                         "metrics": row_metric_entries,
                     }
                 )
diff --git a/src/agentops/cli/app.py b/src/agentops/cli/app.py
index 693ffa0f..478536c7 100644
--- a/src/agentops/cli/app.py
+++ b/src/agentops/cli/app.py
@@ -491,6 +491,17 @@ def cmd_skills_install(
             help="Target platform(s): copilot, claude.",
         ),
     ] = None,
+    from_github: Annotated[
+        str | None,
+        typer.Option(
+            "--from",
+            help=(
+                "Install a community skill from GitHub. "
+                "Format: org/repo or github:org/repo[@ref]. "
+                "Example: --from donlee/pptx-designer"
+            ),
+        ),
+    ] = None,
     force: bool = typer.Option(
         False,
         "--force",
@@ -507,12 +518,18 @@ def cmd_skills_install(
         help="Target repository root directory.",
     ),
 ) -> None:
-    """Install AgentOps coding agent skills into the target project."""
-    from agentops.services.skills import install_skills
+    """Install AgentOps coding agent skills into the target project.
+
+    Use --from to install a community skill from GitHub:
 
+        agentops skills install --from donlee/pptx-designer
+
+        agentops skills install --from github:org/repo@v1.0
+    """
     log.debug(
-        "cmd_skills_install called platform=%s force=%s prompt=%s dir=%s",
+        "cmd_skills_install called platform=%s from=%s force=%s prompt=%s dir=%s",
         platform,
+        from_github,
         force,
         prompt,
         directory,
@@ -524,6 +541,31 @@ def cmd_skills_install(
         typer.echo("No platforms selected. Skipping skill installation.")
         return
 
+    if from_github:
+        # GitHub-based skill installation
+        from agentops.services.skills import install_github_skill
+
+        typer.echo(f"Installing skill from GitHub: {from_github}")
+        try:
+            result = install_github_skill(
+                source=from_github,
+                directory=directory,
+                platforms=resolved_platforms,
+                force=True,
+            )
+        except ValueError as exc:
+            typer.echo(f"Error: {exc}", err=True)
+            raise typer.Exit(code=1) from exc
+        except Exception as exc:
+            typer.echo(f"Error: failed to install skill: {exc}", err=True)
+            raise typer.Exit(code=1) from exc
+
+        _print_skills_result(result)
+        return
+
+    # Bundled skills installation
+    from agentops.services.skills import install_skills
+
     try:
         result = install_skills(
             directory=directory, platforms=resolved_platforms, force=True
diff --git a/src/agentops/services/initializer.py b/src/agentops/services/initializer.py
index 28e3d402..5f6e3559 100644
--- a/src/agentops/services/initializer.py
+++ b/src/agentops/services/initializer.py
@@ -23,11 +23,14 @@ class InitResult:
     "run.yaml",
     "run-rag.yaml",
     "run-agent.yaml",
+    "run-agent-local.yaml",
     "run-http-model.yaml",
     "run-http-rag.yaml",
     "run-http-agent-tools.yaml",
     "run-callable.yaml",
     "callable_adapter.py",
+    "agent_framework_adapter.py",
+    "multi_agent_workflow.py",
     ".gitignore",
     "bundles/model_quality_baseline.yaml",
     "bundles/rag_quality_baseline.yaml",
diff --git a/src/agentops/services/runner.py b/src/agentops/services/runner.py
index 1124cbcf..18319af9 100644
--- a/src/agentops/services/runner.py
+++ b/src/agentops/services/runner.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import json
+import logging
 import shutil
 from dataclasses import dataclass
 from datetime import datetime
@@ -42,6 +43,8 @@
     shutdown as shutdown_tracing,
 )
 
+logger = logging.getLogger(__name__)
+
 
 @dataclass(frozen=True)
 class EvalRunServiceResult:
@@ -218,9 +221,9 @@ def _evaluate_item_thresholds(
         threshold_results: list[ItemThresholdEvaluationResult] = []
         for rule in threshold_rules:
             if rule.evaluator not in row_values:
-                raise ValueError(
-                    f"Missing evaluator score '{rule.evaluator}' for row {row.row_index}"
-                )
+                # Evaluator may be cloud-only and was skipped during local
+                # execution — silently skip its threshold check.
+                continue
 
             threshold_results.append(
                 _evaluate_threshold_against_value(
@@ -266,8 +269,12 @@ def _validate_enabled_evaluators_scored(
 
     missing = [name for name in evaluator_names if name not in scored_names]
     if missing:
-        raise ValueError(
-            "Missing scores for enabled evaluators: " + ", ".join(sorted(missing))
+        logger.warning(
+            "Some enabled evaluators did not produce scores and will be "
+            "excluded from threshold checks: %s. These evaluators may "
+            "only be available via Foundry Cloud Evaluation "
+            "(hosting: foundry, execution_mode: remote).",
+            ", ".join(sorted(missing)),
         )
 
 
@@ -291,6 +298,11 @@ def _summarize_thresholds_from_items(
                 ):
                     rule_results.append(threshold_result)
 
+        # Skip threshold rules for evaluators that produced no scores
+        # (e.g., cloud-only evaluators skipped during local execution).
+        if not rule_results:
+            continue
+
         passed_items = sum(1 for result in rule_results if result.passed)
         passed = bool(rule_results) and passed_items == len(rule_results)
 
diff --git a/src/agentops/services/skills.py b/src/agentops/services/skills.py
index 35092588..ce66e276 100644
--- a/src/agentops/services/skills.py
+++ b/src/agentops/services/skills.py
@@ -2,10 +2,15 @@
 
 from __future__ import annotations
 
+import io
+import json
 import re
+import tarfile
+import urllib.error
+import urllib.request
 from dataclasses import dataclass, field
 from importlib.resources import files
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from typing import Dict, List
 
 _TEMPLATE_PACKAGE = "agentops.templates"
@@ -200,9 +205,357 @@ def install_skills(
 
 
 # ---------------------------------------------------------------------------
-# Registration — add skill discovery entries to instruction files
+# GitHub-based skill installation
 # ---------------------------------------------------------------------------
 
+# Sub-directories of a skill folder that may hold extra files (agentskills.io spec).
+_ALLOWED_SKILL_DIRS = {"references", "scripts", "assets"}
+
+# Allowed directories that extraction still skips for security (no opt-in visible here).
+_RESTRICTED_DIRS = {"scripts"}
+
+_GITHUB_REF_RE = re.compile(  # [github:]owner/repo[@ref]
+    r"^(?:github:)?"
+    r"(?P<owner>[A-Za-z0-9._-]+)"
+    r"/(?P<repo>[A-Za-z0-9._-]+)"
+    r"(?:@(?P<ref>[A-Za-z0-9._/-]+))?$"
+)
+
+_PROVENANCE_FILE = ".installed-from.json"  # JSON record of where a skill was installed from
+
+
+@dataclass
+class GitHubSkillRef:
+    """Owner, repository and git ref parsed from a GitHub skill reference."""
+
+    owner: str  # first path segment of the reference (``owner/repo``)
+    repo: str  # repository name
+    ref: str  # branch, tag, or commit SHA
+
+
+def _parse_github_ref(source: str) -> GitHubSkillRef:
+    """Split ``org/repo`` or ``github:org/repo@ref`` into owner, repo and ref.
+
+    The ref falls back to ``main``; raises ValueError on malformed input.
+    """
+    match = _GITHUB_REF_RE.match(source.strip())
+    if match is None:
+        raise ValueError(
+            f"Invalid GitHub skill reference: '{source}'. "
+            "Expected format: github:org/repo or org/repo[@ref]"
+        )
+    return GitHubSkillRef(
+        owner=match["owner"],
+        repo=match["repo"],
+        ref=match["ref"] or "main",
+    )
+
+
+def _validate_skill_name(name: str) -> str:
+    """Validate a skill name from SKILL.md frontmatter and return it unchanged.
+
+    Raises ValueError unless it is lowercase alphanumerics joined by single hyphens.
+    """
+    # The pattern admits only [a-z0-9-], so '.', '/' and '\' (path traversal)
+    # are rejected by this one check; a separate traversal test is unreachable.
+    if not name or not re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", name):
+        raise ValueError(
+            f"Invalid skill name: '{name}'. "
+            "Must be lowercase alphanumeric with single hyphens, "
+            "e.g. 'pptx-designer'."
+        )
+    return name
+
+
+def _parse_skill_frontmatter(content: str) -> dict[str, str]:
+    """Extract the top-level fields of a SKILL.md YAML frontmatter block.
+
+    Line-based (no YAML dependency); indented lines extend the previous key.
+    Raises ValueError if the block or its ``name``/``description`` is missing.
+    """
+    if not content.startswith("---"):
+        raise ValueError("SKILL.md is missing YAML frontmatter (must start with ---).")
+
+    lines = content.split("\n")
+    end_idx = None
+    for i, line in enumerate(lines[1:], 1):
+        if line.strip() == "---":
+            end_idx = i
+            break
+
+    if end_idx is None:
+        raise ValueError("SKILL.md has unclosed YAML frontmatter.")
+
+    meta: dict[str, str] = {}
+    current_key = ""
+    for line in lines[1:end_idx]:
+        if line.startswith("  ") and current_key:
+            # Folded continuation: join with one space, none after a bare ``key: >``.
+            meta[current_key] = f"{meta.get(current_key, '')} {line.strip()}".strip()
+            continue
+        if ":" in line:
+            key, _, val = line.partition(":")
+            key = key.strip()
+            val = val.strip().strip(">").strip('"').strip("'").strip()
+            if key:
+                current_key = key
+                meta[key] = val
+
+    if "name" not in meta:
+        raise ValueError("SKILL.md frontmatter is missing required 'name' field.")
+    if "description" not in meta:
+        raise ValueError("SKILL.md frontmatter is missing required 'description' field.")
+
+    return meta
+
+
+def _fetch_github_tarball(ref: GitHubSkillRef) -> bytes:
+    """Download a GitHub repo tarball for ``ref``; failures raise ValueError.
+
+    Sends ``GITHUB_TOKEN`` or ``GH_TOKEN`` as a bearer token when one is set.
+    """
+    import os
+
+    url = f"https://api.github.com/repos/{ref.owner}/{ref.repo}/tarball/{ref.ref}"
+
+    headers: dict[str, str] = {"Accept": "application/vnd.github+json"}
+    token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN")
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    req = urllib.request.Request(url, headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:  # timeout in seconds
+            return resp.read()  # whole archive is read into memory
+    except urllib.error.HTTPError as e:  # must precede URLError, its base class
+        if e.code == 404:
+            raise ValueError(
+                f"GitHub repository not found: {ref.owner}/{ref.repo}@{ref.ref}"
+            ) from e
+        if e.code == 403:
+            raise ValueError(
+                f"GitHub API rate limit or access denied for {ref.owner}/{ref.repo}. "
+                "Set GITHUB_TOKEN env var for authenticated access."
+            ) from e
+        raise ValueError(
+            f"GitHub API error ({e.code}): {e.reason}"
+        ) from e
+    except urllib.error.URLError as e:
+        raise ValueError(f"Network error fetching {ref.owner}/{ref.repo}: {e}") from e
+
+
+def _extract_skill_from_tarball(
+    tarball: bytes,
+    repo_name: str,
+) -> tuple[dict[str, str], dict[str, bytes]]:
+    """Extract a single skill from a GitHub repo tarball.
+
+    Returns (frontmatter_metadata, {relative_path: content_bytes}).
+
+    SKILL.md candidates are the repo root and its immediate sub-directories:
+      1. ``{repo_name}/SKILL.md`` is chosen whenever it exists
+      2. otherwise the only candidate, ``*/SKILL.md`` or root ``SKILL.md``
+    Collected: SKILL.md plus non-hidden files under ``references/``/``assets/``.
+
+    Raises ValueError if no SKILL.md is found or several remain ambiguous.
+    """
+    with tarfile.open(fileobj=io.BytesIO(tarball), mode="r:gz") as tar:
+        members = tar.getnames()
+
+        # GitHub tarballs wrap everything in one top-level dir like "owner-repo-sha/"
+        prefix = ""
+        for name in members:
+            if "/" in name:
+                prefix = name.split("/")[0] + "/"
+                break
+
+        # SKILL.md candidates: repo root (1 part) or an immediate sub-directory (2 parts)
+        candidates: list[str] = []
+        for name in members:
+            relative = name[len(prefix):] if name.startswith(prefix) else name
+            parts = PurePosixPath(relative).parts
+            if parts and parts[-1] == "SKILL.md":
+                if len(parts) <= 2:
+                    candidates.append(relative)
+
+        if not candidates:
+            raise ValueError(
+                f"No SKILL.md found in {repo_name}. "
+                "The repository must contain a skill directory with a SKILL.md file "
+                "(agentskills.io standard)."
+            )
+
+        # Prefer {repo_name}/SKILL.md; otherwise require exactly one candidate
+        chosen = None
+        for c in candidates:
+            if c.startswith(repo_name + "/"):
+                chosen = c
+                break
+        if chosen is None:
+            if len(candidates) > 1:
+                dirs = [str(PurePosixPath(c).parent) for c in candidates]
+                raise ValueError(
+                    f"Multiple skills found in {repo_name}: {', '.join(dirs)}. "
+                    "Use github:org/repo with a repo that contains a single skill."
+                )
+            chosen = candidates[0]
+
+        skill_dir = str(PurePosixPath(chosen).parent)
+        if skill_dir == ".":  # SKILL.md sits at the repo root
+            skill_dir = ""
+
+        # Read SKILL.md and parse frontmatter
+        skill_md_path = prefix + chosen
+        member = tar.getmember(skill_md_path)
+        f = tar.extractfile(member)
+        if f is None:
+            raise ValueError(f"Cannot read {skill_md_path}")
+        skill_md_content = f.read()
+        metadata = _parse_skill_frontmatter(skill_md_content.decode("utf-8"))
+
+        # Collect the installable files of the skill directory
+        skill_prefix = prefix + (skill_dir + "/" if skill_dir else "")
+        collected: dict[str, bytes] = {}
+
+        for member in tar.getmembers():
+            if not member.isfile():  # regular files only; links and directories are ignored
+                continue
+            if not member.name.startswith(skill_prefix):
+                continue
+
+            relative = member.name[len(skill_prefix):]
+            parts = PurePosixPath(relative).parts
+
+            if not parts:
+                continue
+
+            # Security: block path traversal
+            if any(p in ("..", "") for p in parts):
+                continue
+            if any(p.startswith(".") for p in parts):  # skip hidden files and directories
+                continue
+
+            # Allow SKILL.md at root, and files in allowed subdirs
+            if len(parts) == 1 and parts[0] == "SKILL.md":
+                collected[relative] = skill_md_content
+                continue
+
+            top_dir = parts[0] if len(parts) > 1 else None  # None: a file directly in the skill dir
+            if top_dir and top_dir in _ALLOWED_SKILL_DIRS:
+                if top_dir in _RESTRICTED_DIRS:
+                    continue  # Skip scripts/ by default
+                f = tar.extractfile(member)
+                if f:
+                    collected[relative] = f.read()
+
+    return metadata, collected
+
+
+def install_github_skill(
+    source: str,
+    directory: Path,
+    platforms: list[str],
+    force: bool = False,
+) -> SkillsInstallResult:
+    """Install a skill from a GitHub repository.
+
+    Downloads the repo archive, extracts and validates the skill, installs it
+    per platform with a provenance file. Raises ValueError on invalid input.
+
+    Args:
+        source: GitHub reference, e.g. ``github:org/repo``, ``org/repo@v1.0``.
+        directory: Root directory of the consumer repository.
+        platforms: Platform identifiers (e.g. ``["copilot"]``); unknown ones are ignored.
+        force: When True, overwrite existing skill files.
+
+    Returns:
+        SkillsInstallResult with paths of created, overwritten, or skipped files.
+    """
+    ref = _parse_github_ref(source)
+    result = SkillsInstallResult(platforms=list(platforms))
+    resolved = directory.resolve()
+
+    # Fetch and extract
+    tarball = _fetch_github_tarball(ref)
+    metadata, skill_files = _extract_skill_from_tarball(tarball, ref.repo)
+
+    skill_name = _validate_skill_name(metadata["name"])
+
+    if not skill_files:
+        raise ValueError(f"No installable files found in {ref.owner}/{ref.repo}.")
+
+    # Install to each platform
+    for platform in platforms:
+        config = _PLATFORM_CONFIGS.get(platform)
+        if not config:
+            continue
+
+        target_dir = resolved / config["target_dir"]
+
+        for relative_path, content_bytes in skill_files.items():
+            if relative_path == "SKILL.md":
+                # SKILL.md uses the platform file pattern
+                dest_relative = config["file_pattern"].format(skill_name=skill_name)
+                dest = target_dir / dest_relative
+                text_content = content_bytes.decode("utf-8")
+                text_content = _transform_content(text_content, platform)
+                write_bytes = text_content.encode("utf-8")
+            else:
+                # Reference/asset files go alongside the SKILL.md
+                if platform == "claude":
+                    continue  # Claude only gets the single .md file
+                skill_dest_dir = config["file_pattern"].format(
+                    skill_name=skill_name
+                )
+                # e.g. "pptx-designer/SKILL.md" → "pptx-designer/"
+                skill_base = str(PurePosixPath(skill_dest_dir).parent)
+                dest = target_dir / skill_base / relative_path
+                write_bytes = content_bytes
+
+            # Security: ensure dest stays under target_dir
+            try:
+                dest.resolve().relative_to(target_dir.resolve())
+            except ValueError:
+                continue  # path traversal — skip silently
+
+            existed = dest.exists()
+            if existed and not force:
+                result.skipped_files.append(dest)
+                continue
+
+            dest.parent.mkdir(parents=True, exist_ok=True)
+            dest.write_bytes(write_bytes)
+
+            if existed:
+                result.overwritten_files.append(dest)
+            else:
+                result.created_files.append(dest)
+
+        # Write provenance file (always rewritten, regardless of ``force``)
+        if platform != "claude":
+            provenance_dest_rel = config["file_pattern"].format(
+                skill_name=skill_name
+            )
+            provenance_dir = target_dir / str(PurePosixPath(provenance_dest_rel).parent)
+            provenance = {
+                "source": f"github:{ref.owner}/{ref.repo}",
+                "ref": ref.ref,
+                "skill_name": skill_name,
+                "description": metadata.get("description", ""),
+                "files": sorted(skill_files.keys()),
+            }
+            prov_path = provenance_dir / _PROVENANCE_FILE
+            prov_existed = prov_path.exists()  # checked before writing, to classify the result
+            prov_path.parent.mkdir(parents=True, exist_ok=True)
+            prov_path.write_text(
+                json.dumps(provenance, indent=2) + "\n", encoding="utf-8"
+            )
+            bucket = result.overwritten_files if prov_existed else result.created_files
+            if prov_path not in bucket:
+                bucket.append(prov_path)
+
+    return result
+
 
 @dataclass
 class RegistrationResult:
diff --git a/src/agentops/templates/agent_framework_adapter.py b/src/agentops/templates/agent_framework_adapter.py
new file mode 100644
index 00000000..281b785d
--- /dev/null
+++ b/src/agentops/templates/agent_framework_adapter.py
@@ -0,0 +1,132 @@
+"""Agent Framework adapter for evaluating a single agent with tools.
+
+Uses Microsoft Agent Framework Agent with FoundryChatClient to create
+an agent with local @tool functions. Unlike FoundryAgent (which requires
+tools declared server-side), this pattern defines tools entirely in code.
+
+For multi-agent workflows with routing, use multi_agent_workflow.py.
+
+Reference: github.com/microsoft/agent-framework/python/samples/
+           03-workflows/_start-here/step2_agents_in_a_workflow.py
+
+Prerequisites:
+  pip install agent-framework[foundry] azure-identity
+
+Environment variables:
+  AZURE_AI_FOUNDRY_PROJECT_ENDPOINT  — Foundry project endpoint
+  AZURE_OPENAI_DEPLOYMENT            — model deployment name
+
+Usage in run.yaml:
+  target:
+    type: agent
+    hosting: local
+    execution_mode: local
+    framework: agent_framework
+    local:
+      callable: agent_framework_adapter:run_evaluation
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+from typing import Any
+
+from agent_framework import Agent, AgentResponse, tool
+
+logger = logging.getLogger(__name__)
+
+PROJECT_ENDPOINT = os.environ.get("AZURE_AI_FOUNDRY_PROJECT_ENDPOINT", "")
+MODEL = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "")
+
+_client = None
+_captured_tool_calls: list[dict[str, Any]] = []
+
+
+def _get_chat_client():
+    """Return the shared FoundryChatClient, building it on first use."""
+    global _client
+    if _client is not None:
+        return _client
+    from azure.identity import DefaultAzureCredential
+    from agent_framework.foundry import FoundryChatClient
+    _client = FoundryChatClient(
+        project_endpoint=PROJECT_ENDPOINT,
+        model=MODEL,
+        credential=DefaultAzureCredential(),
+    )
+    return _client
+
+
+# ── Local tool implementations ─────────────────────────────────────────
+# Replace these with your agent's actual tools.
+
+
+@tool
+def get_weather(city: str) -> str:
+    """Get current weather for a city"""
+    _captured_tool_calls.append({"name": "get_weather", "arguments": {"city": city}})  # log the call
+    return f"Current weather in {city}: 55°F, partly cloudy."  # canned sample data, no lookup
+
+
+@tool
+def convert_currency(amount: str, from_currency: str, to_currency: str) -> str:
+    """Convert an amount from one currency to another"""
+    amt = float(amount)  # amount is declared as str; float() raises ValueError if non-numeric
+    _captured_tool_calls.append({  # log the call so it can be returned for scoring
+        "name": "convert_currency",
+        "arguments": {"amount": amt, "from_currency": from_currency, "to_currency": to_currency},
+    })
+    return f"{amt} {from_currency} = {amt * 0.92:.2f} {to_currency}"  # fixed 0.92 placeholder rate
+
+
+@tool
+def search_news(query: str, max_results: str = "5") -> str:
+    """Search for recent news articles"""
+    _captured_tool_calls.append({  # log the call so it can be returned for scoring
+        "name": "search_news",
+        "arguments": {"query": query, "max_results": int(max_results)},  # logged as int
+    })
+    return f"Found {max_results} articles about '{query}'."  # canned reply; no search is performed
+
+
+ALL_TOOLS = [get_weather, convert_currency, search_news]
+
+
+async def _run_agent(input_text: str) -> dict[str, Any]:
+    """Answer ``input_text`` with a fresh agent and report the tools it called."""
+    instructions = (
+        "You are a helpful assistant with tools. "
+        "Use the appropriate tool to answer the user's query. "
+        "Always call a tool before responding."
+    )
+    eval_agent = Agent(
+        client=_get_chat_client(),
+        name="EvalAgent",
+        instructions=instructions,
+        tools=ALL_TOOLS,
+    )
+
+    # Start from an empty log so only this run's tool calls are reported.
+    _captured_tool_calls.clear()
+    outcome: AgentResponse = await eval_agent.run(input_text)
+
+    return {
+        "response": (outcome.text or "").strip(),
+        "tool_calls": list(_captured_tool_calls),
+    }
+
+
+def run_evaluation(input_text: str, context: dict) -> dict:
+    """Evaluate one input synchronously; the AgentOps callable entry point.
+
+    Runs the agent on ``input_text`` via ``asyncio.run`` and returns its
+    ``response`` and ``tool_calls``. ``context`` is accepted but unused.
+    Raises ValueError when the endpoint or deployment variable is unset.
+    """
+    if not (PROJECT_ENDPOINT and MODEL):
+        message = "Set AZURE_AI_FOUNDRY_PROJECT_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
+        raise ValueError(message)
+
+    coroutine = _run_agent(input_text)
+    return asyncio.run(coroutine)
diff --git a/src/agentops/templates/bundles/agent_workflow_baseline.yaml b/src/agentops/templates/bundles/agent_workflow_baseline.yaml
index 03d27589..ea6e015b 100644
--- a/src/agentops/templates/bundles/agent_workflow_baseline.yaml
+++ b/src/agentops/templates/bundles/agent_workflow_baseline.yaml
@@ -5,6 +5,15 @@ description: >
   Measures task completion, tool call accuracy, intent resolution,
   task adherence, tool selection, and tool input accuracy using
   AI-assisted evaluators from the Foundry evaluation suite.
+
+  Note: TaskCompletionEvaluator, ToolSelectionEvaluator, and
+  ToolInputAccuracyEvaluator are only available via Foundry Cloud
+  Evaluation and will be gracefully skipped in local execution mode.
+
+  Note: TaskAdherenceEvaluator works best with multi-turn conversation
+  format (list of message dicts with role/content).  Single-turn plain
+  text inputs may produce low scores because the evaluator cannot assess
+  procedural adherence without conversation context.
 evaluators:
   - name: TaskCompletionEvaluator
     source: foundry
diff --git a/src/agentops/templates/callable_adapter.py b/src/agentops/templates/callable_adapter.py
index f66c400b..2c7d7549 100644
--- a/src/agentops/templates/callable_adapter.py
+++ b/src/agentops/templates/callable_adapter.py
@@ -103,6 +103,14 @@ def run_evaluation(input_text: str, context: dict) -> dict:
     # result = workflow.invoke(input_text)
     # return {"response": result.output}
 
+    # --- Option 4: Agent Framework (Azure AI Foundry agent) ---
+    # For Agent Framework agents, use the dedicated adapter template instead:
+    #
+    #   callable: agent_framework_adapter:run_evaluation
+    #
+    # Set AZURE_AI_FOUNDRY_PROJECT_ENDPOINT and AZURE_OPENAI_DEPLOYMENT environment variables.
+    # See agent_framework_adapter.py for details.
+
     # --- Context sanitization (RAG scenarios) ---
     # If your dataset has a "context" field with raw document content,
     # clean it before returning:
diff --git a/src/agentops/templates/multi_agent_workflow.py b/src/agentops/templates/multi_agent_workflow.py
new file mode 100644
index 00000000..f44c6b9a
--- /dev/null
+++ b/src/agentops/templates/multi_agent_workflow.py
@@ -0,0 +1,281 @@
+"""Multi-agent workflow using Microsoft Agent Framework.
+
+Demonstrates a router-to-specialist pattern following the official
+Agent Framework workflow samples (microsoft/agent-framework):
+
+  Router Agent → Coordinator (custom Executor) → Specialist Agent
+
+The Coordinator examines the Router's output and forwards the original
+user query to the correct Specialist Agent. Each specialist has @tool
+functions that Agent Framework auto-executes.
+
+Reference: github.com/microsoft/agent-framework/python/samples/03-workflows/
+
+Prerequisites:
+  pip install "agent-framework[foundry]" azure-identity
+
+Environment variables:
+  AZURE_AI_FOUNDRY_PROJECT_ENDPOINT  — Foundry project endpoint
+  AZURE_OPENAI_DEPLOYMENT            — model deployment name (e.g. gpt-5.1)
+
+Usage in run.yaml:
+  target:
+    type: agent
+    hosting: local
+    execution_mode: local
+    framework: agent_framework
+    local:
+      callable: multi_agent_workflow:run_evaluation
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+from typing import Any
+
+from agent_framework import (
+    Agent,
+    AgentExecutor,
+    AgentExecutorRequest,
+    AgentExecutorResponse,
+    AgentResponse,
+    Executor,
+    Message,
+    WorkflowBuilder,
+    WorkflowContext,
+    handler,
+    tool,
+)
+
+logger = logging.getLogger(__name__)
+
+PROJECT_ENDPOINT = os.environ.get("AZURE_AI_FOUNDRY_PROJECT_ENDPOINT", "")
+MODEL = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "")
+
+_client = None
+_captured_tool_calls: list[dict[str, Any]] = []
+
+
+def _get_chat_client():
+    """Lazily initialize the FoundryChatClient."""
+    global _client
+    if _client is None:
+        from azure.identity import DefaultAzureCredential
+        from agent_framework.foundry import FoundryChatClient
+
+        _client = FoundryChatClient(
+            project_endpoint=PROJECT_ENDPOINT,
+            model=MODEL,
+            credential=DefaultAzureCredential(),
+        )
+    return _client
+
+
+# ── Tool functions (decorated with @tool for Agent Framework) ──────────
+
+
+@tool
+def get_weather(city: str) -> str:
+    """Get current weather for a city"""  # presumably shown to the model as the tool description — verify before rewording
+    _captured_tool_calls.append({"name": "get_weather", "arguments": {"city": city}})
+    return f"Current weather in {city}: 55°F, partly cloudy."  # canned reply; no real lookup
+
+
+@tool
+def convert_currency(amount: str, from_currency: str, to_currency: str) -> str:
+    """Convert an amount from one currency to another"""
+    amt = float(amount)
+    _captured_tool_calls.append({
+        "name": "convert_currency",
+        "arguments": {"amount": amt, "from_currency": from_currency, "to_currency": to_currency},
+    })
+    return f"{amt} {from_currency} = {amt * 0.92:.2f} {to_currency}"
+
+
+@tool
+def calculate_compound_interest(principal: str, rate: str, years: str) -> str:
+    """Calculate compound interest"""
+    p, r, y = float(principal), float(rate) / 100, int(float(years))
+    total = p * ((1 + r) ** y)
+    interest = total - p
+    _captured_tool_calls.append({
+        "name": "calculate_compound_interest",
+        "arguments": {"principal": p, "rate": r, "years": y},
+    })
+    return f"Compound interest: ${interest:,.2f}, total: ${total:,.2f}"
+
+
+@tool
+def search_news(query: str, max_results: str = "5") -> str:
+    """Search for recent news articles"""
+    _captured_tool_calls.append({
+        "name": "search_news",
+        "arguments": {"query": query, "max_results": int(max_results)},
+    })
+    return f"Found {max_results} articles about '{query}'."
+
+
+@tool
+def search_flights(origin: str, destination: str, date: str) -> str:
+    """Search for available flights"""
+    _captured_tool_calls.append({
+        "name": "search_flights",
+        "arguments": {"origin": origin, "destination": destination, "date": date},
+    })
+    return f"Found 3 flights from {origin} to {destination} on {date}."
+
+
+# ── Coordinator Executor ──────────────────────────────────────────────
+# Routes the user query to the correct specialist based on the Router's
+# classification. Follows the official Coordinator pattern from
+# microsoft/agent-framework samples.
+
+
+class RoutingCoordinator(Executor):
+    """Routes between Router Agent and Specialist Agents."""
+
+    SPECIALIST_IDS = {
+        "weather": "weather_specialist",
+        "finance": "finance_specialist",
+        "search": "search_specialist",
+    }
+
+    def __init__(self) -> None:
+        super().__init__(id="coordinator")
+
+    @handler
+    async def on_agent_response(
+        self,
+        response: AgentExecutorResponse,
+        ctx: WorkflowContext[AgentExecutorRequest, AgentResponse],
+    ) -> None:
+        """Handle responses from Router and Specialist agents."""
+        if response.executor_id != "router":
+            # Specialist response — yield as workflow output
+            await ctx.yield_output(response.agent_response)
+            return
+
+        # Router response — parse routing decision and forward to specialist
+        routing_text = response.agent_response.text.strip().lower()
+
+        if "weather" in routing_text:
+            target = "weather_specialist"
+        elif any(k in routing_text for k in ("finance", "currency", "interest")):
+            target = "finance_specialist"
+        else:
+            target = "search_specialist"
+
+        logger.info("Coordinator routing to: %s (router said: %s)", target, routing_text)
+
+        # Forward the original user query to the specialist
+        original_messages = list(response.full_conversation)
+        user_query = ""
+        for msg in original_messages:
+            if msg.role == "user":
+                user_query = msg.text or ""
+                break
+
+        await ctx.send_message(
+            AgentExecutorRequest(
+                messages=[Message("user", contents=[user_query])],
+                should_respond=True,
+            ),
+            target_id=target,
+        )
+
+
+def _build_workflow():
+    """Build the multi-agent workflow with Router → Coordinator → Specialists."""
+    client = _get_chat_client()
+
+    # Create agents
+    router = AgentExecutor(Agent(
+        client=client,
+        name="router",
+        instructions=(
+            "You are a routing agent. Analyze the user's query and respond "
+            "with ONLY one word:\n"
+            "- 'weather' for weather queries\n"
+            "- 'finance' for currency or interest calculations\n"
+            "- 'search' for news, flights, or general queries\n"
+            "Respond with only the category word, nothing else."
+        ),
+    ))
+
+    weather = AgentExecutor(Agent(
+        client=client,
+        name="weather_specialist",
+        instructions="Use the get_weather tool to answer weather queries.",
+        tools=[get_weather],
+    ))
+
+    finance = AgentExecutor(Agent(
+        client=client,
+        name="finance_specialist",
+        instructions=(
+            "Use convert_currency or calculate_compound_interest tools as needed."
+        ),
+        tools=[convert_currency, calculate_compound_interest],
+    ))
+
+    search = AgentExecutor(Agent(
+        client=client,
+        name="search_specialist",
+        instructions="Use search_news or search_flights tools as needed.",
+        tools=[search_news, search_flights],
+    ))
+
+    coordinator = RoutingCoordinator()
+
+    # Build workflow: Router → Coordinator ↔ Specialists
+    workflow = (
+        WorkflowBuilder(start_executor=router)
+        # Router output goes to Coordinator
+        .add_edge(router, coordinator)
+        # Coordinator can route to any specialist
+        .add_edge(coordinator, weather)
+        .add_edge(coordinator, finance)
+        .add_edge(coordinator, search)
+        # Specialist output goes back to Coordinator (which yields output)
+        .add_edge(weather, coordinator)
+        .add_edge(finance, coordinator)
+        .add_edge(search, coordinator)
+        .build()
+    )
+
+    return workflow
+
+
+async def _run_workflow(input_text: str) -> dict[str, Any]:
+    """Run the multi-agent workflow for a single query."""
+    workflow = _build_workflow()
+
+    _captured_tool_calls.clear()
+    events = await workflow.run(input_text)
+
+    # Extract the final response from workflow outputs
+    response_text = ""
+    outputs = events.get_outputs()
+    for output in outputs:
+        if isinstance(output, AgentResponse) and output.text:
+            response_text = output.text
+
+    return {
+        "response": response_text.strip(),
+        "tool_calls": list(_captured_tool_calls),
+    }
+
+
+def run_evaluation(input_text: str, context: dict) -> dict:
+    """Multi-agent workflow entry point for AgentOps evaluation.
+
+    Uses Microsoft Agent Framework WorkflowBuilder with:
+      Router Agent → RoutingCoordinator → Specialist Agents (@tool)
+    """
+    if not PROJECT_ENDPOINT or not MODEL:
+        raise ValueError(
+            "Set AZURE_AI_FOUNDRY_PROJECT_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
+        )
+
+    return asyncio.run(_run_workflow(input_text))
diff --git a/src/agentops/templates/run-agent-local.yaml b/src/agentops/templates/run-agent-local.yaml
new file mode 100644
index 00000000..915b023a
--- /dev/null
+++ b/src/agentops/templates/run-agent-local.yaml
@@ -0,0 +1,37 @@
+version: 1
+
+# Local agent workflow evaluation via callable adapter.
+# Evaluates a local Python function that implements a multi-agent workflow.
+#
+# Two adapter options:
+#
+#   1. agent_framework_adapter:run_evaluation
+#      For Azure AI Foundry agents (Agent Framework SDK).
+#      Requires: AZURE_AI_FOUNDRY_PROJECT_ENDPOINT and AGENT_ID env vars.
+#      pip install azure-ai-projects azure-identity
+#
+#   2. callable_adapter:run_evaluation
+#      For custom agents — HTTP, direct Python, or any callable.
+#
+# Cloud-only evaluators (TaskCompletionEvaluator, ToolSelectionEvaluator,
+# ToolInputAccuracyEvaluator) will be gracefully skipped in local mode.
+# Use 'hosting: foundry' with 'execution_mode: remote' to run all evaluators.
+
+target:
+  type: agent
+  hosting: local
+  execution_mode: local
+  framework: agent_framework
+  local:
+    # Option 1: Agent Framework adapter (Azure AI Foundry agents)
+    #   callable: agent_framework_adapter:run_evaluation
+    # Option 2: Custom callable adapter (HTTP, direct Python, etc.)
+    callable: callable_adapter:run_evaluation
+bundle:
+  name: agent_workflow_baseline
+dataset:
+  name: smoke-agent-tools
+execution:
+  timeout_seconds: 300
+output:
+  write_report: true
diff --git a/tests/unit/test_initializer.py b/tests/unit/test_initializer.py
index 67357b48..74daab3e 100644
--- a/tests/unit/test_initializer.py
+++ b/tests/unit/test_initializer.py
@@ -42,7 +42,7 @@ def test_init_creates_expected_files(tmp_path: Path) -> None:
     assert (tmp_path / ".agentops" / "data" / "smoke-conversational.jsonl").is_file()
     assert (tmp_path / ".agentops" / "workflows" / "agentops-eval.yml").is_file()
 
-    assert len(result.created_files) == 24
+    assert len(result.created_files) == 27
     assert len(result.overwritten_files) == 0
 
     run_config = load_yaml(tmp_path / ".agentops" / "run.yaml")
diff --git a/tests/unit/test_skills.py b/tests/unit/test_skills.py
index 4392f077..5b9a4458 100644
--- a/tests/unit/test_skills.py
+++ b/tests/unit/test_skills.py
@@ -1,12 +1,22 @@
+import io
+import json
+import tarfile
 from pathlib import Path
+from unittest.mock import patch
 
+import pytest
 from typer.testing import CliRunner
 
 from agentops.cli.app import app
 from agentops.services.skills import (
     _COPILOT_MARKER_END,
     _COPILOT_MARKER_START,
+    _extract_skill_from_tarball,
+    _parse_github_ref,
+    _parse_skill_frontmatter,
+    _validate_skill_name,
     detect_platforms,
+    install_github_skill,
     install_skills,
     register_skills,
 )
@@ -403,3 +413,347 @@ def test_cli_init_does_not_install_skills_claude(tmp_path: Path) -> None:
 
     for rel in _CLAUDE_SKILL_PATHS:
         assert not (tmp_path / rel).exists(), f"Should not exist after init: {rel}"
+
+
+# ---------------------------------------------------------------------------
+# GitHub ref parsing
+# ---------------------------------------------------------------------------
+
+
+def test_parse_github_ref_simple() -> None:
+    ref = _parse_github_ref("donlee/pptx-designer")
+    assert ref.owner == "donlee"
+    assert ref.repo == "pptx-designer"
+    assert ref.ref == "main"
+
+
+def test_parse_github_ref_with_prefix() -> None:
+    ref = _parse_github_ref("github:org/repo")
+    assert ref.owner == "org"
+    assert ref.repo == "repo"
+    assert ref.ref == "main"
+
+
+def test_parse_github_ref_with_version() -> None:
+    ref = _parse_github_ref("github:org/repo@v1.2.3")
+    assert ref.owner == "org"
+    assert ref.repo == "repo"
+    assert ref.ref == "v1.2.3"
+
+
+def test_parse_github_ref_with_branch() -> None:
+    ref = _parse_github_ref("org/repo@feature/my-branch")
+    assert ref.ref == "feature/my-branch"
+
+
+def test_parse_github_ref_invalid() -> None:
+    with pytest.raises(ValueError, match="Invalid GitHub skill reference"):
+        _parse_github_ref("not-valid")  # no "/" between an owner and a repo
+
+
+def test_parse_github_ref_empty() -> None:
+    with pytest.raises(ValueError, match="Invalid GitHub skill reference"):
+        _parse_github_ref("")  # empty input must fail with the same message
+
+
+# ---------------------------------------------------------------------------
+# Skill name validation
+# ---------------------------------------------------------------------------
+
+
+def test_validate_skill_name_valid() -> None:
+    assert _validate_skill_name("pptx-designer") == "pptx-designer"
+    assert _validate_skill_name("myskill") == "myskill"
+    assert _validate_skill_name("my-cool-skill") == "my-cool-skill"
+
+
+def test_validate_skill_name_invalid() -> None:
+    with pytest.raises(ValueError, match="Invalid skill name"):
+        _validate_skill_name("My Skill")
+
+    with pytest.raises(ValueError, match="Invalid skill name"):
+        _validate_skill_name("../traversal")
+
+    with pytest.raises(ValueError, match="Invalid skill name"):
+        _validate_skill_name("")
+
+    with pytest.raises(ValueError, match="Invalid skill name"):
+        _validate_skill_name("UPPERCASE")
+
+
+# ---------------------------------------------------------------------------
+# Frontmatter parsing
+# ---------------------------------------------------------------------------
+
+
+_VALID_FRONTMATTER = """\
+---
+name: test-skill
+description: A test skill for unit testing.
+license: MIT
+---
+
+# Test Skill
+
+Instructions here.
+"""
+
+
+def test_parse_frontmatter_valid() -> None:
+    meta = _parse_skill_frontmatter(_VALID_FRONTMATTER)
+    assert meta["name"] == "test-skill"
+    assert "test skill" in meta["description"].lower()
+
+
+def test_parse_frontmatter_missing_name() -> None:
+    content = "---\ndescription: test\n---\n# Body"
+    with pytest.raises(ValueError, match="missing required 'name'"):
+        _parse_skill_frontmatter(content)
+
+
+def test_parse_frontmatter_missing_description() -> None:
+    content = "---\nname: test\n---\n# Body"
+    with pytest.raises(ValueError, match="missing required 'description'"):
+        _parse_skill_frontmatter(content)
+
+
+def test_parse_frontmatter_no_frontmatter() -> None:
+    with pytest.raises(ValueError, match="missing YAML frontmatter"):
+        _parse_skill_frontmatter("# Just a heading")  # document does not open with "---"
+
+
+def test_parse_frontmatter_unclosed() -> None:
+    with pytest.raises(ValueError, match="unclosed YAML frontmatter"):
+        _parse_skill_frontmatter("---\nname: test\n")  # opening "---" with no closing one
+
+
+def test_parse_frontmatter_multiline_description() -> None:
+    content = "---\nname: test-skill\ndescription: >\n  A long\n  description here.\n---\n# Body"
+    meta = _parse_skill_frontmatter(content)
+    assert "long" in meta["description"]
+    assert "description here" in meta["description"]
+
+
+# ---------------------------------------------------------------------------
+# Tarball extraction
+# ---------------------------------------------------------------------------
+
+
+def _make_test_tarball(files: dict[str, str], prefix: str = "owner-repo-abc123") -> bytes:
+    """Create a gzipped tarball with the given files for testing."""
+    buf = io.BytesIO()
+    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
+        for path, content in files.items():
+            full_path = f"{prefix}/{path}"
+            data = content.encode("utf-8")
+            info = tarfile.TarInfo(name=full_path)
+            info.size = len(data)
+            tar.addfile(info, io.BytesIO(data))
+    return buf.getvalue()
+
+
+def test_extract_skill_from_tarball() -> None:
+    tarball = _make_test_tarball({
+        "my-skill/SKILL.md": _VALID_FRONTMATTER,
+        "my-skill/references/guide.md": "# Guide\n\nSome content.",
+    })
+    meta, files = _extract_skill_from_tarball(tarball, "my-skill")
+    assert meta["name"] == "test-skill"
+    assert "SKILL.md" in files
+    assert "references/guide.md" in files
+
+
+def test_extract_skill_prefers_repo_named_dir() -> None:
+    tarball = _make_test_tarball({
+        "my-skill/SKILL.md": _VALID_FRONTMATTER,
+        "other-dir/SKILL.md": _VALID_FRONTMATTER,
+    })
+    meta, files = _extract_skill_from_tarball(tarball, "my-skill")
+    assert meta["name"] == "test-skill"
+
+
+def test_extract_skill_root_skill_md() -> None:
+    tarball = _make_test_tarball({
+        "SKILL.md": _VALID_FRONTMATTER,
+    })
+    meta, files = _extract_skill_from_tarball(tarball, "some-repo")
+    assert meta["name"] == "test-skill"
+    assert "SKILL.md" in files
+
+
+def test_extract_skill_no_skill_md() -> None:
+    tarball = _make_test_tarball({
+        "README.md": "# Hello",
+    })
+    with pytest.raises(ValueError, match="No SKILL.md found"):
+        _extract_skill_from_tarball(tarball, "some-repo")
+
+
+def test_extract_skill_multiple_ambiguous() -> None:
+    tarball = _make_test_tarball({
+        "skill-a/SKILL.md": _VALID_FRONTMATTER,
+        "skill-b/SKILL.md": _VALID_FRONTMATTER.replace("test-skill", "other-skill"),
+    })
+    with pytest.raises(ValueError, match="Multiple skills found"):
+        _extract_skill_from_tarball(tarball, "unrelated-repo")
+
+
+def test_extract_skill_skips_scripts() -> None:
+    tarball = _make_test_tarball({
+        "my-skill/SKILL.md": _VALID_FRONTMATTER,
+        "my-skill/scripts/run.py": "print('hello')",
+        "my-skill/references/ref.md": "# Ref",
+    })
+    _, files = _extract_skill_from_tarball(tarball, "my-skill")
+    assert "references/ref.md" in files
+    assert "scripts/run.py" not in files  # scripts blocked by default
+
+
+def test_extract_skill_blocks_path_traversal() -> None:
+    tarball = _make_test_tarball({
+        "my-skill/SKILL.md": _VALID_FRONTMATTER,
+        "my-skill/../../../etc/passwd": "root:x:0:0",
+    })
+    _, files = _extract_skill_from_tarball(tarball, "my-skill")
+    assert all(".." not in p for p in files)
+
+
+def test_extract_skill_blocks_hidden_files() -> None:
+    tarball = _make_test_tarball({
+        "my-skill/SKILL.md": _VALID_FRONTMATTER,
+        "my-skill/.env": "SECRET=abc",
+        "my-skill/references/guide.md": "# Guide",
+    })
+    _, files = _extract_skill_from_tarball(tarball, "my-skill")
+    assert ".env" not in files
+    assert "references/guide.md" in files
+
+
+# ---------------------------------------------------------------------------
+# install_github_skill (with mocked network)
+# ---------------------------------------------------------------------------
+
+
+def test_install_github_skill_copilot(tmp_path: Path) -> None:
+    tarball = _make_test_tarball({
+        "pptx-designer/SKILL.md": _VALID_FRONTMATTER,
+        "pptx-designer/references/setup.md": "# Setup guide",
+    })
+
+    with patch(
+        "agentops.services.skills._fetch_github_tarball", return_value=tarball
+    ):
+        result = install_github_skill(
+            source="donlee/pptx-designer",
+            directory=tmp_path,
+            platforms=["copilot"],
+            force=True,
+        )
+
+    # SKILL.md installed
+    skill_path = tmp_path / ".github/skills/test-skill/SKILL.md"
+    assert skill_path.exists()
+    content = skill_path.read_text(encoding="utf-8")
+    assert content.startswith("---")  # frontmatter preserved for copilot
+
+    # Reference file installed
+    ref_path = tmp_path / ".github/skills/test-skill/references/setup.md"
+    assert ref_path.exists()
+
+    # Provenance file created
+    prov_path = tmp_path / ".github/skills/test-skill/.installed-from.json"
+    assert prov_path.exists()
+    prov = json.loads(prov_path.read_text())
+    assert prov["source"] == "github:donlee/pptx-designer"
+    assert prov["skill_name"] == "test-skill"
+
+    assert len(result.created_files) >= 2
+
+
+def test_install_github_skill_claude(tmp_path: Path) -> None:
+    tarball = _make_test_tarball({
+        "pptx-designer/SKILL.md": _VALID_FRONTMATTER,
+        "pptx-designer/references/setup.md": "# Setup guide",
+    })
+
+    with patch(
+        "agentops.services.skills._fetch_github_tarball", return_value=tarball
+    ):
+        install_github_skill(
+            source="donlee/pptx-designer",
+            directory=tmp_path,
+            platforms=["claude"],
+        )
+
+    # Claude gets a single .md file with frontmatter stripped
+    skill_path = tmp_path / ".claude/commands/test-skill.md"
+    assert skill_path.exists()
+    content = skill_path.read_text(encoding="utf-8")
+    assert not content.startswith("---")  # frontmatter stripped
+
+    # Claude does NOT get reference files
+    ref_path = tmp_path / ".claude/commands/references/setup.md"
+    assert not ref_path.exists()
+
+
+def test_install_github_skill_skip_existing(tmp_path: Path) -> None:
+    tarball = _make_test_tarball({
+        "my-skill/SKILL.md": _VALID_FRONTMATTER,
+    })
+
+    # Pre-create the file
+    dest = tmp_path / ".github/skills/test-skill/SKILL.md"
+    dest.parent.mkdir(parents=True)
+    dest.write_text("custom content")
+
+    with patch(
+        "agentops.services.skills._fetch_github_tarball", return_value=tarball
+    ):
+        result = install_github_skill(
+            source="org/my-skill",
+            directory=tmp_path,
+            platforms=["copilot"],
+            force=False,
+        )
+
+    assert len(result.skipped_files) >= 1
+    assert dest.read_text() == "custom content"
+
+
+# ---------------------------------------------------------------------------
+# CLI — agentops skills install --from
+# ---------------------------------------------------------------------------
+
+
+def test_cli_skills_install_from_github(tmp_path: Path) -> None:
+    tarball = _make_test_tarball({
+        "pptx-designer/SKILL.md": _VALID_FRONTMATTER,
+    })
+
+    with patch(
+        "agentops.services.skills._fetch_github_tarball", return_value=tarball
+    ):
+        result = runner.invoke(
+            app,
+            [
+                "skills", "install",
+                "--from", "donlee/pptx-designer",
+                "--dir", str(tmp_path),
+            ],
+        )
+
+    assert result.exit_code == 0
+    assert "Installing skill from GitHub" in result.stdout
+    assert "created" in result.stdout
+
+
+def test_cli_skills_install_from_invalid_ref(tmp_path: Path) -> None:
+    result = runner.invoke(
+        app,
+        [
+            "skills", "install",
+            "--from", "not-valid-ref",
+            "--dir", str(tmp_path),
+        ],
+    )
+    assert result.exit_code == 1