diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83ecfc25..b2536916 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -192,6 +192,23 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history and tags so git describe can derive the version + + - name: Sync VSIX version from git tag + run: | + LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0") + LAST_VERSION=${LAST_TAG#v} + IFS='.' read -r MAJOR MINOR PATCH <<< "$LAST_VERSION" + if git describe --tags --exact-match HEAD >/dev/null 2>&1; then + BASE_VERSION="$LAST_VERSION" + else + BASE_VERSION="$MAJOR.$MINOR.$(( ${PATCH%%[!0-9]*} + 1 ))" # drop any suffix like -rc.1 before bumping + fi + jq --arg v "$BASE_VERSION" '.version = $v' \ + plugins/agentops/package.json > plugins/agentops/package.json.tmp + mv plugins/agentops/package.json.tmp plugins/agentops/package.json + echo "VSIX version set to $BASE_VERSION (from tag $LAST_TAG)" - name: Set up Node.js uses: actions/setup-node@v4 @@ -224,6 +241,23 @@ jobs: environment: staging steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history and tags so git describe can derive the version + + - name: Sync VSIX version from git tag + run: | + LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0") + LAST_VERSION=${LAST_TAG#v} + IFS='.' 
read -r MAJOR MINOR PATCH <<< "$LAST_VERSION" + if git describe --tags --exact-match HEAD >/dev/null 2>&1; then + BASE_VERSION="$LAST_VERSION" + else + BASE_VERSION="$MAJOR.$MINOR.$(( ${PATCH%%[!0-9]*} + 1 ))" # drop any suffix like -rc.1 before bumping + fi + jq --arg v "$BASE_VERSION" '.version = $v' \ + plugins/agentops/package.json > plugins/agentops/package.json.tmp + mv plugins/agentops/package.json.tmp plugins/agentops/package.json + echo "VSIX version set to $BASE_VERSION (from tag $LAST_TAG)" - name: Set up Node.js uses: actions/setup-node@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aeb9fcc0..9100c6e7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -147,6 +147,23 @@ jobs: environment: release # same approval gate as PyPI steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history and tags so git describe can derive the version + + - name: Sync VSIX version from git tag + run: | + LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0") + LAST_VERSION=${LAST_TAG#v} + IFS='.' 
read -r MAJOR MINOR PATCH <<< "$LAST_VERSION" + if git describe --tags --exact-match HEAD >/dev/null 2>&1; then + BASE_VERSION="$LAST_VERSION" + else + BASE_VERSION="$MAJOR.$MINOR.$(( ${PATCH%%[!0-9]*} + 1 ))" # drop any suffix like -rc.1 before bumping + fi + jq --arg v "$BASE_VERSION" '.version = $v' \ + plugins/agentops/package.json > plugins/agentops/package.json.tmp + mv plugins/agentops/package.json.tmp plugins/agentops/package.json + echo "VSIX version set to $BASE_VERSION (from tag $LAST_TAG)" - name: Set up Node.js uses: actions/setup-node@v4 diff --git a/.github/workflows/staging.yml b/.github/workflows/staging.yml index 41292f21..2ceb08ea 100644 --- a/.github/workflows/staging.yml +++ b/.github/workflows/staging.yml @@ -124,6 +124,23 @@ jobs: environment: staging steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history and tags so git describe can derive the version + + - name: Sync VSIX version from git tag + run: | + LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.0.0") + LAST_VERSION=${LAST_TAG#v} + IFS='.' read -r MAJOR MINOR PATCH <<< "$LAST_VERSION" + if git describe --tags --exact-match HEAD >/dev/null 2>&1; then + BASE_VERSION="$LAST_VERSION" + else + BASE_VERSION="$MAJOR.$MINOR.$(( ${PATCH%%[!0-9]*} + 1 ))" # drop any suffix like -rc.1 before bumping + fi + jq --arg v "$BASE_VERSION" '.version = $v' \ + plugins/agentops/package.json > plugins/agentops/package.json.tmp + mv plugins/agentops/package.json.tmp plugins/agentops/package.json + echo "VSIX version set to $BASE_VERSION (from tag $LAST_TAG)" - name: Set up Node.js uses: actions/setup-node@v4 diff --git a/plugins/agentops/README.md b/plugins/agentops/README.md index b8c6c5ce..6e118171 100644 --- a/plugins/agentops/README.md +++ b/plugins/agentops/README.md @@ -14,18 +14,10 @@ Copilot agent skills for running standardized evaluation workflows with | **Browse & Inspect** | List and inspect evaluation runs, view per-row scores, browse run history | | **Dataset Management** | Validate, describe, and import datasets for evaluation workflows | -## Prerequisites - -Install the AgentOps CLI in your 
project's virtual environment: - -```bash -pip install agentops-toolkit -``` - ## Installation Install from the -[VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=PUBLISHER_ID.agentops-skills) +[VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=AgentOpsToolkit.agentops-toolkit) or search **"AgentOps Skills"** in the VS Code Extensions view. A **pre-release** channel is available for early access to new skills and updates — @@ -34,13 +26,37 @@ enable it from the extension's Marketplace page or the Extensions view. ## Usage Open **Copilot Chat** in VS Code and describe what you want to do. -The skills are invoked automatically when your request matches their domain: +The skills are invoked automatically when your request matches their domain. + +**Set up a workspace** + +``` +> Initialize an agentops workspace for my Foundry agent project +> Create a RAG evaluation bundle with groundedness and similarity +``` + +**Run and compare evaluations** + +``` +> Run the default evaluation against my agent +> Benchmark gpt-4o vs gpt-4o-mini using the smoke dataset +> Compare the last two evaluation runs and summarize the differences +``` + +**Investigate results** ``` -> Initialize an agentops workspace for my project -> Run the default evaluation -> Compare run abc123 with run def456 > Which rows failed the groundedness threshold? +> Show me the worst-scoring items from the latest run +> Why did similarity drop between run abc123 and run def456? +``` + +**Browse and manage** + +``` +> List all evaluation runs +> Show details for the latest run +> Validate my dataset before running an eval ``` ## Links