Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/skills/azure-typespec-author/evaluate/.vally.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ suites:
# Suite-to-eval mapping excerpt from a diff hunk; the enclosing "suites:" key
# and the original indentation are outside this view.
version-evolution:
evals: ["evals/001005.eval.yaml","evals/001006.eval.yaml","evals/001007.eval.yaml","evals/001008.eval.yaml"]
armtemplate:
# Pre-change value (the hunk's single deletion): the list ends at evals/002010.eval.yaml.
evals: ["evals/002001.eval.yaml","evals/002002.eval.yaml","evals/002003.eval.yaml","evals/002004.eval.yaml","evals/002005.eval.yaml","evals/002006.eval.yaml","evals/002007.eval.yaml","evals/002008.eval.yaml","evals/002009.eval.yaml","evals/002010.eval.yaml"]
# Post-change value (the hunk's single addition): same list with evals/002011.eval.yaml appended.
# NOTE(review): both lines appear here only because this is a diff rendering;
# a real file must keep just one "evals" key under armtemplate.
evals: ["evals/002001.eval.yaml","evals/002002.eval.yaml","evals/002003.eval.yaml","evals/002004.eval.yaml","evals/002005.eval.yaml","evals/002006.eval.yaml","evals/002007.eval.yaml","evals/002008.eval.yaml","evals/002009.eval.yaml","evals/002010.eval.yaml","evals/002011.eval.yaml"]
longrunningoperation:
evals: ["evals/003001.eval.yaml","evals/003002.eval.yaml"]
decorators:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Standalone eval definition holding a single stimulus
# (002011-arm-add-check-existence-operation) together with its graders and
# scoring weights.
# NOTE(review): leading indentation was lost in this view, so the nesting of
# the keys below (e.g. whether "commands" and "graders" belong to the stimulus
# or to an outer level) must be confirmed against the real file.
name: azure-typespec-author-eval
description: Evaluation suite for azure-typespec-author.
version: "1.0"
type: capability # capability | regression

# Root-level environment
environment: azsdk-mcp

# Execution configuration
config:
runs: 1 # Trials per stimulus
timeout: "660s" # Seconds per trial
model: claude-opus-4.6 # Model for agent execution; commented-out alternatives: gpt-4o, claude-sonnet-4.6
executor: copilot-sdk # Which executor to use

# Test cases
stimuli:
# Single stimulus: the prompt asks for a checkExistence (HEAD) operation on the
# existing Employee resource of the Widget project.
- prompt: |
In the Widget project, add a checkExistence (HEAD) operation to the existing Employee resource.
name: 002011-arm-add-check-existence-operation
constraints:
max_turns: 5
max_tokens: 100000  # NOTE(review): the eval.yaml entry for this same stimulus uses 1000000 — confirm which budget is intended
environment:
# Fixture inputs: each src path under ../fixtures/Microsoft.Widget/Widget is
# paired with a same-named dest (presumably staged into the trial workspace —
# confirm against the runner).
files:
- src: ../fixtures/Microsoft.Widget/Widget/employee.tsp
dest: employee.tsp
- src: ../fixtures/Microsoft.Widget/Widget/main.tsp
dest: main.tsp
- src: ../fixtures/Microsoft.Widget/Widget/package-lock.json
dest: package-lock.json
- src: ../fixtures/Microsoft.Widget/Widget/package.json
dest: package.json
- src: ../fixtures/Microsoft.Widget/Widget/readme.md
dest: readme.md
- src: ../fixtures/Microsoft.Widget/Widget/shared.tsp
dest: shared.tsp
- src: ../fixtures/Microsoft.Widget/Widget/tspconfig.yaml
dest: tspconfig.yaml
# Setup command: if FIXTURE_NODE_MODULES is set, that path exists, and there is
# no local node_modules yet, link it as node_modules using the 'junction' link
# type; otherwise the command does nothing.
commands:
- node -e "const fs=require('fs');const t=process.env.FIXTURE_NODE_MODULES;if(t&&fs.existsSync(t)&&!fs.existsSync('node_modules')){fs.symlinkSync(t,'node_modules','junction')}"
# NOTE(review): the next two lines are review-page residue (a resolved comment
# thread marker), not part of the eval definition.
Comment thread
JoyerJin marked this conversation as resolved.
graders:
# tool-calls: lists edit plus the authoring-plan and validation MCP tools as required.
- type: tool-calls
config:
required:
- edit
- azure-sdk-mcp-azsdk_typespec_generate_authoring_plan
- azure-sdk-mcp-azsdk_run_typespec_validation
# skill-invocation: lists the azure-typespec-author skill as required.
- type: skill-invocation
config:
required:
- azure-typespec-author
# file-matches: employee.tsp must contain
# "checkExistence is ArmResourceCheckExistence<Employee>" (whitespace-tolerant regex).
- type: file-matches
config:
path: employee.tsp
pattern: 'checkExistence\s+is\s+ArmResourceCheckExistence\s*<\s*Employee\s*>'
# prompt: model-judged rubric on change scope and quality, scored on scale_1_5.
- type: prompt
config:
scoring: scale_1_5
threshold: 1.0
prompt: 'Verify change scope and quality only: no unrelated edits, and the implemented result is semantically consistent with the task intent.'
model: claude-opus-4.6
# All four grader types carry the same weight of 1.
scoring:
weights:
tool-calls: 1
skill-invocation: 1
prompt: 1
file-matches: 1
threshold: 1.0
43 changes: 43 additions & 0 deletions .github/skills/azure-typespec-author/evaluate/evals/eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,49 @@ stimuli:
threshold: 1
prompt: 'Verify change scope and quality only: no unrelated edits, and the implemented result is semantically consistent with the task intent.'
model: claude-opus-4.6
# Stimulus 002011 in the aggregated eval.yaml: asks for a checkExistence (HEAD)
# operation on the Employee resource and is tagged with suite "armtemplate".
# NOTE(review): leading indentation was lost in this view; confirm key nesting
# against the real file.
- name: 002011-arm-add-check-existence-operation
tags:
suite: armtemplate
# NOTE(review): the next two lines are review-page residue (a resolved comment
# thread marker), not part of the stimulus definition.
Comment thread
JoyerJin marked this conversation as resolved.
prompt: |
In the Widget project, add a checkExistence (HEAD) operation to the existing Employee resource.
constraints:
max_turns: 5
max_tokens: 1000000  # NOTE(review): the standalone definition of this stimulus uses 100000 — confirm which budget is intended
environment:
# Fixture inputs: each src path under ../fixtures/Microsoft.Widget/Widget is
# paired with a same-named dest (presumably staged into the trial workspace —
# confirm against the runner).
files:
- src: ../fixtures/Microsoft.Widget/Widget/employee.tsp
dest: employee.tsp
- src: ../fixtures/Microsoft.Widget/Widget/main.tsp
dest: main.tsp
- src: ../fixtures/Microsoft.Widget/Widget/package-lock.json
dest: package-lock.json
- src: ../fixtures/Microsoft.Widget/Widget/package.json
dest: package.json
- src: ../fixtures/Microsoft.Widget/Widget/readme.md
dest: readme.md
- src: ../fixtures/Microsoft.Widget/Widget/shared.tsp
dest: shared.tsp
- src: ../fixtures/Microsoft.Widget/Widget/tspconfig.yaml
dest: tspconfig.yaml
# Setup command: if FIXTURE_NODE_MODULES is set, that path exists, and there is
# no local node_modules yet, link it as node_modules using the 'junction' link
# type; otherwise the command does nothing.
commands:
- node -e "const fs=require('fs');const t=process.env.FIXTURE_NODE_MODULES;if(t&&fs.existsSync(t)&&!fs.existsSync('node_modules')){fs.symlinkSync(t,'node_modules','junction')}"
# NOTE(review): unlike the standalone definition of this stimulus, there is no
# skill-invocation grader (required: azure-typespec-author) here — confirm the
# omission is intentional.
graders:
# tool-calls: lists edit plus the authoring-plan and validation MCP tools as required.
- type: tool-calls
config:
required:
- edit
- azure-sdk-mcp-azsdk_typespec_generate_authoring_plan
- azure-sdk-mcp-azsdk_run_typespec_validation
# file-matches: employee.tsp must contain
# "checkExistence is ArmResourceCheckExistence<Employee>" (whitespace-tolerant regex).
- type: file-matches
config:
path: employee.tsp
pattern: 'checkExistence\s+is\s+ArmResourceCheckExistence\s*<\s*Employee\s*>'
# prompt: model-judged rubric on change scope and quality, scored on scale_1_5.
- type: prompt
config:
scoring: scale_1_5
threshold: 1
prompt: 'Verify change scope and quality only: no unrelated edits, and the implemented result is semantically consistent with the task intent.'
model: claude-opus-4.6
- name: 003001-arm-action-lro
prompt: |
Add a move async operation to move employee, operation's request is MoveRequest, response is MoveResponse
Expand Down