fix: address review feedback on defender skill

hiskudin · claude · hiskudin · commit 0bcd5e13bace · 2026-03-31T17:47:22.000+01:00
- Align metadata.version to "2.0" (matches other skills)
- Fix PromptDefense config: tier2 → tier2Config (matches actual API)
- Fix ToolResultSanitizer example to match real config shape
- Add illustrative disclaimer to Important section
- Fix Tier 2 verification: check result.tier instead of score value
- Add missing import in Pattern 2 reference example
- Add try/catch to Express middleware example
- Use threshold param in batch evaluation example

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/skills/stackone-defender/SKILL.md b/skills/stackone-defender/SKILL.md
@@ -5,7 +5,7 @@ license: MIT
 compatibility: Requires Node.js 18+. Optional peer dependencies @huggingface/transformers and onnxruntime-node for Tier 2 ML classification.
 metadata:
   author: stackone
-  version: "1.0"
+  version: "2.0"
 ---
 
 # StackOne Defender
@@ -18,7 +18,7 @@ StackOne Defender is a local-first prompt injection and jailbreak detection libr
 https://www.npmjs.com/package/@stackone/defender
 ```
 
-Do not guess configuration options. Verify against the published package.
+Code examples below are illustrative — verify class names and config keys against the published package README before use. Do not guess configuration options.
 
 ## Instructions
 
@@ -55,7 +55,7 @@ Without these, Defender falls back to Tier 1 pattern matching only.
 import { PromptDefense } from "@stackone/defender";
 
 const defense = new PromptDefense({
-  tier2: { mode: "onnx" }, // default — uses ONNX ML model
+  tier2Config: { mode: "onnx" }, // default — uses ONNX ML model
 });
 
 const result = await defense.scan("What is the capital of France?");
@@ -90,11 +90,9 @@ The `scan()` method returns:
 ```typescript
 const defense = new PromptDefense({
   // Tier 1: pattern matching
-  tier1: {
-    enabled: true, // default: true
-  },
+  enableTier1: true, // default: true
   // Tier 2: ML classification
-  tier2: {
+  tier2Config: {
     mode: "onnx",       // "onnx" (default) or "mlp"
     threshold: 0.5,      // score above this = blocked (default: 0.5)
   },
@@ -113,19 +111,19 @@ When building agents, tool results from external APIs can contain injected conte
 ```typescript
 import { ToolResultSanitizer } from "@stackone/defender";
 
-const sanitizer = new ToolResultSanitizer({
-  tier2Config: { mode: "onnx" },
-});
+const sanitizer = new ToolResultSanitizer();
 
 const toolOutput = await externalApi.getData();
-const sanitized = await sanitizer.scan(JSON.stringify(toolOutput));
+const sanitized = await sanitizer.sanitize(toolOutput, "tool-name");
 
-if (!sanitized.allowed) {
+if (sanitized.riskLevel === "high" || sanitized.riskLevel === "critical") {
   console.warn("Tool result contains suspicious content:", sanitized);
   // Handle: skip, flag, or redact the result
 }
 ```
 
+> **Note**: `ToolResultSanitizer` has its own configuration shape — fetch the npm README for full options. The examples above are illustrative.
+
 ## Examples
 
 ### Example 1: User wants to quickly test if a string is safe
@@ -160,7 +158,7 @@ Actions:
 ```typescript
 import { PromptDefense } from "@stackone/defender";
 
-const defense = new PromptDefense({ tier2: { mode: "onnx" } });
+const defense = new PromptDefense({ tier2Config: { mode: "onnx" } });
 
 const dataset = [
   { text: "What is 2+2?", expected: true },
@@ -196,7 +194,7 @@ Result: Root cause identified with actionable fix (threshold adjustment or text
 ### Tier 2 not working / falling back to Tier 1 only
 **Cause**: Missing optional peer dependencies.
 - Install: `npm install @huggingface/transformers onnxruntime-node`
-- Verify: check that `result.score` returns a non-zero value (Tier 1 only returns 0 or 1)
+- Verify: run a scan on a benign string and confirm `result.tier` is `null` (not `"tier1"`) — this confirms the ML model loaded
 
 ### High false positive rate
 **Cause**: Threshold too low for the use case.
diff --git a/skills/stackone-defender/references/integration-patterns.md b/skills/stackone-defender/references/integration-patterns.md
@@ -9,7 +9,7 @@ Scan all tool outputs before they enter the LLM context window. This is the most
 ```typescript
 import { PromptDefense } from "@stackone/defender";
 
-const defense = new PromptDefense({ tier2: { mode: "onnx" } });
+const defense = new PromptDefense({ tier2Config: { mode: "onnx" } });
 
 async function safeToolCall(toolName: string, args: any): Promise<string> {
   const rawResult = await executeTool(toolName, args);
@@ -28,6 +28,10 @@ async function safeToolCall(toolName: string, args: any): Promise<string> {
 Scan user messages before processing. Catches direct prompt injection attempts.
 
 ```typescript
+import { PromptDefense } from "@stackone/defender";
+
+const defense = new PromptDefense({ tier2Config: { mode: "onnx" } });
+
 async function handleUserMessage(message: string) {
   const scan = await defense.scan(message);
 
@@ -47,21 +51,25 @@ Add Defender as HTTP middleware to protect API endpoints that accept free-text i
 ```typescript
 import { PromptDefense } from "@stackone/defender";
 
-const defense = new PromptDefense({ tier2: { mode: "onnx" } });
+const defense = new PromptDefense({ tier2Config: { mode: "onnx" } });
 
 // Express middleware
 async function defenderMiddleware(req, res, next) {
-  const text = req.body?.message || req.body?.input || req.body?.prompt;
-  if (!text) return next();
-
-  const scan = await defense.scan(text);
-  if (!scan.allowed) {
-    return res.status(400).json({
-      error: "Input rejected",
-      reason: `Detected by ${scan.tier} (score: ${scan.score.toFixed(2)})`,
-    });
+  try {
+    const text = req.body?.message || req.body?.input || req.body?.prompt;
+    if (!text) return next();
+
+    const scan = await defense.scan(text);
+    if (!scan.allowed) {
+      return res.status(400).json({
+        error: "Input rejected",
+        reason: `Detected by ${scan.tier} (score: ${scan.score.toFixed(2)})`,
+      });
+    }
+    next();
+  } catch (err) {
+    next(err);
   }
-  next();
 }
 
 app.post("/api/chat", defenderMiddleware, chatHandler);
@@ -74,7 +82,7 @@ Evaluate Defender against a labeled dataset to measure detection quality.
 ```typescript
 import { PromptDefense } from "@stackone/defender";
 
-const defense = new PromptDefense({ tier2: { mode: "onnx" } });
+const defense = new PromptDefense({ tier2Config: { mode: "onnx" } });
 
 interface Sample {
   text: string;
@@ -86,7 +94,7 @@ async function evaluate(samples: Sample[], threshold = 0.5) {
 
   for (const { text, label } of samples) {
     const result = await defense.scan(text);
-    const predicted = !result.allowed;
+    const predicted = result.score >= threshold;
     const actual = label === "malicious";
 
     if (predicted && actual) tp++;
@@ -111,7 +119,7 @@ The ONNX model loads on first inference. Pre-warm at startup to avoid cold-start
 ```typescript
 import { PromptDefense } from "@stackone/defender";
 
-const defense = new PromptDefense({ tier2: { mode: "onnx" } });
+const defense = new PromptDefense({ tier2Config: { mode: "onnx" } });
 
 // Pre-warm at application startup
 await defense.scan("warmup");