From 9ad03189cc5d34352e4273d9979c0abf78ecd11d Mon Sep 17 00:00:00 2001 From: Hisku Date: Tue, 10 Mar 2026 14:22:16 +0000 Subject: [PATCH 1/8] feat: export ToolSanitizationRule type from public API Allows consumers to properly type custom toolRules passed via PromptDefenseOptions.config without importing from internal modules. Co-Authored-By: Claude Sonnet 4.6 --- src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index.ts b/src/index.ts index 1fbad19..1d39910 100644 --- a/src/index.ts +++ b/src/index.ts @@ -27,4 +27,4 @@ export { type PromptDefenseOptions, } from "./core/prompt-defense"; // Types -export type { RiskLevel, Tier1Result } from "./types"; +export type { RiskLevel, Tier1Result, ToolSanitizationRule } from "./types"; From 2046fea5e7902fe1adde19ceae5dde76b282deea Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 17:27:01 +0000 Subject: [PATCH 2/8] fix: add tier2Fields option to scope Tier 2 classification to specific fields When tier2Fields is set, extractStrings only collects text from matching field keys (descending into non-matching keys to find them deeper). This lets callers focus Tier 2 on user-controlled fields (e.g. subject, body) and skip encoded or non-text fields, reducing both latency and false-negative risk from base64-encoded content. Co-Authored-By: Claude Sonnet 4.6 --- src/core/prompt-defense.ts | 45 +++++++++++++++++++++++++++++--------- src/types.ts | 6 +++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index 4210c4a..db9fd50 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -45,26 +45,42 @@ export interface DefenseResult { /** * Recursively extract all string values from an object. - * Used to collect text content from tool results for Tier 2 classification. + * When `fields` is provided, only strings under matching field keys are collected; + * the traversal still descends into non-matching keys to find matching ones deeper. */ -function extractStrings(obj: unknown): string[] { +function extractStrings(obj: unknown, fields?: string[]): string[] { const strings: string[] = []; - function traverse(value: unknown): void { + function collectAll(value: unknown): void { if (typeof value === "string") { strings.push(value); } else if (Array.isArray(value)) { - for (const item of value) { - traverse(item); - } + for (const item of value) collectAll(item); + } else if (value && typeof value === "object") { + for (const v of Object.values(value)) collectAll(v); + } + } + + function traverse(value: unknown): void { + if (Array.isArray(value)) { + for (const item of value) traverse(item); } else if (value && typeof value === "object") { - for (const v of Object.values(value)) { - traverse(v); + for (const [k, v] of Object.entries(value as Record)) { + if (fields?.includes(k)) { + collectAll(v); + } else { + traverse(v); + } } } } - traverse(obj); + if (!fields) { + collectAll(obj); + } else { + traverse(obj); + } + return strings; } @@ -91,6 +107,12 @@ export interface PromptDefenseOptions { * Defaults to false — tool rules are opt-in to avoid unexpected risk level inflation. */ useDefaultToolRules?: boolean; + /** + * Only run Tier 2 on strings extracted from these field names. + * Strings under any other field key are skipped. + * If omitted, Tier 2 runs on all strings in the tool result. + */ + tier2Fields?: string[]; } /** @@ -114,6 +136,7 @@ export class PromptDefense { private toolResultSanitizer: ToolResultSanitizer; private patternDetector: PatternDetector; private tier2Classifier: Tier2Classifier | null = null; + private tier2Fields: string[] | undefined; constructor(options: PromptDefenseOptions = {}) { // Build configuration @@ -124,6 +147,8 @@ export class PromptDefense { this.config.blockHighRisk = options.blockHighRisk; } + this.tier2Fields = options.tier2Fields; + // Initialize components this.toolResultSanitizer = createToolResultSanitizer({ riskyFields: this.config.riskyFields, @@ -216,7 +241,7 @@ export class PromptDefense { let tier2Risk: RiskLevel = "low"; if (this.tier2Classifier) { - const strings = extractStrings(value); + const strings = extractStrings(value, this.tier2Fields); const combinedText = strings.join("\n\n"); if (combinedText.length > 0) { diff --git a/src/types.ts b/src/types.ts index 6323c85..45f194e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -317,6 +317,12 @@ export interface PromptDefenseConfig { mediumRiskThreshold: number; /** Size threshold to skip Tier 2 (bytes) */ skipBelowSize: number; + /** + * Only run Tier 2 on strings extracted from these field names. + * Strings under any other field key are skipped. + * If omitted, Tier 2 runs on all strings in the tool result. + */ + tier2Fields?: string[]; }; /** Whether to block high/critical risk by default */ blockHighRisk: boolean; From 18fe270cbd7bf2da0fe26a933f4e758e3de296f7 Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 17:30:26 +0000 Subject: [PATCH 3/8] fix: enable Tier 2 classification by default Changes enableTier2 default from false to true so callers don't need to explicitly opt in. Also updates DEFAULT_TIER2_CONFIG.enabled to reflect the actual behaviour. Co-Authored-By: Claude Sonnet 4.6 --- src/config.ts | 2 +- src/core/prompt-defense.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config.ts b/src/config.ts index 9a24efc..955b8f2 100644 --- a/src/config.ts +++ b/src/config.ts @@ -169,7 +169,7 @@ export const DEFAULT_CUMULATIVE_RISK_THRESHOLDS = { * Default Tier 2 configuration */ export const DEFAULT_TIER2_CONFIG = { - enabled: false, // Disabled until implemented + enabled: true, mode: "onnx" as const, highRiskThreshold: 0.8, mediumRiskThreshold: 0.5, diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index db9fd50..d6b6025 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -166,7 +166,7 @@ export class PromptDefense { this.patternDetector = createPatternDetector(); // Initialize Tier 2 classifier if enabled - if (options.enableTier2) { + if (options.enableTier2 ?? true) { this.tier2Classifier = createTier2Classifier(options.tier2Config); if (options.tier2Weights) { this.tier2Classifier.loadWeights(options.tier2Weights); From e3c44fec7393dac05b3e560070b762afcf0ea923 Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 17:41:04 +0000 Subject: [PATCH 4/8] docs: reflect Tier 2 enabled by default - Remove explicit enableTier2: true from all examples (now the default) - Update API reference: default true, set false to disable - Add tier2Fields to API reference Co-Authored-By: Claude Sonnet 4.6 --- README.md | 12 ++++++------ src/core/prompt-defense.ts | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 4d0db25..15f3069 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,9 @@ import { createPromptDefense } from '@stackone/defender'; // Create defense with Tier 1 (patterns) + Tier 2 (ML classifier) // blockHighRisk: true enables the allowed/blocked decision +// Tier 1 (patterns) + Tier 2 (ML classifier) are both on by default. +// blockHighRisk: true enables the allowed/blocked decision. const defense = createPromptDefense({ - enableTier2: true, blockHighRisk: true, useDefaultToolRules: true, // Enable built-in per-tool base risk and field-handling rules (risky-field overrides always apply) }); @@ -105,9 +106,10 @@ Create a defense instance. ```typescript const defense = createPromptDefense({ enableTier1: true, // Pattern detection (default: true) - enableTier2: true, // ML classification (default: false) + enableTier2: true, // ML classification (default: true) — set false to disable blockHighRisk: true, // Block high/critical content (default: false) useDefaultToolRules: true, // Enable built-in per-tool base risk and field-handling rules (default: false) + tier2Fields: ['subject', 'body', 'snippet'], // Scope Tier 2 to specific fields (default: all fields) defaultRiskLevel: 'medium', }); ``` @@ -164,14 +166,13 @@ console.log(result.matches); // [{ pattern: '...', severity: 'high', ... } ONNX mode auto-loads the bundled model on first `defendToolResult()` call. Use `warmupTier2()` at startup to avoid first-call latency: ```typescript -// ONNX mode (default) — optional warmup to pre-load at startup -const defense = createPromptDefense({ enableTier2: true }); +// ONNX mode (default) — Tier 2 is on by default, warmup is optional +const defense = createPromptDefense(); await defense.warmupTier2(); // optional, avoids ~1-2s first-call latency // MLP mode (legacy) — requires loading weights explicitly import { createPromptDefense, MLP_WEIGHTS } from '@stackone/defender'; const mlpDefense = createPromptDefense({ - enableTier2: true, tier2Config: { mode: 'mlp' }, }); mlpDefense.loadTier2Weights(MLP_WEIGHTS); @@ -187,7 +188,6 @@ import { generateText, tool } from 'ai'; import { createPromptDefense } from '@stackone/defender'; const defense = createPromptDefense({ - enableTier2: true, blockHighRisk: true, useDefaultToolRules: true, }); diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index d6b6025..0616ac1 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -92,7 +92,7 @@ export interface PromptDefenseOptions { config?: Partial; /** Enable Tier 1 classification */ enableTier1?: boolean; - /** Enable Tier 2 ML classification */ + /** Enable Tier 2 ML classification (default: true — set false to disable) */ enableTier2?: boolean; /** Tier 2 classifier configuration */ tier2Config?: Partial; From 731d0008d379ae560132d2fc5da5b6ad7be7b9bc Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 17:45:13 +0000 Subject: [PATCH 5/8] fix: address PR review comments on extractStrings - Normalize empty fields array to collect-all (avoids silent Tier 2 bypass) - Fall back to config.tier2.tier2Fields when options.tier2Fields is unset - Use Set for O(1) key lookups during traversal Co-Authored-By: Claude Sonnet 4.6 --- src/core/prompt-defense.ts | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index 0616ac1..22dc9fe 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -61,12 +61,20 @@ function extractStrings(obj: unknown, fields?: string[]): string[] { } } + if (!fields || fields.length === 0) { + collectAll(obj); + return strings; + } + + // Use a Set for O(1) key lookups during traversal + const fieldSet = new Set(fields); + function traverse(value: unknown): void { if (Array.isArray(value)) { for (const item of value) traverse(item); } else if (value && typeof value === "object") { for (const [k, v] of Object.entries(value as Record)) { - if (fields?.includes(k)) { + if (fieldSet.has(k)) { collectAll(v); } else { traverse(v); @@ -75,12 +83,7 @@ function extractStrings(obj: unknown, fields?: string[]): string[] { } } - if (!fields) { - collectAll(obj); - } else { - traverse(obj); - } - + traverse(obj); return strings; } @@ -147,7 +150,7 @@ export class PromptDefense { this.config.blockHighRisk = options.blockHighRisk; } - this.tier2Fields = options.tier2Fields; + this.tier2Fields = options.tier2Fields ?? this.config.tier2?.tier2Fields; // Initialize components this.toolResultSanitizer = createToolResultSanitizer({ From 5daea214a22b30e52d457879366edcde07c80c49 Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 17:55:03 +0000 Subject: [PATCH 6/8] fix: set tier2SkipReason when no strings extracted for Tier 2 Co-Authored-By: Claude Sonnet 4.6 --- src/core/prompt-defense.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index 22dc9fe..dff77b2 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -37,6 +37,8 @@ export interface DefenseResult { patternsByField: Record; /** Tier 2 ML score (0.0 = safe, 1.0 = injection), undefined if Tier 2 not enabled */ tier2Score?: number; + /** Reason Tier 2 was skipped (e.g. "No strings extracted") when tier2Score is undefined */ + tier2SkipReason?: string; /** The sentence with the highest Tier 2 score */ maxSentence?: string; /** Total processing time in milliseconds */ @@ -240,6 +242,7 @@ export class PromptDefense { // Tier 2: sentence-level ML classification on raw (unsanitized) value let tier2Score: number | undefined; + let tier2SkipReason: string | undefined; let maxSentence: string | undefined; let tier2Risk: RiskLevel = "low"; @@ -253,7 +256,13 @@ export class PromptDefense { tier2Score = tier2Result.score; tier2Risk = this.tier2Classifier.getRiskLevel(tier2Result.score); maxSentence = tier2Result.maxSentence; + } else { + tier2SkipReason = tier2Result.skipReason; } + } else { + tier2SkipReason = this.tier2Fields?.length + ? "No strings found in tier2Fields" + : "No strings extracted from tool result"; } } @@ -285,6 +294,7 @@ export class PromptDefense { fieldsSanitized, patternsByField: patternsRemovedByField, tier2Score, + tier2SkipReason, maxSentence, latencyMs: performance.now() - startTime, }; From a42f23d3e679d1873af34c5598de21f64b9edb1e Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 18:04:53 +0000 Subject: [PATCH 7/8] fix: handle plain strings in extractStrings with tier2Fields set Co-Authored-By: Claude Sonnet 4.6 --- src/core/prompt-defense.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts index dff77b2..a3205f3 100644 --- a/src/core/prompt-defense.ts +++ b/src/core/prompt-defense.ts @@ -82,6 +82,9 @@ function extractStrings(obj: unknown, fields?: string[]): string[] { traverse(v); } } + } else if (typeof value === "string") { + // Plain string — no field keys to filter on, fall back to collecting it + strings.push(value); } } From 82c9e8160da744af7574b8f0e61cf0ceb197f8a0 Mon Sep 17 00:00:00 2001 From: Hisku Date: Wed, 11 Mar 2026 18:13:15 +0000 Subject: [PATCH 8/8] fix: drop unused enabled from PromptDefenseConfig.tier2 Co-Authored-By: Claude Sonnet 4.6 --- src/config.ts | 1 - src/types.ts | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/config.ts b/src/config.ts index 955b8f2..043cd8f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -169,7 +169,6 @@ export const DEFAULT_CUMULATIVE_RISK_THRESHOLDS = { * Default Tier 2 configuration */ export const DEFAULT_TIER2_CONFIG = { - enabled: true, mode: "onnx" as const, highRiskThreshold: 0.8, mediumRiskThreshold: 0.5, diff --git a/src/types.ts b/src/types.ts index 45f194e..3b7f665 100644 --- a/src/types.ts +++ b/src/types.ts @@ -307,8 +307,6 @@ export interface PromptDefenseConfig { }; /** Tier 2 configuration */ tier2: { - /** Whether Tier 2 is enabled */ - enabled: boolean; /** Inference mode: 'onnx' for fine-tuned MiniLM, 'mlp' for frozen embeddings + MLP head */ mode?: "mlp" | "onnx"; /** Score threshold for high risk */