From 9ad03189cc5d34352e4273d9979c0abf78ecd11d Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Tue, 10 Mar 2026 14:22:16 +0000
Subject: [PATCH 1/8] feat: export ToolSanitizationRule type from public API

Allows consumers to properly type custom toolRules passed via
PromptDefenseOptions.config without importing from internal modules.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/index.ts b/src/index.ts
index 1fbad19..1d39910 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -27,4 +27,4 @@ export {
 	type PromptDefenseOptions,
 } from "./core/prompt-defense";
 // Types
-export type { RiskLevel, Tier1Result } from "./types";
+export type { RiskLevel, Tier1Result, ToolSanitizationRule } from "./types";

From 2046fea5e7902fe1adde19ceae5dde76b282deea Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 17:27:01 +0000
Subject: [PATCH 2/8] fix: add tier2Fields option to scope Tier 2
 classification to specific fields

When tier2Fields is set, extractStrings only collects text from matching
field keys (descending into non-matching keys to find them deeper).
This lets callers focus Tier 2 on user-controlled fields (e.g. subject,
body) and skip encoded or non-text fields, reducing both latency and
false-negative risk from base64-encoded content.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/core/prompt-defense.ts | 45 +++++++++++++++++++++++++++++---------
 src/types.ts               |  6 +++++
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts
index 4210c4a..db9fd50 100644
--- a/src/core/prompt-defense.ts
+++ b/src/core/prompt-defense.ts
@@ -45,26 +45,42 @@ export interface DefenseResult {
 
 /**
  * Recursively extract all string values from an object.
- * Used to collect text content from tool results for Tier 2 classification.
+ * When `fields` is provided, only strings under matching field keys are collected;
+ * the traversal still descends into non-matching keys to find matching ones deeper.
  */
-function extractStrings(obj: unknown): string[] {
+function extractStrings(obj: unknown, fields?: string[]): string[] {
 	const strings: string[] = [];
 
-	function traverse(value: unknown): void {
+	function collectAll(value: unknown): void {
 		if (typeof value === "string") {
 			strings.push(value);
 		} else if (Array.isArray(value)) {
-			for (const item of value) {
-				traverse(item);
-			}
+			for (const item of value) collectAll(item);
+		} else if (value && typeof value === "object") {
+			for (const v of Object.values(value)) collectAll(v);
+		}
+	}
+
+	function traverse(value: unknown): void {
+		if (Array.isArray(value)) {
+			for (const item of value) traverse(item);
 		} else if (value && typeof value === "object") {
-			for (const v of Object.values(value)) {
-				traverse(v);
+			for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
+				if (fields?.includes(k)) {
+					collectAll(v);
+				} else {
+					traverse(v);
+				}
 			}
 		}
 	}
 
-	traverse(obj);
+	if (!fields) {
+		collectAll(obj);
+	} else {
+		traverse(obj);
+	}
+
 	return strings;
 }
 
@@ -91,6 +107,12 @@ export interface PromptDefenseOptions {
 	 * Defaults to false — tool rules are opt-in to avoid unexpected risk level inflation.
 	 */
 	useDefaultToolRules?: boolean;
+	/**
+	 * Only run Tier 2 on strings extracted from these field names.
+	 * Strings under any other field key are skipped.
+	 * If omitted, Tier 2 runs on all strings in the tool result.
+	 */
+	tier2Fields?: string[];
 }
 
 /**
@@ -114,6 +136,7 @@ export class PromptDefense {
 	private toolResultSanitizer: ToolResultSanitizer;
 	private patternDetector: PatternDetector;
 	private tier2Classifier: Tier2Classifier | null = null;
+	private tier2Fields: string[] | undefined;
 
 	constructor(options: PromptDefenseOptions = {}) {
 		// Build configuration
@@ -124,6 +147,8 @@ export class PromptDefense {
 			this.config.blockHighRisk = options.blockHighRisk;
 		}
 
+		this.tier2Fields = options.tier2Fields;
+
 		// Initialize components
 		this.toolResultSanitizer = createToolResultSanitizer({
 			riskyFields: this.config.riskyFields,
@@ -216,7 +241,7 @@ export class PromptDefense {
 		let tier2Risk: RiskLevel = "low";
 
 		if (this.tier2Classifier) {
-			const strings = extractStrings(value);
+			const strings = extractStrings(value, this.tier2Fields);
 			const combinedText = strings.join("\n\n");
 
 			if (combinedText.length > 0) {
diff --git a/src/types.ts b/src/types.ts
index 6323c85..45f194e 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -317,6 +317,12 @@ export interface PromptDefenseConfig {
 		mediumRiskThreshold: number;
 		/** Size threshold to skip Tier 2 (bytes) */
 		skipBelowSize: number;
+		/**
+		 * Only run Tier 2 on strings extracted from these field names.
+		 * Strings under any other field key are skipped.
+		 * If omitted, Tier 2 runs on all strings in the tool result.
+		 */
+		tier2Fields?: string[];
 	};
 	/** Whether to block high/critical risk by default */
 	blockHighRisk: boolean;

From 18fe270cbd7bf2da0fe26a933f4e758e3de296f7 Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 17:30:26 +0000
Subject: [PATCH 3/8] fix: enable Tier 2 classification by default

Changes enableTier2 default from false to true so callers don't need
to explicitly opt in. Also updates DEFAULT_TIER2_CONFIG.enabled to
reflect the actual behaviour.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/config.ts              | 2 +-
 src/core/prompt-defense.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index 9a24efc..955b8f2 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -169,7 +169,7 @@ export const DEFAULT_CUMULATIVE_RISK_THRESHOLDS = {
  * Default Tier 2 configuration
  */
 export const DEFAULT_TIER2_CONFIG = {
-	enabled: false, // Disabled until implemented
+	enabled: true,
 	mode: "onnx" as const,
 	highRiskThreshold: 0.8,
 	mediumRiskThreshold: 0.5,
diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts
index db9fd50..d6b6025 100644
--- a/src/core/prompt-defense.ts
+++ b/src/core/prompt-defense.ts
@@ -166,7 +166,7 @@ export class PromptDefense {
 		this.patternDetector = createPatternDetector();
 
 		// Initialize Tier 2 classifier if enabled
-		if (options.enableTier2) {
+		if (options.enableTier2 ?? true) {
 			this.tier2Classifier = createTier2Classifier(options.tier2Config);
 			if (options.tier2Weights) {
 				this.tier2Classifier.loadWeights(options.tier2Weights);

From e3c44fec7393dac05b3e560070b762afcf0ea923 Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 17:41:04 +0000
Subject: [PATCH 4/8] docs: reflect Tier 2 enabled by default

- Remove explicit enableTier2: true from all examples (now the default)
- Update API reference: default true, set false to disable
- Add tier2Fields to API reference

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md                  | 12 ++++++------
 src/core/prompt-defense.ts |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 4d0db25..15f3069 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,9 @@ import { createPromptDefense } from '@stackone/defender';
 
 // Create defense with Tier 1 (patterns) + Tier 2 (ML classifier)
 // blockHighRisk: true enables the allowed/blocked decision
+// Tier 1 (patterns) + Tier 2 (ML classifier) are both on by default.
+// Pass enableTier2: false to turn the ML classifier off.
 const defense = createPromptDefense({
-  enableTier2: true,
   blockHighRisk: true,
   useDefaultToolRules: true, // Enable built-in per-tool base risk and field-handling rules (risky-field overrides always apply)
 });
@@ -105,9 +106,10 @@ Create a defense instance.
 ```typescript
 const defense = createPromptDefense({
   enableTier1: true,           // Pattern detection (default: true)
-  enableTier2: true,           // ML classification (default: false)
+  enableTier2: true,           // ML classification (default: true) — set false to disable
   blockHighRisk: true,         // Block high/critical content (default: false)
   useDefaultToolRules: true,   // Enable built-in per-tool base risk and field-handling rules (default: false)
+  tier2Fields: ['subject', 'body', 'snippet'], // Scope Tier 2 to specific fields (default: all fields)
   defaultRiskLevel: 'medium',
 });
 ```
@@ -164,14 +166,13 @@ console.log(result.matches);       // [{ pattern: '...', severity: 'high', ... }
 ONNX mode auto-loads the bundled model on first `defendToolResult()` call. Use `warmupTier2()` at startup to avoid first-call latency:
 
 ```typescript
-// ONNX mode (default) — optional warmup to pre-load at startup
-const defense = createPromptDefense({ enableTier2: true });
+// ONNX mode (default) — Tier 2 is on by default, warmup is optional
+const defense = createPromptDefense();
 await defense.warmupTier2(); // optional, avoids ~1-2s first-call latency
 
 // MLP mode (legacy) — requires loading weights explicitly
 import { createPromptDefense, MLP_WEIGHTS } from '@stackone/defender';
 const mlpDefense = createPromptDefense({
-  enableTier2: true,
   tier2Config: { mode: 'mlp' },
 });
 mlpDefense.loadTier2Weights(MLP_WEIGHTS);
@@ -187,7 +188,6 @@ import { generateText, tool } from 'ai';
 import { createPromptDefense } from '@stackone/defender';
 
 const defense = createPromptDefense({
-  enableTier2: true,
   blockHighRisk: true,
   useDefaultToolRules: true,
 });
diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts
index d6b6025..0616ac1 100644
--- a/src/core/prompt-defense.ts
+++ b/src/core/prompt-defense.ts
@@ -92,7 +92,7 @@ export interface PromptDefenseOptions {
 	config?: Partial<PromptDefenseConfig>;
 	/** Enable Tier 1 classification */
 	enableTier1?: boolean;
-	/** Enable Tier 2 ML classification */
+	/** Enable Tier 2 ML classification (default: true — set false to disable) */
 	enableTier2?: boolean;
 	/** Tier 2 classifier configuration */
 	tier2Config?: Partial<Tier2ClassifierConfig>;

From 731d0008d379ae560132d2fc5da5b6ad7be7b9bc Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 17:45:13 +0000
Subject: [PATCH 5/8] fix: address PR review comments on extractStrings

- Normalize empty fields array to collect-all (avoids silent Tier 2 bypass)
- Fall back to config.tier2.tier2Fields when options.tier2Fields is unset
- Use Set for O(1) key lookups during traversal

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/core/prompt-defense.ts | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts
index 0616ac1..22dc9fe 100644
--- a/src/core/prompt-defense.ts
+++ b/src/core/prompt-defense.ts
@@ -61,12 +61,20 @@ function extractStrings(obj: unknown, fields?: string[]): string[] {
 		}
 	}
 
+	if (!fields || fields.length === 0) {
+		collectAll(obj);
+		return strings;
+	}
+
+	// Use a Set for O(1) key lookups during traversal
+	const fieldSet = new Set(fields);
+
 	function traverse(value: unknown): void {
 		if (Array.isArray(value)) {
 			for (const item of value) traverse(item);
 		} else if (value && typeof value === "object") {
 			for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
-				if (fields?.includes(k)) {
+				if (fieldSet.has(k)) {
 					collectAll(v);
 				} else {
 					traverse(v);
@@ -75,12 +83,7 @@ function extractStrings(obj: unknown, fields?: string[]): string[] {
 		}
 	}
 
-	if (!fields) {
-		collectAll(obj);
-	} else {
-		traverse(obj);
-	}
-
+	traverse(obj);
 	return strings;
 }
 
@@ -147,7 +150,7 @@ export class PromptDefense {
 			this.config.blockHighRisk = options.blockHighRisk;
 		}
 
-		this.tier2Fields = options.tier2Fields;
+		this.tier2Fields = options.tier2Fields ?? this.config.tier2?.tier2Fields;
 
 		// Initialize components
 		this.toolResultSanitizer = createToolResultSanitizer({

From 5daea214a22b30e52d457879366edcde07c80c49 Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 17:55:03 +0000
Subject: [PATCH 6/8] fix: set tier2SkipReason when no strings extracted for
 Tier 2

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/core/prompt-defense.ts | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts
index 22dc9fe..dff77b2 100644
--- a/src/core/prompt-defense.ts
+++ b/src/core/prompt-defense.ts
@@ -37,6 +37,8 @@ export interface DefenseResult {
 	patternsByField: Record<string, string[]>;
 	/** Tier 2 ML score (0.0 = safe, 1.0 = injection), undefined if Tier 2 not enabled */
 	tier2Score?: number;
+	/** Reason Tier 2 was skipped (e.g. "No strings extracted from tool result") when tier2Score is undefined */
+	tier2SkipReason?: string;
 	/** The sentence with the highest Tier 2 score */
 	maxSentence?: string;
 	/** Total processing time in milliseconds */
@@ -240,6 +242,7 @@ export class PromptDefense {
 
 		// Tier 2: sentence-level ML classification on raw (unsanitized) value
 		let tier2Score: number | undefined;
+		let tier2SkipReason: string | undefined;
 		let maxSentence: string | undefined;
 		let tier2Risk: RiskLevel = "low";
 
@@ -253,7 +256,13 @@ export class PromptDefense {
 					tier2Score = tier2Result.score;
 					tier2Risk = this.tier2Classifier.getRiskLevel(tier2Result.score);
 					maxSentence = tier2Result.maxSentence;
+				} else {
+					tier2SkipReason = tier2Result.skipReason;
 				}
+			} else {
+				tier2SkipReason = this.tier2Fields?.length
+					? "No strings found in tier2Fields"
+					: "No strings extracted from tool result";
 			}
 		}
 
@@ -285,6 +294,7 @@ export class PromptDefense {
 			fieldsSanitized,
 			patternsByField: patternsRemovedByField,
 			tier2Score,
+			tier2SkipReason,
 			maxSentence,
 			latencyMs: performance.now() - startTime,
 		};

From a42f23d3e679d1873af34c5598de21f64b9edb1e Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 18:04:53 +0000
Subject: [PATCH 7/8] fix: handle plain strings in extractStrings with
 tier2Fields set

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/core/prompt-defense.ts | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/core/prompt-defense.ts b/src/core/prompt-defense.ts
index dff77b2..a3205f3 100644
--- a/src/core/prompt-defense.ts
+++ b/src/core/prompt-defense.ts
@@ -85,6 +85,13 @@ function extractStrings(obj: unknown, fields?: string[]): string[] {
 		}
 	}
 
+	// A bare string result has no field keys to filter on, so classify it as-is.
+	// This check must stay outside traverse(): inside the recursion it would also
+	// collect strings stored under non-matching keys, defeating tier2Fields.
+	if (typeof obj === "string") {
+		return [obj];
+	}
+
 	traverse(obj);
 	return strings;
 }

From 82c9e8160da744af7574b8f0e61cf0ceb197f8a0 Mon Sep 17 00:00:00 2001
From: Hisku <hiskias@stackone.com>
Date: Wed, 11 Mar 2026 18:13:15 +0000
Subject: [PATCH 8/8] fix: drop unused enabled from PromptDefenseConfig.tier2

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/config.ts | 1 -
 src/types.ts  | 2 --
 2 files changed, 3 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index 955b8f2..043cd8f 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -169,7 +169,6 @@ export const DEFAULT_CUMULATIVE_RISK_THRESHOLDS = {
  * Default Tier 2 configuration
  */
 export const DEFAULT_TIER2_CONFIG = {
-	enabled: true,
 	mode: "onnx" as const,
 	highRiskThreshold: 0.8,
 	mediumRiskThreshold: 0.5,
diff --git a/src/types.ts b/src/types.ts
index 45f194e..3b7f665 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -307,8 +307,6 @@ export interface PromptDefenseConfig {
 	};
 	/** Tier 2 configuration */
 	tier2: {
-		/** Whether Tier 2 is enabled */
-		enabled: boolean;
 		/** Inference mode: 'onnx' for fine-tuned MiniLM, 'mlp' for frozen embeddings + MLP head */
 		mode?: "mlp" | "onnx";
 		/** Score threshold for high risk */