From 66334499f63bd0fe294769dc30efc8b165598b3c Mon Sep 17 00:00:00 2001 From: xiaye624 Date: Tue, 12 May 2026 22:59:51 +0800 Subject: [PATCH 1/2] Add AWS Comprehend PII redaction helper --- JS/edgechains/arakoodev/README.md | 19 ++ JS/edgechains/arakoodev/package.json | 1 + JS/edgechains/arakoodev/src/ai/src/index.ts | 7 + .../src/lib/comprehend/comprehendRedactor.ts | 245 ++++++++++++++++++ .../comprehend/comprehendRedactor.test.ts | 87 +++++++ .../aws-comprehend-redaction/README.md | 23 ++ .../aws-comprehend-redaction/package.json | 13 + .../aws-comprehend-redaction/src/index.ts | 26 ++ .../aws-comprehend-redaction/tsconfig.json | 10 + 9 files changed, 431 insertions(+) create mode 100644 JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts create mode 100644 JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts create mode 100644 JS/edgechains/examples/aws-comprehend-redaction/README.md create mode 100644 JS/edgechains/examples/aws-comprehend-redaction/package.json create mode 100644 JS/edgechains/examples/aws-comprehend-redaction/src/index.ts create mode 100644 JS/edgechains/examples/aws-comprehend-redaction/tsconfig.json diff --git a/JS/edgechains/arakoodev/README.md b/JS/edgechains/arakoodev/README.md index 81237e662..17e466ad0 100644 --- a/JS/edgechains/arakoodev/README.md +++ b/JS/edgechains/arakoodev/README.md @@ -3,3 +3,22 @@ Installation ``` npm install arakoodev ``` + +## AWS Comprehend PII redaction + +`ComprehendRedactor` can redact PII before text is passed to an LLM endpoint. + +```ts +import { ComprehendRedactor, OpenAI } from "@arakoodev/edgechains.js/ai"; + +const redactor = new ComprehendRedactor(); +const openAI = new OpenAI({}); +const safeOpenAI = redactor.wrapChat(openAI); + +const response = await safeOpenAI.chat({ + prompt: "Summarize Jane Doe's account notes. Email: jane@example.com", +}); +``` + +Credentials are read from `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional +`AWS_SESSION_TOKEN`, and `AWS_REGION` or `AWS_DEFAULT_REGION`. diff --git a/JS/edgechains/arakoodev/package.json b/JS/edgechains/arakoodev/package.json index 0b0bd3784..a4a8c1588 100644 --- a/JS/edgechains/arakoodev/package.json +++ b/JS/edgechains/arakoodev/package.json @@ -22,6 +22,7 @@ "test": "vitest" }, "dependencies": { + "@aws-sdk/client-comprehend": "^3.1045.0", "@babel/core": "^7.24.4", "@babel/preset-env": "^7.24.4", "@hono/node-server": "^0.6.0", diff --git a/JS/edgechains/arakoodev/src/ai/src/index.ts b/JS/edgechains/arakoodev/src/ai/src/index.ts index 2c98f37dc..f0ee2a89c 100644 --- a/JS/edgechains/arakoodev/src/ai/src/index.ts +++ b/JS/edgechains/arakoodev/src/ai/src/index.ts @@ -3,3 +3,10 @@ export { GeminiAI } from "./lib/gemini/gemini.js"; export { LlamaAI } from "./lib/llama/llama.js"; export { RetellAI } from "./lib/retell-ai/retell.js"; export { RetellWebClient } from "./lib/retell-ai/retellWebClient.js"; +export { + ComprehendRedactor, + type ComprehendDetectionOptions, + type ComprehendRedactionResult, + type ComprehendRedactOptions, + type ComprehendRedactorOptions, +} from "./lib/comprehend/comprehendRedactor.js"; diff --git a/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts b/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts new file mode 100644 index 000000000..e4a56c05f --- /dev/null +++ b/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts @@ -0,0 +1,245 @@ +import { + ComprehendClient, + DetectPiiEntitiesCommand, + type DetectPiiEntitiesCommandOutput, + type LanguageCode, + type PiiEntity, +} from "@aws-sdk/client-comprehend"; + +type ComprehendClientLike = { + send( + command: DetectPiiEntitiesCommand, + ): Promise; +}; + +type PromptMessage = { + content?: string; + [key: string]: unknown; +}; + +type PromptLike = { + prompt?: string; + messages?: PromptMessage[]; + [key: string]: unknown; +}; + +type RedactionReplacement = + | string + | ((entity: PiiEntity, value: string) => string); + +export interface ComprehendRedactorOptions { + region?: string; + accessKeyId?: string; + secretAccessKey?: string; + sessionToken?: string; + client?: ComprehendClientLike; + replacement?: RedactionReplacement; +} + +export interface ComprehendDetectionOptions { + text: string; + languageCode?: LanguageCode; +} + +export interface ComprehendRedactOptions extends ComprehendDetectionOptions { + replacement?: RedactionReplacement; +} + +export interface ComprehendRedactionResult { + originalText: string; + redactedText: string; + entities: PiiEntity[]; +} + +export class ComprehendRedactor { + private client: ComprehendClientLike; + private replacement: RedactionReplacement; + + constructor(options: ComprehendRedactorOptions = {}) { + this.client = options.client || this.createClient(options); + this.replacement = + options.replacement || ((entity) => `[${entity.Type || "PII"}]`); + } + + async detectPiiEntities( + options: string | ComprehendDetectionOptions, + ): Promise { + const { text, languageCode } = this.normalizeDetectionOptions(options); + + if (!text.trim()) { + return []; + } + + const response = await this.client.send( + new DetectPiiEntitiesCommand({ + Text: text, + LanguageCode: languageCode, + }), + ); + + return response.Entities || []; + } + + async redact( + options: string | ComprehendRedactOptions, + ): Promise { + const normalizedOptions = this.normalizeRedactOptions(options); + const entities = await this.detectPiiEntities(normalizedOptions); + + return { + originalText: normalizedOptions.text, + redactedText: this.applyRedactions( + normalizedOptions.text, + entities, + normalizedOptions.replacement || this.replacement, + ), + entities, + }; + } + + async redactText( + text: string, + options: Omit = {}, + ): Promise { + const result = await this.redact({ ...options, text }); + return result.redactedText; + } + + async redactPromptOptions( + chatOptions: T, + options: Omit = {}, + ): Promise { + const redactedOptions = { ...chatOptions }; + + if (typeof redactedOptions.prompt === "string") { + redactedOptions.prompt = await this.redactText( + redactedOptions.prompt, + options, + ); + } + + if (Array.isArray(redactedOptions.messages)) { + redactedOptions.messages = await Promise.all( + redactedOptions.messages.map(async (message) => { + if (typeof message.content !== "string") { + return message; + } + + return { + ...message, + content: await this.redactText(message.content, options), + }; + }), + ); + } + + return redactedOptions as T; + } + + wrapChat( + endpoint: { chat(options: TOptions): Promise | TResult }, + options: Omit = {}, + ): { chat(options: TOptions): Promise } { + return { + chat: async (chatOptions: TOptions) => { + const redactedOptions = await this.redactPromptOptions( + chatOptions, + options, + ); + return endpoint.chat(redactedOptions); + }, + }; + } + + private createClient(options: ComprehendRedactorOptions): ComprehendClient { + const accessKeyId = options.accessKeyId || process.env.AWS_ACCESS_KEY_ID; + const secretAccessKey = + options.secretAccessKey || process.env.AWS_SECRET_ACCESS_KEY; + const sessionToken = options.sessionToken || process.env.AWS_SESSION_TOKEN; + + return new ComprehendClient({ + region: + options.region || + process.env.AWS_REGION || + process.env.AWS_DEFAULT_REGION || + "us-east-1", + credentials: + accessKeyId && secretAccessKey + ? { + accessKeyId, + secretAccessKey, + sessionToken, + } + : undefined, + }); + } + + private normalizeDetectionOptions( + options: string | ComprehendDetectionOptions, + ): Required { + if (typeof options === "string") { + return { text: options, languageCode: "en" }; + } + + return { + text: options.text, + languageCode: options.languageCode || "en", + }; + } + + private normalizeRedactOptions( + options: string | ComprehendRedactOptions, + ): Required { + if (typeof options === "string") { + return { + text: options, + languageCode: "en", + replacement: this.replacement, + }; + } + + return { + text: options.text, + languageCode: options.languageCode || "en", + replacement: options.replacement || this.replacement, + }; + } + + private applyRedactions( + text: string, + entities: PiiEntity[], + replacement: RedactionReplacement, + ): string { + return entities + .filter((entity) => this.isEntityWithOffsets(entity, text)) + .sort((a, b) => (b.BeginOffset || 0) - (a.BeginOffset || 0)) + .reduce((redactedText, entity) => { + const begin = entity.BeginOffset || 0; + const end = entity.EndOffset || begin; + const value = redactedText.slice(begin, end); + const redactedValue = + typeof replacement === "function" + ? replacement(entity, value) + : replacement; + + return ( + redactedText.slice(0, begin) + redactedValue + redactedText.slice(end) + ); + }, text); + } + + private isEntityWithOffsets(entity: PiiEntity, text: string): boolean { + if ( + typeof entity.BeginOffset !== "number" || + typeof entity.EndOffset !== "number" + ) { + return false; + } + + return ( + entity.BeginOffset >= 0 && + entity.EndOffset > entity.BeginOffset && + entity.EndOffset <= text.length + ); + } +} diff --git a/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts b/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts new file mode 100644 index 000000000..775698521 --- /dev/null +++ b/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, test } from "vitest"; +import { ComprehendRedactor } from "../../lib/comprehend/comprehendRedactor.js"; + +class FakeComprehendClient { + commands: any[] = []; + + constructor(private entities: any[]) {} + + async send(command: any): Promise { + this.commands.push(command); + return { Entities: this.entities }; + } +} + +describe("ComprehendRedactor", () => { + test("detects PII entities with AWS Comprehend", async () => { + const client = new FakeComprehendClient([ + { Type: "EMAIL", BeginOffset: 14, EndOffset: 30, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ client }); + + const entities = await redactor.detectPiiEntities( + "Jane Doe uses jane@example.com", + ); + + expect(entities).toEqual([ + { Type: "EMAIL", BeginOffset: 14, EndOffset: 30, Score: 0.99 }, + ]); + expect(client.commands).toHaveLength(1); + }); + + test("redacts detected PII entities with entity labels", async () => { + const client = new FakeComprehendClient([ + { Type: "NAME", BeginOffset: 0, EndOffset: 8, Score: 0.99 }, + { Type: "EMAIL", BeginOffset: 14, EndOffset: 30, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ client }); + + const result = await redactor.redact("Jane Doe uses jane@example.com"); + + expect(result.redactedText).toBe("[NAME] uses [EMAIL]"); + }); + + test("supports custom replacement strings", async () => { + const client = new FakeComprehendClient([ + { Type: "PHONE", BeginOffset: 8, EndOffset: 20, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ + client, + replacement: "[REDACTED]", + }); + + const redactedText = await redactor.redactText("Call me 555-123-4567"); + + expect(redactedText).toBe("Call me [REDACTED]"); + }); + + test("redacts prompt and message options before they are sent to an endpoint", async () => { + const client = new FakeComprehendClient([ + { Type: "NAME", BeginOffset: 0, EndOffset: 8, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ client }); + + const options = await redactor.redactPromptOptions({ + prompt: "Jane Doe needs help", + messages: [{ role: "user", content: "Jane Doe needs help" }], + }); + + expect(options.prompt).toBe("[NAME] needs help"); + expect(options.messages?.[0].content).toBe("[NAME] needs help"); + }); + + test("wraps chat endpoints so redaction can be chained with LLM calls", async () => { + const client = new FakeComprehendClient([ + { Type: "NAME", BeginOffset: 0, EndOffset: 8, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ client }); + const endpoint = { + chat: async (options: { prompt: string }) => options.prompt, + }; + + const safeEndpoint = redactor.wrapChat(endpoint); + const response = await safeEndpoint.chat({ prompt: "Jane Doe needs help" }); + + expect(response).toBe("[NAME] needs help"); + }); +}); diff --git a/JS/edgechains/examples/aws-comprehend-redaction/README.md b/JS/edgechains/examples/aws-comprehend-redaction/README.md new file mode 100644 index 000000000..a74651196 --- /dev/null +++ b/JS/edgechains/examples/aws-comprehend-redaction/README.md @@ -0,0 +1,23 @@ +# AWS Comprehend Redaction Example + +This example redacts personally identifiable information with Amazon Comprehend before sending text to an LLM endpoint. + +## Environment + +Set AWS credentials with the standard environment variables: + +```bash +AWS_REGION=us-east-1 +AWS_ACCESS_KEY_ID=... +AWS_SECRET_ACCESS_KEY=... +AWS_SESSION_TOKEN=... +``` + +`AWS_SESSION_TOKEN` is optional. `AWS_DEFAULT_REGION` is also supported when `AWS_REGION` is not set. + +## Run + +```bash +npm install +npm start +``` diff --git a/JS/edgechains/examples/aws-comprehend-redaction/package.json b/JS/edgechains/examples/aws-comprehend-redaction/package.json new file mode 100644 index 000000000..151688d64 --- /dev/null +++ b/JS/edgechains/examples/aws-comprehend-redaction/package.json @@ -0,0 +1,13 @@ +{ + "name": "aws-comprehend-redaction-example", + "private": true, + "type": "module", + "scripts": { + "start": "ts-node src/index.ts" + }, + "dependencies": { + "@arakoodev/edgechains.js": "file:../../arakoodev", + "ts-node": "^10.9.2", + "typescript": "^5.6.3" + } +} diff --git a/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts b/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts new file mode 100644 index 000000000..3f0d4d348 --- /dev/null +++ b/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts @@ -0,0 +1,26 @@ +import { ComprehendRedactor, OpenAI } from "@arakoodev/edgechains.js/ai"; + +async function main() { + const redactor = new ComprehendRedactor(); + const prompt = + "Summarize this customer note: Jane Doe can be reached at jane@example.com."; + const redactedPrompt = await redactor.redactText(prompt); + + console.log("Redacted prompt:", redactedPrompt); + + if (!process.env.OPENAI_API_KEY) { + console.log("Skipping OpenAI call because OPENAI_API_KEY is not set."); + return; + } + + const openAI = new OpenAI({}); + const safeOpenAI = redactor.wrapChat(openAI); + const response = await safeOpenAI.chat({ prompt }); + + console.log(response); +} + +main().catch((error) => { + console.error(error); + process.exit(1); +}); diff --git a/JS/edgechains/examples/aws-comprehend-redaction/tsconfig.json b/JS/edgechains/examples/aws-comprehend-redaction/tsconfig.json new file mode 100644 index 000000000..5d96ed084 --- /dev/null +++ b/JS/edgechains/examples/aws-comprehend-redaction/tsconfig.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true + } +} From 4f9289036dbba0970726e0b45ee9b0e7d89969f0 Mon Sep 17 00:00:00 2001 From: mulan Date: Tue, 12 May 2026 23:47:42 +0800 Subject: [PATCH 2/2] Add observable-style Comprehend redaction helpers --- JS/edgechains/arakoodev/README.md | 3 + JS/edgechains/arakoodev/src/ai/src/index.ts | 11 +-- .../src/lib/comprehend/comprehendRedactor.ts | 76 +++++++++++++++++++ .../comprehend/comprehendRedactor.test.ts | 37 +++++++++ 4 files changed, 122 insertions(+), 5 deletions(-) diff --git a/JS/edgechains/arakoodev/README.md b/JS/edgechains/arakoodev/README.md index 17e466ad0..5c34298f4 100644 --- a/JS/edgechains/arakoodev/README.md +++ b/JS/edgechains/arakoodev/README.md @@ -20,5 +20,8 @@ const response = await safeOpenAI.chat({ }); ``` +For chainable flows, `redactObservable()` and `redactTextObservable()` expose a +small Observable-style interface with `subscribe()` and `pipe()`. + Credentials are read from `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional `AWS_SESSION_TOKEN`, and `AWS_REGION` or `AWS_DEFAULT_REGION`. diff --git a/JS/edgechains/arakoodev/src/ai/src/index.ts b/JS/edgechains/arakoodev/src/ai/src/index.ts index f0ee2a89c..99234514f 100644 --- a/JS/edgechains/arakoodev/src/ai/src/index.ts +++ b/JS/edgechains/arakoodev/src/ai/src/index.ts @@ -4,9 +4,10 @@ export { LlamaAI } from "./lib/llama/llama.js"; export { RetellAI } from "./lib/retell-ai/retell.js"; export { RetellWebClient } from "./lib/retell-ai/retellWebClient.js"; export { - ComprehendRedactor, - type ComprehendDetectionOptions, - type ComprehendRedactionResult, - type ComprehendRedactOptions, - type ComprehendRedactorOptions, + type ComprehendObservable, + ComprehendRedactor, + type ComprehendDetectionOptions, + type ComprehendRedactionResult, + type ComprehendRedactOptions, + type ComprehendRedactorOptions, } from "./lib/comprehend/comprehendRedactor.js"; diff --git a/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts b/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts index e4a56c05f..83ed09274 100644 --- a/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts +++ b/JS/edgechains/arakoodev/src/ai/src/lib/comprehend/comprehendRedactor.ts @@ -27,6 +27,25 @@ type RedactionReplacement = | string | ((entity: PiiEntity, value: string) => string); +type Observer = { + next?: (value: T) => void; + error?: (error: unknown) => void; + complete?: () => void; +}; + +type Teardown = { + unsubscribe(): void; +}; + +export type ComprehendObservable = { + subscribe( + observerOrNext: Observer | ((value: T) => void), + error?: (error: unknown) => void, + complete?: () => void, + ): Teardown; + pipe(operator: (source: ComprehendObservable) => TNext): TNext; +}; + export interface ComprehendRedactorOptions { region?: string; accessKeyId?: string; @@ -105,6 +124,19 @@ export class ComprehendRedactor { return result.redactedText; } + redactObservable( + options: string | ComprehendRedactOptions, + ): ComprehendObservable { + return this.createObservable(() => this.redact(options)); + } + + redactTextObservable( + text: string, + options: Omit = {}, + ): ComprehendObservable { + return this.createObservable(() => this.redactText(text, options)); + } + async redactPromptOptions( chatOptions: T, options: Omit = {}, @@ -151,6 +183,50 @@ export class ComprehendRedactor { }; } + private createObservable( + producer: () => Promise, + ): ComprehendObservable { + const observable: ComprehendObservable = { + subscribe: ( + observerOrNext: Observer | ((value: T) => void), + error?: (error: unknown) => void, + complete?: () => void, + ) => { + let isSubscribed = true; + const observer = + typeof observerOrNext === "function" + ? { next: observerOrNext, error, complete } + : observerOrNext; + + producer() + .then((value) => { + if (!isSubscribed) { + return; + } + + observer.next?.(value); + observer.complete?.(); + }) + .catch((caughtError) => { + if (!isSubscribed) { + return; + } + + observer.error?.(caughtError); + }); + + return { + unsubscribe: () => { + isSubscribed = false; + }, + }; + }, + pipe: (operator) => operator(observable), + }; + + return observable; + } + private createClient(options: ComprehendRedactorOptions): ComprehendClient { const accessKeyId = options.accessKeyId || process.env.AWS_ACCESS_KEY_ID; const secretAccessKey = diff --git a/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts b/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts index 775698521..8f4fe901f 100644 --- a/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts +++ b/JS/edgechains/arakoodev/src/ai/src/testcases/comprehend/comprehendRedactor.test.ts @@ -84,4 +84,41 @@ describe("ComprehendRedactor", () => { expect(response).toBe("[NAME] needs help"); }); + + test("exposes observable-style text redaction for chainable flows", async () => { + const client = new FakeComprehendClient([ + { Type: "NAME", BeginOffset: 0, EndOffset: 8, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ client }); + + const redactedText = await new Promise((resolve, reject) => { + redactor.redactTextObservable("Jane Doe needs help").subscribe({ + next: resolve, + error: reject, + }); + }); + + expect(redactedText).toBe("[NAME] needs help"); + }); + + test("supports observable pipe operators", async () => { + const client = new FakeComprehendClient([ + { Type: "NAME", BeginOffset: 0, EndOffset: 8, Score: 0.99 }, + ]); + const redactor = new ComprehendRedactor({ client }); + + const redactedText = await redactor + .redactObservable("Jane Doe needs help") + .pipe( + (source) => + new Promise((resolve, reject) => { + source.subscribe({ + next: (result) => resolve(result.redactedText), + error: reject, + }); + }), + ); + + expect(redactedText).toBe("[NAME] needs help"); + }); });