Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,156 changes: 2,156 additions & 0 deletions JS/edgechains/arakoodev/bun.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions JS/edgechains/arakoodev/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"test": "vitest"
},
"dependencies": {
"@aws-sdk/client-comprehend": "^3.1045.0",
"@babel/core": "^7.24.4",
"@babel/preset-env": "^7.24.4",
"@hono/node-server": "^0.6.0",
Expand All @@ -48,6 +49,7 @@
"retell-client-js-sdk": "^2.0.4",
"retell-sdk": "^4.9.0",
"retry": "^0.13.1",
"rxjs": "^7.8.2",
"ts-node": "^10.9.2",
"typeorm": "^0.3.20",
"vitest": "^2.0.3",
Expand Down
7 changes: 7 additions & 0 deletions JS/edgechains/arakoodev/src/ai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@ export { GeminiAI } from "./lib/gemini/gemini.js";
export { LlamaAI } from "./lib/llama/llama.js";
export { RetellAI } from "./lib/retell-ai/retell.js";
export { RetellWebClient } from "./lib/retell-ai/retellWebClient.js";
export { AwsComprehendRedactor } from "./lib/aws-comprehend/awsComprehendRedactor.js";
export type {
AwsComprehendRedactorOptions,
RedactablePrompt,
RedactedPiiEntity,
RedactedPromptResult,
} from "./lib/aws-comprehend/awsComprehendRedactor.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import {
ComprehendClient,
DetectPiiEntitiesCommand,
type ComprehendClientConfig,
type LanguageCode,
type PiiEntity,
type PiiEntityType,
} from "@aws-sdk/client-comprehend";
import { from, isObservable, mergeMap, Observable, of } from "rxjs";

/**
 * How a detected PII span is rewritten: "replace" substitutes placeholder text
 * for the span, "mask" overwrites the span with a repeated mask character.
 */
type RedactionMode = "replace" | "mask";

/** Construction options for `AwsComprehendRedactor`; every field is optional. */
export interface AwsComprehendRedactorOptions {
/** Pre-built client (or any object exposing `send`) to use; when set, `clientConfig` is not used. */
client?: Pick<ComprehendClient, "send">;
/** Configuration for the `ComprehendClient` that is created when `client` is omitted. */
clientConfig?: ComprehendClientConfig;
/** Language code sent with each detection request. Defaults to "en". */
languageCode?: LanguageCode;
/** Entities whose score is below this value are ignored. Defaults to 0. */
minScore?: number;
/** Fixed text substituted for every entity in "replace" mode; when omitted, `[<entity type>]` is used. */
replacementText?: string;
/** Rewrite strategy for detected spans. Defaults to "replace". */
redactionMode?: RedactionMode;
/** When provided, only entities of these types are redacted; all other types are ignored. */
entityTypes?: PiiEntityType[];
/** String repeated once per offset unit of the span in "mask" mode. Defaults to "*". */
maskCharacter?: string;
}

/** Object form of a redaction input, as an alternative to passing the text as a bare string. */
export interface RedactablePrompt {
/** The text to scan for PII. */
prompt: string;
}

/** A PII span that was redacted from a prompt. */
export interface RedactedPiiEntity {
/** Entity type reported by the detection response. */
type: PiiEntityType;
/** Score reported by the detection response for this entity. */
score: number;
/** Start of the span in the original prompt (inclusive). */
beginOffset: number;
/** End of the span in the original prompt (exclusive). */
endOffset: number;
/** The original prompt text between `beginOffset` and `endOffset`. */
text: string;
}

/** Outcome of redacting a single prompt. */
export interface RedactedPromptResult {
/** The original, unredacted prompt text. */
prompt: string;
/** The prompt with every entity in `entities` rewritten according to the redaction mode. */
redactedPrompt: string;
/** The entities that were redacted, ordered by `beginOffset`. */
entities: RedactedPiiEntity[];
}

/** Accepted input shapes for redaction: the raw text or an object carrying it in `prompt`. */
type RedactableInput = string | RedactablePrompt;

/**
 * Redacts personally identifiable information from prompt text using the
 * Amazon Comprehend `DetectPiiEntities` operation.
 *
 * Detected spans are filtered by score and entity type, validated against the
 * prompt bounds, merged when they overlap, and then either replaced with a
 * placeholder or masked character-for-character.
 */
export class AwsComprehendRedactor {
    private readonly client: Pick<ComprehendClient, "send">;
    private readonly languageCode: LanguageCode;
    private readonly minScore: number;
    private readonly replacementText?: string;
    private readonly redactionMode: RedactionMode;
    private readonly entityTypes?: Set<PiiEntityType>;
    private readonly maskCharacter: string;

    /**
     * @param options Optional overrides. Defaults: a new `ComprehendClient` built
     * from `clientConfig`, language "en", `minScore` 0, "replace" mode with
     * `[<entity type>]` placeholders, no entity-type filter, and "*" as the mask.
     */
    constructor(options: AwsComprehendRedactorOptions = {}) {
        this.client = options.client ?? new ComprehendClient(options.clientConfig ?? {});
        this.languageCode = options.languageCode ?? "en";
        this.minScore = options.minScore ?? 0;
        this.replacementText = options.replacementText;
        this.redactionMode = options.redactionMode ?? "replace";
        this.entityTypes = options.entityTypes ? new Set(options.entityTypes) : undefined;
        this.maskCharacter = options.maskCharacter ?? "*";
    }

    /**
     * Detects PII in a prompt and returns the redacted text together with the
     * spans that were rewritten.
     *
     * @param input The prompt text, or an object carrying it in `prompt`.
     * @returns The original prompt, the redacted prompt, and the redacted entities
     * ordered by `beginOffset`.
     * @throws Whatever the underlying client's `send` rejects with.
     */
    async redact(input: RedactableInput): Promise<RedactedPromptResult> {
        const prompt = typeof input === "string" ? input : input.prompt;

        // DetectPiiEntities rejects empty text, and there is nothing to redact
        // anyway, so skip the network round trip.
        if (prompt.length === 0) {
            return { prompt, redactedPrompt: prompt, entities: [] };
        }

        const response = await this.client.send(
            new DetectPiiEntitiesCommand({
                Text: prompt,
                LanguageCode: this.languageCode,
            })
        );
        const entities = this.normalizeEntities(prompt, response.Entities ?? []);
        const redactedPrompt = this.applyRedactions(prompt, entities);

        return {
            prompt,
            redactedPrompt,
            entities,
        };
    }

    /**
     * Observable counterpart of {@link redact}.
     *
     * A plain value is wrapped into a single-item stream. Requests are issued via
     * `mergeMap`, so several may be in flight at once and results are emitted as
     * each request completes, which is not necessarily the input order; use the
     * `prompt` field of each result to correlate.
     *
     * @param input A single prompt or a stream of prompts.
     * @returns A stream with one result per input prompt.
     */
    redactObservable(input: Observable<RedactableInput> | RedactableInput): Observable<RedactedPromptResult> {
        const source = isObservable(input) ? input : of(input);
        return source.pipe(mergeMap((prompt) => from(this.redact(prompt))));
    }

    /**
     * Converts raw detection results into validated, non-overlapping spans.
     *
     * Entities are discarded when they lack a type, score or offsets, score below
     * `minScore`, are excluded by the entity-type filter, or describe an empty,
     * inverted or out-of-bounds span. Overlapping spans are merged into one span
     * covering their union (keeping the type and score of the span that starts
     * first) so that no detected text is left unredacted.
     *
     * NOTE(review): offsets are applied with `String.prototype.slice`, i.e. as
     * UTF-16 code-unit indices. Confirm this matches the service's offset units
     * for prompts containing characters outside the Basic Multilingual Plane.
     */
    private normalizeEntities(prompt: string, entities: PiiEntity[]): RedactedPiiEntity[] {
        const candidates = entities
            .filter((entity): entity is Required<Pick<PiiEntity, "Type" | "Score" | "BeginOffset" | "EndOffset">> => {
                if (!entity.Type || entity.Score === undefined) return false;
                if (entity.BeginOffset === undefined || entity.EndOffset === undefined) return false;
                if (entity.Score < this.minScore) return false;
                if (this.entityTypes && !this.entityTypes.has(entity.Type)) return false;
                // Written as positive comparisons so that NaN offsets are rejected too.
                return (
                    entity.BeginOffset >= 0 &&
                    entity.BeginOffset < entity.EndOffset &&
                    entity.EndOffset <= prompt.length
                );
            })
            // Earliest start first; among equal starts the longest span first, so
            // an enclosing span supplies the type for the spans it contains.
            .sort((a, b) => a.BeginOffset - b.BeginOffset || b.EndOffset - a.EndOffset);

        const spans: Array<Omit<RedactedPiiEntity, "text">> = [];
        for (const entity of candidates) {
            const last = spans[spans.length - 1];
            // Compare against the last span actually kept, not merely the
            // previous candidate, otherwise a dropped neighbour can hide an overlap.
            if (last !== undefined && entity.BeginOffset < last.endOffset) {
                last.endOffset = Math.max(last.endOffset, entity.EndOffset);
                continue;
            }
            spans.push({
                type: entity.Type,
                score: entity.Score,
                beginOffset: entity.BeginOffset,
                endOffset: entity.EndOffset,
            });
        }

        return spans.map((span) => ({
            ...span,
            text: prompt.slice(span.beginOffset, span.endOffset),
        }));
    }

    /**
     * Rewrites every entity span in the prompt according to the redaction mode.
     *
     * @param prompt The original text.
     * @param entities Spans to rewrite; expected to be non-overlapping.
     * @returns The prompt with each span replaced or masked.
     */
    private applyRedactions(prompt: string, entities: RedactedPiiEntity[]): string {
        const ordered = [...entities].sort((a, b) => a.beginOffset - b.beginOffset);
        const pieces: string[] = [];
        let cursor = 0;

        for (const entity of ordered) {
            // Defensive: a span starting inside already-redacted text is skipped
            // rather than allowed to corrupt the output.
            if (entity.beginOffset < cursor) continue;

            const replacement =
                this.redactionMode === "mask"
                    ? this.maskCharacter.repeat(entity.endOffset - entity.beginOffset)
                    : this.replacementText ?? `[${entity.type}]`;

            pieces.push(prompt.slice(cursor, entity.beginOffset), replacement);
            cursor = entity.endOffset;
        }

        pieces.push(prompt.slice(cursor));
        return pieces.join("");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { describe, expect, test } from "bun:test";
import { firstValueFrom, of, toArray } from "rxjs";
import { AwsComprehendRedactor } from "../../lib/aws-comprehend/awsComprehendRedactor";

// Unit tests for AwsComprehendRedactor. Each test injects a stub `client` whose
// `send` resolves to a canned response, so no AWS call is made.
// NOTE(review): this suite imports from "bun:test", while the package's `test`
// script runs vitest — confirm which runner is expected to execute this file.
describe("AwsComprehendRedactor", () => {
// Default "replace" mode: each span becomes `[<TYPE>]`, and the returned
// entities carry the original text sliced from the prompt.
test("redacts detected PII entities with typed placeholders", async () => {
const redactor = new AwsComprehendRedactor({
client: {
send: async () => ({
Entities: [
{
Type: "EMAIL",
Score: 0.99,
BeginOffset: 17,
EndOffset: 30,
},
{
Type: "PHONE",
Score: 0.97,
BeginOffset: 34,
EndOffset: 46,
},
],
}),
},
minScore: 0.9,
});

const result = await redactor.redact("Contact Alice at a@example.com or 555-010-2020.");

expect(result.redactedPrompt).toBe("Contact Alice at [EMAIL] or [PHONE].");
expect(result.entities).toEqual([
{
type: "EMAIL",
score: 0.99,
beginOffset: 17,
endOffset: 30,
text: "a@example.com",
},
{
type: "PHONE",
score: 0.97,
beginOffset: 34,
endOffset: 46,
text: "555-010-2020",
},
]);
});

// The EMAIL entity fails both filters (score 0.89 < 0.95, type not in
// `entityTypes`), so only the NAME span is redacted.
test("filters by confidence and entity type", async () => {
const redactor = new AwsComprehendRedactor({
client: {
send: async () => ({
Entities: [
{ Type: "EMAIL", Score: 0.89, BeginOffset: 0, EndOffset: 13 },
{ Type: "NAME", Score: 0.99, BeginOffset: 18, EndOffset: 23 },
],
}),
},
entityTypes: ["NAME"],
minScore: 0.95,
});

const result = await redactor.redact("x@example.com for Alice");

expect(result.redactedPrompt).toBe("x@example.com for [NAME]");
expect(result.entities.map((entity) => entity.type)).toEqual(["NAME"]);
});

// Feeds both accepted input shapes (bare string and `{ prompt }`) through the
// observable API in "mask" mode and collects every emission.
test("supports observable prompt chains", async () => {
const redactor = new AwsComprehendRedactor({
client: {
send: async () => ({
Entities: [{ Type: "NAME", Score: 0.98, BeginOffset: 3, EndOffset: 8 }],
}),
},
redactionMode: "mask",
});

const results = await firstValueFrom(
redactor.redactObservable(of("Hi Alice", { prompt: "Hi Alice" })).pipe(toArray())
);

expect(results.map((result) => result.redactedPrompt)).toEqual(["Hi *****", "Hi *****"]);
});
});
25 changes: 25 additions & 0 deletions JS/edgechains/examples/aws-comprehend-redaction/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# AWS Comprehend prompt redaction

This example builds a prompt from jsonnet, sends it through `AwsComprehendRedactor`,
and prints the redacted prompt plus the PII entities Amazon Comprehend detected.

## Run

```sh
export AWS_REGION=us-east-1
export AWS_ACCESS_KEY_ID=...
export AWS_SECRET_ACCESS_KEY=...

bun install
bun run start
```

The prompt content is defined in `jsonnet/main.jsonnet`. You can override the sample
PII without editing code:

```sh
CUSTOMER_NAME="Jane Doe" \
CUSTOMER_EMAIL="jane@example.com" \
CUSTOMER_PHONE="555-010-1212" \
bun run start
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Prompt template with {name}, {email} and {phone} placeholders that are filled
// from external variables below.
local promptTemplate = |||
Send the onboarding details to {name} at {email}. Call them at {phone} if the message bounces.
|||;

{
// Placeholders are substituted innermost-first: {name}, then {email}, then
// {phone}. Because each step runs on the previous step's output, a later
// placeholder token appearing inside an earlier substituted value would be
// replaced as well.
prompt: std.strReplace(
std.strReplace(
std.strReplace(promptTemplate, '{name}', std.extVar('customer_name')),
'{email}',
std.extVar('customer_email')
),
'{phone}',
std.extVar('customer_phone')
),
// Redaction settings emitted alongside the prompt for the consuming script.
redaction: {
languageCode: 'en',
minScore: 0.8,
redactionMode: 'replace',
},
}
16 changes: 16 additions & 0 deletions JS/edgechains/examples/aws-comprehend-redaction/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "aws-comprehend-redaction-example",
"version": "1.0.0",
"type": "module",
"scripts": {
"start": "ts-node src/index.ts"
},
"dependencies": {
"@arakoodev/edgechains.js": "file:../../arakoodev",
"@arakoodev/jsonnet": "^0.1.7",
"ts-node": "^10.9.2"
},
"devDependencies": {
"typescript": "^5.6.3"
}
}
24 changes: 24 additions & 0 deletions JS/edgechains/examples/aws-comprehend-redaction/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { AwsComprehendRedactor } from "@arakoodev/edgechains.js/ai";
import Jsonnet from "@arakoodev/jsonnet";
import path from "path";
import { fileURLToPath } from "url";

// ES modules have no built-in __dirname, so derive this module's directory
// from its URL and resolve the template relative to it.
const moduleDir = path.dirname(fileURLToPath(import.meta.url));
const templatePath = path.join(moduleDir, "../jsonnet/main.jsonnet");

// External variables consumed by the jsonnet template; each one can be
// overridden through the environment.
const externalVariables: Array<[string, string]> = [
    ["customer_name", process.env.CUSTOMER_NAME ?? "Alice Example"],
    ["customer_email", process.env.CUSTOMER_EMAIL ?? "alice@example.com"],
    ["customer_phone", process.env.CUSTOMER_PHONE ?? "555-010-2020"],
];

const jsonnet = new Jsonnet();
for (const [variableName, variableValue] of externalVariables) {
    jsonnet.extString(variableName, variableValue);
}

// The template evaluates to JSON holding the rendered prompt and redaction settings.
const { prompt, redaction } = JSON.parse(jsonnet.evaluateFile(templatePath));

// Settings from the template are spread last, after the region default.
const redactor = new AwsComprehendRedactor({
    clientConfig: {
        region: process.env.AWS_REGION ?? "us-east-1",
    },
    ...redaction,
});

const result = await redactor.redact(prompt);

console.log(JSON.stringify(result, null, 2));
Loading