Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions JS/edgechains/arakoodev/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"dependencies": {
"@babel/core": "^7.24.4",
"@babel/preset-env": "^7.24.4",
"@aws-sdk/client-comprehend": "^3.700.0",
"@hono/node-server": "^0.6.0",
"@lifeomic/attempt": "^3.1.0",
"@playwright/test": "^1.45.3",
Expand Down
6 changes: 6 additions & 0 deletions JS/edgechains/arakoodev/src/ai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,9 @@ export { GeminiAI } from "./lib/gemini/gemini.js";
export { LlamaAI } from "./lib/llama/llama.js";
export { RetellAI } from "./lib/retell-ai/retell.js";
export { RetellWebClient } from "./lib/retell-ai/retellWebClient.js";
export { AwsComprehendRedactor } from "./lib/aws-comprehend/awsComprehendRedactor.js";
export type {
AwsComprehendRedactOptions,
AwsComprehendRedactorOptions,
NormalizedPiiEntity,
} from "./lib/aws-comprehend/awsComprehendRedactor.js";
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
import {
ComprehendClient,
DetectPiiEntitiesCommand,
type ComprehendClientConfig,
type LanguageCode,
type PiiEntity,
} from "@aws-sdk/client-comprehend";

/**
 * Minimal client surface the redactor depends on: a `send` method that accepts a
 * `DetectPiiEntitiesCommand` and resolves to a response that may carry `Entities`.
 * Anything with this shape can be supplied through the `client` option in place of
 * a real `ComprehendClient`.
 */
type ComprehendClientLike = {
send(command: DetectPiiEntitiesCommand): Promise<{ Entities?: PiiEntity[] }>;
};

/** Callback that produces the replacement text for one detected entity. */
type RedactionTokenFactory = (entity: NormalizedPiiEntity) => string;

/**
 * Constructor options for `AwsComprehendRedactor`: how to obtain a client, plus the
 * default redaction settings used when a call does not override them.
 */
export interface AwsComprehendRedactorOptions {
/** Pre-built client to use; when omitted the redactor creates its own `ComprehendClient`. */
client?: ComprehendClientLike;
/** Region for the internally created client; falls back to `process.env.AWS_REGION`, then "us-east-1". */
region?: string;
/** Credentials forwarded unchanged to the internally created client. */
credentials?: ComprehendClientConfig["credentials"];
/** Default language code sent with each detection request ("en" when omitted). */
languageCode?: LanguageCode | string;
/** Default minimum score; entities scoring below it are not redacted (0 when omitted). */
confidenceThreshold?: number;
/** Default allow-list of entity types; when empty or omitted every type is redacted. */
piiEntityTypes?: string[];
/** Default replacement: a fixed string, or a factory called once per redacted range. */
redactionToken?: string | RedactionTokenFactory;
}

/**
 * Per-call overrides for the redactor's defaults. Any field left undefined falls
 * back to the value given to the constructor.
 */
export interface AwsComprehendRedactOptions {
/** Language code sent with this detection request. */
languageCode?: LanguageCode | string;
/** Minimum score an entity needs in order to be redacted in this call. */
confidenceThreshold?: number;
/** Entity types to redact in this call; when empty, every type is redacted. */
piiEntityTypes?: string[];
/** Replacement string, or factory producing one per redacted range, for this call. */
redactionToken?: string | RedactionTokenFactory;
}

/**
 * A detected entity with all fields present, derived from a Comprehend `PiiEntity`
 * whose offsets were both numeric and formed a non-empty range.
 */
export interface NormalizedPiiEntity {
/** Index of the first redacted character (used as the end of `text.slice(0, beginOffset)`). */
beginOffset: number;
/** Index just past the last redacted character (used as the start of `text.slice(endOffset)`). */
endOffset: number;
/** Entity type reported by Comprehend, or "PII" when the response omitted it. */
type: string;
/** Confidence score reported by Comprehend, or 0 when the response omitted it. */
score: number;
}

/**
 * Loose shape of a chat request: an optional `prompt`, an optional list of messages
 * with optional string `content`, and any other keys, which are passed through untouched.
 */
type ChatLikeOptions = {
prompt?: string;
messages?: Array<{ content?: string; [key: string]: unknown }>;
[key: string]: unknown;
};

/** Any object exposing a `chat` method that takes the options and returns a result or a promise of one. */
type ChatEndpoint<TOptions extends ChatLikeOptions, TResult> = {
chat(options: TOptions): TResult | Promise<TResult>;
};

// Language code used when neither the call nor the constructor supplies one.
const DEFAULT_LANGUAGE_CODE = "en";
// Region for the internally created client when no `region` option or AWS_REGION env var is set.
const DEFAULT_REGION = "us-east-1";
// Replacement text used when no redaction token is configured.
const DEFAULT_REDACTION_TOKEN = "[REDACTED]";

/**
 * Redacts personally identifiable information (PII) from plain text, prompts and
 * chat payloads using entities returned by a Comprehend `DetectPiiEntitiesCommand`.
 *
 * Settings passed to the constructor act as defaults; every public method accepts
 * per-call overrides that take precedence over them.
 */
export class AwsComprehendRedactor {
    private readonly client: ComprehendClientLike;
    private readonly defaults: Required<
        Pick<AwsComprehendRedactorOptions, "confidenceThreshold" | "languageCode">
    > &
        Pick<AwsComprehendRedactorOptions, "piiEntityTypes" | "redactionToken">;

    /**
     * @param options Client wiring and default redaction settings. When `client` is
     * omitted a `ComprehendClient` is created for `options.region`, then
     * `process.env.AWS_REGION`, then "us-east-1".
     */
    constructor(options: AwsComprehendRedactorOptions = {}) {
        this.client =
            options.client ||
            new ComprehendClient({
                region: options.region || process.env.AWS_REGION || DEFAULT_REGION,
                credentials: options.credentials,
            });
        this.defaults = {
            languageCode: options.languageCode || DEFAULT_LANGUAGE_CODE,
            confidenceThreshold: options.confidenceThreshold ?? 0,
            piiEntityTypes: options.piiEntityTypes,
            redactionToken: options.redactionToken,
        };
    }

    /**
     * Asks the client for PII entities in `text` and returns the ones that pass the
     * confidence and entity-type filters.
     *
     * @param text Text to analyse; an empty string short-circuits without a request.
     * @param options Per-call overrides for language code and filters.
     * @returns Normalized entities; entries with missing or empty offset ranges are dropped.
     */
    async detectPiiEntities(
        text: string,
        options: AwsComprehendRedactOptions = {}
    ): Promise<NormalizedPiiEntity[]> {
        if (!text) return [];

        const response = await this.client.send(
            new DetectPiiEntitiesCommand({
                Text: text,
                LanguageCode: this.getLanguageCode(options),
            })
        );

        return (response.Entities || [])
            .map((entity) => this.normalizeEntity(entity))
            .filter((entity): entity is NormalizedPiiEntity => Boolean(entity))
            .filter((entity) => this.shouldRedactEntity(entity, options));
    }

    /**
     * Detects PII in `text` and returns the text with each detected range replaced.
     *
     * @param text Text to redact.
     * @param options Per-call overrides.
     */
    async redactText(text: string, options: AwsComprehendRedactOptions = {}): Promise<string> {
        const entities = await this.detectPiiEntities(text, options);
        return this.applyRedactions(text, entities, options);
    }

    /** Alias of {@link redactText} for prompt strings. */
    async redactPrompt(prompt: string, options: AwsComprehendRedactOptions = {}): Promise<string> {
        return this.redactText(prompt, options);
    }

    /**
     * Redacts the `content` of every message that has a string `content`.
     *
     * @param messages Messages to process; the input array and its items are not mutated.
     * @param options Per-call overrides.
     * @returns A new array in the same order. Messages without string content are
     * returned as the same object; the rest are shallow copies with redacted content.
     */
    async redactMessages<TMessage extends { content?: string }>(
        messages: TMessage[],
        options: AwsComprehendRedactOptions = {}
    ): Promise<TMessage[]> {
        // One detection request per message, issued concurrently.
        return Promise.all(
            messages.map(async (message) => {
                if (typeof message.content !== "string") return message;

                return {
                    ...message,
                    content: await this.redactText(message.content, options),
                };
            })
        );
    }

    /**
     * Returns a shallow copy of `chatOptions` with `prompt` (if a string) and
     * `messages` (if an array) redacted; all other keys are carried over unchanged.
     *
     * @param chatOptions Chat request payload.
     * @param options Per-call overrides.
     */
    async redactChatOptions<TOptions extends ChatLikeOptions>(
        chatOptions: TOptions,
        options: AwsComprehendRedactOptions = {}
    ): Promise<TOptions> {
        const redactedOptions = { ...chatOptions };

        if (typeof redactedOptions.prompt === "string") {
            redactedOptions.prompt = await this.redactText(redactedOptions.prompt, options);
        }

        if (Array.isArray(redactedOptions.messages)) {
            redactedOptions.messages = await this.redactMessages(redactedOptions.messages, options);
        }

        return redactedOptions;
    }

    /**
     * Redacts `chatOptions` and forwards the redacted copy to `endpoint.chat`.
     *
     * @param endpoint Object exposing a `chat` method.
     * @param chatOptions Chat request payload; never passed to the endpoint unredacted.
     * @param options Per-call overrides.
     * @returns Whatever `endpoint.chat` returns or resolves to.
     */
    async redactAndCall<TOptions extends ChatLikeOptions, TResult>(
        endpoint: ChatEndpoint<TOptions, TResult>,
        chatOptions: TOptions,
        options: AwsComprehendRedactOptions = {}
    ): Promise<TResult> {
        const redactedOptions = await this.redactChatOptions(chatOptions, options);
        return endpoint.chat(redactedOptions);
    }

    /**
     * Replaces each entity range in `text` with a redaction token. Makes no request.
     *
     * Entities are re-checked against the confidence and type filters, ranges that
     * are empty, inverted or outside `text` are ignored, and overlapping ranges are
     * merged so that each stretch of text is replaced exactly once.
     *
     * NOTE(review): offsets are applied as JavaScript string indices (UTF-16 code
     * units). Confirm this matches the unit Comprehend reports for text containing
     * characters outside the Basic Multilingual Plane (for example emoji).
     *
     * @param text Original text.
     * @param entities Ranges to redact, in any order.
     * @param options Per-call overrides for filters and token.
     * @returns The redacted text.
     */
    applyRedactions(
        text: string,
        entities: NormalizedPiiEntity[],
        options: AwsComprehendRedactOptions = {}
    ): string {
        const ranges = this.mergeRanges(
            entities
                .filter((entity) => this.shouldRedactEntity(entity, options))
                .filter((entity) => this.isUsableRange(text, entity))
        );

        // Substitute from the end of the string backwards so that earlier offsets
        // stay valid while later ranges change the string length.
        return ranges.reduceRight((result, entity) => {
            const token = this.getRedactionToken(entity, options);
            return `${result.slice(0, entity.beginOffset)}${token}${result.slice(
                entity.endOffset
            )}`;
        }, text);
    }

    /** Converts a raw `PiiEntity` into a fully populated entity, or `null` when its offsets are missing or empty. */
    private normalizeEntity(entity: PiiEntity): NormalizedPiiEntity | null {
        if (
            typeof entity.BeginOffset !== "number" ||
            typeof entity.EndOffset !== "number" ||
            entity.BeginOffset >= entity.EndOffset
        ) {
            return null;
        }

        return {
            beginOffset: entity.BeginOffset,
            endOffset: entity.EndOffset,
            type: entity.Type || "PII",
            score: entity.Score ?? 0,
        };
    }

    /** True when the entity meets the effective confidence threshold and entity-type allow-list. */
    private shouldRedactEntity(
        entity: NormalizedPiiEntity,
        options: AwsComprehendRedactOptions
    ): boolean {
        const confidenceThreshold =
            options.confidenceThreshold ?? this.defaults.confidenceThreshold;
        const piiEntityTypes = options.piiEntityTypes ?? this.defaults.piiEntityTypes;

        if (entity.score < confidenceThreshold) return false;
        // An absent or empty allow-list means "redact every type".
        if (!piiEntityTypes?.length) return true;

        return piiEntityTypes.includes(entity.type);
    }

    /** Resolves the language code for a call, preferring the per-call value over the default. */
    private getLanguageCode(options: AwsComprehendRedactOptions): LanguageCode {
        return (options.languageCode || this.defaults.languageCode) as LanguageCode;
    }

    /**
     * Resolves the replacement text for one merged range.
     *
     * Only an unset token falls back to the default; an explicit empty string is
     * honoured so callers can strip PII from the text entirely.
     */
    private getRedactionToken(
        entity: NormalizedPiiEntity,
        options: AwsComprehendRedactOptions
    ): string {
        const token = options.redactionToken ?? this.defaults.redactionToken;

        if (typeof token === "function") return token(entity);
        return token ?? DEFAULT_REDACTION_TOKEN;
    }

    /**
     * True when the range is non-empty, correctly ordered and lies inside `text`.
     * The ordering check matters because `applyRedactions` accepts caller-built
     * entities that never went through `normalizeEntity`.
     */
    private isUsableRange(text: string, entity: NormalizedPiiEntity): boolean {
        return (
            entity.beginOffset >= 0 &&
            entity.beginOffset < entity.endOffset &&
            entity.endOffset <= text.length
        );
    }

    /**
     * Returns copies of the entities sorted by start offset with overlapping ranges
     * merged. A merged range keeps the type of its first entity, the furthest end
     * offset and the highest score. Ranges that merely touch are kept separate.
     */
    private mergeRanges(entities: NormalizedPiiEntity[]): NormalizedPiiEntity[] {
        const sorted = [...entities].sort(
            (a, b) => a.beginOffset - b.beginOffset || b.endOffset - a.endOffset
        );
        const merged: NormalizedPiiEntity[] = [];

        for (const entity of sorted) {
            const previous = merged[merged.length - 1];

            if (!previous || entity.beginOffset >= previous.endOffset) {
                // Copy so that widening a merged range never mutates the caller's object.
                merged.push({ ...entity });
                continue;
            }

            previous.endOffset = Math.max(previous.endOffset, entity.endOffset);
            previous.score = Math.max(previous.score, entity.score);
        }

        return merged;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { describe, expect, test, vi } from "vitest";
import { AwsComprehendRedactor } from "../lib/aws-comprehend/awsComprehendRedactor.js";

/**
 * Builds a Comprehend-style PII entity covering the first occurrence of `value`
 * inside `text`.
 *
 * @param text Full text the entity refers to.
 * @param value Substring that should be reported as PII.
 * @param type Entity type to report; defaults to "EMAIL".
 * @returns An object with `BeginOffset`, `EndOffset`, `Score` (always 0.99) and `Type`.
 * @throws Error when `value` does not occur in `text`, so a mistyped fixture fails
 * loudly instead of producing an entity with a negative offset.
 */
const buildEntity = (text: string, value: string, type = "EMAIL") => {
    const beginOffset = text.indexOf(value);
    if (beginOffset === -1) {
        throw new Error(`buildEntity: "${value}" not found in "${text}"`);
    }

    return {
        BeginOffset: beginOffset,
        EndOffset: beginOffset + value.length,
        Score: 0.99,
        Type: type,
    };
};

// Unit tests for AwsComprehendRedactor. Every test injects a stub client, so no
// request leaves the process.
describe("AwsComprehendRedactor", () => {
    test("redacts detected PII in text", async () => {
        const text = "Email jane@example.com before launch.";
        const client = {
            send: vi.fn().mockResolvedValue({
                Entities: [buildEntity(text, "jane@example.com")],
            }),
        };
        const redactor = new AwsComprehendRedactor({ client });

        await expect(redactor.redactText(text)).resolves.toBe(
            "Email [REDACTED] before launch."
        );
        expect(client.send).toHaveBeenCalledTimes(1);
        // The command must carry the original text and the default language code.
        expect(client.send.mock.calls[0][0].input).toMatchObject({
            Text: text,
            LanguageCode: "en",
        });
    });

    test("redacts prompts and message content before calling an endpoint", async () => {
        const prompt = "My phone is 555-0100.";
        const messages = [{ role: "user", content: "Call me on 555-0100 tomorrow." }];
        const client = {
            // The phone number sits at a different offset in each input, so derive
            // the entity from the text that was actually sent with the command.
            send: vi.fn().mockImplementation(async (command) => ({
                Entities: [buildEntity(command.input.Text, "555-0100", "PHONE")],
            })),
        };
        const endpoint = {
            chat: vi.fn().mockResolvedValue("ok"),
        };
        const redactor = new AwsComprehendRedactor({
            client,
            redactionToken: (entity) => `[${entity.type}]`,
        });

        const result = await redactor.redactAndCall(endpoint, { prompt, messages });

        expect(result).toBe("ok");
        // One detection request for the prompt and one for the single message.
        expect(client.send).toHaveBeenCalledTimes(2);
        expect(endpoint.chat).toHaveBeenCalledWith({
            prompt: "My phone is [PHONE].",
            messages: [{ role: "user", content: "Call me on [PHONE] tomorrow." }],
        });
        // The caller's message objects must not be mutated.
        expect(messages[0].content).toBe("Call me on 555-0100 tomorrow.");
    });

    test("honors confidence and entity type filters", () => {
        const text = "Email jane@example.com and call 555-0100.";
        const email = {
            beginOffset: text.indexOf("jane@example.com"),
            endOffset: text.indexOf("jane@example.com") + "jane@example.com".length,
            score: 0.99,
            type: "EMAIL",
        };
        // Fails both filters: score below the threshold and type not in the allow-list.
        const phone = {
            beginOffset: text.indexOf("555-0100"),
            endOffset: text.indexOf("555-0100") + "555-0100".length,
            score: 0.4,
            type: "PHONE",
        };
        const redactor = new AwsComprehendRedactor({
            client: { send: vi.fn() },
            confidenceThreshold: 0.9,
            piiEntityTypes: ["EMAIL"],
        });

        expect(redactor.applyRedactions(text, [email, phone])).toBe(
            "Email [REDACTED] and call 555-0100."
        );
    });
});
12 changes: 12 additions & 0 deletions JS/edgechains/examples/aws-comprehend-redaction/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"name": "aws-comprehend-redaction",
"version": "0.0.1",
"type": "module",
"scripts": {
"start": "tsx src/index.ts"
},
"dependencies": {
"@arakoodev/edgechains.js": "file:../../arakoodev",
"tsx": "^4.19.2"
}
}
24 changes: 24 additions & 0 deletions JS/edgechains/examples/aws-comprehend-redaction/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { AwsComprehendRedactor, OpenAI } from "@arakoodev/edgechains.js/ai";

// Example: redact a support note with AwsComprehendRedactor, print the redacted
// text, then send the same note through `redactAndCall`.

// Replaces each detected entity with its type in square brackets.
const labelByType = (entity: { type: string }): string => `[${entity.type}]`;

// Only detections scoring at least 0.9 are redacted.
const redactor = new AwsComprehendRedactor({
    redactionToken: labelByType,
    confidenceThreshold: 0.9,
    region: process.env.AWS_REGION || "us-east-1",
});

const { OPENAI_API_KEY, OPENAI_ORG_ID } = process.env;
const openAI = new OpenAI({ apiKey: OPENAI_API_KEY, orgId: OPENAI_ORG_ID });

const supportNote =
    "Summarize this support note: Jane Doe can be reached at jane@example.com.";

console.log("Redacted prompt:", await redactor.redactPrompt(supportNote));

// The raw note is passed here on purpose: `redactAndCall` redacts the options
// itself before invoking `openAI.chat`.
const completion = await redactor.redactAndCall(openAI, {
    prompt: supportNote,
    max_tokens: 100,
});

console.log(completion);
Loading