diff --git a/JS/edgechains/arakoodev/package.json b/JS/edgechains/arakoodev/package.json
index 0b0bd3784..77d0d0c54 100644
--- a/JS/edgechains/arakoodev/package.json
+++ b/JS/edgechains/arakoodev/package.json
@@ -24,6 +24,7 @@
     "dependencies": {
         "@babel/core": "^7.24.4",
         "@babel/preset-env": "^7.24.4",
+        "@aws-sdk/client-comprehend": "^3.700.0",
         "@hono/node-server": "^0.6.0",
         "@lifeomic/attempt": "^3.1.0",
         "@playwright/test": "^1.45.3",
diff --git a/JS/edgechains/arakoodev/src/ai/src/index.ts b/JS/edgechains/arakoodev/src/ai/src/index.ts
index 2c98f37dc..a765be6eb 100644
--- a/JS/edgechains/arakoodev/src/ai/src/index.ts
+++ b/JS/edgechains/arakoodev/src/ai/src/index.ts
@@ -3,3 +3,9 @@ export { GeminiAI } from "./lib/gemini/gemini.js";
 export { LlamaAI } from "./lib/llama/llama.js";
 export { RetellAI } from "./lib/retell-ai/retell.js";
 export { RetellWebClient } from "./lib/retell-ai/retellWebClient.js";
+export { AwsComprehendRedactor } from "./lib/aws-comprehend/awsComprehendRedactor.js";
+export type {
+    AwsComprehendRedactOptions,
+    AwsComprehendRedactorOptions,
+    NormalizedPiiEntity,
+} from "./lib/aws-comprehend/awsComprehendRedactor.js";
diff --git a/JS/edgechains/arakoodev/src/ai/src/lib/aws-comprehend/awsComprehendRedactor.ts b/JS/edgechains/arakoodev/src/ai/src/lib/aws-comprehend/awsComprehendRedactor.ts
new file mode 100644
index 000000000..2faad4683
--- /dev/null
+++ b/JS/edgechains/arakoodev/src/ai/src/lib/aws-comprehend/awsComprehendRedactor.ts
@@ -0,0 +1,318 @@
+import {
+    ComprehendClient,
+    DetectPiiEntitiesCommand,
+    type ComprehendClientConfig,
+    type LanguageCode,
+    type PiiEntity,
+} from "@aws-sdk/client-comprehend";
+
+/** Minimal client surface the redactor needs; lets callers inject a stub. */
+type ComprehendClientLike = {
+    send(command: DetectPiiEntitiesCommand): Promise<{ Entities?: PiiEntity[] }>;
+};
+
+/** Produces the replacement text for one redacted range. */
+type RedactionTokenFactory = (entity: NormalizedPiiEntity) => string;
+
+/** Constructor options; filter and token fields act as per-instance defaults. */
+export interface AwsComprehendRedactorOptions {
+    client?: ComprehendClientLike;
+    region?: string;
+    credentials?: ComprehendClientConfig["credentials"];
+    languageCode?: LanguageCode | string;
+    confidenceThreshold?: number;
+    piiEntityTypes?: string[];
+    redactionToken?: string | RedactionTokenFactory;
+}
+
+/** Per-call overrides of the defaults captured by the constructor. */
+export interface AwsComprehendRedactOptions {
+    languageCode?: LanguageCode | string;
+    confidenceThreshold?: number;
+    piiEntityTypes?: string[];
+    redactionToken?: string | RedactionTokenFactory;
+}
+
+/** A PII range; offsets are JavaScript string indices and the end is exclusive. */
+export interface NormalizedPiiEntity {
+    beginOffset: number;
+    endOffset: number;
+    type: string;
+    score: number;
+}
+
+type ChatLikeOptions = {
+    prompt?: string;
+    messages?: Array<{ content?: string; [key: string]: unknown }>;
+    [key: string]: unknown;
+};
+
+type ChatEndpoint<TOptions, TResult> = {
+    chat(options: TOptions): TResult | Promise<TResult>;
+};
+
+const DEFAULT_LANGUAGE_CODE = "en";
+const DEFAULT_REGION = "us-east-1";
+const DEFAULT_REDACTION_TOKEN = "[REDACTED]";
+/** Matches any UTF-16 surrogate, i.e. text holding characters outside the BMP. */
+const SURROGATE_PATTERN = /[\uD800-\uDFFF]/;
+
+/**
+ * Redacts PII reported by Amazon Comprehend from text, prompts and chat
+ * messages, optionally forwarding the redacted options to a chat endpoint.
+ */
+export class AwsComprehendRedactor {
+    private readonly client: ComprehendClientLike;
+    private readonly defaults: Required<
+        Pick<AwsComprehendRedactorOptions, "languageCode" | "confidenceThreshold">
+    > &
+        Pick<AwsComprehendRedactorOptions, "piiEntityTypes" | "redactionToken">;
+
+    /**
+     * @param options - Client plus default filters. Without `client`, a
+     * ComprehendClient is created for `region`, then `AWS_REGION`, then us-east-1.
+     */
+    constructor(options: AwsComprehendRedactorOptions = {}) {
+        this.client =
+            options.client ||
+            new ComprehendClient({
+                region: options.region || process.env.AWS_REGION || DEFAULT_REGION,
+                credentials: options.credentials,
+            });
+        this.defaults = {
+            languageCode: options.languageCode || DEFAULT_LANGUAGE_CODE,
+            confidenceThreshold: options.confidenceThreshold ?? 0,
+            piiEntityTypes: options.piiEntityTypes,
+            redactionToken: options.redactionToken,
+        };
+    }
+
+    /**
+     * Sends `text` to Comprehend and returns the entities that pass the
+     * confidence and type filters, with offsets converted to string indices.
+     * Empty text returns `[]` without issuing a request.
+     */
+    async detectPiiEntities(
+        text: string,
+        options: AwsComprehendRedactOptions = {}
+    ): Promise<NormalizedPiiEntity[]> {
+        if (!text) return [];
+
+        const response = await this.client.send(
+            new DetectPiiEntitiesCommand({
+                Text: text,
+                LanguageCode: this.getLanguageCode(options),
+            })
+        );
+        const toStringIndex = this.createOffsetMapper(text);
+
+        return (response.Entities || [])
+            .map((entity) => this.normalizeEntity(entity, toStringIndex))
+            .filter((entity): entity is NormalizedPiiEntity => Boolean(entity))
+            .filter((entity) => this.shouldRedactEntity(entity, options));
+    }
+
+    /** Detects PII in `text` and returns the text with each range replaced. */
+    async redactText(text: string, options: AwsComprehendRedactOptions = {}): Promise<string> {
+        const entities = await this.detectPiiEntities(text, options);
+        return this.applyRedactions(text, entities, options);
+    }
+
+    /** Alias of {@link redactText} for prompt strings. */
+    async redactPrompt(prompt: string, options: AwsComprehendRedactOptions = {}): Promise<string> {
+        return this.redactText(prompt, options);
+    }
+
+    /**
+     * Redacts the `content` of every message whose content is a string; other
+     * messages are returned as-is. Each message triggers its own request.
+     */
+    async redactMessages<TMessage extends { content?: unknown }>(
+        messages: TMessage[],
+        options: AwsComprehendRedactOptions = {}
+    ): Promise<TMessage[]> {
+        return Promise.all(
+            messages.map(async (message) => {
+                if (typeof message.content !== "string") return message;
+
+                return {
+                    ...message,
+                    content: await this.redactText(message.content, options),
+                };
+            })
+        );
+    }
+
+    /** Returns a shallow copy of `chatOptions` with `prompt` and `messages` redacted. */
+    async redactChatOptions<TOptions extends ChatLikeOptions>(
+        chatOptions: TOptions,
+        options: AwsComprehendRedactOptions = {}
+    ): Promise<TOptions> {
+        const redactedOptions = { ...chatOptions };
+
+        if (typeof redactedOptions.prompt === "string") {
+            redactedOptions.prompt = await this.redactText(redactedOptions.prompt, options);
+        }
+
+        if (Array.isArray(redactedOptions.messages)) {
+            redactedOptions.messages = await this.redactMessages(redactedOptions.messages, options);
+        }
+
+        return redactedOptions;
+    }
+
+    /** Redacts `chatOptions`, then passes the result to `endpoint.chat`. */
+    async redactAndCall<TOptions extends ChatLikeOptions, TResult>(
+        endpoint: ChatEndpoint<TOptions, TResult>,
+        chatOptions: TOptions,
+        options: AwsComprehendRedactOptions = {}
+    ): Promise<TResult> {
+        const redactedOptions = await this.redactChatOptions(chatOptions, options);
+        return endpoint.chat(redactedOptions);
+    }
+
+    /**
+     * Replaces each entity range in `text` with its redaction token.
+     *
+     * Entities failing the filters are skipped, ranges running past the end of
+     * `text` are truncated to it, and empty, inverted or negative ranges are
+     * dropped. Overlapping ranges are merged so each span is replaced once.
+     */
+    applyRedactions(
+        text: string,
+        entities: NormalizedPiiEntity[],
+        options: AwsComprehendRedactOptions = {}
+    ): string {
+        const ranges = this.mergeRanges(
+            entities
+                .filter((entity) => this.shouldRedactEntity(entity, options))
+                .map((entity) => this.clampToText(text, entity))
+                .filter((entity): entity is NormalizedPiiEntity => entity !== null)
+        );
+
+        // Replace right-to-left so earlier offsets stay valid as the text changes.
+        return ranges.reduceRight((result, entity) => {
+            const token = this.getRedactionToken(entity, options);
+            return `${result.slice(0, entity.beginOffset)}${token}${result.slice(
+                entity.endOffset
+            )}`;
+        }, text);
+    }
+
+    /** Converts a Comprehend entity, or returns null when its offsets are unusable. */
+    private normalizeEntity(
+        entity: PiiEntity,
+        toStringIndex: (offset: number) => number
+    ): NormalizedPiiEntity | null {
+        if (
+            typeof entity.BeginOffset !== "number" ||
+            typeof entity.EndOffset !== "number" ||
+            entity.BeginOffset >= entity.EndOffset
+        ) {
+            return null;
+        }
+
+        return {
+            beginOffset: toStringIndex(entity.BeginOffset),
+            endOffset: toStringIndex(entity.EndOffset),
+            type: entity.Type || "PII",
+            score: entity.Score ?? 0,
+        };
+    }
+
+    /**
+     * Builds a mapper from Comprehend offsets to indices into `text`.
+     *
+     * Comprehend's API reference describes offsets as code point positions,
+     * while JavaScript strings are indexed in UTF-16 code units; the two
+     * diverge after any character outside the BMP (for example most emoji).
+     */
+    private createOffsetMapper(text: string): (offset: number) => number {
+        // Without surrogates the two numbering schemes are identical.
+        if (!SURROGATE_PATTERN.test(text)) return (offset) => offset;
+
+        // starts[i] is where code point i begins; the extra last entry lets an
+        // exclusive end offset map to text.length.
+        const starts: number[] = [];
+        let index = 0;
+        for (const char of text) {
+            starts.push(index);
+            index += char.length;
+        }
+        starts.push(index);
+
+        // Offsets outside the table map to text.length so later checks clamp or drop them.
+        return (offset) => starts[offset] ?? text.length;
+    }
+
+    /** Applies the confidence threshold and the optional entity type allow-list. */
+    private shouldRedactEntity(
+        entity: NormalizedPiiEntity,
+        options: AwsComprehendRedactOptions
+    ): boolean {
+        const confidenceThreshold =
+            options.confidenceThreshold ?? this.defaults.confidenceThreshold;
+        const piiEntityTypes = options.piiEntityTypes ?? this.defaults.piiEntityTypes;
+
+        if (entity.score < confidenceThreshold) return false;
+        // An absent or empty allow-list means every entity type is redacted.
+        if (!piiEntityTypes?.length) return true;
+
+        return piiEntityTypes.includes(entity.type);
+    }
+
+    /** Picks the per-call language code, falling back to the instance default. */
+    private getLanguageCode(options: AwsComprehendRedactOptions): LanguageCode {
+        return (options.languageCode || this.defaults.languageCode) as LanguageCode;
+    }
+
+    /** Resolves the token: per-call option, then instance default, then "[REDACTED]". */
+    private getRedactionToken(
+        entity: NormalizedPiiEntity,
+        options: AwsComprehendRedactOptions
+    ): string {
+        const token = options.redactionToken ?? this.defaults.redactionToken;
+
+        if (typeof token === "function") return token(entity);
+        // `??` rather than `||`: an empty string is a valid token (strip the PII).
+        return token ?? DEFAULT_REDACTION_TOKEN;
+    }
+
+    /**
+     * Fits a range to `text`: the end is truncated to `text.length`, and ranges
+     * that are empty, inverted, negative or NaN yield null.
+     */
+    private clampToText(text: string, entity: NormalizedPiiEntity): NormalizedPiiEntity | null {
+        const endOffset = Math.min(entity.endOffset, text.length);
+
+        // Phrased positively so that NaN offsets fail the check as well.
+        if (!(entity.beginOffset >= 0 && entity.beginOffset < endOffset)) return null;
+
+        return endOffset === entity.endOffset ? entity : { ...entity, endOffset };
+    }
+
+    /**
+     * Sorts ranges and folds overlapping ones together. A merged range keeps
+     * the type of its first member and the highest score of the group.
+     */
+    private mergeRanges(entities: NormalizedPiiEntity[]): NormalizedPiiEntity[] {
+        const sorted = [...entities].sort(
+            (a, b) => a.beginOffset - b.beginOffset || b.endOffset - a.endOffset
+        );
+        const merged: NormalizedPiiEntity[] = [];
+
+        for (const entity of sorted) {
+            const previous = merged[merged.length - 1];
+
+            if (!previous || entity.beginOffset >= previous.endOffset) {
+                merged.push({ ...entity });
+                continue;
+            }
+
+            previous.endOffset = Math.max(previous.endOffset, entity.endOffset);
+            previous.score = Math.max(previous.score, entity.score);
+        }
+
+        return merged;
+    }
+}
diff --git a/JS/edgechains/arakoodev/src/ai/src/tests/awsComprehendRedactor.test.ts b/JS/edgechains/arakoodev/src/ai/src/tests/awsComprehendRedactor.test.ts
new file mode 100644
index 000000000..278ac6631
--- /dev/null
+++ b/JS/edgechains/arakoodev/src/ai/src/tests/awsComprehendRedactor.test.ts
@@ -0,0 +1,113 @@
+import { describe, expect, test, vi } from "vitest";
+import { AwsComprehendRedactor } from "../lib/aws-comprehend/awsComprehendRedactor.js";
+
+const buildEntity = (text: string, value: string, type = "EMAIL") => {
+    const beginOffset = text.indexOf(value);
+    return {
+        BeginOffset: beginOffset,
+        EndOffset: beginOffset + value.length,
+        Score: 0.99,
+        Type: type,
+    };
+};
+
+describe("AwsComprehendRedactor", () => {
+    test("redacts detected PII in text", async () => {
+        const text = "Email jane@example.com before launch.";
+        const client = {
+            send: vi.fn().mockResolvedValue({
+                Entities: [buildEntity(text, "jane@example.com")],
+            }),
+        };
+        const redactor = new AwsComprehendRedactor({ client });
+
+        await expect(redactor.redactText(text)).resolves.toBe(
+            "Email [REDACTED] before launch."
+        );
+        expect(client.send).toHaveBeenCalledTimes(1);
+        expect(client.send.mock.calls[0][0].input).toMatchObject({
+            Text: text,
+            LanguageCode: "en",
+        });
+    });
+
+    test("redacts prompts and message content before calling an endpoint", async () => {
+        const prompt = "My phone is 555-0100.";
+        const client = {
+            send: vi.fn().mockResolvedValue({
+                Entities: [buildEntity(prompt, "555-0100", "PHONE")],
+            }),
+        };
+        const endpoint = {
+            chat: vi.fn().mockResolvedValue("ok"),
+        };
+        const redactor = new AwsComprehendRedactor({
+            client,
+            redactionToken: (entity) => `[${entity.type}]`,
+        });
+
+        const result = await redactor.redactAndCall(endpoint, { prompt });
+
+        expect(result).toBe("ok");
+        expect(endpoint.chat).toHaveBeenCalledWith({ prompt: "My phone is [PHONE]." });
+    });
+
+    test("honors confidence and entity type filters", () => {
+        const text = "Email jane@example.com and call 555-0100.";
+        const email = {
+            beginOffset: text.indexOf("jane@example.com"),
+            endOffset: text.indexOf("jane@example.com") + "jane@example.com".length,
+            score: 0.99,
+            type: "EMAIL",
+        };
+        const phone = {
+            beginOffset: text.indexOf("555-0100"),
+            endOffset: text.indexOf("555-0100") + "555-0100".length,
+            score: 0.4,
+            type: "PHONE",
+        };
+        const redactor = new AwsComprehendRedactor({
+            client: { send: vi.fn() },
+            confidenceThreshold: 0.9,
+            piiEntityTypes: ["EMAIL"],
+        });
+
+        expect(redactor.applyRedactions(text, [email, phone])).toBe(
+            "Email [REDACTED] and call 555-0100."
+        );
+    });
+
+    test("maps code point offsets onto string indices", async () => {
+        const email = "jane@example.com";
+        // The emoji is one code point but two UTF-16 code units.
+        const text = `\u{1F600} Email ${email} now.`;
+        const beginOffset = Array.from(text.slice(0, text.indexOf(email))).length;
+        const client = {
+            send: vi.fn().mockResolvedValue({
+                Entities: [
+                    {
+                        BeginOffset: beginOffset,
+                        EndOffset: beginOffset + email.length,
+                        Score: 0.99,
+                        Type: "EMAIL",
+                    },
+                ],
+            }),
+        };
+        const redactor = new AwsComprehendRedactor({ client });
+
+        await expect(redactor.redactText(text)).resolves.toBe(
+            "\u{1F600} Email [REDACTED] now."
+        );
+    });
+
+    test("honors an empty token and truncates ranges that overrun the text", () => {
+        const redactor = new AwsComprehendRedactor({
+            client: { send: vi.fn() },
+            redactionToken: "",
+        });
+        const name = { beginOffset: 4, endOffset: 99, score: 1, type: "NAME" };
+
+        expect(redactor.applyRedactions("Hi, Jane", [name])).toBe("Hi, ");
+    });
+});
diff --git a/JS/edgechains/examples/aws-comprehend-redaction/package.json b/JS/edgechains/examples/aws-comprehend-redaction/package.json
new file mode 100644
index 000000000..750927134
--- /dev/null
+++ b/JS/edgechains/examples/aws-comprehend-redaction/package.json
@@ -0,0 +1,12 @@
+{
+    "name": "aws-comprehend-redaction",
+    "version": "0.0.1",
+    "type": "module",
+    "scripts": {
+        "start": "tsx src/index.ts"
+    },
+    "dependencies": {
+        "@arakoodev/edgechains.js": "file:../../arakoodev",
+        "tsx": "^4.19.2"
+    }
+}
diff --git a/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts b/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts
new file mode 100644
index 000000000..2b9f93747
--- /dev/null
+++ b/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts
@@ -0,0 +1,24 @@
+import { AwsComprehendRedactor, OpenAI } from "@arakoodev/edgechains.js/ai";
+
+const redactor = new AwsComprehendRedactor({
+    region: process.env.AWS_REGION || "us-east-1",
+    confidenceThreshold: 0.9,
+    redactionToken: (entity) => `[${entity.type}]`,
+});
+
+const openAI = new OpenAI({
+    apiKey: process.env.OPENAI_API_KEY,
+    orgId: process.env.OPENAI_ORG_ID,
+});
+
+const prompt = "Summarize this support note: Jane Doe can be reached at jane@example.com.";
+const redactedPrompt = await redactor.redactPrompt(prompt);
+
+console.log("Redacted prompt:", redactedPrompt);
+
+const response = await redactor.redactAndCall(openAI, {
+    prompt,
+    max_tokens: 100,
+});
+
+console.log(response);