diff --git a/JS/edgechains/arakoodev/package.json b/JS/edgechains/arakoodev/package.json index 0b0bd3784..491ca4f7b 100644 --- a/JS/edgechains/arakoodev/package.json +++ b/JS/edgechains/arakoodev/package.json @@ -1,79 +1,160 @@ -{ + + "name": "@arakoodev/edgechains.js", + "version": "0.1.23", + "main": "dist/index.js", + "files": [ + "dist" + ], + "exports": { + "./ai": "./dist/ai/src/index.js", + "./vector-db": "./dist/vector-db/src/index.js", - "./document-loader": "./dist/document-loader/src/index.js", - "./splitter": "./dist/splitter/src/index.js", - "./arakooserver": "./dist/arakooserver/src/index.js", - "./db": "./dist/db/src/index.js", - "./scraper": "./dist/scraper/src/index.js", - "./sync-rpc": "./dist/sync-rpc/index.js" + + "./document-loader": "./dist/document-loader/src/index.js", + + "./splitter": "./dist/splitter/src/index.js", + + "./arakooserver": "./dist/arakooserver/src/index.js", + + "./db": "./dist/db/src/index.js", + + "./scraper": "./dist/scraper/src/index.js", + + "./sync-rpc": "./dist/sync-rpc/index.js" + }, + "scripts": { + "build": "rm -rf dist && tsc -b", + "lint": "eslint --ignore-path .eslintignore --ext .js,.ts", - "format": "prettier --ignore-path .gitignore --write \"**/*.+(js|ts|json)\"", - "test": "vitest" + + "format": "prettier --ignore-path .gitignore --write \"**/*.+(js|ts|json)\"", + + "test": "vitest" + }, + "dependencies": { + + "@aws-sdk/client-comprehend": "^3.1055.0", + "@babel/core": "^7.24.4", - "@babel/preset-env": "^7.24.4", - "@hono/node-server": "^0.6.0", - "@lifeomic/attempt": "^3.1.0", - "@playwright/test": "^1.45.3", - "@supabase/supabase-js": "^2.42.3", - "axios": "^1.7.4", - "axios-retry": "^4.1.0", - "cheerio": "^1.0.0-rc.12", - "cors": "^2.8.5", - "document": "^0.4.7", - "dts-bundle-generator": "^9.3.1", - "esbuild": "^0.20.2", - "hono": "3.9", - "jest-environment-jsdom": "^29.7.0", - "jsdom": "^24.1.0", - "node-html-parser": "^6.1.13", - "pdf-parse": "^1.1.1", - "pg": "^8.11.5", - "playwright": "^1.45.1", - 
"prettier": "^3.2.5", - "regenerator-runtime": "^0.14.1", - "request": "^2.88.2", - "retell-client-js-sdk": "^2.0.4", - "retell-sdk": "^4.9.0", - "retry": "^0.13.1", - "ts-node": "^10.9.2", - "typeorm": "^0.3.20", - "vitest": "^2.0.3", - "youtube-transcript": "^1.2.1", - "zod": "^3.23.8", - "zod-to-json-schema": "^3.23.0" + + "@babel/preset-env": "^7.24.4", + + "@hono/node-server": "^0.6.0", + + "@lifeomic/attempt": "^3.1.0", + + "@playwright/test": "^1.45.3", + + "@supabase/supabase-js": "^2.42.3", + + "axios": "^1.7.4", + + "axios-retry": "^4.1.0", + + "cheerio": "^1.0.0-rc.12", + + "cors": "^2.8.5", + + "document": "^0.4.7", + + "dts-bundle-generator": "^9.3.1", + + "esbuild": "^0.20.2", + + "hono": "3.9", + + "jest-environment-jsdom": "^29.7.0", + + "jsdom": "^24.1.0", + + "node-html-parser": "^6.1.13", + + "pdf-parse": "^1.1.1", + + "pg": "^8.11.5", + + "playwright": "^1.45.1", + + "prettier": "^3.2.5", + + "regenerator-runtime": "^0.14.1", + + "request": "^2.88.2", + + "retell-client-js-sdk": "^2.0.4", + + "retell-sdk": "^4.9.0", + + "retry": "^0.13.1", + + "ts-node": "^10.9.2", + + "typeorm": "^0.3.20", + + "vitest": "^2.0.3", + + "youtube-transcript": "^1.2.1", + + "zod": "^3.23.8", + + "zod-to-json-schema": "^3.23.0" + }, + "keywords": [], + "author": "", + "license": "ISC", + "devDependencies": { + "@babel/preset-typescript": "^7.24.1", + "@types/cors": "^2.8.17", - "@types/jest": "^29.5.12", - "@types/node": "^20.17.2", - "@types/pdf-parse": "^1.1.4", - "@types/ws": "^8.5.12", - "buffer": "^6.0.3", - "crypto-browserify": "^3.12.1", - "jest": "^29.7.0", - "process": "^0.11.10", - "stream-browserify": "^3.0.0", - "stream-http": "^3.2.0", - "ts-jest": "^29.1.2", - "ts-loader": "^9.5.1", - "typescript": "^5.6.3", - "util": "^0.12.5" + + "@types/jest": "^29.5.12", + + "@types/node": "^20.17.2", + + "@types/pdf-parse": "^1.1.4", + + "@types/ws": "^8.5.12", + + "buffer": "^6.0.3", + + "crypto-browserify": "^3.12.1", + + "jest": "^29.7.0", + + "process": 
/**
 * Replaces PII spans in text, chat messages and chat inputs with a mask.
 *
 * Spans come from the `Entities` array returned by `client.send(...)` for a
 * `DetectPiiEntitiesCommand`. Each entity's `BeginOffset`/`EndOffset` is used
 * as an index into the JavaScript string passed to {@link redactText}.
 */
export class AwsComprehendRedactor {
    private readonly client: ComprehendClientLike;
    private readonly languageCode: LanguageCode;
    private readonly minScore: number;
    private readonly entityTypes?: Set<string>;
    private readonly mask: string | ((entity: PiiEntity) => string);

    /**
     * @param options - Optional overrides.
     *   - `client`: object used to send detection commands; when omitted a
     *     `ComprehendClient` is created.
     *   - `region`: region for the created client; falls back to
     *     `AWS_REGION`, then `AWS_DEFAULT_REGION`.
     *   - `languageCode`: defaults to `LanguageCode.EN`.
     *   - `minScore`: entities scoring below this are ignored (default `0`).
     *   - `entityTypes`: when given, only these entity types are redacted.
     *   - `mask`: replacement string, or a function producing one per entity.
     *     Defaults to `[REDACTED_<Type>]`.
     */
    constructor(options: AwsComprehendRedactorOptions = {}) {
        this.client =
            options.client ||
            new ComprehendClient({
                // `||` on purpose: an empty env/option string should fall through.
                region: options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION,
            });
        this.languageCode = options.languageCode || LanguageCode.EN;
        this.minScore = options.minScore ?? 0;
        this.entityTypes = options.entityTypes ? new Set(options.entityTypes) : undefined;
        // `??` rather than `||`: an empty-string mask is a valid request to
        // delete the PII outright and must not be swapped for the default.
        this.mask =
            options.mask ?? ((entity: PiiEntity) => `[REDACTED_${entity.Type || "PII"}]`);
    }

    /**
     * Redacts every selected PII entity in `text`.
     *
     * @param text - Text to scan. Falsy input is returned unchanged without
     *   calling the client.
     * @returns The text with each selected span replaced by the mask.
     *   Overlapping spans are collapsed into a single mask.
     * @throws Whatever `client.send` rejects with.
     */
    async redactText(text: string): Promise<string> {
        if (!text) {
            return text;
        }

        const response = await this.client.send(
            new DetectPiiEntitiesCommand({
                Text: text,
                LanguageCode: this.languageCode,
            })
        );

        // `filter` returns a fresh array, so the in-place sort cannot touch
        // the response object.
        const selected = (response.Entities ?? [])
            .filter((entity) => this.isSelected(entity))
            .sort((a, b) => (a.BeginOffset ?? 0) - (b.BeginOffset ?? 0));

        return this.applyMasks(text, selected);
    }

    /**
     * Redacts the `content` of each message, preserving all other fields.
     *
     * @param messages - Messages to redact; the input array and its items are
     *   not mutated.
     * @returns New message objects in the same order.
     */
    async redactMessages<T extends MessageLike>(messages: T[]): Promise<T[]> {
        return Promise.all(
            messages.map(async (message) => ({
                ...message,
                content: await this.redactText(message.content),
            }))
        );
    }

    /**
     * Redacts `prompt` and `messages` of a chat input, copying every other
     * field through untouched.
     *
     * @param input - Chat input; not mutated.
     * @returns A shallow copy with redacted `prompt`/`messages`. Fields that
     *   are `undefined` or absent on the input are left exactly as they were
     *   (no `undefined`-valued keys are introduced).
     */
    async redactChatInput<T extends ChatInputWithPrompt>(input: T): Promise<T> {
        const redacted: ChatInputWithPrompt = { ...input };

        if (input.prompt !== undefined) {
            redacted.prompt = await this.redactText(input.prompt);
        }
        if (input.messages !== undefined) {
            redacted.messages = await this.redactMessages(input.messages);
        }

        // Same keys as `input`; only `prompt`/`messages` values were replaced
        // with values of the same declared types.
        return redacted as T;
    }

    /** True when the entity has both offsets and passes the score and type filters. */
    private isSelected(entity: PiiEntity): boolean {
        const hasOffsets = entity.BeginOffset !== undefined && entity.EndOffset !== undefined;
        const scoreOk = (entity.Score ?? 1) >= this.minScore;
        const typeOk = !this.entityTypes || this.entityTypes.has(entity.Type || "");
        return hasOffsets && scoreOk && typeOk;
    }

    /**
     * Rebuilds `text` left to right, swapping each span for its mask.
     * `entities` must be sorted by ascending `BeginOffset`.
     */
    private applyMasks(text: string, entities: PiiEntity[]): string {
        const pieces: string[] = [];
        let cursor = 0;

        for (const entity of entities) {
            // Clamp so a malformed offset can neither index from the end of
            // the string (negative) nor run backwards (end < begin).
            const begin = Math.min(Math.max(entity.BeginOffset ?? 0, 0), text.length);
            const end = Math.min(Math.max(entity.EndOffset ?? begin, begin), text.length);

            if (begin < cursor) {
                // Overlaps a span that is already masked: widen the masked
                // region instead of emitting a second mask, so no part of
                // either span leaks through.
                cursor = Math.max(cursor, end);
                continue;
            }

            pieces.push(
                text.slice(cursor, begin),
                typeof this.mask === "function" ? this.mask(entity) : this.mask
            );
            cursor = end;
        }

        pieces.push(text.slice(cursor));
        return pieces.join("");
    }
}
[ + { + Type: PiiEntityType.EMAIL, + BeginOffset: text.indexOf("secret@example.com"), + EndOffset: text.indexOf("secret@example.com") + "secret@example.com".length, + Score: 0.99, + }, + ] + : [], + }; + }), + }; + const redactor = new AwsComprehendRedactor({ client, mask: "[PRIVATE]" }); + + const result = await redactor.redactChatInput({ + prompt: "Summarize this: secret@example.com", + messages: [ + { role: "system", content: "Be concise" }, + { role: "user", content: "Contact secret@example.com today" }, + ], + model: "gpt-3.5-turbo", + }); + + expect(result).toEqual({ + prompt: "Summarize this: [PRIVATE]", + messages: [ + { role: "system", content: "Be concise" }, + { role: "user", content: "Contact [PRIVATE] today" }, + ], + model: "gpt-3.5-turbo", + }); + }); + + test("can filter by score and entity type", async () => { + const client = { + send: vi.fn(async () => ({ + Entities: [ + { Type: PiiEntityType.EMAIL, BeginOffset: 0, EndOffset: 15, Score: 0.6 }, + { Type: PiiEntityType.NAME, BeginOffset: 20, EndOffset: 24, Score: 0.99 }, + ], + })), + }; + const redactor = new AwsComprehendRedactor({ + client, + minScore: 0.9, + entityTypes: [PiiEntityType.EMAIL], + }); + + await expect(redactor.redactText("me@example.com says John")).resolves.toBe( + "me@example.com says John" + ); + }); +}); diff --git a/JS/edgechains/examples/aws-comprehend-redaction/package.json b/JS/edgechains/examples/aws-comprehend-redaction/package.json new file mode 100644 index 000000000..1ee1c7d42 --- /dev/null +++ b/JS/edgechains/examples/aws-comprehend-redaction/package.json @@ -0,0 +1,12 @@ +{ + "name": "aws-comprehend-redaction-example", + "type": "module", + "scripts": { + "start": "ts-node src/index.ts" + }, + "dependencies": { + "@arakoodev/edgechains.js": "file:../../arakoodev", + "ts-node": "^10.9.2", + "typescript": "^5.6.3" + } +} diff --git a/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts b/JS/edgechains/examples/aws-comprehend-redaction/src/index.ts new file 
import { AwsComprehendRedactor, OpenAI } from "@arakoodev/edgechains.js/ai";

/**
 * Example: redact a prompt with AwsComprehendRedactor, then pass the
 * redacted input to OpenAI's `chat` and print the response content.
 *
 * Reads AWS_REGION (defaulting to "us-east-1"), OPENAI_API_KEY and
 * OPENAI_ORG_ID from the environment.
 */
async function main() {
    const awsRegion = process.env.AWS_REGION || "us-east-1";
    const piiRedactor = new AwsComprehendRedactor({ region: awsRegion });

    const llm = new OpenAI({
        apiKey: process.env.OPENAI_API_KEY,
        orgId: process.env.OPENAI_ORG_ID,
    });

    // Redact first so the raw note never reaches the chat call.
    const sanitized = await piiRedactor.redactChatInput({
        prompt: "Summarize this support note without exposing PII: Jane Doe uses jane@example.com.",
        model: "gpt-3.5-turbo",
    });

    const reply = await llm.chat(sanitized);
    console.log(reply.content);
}

// Report any failure and exit non-zero.
main().catch((failure) => {
    console.error(failure);
    process.exit(1);
});