diff --git a/cli/src/__tests__/python-bridge/PythonBridge.test.ts b/cli/src/__tests__/python-bridge/PythonBridge.test.ts index 06c4f02..2c57e07 100644 --- a/cli/src/__tests__/python-bridge/PythonBridge.test.ts +++ b/cli/src/__tests__/python-bridge/PythonBridge.test.ts @@ -483,10 +483,10 @@ describe('PythonBridge Timeout Handling', () => { const mockProcess = new EventEmitter() as any; mockProcess.stdout = new EventEmitter(); mockProcess.stderr = new EventEmitter(); - mockProcess.stdin = { - write: jest.fn(), - end: jest.fn(), - }; + const stdinEmitter = new EventEmitter() as any; + stdinEmitter.write = jest.fn(); + stdinEmitter.end = jest.fn(); + mockProcess.stdin = stdinEmitter; mockProcess.kill = jest.fn(); mockProcess.exitCode = null; mockSpawn.mockReturnValue(mockProcess); @@ -503,10 +503,10 @@ describe('PythonBridge Timeout Handling', () => { const mockProcess = new EventEmitter() as any; mockProcess.stdout = new EventEmitter(); mockProcess.stderr = new EventEmitter(); - mockProcess.stdin = { - write: jest.fn(), - end: jest.fn(), - }; + const stdinEmitter2 = new EventEmitter() as any; + stdinEmitter2.write = jest.fn(); + stdinEmitter2.end = jest.fn(); + mockProcess.stdin = stdinEmitter2; mockProcess.kill = jest.fn(); mockProcess.exitCode = null; diff --git a/cli/src/python-bridge/python-bridge.ts b/cli/src/python-bridge/python-bridge.ts index f0afca2..7369217 100644 --- a/cli/src/python-bridge/python-bridge.ts +++ b/cli/src/python-bridge/python-bridge.ts @@ -146,7 +146,13 @@ function detectPythonExecutable(): string { // 3. Check for explicit path from environment (for CI) const environmentPath = process.env.DOCIMP_PYTHON_PATH; if (environmentPath) { - return environmentPath; + if (existsSync(environmentPath)) { + return environmentPath; + } + throw new Error( + `DOCIMP_PYTHON_PATH is set to "${environmentPath}" but executable does not exist.\n` + + `Please check the path or unset the environment variable.` + ); } // 4. Try common Python executables @@ -396,79 +402,7 @@ export class PythonBridge implements IPythonBridge { arguments_.push('--audit-file', absoluteAuditFile); } - // Handle uv wrapper: spawn('uv', ['run', 'python', ...args]) instead of spawn('python', args) - let executable: string; - let spawnArguments: string[]; - - if (this.pythonPath === 'uv') { - executable = 'uv'; - // Use --project flag to point to project root (parent of analyzer/ directory) - // This ensures uv finds pyproject.toml even when cwd is analyzer/ - spawnArguments = ['run', '--project', '..', 'python', ...arguments_]; - } else { - executable = this.pythonPath; - spawnArguments = arguments_; - } - - // Clean up environment for uv run: remove VIRTUAL_ENV to avoid conflicts - const environment = { ...process.env }; - if (this.pythonPath === 'uv') { - delete environment.VIRTUAL_ENV; - } - - const childProcess = spawn(executable, spawnArguments, { - cwd: this.analyzerModule, - env: environment, - }); - - // Setup timeout handling - const { cleanup, timeoutPromise } = this.setupProcessTimeout( - childProcess, - this.defaultTimeout, - 'apply-audit' - ); - - // Create promise for normal process completion - const processPromise = new Promise((resolve, reject) => { - let stderr = ''; - - // Send ratings as JSON via stdin - childProcess.stdin.write(JSON.stringify(ratings)); - childProcess.stdin.end(); - - childProcess.stderr.on('data', (data: Buffer) => { - stderr += data.toString(); - }); - - childProcess.on('error', (error: Error) => { - reject( - new Error( - `Failed to spawn Python process: ${error.message}\n` + - `Make sure Python is installed and the analyzer module is available.` - ) - ); - }); - - childProcess.on('close', (code: number) => { - if (code !== 0) { - reject( - new Error( - `Python analyzer exited with code ${code}\n` + `stderr: ${stderr}` - ) - ); - return; - } - - resolve(); - }); - }); - - // Race between timeout and normal completion, cleanup in finally - try { - return await Promise.race([processPromise, timeoutPromise]); - } finally { - cleanup(); - } + return this.executePythonStdin(arguments_, ratings, 'apply-audit'); } /** @@ -563,22 +497,92 @@ export class PythonBridge implements IPythonBridge { */ async apply(data: ApplyData): Promise { const arguments_ = ['-m', 'src.main', 'apply']; + return this.executePythonStdin(arguments_, data, 'apply'); + } - // Handle uv wrapper: spawn('uv', ['run', 'python', ...args]) instead of spawn('python', args) + /** + * Setup timeout handling for a child process. + * + * Implements graceful shutdown: SIGTERM -> wait -> SIGKILL + * + * @param childProcess - The child process to monitor + * @param timeoutMs - Timeout in milliseconds + * @param commandName - Name of command for error messages + * @returns Object with cleanup function and timeout promise + */ + private setupProcessTimeout( + childProcess: ChildProcess, + timeoutMs: number, + commandName: string + ): { cleanup: () => void; timeoutPromise: Promise } { + let timeoutId: NodeJS.Timeout | null = null; + let killTimeoutId: NodeJS.Timeout | null = null; + + const cleanup = () => { + if (timeoutId) { + clearTimeout(timeoutId); + timeoutId = null; + } + if (killTimeoutId) { + clearTimeout(killTimeoutId); + killTimeoutId = null; + } + }; + + const timeoutPromise = new Promise((_resolve, reject) => { + timeoutId = setTimeout(() => { + // Try graceful shutdown first (SIGTERM) + childProcess.kill('SIGTERM'); + + // If process doesn't exit within configured delay, force kill (SIGKILL) + killTimeoutId = setTimeout(() => { + if (childProcess.exitCode === null) { + childProcess.kill('SIGKILL'); + } + }, this.killEscalationDelay); + + reject( + new Error( + `Python ${commandName} command timed out after ${timeoutMs}ms.\n` + + `The Python process may be frozen or the operation is taking too long.\n` + + `Consider increasing the timeout in your docimp.config.js file.` + ) + ); + }, timeoutMs); + }); + + return { cleanup, timeoutPromise }; + } + + /** + * Execute a Python command that receives JSON data via stdin. + * + * Unlike executePython which reads JSON from stdout, this method + * writes JSON data to the process's stdin and waits for exit. + * + * @param arguments_ - Python module arguments + * @param stdinData - Data to serialize as JSON and send via stdin + * @param commandName - Name of command for error messages + * @param timeoutMs - Optional timeout override (default: this.defaultTimeout) + * @returns Promise resolving when the command completes successfully + */ + private async executePythonStdin( + arguments_: string[], + stdinData: unknown, + commandName: string, + timeoutMs?: number + ): Promise { let executable: string; let spawnArguments: string[]; if (this.pythonPath === 'uv') { executable = 'uv'; - // Use --project flag to point to project root (parent of analyzer/ directory) - // This ensures uv finds pyproject.toml even when cwd is analyzer/ spawnArguments = ['run', '--project', '..', 'python', ...arguments_]; } else { executable = this.pythonPath; spawnArguments = arguments_; } - // Clean up environment for uv run: remove VIRTUAL_ENV to avoid conflicts const environment = { ...process.env }; if (this.pythonPath === 'uv') { delete environment.VIRTUAL_ENV; @@ -589,19 +593,23 @@ export class PythonBridge implements IPythonBridge { env: environment, }); - // Setup timeout handling + const timeout = timeoutMs ?? this.defaultTimeout; const { cleanup, timeoutPromise } = this.setupProcessTimeout( childProcess, - this.defaultTimeout, - 'apply' + timeout, + commandName ); - // Create promise for normal process completion const processPromise = new Promise((resolve, reject) => { let stderr = ''; - // Send apply data as JSON via stdin - childProcess.stdin.write(JSON.stringify(data)); + childProcess.stdin.on('error', (error: Error) => { + reject( + new Error(`Failed to write to Python process stdin: ${error.message}`) + ); + }); + + childProcess.stdin.write(JSON.stringify(stdinData)); childProcess.stdin.end(); childProcess.stderr.on('data', (data: Buffer) => { @@ -626,12 +634,10 @@ export class PythonBridge implements IPythonBridge { ); return; } - resolve(); }); }); - // Race between timeout and normal completion, cleanup in finally try { return await Promise.race([processPromise, timeoutPromise]); } finally { @@ -639,60 +645,6 @@ export class PythonBridge implements IPythonBridge { } } - /** - * Setup timeout handling for a child process. - * - * Implements graceful shutdown: SIGTERM -> wait -> SIGKILL - * - * @param childProcess - The child process to monitor - * @param timeoutMs - Timeout in milliseconds - * @param commandName - Name of command for error messages - * @returns Object with cleanup function and timeout promise - */ - private setupProcessTimeout( - childProcess: ChildProcess, - timeoutMs: number, - commandName: string - ): { cleanup: () => void; timeoutPromise: Promise } { - let timeoutId: NodeJS.Timeout | null = null; - let killTimeoutId: NodeJS.Timeout | null = null; - - const cleanup = () => { - if (timeoutId) { - clearTimeout(timeoutId); - timeoutId = null; - } - if (killTimeoutId) { - clearTimeout(killTimeoutId); - killTimeoutId = null; - } - }; - - const timeoutPromise = new Promise((_resolve, reject) => { - timeoutId = setTimeout(() => { - // Try graceful shutdown first (SIGTERM) - childProcess.kill('SIGTERM'); - - // If process doesn't exit within configured delay, force kill (SIGKILL) - killTimeoutId = setTimeout(() => { - if (childProcess.exitCode === null) { - childProcess.kill('SIGKILL'); - } - }, this.killEscalationDelay); - - reject( - new Error( - `Python ${commandName} command timed out after ${timeoutMs}ms.\n` + - `The Python process may be frozen or the operation is taking too long.\n` + - `Consider increasing the timeout in your docimp.config.js file.` - ) - ); - }, timeoutMs); - }); - - return { cleanup, timeoutPromise }; - } - /** * Execute Python subprocess and return text output (not JSON). * diff --git a/cli/src/utils/file-tracker.ts b/cli/src/utils/file-tracker.ts index ba73919..8c8ebc7 100644 --- a/cli/src/utils/file-tracker.ts +++ b/cli/src/utils/file-tracker.ts @@ -5,7 +5,7 @@ */ import { createHash } from 'node:crypto'; -import { promises as fs } from 'node:fs'; +import { createReadStream, promises as fs } from 'node:fs'; export interface FileSnapshot { filepath: string; @@ -19,6 +19,23 @@ export interface CodeItem { [key: string]: unknown; } +/** + * Compute SHA256 checksum of a file using streaming to avoid + * loading the entire file into memory. + * + * @param filepath - Path to the file to checksum + * @returns Promise resolving to hex-encoded SHA256 checksum + */ +function computeFileChecksum(filepath: string): Promise { + return new Promise((resolve, reject) => { + const hash = createHash('sha256'); + const stream = createReadStream(filepath); + stream.on('data', (chunk) => hash.update(chunk)); + stream.on('end', () => resolve(hash.digest('hex'))); + stream.on('error', reject); + }); +} + /** * Tracks file modifications using checksums and timestamps. */ @@ -50,11 +67,8 @@ export const FileTracker = { const timestamp = stats.mtimeMs; const size = stats.size; - // Compute SHA256 checksum - const fileBuffer = await fs.readFile(filepath); - const hash = createHash('sha256'); - hash.update(fileBuffer); - const checksum = hash.digest('hex'); + // Compute SHA256 checksum using streaming for constant memory usage + const checksum = await computeFileChecksum(filepath); // Create snapshot return { @@ -64,10 +78,11 @@ export const FileTracker = { size, }; } catch (error) { - // Log permission errors but continue with other files const nodeError = error as NodeJS.ErrnoException; if (nodeError.code === 'EACCES' || nodeError.code === 'EPERM') { console.warn(`Warning: Permission denied when reading ${filepath}`); + } else if (nodeError.code === 'ENOENT') { + console.warn(`Warning: File not found, skipping: ${filepath}`); } // Skip files we can't read (permission errors, non-existent files, etc.) return null; @@ -115,11 +130,8 @@ export const FileTracker = { // Check if file still exists await fs.access(filepath); - // Recompute checksum - const fileBuffer = await fs.readFile(filepath); - const hash = createHash('sha256'); - hash.update(fileBuffer); - const newChecksum = hash.digest('hex'); + // Recompute checksum using streaming for constant memory usage + const newChecksum = await computeFileChecksum(filepath); // Compare checksums (timestamp changes alone don't count) if (newChecksum !== oldSnapshot.checksum) { diff --git a/pyproject.toml b/pyproject.toml index 642e640..c60cd7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.13" license = { text = "AGPL-3.0" } dependencies = [ - "anthropic>=0.72.0,<1.0.0", + "anthropic>=0.86.0,<1.0.0", "pydantic>=2.12.3,<3.0.0", "pydantic-core>=2.41.4,<3.0.0", "typing-extensions>=4.9.0", diff --git a/requirements.in b/requirements.in index a2f39d3..2968e73 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ # DocImp Core Dependencies -anthropic>=0.72.0,<1.0.0 +anthropic>=0.86.0,<1.0.0 pydantic>=2.12.3,<3.0.0 pydantic-core>=2.41.4,<3.0.0 typing-extensions>=4.9.0 diff --git a/requirements.lock b/requirements.lock index 9958cf0..6730ba2 100644 --- a/requirements.lock +++ b/requirements.lock @@ -2,7 +2,7 @@ # uv pip compile requirements.in -o requirements.lock annotated-types==0.7.0 # via pydantic -anthropic==0.72.0 +anthropic==0.86.0 # via -r requirements.in anyio==4.11.0 # via @@ -44,6 +44,7 @@ typing-extensions==4.15.0 # via # -r requirements.in # anthropic + # anyio # pydantic # pydantic-core # typing-inspection diff --git a/requirements.txt b/requirements.txt index 314a557..a3f9b82 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # Install with: pip install -r requirements.txt # Claude API client for AI-powered documentation generation -anthropic>=0.72.0,<1.0.0 +anthropic>=0.86.0,<1.0.0 # Data validation and serialization # Note: pydantic and pydantic-core must be version-compatible diff --git a/uv.lock b/uv.lock index a3338b2..0c7a49b 100644 --- a/uv.lock +++ b/uv.lock @@ -13,7 +13,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.72.0" +version = "0.86.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -25,9 +25,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/49/07/61f3ca8e69c5dcdaec31b36b79a53ea21c5b4ca5e93c7df58c71f43bf8d8/anthropic-0.72.0.tar.gz", hash = "sha256:8971fe76dcffc644f74ac3883069beb1527641115ae0d6eb8fa21c1ce4082f7a", size = 493721, upload-time = "2025-10-28T19:13:01.755Z" } +sdist = { url = "https://files.pythonhosted.org/packages/37/7a/8b390dc47945d3169875d342847431e5f7d5fa716b2e37494d57cfc1db10/anthropic-0.86.0.tar.gz", hash = "sha256:60023a7e879aa4fbb1fed99d487fe407b2ebf6569603e5047cfe304cebdaa0e5", size = 583820, upload-time = "2026-03-18T18:43:08.017Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/b7/160d4fb30080395b4143f1d1a4f6c646ba9105561108d2a434b606c03579/anthropic-0.72.0-py3-none-any.whl", hash = "sha256:0e9f5a7582f038cab8efbb4c959e49ef654a56bfc7ba2da51b5a7b8a84de2e4d", size = 357464, upload-time = "2025-10-28T19:13:00.215Z" }, + { url = "https://files.pythonhosted.org/packages/63/5f/67db29c6e5d16c8c9c4652d3efb934d89cb750cad201539141781d8eae14/anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57", size = 469400, upload-time = "2026-03-18T18:43:06.526Z" }, ] [[package]] @@ -152,7 +152,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "anthropic", specifier = ">=0.72.0,<1.0.0" }, + { name = "anthropic", specifier = ">=0.86.0,<1.0.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.7.0" }, { name = "pydantic", specifier = ">=2.12.3,<3.0.0" }, { name = "pydantic-core", specifier = ">=2.41.4,<3.0.0" },