diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 0000000..2e510af --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1,2 @@ +/cache +/project.local.yml diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 0000000..91afe46 --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,154 @@ +# the name by which the project can be referenced within Serena +project_name: "twd-cli" + + +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp +# csharp_omnisharp dart elixir elm erlang +# fortran fsharp go groovy haskell +# haxe java julia kotlin lua +# markdown +# matlab nix pascal perl php +# php_phpactor powershell python python_jedi r +# rego ruby ruby_solargraph rust scala +# swift terraform toml typescript typescript_vts +# vue yaml zig +# (This list may be outdated. For the current list, see values of Language enum here: +# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py +# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# - For Free Pascal/Lazarus, use pascal +# Special requirements: +# Some languages require additional setup/installations. +# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: +- typescript + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# line ending convention to use when writing source files. +# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) +# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings. +line_ending: + +# The language backend to use for this project. +# If not set, the global setting from serena_config.yml is used. +# Valid values: LSP, JetBrains +# Note: the backend is fixed at startup. If a project with a different backend +# is activated post-init, an error will be returned. +language_backend: + +# whether to use project's .gitignore files to ignore files +ignore_all_files_in_gitignore: true + +# advanced configuration option allowing to configure language server-specific options. +# Maps the language key to the options. +# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. +# No documentation on options means no options are available. +ls_specific_settings: {} + +# list of additional paths to ignore in this project. +# Same syntax as gitignore, so you can use * and **. +# Note: global ignored_paths from serena_config.yml are also applied additively. +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. +# This extends the existing exclusions (e.g. from the global configuration) +# +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project based on the project name or path. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_memory`: Delete a memory file. Should only happen if a user asks for it explicitly, +# for example by saying that the information retrieved from a memory file is no longer correct +# or no longer relevant for the project. +# * `edit_memory`: Replaces content matching a regular expression in a memory. +# * `execute_shell_command`: Executes a shell command. +# * `find_file`: Finds files in the given relative paths +# * `find_referencing_symbols`: Finds symbols that reference the given symbol using the language server backend +# * `find_symbol`: Performs a global (or local) search using the language server backend. +# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. +# * `initial_instructions`: Provides instructions Serena usage (i.e. the 'Serena Instructions Manual') +# for clients that do not read the initial instructions when the MCP server is connected. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: List available memories. Any memory can be read using the `read_memory` tool. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Read the content of a memory file. This tool should only be used if the information +# is relevant to the current task. You can infer whether the information +# is relevant from the memory file name. +# You should not read the same memory file multiple times in the same conversation. +# * `rename_memory`: Renames or moves a memory. Moving between project and global scope is supported +# (e.g., renaming "global/foo" to "bar" moves it from global to project scope). +# * `rename_symbol`: Renames a symbol throughout the codebase using language server refactoring capabilities. +# For JB, we use a separate tool. +# * `replace_content`: Replaces content in a file (optionally using regular expressions). +# * `replace_symbol_body`: Replaces the full definition of a symbol using the language server backend. +# * `safe_delete_symbol`: +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `write_memory`: Write some information (utf-8-encoded) about this project that can be useful for future tasks to a memory in md format. +# The memory name should be meaningful. +excluded_tools: [] + +# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default). +# This extends the existing inclusions (e.g. from the global configuration). +included_optional_tools: [] + +# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. +# This cannot be combined with non-empty excluded_tools or included_optional_tools. +fixed_tools: [] + +# list of mode names to that are always to be included in the set of active modes +# The full set of modes to be activated is base_modes + default_modes. +# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this setting overrides the global configuration. +# Set this to [] to disable base modes for this project. +# Set this to a list of mode names to always include the respective modes for this project. +base_modes: + +# list of mode names that are to be activated by default. +# The full set of modes to be activated is base_modes + default_modes. +# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this overrides the setting from the global configuration (serena_config.yml). +# This setting can, in turn, be overridden by CLI parameters (--mode). +default_modes: + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +# time budget (seconds) per tool call for the retrieval of additional symbol information +# such as docstrings or parameter information. +# This overrides the corresponding setting in the global configuration; see the documentation there. +# If null or missing, use the setting from the global configuration. +symbol_info_budget: + +# list of regex patterns which, when matched, mark a memory entry as read‑only. +# Extends the list from the global configuration, merging the two lists. +read_only_memory_patterns: [] + +# list of regex patterns for memories to completely ignore. +# Matching memories will not appear in list_memories or activate_project output +# and cannot be accessed via read_memory or write_memory. +# To access ignored memory files, use the read_file tool on the raw file path. +# Extends the list from the global configuration, merging the two lists. +# Example: ["_archive/.*", "_episodes/.*"] +ignored_memory_patterns: [] diff --git a/CLAUDE.md b/CLAUDE.md index 4f5a574..ca921f1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,7 +19,9 @@ The codebase is a small ESM-only Node.js CLI with two core source files: **`bin/twd-cli.js`** — CLI entry point. Parses `process.argv` for the `run` command, calls `runTests()`, and exits with code 0 (pass) or 1 (failure). -**`src/config.js`** — `loadConfig()` reads `twd.config.json` from `process.cwd()`, merges it with defaults (url, timeout, coverage, headless, puppeteerArgs), and returns the merged config. Falls back to defaults if the file is missing or unparseable. +**`src/config.js`** — `loadConfig()` reads `twd.config.json` from `process.cwd()`, merges it with defaults (url, timeout, coverage, headless, puppeteerArgs, retryCount, protocolTimeout), and returns the merged config. Falls back to defaults if the file is missing or unparseable. + +`protocolTimeout` (default `300000`, 5 min) is passed to `puppeteer.launch` and bounds Puppeteer's CDP commands. It matters because the entire suite runs inside a single `page.evaluate` (`Runtime.callFunctionOn`), so Puppeteer's implicit 180000ms ceiling would abort long-but-passing suites with no per-test output. Raise it for slow CI; `0` means no timeout. **`src/index.js`** — `runTests()` is the main orchestrator: 1. Loads config via `loadConfig()` diff --git a/README.md b/README.md index 5da5521..181ca20 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,8 @@ Create a `twd.config.json` file in your project root: "nycOutputDir": "./.nyc_output", "headless": true, "puppeteerArgs": ["--no-sandbox", "--disable-setuid-sandbox"], - "retryCount": 2 + "retryCount": 2, + "protocolTimeout": 300000 } ``` @@ -53,6 +54,7 @@ Create a `twd.config.json` file in your project root: | `headless` | boolean | `true` | Run browser in headless mode | | `puppeteerArgs` | string[] | `["--no-sandbox", "--disable-setuid-sandbox"]` | Additional Puppeteer launch arguments | | `retryCount` | number | `2` | Number of attempts per test before reporting failure. Set to `1` to disable retries | +| `protocolTimeout` | number | `300000` | Puppeteer CDP `protocolTimeout` in ms (5 min). The whole suite runs inside one `page.evaluate`, so this bounds the **entire run** — raise it (e.g. `600000`) for slow CI; `0` means no timeout. Defaults above Puppeteer's implicit 180000ms ceiling | | `contracts` | array | — | OpenAPI contract validation specs (see [Contract Validation](#contract-validation)) | | `contractReportPath` | string | — | Path to write a markdown report for CI/PR integration | diff --git a/docs/spec-protocol-timeout.md b/docs/spec-protocol-timeout.md new file mode 100644 index 0000000..30e9c77 --- /dev/null +++ b/docs/spec-protocol-timeout.md @@ -0,0 +1,94 @@ +# Spec: Stop the full suite from dying at puppeteer's 180s `protocolTimeout` + +## Problem + +`twd-cli` runs the **entire** test suite inside a single `page.evaluate(...)` call +(`src/index.js:58`). Puppeteer maps every `page.evaluate` to one +`Runtime.callFunctionOn` CDP command, and that command is bound by +`protocolTimeout`, which defaults to **180000 ms (180s)**. + +When the whole suite takes longer than 180s — common on slow CI runners even +when it passes locally — puppeteer aborts with: + +``` +Error running tests: ProtocolError: Runtime.callFunctionOn timed out. +Increase the 'protocolTimeout' setting in launch/connect calls for a higher timeout if needed. +``` + +This is a hard failure with **no per-test output** (the whole batch is lost), so +it looks like a crash rather than a slow suite. Observed in +holafly/web-checkout CI run 26503182725: page loaded at 09:40:46, timed out at +09:43:46 — exactly 180s — while the same suite finishes in ~130s locally. + +The current `timeout` config field (default 10000) is **only** passed to +`page.waitForSelector` (`src/index.js:54`), not to puppeteer's protocol layer, +so there is no way to raise the ceiling today. + +## Goal + +Make the suite robust against the 180s protocol ceiling, and give a clearer +failure mode when a run is genuinely too slow. + +## Scope + +Two changes, independently shippable. Phase 1 unblocks immediately; Phase 2 is +the durable fix. + +### Phase 1 — Configurable `protocolTimeout` (quick, low-risk) + +1. Add `protocolTimeout` to `DEFAULT_CONFIG` in `src/config.js`. Default to a + safe value above the current implicit one, e.g. `300000` (5 min). Document + that `0` means "no timeout" (puppeteer treats `0` as unlimited). +2. Pass it through to `puppeteer.launch(...)` in `src/index.js:27`: + ```js + browser = await puppeteer.launch({ + headless: config.headless, + args: config.puppeteerArgs, + protocolTimeout: config.protocolTimeout, + }); + ``` +3. Update `twd.config.example.json`, `README.md`, and `CLAUDE.md` to document + the new field and explain *why* it exists (whole-suite-in-one-evaluate). + +**Acceptance:** a consumer can set `"protocolTimeout": 600000` in +`twd.config.json` and a 200s suite passes in CI. + +### Phase 2 — Run tests incrementally, not in one `page.evaluate` (durable) + +The deeper issue is that one slow run blows the whole batch and emits no +streamed output. Drive the in-browser `TestRunner` test-by-test (or in bounded +batches) from Node, so each `page.evaluate` is short and bounded by the +per-test `timeout`, not the suite total. + +Sketch: +- Expose the test list from the browser first: + `const tests = await page.evaluate(() => window.__testRunner.list())` + (add a `list()`/`getTests()` accessor to the runner in `twd-js` if absent). +- Loop in Node, evaluating one test (or a batch of N) per call, collecting + `testStatus` incrementally and printing results as they complete (matches the + streamed output `twd-relay` already gives). +- Each call is now bounded by per-test time, so `protocolTimeout` only has to + cover the slowest single test/batch, not the whole suite. + +Benefits: no whole-suite loss on timeout, live progress in CI logs, natural +place to enforce a real per-test timeout and to short-circuit on first failure +if desired. + +**Acceptance:** CI logs show per-test pass/fail as the run progresses; a single +hanging test fails only that test (or that batch) instead of the entire run. + +## Out of scope + +- Parallelizing across multiple pages/browsers. +- Changing `retryCount` semantics. + +## Notes for the implementer + +- `src/index.js:58-82` is the single `page.evaluate` to refactor for Phase 2. +- `src/config.js` `DEFAULT_CONFIG` (lines 4-13) is where the new field lands. +- Keep Phase 1 even after Phase 2 ships — a generous `protocolTimeout` is still + a sane safety net for the per-batch calls. +- Tests for this repo live under `tests/`; add config-merge coverage for the new + field and, for Phase 2, a fixture app with an artificially slow test. + + diff --git a/src/config.js b/src/config.js index bd6a959..9d1ad5c 100644 --- a/src/config.js +++ b/src/config.js @@ -10,6 +10,7 @@ const DEFAULT_CONFIG = { headless: true, puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'], retryCount: 2, + protocolTimeout: 300000, }; export function loadConfig() { diff --git a/src/index.js b/src/index.js index 35ba939..dc2c5ae 100644 --- a/src/index.js +++ b/src/index.js @@ -9,6 +9,14 @@ import { generateContractMarkdown } from './contractMarkdown.js'; import { buildTestPath } from './buildTestPath.js'; import { formatTestSummary, formatFailedTestsBlock } from './testSummary.js'; +function isProtocolTimeout(error) { + const message = error && error.message ? error.message : ''; + return ( + (error && error.name === 'ProtocolError' && /timed out/i.test(message)) || + /protocolTimeout/i.test(message) + ); +} + export async function runTests() { let browser; try { @@ -27,6 +35,7 @@ export async function runTests() { browser = await puppeteer.launch({ headless: config.headless, args: config.puppeteerArgs, + protocolTimeout: config.protocolTimeout, }); const page = await browser.newPage(); @@ -173,6 +182,13 @@ export async function runTests() { } catch (error) { console.error('Error running tests:', error); + if (isProtocolTimeout(error)) { + console.error( + '\nThis looks like a Puppeteer protocolTimeout. The whole test suite runs in a single\n' + + 'page.evaluate call, so the run aborts if it takes longer than "protocolTimeout" (ms).\n' + + 'Raise it in twd.config.json, e.g. { "protocolTimeout": 600000 } (0 = no timeout).' + ); + } if (browser) await browser.close(); throw error; } diff --git a/tests/config.test.js b/tests/config.test.js index a3a1b50..45bbdda 100644 --- a/tests/config.test.js +++ b/tests/config.test.js @@ -32,6 +32,7 @@ describe('loadConfig', () => { headless: true, puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'], retryCount: 2, + protocolTimeout: 300000, }); expect(fs.existsSync).toHaveBeenCalledWith(path.resolve(mockCwd, 'twd.config.json')); }); @@ -56,6 +57,7 @@ describe('loadConfig', () => { headless: true, puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'], retryCount: 2, + protocolTimeout: 300000, }); expect(fs.readFileSync).toHaveBeenCalledWith( path.resolve(mockCwd, 'twd.config.json'), @@ -73,6 +75,7 @@ describe('loadConfig', () => { headless: false, puppeteerArgs: ['--disable-dev-shm-usage'], retryCount: 3, + protocolTimeout: 600000, }; vi.mocked(fs.existsSync).mockReturnValue(true); @@ -100,6 +103,7 @@ describe('loadConfig', () => { headless: true, puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'], retryCount: 2, + protocolTimeout: 300000, }); expect(consoleWarnSpy).toHaveBeenCalledWith( expect.stringContaining('Warning: Could not parse twd.config.json'), @@ -123,6 +127,15 @@ describe('loadConfig', () => { expect(config.url).toBe('http://localhost:5173'); }); + it('should default protocolTimeout and allow user override', () => { + vi.mocked(fs.existsSync).mockReturnValue(false); + expect(loadConfig().protocolTimeout).toBe(300000); + + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({ protocolTimeout: 0 })); + expect(loadConfig().protocolTimeout).toBe(0); + }); + it('should handle partial user config correctly', () => { const userConfig = { headless: false, diff --git a/tests/runTests.test.js b/tests/runTests.test.js index 4b25f31..960fc64 100644 --- a/tests/runTests.test.js +++ b/tests/runTests.test.js @@ -81,6 +81,41 @@ describe("runTests", () => { expect(page.evaluate).toHaveBeenCalledWith(expect.any(Function), 3); }); + it("should pass protocolTimeout to puppeteer.launch", async () => { + const testStatus = [{ id: '1', status: 'pass' }]; + const handlers = [{ id: '1', name: 'test1', type: 'test' }]; + const page = createMockPage({ handlers, testStatus }); + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + vi.mocked(loadConfig).mockReturnValue({ + ...defaultMockConfig, + protocolTimeout: 600000, + }); + + await runTests(); + + expect(puppeteer.launch).toHaveBeenCalledWith( + expect.objectContaining({ protocolTimeout: 600000 }) + ); + }); + + it("should print a protocolTimeout hint when the run aborts on timeout", async () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const page = createMockPage({}); + const timeoutError = new Error('Runtime.callFunctionOn timed out.'); + timeoutError.name = 'ProtocolError'; + page.evaluate = vi.fn().mockRejectedValue(timeoutError); + const browser = createMockBrowser(page); + vi.mocked(puppeteer.launch).mockResolvedValue(browser); + + await expect(runTests()).rejects.toThrow('timed out'); + + const errors = errorSpy.mock.calls.map((c) => String(c[0])); + expect(errors.some((e) => e.includes('protocolTimeout'))).toBe(true); + expect(browser.close).toHaveBeenCalled(); + errorSpy.mockRestore(); + }); + it("should log retry summary when tests were retried", async () => { const testStatus = [ { id: '1', status: 'pass', retryAttempt: 2 }, diff --git a/twd.config.example.json b/twd.config.example.json index c5f8b20..0d7e2da 100644 --- a/twd.config.example.json +++ b/twd.config.example.json @@ -5,5 +5,6 @@ "coverageDir": "./coverage", "nycOutputDir": "./.nyc_output", "headless": true, - "puppeteerArgs": ["--no-sandbox", "--disable-setuid-sandbox"] + "puppeteerArgs": ["--no-sandbox", "--disable-setuid-sandbox"], + "protocolTimeout": 300000 }