From 60ee49ce0ea513597309bf4ce9cacdc848c9ca1d Mon Sep 17 00:00:00 2001 From: StoneHanaMori Date: Thu, 21 May 2026 23:23:51 +0800 Subject: [PATCH 1/4] fix: make Codex SkillClaw profile opt-in --- skillclaw/claw_adapter.py | 75 ++++++++++++++++++++----- tests/test_codex_profile_integration.py | 57 +++++++++++++++++++ 2 files changed, 117 insertions(+), 15 deletions(-) create mode 100644 tests/test_codex_profile_integration.py diff --git a/skillclaw/claw_adapter.py b/skillclaw/claw_adapter.py index a45447a..c9da57e 100644 --- a/skillclaw/claw_adapter.py +++ b/skillclaw/claw_adapter.py @@ -6,7 +6,7 @@ openclaw — runs `openclaw config set …` + `openclaw gateway restart` opencode — patches ~/.config/opencode/opencode.json to register SkillClaw provider hermes — patches ~/.hermes/config.yaml to point model traffic at SkillClaw - codex — patches ~/.codex/config.toml to register SkillClaw as a provider + codex — patches ~/.codex/config.toml to register an opt-in SkillClaw profile claude — patches ~/.claude/settings.json to route Anthropic traffic via SkillClaw qwenpaw — patches QwenPaw model config, selects SkillClaw as active model ironclaw — patches ~/.ironclaw/.env, runs `ironclaw service restart` @@ -328,6 +328,30 @@ def _upsert_top_level_toml_keys(text: str, updates: dict[str, object]) -> str: return "\n".join(merged).rstrip() + "\n" +def _remove_top_level_toml_keys(text: str, keys: set[str]) -> str: + """Remove selected top-level assignments before the first TOML table.""" + lines = text.splitlines() + first_table_index = len(lines) + for idx, line in enumerate(lines): + stripped = line.strip() + if stripped.startswith("[") and stripped.endswith("]"): + first_table_index = idx + break + + preamble = lines[:first_table_index] + remainder = lines[first_table_index:] + kept: list[str] = [] + for line in preamble: + stripped = line.strip() + if stripped.startswith("#") or "=" not in stripped: + kept.append(line) + continue + key = stripped.split("=", 1)[0].strip() + if key not in keys: + kept.append(line) + return "\n".join(kept + remainder).rstrip() + "\n" + + def _remove_toml_table(text: str, table_name: str) -> str: """Remove a TOML table and its body, if present.""" lines = text.splitlines() @@ -630,8 +654,21 @@ def _build_codex_provider_block(base_url: str, api_key: str) -> str: return "\n".join(lines) + "\n" +def _build_codex_profile_block(model_id: str) -> str: + lines = [ + "[profiles.skillclaw]", + f"model = {_format_toml_value(model_id)}", + 'model_provider = "skillclaw"', + ] + return "\n".join(lines) + "\n" + + def _configure_codex(cfg: "SkillClawConfig") -> None: - """Auto-configure Codex CLI to use the SkillClaw proxy.""" + """Register SkillClaw as an opt-in Codex profile. + + Do not change Codex's global ``model`` / ``model_provider`` defaults. + Users opt in explicitly with ``codex --profile skillclaw``. + """ model_id = cfg.served_model_name or cfg.llm_model_id or "skillclaw-model" api_key = cfg.proxy_api_key or "skillclaw" base_url = f"http://127.0.0.1:{cfg.proxy_port}/v1" @@ -645,15 +682,13 @@ def _configure_codex(cfg: "SkillClawConfig") -> None: except Exception as e: logger.warning("[ClawAdapter] Failed to read Codex config %s: %s", config_path, e) - updated = _upsert_top_level_toml_keys( - existing_text, - { - "model": model_id, - "model_provider": "skillclaw", - }, - ) + updated = existing_text + if str(_extract_top_level_toml_value(updated, "model_provider") or "") == "skillclaw": + updated = _remove_top_level_toml_keys(updated, {"model", "model_provider"}) updated = _remove_toml_table(updated, "model_providers.skillclaw").rstrip() + "\n\n" + updated = _remove_toml_table(updated, "profiles.skillclaw").rstrip() + "\n\n" updated += _build_codex_provider_block(base_url, api_key) + updated += "\n" + _build_codex_profile_block(model_id) _backup_codex_config_if_changed(config_path, updated) _write_text_atomic(config_path, updated, "Codex config") @@ -683,10 +718,13 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]: configured_base_url = str(provider_cfg.get("base_url") or "") configured_wire_api = str(provider_cfg.get("wire_api") or "") configured_token = str(provider_cfg.get("experimental_bearer_token") or "") + profile_cfg = _extract_toml_table(text, "profiles.skillclaw") + configured_profile_model = str(profile_cfg.get("model") or "") + configured_profile_provider = str(profile_cfg.get("model_provider") or "") proxy_match = ( - configured_model == expected_model - and configured_provider == "skillclaw" + configured_profile_model == expected_model + and configured_profile_provider == "skillclaw" and configured_base_url == expected_base_url and configured_wire_api == "responses" and configured_token == expected_api_key @@ -696,7 +734,8 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]: skills_dir_match = configured_skillclaw_skills_dir == expected_skills_dir issues: list[str] = [] notes: list[str] = [ - "Codex uses the OpenAI Responses-compatible SkillClaw endpoint via `model_providers.skillclaw`.", + "Codex can opt into SkillClaw with `codex --profile skillclaw`.", + "SkillClaw registers a Codex profile and does not change Codex's global model defaults.", "Codex session boundaries fall back to proxy-side heuristics because" " Codex does not send SkillClaw session headers.", ] @@ -705,8 +744,11 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]: if not config_path.exists(): issues.append("Codex config is missing: ~/.codex/config.toml") if not proxy_match: - issues.append("Codex model routing is not pointing at the local SkillClaw proxy.") - next_steps.append("Start SkillClaw once with `claw_type=codex` so it can rewrite ~/.codex/config.toml.") + issues.append("Codex SkillClaw profile is missing or not pointing at the local SkillClaw proxy.") + next_steps.append("Start SkillClaw once with `claw_type=codex` so it can register ~/.codex/config.toml.") + if configured_provider == "skillclaw": + issues.append("Codex global model_provider still points at SkillClaw; normal Codex runs may be intercepted.") + next_steps.append("Remove top-level `model_provider = \"skillclaw\"` or run `skillclaw restore codex`.") if not expected_skills_dir.is_dir(): issues.append(f"Codex skills directory is missing: {expected_skills_dir}") next_steps.append(f"Create or prepare the Codex skills directory: {expected_skills_dir}") @@ -723,9 +765,12 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]: "status": "ok" if not issues else "warning", "config_path": str(config_path), "config_exists": config_path.exists(), - "integration_scope": "codex-only", + "integration_scope": "codex-profile-only", "expected_model": expected_model, "configured_model": configured_model or "(unset)", + "expected_profile": "skillclaw", + "configured_profile_model": configured_profile_model or "(unset)", + "configured_profile_provider": configured_profile_provider or "(unset)", "expected_base_url": expected_base_url, "configured_base_url": configured_base_url or "(unset)", "configured_provider": configured_provider or "(unset)", diff --git a/tests/test_codex_profile_integration.py b/tests/test_codex_profile_integration.py new file mode 100644 index 0000000..5be83d1 --- /dev/null +++ b/tests/test_codex_profile_integration.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from pathlib import Path + +from skillclaw import claw_adapter +from skillclaw.config import SkillClawConfig + + +def test_configure_codex_registers_profile_without_replacing_global_defaults(monkeypatch, tmp_path: Path) -> None: + config_path = tmp_path / ".codex" / "config.toml" + config_path.parent.mkdir(parents=True) + config_path.write_text( + 'model = "gpt-5.5"\nmodel_provider = "openai"\n\n[profiles.default]\nmodel = "gpt-5.5"\n', + encoding="utf-8", + ) + monkeypatch.setattr(claw_adapter, "_CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(claw_adapter, "_CODEX_SKILLS_DIR", tmp_path / ".codex" / "skills") + monkeypatch.setattr(claw_adapter, "_CODEX_BACKUP_DIR", tmp_path / "backups") + + claw_adapter._configure_codex( + SkillClawConfig( + served_model_name="skillclaw-model", + proxy_api_key="skillclaw-key", + proxy_port=31000, + ) + ) + + text = config_path.read_text(encoding="utf-8") + assert 'model = "gpt-5.5"' in text + assert 'model_provider = "openai"' in text + assert "[model_providers.skillclaw]" in text + assert 'base_url = "http://127.0.0.1:31000/v1"' in text + assert 'wire_api = "responses"' in text + assert 'experimental_bearer_token = "skillclaw-key"' in text + assert "[profiles.skillclaw]" in text + assert 'model = "skillclaw-model"' in text + assert 'model_provider = "skillclaw"' in text + assert (tmp_path / ".codex" / "skills").is_dir() + + +def test_configure_codex_removes_legacy_global_skillclaw_defaults(monkeypatch, tmp_path: Path) -> None: + config_path = tmp_path / ".codex" / "config.toml" + config_path.parent.mkdir(parents=True) + config_path.write_text( + 'model = "skillclaw-model"\nmodel_provider = "skillclaw"\n\n[profiles.default]\nmodel = "gpt-5.5"\n', + encoding="utf-8", + ) + monkeypatch.setattr(claw_adapter, "_CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(claw_adapter, "_CODEX_SKILLS_DIR", tmp_path / ".codex" / "skills") + monkeypatch.setattr(claw_adapter, "_CODEX_BACKUP_DIR", tmp_path / "backups") + + claw_adapter._configure_codex(SkillClawConfig(served_model_name="skillclaw-model")) + + top_level = config_path.read_text(encoding="utf-8").split("[", 1)[0] + assert "model_provider" not in top_level + assert "model =" not in top_level + assert "[profiles.skillclaw]" in config_path.read_text(encoding="utf-8") From 8e4075d83f550ab15fb3455dddbb210bf1818d38 Mon Sep 17 00:00:00 2001 From: StoneHanaMori Date: Thu, 21 May 2026 23:24:30 +0800 Subject: [PATCH 2/4] fix: default Codex profile to Responses mode --- assets/README_ZH.md | 2 +- skillclaw/config_store.py | 15 +++++++++++++-- skillclaw/setup_wizard.py | 16 +++++++++++++--- tests/test_codex_profile_integration.py | 19 +++++++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/assets/README_ZH.md b/assets/README_ZH.md index 312b26b..670da41 100644 --- a/assets/README_ZH.md +++ b/assets/README_ZH.md @@ -211,7 +211,7 @@ skillclaw setup 第一次最小化验证时,推荐这样选: - `CLI agent` 选 `none`,先不要自动改外部 agent 配置 -- `skills` 目录保持默认值 `~/.skillclaw/skills`;如果你选了 Hermes,默认技能库会变成 `~/.hermes/skills` +- `skills` 目录保持默认值 `~/.skillclaw/skills`;如果你选了 Hermes、Codex 或 Claude Code,默认技能库会变成 `~/.hermes/skills`、`~/.codex/skills` 或 `~/.claude/skills` - 如果你只是想先验证代理能不能正常用,可以先关闭 shared storage - 如果你后面想在同一台机器上继续跑本地 evolver 闭环,就把 shared storage 打开并选 `local` backend,例如 `~/.skillclaw/local-share` - 如果你想先把成本压到最低,可以先关闭 PRM diff --git a/skillclaw/config_store.py b/skillclaw/config_store.py index 1b3a8f2..7273766 100644 --- a/skillclaw/config_store.py +++ b/skillclaw/config_store.py @@ -19,6 +19,10 @@ _DEFAULT_CODEX_SKILLS_DIR = Path.home() / ".codex" / "skills" _DEFAULT_CLAUDE_SKILLS_DIR = Path.home() / ".claude" / "skills" _DEFAULT_OPENCODE_SKILLS_DIR = Path.home() / ".config" / "opencode" / "skills" +_DEFAULT_LLM_API_MODE_BY_CLAW = { + "codex": "responses", +} +_FALLBACK_LLM_API_MODE = "chat" _DEFAULTS: dict = { "llm": { @@ -161,6 +165,12 @@ def default_skills_dir_for_claw(claw_type: str) -> Path: return _DEFAULT_SKILLS_DIR +def default_llm_api_mode_for_claw(claw_type: str) -> str: + """Return the default upstream API mode for the selected agent.""" + normalized = str(claw_type or "").strip().lower() + return _DEFAULT_LLM_API_MODE_BY_CLAW.get(normalized, _FALLBACK_LLM_API_MODE) + + def resolve_skills_dir(skills_dir: Any, *, claw_type: str) -> str: """Normalize a configured skills dir, applying agent-native defaults. @@ -254,13 +264,14 @@ def to_skillclaw_config(self) -> SkillClawConfig: llm_api_base = llm.get("api_base", "") llm_api_key = llm.get("api_key", "") llm_model_id = llm.get("model_id", "") - llm_api_mode = str(llm.get("api_mode", "chat") or "chat") + raw_claw_type = str(data.get("claw_type", "openclaw") or "openclaw") + default_api_mode = default_llm_api_mode_for_claw(raw_claw_type) + llm_api_mode = str(llm.get("api_mode", default_api_mode) or default_api_mode) proxy = data.get("proxy", {}) skills = data.get("skills", {}) orouter = data.get("openrouter", {}) prm = data.get("prm", {}) configure_openclaw = bool(data.get("configure_openclaw", True)) - raw_claw_type = str(data.get("claw_type", "openclaw") or "openclaw") if not configure_openclaw: raw_claw_type = "none" diff --git a/skillclaw/setup_wizard.py b/skillclaw/setup_wizard.py index 1197865..99131bb 100644 --- a/skillclaw/setup_wizard.py +++ b/skillclaw/setup_wizard.py @@ -8,7 +8,7 @@ from pathlib import Path from .claw_adapter import CLAW_TYPES -from .config_store import CONFIG_DIR, ConfigStore, resolve_skills_dir +from .config_store import CONFIG_DIR, ConfigStore, default_llm_api_mode_for_claw, resolve_skills_dir _PROVIDER_PRESETS = { "kimi": { @@ -202,7 +202,13 @@ def run(self): f"Recommended directory: {default_skills_dir}" ) elif claw_type == "codex": - print(f"Codex reads native skills from ~/.codex/skills.\nRecommended directory: {default_skills_dir}") + print( + "Codex will get a SkillClaw profile without changing its global defaults.\n" + "After starting SkillClaw, run: codex --profile skillclaw\n" + "Normal `codex` runs remain unchanged.\n" + "Codex reads native skills from ~/.codex/skills.\n" + f"Recommended directory: {default_skills_dir}" + ) elif claw_type == "claude": print( f"Claude Code reads native skills from ~/.claude/skills.\nRecommended directory: {default_skills_dir}" @@ -343,7 +349,8 @@ def run(self): proxy_config["port"] = proxy_port proxy_config.setdefault("host", "0.0.0.0") proxy_config["served_model_name"] = served_model_name or "skillclaw-model" - llm_api_mode = str(current_llm.get("api_mode", "chat") or "chat") + default_api_mode = default_llm_api_mode_for_claw(claw_type) + llm_api_mode = str(current_llm.get("api_mode", default_api_mode) or default_api_mode) data = { "claw_type": claw_type, "llm": { @@ -375,4 +382,7 @@ def run(self): print(f"\nConfig saved to: {cs.config_file}") print("\nRun 'skillclaw start' to launch SkillClaw.") + if claw_type == "codex": + print("Then run 'codex --profile skillclaw' to use Codex through SkillClaw.") + print("Use 'skillclaw doctor codex' if the profile does not work as expected.") print("=" * 60 + "\n") diff --git a/tests/test_codex_profile_integration.py b/tests/test_codex_profile_integration.py index 5be83d1..0f38b47 100644 --- a/tests/test_codex_profile_integration.py +++ b/tests/test_codex_profile_integration.py @@ -4,6 +4,7 @@ from skillclaw import claw_adapter from skillclaw.config import SkillClawConfig +from skillclaw.config_store import ConfigStore def test_configure_codex_registers_profile_without_replacing_global_defaults(monkeypatch, tmp_path: Path) -> None: @@ -55,3 +56,21 @@ def test_configure_codex_removes_legacy_global_skillclaw_defaults(monkeypatch, t assert "model_provider" not in top_level assert "model =" not in top_level assert "[profiles.skillclaw]" in config_path.read_text(encoding="utf-8") + + +def test_codex_config_defaults_to_responses_mode_and_codex_skills(tmp_path: Path) -> None: + store = ConfigStore(tmp_path / "config.yaml") + store.save( + { + "claw_type": "codex", + "llm": {"provider": "openai", "api_base": "http://upstream.test/v1", "model_id": "upstream"}, + "proxy": {"served_model_name": "skillclaw-model"}, + "skills": {"enabled": True}, + "prm": {"enabled": False}, + } + ) + + cfg = store.to_skillclaw_config() + + assert cfg.llm_api_mode == "responses" + assert cfg.skills_dir.endswith(".codex/skills") From b0882ed4e8e06cd104960ce12496be1d7772c4e5 Mon Sep 17 00:00:00 2001 From: StoneHanaMori Date: Thu, 21 May 2026 23:25:23 +0800 Subject: [PATCH 3/4] fix: inject skills in native Codex responses flow --- skillclaw/api_server.py | 32 +++++++++++++++++++ tests/test_codex_profile_integration.py | 41 +++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py index 43ec957..fa4fb21 100644 --- a/skillclaw/api_server.py +++ b/skillclaw/api_server.py @@ -1623,6 +1623,8 @@ async def responses( body = await request.json() if owner._responses_native_enabled(): + turn_type = _resolve_turn_type(x_turn_type, body.get("turn_type"), default="main") + body = owner._prepare_native_responses_body(body, turn_type=turn_type) if bool(body.get("stream", False)): return StreamingResponse( owner._stream_llm_responses(body), @@ -2581,6 +2583,36 @@ def _prepare_responses_forward( headers["Authorization"] = f"Bearer {self.config.llm_api_key}" return f"{api_base}/responses", send_body, headers + def _prepare_native_responses_body(self, body: dict[str, Any], *, turn_type: str) -> dict[str, Any]: + """Apply non-destructive SkillClaw hooks before native Responses forwarding.""" + prepared = dict(body) + if not self.skill_manager or turn_type != "main": + return prepared + + try: + self.skill_manager.refresh_if_changed() + except Exception as e: + logger.warning("[SkillManager] failed to refresh local skills: %s", e) + + skill_text = self.skill_manager.build_injection_prompt( + max_chars=getattr(self.config, "max_skills_prompt_chars", 30_000), + ) + if not skill_text: + return prepared + + all_skills = self.skill_manager.get_all_skills() + skill_names = [s.get("name", "unknown_skill") for s in all_skills if isinstance(s, dict)] + logger.info( + "[SkillManager] listing %d skills in Codex Responses instructions: %s", + len(skill_names), + ", ".join(skill_names)[:400], + ) + self.skill_manager.record_injection(skill_names) + + existing = _normalize_responses_content(prepared.get("instructions", "")) + prepared["instructions"] = (existing + "\n\n" + skill_text).strip() if existing else skill_text + return prepared + async def _forward_to_llm_responses(self, body: dict[str, Any]) -> dict[str, Any]: """Forward a Codex Responses payload to an upstream Responses API.""" import httpx diff --git a/tests/test_codex_profile_integration.py b/tests/test_codex_profile_integration.py index 0f38b47..832c365 100644 --- a/tests/test_codex_profile_integration.py +++ b/tests/test_codex_profile_integration.py @@ -3,10 +3,28 @@ from pathlib import Path from skillclaw import claw_adapter +from skillclaw.api_server import SkillClawAPIServer from skillclaw.config import SkillClawConfig from skillclaw.config_store import ConfigStore +class FakeSkillManager: + def __init__(self) -> None: + self.injected = [] + + def refresh_if_changed(self) -> None: + return None + + def build_injection_prompt(self, max_chars: int = 30_000) -> str: + return "demo" + + def get_all_skills(self) -> list[dict]: + return [{"name": "demo"}] + + def record_injection(self, names: list[str]) -> None: + self.injected.append(list(names)) + + def test_configure_codex_registers_profile_without_replacing_global_defaults(monkeypatch, tmp_path: Path) -> None: config_path = tmp_path / ".codex" / "config.toml" config_path.parent.mkdir(parents=True) @@ -74,3 +92,26 @@ def test_codex_config_defaults_to_responses_mode_and_codex_skills(tmp_path: Path assert cfg.llm_api_mode == "responses" assert cfg.skills_dir.endswith(".codex/skills") + + +def test_native_responses_body_injects_skills_without_dropping_codex_tools() -> None: + server = object.__new__(SkillClawAPIServer) + server.config = SkillClawConfig(max_skills_prompt_chars=10_000) + server.skill_manager = FakeSkillManager() + custom_tool = {"type": "custom", "name": "apply_patch"} + namespace_tool = {"type": "namespace", "name": "mcp__github__"} + body = { + "instructions": "base instructions", + "input": "hi", + "tools": [custom_tool, namespace_tool], + "tool_choice": {"type": "custom", "name": "apply_patch"}, + } + + prepared = server._prepare_native_responses_body(body, turn_type="main") + + assert prepared is not body + assert prepared["tools"] == [custom_tool, namespace_tool] + assert prepared["tool_choice"] == {"type": "custom", "name": "apply_patch"} + assert prepared["instructions"].startswith("base instructions") + assert "" in prepared["instructions"] + assert server.skill_manager.injected == [["demo"]] From ec6f2e77db7c5349ea9c836549d1be9a8f171be3 Mon Sep 17 00:00:00 2001 From: StoneHanaMori Date: Fri, 22 May 2026 00:15:37 +0800 Subject: [PATCH 4/4] refactor: remove tokenizer sample pipeline --- pyproject.toml | 6 +- skillclaw/api_server.py | 435 ++++++--------------------- skillclaw/data_formatter.py | 23 -- tests/test_anthropic_messages_api.py | 11 +- tests/test_responses_native.py | 9 +- 5 files changed, 99 insertions(+), 385 deletions(-) delete mode 100644 skillclaw/data_formatter.py diff --git a/pyproject.toml b/pyproject.toml index 3a4a070..8e26f57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,10 +17,6 @@ dependencies = [ ] [project.optional-dependencies] -# Tokenizer for prompt truncation and PRM/OPD token-level data -tokenizer = [ - "transformers>=4.51.1", -] # Embedding-based skill retrieval embedding = [ "numpy", @@ -44,7 +40,7 @@ server = [ ] # Everything all = [ - "skillclaw[tokenizer,embedding,evolve,sharing,server]", + "skillclaw[embedding,evolve,sharing,server]", ] [project.scripts] diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py index fa4fb21..52640a3 100644 --- a/skillclaw/api_server.py +++ b/skillclaw/api_server.py @@ -20,7 +20,6 @@ import time from contextlib import asynccontextmanager from datetime import datetime, timezone -from itertools import count from typing import Any, Optional import uvicorn @@ -28,7 +27,6 @@ from fastapi.responses import JSONResponse, StreamingResponse from .config import SkillClawConfig -from .data_formatter import ConversationSample from .prm_scorer import PRMScorer from .protocols import anthropic_messages as anthropic_protocol from .protocols import openai_responses as responses_protocol @@ -1027,16 +1025,6 @@ def _merge_tool_error_info( ] -def _extract_logprobs_from_chat_response(choice: dict[str, Any]) -> list[float]: - logprobs_obj = choice.get("logprobs") - if not isinstance(logprobs_obj, dict): - return [] - content = logprobs_obj.get("content") - if not isinstance(content, list): - return [] - return [float(item.get("logprob", 0.0)) for item in content if isinstance(item, dict)] - - def _rewrite_new_session_bootstrap_prompt(messages: list[dict]) -> tuple[list[dict], int]: """Rewrite OpenClaw /new bootstrap user prompt to a safer variant. @@ -1227,24 +1215,16 @@ def _token_estimate_text(content: Any) -> str: return str(content) if content is not None else "" -def _estimate_openai_body_input_tokens(tokenizer: Any, openai_body: dict[str, Any]) -> int: +def _estimate_openai_body_input_tokens(openai_body: dict[str, Any]) -> int: + """Return a provider-agnostic rough input token estimate. + + SkillClaw proxies external agents and does not own the upstream model's + exact tokenization. Keep this estimate local and dependency-free so + daemon readiness never depends on model-specific tokenization. + """ messages = list(openai_body.get("messages") or []) tools = openai_body.get("tools") image_tokens = sum(_estimate_image_content_tokens(msg.get("content")) for msg in messages if isinstance(msg, dict)) - if tokenizer is not None: - try: - text = tokenizer.apply_chat_template( - _normalize_messages_for_template(messages), - tools=tools if tools else None, - tokenize=False, - add_generation_prompt=True, - ) - tokenized = tokenizer(text, add_special_tokens=False) - input_ids = tokenized["input_ids"] if isinstance(tokenized, dict) else tokenized.input_ids - return max(0, len(input_ids) + image_tokens) - except Exception: - pass - text_parts = [] for msg in messages: if not isinstance(msg, dict): @@ -1416,7 +1396,7 @@ class SkillClawAPIServer: skill_manager: Optional SkillManager for injecting skills into system prompts. prm_scorer: - Optional PRMScorer. If None, all samples get reward=0. + Optional PRMScorer for turn feedback. """ def __init__( @@ -1446,13 +1426,11 @@ def __init__( self._system_prompt_cache_file = os.path.join(config.record_dir, f"system_prompt_cache_{cache_suffix}.json") # State machines - self._index_counter = count(0) - self._group_counter = count(0) self._turn_counts: dict[str, int] = {} self._pending_turn_data: dict[str, dict[int, dict]] = {} # session → {turn → data} self._prm_tasks: dict[str, dict[int, asyncio.Task]] = {} # session → {turn → task} self._pending_records: dict[str, dict] = {} # for record logging - self._session_effective: dict[str, int] = {} # at-least-one guarantee + self._session_scored_turns: dict[str, int] = {} # session -> finalized PRM turn count self._session_turns: dict[str, list] = {} self._session_last_active: dict[str, float] = {} # session -> unix_ts self._closing_sessions: set[str] = set() # session ids currently being closed @@ -1490,9 +1468,6 @@ def __init__( with open(self._prm_record_file, "w"): pass - # Tokenizer is used for prompt length accounting/truncation and for - # optional tokenized conversation sample export. - self._tokenizer = self._load_tokenizer() self.app = self._build_app() # Threading lifecycle (set by start()) @@ -1501,19 +1476,6 @@ def __init__( self._ready_event = threading.Event() self._server_stopped_event = threading.Event() - # ------------------------------------------------------------------ # - # Tokenizer # - # ------------------------------------------------------------------ # - - def _load_tokenizer(self): - try: - from transformers import AutoTokenizer - - return AutoTokenizer.from_pretrained(self.config.model_name, trust_remote_code=True) - except Exception as e: - logger.warning("[OpenClaw] could not load tokenizer: %s", e) - return None - # ------------------------------------------------------------------ # # FastAPI app # # ------------------------------------------------------------------ # @@ -1734,7 +1696,7 @@ async def anthropic_count_tokens( raw_body = await request.json() openai_body = _anthropic_to_openai_body(raw_body) - input_tokens = _estimate_openai_body_input_tokens(owner._tokenizer, openai_body) + input_tokens = _estimate_openai_body_input_tokens(openai_body) return JSONResponse(content={"input_tokens": input_tokens}) @app.post("/v1/messages") @@ -1899,7 +1861,7 @@ def _collect_active_session_ids(self) -> list[str]: session_ids.update(self._session_turns.keys()) session_ids.update(self._pending_turn_data.keys()) session_ids.update(self._turn_counts.keys()) - session_ids.update(self._session_effective.keys()) + session_ids.update(self._session_scored_turns.keys()) session_ids.update(self._prm_tasks.keys()) return sorted(s for s in session_ids if s and s not in self._closing_sessions) @@ -1973,7 +1935,7 @@ async def _shutdown_cleanup(self) -> None: await self._await_background_tasks(self._shutdown_drain_timeout_seconds) async def _close_session(self, session_id: str, reason: str = "explicit") -> None: - """Flush a session: submit remaining samples, upload session data, clean up state.""" + """Flush a session: finalize pending turn feedback, upload session data, clean up state.""" if not session_id: return if session_id in self._closing_sessions: @@ -1981,35 +1943,51 @@ async def _close_session(self, session_id: str, reason: str = "explicit") -> Non self._closing_sessions.add(session_id) try: self._flush_pending_record(session_id, None) - pending_snapshot = { - turn_num: dict(turn_data) for turn_num, turn_data in self._pending_turn_data.get(session_id, {}).items() - } - self._maybe_submit_ready_samples(session_id, force_last_prm=True) - prm_tasks = list(self._prm_tasks.get(session_id, {}).values()) - if prm_tasks: + pending = self._pending_turn_data.get(session_id, {}) + prm_tasks = self._prm_tasks.setdefault(session_id, {}) + if self.config.use_prm and self.prm_scorer: + for turn_num, turn_data in list(pending.items()): + if turn_num in prm_tasks: + continue + prm_task = asyncio.create_task( + self.prm_scorer.evaluate( + turn_data.get("response_text", ""), + turn_data.get("prompt_text", ""), + session_id=session_id, + turn_num=turn_num, + ) + ) + prm_task.add_done_callback(self._task_done_cb) + prm_task.add_done_callback( + lambda _t, sid=session_id, tnum=turn_num: self._on_prm_done_record_only(sid, tnum, _t) + ) + prm_tasks[turn_num] = prm_task + active_prm_tasks = list(prm_tasks.values()) + if active_prm_tasks: try: await asyncio.wait_for( - asyncio.gather(*prm_tasks, return_exceptions=True), + asyncio.gather(*active_prm_tasks, return_exceptions=True), timeout=_SHUTDOWN_DRAIN_TIMEOUT_SECONDS, ) except asyncio.TimeoutError: logger.warning("[SessionDetect] PRM drain timed out for session=%s", session_id) - for turn_num in sorted(pending_snapshot.keys()): - turn_data = pending_snapshot[turn_num] + for turn_num in sorted(list(pending.keys())): + turn_data = pending.pop(turn_num) prm_result = turn_data.pop("prm_result", None) - prm_task = self._prm_tasks.get(session_id, {}).get(turn_num) + prm_task = prm_tasks.get(turn_num) if prm_result is None and prm_task is not None and prm_task.done(): try: prm_result = prm_task.result() except (asyncio.CancelledError, Exception): prm_result = None - await self._submit_turn_sample( + prm_tasks.pop(turn_num, None) + await self._finalize_turn_feedback( turn_num, turn_data, session_id, prm_result, ) - eff = self._session_effective.pop(session_id, 0) + eff = self._session_scored_turns.pop(session_id, 0) self._turn_counts.pop(session_id, None) self._pending_turn_data.pop(session_id, None) prm_tasks = self._prm_tasks.pop(session_id, {}) @@ -2017,7 +1995,7 @@ async def _close_session(self, session_id: str, reason: str = "explicit") -> Non if isinstance(task, asyncio.Task) and not task.done(): task.cancel() logger.info( - "[SessionDetect] closed session=%s reason=%s (effective_samples=%d)", + "[SessionDetect] closed session=%s reason=%s (scored_turns=%d)", session_id, reason, eff, @@ -2131,7 +2109,7 @@ def _fire_prm_scoring( response_text: str, instruction_text: str, next_state, - submit_ready_samples: bool = True, + finalize_ready_turns: bool = True, ): if not self.prm_scorer or not next_state: return @@ -2140,10 +2118,10 @@ def _fire_prm_scoring( self.prm_scorer.evaluate(response_text, inst_text, session_id=session_id, turn_num=turn_num) ) task.add_done_callback(self._task_done_cb) - if submit_ready_samples: + if finalize_ready_turns: task.add_done_callback(lambda _t: self._on_prm_done(session_id, turn_num, _t)) else: - task.add_done_callback(lambda _t: self._on_prm_done_without_submit(session_id, turn_num, _t)) + task.add_done_callback(lambda _t: self._on_prm_done_record_only(session_id, turn_num, _t)) self._prm_tasks.setdefault(session_id, {})[turn_num] = task td = self._pending_turn_data.get(session_id, {}).get(turn_num) if td is not None: @@ -2184,9 +2162,9 @@ def _on_prm_done(self, session_id: str, turn_num: int, task: asyncio.Task): self._apply_prm_result(session_id, turn_num, prm_result) if session_id in self._closing_sessions: return - self._maybe_submit_ready_samples(session_id) + self._maybe_finalize_ready_turns(session_id) - def _on_prm_done_without_submit(self, session_id: str, turn_num: int, task: asyncio.Task): + def _on_prm_done_record_only(self, session_id: str, turn_num: int, task: asyncio.Task): """Callback used for close-session PRM tasks; records score only.""" if task.cancelled(): return @@ -2240,17 +2218,7 @@ async def _handle_request( logger.info("[OpenClaw] rewrote %d /new bootstrap user prompt(s) for provider safety", rewritten) def _prompt_len(msgs): - try: - norm_msgs = _normalize_messages_for_template(msgs) - text = self._tokenizer.apply_chat_template( - norm_msgs, - tools=body.get("tools"), - tokenize=False, - add_generation_prompt=True, - ) - return len(self._tokenizer(text, add_special_tokens=False)["input_ids"]) - except Exception: - return 0 + return _estimate_openai_body_input_tokens({"messages": msgs, "tools": body.get("tools")}) # Compress verbose system prompts (OpenClaw only). Non-OpenClaw # agents send short or no system prompts; compressing them wastes an @@ -2315,8 +2283,6 @@ def _prompt_len(msgs): forward_body = {k: v for k, v in body.items() if k not in _NON_STANDARD_BODY_KEYS} forward_body["stream"] = False forward_body.pop("stream_options", None) - forward_body["logprobs"] = True - forward_body["top_logprobs"] = 1 if "model" not in forward_body: forward_body["model"] = self._served_model forward_body["messages"] = messages # potentially skill-injected @@ -2383,10 +2349,6 @@ def _prompt_len(msgs): if response_msg.get("content") is None: response_msg["content"] = "" - norm_msgs = _normalize_messages_for_template(messages) - norm_resp = _normalize_messages_for_template([response_msg])[0] - full_norm = norm_msgs + [norm_resp] - skill_path_map = self.skill_manager.get_skill_path_map() if self.skill_manager else {} read_skills = _extract_read_skills_from_tool_calls( tool_calls, @@ -2411,102 +2373,12 @@ def _prompt_len(msgs): ) user_instruction = _extract_last_user_instruction(messages) - - if self._tokenizer is None: - self._turn_counts[session_id] = self._turn_counts.get(session_id, 0) + 1 - turn_num = self._turn_counts[session_id] - prompt_text_simple = "\n".join( - f"{m.get('role', '?')}: {_flatten_message_content(m.get('content', ''))}" for m in messages - ) - response_text_simple = content or (json.dumps(tool_calls, ensure_ascii=False) if tool_calls else "") - self._buffer_record( - session_id, - turn_num, - messages, - prompt_text_simple, - response_text_simple, - tool_calls, - ) - self._session_turns.setdefault(session_id, []).append( - { - "turn_num": turn_num, - "prompt_text": user_instruction, - "response_text": response_text_simple, - "reasoning_content": reasoning or None, - "tool_calls": tool_calls, - "read_skills": read_skills, - "modified_skills": modified_skills, - "tool_results": tool_summaries, - "tool_results_raw": [], - "tool_observations": [], - "tool_errors": [], - "injected_skills": injected_skills, - "prm_score": None, - } - ) - self._pending_turn_data.setdefault(session_id, {})[turn_num] = { - "prompt_ids": [], - "response_ids": [], - "response_logprobs": [], - "prompt_text": prompt_text_simple, - "response_text": response_text_simple, - } - if session_done: - await self._close_session(session_id) - output["session_id"] = session_id - return {"response": output} - - prompt_text = self._tokenizer.apply_chat_template( - norm_msgs, - tools=tools, - tokenize=False, - add_generation_prompt=True, - ) - full_text = self._tokenizer.apply_chat_template( - full_norm, - tools=tools, - tokenize=False, - add_generation_prompt=False, - ) - - if full_text.startswith(prompt_text): - response_text = full_text[len(prompt_text) :] - else: - logger.warning("[OpenClaw] prompt_text not prefix of full_text, using full_text as response") - response_text = full_text - - prompt_ids = self._tokenizer(prompt_text, add_special_tokens=False)["input_ids"] - response_ids = self._tokenizer(response_text, add_special_tokens=False)["input_ids"] - - if not response_ids and not response_text.strip() and not tool_calls: - logger.info("[OpenClaw] MAIN session=%s → empty response, skipping", session_id) - output["session_id"] = session_id - return {"response": output} - - response_logprobs = _extract_logprobs_from_chat_response(choice) - if len(response_logprobs) > len(response_ids): - response_logprobs = response_logprobs[: len(response_ids)] - elif len(response_logprobs) < len(response_ids): - response_logprobs = response_logprobs + [0.0] * (len(response_ids) - len(response_logprobs)) - - turn_data = { - "prompt_ids": prompt_ids, - "response_ids": response_ids, - "response_logprobs": response_logprobs, - "prompt_text": prompt_text, - "response_text": response_text, - } - self._turn_counts[session_id] = self._turn_counts.get(session_id, 0) + 1 turn_num = self._turn_counts[session_id] - - logger.info( - "[OpenClaw] MAIN session=%s turn=%d prompt_tokens=%d response_tokens=%d", - session_id, - turn_num, - len(prompt_ids), - len(response_ids), + prompt_text = "\n".join( + f"{m.get('role', '?')}: {_flatten_message_content(m.get('content', ''))}" for m in messages ) + response_text = content or (json.dumps(tool_calls, ensure_ascii=False) if tool_calls else "") self._buffer_record(session_id, turn_num, messages, prompt_text, response_text, tool_calls) self._session_turns.setdefault(session_id, []).append( { @@ -2525,10 +2397,20 @@ def _prompt_len(msgs): "prm_score": None, } ) - self._pending_turn_data.setdefault(session_id, {})[turn_num] = turn_data - self._maybe_submit_ready_samples(session_id) + self._pending_turn_data.setdefault(session_id, {})[turn_num] = { + "prompt_text": prompt_text, + "response_text": response_text, + } + logger.info( + "[OpenClaw] MAIN session=%s turn=%d prompt_est_tokens=%d response_chars=%d", + session_id, + turn_num, + _estimate_openai_body_input_tokens({"messages": messages, "tools": tools}), + len(response_text), + ) + self._maybe_finalize_ready_turns(session_id) else: - logger.info("[OpenClaw] SIDE session=%s → skipped (no training data)", session_id) + logger.info("[OpenClaw] SIDE session=%s -> skipped (side-channel turn)", session_id) if session_done: await self._close_session(session_id) @@ -2936,26 +2818,10 @@ def _truncate_messages( tools, max_prompt_tokens: int, ) -> list[dict]: - """ - Drop oldest non-system messages until the tokenized prompt fits within - max_prompt_tokens. The system message (if any) is always kept. - At least one user message is always kept even if it alone exceeds the limit. - """ - if self._tokenizer is None: - return messages + """Drop oldest non-system messages using a dependency-free token estimate.""" def _prompt_len(msgs): - try: - norm_msgs = _normalize_messages_for_template(msgs) - text = self._tokenizer.apply_chat_template( - norm_msgs, - tools=tools, - tokenize=False, - add_generation_prompt=True, - ) - return len(self._tokenizer(text, add_special_tokens=False)["input_ids"]) - except Exception: - return 0 + return _estimate_openai_body_input_tokens({"messages": msgs, "tools": tools}) if _prompt_len(messages) <= max_prompt_tokens: return messages @@ -2964,23 +2830,18 @@ def _prompt_len(msgs): sys_msgs = [m for m in messages if m.get("role") == "system"] non_sys = [m for m in messages if m.get("role") != "system"] - # Greedily keep most-recent messages - kept = [] - for msg in reversed(non_sys): - candidate = sys_msgs + list(reversed(kept + [msg])) + dropped = 0 + while len(non_sys) > 1: + candidate = sys_msgs + non_sys[dropped + 1 :] if _prompt_len(candidate) <= max_prompt_tokens: - kept.append(msg) - elif not kept: - kept.append(msg) # keep at least one user message - break - else: + dropped += 1 break + dropped += 1 - result = sys_msgs + list(reversed(kept)) - dropped = len(messages) - len(result) - if dropped > 0: - logger.warning( - "[OpenClaw] context truncated: dropped %d oldest messages (%d → %d tokens, limit=%d)", + result = sys_msgs + non_sys[dropped:] + if dropped: + logger.info( + "[OpenClaw] context truncated: dropped %d oldest messages (%d -> %d est tokens, limit=%d)", dropped, _prompt_len(messages), _prompt_len(result), @@ -3034,58 +2895,24 @@ def _inject_skills(self, messages: list[dict]) -> tuple[list[dict], list[str]]: return messages, skill_names # ------------------------------------------------------------------ # - # Sample submission # + # Turn feedback finalization # # ------------------------------------------------------------------ # - def _maybe_submit_ready_samples( - self, - session_id: str, - force_no_prm: bool = False, - force_last_prm: bool = False, - ): - """Submit turns whose PRM and teacher queries are done. - - force_no_prm: also submit turns that have no PRM task yet (used at - session end for the last turn which will never get a next_state). - force_last_prm: when closing a session, fire PRM for the latest - pending turn even if it never received a next_state. - When force is active, pending teacher tasks are also skipped. - """ + def _maybe_finalize_ready_turns(self, session_id: str): + """Finalize turns whose optional PRM scoring is done.""" prm_tasks = self._prm_tasks.setdefault(session_id, {}) pending = self._pending_turn_data.get(session_id, {}) for turn_num in sorted(list(pending.keys())): - # --- PRM readiness --- prm_task = prm_tasks.get(turn_num) - if not self.config.use_prm or not self.prm_scorer: - pass # no PRM → submit immediately - elif force_last_prm and prm_task is None: - turn_data = pending.get(turn_num) - if turn_data is not None: - prm_task = asyncio.create_task( - self.prm_scorer.evaluate( - turn_data.get("response_text", ""), - turn_data.get("prompt_text", ""), - session_id=session_id, - turn_num=turn_num, - ) - ) - prm_task.add_done_callback(self._task_done_cb) - prm_task.add_done_callback( - lambda _t, sid=session_id, tnum=turn_num: self._on_prm_done_without_submit(sid, tnum, _t) - ) - prm_tasks[turn_num] = prm_task - continue - elif prm_task is not None and not prm_task.done(): - continue # PRM still running - elif prm_task is None and not force_no_prm: - continue # waiting for next_state to fire PRM + if self.config.use_prm and self.prm_scorer: + if prm_task is None: + continue # waiting for the next turn to provide scoring context + if not prm_task.done(): + continue turn_data = pending.pop(turn_num) - prm_result = None - cached_prm_result = turn_data.pop("prm_result", None) - if cached_prm_result is not None: - prm_result = cached_prm_result - if prm_task is not None and prm_task.done(): + prm_result = turn_data.pop("prm_result", None) + if prm_result is None and prm_task is not None and prm_task.done(): try: prm_result = prm_task.result() except (asyncio.CancelledError, Exception): @@ -3093,7 +2920,7 @@ def _maybe_submit_ready_samples( prm_tasks.pop(turn_num, None) self._safe_create_task( - self._submit_turn_sample( + self._finalize_turn_feedback( turn_num, turn_data, session_id, @@ -3101,103 +2928,29 @@ def _maybe_submit_ready_samples( ) ) - async def _submit_ready_samples_inline( - self, - session_id: str, - force_no_prm: bool = False, - ) -> None: - """Submit ready samples inline, used when closing a session. - - Unlike ``_maybe_submit_ready_samples``, this awaits the submission - coroutine directly so the final PRM/sample records are durable before - session cleanup continues. - """ - prm_tasks = self._prm_tasks.setdefault(session_id, {}) - pending = self._pending_turn_data.get(session_id, {}) - for turn_num in sorted(list(pending.keys())): - prm_task = prm_tasks.get(turn_num) - if not self.config.use_prm or not self.prm_scorer: - pass - elif prm_task is not None and not prm_task.done(): - continue - elif prm_task is None and not force_no_prm: - continue - - turn_data = pending.pop(turn_num) - prm_result = None - cached_prm_result = turn_data.pop("prm_result", None) - if cached_prm_result is not None: - prm_result = cached_prm_result - if prm_task is not None and prm_task.done(): - try: - prm_result = prm_task.result() - except (asyncio.CancelledError, Exception): - pass - prm_tasks.pop(turn_num, None) - - await self._submit_turn_sample( - turn_num, - turn_data, - session_id, - prm_result, - ) - - async def _submit_turn_sample( + async def _finalize_turn_feedback( self, turn_num: int, turn_data: dict[str, Any], session_id: str, prm_result: Optional[dict], ): - prompt_ids = turn_data["prompt_ids"] - response_ids = turn_data["response_ids"] - - has_next_state = turn_data.get("has_next_state", False) - score = prm_result["score"] if prm_result else 0.0 - - exclude = not has_next_state or score == 0.0 - # Guarantee at least one tokenized sample per session is retained when - # sample export is enabled. - if exclude and has_next_state and self._session_effective.get(session_id, 0) == 0: - exclude = False - logger.info( - "[OpenClaw] promoting session=%s turn with score=0 → loss_mask=1 (at-least-one guarantee)", - session_id, - ) - - loss_mask = [0] * len(response_ids) if exclude else [1] * len(response_ids) - _ = ConversationSample( - session_id=session_id, - turn_num=turn_num, - prompt_tokens=prompt_ids, - response_tokens=response_ids, - response_logprobs=turn_data["response_logprobs"], - loss_mask=loss_mask, - reward=score, - prompt_text=turn_data.get("prompt_text", ""), - response_text=turn_data.get("response_text", ""), - skill_generation=self.skill_manager.generation if self.skill_manager else 0, - ) - - if not exclude: - self._session_effective[session_id] = self._session_effective.get(session_id, 0) + 1 - - index = next(self._index_counter) - next(self._group_counter) + """Finalize a turn after optional PRM scoring. + SkillClaw acts as an external-agent proxy, so finalization keeps only + feedback/record side effects that are consumed by the framework. + """ + score = prm_result.get("score", 0.0) if prm_result else 0.0 if prm_result: self._append_prm_record(session_id, turn_num, score, prm_result.get("votes", [])) + self._session_scored_turns[session_id] = self._session_scored_turns.get(session_id, 0) + 1 logger.info( - "[OpenClaw] submitted sample session=%s turn=%d index=%d score=%.1f exclude=%s " - "prompt_len=%d response_len=%d", + "[OpenClaw] finalized turn session=%s turn=%d score=%.1f response_chars=%d", session_id, turn_num, - index, score, - exclude, - len(prompt_ids), - len(response_ids), + len(turn_data.get("response_text", "")), ) # ------------------------------------------------------------------ # diff --git a/skillclaw/data_formatter.py b/skillclaw/data_formatter.py deleted file mode 100644 index 816dc19..0000000 --- a/skillclaw/data_formatter.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Data structures for conversation samples collected by the API proxy. -""" - -from __future__ import annotations - -from dataclasses import dataclass - - -@dataclass -class ConversationSample: - """One sample collected from the API proxy.""" - - session_id: str - turn_num: int - prompt_tokens: list[int] - response_tokens: list[int] - response_logprobs: list[float] - loss_mask: list[int] - reward: float - prompt_text: str = "" - response_text: str = "" - skill_generation: int = 0 diff --git a/tests/test_anthropic_messages_api.py b/tests/test_anthropic_messages_api.py index bdeb4b2..2184554 100644 --- a/tests/test_anthropic_messages_api.py +++ b/tests/test_anthropic_messages_api.py @@ -9,8 +9,7 @@ @pytest.fixture -def anthropic_server(monkeypatch, tmp_path): - monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) +def anthropic_server(tmp_path): return SkillClawAPIServer( SkillClawConfig( proxy_api_key="skillclaw", @@ -43,14 +42,6 @@ async def test_anthropic_count_tokens_endpoint_returns_local_estimate(anthropic_ @pytest.mark.asyncio async def test_anthropic_count_tokens_accounts_for_image_content(anthropic_server): - class FakeTokenizer: - def apply_chat_template(self, messages, tools=None, tokenize=False, add_generation_prompt=False): - return "user: screenshot" - - def __call__(self, text, add_special_tokens=False): - return {"input_ids": [1, 2, 3]} - - anthropic_server._tokenizer = FakeTokenizer() png_header = ( b"\x89PNG\r\n\x1a\n" + struct.pack(">I", 13) diff --git a/tests/test_responses_native.py b/tests/test_responses_native.py index 2370422..3995eb9 100644 --- a/tests/test_responses_native.py +++ b/tests/test_responses_native.py @@ -223,8 +223,7 @@ async def fake_stream(body): @pytest.mark.asyncio -async def test_responses_chat_bridge_merges_previous_response_history(monkeypatch, tmp_path): - monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) +async def test_responses_chat_bridge_merges_previous_response_history(tmp_path): server = SkillClawAPIServer( SkillClawConfig( proxy_api_key="skillclaw", @@ -280,8 +279,7 @@ async def fake_handle_request(body, session_id, turn_type, session_done): @pytest.mark.asyncio -async def test_responses_continuation_keeps_new_instructions_first(monkeypatch, tmp_path): - monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) +async def test_responses_continuation_keeps_new_instructions_first(tmp_path): server = SkillClawAPIServer( SkillClawConfig( proxy_api_key="skillclaw", @@ -339,8 +337,7 @@ async def fake_handle_request(body, session_id, turn_type, session_done): @pytest.mark.asyncio -async def test_responses_continuation_deduplicates_replayed_output_items(monkeypatch, tmp_path): - monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) +async def test_responses_continuation_deduplicates_replayed_output_items(tmp_path): server = SkillClawAPIServer( SkillClawConfig( proxy_api_key="skillclaw",