From 60ee49ce0ea513597309bf4ce9cacdc848c9ca1d Mon Sep 17 00:00:00 2001
From: StoneHanaMori <ysd23@mails.tsinghua.edu.cn>
Date: Thu, 21 May 2026 23:23:51 +0800
Subject: [PATCH 1/4] fix: make Codex SkillClaw profile opt-in

---
 skillclaw/claw_adapter.py               | 75 ++++++++++++++++++++-----
 tests/test_codex_profile_integration.py | 57 +++++++++++++++++++
 2 files changed, 117 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_codex_profile_integration.py

diff --git a/skillclaw/claw_adapter.py b/skillclaw/claw_adapter.py
index a45447a..c9da57e 100644
--- a/skillclaw/claw_adapter.py
+++ b/skillclaw/claw_adapter.py
@@ -6,7 +6,7 @@
   openclaw  — runs `openclaw config set …` + `openclaw gateway restart`
   opencode  — patches ~/.config/opencode/opencode.json to register SkillClaw provider
   hermes    — patches ~/.hermes/config.yaml to point model traffic at SkillClaw
-  codex     — patches ~/.codex/config.toml to register SkillClaw as a provider
+  codex     — patches ~/.codex/config.toml to register an opt-in SkillClaw profile
   claude    — patches ~/.claude/settings.json to route Anthropic traffic via SkillClaw
   qwenpaw   — patches QwenPaw model config, selects SkillClaw as active model
   ironclaw  — patches ~/.ironclaw/.env, runs `ironclaw service restart`
@@ -328,6 +328,30 @@ def _upsert_top_level_toml_keys(text: str, updates: dict[str, object]) -> str:
     return "\n".join(merged).rstrip() + "\n"
 
 
+def _remove_top_level_toml_keys(text: str, keys: set[str]) -> str:
+    """Remove selected top-level ``key = value`` lines that precede the first TOML table.
+
+    A table header may carry a trailing comment (``[t]  # note``) and a key may be quoted
+    (``"model" = ...``); both are recognised. The result ends with exactly one newline.
+    """
+    lines = text.splitlines()
+    first_table_index = len(lines)
+    for idx, line in enumerate(lines):
+        stripped = line.strip()
+        # Also test the comment-stripped form so keys inside ``[t]  # note`` are never touched.
+        if stripped.startswith("[") and (stripped.endswith("]") or stripped.split("#", 1)[0].rstrip().endswith("]")):
+            first_table_index = idx
+            break
+    kept: list[str] = []
+    for line in lines[:first_table_index]:
+        stripped = line.strip()
+        # Keep blanks, comments and non-assignments; quoted keys are compared without their quotes.
+        is_assignment = "=" in stripped and not stripped.startswith("#")
+        if not (is_assignment and stripped.split("=", 1)[0].strip().strip("\"'") in keys):
+            kept.append(line)
+    return "\n".join(kept + lines[first_table_index:]).rstrip() + "\n"
+
+
 def _remove_toml_table(text: str, table_name: str) -> str:
     """Remove a TOML table and its body, if present."""
     lines = text.splitlines()
@@ -630,8 +654,21 @@ def _build_codex_provider_block(base_url: str, api_key: str) -> str:
     return "\n".join(lines) + "\n"
 
 
+def _build_codex_profile_block(model_id: str) -> str:
+    """Render the ``[profiles.skillclaw]`` TOML table that selects ``model_id`` via the SkillClaw provider."""
+    header = "[profiles.skillclaw]"
+    model_line = f"model = {_format_toml_value(model_id)}"
+    provider_line = 'model_provider = "skillclaw"'
+    # Newline-terminated so the caller can append the table verbatim.
+    return f"{header}\n{model_line}\n{provider_line}\n"
+
+
 def _configure_codex(cfg: "SkillClawConfig") -> None:
-    """Auto-configure Codex CLI to use the SkillClaw proxy."""
+    """Register SkillClaw as an opt-in Codex profile.
+
+    Do not change Codex's global ``model`` / ``model_provider`` defaults.
+    Users opt in explicitly with ``codex --profile skillclaw``.
+    """
     model_id = cfg.served_model_name or cfg.llm_model_id or "skillclaw-model"
     api_key = cfg.proxy_api_key or "skillclaw"
     base_url = f"http://127.0.0.1:{cfg.proxy_port}/v1"
@@ -645,15 +682,13 @@ def _configure_codex(cfg: "SkillClawConfig") -> None:
         except Exception as e:
             logger.warning("[ClawAdapter] Failed to read Codex config %s: %s", config_path, e)
 
-    updated = _upsert_top_level_toml_keys(
-        existing_text,
-        {
-            "model": model_id,
-            "model_provider": "skillclaw",
-        },
-    )
+    updated = existing_text
+    if str(_extract_top_level_toml_value(updated, "model_provider") or "") == "skillclaw":
+        updated = _remove_top_level_toml_keys(updated, {"model", "model_provider"})
     updated = _remove_toml_table(updated, "model_providers.skillclaw").rstrip() + "\n\n"
+    updated = _remove_toml_table(updated, "profiles.skillclaw").rstrip() + "\n\n"
     updated += _build_codex_provider_block(base_url, api_key)
+    updated += "\n" + _build_codex_profile_block(model_id)
 
     _backup_codex_config_if_changed(config_path, updated)
     _write_text_atomic(config_path, updated, "Codex config")
@@ -683,10 +718,13 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]:
     configured_base_url = str(provider_cfg.get("base_url") or "")
     configured_wire_api = str(provider_cfg.get("wire_api") or "")
     configured_token = str(provider_cfg.get("experimental_bearer_token") or "")
+    profile_cfg = _extract_toml_table(text, "profiles.skillclaw")
+    configured_profile_model = str(profile_cfg.get("model") or "")
+    configured_profile_provider = str(profile_cfg.get("model_provider") or "")
 
     proxy_match = (
-        configured_model == expected_model
-        and configured_provider == "skillclaw"
+        configured_profile_model == expected_model
+        and configured_profile_provider == "skillclaw"
         and configured_base_url == expected_base_url
         and configured_wire_api == "responses"
         and configured_token == expected_api_key
@@ -696,7 +734,8 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]:
     skills_dir_match = configured_skillclaw_skills_dir == expected_skills_dir
     issues: list[str] = []
     notes: list[str] = [
-        "Codex uses the OpenAI Responses-compatible SkillClaw endpoint via `model_providers.skillclaw`.",
+        "Codex can opt into SkillClaw with `codex --profile skillclaw`.",
+        "SkillClaw registers a Codex profile and does not change Codex's global model defaults.",
         "Codex session boundaries fall back to proxy-side heuristics because"
         " Codex does not send SkillClaw session headers.",
     ]
@@ -705,8 +744,11 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]:
     if not config_path.exists():
         issues.append("Codex config is missing: ~/.codex/config.toml")
     if not proxy_match:
-        issues.append("Codex model routing is not pointing at the local SkillClaw proxy.")
-        next_steps.append("Start SkillClaw once with `claw_type=codex` so it can rewrite ~/.codex/config.toml.")
+        issues.append("Codex SkillClaw profile is missing or not pointing at the local SkillClaw proxy.")
+        next_steps.append("Start SkillClaw once with `claw_type=codex` so it can register ~/.codex/config.toml.")
+    if configured_provider == "skillclaw":
+        issues.append("Codex global model_provider still points at SkillClaw; normal Codex runs may be intercepted.")
+        next_steps.append("Remove top-level `model_provider = \"skillclaw\"` or run `skillclaw restore codex`.")
     if not expected_skills_dir.is_dir():
         issues.append(f"Codex skills directory is missing: {expected_skills_dir}")
         next_steps.append(f"Create or prepare the Codex skills directory: {expected_skills_dir}")
@@ -723,9 +765,12 @@ def inspect_codex_config(cfg: "SkillClawConfig") -> dict[str, object]:
         "status": "ok" if not issues else "warning",
         "config_path": str(config_path),
         "config_exists": config_path.exists(),
-        "integration_scope": "codex-only",
+        "integration_scope": "codex-profile-only",
         "expected_model": expected_model,
         "configured_model": configured_model or "(unset)",
+        "expected_profile": "skillclaw",
+        "configured_profile_model": configured_profile_model or "(unset)",
+        "configured_profile_provider": configured_profile_provider or "(unset)",
         "expected_base_url": expected_base_url,
         "configured_base_url": configured_base_url or "(unset)",
         "configured_provider": configured_provider or "(unset)",
diff --git a/tests/test_codex_profile_integration.py b/tests/test_codex_profile_integration.py
new file mode 100644
index 0000000..5be83d1
--- /dev/null
+++ b/tests/test_codex_profile_integration.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from skillclaw import claw_adapter
+from skillclaw.config import SkillClawConfig
+
+
+def test_configure_codex_registers_profile_without_replacing_global_defaults(monkeypatch, tmp_path: Path) -> None:
+    config_path = tmp_path / ".codex" / "config.toml"
+    config_path.parent.mkdir(parents=True)
+    config_path.write_text(
+        'model = "gpt-5.5"\nmodel_provider = "openai"\n\n[profiles.default]\nmodel = "gpt-5.5"\n',
+        encoding="utf-8",
+    )
+    monkeypatch.setattr(claw_adapter, "_CODEX_CONFIG_PATH", config_path)
+    monkeypatch.setattr(claw_adapter, "_CODEX_SKILLS_DIR", tmp_path / ".codex" / "skills")
+    monkeypatch.setattr(claw_adapter, "_CODEX_BACKUP_DIR", tmp_path / "backups")
+
+    claw_adapter._configure_codex(
+        SkillClawConfig(served_model_name="skillclaw-model", proxy_api_key="skillclaw-key", proxy_port=31000)
+    )
+
+    text = config_path.read_text(encoding="utf-8")
+    # Check the globals in the pre-table preamble only: `[profiles.default]` repeats the same model
+    # line, so a whole-file substring match would still pass if the top-level default were dropped.
+    top_level = text.split("[", 1)[0]
+    assert 'model = "gpt-5.5"' in top_level
+    assert 'model_provider = "openai"' in top_level
+    assert "skillclaw" not in top_level
+    assert "[model_providers.skillclaw]" in text
+    assert 'base_url = "http://127.0.0.1:31000/v1"' in text
+    assert 'wire_api = "responses"' in text
+    assert 'experimental_bearer_token = "skillclaw-key"' in text
+    assert "[profiles.skillclaw]" in text
+    assert 'model = "skillclaw-model"' in text
+    assert 'model_provider = "skillclaw"' in text
+    assert (tmp_path / ".codex" / "skills").is_dir()
+
+
+def test_configure_codex_removes_legacy_global_skillclaw_defaults(monkeypatch, tmp_path: Path) -> None:
+    codex_dir = tmp_path / ".codex"
+    codex_dir.mkdir(parents=True)
+    config_path = codex_dir / "config.toml"
+    # Legacy layout: the top-level model / model_provider pair still points at SkillClaw.
+    legacy_text = 'model = "skillclaw-model"\nmodel_provider = "skillclaw"\n\n[profiles.default]\nmodel = "gpt-5.5"\n'
+    config_path.write_text(legacy_text, encoding="utf-8")
+    monkeypatch.setattr(claw_adapter, "_CODEX_CONFIG_PATH", config_path)
+    monkeypatch.setattr(claw_adapter, "_CODEX_SKILLS_DIR", codex_dir / "skills")
+    monkeypatch.setattr(claw_adapter, "_CODEX_BACKUP_DIR", tmp_path / "backups")
+
+    claw_adapter._configure_codex(SkillClawConfig(served_model_name="skillclaw-model"))
+
+    top_level = config_path.read_text(encoding="utf-8").split("[", 1)[0]
+    assert "model_provider" not in top_level
+    assert "model =" not in top_level
+    assert "[profiles.skillclaw]" in config_path.read_text(encoding="utf-8")

From 8e4075d83f550ab15fb3455dddbb210bf1818d38 Mon Sep 17 00:00:00 2001
From: StoneHanaMori <ysd23@mails.tsinghua.edu.cn>
Date: Thu, 21 May 2026 23:24:30 +0800
Subject: [PATCH 2/4] fix: default Codex profile to Responses mode

---
 assets/README_ZH.md                     |  2 +-
 skillclaw/config_store.py               | 15 +++++++++++++--
 skillclaw/setup_wizard.py               | 16 +++++++++++++---
 tests/test_codex_profile_integration.py | 19 +++++++++++++++++++
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/assets/README_ZH.md b/assets/README_ZH.md
index 312b26b..670da41 100644
--- a/assets/README_ZH.md
+++ b/assets/README_ZH.md
@@ -211,7 +211,7 @@ skillclaw setup
 第一次最小化验证时，推荐这样选：
 
 - `CLI agent` 选 `none`，先不要自动改外部 agent 配置
-- `skills` 目录保持默认值 `~/.skillclaw/skills`；如果你选了 Hermes，默认技能库会变成 `~/.hermes/skills`
+- `skills` 目录保持默认值 `~/.skillclaw/skills`；如果你选了 Hermes、Codex 或 Claude Code，默认技能库会变成 `~/.hermes/skills`、`~/.codex/skills` 或 `~/.claude/skills`
 - 如果你只是想先验证代理能不能正常用，可以先关闭 shared storage
 - 如果你后面想在同一台机器上继续跑本地 evolver 闭环，就把 shared storage 打开并选 `local` backend，例如 `~/.skillclaw/local-share`
 - 如果你想先把成本压到最低，可以先关闭 PRM
diff --git a/skillclaw/config_store.py b/skillclaw/config_store.py
index 1b3a8f2..7273766 100644
--- a/skillclaw/config_store.py
+++ b/skillclaw/config_store.py
@@ -19,6 +19,10 @@
 _DEFAULT_CODEX_SKILLS_DIR = Path.home() / ".codex" / "skills"
 _DEFAULT_CLAUDE_SKILLS_DIR = Path.home() / ".claude" / "skills"
 _DEFAULT_OPENCODE_SKILLS_DIR = Path.home() / ".config" / "opencode" / "skills"
+_DEFAULT_LLM_API_MODE_BY_CLAW = {  # lower-cased claw type -> default upstream API mode
+    "codex": "responses",
+}
+_FALLBACK_LLM_API_MODE = "chat"  # applied to any claw type missing from the mapping above
 
 _DEFAULTS: dict = {
     "llm": {
@@ -161,6 +165,12 @@ def default_skills_dir_for_claw(claw_type: str) -> Path:
     return _DEFAULT_SKILLS_DIR
 
 
+def default_llm_api_mode_for_claw(claw_type: str) -> str:
+    """Map an agent type to its default upstream API mode, ignoring case and surrounding whitespace."""
+    agent = (str(claw_type) if claw_type else "").strip().lower()
+    return _DEFAULT_LLM_API_MODE_BY_CLAW[agent] if agent in _DEFAULT_LLM_API_MODE_BY_CLAW else _FALLBACK_LLM_API_MODE
+
+
 def resolve_skills_dir(skills_dir: Any, *, claw_type: str) -> str:
     """Normalize a configured skills dir, applying agent-native defaults.
 
@@ -254,13 +264,14 @@ def to_skillclaw_config(self) -> SkillClawConfig:
         llm_api_base = llm.get("api_base", "")
         llm_api_key = llm.get("api_key", "")
         llm_model_id = llm.get("model_id", "")
-        llm_api_mode = str(llm.get("api_mode", "chat") or "chat")
+        raw_claw_type = str(data.get("claw_type", "openclaw") or "openclaw")
+        default_api_mode = default_llm_api_mode_for_claw(raw_claw_type)
+        llm_api_mode = str(llm.get("api_mode", default_api_mode) or default_api_mode)
         proxy = data.get("proxy", {})
         skills = data.get("skills", {})
         orouter = data.get("openrouter", {})
         prm = data.get("prm", {})
         configure_openclaw = bool(data.get("configure_openclaw", True))
-        raw_claw_type = str(data.get("claw_type", "openclaw") or "openclaw")
         if not configure_openclaw:
             raw_claw_type = "none"
 
diff --git a/skillclaw/setup_wizard.py b/skillclaw/setup_wizard.py
index 1197865..99131bb 100644
--- a/skillclaw/setup_wizard.py
+++ b/skillclaw/setup_wizard.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 
 from .claw_adapter import CLAW_TYPES
-from .config_store import CONFIG_DIR, ConfigStore, resolve_skills_dir
+from .config_store import CONFIG_DIR, ConfigStore, default_llm_api_mode_for_claw, resolve_skills_dir
 
 _PROVIDER_PRESETS = {
     "kimi": {
@@ -202,7 +202,13 @@ def run(self):
                 f"Recommended directory: {default_skills_dir}"
             )
         elif claw_type == "codex":
-            print(f"Codex reads native skills from ~/.codex/skills.\nRecommended directory: {default_skills_dir}")
+            print(
+                "Codex will get a SkillClaw profile without changing its global defaults.\n"
+                "After starting SkillClaw, run: codex --profile skillclaw\n"
+                "Normal `codex` runs remain unchanged.\n"
+                "Codex reads native skills from ~/.codex/skills.\n"
+                f"Recommended directory: {default_skills_dir}"
+            )
         elif claw_type == "claude":
             print(
                 f"Claude Code reads native skills from ~/.claude/skills.\nRecommended directory: {default_skills_dir}"
@@ -343,7 +349,8 @@ def run(self):
         proxy_config["port"] = proxy_port
         proxy_config.setdefault("host", "0.0.0.0")
         proxy_config["served_model_name"] = served_model_name or "skillclaw-model"
-        llm_api_mode = str(current_llm.get("api_mode", "chat") or "chat")
+        default_api_mode = default_llm_api_mode_for_claw(claw_type)
+        llm_api_mode = str(current_llm.get("api_mode", default_api_mode) or default_api_mode)
         data = {
             "claw_type": claw_type,
             "llm": {
@@ -375,4 +382,7 @@ def run(self):
 
         print(f"\nConfig saved to: {cs.config_file}")
         print("\nRun 'skillclaw start' to launch SkillClaw.")
+        if claw_type == "codex":
+            print("Then run 'codex --profile skillclaw' to use Codex through SkillClaw.")
+            print("Use 'skillclaw doctor codex' if the profile does not work as expected.")
         print("=" * 60 + "\n")
diff --git a/tests/test_codex_profile_integration.py b/tests/test_codex_profile_integration.py
index 5be83d1..0f38b47 100644
--- a/tests/test_codex_profile_integration.py
+++ b/tests/test_codex_profile_integration.py
@@ -4,6 +4,7 @@
 
 from skillclaw import claw_adapter
 from skillclaw.config import SkillClawConfig
+from skillclaw.config_store import ConfigStore
 
 
 def test_configure_codex_registers_profile_without_replacing_global_defaults(monkeypatch, tmp_path: Path) -> None:
@@ -55,3 +56,21 @@ def test_configure_codex_removes_legacy_global_skillclaw_defaults(monkeypatch, t
     assert "model_provider" not in top_level
     assert "model =" not in top_level
     assert "[profiles.skillclaw]" in config_path.read_text(encoding="utf-8")
+
+
+def test_codex_config_defaults_to_responses_mode_and_codex_skills(tmp_path: Path) -> None:
+    # Neither an LLM api_mode nor a skills directory is stored, so both must come from the codex defaults.
+    saved = {
+        "claw_type": "codex",
+        "llm": {"provider": "openai", "api_base": "http://upstream.test/v1", "model_id": "upstream"},
+        "proxy": {"served_model_name": "skillclaw-model"},
+        "skills": {"enabled": True},
+        "prm": {"enabled": False},
+    }
+    store = ConfigStore(tmp_path / "config.yaml")
+    store.save(saved)
+
+    cfg = store.to_skillclaw_config()
+
+    assert cfg.llm_api_mode == "responses"
+    assert cfg.skills_dir.endswith(".codex/skills")

From b0882ed4e8e06cd104960ce12496be1d7772c4e5 Mon Sep 17 00:00:00 2001
From: StoneHanaMori <ysd23@mails.tsinghua.edu.cn>
Date: Thu, 21 May 2026 23:25:23 +0800
Subject: [PATCH 3/4] fix: inject skills in native Codex responses flow

---
 skillclaw/api_server.py                 | 32 +++++++++++++++++++
 tests/test_codex_profile_integration.py | 41 +++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py
index 43ec957..fa4fb21 100644
--- a/skillclaw/api_server.py
+++ b/skillclaw/api_server.py
@@ -1623,6 +1623,8 @@ async def responses(
 
             body = await request.json()
             if owner._responses_native_enabled():
+                turn_type = _resolve_turn_type(x_turn_type, body.get("turn_type"), default="main")
+                body = owner._prepare_native_responses_body(body, turn_type=turn_type)
                 if bool(body.get("stream", False)):
                     return StreamingResponse(
                         owner._stream_llm_responses(body),
@@ -2581,6 +2583,36 @@ def _prepare_responses_forward(
             headers["Authorization"] = f"Bearer {self.config.llm_api_key}"
         return f"{api_base}/responses", send_body, headers
 
+    def _prepare_native_responses_body(self, body: dict[str, Any], *, turn_type: str) -> dict[str, Any]:
+        """Apply non-destructive SkillClaw hooks before native Responses forwarding."""
+        prepared = dict(body)
+        if not self.skill_manager or turn_type != "main":  # skills are injected on "main" turns only
+            return prepared
+
+        try:
+            self.skill_manager.refresh_if_changed()
+        except Exception as e:  # best-effort: log the failure and continue with injection
+            logger.warning("[SkillManager] failed to refresh local skills: %s", e)
+
+        skill_text = self.skill_manager.build_injection_prompt(
+            max_chars=getattr(self.config, "max_skills_prompt_chars", 30_000),
+        )
+        if not skill_text:  # nothing to inject: forward the copied payload unchanged
+            return prepared
+
+        all_skills = self.skill_manager.get_all_skills()
+        skill_names = [s.get("name", "unknown_skill") for s in all_skills if isinstance(s, dict)]
+        logger.info(
+            "[SkillManager] listing %d skills in Codex Responses instructions: %s",
+            len(skill_names),
+            ", ".join(skill_names)[:400],  # cap the logged name list at 400 characters
+        )
+        self.skill_manager.record_injection(skill_names)
+
+        existing = _normalize_responses_content(prepared.get("instructions", ""))
+        prepared["instructions"] = (existing + "\n\n" + skill_text).strip() if existing else skill_text
+        return prepared
+
     async def _forward_to_llm_responses(self, body: dict[str, Any]) -> dict[str, Any]:
         """Forward a Codex Responses payload to an upstream Responses API."""
         import httpx
diff --git a/tests/test_codex_profile_integration.py b/tests/test_codex_profile_integration.py
index 0f38b47..832c365 100644
--- a/tests/test_codex_profile_integration.py
+++ b/tests/test_codex_profile_integration.py
@@ -3,10 +3,28 @@
 from pathlib import Path
 
 from skillclaw import claw_adapter
+from skillclaw.api_server import SkillClawAPIServer
 from skillclaw.config import SkillClawConfig
 from skillclaw.config_store import ConfigStore
 
 
+class FakeSkillManager:  # stub exposing only the calls made by _prepare_native_responses_body
+    def __init__(self) -> None:
+        self.injected: list[list[str]] = []
+
+    def refresh_if_changed(self) -> None:
+        """No-op: the fake has nothing to reload."""
+
+    def build_injection_prompt(self, max_chars: int = 30_000) -> str:
+        return "<available_skills><skill><name>demo</name></skill></available_skills>"
+
+    def get_all_skills(self) -> list[dict]:
+        return [dict(name="demo")]
+
+    def record_injection(self, names: list[str]) -> None:
+        self.injected.append([*names])
+
+
 def test_configure_codex_registers_profile_without_replacing_global_defaults(monkeypatch, tmp_path: Path) -> None:
     config_path = tmp_path / ".codex" / "config.toml"
     config_path.parent.mkdir(parents=True)
@@ -74,3 +92,26 @@ def test_codex_config_defaults_to_responses_mode_and_codex_skills(tmp_path: Path
 
     assert cfg.llm_api_mode == "responses"
     assert cfg.skills_dir.endswith(".codex/skills")
+
+
+def test_native_responses_body_injects_skills_without_dropping_codex_tools() -> None:
+    server = object.__new__(SkillClawAPIServer)
+    server.config = SkillClawConfig(max_skills_prompt_chars=10_000)
+    server.skill_manager = FakeSkillManager()
+    tools = [{"type": "custom", "name": "apply_patch"}, {"type": "namespace", "name": "mcp__github__"}]
+    tool_choice = {"type": "custom", "name": "apply_patch"}
+    body = {"instructions": "base instructions", "input": "hi", "tools": tools, "tool_choice": tool_choice}
+
+    prepared = server._prepare_native_responses_body(body, turn_type="main")
+
+    # Non-destructive: a new dict is returned and the caller's payload keeps its original instructions.
+    assert prepared is not body
+    assert body["instructions"] == "base instructions"
+    assert prepared["tools"] == [
+        {"type": "custom", "name": "apply_patch"},
+        {"type": "namespace", "name": "mcp__github__"},
+    ]
+    assert prepared["tool_choice"] == {"type": "custom", "name": "apply_patch"}
+    assert prepared["instructions"].startswith("base instructions")
+    assert "<available_skills>" in prepared["instructions"]
+    assert server.skill_manager.injected == [["demo"]]

From ec6f2e77db7c5349ea9c836549d1be9a8f171be3 Mon Sep 17 00:00:00 2001
From: StoneHanaMori <ysd23@mails.tsinghua.edu.cn>
Date: Fri, 22 May 2026 00:15:37 +0800
Subject: [PATCH 4/4] refactor: remove tokenizer sample pipeline

---
 pyproject.toml                       |   6 +-
 skillclaw/api_server.py              | 435 ++++++---------------------
 skillclaw/data_formatter.py          |  23 --
 tests/test_anthropic_messages_api.py |  11 +-
 tests/test_responses_native.py       |   9 +-
 5 files changed, 99 insertions(+), 385 deletions(-)
 delete mode 100644 skillclaw/data_formatter.py

diff --git a/pyproject.toml b/pyproject.toml
index 3a4a070..8e26f57 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,10 +17,6 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-# Tokenizer for prompt truncation and PRM/OPD token-level data
-tokenizer = [
-    "transformers>=4.51.1",
-]
 # Embedding-based skill retrieval
 embedding = [
     "numpy",
@@ -44,7 +40,7 @@ server = [
 ]
 # Everything
 all = [
-    "skillclaw[tokenizer,embedding,evolve,sharing,server]",
+    "skillclaw[embedding,evolve,sharing,server]",
 ]
 
 [project.scripts]
diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py
index fa4fb21..52640a3 100644
--- a/skillclaw/api_server.py
+++ b/skillclaw/api_server.py
@@ -20,7 +20,6 @@
 import time
 from contextlib import asynccontextmanager
 from datetime import datetime, timezone
-from itertools import count
 from typing import Any, Optional
 
 import uvicorn
@@ -28,7 +27,6 @@
 from fastapi.responses import JSONResponse, StreamingResponse
 
 from .config import SkillClawConfig
-from .data_formatter import ConversationSample
 from .prm_scorer import PRMScorer
 from .protocols import anthropic_messages as anthropic_protocol
 from .protocols import openai_responses as responses_protocol
@@ -1027,16 +1025,6 @@ def _merge_tool_error_info(
     ]
 
 
-def _extract_logprobs_from_chat_response(choice: dict[str, Any]) -> list[float]:
-    logprobs_obj = choice.get("logprobs")
-    if not isinstance(logprobs_obj, dict):
-        return []
-    content = logprobs_obj.get("content")
-    if not isinstance(content, list):
-        return []
-    return [float(item.get("logprob", 0.0)) for item in content if isinstance(item, dict)]
-
-
 def _rewrite_new_session_bootstrap_prompt(messages: list[dict]) -> tuple[list[dict], int]:
     """Rewrite OpenClaw /new bootstrap user prompt to a safer variant.
 
@@ -1227,24 +1215,16 @@ def _token_estimate_text(content: Any) -> str:
     return str(content) if content is not None else ""
 
 
-def _estimate_openai_body_input_tokens(tokenizer: Any, openai_body: dict[str, Any]) -> int:
+def _estimate_openai_body_input_tokens(openai_body: dict[str, Any]) -> int:
+    """Return a provider-agnostic rough input token estimate.
+
+    SkillClaw proxies external agents and does not own the upstream model's
+    exact tokenization. Keep this estimate local and dependency-free so
+    daemon readiness never depends on model-specific tokenization.
+    """
     messages = list(openai_body.get("messages") or [])
     tools = openai_body.get("tools")
     image_tokens = sum(_estimate_image_content_tokens(msg.get("content")) for msg in messages if isinstance(msg, dict))
-    if tokenizer is not None:
-        try:
-            text = tokenizer.apply_chat_template(
-                _normalize_messages_for_template(messages),
-                tools=tools if tools else None,
-                tokenize=False,
-                add_generation_prompt=True,
-            )
-            tokenized = tokenizer(text, add_special_tokens=False)
-            input_ids = tokenized["input_ids"] if isinstance(tokenized, dict) else tokenized.input_ids
-            return max(0, len(input_ids) + image_tokens)
-        except Exception:
-            pass
-
     text_parts = []
     for msg in messages:
         if not isinstance(msg, dict):
@@ -1416,7 +1396,7 @@ class SkillClawAPIServer:
     skill_manager:
         Optional SkillManager for injecting skills into system prompts.
     prm_scorer:
-        Optional PRMScorer. If None, all samples get reward=0.
+        Optional PRMScorer for turn feedback.
     """
 
     def __init__(
@@ -1446,13 +1426,11 @@ def __init__(
         self._system_prompt_cache_file = os.path.join(config.record_dir, f"system_prompt_cache_{cache_suffix}.json")
 
         # State machines
-        self._index_counter = count(0)
-        self._group_counter = count(0)
         self._turn_counts: dict[str, int] = {}
         self._pending_turn_data: dict[str, dict[int, dict]] = {}  # session → {turn → data}
         self._prm_tasks: dict[str, dict[int, asyncio.Task]] = {}  # session → {turn → task}
         self._pending_records: dict[str, dict] = {}  # for record logging
-        self._session_effective: dict[str, int] = {}  # at-least-one guarantee
+        self._session_scored_turns: dict[str, int] = {}  # session -> finalized PRM turn count
         self._session_turns: dict[str, list] = {}
         self._session_last_active: dict[str, float] = {}  # session -> unix_ts
         self._closing_sessions: set[str] = set()  # session ids currently being closed
@@ -1490,9 +1468,6 @@ def __init__(
             with open(self._prm_record_file, "w"):
                 pass
 
-        # Tokenizer is used for prompt length accounting/truncation and for
-        # optional tokenized conversation sample export.
-        self._tokenizer = self._load_tokenizer()
         self.app = self._build_app()
 
         # Threading lifecycle (set by start())
@@ -1501,19 +1476,6 @@ def __init__(
         self._ready_event = threading.Event()
         self._server_stopped_event = threading.Event()
 
-    # ------------------------------------------------------------------ #
-    # Tokenizer                                                            #
-    # ------------------------------------------------------------------ #
-
-    def _load_tokenizer(self):
-        try:
-            from transformers import AutoTokenizer
-
-            return AutoTokenizer.from_pretrained(self.config.model_name, trust_remote_code=True)
-        except Exception as e:
-            logger.warning("[OpenClaw] could not load tokenizer: %s", e)
-            return None
-
     # ------------------------------------------------------------------ #
     # FastAPI app                                                          #
     # ------------------------------------------------------------------ #
@@ -1734,7 +1696,7 @@ async def anthropic_count_tokens(
 
             raw_body = await request.json()
             openai_body = _anthropic_to_openai_body(raw_body)
-            input_tokens = _estimate_openai_body_input_tokens(owner._tokenizer, openai_body)
+            input_tokens = _estimate_openai_body_input_tokens(openai_body)
             return JSONResponse(content={"input_tokens": input_tokens})
 
         @app.post("/v1/messages")
@@ -1899,7 +1861,7 @@ def _collect_active_session_ids(self) -> list[str]:
         session_ids.update(self._session_turns.keys())
         session_ids.update(self._pending_turn_data.keys())
         session_ids.update(self._turn_counts.keys())
-        session_ids.update(self._session_effective.keys())
+        session_ids.update(self._session_scored_turns.keys())
         session_ids.update(self._prm_tasks.keys())
         return sorted(s for s in session_ids if s and s not in self._closing_sessions)
 
@@ -1973,7 +1935,7 @@ async def _shutdown_cleanup(self) -> None:
         await self._await_background_tasks(self._shutdown_drain_timeout_seconds)
 
     async def _close_session(self, session_id: str, reason: str = "explicit") -> None:
-        """Flush a session: submit remaining samples, upload session data, clean up state."""
+        """Flush a session: finalize pending turn feedback, upload session data, clean up state."""
         if not session_id:
             return
         if session_id in self._closing_sessions:
@@ -1981,35 +1943,51 @@ async def _close_session(self, session_id: str, reason: str = "explicit") -> Non
         self._closing_sessions.add(session_id)
         try:
             self._flush_pending_record(session_id, None)
-            pending_snapshot = {
-                turn_num: dict(turn_data) for turn_num, turn_data in self._pending_turn_data.get(session_id, {}).items()
-            }
-            self._maybe_submit_ready_samples(session_id, force_last_prm=True)
-            prm_tasks = list(self._prm_tasks.get(session_id, {}).values())
-            if prm_tasks:
+            pending = self._pending_turn_data.get(session_id, {})
+            prm_tasks = self._prm_tasks.setdefault(session_id, {})
+            if self.config.use_prm and self.prm_scorer:
+                for turn_num, turn_data in list(pending.items()):
+                    if turn_num in prm_tasks:
+                        continue
+                    prm_task = asyncio.create_task(
+                        self.prm_scorer.evaluate(
+                            turn_data.get("response_text", ""),
+                            turn_data.get("prompt_text", ""),
+                            session_id=session_id,
+                            turn_num=turn_num,
+                        )
+                    )
+                    prm_task.add_done_callback(self._task_done_cb)
+                    prm_task.add_done_callback(
+                        lambda _t, sid=session_id, tnum=turn_num: self._on_prm_done_record_only(sid, tnum, _t)
+                    )
+                    prm_tasks[turn_num] = prm_task
+            active_prm_tasks = list(prm_tasks.values())
+            if active_prm_tasks:
                 try:
                     await asyncio.wait_for(
-                        asyncio.gather(*prm_tasks, return_exceptions=True),
+                        asyncio.gather(*active_prm_tasks, return_exceptions=True),
                         timeout=_SHUTDOWN_DRAIN_TIMEOUT_SECONDS,
                     )
                 except asyncio.TimeoutError:
                     logger.warning("[SessionDetect] PRM drain timed out for session=%s", session_id)
-            for turn_num in sorted(pending_snapshot.keys()):
-                turn_data = pending_snapshot[turn_num]
+            for turn_num in sorted(list(pending.keys())):
+                turn_data = pending.pop(turn_num)
                 prm_result = turn_data.pop("prm_result", None)
-                prm_task = self._prm_tasks.get(session_id, {}).get(turn_num)
+                prm_task = prm_tasks.get(turn_num)
                 if prm_result is None and prm_task is not None and prm_task.done():
                     try:
                         prm_result = prm_task.result()
                     except (asyncio.CancelledError, Exception):
                         prm_result = None
-                await self._submit_turn_sample(
+                prm_tasks.pop(turn_num, None)
+                await self._finalize_turn_feedback(
                     turn_num,
                     turn_data,
                     session_id,
                     prm_result,
                 )
-            eff = self._session_effective.pop(session_id, 0)
+            eff = self._session_scored_turns.pop(session_id, 0)
             self._turn_counts.pop(session_id, None)
             self._pending_turn_data.pop(session_id, None)
             prm_tasks = self._prm_tasks.pop(session_id, {})
@@ -2017,7 +1995,7 @@ async def _close_session(self, session_id: str, reason: str = "explicit") -> Non
                 if isinstance(task, asyncio.Task) and not task.done():
                     task.cancel()
             logger.info(
-                "[SessionDetect] closed session=%s reason=%s (effective_samples=%d)",
+                "[SessionDetect] closed session=%s reason=%s (scored_turns=%d)",
                 session_id,
                 reason,
                 eff,
@@ -2131,7 +2109,7 @@ def _fire_prm_scoring(
         response_text: str,
         instruction_text: str,
         next_state,
-        submit_ready_samples: bool = True,
+        finalize_ready_turns: bool = True,
     ):
         if not self.prm_scorer or not next_state:
             return
@@ -2140,10 +2118,10 @@ def _fire_prm_scoring(
             self.prm_scorer.evaluate(response_text, inst_text, session_id=session_id, turn_num=turn_num)
         )
         task.add_done_callback(self._task_done_cb)
-        if submit_ready_samples:
+        if finalize_ready_turns:
             task.add_done_callback(lambda _t: self._on_prm_done(session_id, turn_num, _t))
         else:
-            task.add_done_callback(lambda _t: self._on_prm_done_without_submit(session_id, turn_num, _t))
+            task.add_done_callback(lambda _t: self._on_prm_done_record_only(session_id, turn_num, _t))
         self._prm_tasks.setdefault(session_id, {})[turn_num] = task
         td = self._pending_turn_data.get(session_id, {}).get(turn_num)
         if td is not None:
@@ -2184,9 +2162,9 @@ def _on_prm_done(self, session_id: str, turn_num: int, task: asyncio.Task):
         self._apply_prm_result(session_id, turn_num, prm_result)
         if session_id in self._closing_sessions:
             return
-        self._maybe_submit_ready_samples(session_id)
+        self._maybe_finalize_ready_turns(session_id)
 
-    def _on_prm_done_without_submit(self, session_id: str, turn_num: int, task: asyncio.Task):
+    def _on_prm_done_record_only(self, session_id: str, turn_num: int, task: asyncio.Task):
         """Callback used for close-session PRM tasks; records score only."""
         if task.cancelled():
             return
@@ -2240,17 +2218,7 @@ async def _handle_request(
             logger.info("[OpenClaw] rewrote %d /new bootstrap user prompt(s) for provider safety", rewritten)
 
         def _prompt_len(msgs):
-            try:
-                norm_msgs = _normalize_messages_for_template(msgs)
-                text = self._tokenizer.apply_chat_template(
-                    norm_msgs,
-                    tools=body.get("tools"),
-                    tokenize=False,
-                    add_generation_prompt=True,
-                )
-                return len(self._tokenizer(text, add_special_tokens=False)["input_ids"])
-            except Exception:
-                return 0
+            return _estimate_openai_body_input_tokens({"messages": msgs, "tools": body.get("tools")})
 
         # Compress verbose system prompts (OpenClaw only).  Non-OpenClaw
         # agents send short or no system prompts; compressing them wastes an
@@ -2315,8 +2283,6 @@ def _prompt_len(msgs):
         forward_body = {k: v for k, v in body.items() if k not in _NON_STANDARD_BODY_KEYS}
         forward_body["stream"] = False
         forward_body.pop("stream_options", None)
-        forward_body["logprobs"] = True
-        forward_body["top_logprobs"] = 1
         if "model" not in forward_body:
             forward_body["model"] = self._served_model
         forward_body["messages"] = messages  # potentially skill-injected
@@ -2383,10 +2349,6 @@ def _prompt_len(msgs):
             if response_msg.get("content") is None:
                 response_msg["content"] = ""
 
-            norm_msgs = _normalize_messages_for_template(messages)
-            norm_resp = _normalize_messages_for_template([response_msg])[0]
-            full_norm = norm_msgs + [norm_resp]
-
             skill_path_map = self.skill_manager.get_skill_path_map() if self.skill_manager else {}
             read_skills = _extract_read_skills_from_tool_calls(
                 tool_calls,
@@ -2411,102 +2373,12 @@ def _prompt_len(msgs):
                 )
 
             user_instruction = _extract_last_user_instruction(messages)
-
-            if self._tokenizer is None:
-                self._turn_counts[session_id] = self._turn_counts.get(session_id, 0) + 1
-                turn_num = self._turn_counts[session_id]
-                prompt_text_simple = "\n".join(
-                    f"{m.get('role', '?')}: {_flatten_message_content(m.get('content', ''))}" for m in messages
-                )
-                response_text_simple = content or (json.dumps(tool_calls, ensure_ascii=False) if tool_calls else "")
-                self._buffer_record(
-                    session_id,
-                    turn_num,
-                    messages,
-                    prompt_text_simple,
-                    response_text_simple,
-                    tool_calls,
-                )
-                self._session_turns.setdefault(session_id, []).append(
-                    {
-                        "turn_num": turn_num,
-                        "prompt_text": user_instruction,
-                        "response_text": response_text_simple,
-                        "reasoning_content": reasoning or None,
-                        "tool_calls": tool_calls,
-                        "read_skills": read_skills,
-                        "modified_skills": modified_skills,
-                        "tool_results": tool_summaries,
-                        "tool_results_raw": [],
-                        "tool_observations": [],
-                        "tool_errors": [],
-                        "injected_skills": injected_skills,
-                        "prm_score": None,
-                    }
-                )
-                self._pending_turn_data.setdefault(session_id, {})[turn_num] = {
-                    "prompt_ids": [],
-                    "response_ids": [],
-                    "response_logprobs": [],
-                    "prompt_text": prompt_text_simple,
-                    "response_text": response_text_simple,
-                }
-                if session_done:
-                    await self._close_session(session_id)
-                output["session_id"] = session_id
-                return {"response": output}
-
-            prompt_text = self._tokenizer.apply_chat_template(
-                norm_msgs,
-                tools=tools,
-                tokenize=False,
-                add_generation_prompt=True,
-            )
-            full_text = self._tokenizer.apply_chat_template(
-                full_norm,
-                tools=tools,
-                tokenize=False,
-                add_generation_prompt=False,
-            )
-
-            if full_text.startswith(prompt_text):
-                response_text = full_text[len(prompt_text) :]
-            else:
-                logger.warning("[OpenClaw] prompt_text not prefix of full_text, using full_text as response")
-                response_text = full_text
-
-            prompt_ids = self._tokenizer(prompt_text, add_special_tokens=False)["input_ids"]
-            response_ids = self._tokenizer(response_text, add_special_tokens=False)["input_ids"]
-
-            if not response_ids and not response_text.strip() and not tool_calls:
-                logger.info("[OpenClaw] MAIN session=%s → empty response, skipping", session_id)
-                output["session_id"] = session_id
-                return {"response": output}
-
-            response_logprobs = _extract_logprobs_from_chat_response(choice)
-            if len(response_logprobs) > len(response_ids):
-                response_logprobs = response_logprobs[: len(response_ids)]
-            elif len(response_logprobs) < len(response_ids):
-                response_logprobs = response_logprobs + [0.0] * (len(response_ids) - len(response_logprobs))
-
-            turn_data = {
-                "prompt_ids": prompt_ids,
-                "response_ids": response_ids,
-                "response_logprobs": response_logprobs,
-                "prompt_text": prompt_text,
-                "response_text": response_text,
-            }
-
             self._turn_counts[session_id] = self._turn_counts.get(session_id, 0) + 1
             turn_num = self._turn_counts[session_id]
-
-            logger.info(
-                "[OpenClaw] MAIN session=%s turn=%d prompt_tokens=%d response_tokens=%d",
-                session_id,
-                turn_num,
-                len(prompt_ids),
-                len(response_ids),
+            prompt_text = "\n".join(
+                f"{m.get('role', '?')}: {_flatten_message_content(m.get('content', ''))}" for m in messages
             )
+            response_text = content or (json.dumps(tool_calls, ensure_ascii=False) if tool_calls else "")
             self._buffer_record(session_id, turn_num, messages, prompt_text, response_text, tool_calls)
             self._session_turns.setdefault(session_id, []).append(
                 {
@@ -2525,10 +2397,20 @@ def _prompt_len(msgs):
                     "prm_score": None,
                 }
             )
-            self._pending_turn_data.setdefault(session_id, {})[turn_num] = turn_data
-            self._maybe_submit_ready_samples(session_id)
+            self._pending_turn_data.setdefault(session_id, {})[turn_num] = {
+                "prompt_text": prompt_text,
+                "response_text": response_text,
+            }
+            logger.info(
+                "[OpenClaw] MAIN session=%s turn=%d prompt_est_tokens=%d response_chars=%d",
+                session_id,
+                turn_num,
+                _estimate_openai_body_input_tokens({"messages": messages, "tools": tools}),
+                len(response_text),
+            )
+            self._maybe_finalize_ready_turns(session_id)
         else:
-            logger.info("[OpenClaw] SIDE session=%s → skipped (no training data)", session_id)
+            logger.info("[OpenClaw] SIDE session=%s -> skipped (side-channel turn)", session_id)
 
         if session_done:
             await self._close_session(session_id)
@@ -2936,26 +2818,10 @@ def _truncate_messages(
         tools,
         max_prompt_tokens: int,
     ) -> list[dict]:
-        """
-        Drop oldest non-system messages until the tokenized prompt fits within
-        max_prompt_tokens.  The system message (if any) is always kept.
-        At least one user message is always kept even if it alone exceeds the limit.
-        """
-        if self._tokenizer is None:
-            return messages
+        """Drop oldest non-system messages using a dependency-free token estimate."""
 
         def _prompt_len(msgs):
-            try:
-                norm_msgs = _normalize_messages_for_template(msgs)
-                text = self._tokenizer.apply_chat_template(
-                    norm_msgs,
-                    tools=tools,
-                    tokenize=False,
-                    add_generation_prompt=True,
-                )
-                return len(self._tokenizer(text, add_special_tokens=False)["input_ids"])
-            except Exception:
-                return 0
+            return _estimate_openai_body_input_tokens({"messages": msgs, "tools": tools})
 
         if _prompt_len(messages) <= max_prompt_tokens:
             return messages
@@ -2964,23 +2830,18 @@ def _prompt_len(msgs):
         sys_msgs = [m for m in messages if m.get("role") == "system"]
         non_sys = [m for m in messages if m.get("role") != "system"]
 
-        # Greedily keep most-recent messages
-        kept = []
-        for msg in reversed(non_sys):
-            candidate = sys_msgs + list(reversed(kept + [msg]))
+        dropped = 0
+        while dropped < len(non_sys) - 1:  # bounded: never drop the newest message
+            candidate = sys_msgs + non_sys[dropped + 1 :]
             if _prompt_len(candidate) <= max_prompt_tokens:
-                kept.append(msg)
-            elif not kept:
-                kept.append(msg)  # keep at least one user message
-                break
-            else:
+                dropped += 1
                 break
+            dropped += 1
 
-        result = sys_msgs + list(reversed(kept))
-        dropped = len(messages) - len(result)
-        if dropped > 0:
-            logger.warning(
-                "[OpenClaw] context truncated: dropped %d oldest messages (%d → %d tokens, limit=%d)",
+        result = sys_msgs + non_sys[dropped:]
+        if dropped:
+            logger.info(
+                "[OpenClaw] context truncated: dropped %d oldest messages (%d -> %d est tokens, limit=%d)",
                 dropped,
                 _prompt_len(messages),
                 _prompt_len(result),
@@ -3034,58 +2895,24 @@ def _inject_skills(self, messages: list[dict]) -> tuple[list[dict], list[str]]:
         return messages, skill_names
 
     # ------------------------------------------------------------------ #
-    # Sample submission                                                    #
+    # Turn feedback finalization                                           #
     # ------------------------------------------------------------------ #
 
-    def _maybe_submit_ready_samples(
-        self,
-        session_id: str,
-        force_no_prm: bool = False,
-        force_last_prm: bool = False,
-    ):
-        """Submit turns whose PRM and teacher queries are done.
-
-        force_no_prm: also submit turns that have no PRM task yet (used at
-        session end for the last turn which will never get a next_state).
-        force_last_prm: when closing a session, fire PRM for the latest
-        pending turn even if it never received a next_state.
-        When force is active, pending teacher tasks are also skipped.
-        """
+    def _maybe_finalize_ready_turns(self, session_id: str):
+        """Finalize turns whose optional PRM scoring is done."""
         prm_tasks = self._prm_tasks.setdefault(session_id, {})
         pending = self._pending_turn_data.get(session_id, {})
         for turn_num in sorted(list(pending.keys())):
-            # --- PRM readiness ---
             prm_task = prm_tasks.get(turn_num)
-            if not self.config.use_prm or not self.prm_scorer:
-                pass  # no PRM → submit immediately
-            elif force_last_prm and prm_task is None:
-                turn_data = pending.get(turn_num)
-                if turn_data is not None:
-                    prm_task = asyncio.create_task(
-                        self.prm_scorer.evaluate(
-                            turn_data.get("response_text", ""),
-                            turn_data.get("prompt_text", ""),
-                            session_id=session_id,
-                            turn_num=turn_num,
-                        )
-                    )
-                    prm_task.add_done_callback(self._task_done_cb)
-                    prm_task.add_done_callback(
-                        lambda _t, sid=session_id, tnum=turn_num: self._on_prm_done_without_submit(sid, tnum, _t)
-                    )
-                    prm_tasks[turn_num] = prm_task
-                continue
-            elif prm_task is not None and not prm_task.done():
-                continue  # PRM still running
-            elif prm_task is None and not force_no_prm:
-                continue  # waiting for next_state to fire PRM
+            if self.config.use_prm and self.prm_scorer:
+                if prm_task is None:
+                    continue  # waiting for the next turn to provide scoring context
+                if not prm_task.done():
+                    continue
 
             turn_data = pending.pop(turn_num)
-            prm_result = None
-            cached_prm_result = turn_data.pop("prm_result", None)
-            if cached_prm_result is not None:
-                prm_result = cached_prm_result
-            if prm_task is not None and prm_task.done():
+            prm_result = turn_data.pop("prm_result", None)
+            if prm_result is None and prm_task is not None and prm_task.done():
                 try:
                     prm_result = prm_task.result()
                 except (asyncio.CancelledError, Exception):
@@ -3093,7 +2920,7 @@ def _maybe_submit_ready_samples(
                 prm_tasks.pop(turn_num, None)
 
             self._safe_create_task(
-                self._submit_turn_sample(
+                self._finalize_turn_feedback(
                     turn_num,
                     turn_data,
                     session_id,
@@ -3101,103 +2928,29 @@ def _maybe_submit_ready_samples(
                 )
             )
 
-    async def _submit_ready_samples_inline(
-        self,
-        session_id: str,
-        force_no_prm: bool = False,
-    ) -> None:
-        """Submit ready samples inline, used when closing a session.
-
-        Unlike ``_maybe_submit_ready_samples``, this awaits the submission
-        coroutine directly so the final PRM/sample records are durable before
-        session cleanup continues.
-        """
-        prm_tasks = self._prm_tasks.setdefault(session_id, {})
-        pending = self._pending_turn_data.get(session_id, {})
-        for turn_num in sorted(list(pending.keys())):
-            prm_task = prm_tasks.get(turn_num)
-            if not self.config.use_prm or not self.prm_scorer:
-                pass
-            elif prm_task is not None and not prm_task.done():
-                continue
-            elif prm_task is None and not force_no_prm:
-                continue
-
-            turn_data = pending.pop(turn_num)
-            prm_result = None
-            cached_prm_result = turn_data.pop("prm_result", None)
-            if cached_prm_result is not None:
-                prm_result = cached_prm_result
-            if prm_task is not None and prm_task.done():
-                try:
-                    prm_result = prm_task.result()
-                except (asyncio.CancelledError, Exception):
-                    pass
-                prm_tasks.pop(turn_num, None)
-
-            await self._submit_turn_sample(
-                turn_num,
-                turn_data,
-                session_id,
-                prm_result,
-            )
-
-    async def _submit_turn_sample(
+    async def _finalize_turn_feedback(
         self,
         turn_num: int,
         turn_data: dict[str, Any],
         session_id: str,
         prm_result: Optional[dict],
     ):
-        prompt_ids = turn_data["prompt_ids"]
-        response_ids = turn_data["response_ids"]
-
-        has_next_state = turn_data.get("has_next_state", False)
-        score = prm_result["score"] if prm_result else 0.0
-
-        exclude = not has_next_state or score == 0.0
-        # Guarantee at least one tokenized sample per session is retained when
-        # sample export is enabled.
-        if exclude and has_next_state and self._session_effective.get(session_id, 0) == 0:
-            exclude = False
-            logger.info(
-                "[OpenClaw] promoting session=%s turn with score=0 → loss_mask=1 (at-least-one guarantee)",
-                session_id,
-            )
-
-        loss_mask = [0] * len(response_ids) if exclude else [1] * len(response_ids)
-        _ = ConversationSample(
-            session_id=session_id,
-            turn_num=turn_num,
-            prompt_tokens=prompt_ids,
-            response_tokens=response_ids,
-            response_logprobs=turn_data["response_logprobs"],
-            loss_mask=loss_mask,
-            reward=score,
-            prompt_text=turn_data.get("prompt_text", ""),
-            response_text=turn_data.get("response_text", ""),
-            skill_generation=self.skill_manager.generation if self.skill_manager else 0,
-        )
-
-        if not exclude:
-            self._session_effective[session_id] = self._session_effective.get(session_id, 0) + 1
-
-        index = next(self._index_counter)
-        next(self._group_counter)
+        """Finalize a turn after optional PRM scoring.
 
+        SkillClaw acts as an external-agent proxy, so finalization keeps only
+        feedback/record side effects that are consumed by the framework.
+        """
+        score = prm_result.get("score", 0.0) if prm_result else 0.0
         if prm_result:
             self._append_prm_record(session_id, turn_num, score, prm_result.get("votes", []))
+            self._session_scored_turns[session_id] = self._session_scored_turns.get(session_id, 0) + 1
 
         logger.info(
-            "[OpenClaw] submitted sample session=%s turn=%d index=%d score=%.1f exclude=%s "
-            "prompt_len=%d response_len=%d",
+            "[OpenClaw] finalized turn session=%s turn=%d score=%.1f response_chars=%d",
             session_id,
             turn_num,
-            index,
             score,
-            exclude,
-            len(prompt_ids),
-            len(response_ids),
+            len(turn_data.get("response_text", "")),
         )
 
     # ------------------------------------------------------------------ #
diff --git a/skillclaw/data_formatter.py b/skillclaw/data_formatter.py
deleted file mode 100644
index 816dc19..0000000
--- a/skillclaw/data_formatter.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""
-Data structures for conversation samples collected by the API proxy.
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-
-
-@dataclass
-class ConversationSample:
-    """One sample collected from the API proxy."""
-
-    session_id: str
-    turn_num: int
-    prompt_tokens: list[int]
-    response_tokens: list[int]
-    response_logprobs: list[float]
-    loss_mask: list[int]
-    reward: float
-    prompt_text: str = ""
-    response_text: str = ""
-    skill_generation: int = 0
diff --git a/tests/test_anthropic_messages_api.py b/tests/test_anthropic_messages_api.py
index bdeb4b2..2184554 100644
--- a/tests/test_anthropic_messages_api.py
+++ b/tests/test_anthropic_messages_api.py
@@ -9,8 +9,7 @@
 
 
 @pytest.fixture
-def anthropic_server(monkeypatch, tmp_path):
-    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+def anthropic_server(tmp_path):
     return SkillClawAPIServer(
         SkillClawConfig(
             proxy_api_key="skillclaw",
@@ -43,14 +42,6 @@ async def test_anthropic_count_tokens_endpoint_returns_local_estimate(anthropic_
 
 @pytest.mark.asyncio
 async def test_anthropic_count_tokens_accounts_for_image_content(anthropic_server):
-    class FakeTokenizer:
-        def apply_chat_template(self, messages, tools=None, tokenize=False, add_generation_prompt=False):
-            return "user: screenshot"
-
-        def __call__(self, text, add_special_tokens=False):
-            return {"input_ids": [1, 2, 3]}
-
-    anthropic_server._tokenizer = FakeTokenizer()
     png_header = (
         b"\x89PNG\r\n\x1a\n"
         + struct.pack(">I", 13)
diff --git a/tests/test_responses_native.py b/tests/test_responses_native.py
index 2370422..3995eb9 100644
--- a/tests/test_responses_native.py
+++ b/tests/test_responses_native.py
@@ -223,8 +223,7 @@ async def fake_stream(body):
 
 
 @pytest.mark.asyncio
-async def test_responses_chat_bridge_merges_previous_response_history(monkeypatch, tmp_path):
-    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+async def test_responses_chat_bridge_merges_previous_response_history(tmp_path):
     server = SkillClawAPIServer(
         SkillClawConfig(
             proxy_api_key="skillclaw",
@@ -280,8 +279,7 @@ async def fake_handle_request(body, session_id, turn_type, session_done):
 
 
 @pytest.mark.asyncio
-async def test_responses_continuation_keeps_new_instructions_first(monkeypatch, tmp_path):
-    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+async def test_responses_continuation_keeps_new_instructions_first(tmp_path):
     server = SkillClawAPIServer(
         SkillClawConfig(
             proxy_api_key="skillclaw",
@@ -339,8 +337,7 @@ async def fake_handle_request(body, session_id, turn_type, session_done):
 
 
 @pytest.mark.asyncio
-async def test_responses_continuation_deduplicates_replayed_output_items(monkeypatch, tmp_path):
-    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+async def test_responses_continuation_deduplicates_replayed_output_items(tmp_path):
     server = SkillClawAPIServer(
         SkillClawConfig(
             proxy_api_key="skillclaw",