diff --git a/pyproject.toml b/pyproject.toml
index 787d4d6c3..bd0d6ee8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
     "typing-extensions>=4.0",
     "pyyaml>=6.0",
     "pydantic>=2.0",
+    "github-copilot-sdk>=0.1.18",
     "textual>=7.0",
 ]
 
diff --git a/src/bcbench/agent/copilot/agent.py b/src/bcbench/agent/copilot/agent.py
index d5f57b283..af02f6f6b 100644
--- a/src/bcbench/agent/copilot/agent.py
+++ b/src/bcbench/agent/copilot/agent.py
@@ -1,11 +1,14 @@
 """GitHub Copilot CLI Agent implementation."""
 
+import asyncio
 import shutil
 import subprocess
 import sys
 from pathlib import Path
 
 import yaml
+from copilot import CopilotClient, MCPServerConfig, SessionConfig
+from copilot.generated.session_events import SessionEventType
 
 from bcbench.agent.copilot.metrics import parse_metrics
 from bcbench.agent.shared import build_mcp_config, build_prompt
@@ -13,6 +16,7 @@
 from bcbench.dataset import DatasetEntry
 from bcbench.exceptions import AgentError, AgentTimeoutError
 from bcbench.logger import get_logger
 from bcbench.operations import setup_agent_skills, setup_custom_agent, setup_instructions_from_config
+from bcbench.operations.skills_operations import setup_copilot_skills
 from bcbench.types import AgentMetrics, EvaluationCategory, ExperimentConfiguration
 
@@ -32,8 +36,11 @@ def run_copilot_agent(entry: DatasetEntry, model: str, category: EvaluationCateg
     logger.info(f"Running GitHub Copilot CLI on: {entry.instance_id}")
 
     prompt: str = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp)
-    mcp_config_json, mcp_server_names = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp)
+    mcp_config: dict[str, MCPServerConfig] | None = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp)
+    # NOTE(review): presumably the ExperimentConfiguration call below still reads mcp_server_names - confirm.
+    mcp_server_names: list[str] | None = list(mcp_config) if mcp_config else None
     instructions_enabled: bool = setup_instructions_from_config(copilot_config, entry, repo_path)
+    copilot_skills: list[str] | None = setup_copilot_skills(copilot_config, entry, repo_path)
     skills_enabled: bool = setup_agent_skills(copilot_config, entry, repo_path)
     custom_agent: str | None = setup_custom_agent(copilot_config, entry, repo_path)
     config = ExperimentConfiguration(
@@ -64,13 +71,46 @@ def run_copilot_agent(entry: DatasetEntry, model: str, category: EvaluationCateg
         ]
         if not instructions_enabled:
             cmd_args.append("--no-custom-instructions")
-        if mcp_config_json:
-            cmd_args.append(f"--additional-mcp-config={mcp_config_json}")
         if custom_agent:
             cmd_args.append(f"--agent={custom_agent}")
 
         logger.debug(f"Copilot command args: {cmd_args}")
 
+        # Copilot SDK
+        async def run_copilot() -> None:
+            """Stream one Copilot SDK session for the prompt, then stop the client."""
+            client = CopilotClient({"cli_path": copilot_cmd})
+            await client.start()
+            try:
+                session = await client.create_session(
+                    SessionConfig(
+                        model=model,
+                        mcp_servers=mcp_config or {},
+                        streaming=True,
+                        skill_directories=copilot_skills or [],
+                    )
+                )
+
+                # Listen for response chunks
+                def handle_event(event) -> None:
+                    if event.type == SessionEventType.ASSISTANT_MESSAGE_DELTA:
+                        sys.stdout.write(event.data.delta_content)
+                        sys.stdout.flush()
+
+                session.on(handle_event)
+
+                await session.send_and_wait(
+                    {"prompt": prompt},
+                    timeout=_config.timeout.agent_execution,
+                )
+                print()  # newline after streaming
+            finally:
+                # Stop the client even if session creation or the request raises;
+                # previously stop() was skipped on any exception.
+                await client.stop()
+
+        asyncio.run(run_copilot())
+
         result = subprocess.run(
             cmd_args,
             cwd=str(repo_path),
diff --git a/src/bcbench/agent/shared/mcp.py b/src/bcbench/agent/shared/mcp.py
index 1ca948904..3e897a290 100644
--- a/src/bcbench/agent/shared/mcp.py
+++ b/src/bcbench/agent/shared/mcp.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 from typing import Any
 
+from copilot import MCPLocalServerConfig, MCPRemoteServerConfig, MCPServerConfig
 from jinja2 import Template
 
 from bcbench.dataset import DatasetEntry
@@ -39,33 +40,38 @@ def cleanup(self) -> None:
 _mcp_server_manager = _ALMcpServerManager()
 
 
-def _build_server_entry(server: dict[str, Any], template_context: dict[str, Any]) -> tuple[str, dict[str, Any]]:
+def _build_server_entry(server: dict[str, Any], template_context: dict[str, Any]) -> dict[str, MCPServerConfig]:
+    """Build the typed MCP config for one server definition.
+
+    Returns a single-entry mapping of server name to config so callers can merge
+    entries with ``dict.update``. Raises AgentError for an unsupported server type.
+    """
     server_type: str = server["type"]
     server_name: str = server["name"]
     tools: list[str] = server["tools"]
 
     match server_type:
         case "http":
-            return server_name, {
-                "type": server_type,
-                "url": server["url"],
-                "tools": tools,
-            }
+            return {server_name: MCPRemoteServerConfig(
+                tools=tools,
+                url=server["url"],
+                type=server_type,
+            )}
         case "local":
             args: list[str] = server["args"]
             rendered_args = [Template(arg).render(**template_context) for arg in args]
-            return server_name, {
-                "type": server_type,
-                "command": server["command"],
-                "args": rendered_args,
-                "tools": tools,
-            }
+            return {server_name: MCPLocalServerConfig(
+                tools=tools,
+                command=server["command"],
+                args=rendered_args,
+                type=server_type,
+            )}
         case _:
             logger.error(f"Unsupported MCP server type: {server_type}, {server}")
             raise AgentError(f"Unsupported MCP server type: {server_type}")
 
 
-def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Path, al_mcp: bool = False) -> tuple[str | None, list[str] | None]:
+def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Path, al_mcp: bool = False) -> dict[str, MCPServerConfig] | None:
     # following docs: https://docs.github.com/en/enterprise-cloud@latest/copilot/how-tos/use-copilot-agents/coding-agent/extend-coding-agent-with-mcp
     mcp_servers: list[dict[str, Any]] = config.get("mcp", {}).get("servers", [])
 
@@ -78,11 +84,13 @@ def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Pat
         logger.info("AL MCP server enabled via --al-mcp flag")
 
     if not mcp_servers:
-        return None, None
+        return None
 
     template_context = {"repo_path": repo_path}
     mcp_server_names: list[str] = [server["name"] for server in mcp_servers]
-    mcp_config = {"mcpServers": dict(map(lambda s: _build_server_entry(s, template_context), mcp_servers))}
+    mcp_config: dict[str, MCPServerConfig] = {}
+    for server in mcp_servers:
+        mcp_config.update(_build_server_entry(server, template_context))
 
     if al_mcp:
         # Launch MCP server with all project paths separated by semicolons
@@ -92,4 +100,4 @@ def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Pat
     logger.info(f"Using MCP servers: {mcp_server_names}")
     logger.debug(f"MCP configuration: {json.dumps(mcp_config, indent=2)}")
 
-    return json.dumps(mcp_config, separators=(",", ":")), mcp_server_names
+    return mcp_config
diff --git a/src/bcbench/operations/skills_operations.py b/src/bcbench/operations/skills_operations.py
index 28740b859..dd4bf9caa 100644
--- a/src/bcbench/operations/skills_operations.py
+++ b/src/bcbench/operations/skills_operations.py
@@ -1,4 +1,5 @@
 from pathlib import Path
 from shutil import copytree, rmtree
 
+from bcbench.config import get_config
 from bcbench.dataset.dataset_entry import DatasetEntry
diff --git a/uv.lock b/uv.lock
index 2f8617e4d..1e9d28e5f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -145,6 +145,7 @@ name = "bcbench"
 version = "0.2.2"
 source = { editable = "." }
 dependencies = [
+    { name = "github-copilot-sdk" },
     { name = "jsonschema" },
     { name = "mini-swe-agent" },
     { name = "pydantic" },
@@ -175,6 +176,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "github-copilot-sdk", specifier = ">=0.1.18" },
     { name = "jsonschema", specifier = ">=4.0" },
     { name = "mini-swe-agent", specifier = ">=1.0.0" },
     { name = "pydantic", specifier = ">=2.0" },
@@ -581,6 +583,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
 ]
 
+[[package]]
+name = "github-copilot-sdk"
+version = "0.1.18"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic" },
+    { name = "python-dateutil" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bf/00/be64b9b33015d5e79fb5e5e95d871484e79a907b3792935b855ab40308ce/github_copilot_sdk-0.1.18.tar.gz", hash = "sha256:b2d56d40c0f48e81f2899d32fb4a8d2b8df22620913547da93fddf9b2f368e9e", size = 81318, upload-time = "2026-01-24T18:09:57.617Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ae/0f/f832b32bca9d89a26a2b810c69fdc37ac925e34855ee93a11bb3d90ca2b7/github_copilot_sdk-0.1.18-py3-none-any.whl", hash = "sha256:99cfdf4d4d0da6d92d5bf36a952546157785df83d6b0783b3f7a8e93a2762171", size = 33740, upload-time = "2026-01-24T18:09:55.696Z" },
+]
+
 [[package]]
 name = "grpcio"
 version = "1.67.1"