Skip to content
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies = [
"typing-extensions>=4.0",
"pyyaml>=6.0",
"pydantic>=2.0",
"github-copilot-sdk>=0.1.18",
"textual>=7.0",
]

Expand Down
43 changes: 39 additions & 4 deletions src/bcbench/agent/copilot/agent.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""GitHub Copilot CLI Agent implementation."""

import asyncio
import shutil
import subprocess
import sys
from pathlib import Path

import yaml
from copilot import CopilotClient, MCPServerConfig, SessionConfig
from copilot.generated.session_events import SessionEventType

from bcbench.agent.copilot.metrics import parse_metrics
from bcbench.agent.shared import build_mcp_config, build_prompt
Expand All @@ -14,6 +17,7 @@
from bcbench.exceptions import AgentError, AgentTimeoutError
from bcbench.logger import get_logger
from bcbench.operations import setup_custom_agent, setup_instructions_from_config
from bcbench.operations.skills_operations import setup_copilot_skills
from bcbench.types import AgentMetrics, EvaluationCategory, ExperimentConfiguration

logger = get_logger(__name__)
Expand All @@ -32,10 +36,11 @@ def run_copilot_agent(entry: DatasetEntry, model: str, category: EvaluationCateg
logger.info(f"Running GitHub Copilot CLI on: {entry.instance_id}")

prompt: str = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp)
mcp_config_json, mcp_server_names = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp)
mcp_config: dict[str, MCPServerConfig] | None = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp)
instructions_enabled: bool = setup_instructions_from_config(copilot_config, entry, repo_path)
copilot_skills: list[str] | None = setup_copilot_skills(copilot_config, entry, repo_path)
custom_agent: str | None = setup_custom_agent(copilot_config, entry, repo_path)
config = ExperimentConfiguration(mcp_servers=mcp_server_names, custom_instructions=instructions_enabled, custom_agent=custom_agent)
config = ExperimentConfiguration(mcp_servers=list(mcp_config.keys()) if mcp_config else None, custom_instructions=instructions_enabled, custom_agent=custom_agent)

logger.info(f"Executing Copilot CLI in directory: {repo_path}")
logger.debug(f"Using prompt:\n{prompt}")
Expand All @@ -58,13 +63,43 @@ def run_copilot_agent(entry: DatasetEntry, model: str, category: EvaluationCateg
]
if not instructions_enabled:
cmd_args.append("--no-custom-instructions")
if mcp_config_json:
cmd_args.append(f"--additional-mcp-config={mcp_config_json}")
if custom_agent:
cmd_args.append(f"--agent={custom_agent}")

logger.debug(f"Copilot command args: {cmd_args}")

# Copilot SDK
async def run_copilot():
    """Run one prompt through the Copilot SDK and stream the reply to stdout.

    Starts a ``CopilotClient`` backed by the CLI at ``copilot_cmd``, opens a
    streaming session for ``model`` (with the configured MCP servers and skill
    directories), echoes assistant message deltas as they arrive, and waits
    for the turn to finish or for ``_config.timeout.agent_execution`` to
    elapse.

    The client is always stopped, even when session creation or the prompt
    fails or times out, so a started CLI client is never left running.
    """
    client = CopilotClient({"cli_path": copilot_cmd})
    await client.start()

    try:
        session = await client.create_session(
            SessionConfig(
                model=model,
                mcp_servers=mcp_config if mcp_config else {},
                streaming=True,
                skill_directories=copilot_skills if copilot_skills else [],
            )
        )

        # Listen for response chunks
        def handle_event(event):
            if event.type != SessionEventType.ASSISTANT_MESSAGE_DELTA:
                return
            delta = event.data.delta_content
            # A delta without content would make sys.stdout.write raise.
            if delta:
                sys.stdout.write(delta)
                sys.stdout.flush()

        session.on(handle_event)

        await session.send_and_wait(
            {"prompt": prompt},
            timeout=_config.timeout.agent_execution,
        )
        print()  # newline after streaming
    finally:
        # Release the client on every exit path, including errors/timeouts.
        await client.stop()

asyncio.run(run_copilot())

result = subprocess.run(
cmd_args,
cwd=str(repo_path),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
name: al-test-generation
description: Guide for creating AL tests for Microsoft Dynamics 365 Business Central. Use this when asked to write, create, or generate AL test codeunits, test procedures, or test automation for Business Central.
---

To create AL tests for Microsoft Dynamics 365 Business Central, follow this process:

## 1. Analyze the Code Under Test

Before writing any test code:
1. Read and understand the procedure or functionality being tested
2. Trace through all code paths to identify UI interactions
3. Examine table definitions for TableRelation constraints

## 2. Identify Required Handler Methods

**CRITICAL: Tests fail with "Unhandled UI" errors when handlers are missing.**

Look for these patterns in the code under test:

| Code Pattern | Required Handler |
| ------------------------------------- | --------------------------- |
| `Confirm()` | `[ConfirmHandler]` |
| `Message()` | `[MessageHandler]` |
| `StrMenu()` | `[StrMenuHandler]` |
| `Page.Run()` | `[PageHandler]` |
| `Page.RunModal()` | `[ModalPageHandler]` |
| `Report.Run()` or `Report.RunModal()` | `[ReportHandler]` |
| Report request page | `[RequestPageHandler]` |
| `Hyperlink()` | `[HyperlinkHandler]` |
| `Notification.Send()` | `[SendNotificationHandler]` |

## 3. Analyze TableRelation Constraints

**CRITICAL: Tests fail with validation errors when inserting data that violates TableRelation constraints.**

Before inserting test data:
1. Read the table definition for all fields receiving values
2. Identify fields with `TableRelation` properties
3. Ensure related records exist before inserting test data
4. Use Library functions (e.g., `LibrarySales`, `LibraryPurchase`) to create prerequisite data

## 4. Write Test Structure

Follow the AAA pattern (Arrange-Act-Assert):

```AL
[Test]
[HandlerFunctions('RequiredHandlers')]
procedure TestProcedureName()
begin
// [GIVEN] Set up test data and preconditions
Initialize();
CreateTestData();

// [WHEN] Execute the action being tested
ExecuteAction();

// [THEN] Verify the expected results
VerifyResults();
end;
```

## 5. Handler Method Signatures

```AL
[ConfirmHandler]
procedure ConfirmHandlerYes(Question: Text[1024]; var Reply: Boolean)
begin
Reply := true;
end;

[MessageHandler]
procedure MessageHandler(Message: Text[1024])
begin
// Empty - suppresses message display
end;

[ModalPageHandler]
procedure ModalPageHandler(var TestPage: TestPage "Page Name")
begin
TestPage.OK().Invoke();
end;
```

## 6. Best Practices

- Use descriptive test procedure names that explain what is being tested
- One assertion concept per test
- Use Library Variable Storage to pass data between handlers and tests
- Do NOT verify values inside handler procedures
- Clean up test data in teardown or use transaction rollback
- Use an `Initialize()` procedure to set up common test fixtures
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
name: al-test-generation
description: Guide for creating AL tests for Microsoft Dynamics 365 Business Central. Use this when asked to write, create, or generate AL test codeunits, test procedures, or test automation for Business Central.
---

To create AL tests for Microsoft Dynamics 365 Business Central, follow this process:

## 1. Analyze the Code Under Test

Before writing any test code:
1. Read and understand the procedure or functionality being tested
2. Trace through all code paths to identify UI interactions
3. Examine table definitions for TableRelation constraints

## 2. Identify Required Handler Methods

**CRITICAL: Tests fail with "Unhandled UI" errors when handlers are missing.**

Look for these patterns in the code under test:

| Code Pattern | Required Handler |
| ------------------------------------- | --------------------------- |
| `Confirm()` | `[ConfirmHandler]` |
| `Message()` | `[MessageHandler]` |
| `StrMenu()` | `[StrMenuHandler]` |
| `Page.Run()` | `[PageHandler]` |
| `Page.RunModal()` | `[ModalPageHandler]` |
| `Report.Run()` or `Report.RunModal()` | `[ReportHandler]` |
| Report request page | `[RequestPageHandler]` |
| `Hyperlink()` | `[HyperlinkHandler]` |
| `Notification.Send()` | `[SendNotificationHandler]` |

## 3. Analyze TableRelation Constraints

**CRITICAL: Tests fail with validation errors when inserting data that violates TableRelation constraints.**

Before inserting test data:
1. Read the table definition for all fields receiving values
2. Identify fields with `TableRelation` properties
3. Ensure related records exist before inserting test data
4. Use Library functions (e.g., `LibrarySales`, `LibraryPurchase`) to create prerequisite data

## 4. Write Test Structure

Follow the AAA pattern (Arrange-Act-Assert):

```AL
[Test]
[HandlerFunctions('RequiredHandlers')]
procedure TestProcedureName()
begin
// [GIVEN] Set up test data and preconditions
Initialize();
CreateTestData();

// [WHEN] Execute the action being tested
ExecuteAction();

// [THEN] Verify the expected results
VerifyResults();
end;
```

## 5. Handler Method Signatures

```AL
[ConfirmHandler]
procedure ConfirmHandlerYes(Question: Text[1024]; var Reply: Boolean)
begin
Reply := true;
end;

[MessageHandler]
procedure MessageHandler(Message: Text[1024])
begin
// Empty - suppresses message display
end;

[ModalPageHandler]
procedure ModalPageHandler(var TestPage: TestPage "Page Name")
begin
TestPage.OK().Invoke();
end;
```

## 6. Best Practices

- Use descriptive test procedure names that explain what is being tested
- One assertion concept per test
- Use Library Variable Storage to pass data between handlers and tests
- Do NOT verify values inside handler procedures
- Clean up test data in teardown or use transaction rollback
- Use an `Initialize()` procedure to set up common test fixtures
8 changes: 8 additions & 0 deletions src/bcbench/agent/shared/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ prompt:
instructions:
enabled: false

# controls:
# 1. whether to copy skills (`src/bcbench/agent/copilot/instructions/<sanitized-repo>/skills/`) into repo/.github/skills/
# 2. whether the SDK passes skill_directories to SessionConfig (the CLI auto-discovers skills from .github/skills/)
skills:
enabled: true
# name is for documentation only - all skills in the skills/ folder are loaded
name: al-test-generation

# controls:
# 1. whether to copy custom agents (`src/bcbench/agent/copilot/instructions/<sanitized-repo>/agents/`) into the repo
# 2. whether to pass --agent=<agent-name> to copilot
Expand Down
37 changes: 21 additions & 16 deletions src/bcbench/agent/shared/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Any

from copilot import MCPLocalServerConfig, MCPRemoteServerConfig, MCPServerConfig
from jinja2 import Template

from bcbench.dataset import DatasetEntry
Expand Down Expand Up @@ -39,33 +40,33 @@ def cleanup(self) -> None:
_mcp_server_manager = _ALMcpServerManager()


def _build_server_entry(server: dict[str, Any], template_context: dict[str, Any]) -> tuple[str, dict[str, Any]]:
def _build_server_entry(server: dict[str, Any], template_context: dict[str, Any]) -> dict[str, MCPServerConfig]:
server_type: str = server["type"]
server_name: str = server["name"]
tools: list[str] = server["tools"]

match server_type:
case "http":
return server_name, {
"type": server_type,
"url": server["url"],
"tools": tools,
}
return {server_name: MCPRemoteServerConfig(
tools=tools,
url=server["url"],
type=server_type
)}
case "local":
args: list[str] = server["args"]
rendered_args = [Template(arg).render(**template_context) for arg in args]
return server_name, {
"type": server_type,
"command": server["command"],
"args": rendered_args,
"tools": tools,
}
return {server_name: MCPLocalServerConfig(
tools=tools,
command=server["command"],
args=rendered_args,
type=server_type,
)}
case _:
logger.error(f"Unsupported MCP server type: {server_type}, {server}")
raise AgentError(f"Unsupported MCP server type: {server_type}")


def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Path, al_mcp: bool = False) -> tuple[str | None, list[str] | None]:
def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Path, al_mcp: bool = False) -> dict[str, MCPServerConfig] | None:
# following docs: https://docs.github.com/en/enterprise-cloud@latest/copilot/how-tos/use-copilot-agents/coding-agent/extend-coding-agent-with-mcp
mcp_servers: list[dict[str, Any]] = config.get("mcp", {}).get("servers", [])

Expand All @@ -78,11 +79,15 @@ def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Pat
logger.info("AL MCP server enabled via --al-mcp flag")

if not mcp_servers:
return None, None
return None

template_context = {"repo_path": repo_path}
mcp_server_names: list[str] = [server["name"] for server in mcp_servers]
mcp_config = {"mcpServers": dict(map(lambda s: _build_server_entry(s, template_context), mcp_servers))}
# mcp_config = {"mcpServers": dict(map(lambda s: _build_server_entry(s, template_context), mcp_servers))}
mcp_config : dict[str, MCPServerConfig] = {}
for server in mcp_servers:
server_entry = _build_server_entry(server, template_context)
mcp_config.update(server_entry)

if al_mcp:
# Launch MCP server with all project paths separated by semicolons
Expand All @@ -92,4 +97,4 @@ def build_mcp_config(config: dict[str, Any], entry: DatasetEntry, repo_path: Pat
logger.info(f"Using MCP servers: {mcp_server_names}")
logger.debug(f"MCP configuration: {json.dumps(mcp_config, indent=2)}")

return json.dumps(mcp_config, separators=(",", ":")), mcp_server_names
return mcp_config
2 changes: 2 additions & 0 deletions src/bcbench/operations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
"""Operations for Business Central and Git."""

from bcbench.operations.bc_operations import (
build_and_publish_projects,
build_ps_app_build_and_publish,
build_ps_dataset_tests_script,
build_ps_test_script,
run_tests,
)
from bcbench.operations.git_operations import (
apply_patch,
checkout_commit,
clean_project_paths,
clean_repo,
stage_and_get_diff,
)
from bcbench.operations.instruction_operations import copy_problem_statement_folder, setup_custom_agent, setup_instructions_from_config
from bcbench.operations.skills_operations import setup_copilot_skills
from bcbench.operations.project_operations import categorize_projects
from bcbench.operations.setup_operations import setup_repo_postbuild, setup_repo_prebuild
from bcbench.operations.test_operations import extract_tests_from_patch

Check failure on line 21 in src/bcbench/operations/__init__.py

View workflow job for this annotation

GitHub Actions / lint-and-test

Ruff (I001)

src/bcbench/operations/__init__.py:3:1: I001 Import block is un-sorted or un-formatted

__all__ = [
"apply_patch",
Expand All @@ -32,6 +33,7 @@
"copy_problem_statement_folder",
"extract_tests_from_patch",
"run_tests",
"setup_copilot_skills",
"setup_custom_agent",
"setup_instructions_from_config",
"setup_repo_postbuild",
Expand Down
Loading
Loading