From 8032c52842c0b30a82bbb941bfbff99306b9bbc7 Mon Sep 17 00:00:00 2001 From: ScarletCarpet Date: Sat, 4 Jul 2026 00:05:18 +0800 Subject: [PATCH] fix(review-filter): add `submit_filter_result` func for review filter stage At review filter stage, LLM(test GLM5.2) may return natural language content such as: """ Looking at each comment... - commit 1: ... - commit 2: ... Summary: ```json [...]``` """ add function calling for LLM will get more stable result for some model of providers. and this tool only appears at review filter stage, so it does no harm for review quality. --- internal/agent/agent.go | 63 ++++++++++++- internal/agent/agent_test.go | 90 +++++++++++++++++++ .../prompts/review_filter_task_user.md | 12 +-- internal/viewer/store.go | 2 +- 4 files changed, 154 insertions(+), 13 deletions(-) diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 4be6504d..5cedd127 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -22,6 +22,7 @@ import ( "github.com/open-code-review/open-code-review/internal/stdout" "github.com/open-code-review/open-code-review/internal/telemetry" "github.com/open-code-review/open-code-review/internal/tool" + "go.opentelemetry.io/otel/attribute" ) // AgentWarning is re-exported from llmloop for backwards compatibility with @@ -500,6 +501,28 @@ func (a *Agent) executeSubtask(ctx context.Context, d model.Diff) error { return err } +// submitFilterTool is the tool definition that constrains the review filter +var submitFilterTool = llm.ToolDef{ + Type: "function", + Function: llm.FunctionDef{ + Name: "submit_filter_result", + Description: "Submit the list of review comments that are provably incorrect based on the diff", + Parameters: map[string]any{ + "type": "object", + "properties": map[string]any{ + "comment_ids": map[string]any{ + "type": "array", + "description": "IDs of review comments confirmed as incorrect return an empty array when none, e.g. [\"c-0\", \"c-2\"]", + "items": map[string]any{ + "type": "string", + }, + }, + }, + "required": []any{"comment_ids"}, + }, + }, +} + // executeReviewFilter runs the REVIEW_FILTER_TASK to remove comments that are // provably incorrect based solely on the diff. Errors are logged and silently ignored. func (a *Agent) executeReviewFilter(ctx context.Context, d model.Diff, newPath string) { @@ -537,6 +560,7 @@ func (a *Agent) executeReviewFilter(ctx context.Context, d model.Diff, newPath s resp, err := a.args.LLMClient.CompletionsWithCtx(ctx, llm.ChatRequest{ Model: a.args.Model, Messages: messages, + Tools: []llm.ToolDef{submitFilterTool}, MaxTokens: a.args.Template.MaxTokens, }) if err != nil { @@ -547,12 +571,23 @@ func (a *Agent) executeReviewFilter(ctx context.Context, d model.Diff, newPath s rec.SetResponse(resp, time.Since(startTime)) a.runner.RecordUsage(resp.Usage) - indices := parseFilterResponse(resp.Content(), len(comments)) + indices := parseFilterToolCalls(resp.ToolCalls(), len(comments)) + if indices == nil { + indices = parseFilterResponse(resp.Content(), len(comments)) + } if len(indices) == 0 { + telemetry.Event(ctx, "review_filter.completed", + attribute.String("file.path", newPath), + attribute.Int("total_comments", len(comments)), + attribute.Int("removed", 0)) return } a.args.CommentCollector.RemoveByPathAndIndices(newPath, indices) + telemetry.Event(ctx, "review_filter.completed", + attribute.String("file.path", newPath), + attribute.Int("total_comments", len(comments)), + attribute.Int("removed", len(indices))) fmt.Fprintf(stdout.Writer(), "[ocr] Review filter removed %d comment(s) for %s\n", len(indices), newPath) } @@ -575,6 +610,32 @@ func buildFilterCommentsJSON(comments []model.LlmComment) string { return string(data) } +// parseFilterToolCalls extracts comment indices from the LLM's tool call +// response to submit_filter_result. Returns nil if no matching tool call +// is found, allowing fallback to text-based parsing. +func parseFilterToolCalls(calls []llm.ToolCall, total int) map[int]struct{} { + for _, call := range calls { + if call.Function.Name != "submit_filter_result" { + continue + } + var args struct { + CommentIDs []string `json:"comment_ids"` + } + if err := json.Unmarshal([]byte(call.Function.Arguments), &args); err != nil { + continue + } + indices := make(map[int]struct{}) + for _, id := range args.CommentIDs { + var idx int + if _, err := fmt.Sscanf(id, "c-%d", &idx); err == nil && idx >= 0 && idx < total { + indices[idx] = struct{}{} + } + } + return indices + } + return nil +} + // parseFilterResponse extracts comment indices from the LLM filter response. // Returns a set of 0-based indices. Invalid IDs or out-of-range indices are ignored. func parseFilterResponse(raw string, total int) map[int]struct{} { diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 99f82806..cd91d8e5 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -216,6 +216,96 @@ func TestParseFilterResponse(t *testing.T) { } } +func TestParseFilterToolCalls(t *testing.T) { + tests := []struct { + name string + calls []llm.ToolCall + total int + wantSet map[int]struct{} + }{ + { + name: "no tool calls", + calls: nil, + total: 5, + wantSet: nil, + }, + { + name: "submit_filter_result with IDs", + calls: []llm.ToolCall{{ + Function: llm.FunctionCall{ + Name: "submit_filter_result", + Arguments: `{"comment_ids": ["c-0", "c-2"]}`, + }, + }}, + total: 5, + wantSet: map[int]struct{}{0: {}, 2: {}}, + }, + { + name: "submit_filter_result empty array", + calls: []llm.ToolCall{{ + Function: llm.FunctionCall{ + Name: "submit_filter_result", + Arguments: `{"comment_ids": []}`, + }, + }}, + total: 5, + wantSet: map[int]struct{}{}, + }, + { + name: "ignores non-matching tool names", + calls: []llm.ToolCall{{ + Function: llm.FunctionCall{ + Name: "other_tool", + Arguments: `{"comment_ids": ["c-0"]}`, + }, + }}, + total: 5, + wantSet: nil, + }, + { + name: "out-of-range indices ignored", + calls: []llm.ToolCall{{ + Function: llm.FunctionCall{ + Name: "submit_filter_result", + Arguments: `{"comment_ids": ["c-0", "c-10"]}`, + }, + }}, + total: 5, + wantSet: map[int]struct{}{0: {}}, + }, + { + name: "invalid JSON arguments returns nil", + calls: []llm.ToolCall{{ + Function: llm.FunctionCall{ + Name: "submit_filter_result", + Arguments: `not json`, + }, + }}, + total: 5, + wantSet: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseFilterToolCalls(tt.calls, tt.total) + if tt.wantSet == nil { + if got != nil { + t.Errorf("expected nil, got %v", got) + } + return + } + if len(got) != len(tt.wantSet) { + t.Fatalf("len = %d, want %d; got %v", len(got), len(tt.wantSet), got) + } + for idx := range tt.wantSet { + if _, ok := got[idx]; !ok { + t.Errorf("missing index %d in result", idx) + } + } + }) + } +} + func TestExtFromPath(t *testing.T) { a := New(Args{}) diff --git a/internal/config/template/prompts/review_filter_task_user.md b/internal/config/template/prompts/review_filter_task_user.md index d2b64725..175db748 100644 --- a/internal/config/template/prompts/review_filter_task_user.md +++ b/internal/config/template/prompts/review_filter_task_user.md @@ -42,14 +42,4 @@ After confirming that the facts visible in the diff are accurate, determine whet ### Output -Return all incorrect review comment IDs directly, without any explanation. Use JSON array format: - -```json -["id-xxx", "id-yyy"] -``` - -If there are no review comments that can be confirmed as incorrect, return an empty array: - -```json -[] -``` +Call the `submit_filter_result` tool with the IDs of all review comments that can be confirmed as incorrect, without any explanation. diff --git a/internal/viewer/store.go b/internal/viewer/store.go index dd60fe0a..2e1ad094 100644 --- a/internal/viewer/store.go +++ b/internal/viewer/store.go @@ -385,7 +385,7 @@ func LoadSession(root, encodedRepo, sessionID string) (*ViewSession, error) { name, _ := tm["name"].(string) args, _ := tm["arguments"].(string) info := ToolCallInfo{Name: name, Arguments: args} - if name == "task_done" { + if name == "task_done" || name == "submit_filter_result" { info.Ok = true } card.ToolCalls = append(card.ToolCalls, info)