diff --git a/CHANGELOG.md b/CHANGELOG.md index 5425d85..2ffe796 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`yaml.parseNumber` preserves version-like strings** — values whose float representation would lose information (e.g. `"4.30"` → `4.3` → `"4.3"`) now stay strings. Only coerces to float when the value round-trips through YAML marshal back to the exact input. +- **`multi_review` reviewer diff visibility** — production runs revealed that reviewer agents (especially weaker models) hallucinate "clone missing" / "ref not found" failures rather than running `git diff ..` themselves. The fix pre-computes the diff on the remote and writes it to `/diff.txt`; reviewers' task message now tells them to `cat` that file rather than navigate the repo and invoke git. + - New `PreComputeDiff` helper in `internal/support/multireview/diff.go` runs `git -C diff .. > /diff.txt && wc -c && wc -l` in a single SSH round-trip; returns the diff path plus size/line counts for the task message header. + - `runMultiReview` calls it after `ShipBundle` returns and hard-stops on failure before any reviewer is invoked. + - Rewritten task message includes the diff path, size, line count, an anti-hallucination clause (do NOT report "clone failed" unless `ls` proves it), and a `--stat` hint when diff size exceeds 1 MB. + - **`--base` is now required** — working-tree mode is intentionally unsupported (it produced the failure mode this fix exists to prevent). + - **`--per-reviewer-timeout-seconds` default bumped 600 → 1200** — production observation: Greta and Dax timed out at 600s on a real sprint. Aligns with the existing `--timeout-seconds` (total) default. + ## [1.8.2] - 2026-01-27 ### Fixed diff --git a/docs/llm-support-commands.md b/docs/llm-support-commands.md index 15d382f..9ed9120 100644 --- a/docs/llm-support-commands.md +++ b/docs/llm-support-commands.md @@ -2132,20 +2132,32 @@ llm-support multi_review [flags] | `--repo` | Local repo path to bundle and ship | | `--openclaw-host` | SSH target running `openclaw-gateway` (e.g. `user@nucleus.lan`) | | `--output-dir` | Where per-reviewer artifacts and merged stream land | +| `--base` | Base ref for the diff range. **REQUIRED** — we pre-compute `git diff ..` on the remote so reviewers only need to `cat` the resulting file. Working-tree mode is intentionally unsupported. | **Optional flags:** | Flag | Default | Description | |------|---------|-------------| | `--serial-reviewers` | (empty) | Subset that runs sequentially after the parallel lane (shared rate limits) | -| `--base` | (empty) | Base ref for the diff range, included in the auto-built task message | | `--head` | `HEAD` | Head ref | | `--timeout-seconds` | `1200` | Total wall-clock budget for the entire fan-out | -| `--per-reviewer-timeout-seconds` | `600` | Per-reviewer soft timeout | +| `--per-reviewer-timeout-seconds` | `1200` | Per-reviewer soft timeout. Bumped from 600s after production observation that real sprints with reasoning-model reviewers can legitimately need 10-20 minutes. | | `--gateway-container` | `openclaw-gateway` | Docker container running openclaw | -| `--task-message` | (auto) | Override the auto-built task message | +| `--task-message` | (auto) | Override the auto-built task message. Bypasses the pre-compute helper instructions — you take responsibility for telling reviewers where the diff is. | | `--skip-cleanup` | `false` | Do not remove the remote workdir after running | +**Pre-compute step:** + +After the bundle clone succeeds on the remote, `multi_review` runs: + +``` +git -C diff .. > /diff.txt +wc -c /diff.txt +wc -l /diff.txt +``` + +The diff file lands at `/diff.txt` — a sibling of the cloned repo, inside the same temp directory that gets garbage-collected when the run finishes. The task message tells each reviewer to `cat ` and review the contents. Reviewers don't need to `cd` into the repo or run git themselves. If the diff is >1MB, the task message appends a `git diff --stat` hint so reviewers can do a file-level pass first. + **Output layout under `--output-dir`:** ``` diff --git a/internal/support/commands/multi_review.go b/internal/support/commands/multi_review.go index 7823188..00804c0 100644 --- a/internal/support/commands/multi_review.go +++ b/internal/support/commands/multi_review.go @@ -14,12 +14,13 @@ import ( "github.com/spf13/cobra" ) -// invokeReviewerFn and shipBundleFn are package-level function variables that -// tests swap to inject deterministic behavior. Production points them at the -// real multireview package functions. +// invokeReviewerFn, shipBundleFn, and preComputeDiffFn are package-level +// function variables that tests swap to inject deterministic behavior. +// Production points them at the real multireview package functions. var ( invokeReviewerFn = multireview.InvokeReviewer shipBundleFn = multireview.ShipBundle + preComputeDiffFn = multireview.PreComputeDiff ) // Flag variables for the multi_review command. @@ -63,10 +64,18 @@ func newMultiReviewCmd() *cobra.Command { cmd := &cobra.Command{ Use: "multi_review", Short: "Fan out a code review to multiple openclaw reviewer agents", - Long: `Bundle a local repo, ship it to an openclaw-hosting machine, and invoke -several reviewer agents in parallel (or in a serial lane for those that -share rate-limited providers). Collects per-reviewer findings and writes -a merged TD stream the /code-review command can consume. + Long: `Bundle a local repo, ship it to an openclaw-hosting machine, pre-compute +the diff once, and invoke several reviewer agents in parallel (or in a +serial lane for those that share rate-limited providers). Collects +per-reviewer findings and writes a merged TD stream the /code-review +command can consume. + +Pre-compute step: --base is REQUIRED. After the bundle is shipped, we run +'git diff ..' on the remote and write the result to +/diff.txt. Reviewers are told to 'cat' that file rather than +running git themselves — observed in production, weaker reviewers +hallucinate "clone missing" failures rather than persist through a multi-step +git invocation. Pre-computing eliminates that surface. Output layout: /raw//{review.md,td-stream.txt,status.json,response.json} @@ -77,8 +86,11 @@ Output layout: /multi-review-summary.json (per-reviewer status + counts) Failure semantics: + - --base missing → validation error, no remote calls - Bundle/ship failure → hard-stop (no point invoking reviewers without the diff staged on the remote) + - Diff pre-compute failure (bad ref, etc.) → hard-stop before reviewers + are invoked; surfaces git stderr in the error message - Per-reviewer failure → recorded as failed in summary; other reviewers continue; exit 0 with partial: true - All reviewers fail → exit 1 with summary of what failed`, @@ -87,12 +99,12 @@ Failure semantics: cmd.Flags().StringVar(&mrReviewers, "reviewers", "", "Comma-separated reviewer agent names (required)") cmd.Flags().StringVar(&mrSerialReviewers, "serial-reviewers", "", "Comma-separated subset that runs serially after the parallel lane") cmd.Flags().StringVar(&mrRepo, "repo", "", "Local repo path to bundle (required)") - cmd.Flags().StringVar(&mrBaseRef, "base", "", "Base ref for the diff range (informational, included in task message)") + cmd.Flags().StringVar(&mrBaseRef, "base", "", "Base ref for the diff range — REQUIRED (we pre-compute git diff .. on the remote)") cmd.Flags().StringVar(&mrHeadRef, "head", "HEAD", "Head ref for the diff range") cmd.Flags().StringVar(&mrOpenclawHost, "openclaw-host", "", "SSH target running openclaw-gateway (required)") cmd.Flags().StringVar(&mrOutputDir, "output-dir", "", "Where per-reviewer artifacts and merged stream land (required)") cmd.Flags().IntVar(&mrTimeoutSeconds, "timeout-seconds", 1200, "Total wall-clock budget for the entire fan-out") - cmd.Flags().IntVar(&mrPerReviewerTO, "per-reviewer-timeout-seconds", 600, "Per-reviewer soft timeout") + cmd.Flags().IntVar(&mrPerReviewerTO, "per-reviewer-timeout-seconds", 1200, "Per-reviewer soft timeout (default 1200s — observed 600s default timing out for real sprints)") cmd.Flags().StringVar(&mrGatewayContainer, "gateway-container", "openclaw-gateway", "Docker container running openclaw") cmd.Flags().StringVar(&mrTaskMessage, "task-message", "", "Override the task message sent to each reviewer; default is auto-built from --base/--head/--repo") cmd.Flags().BoolVar(&mrSkipCleanup, "skip-cleanup", false, "Do not remove the remote workdir after running") @@ -117,6 +129,9 @@ func runMultiReview(cmd *cobra.Command, _ []string) error { if mrOutputDir == "" { return fmt.Errorf("--output-dir required") } + if mrBaseRef == "" { + return fmt.Errorf("--base required (working-tree mode is not supported — pass --base= so we can pre-compute the diff for reviewers)") + } allReviewers := splitAndTrim(mrReviewers) serial := splitAndTrim(mrSerialReviewers) @@ -160,10 +175,26 @@ func runMultiReview(cmd *cobra.Command, _ []string) error { } }() - // 2. Build the task message (auto if not overridden). + // 2. Pre-compute the diff on the remote so reviewers only need to + // `cat `. Removes the hallucination surface where weaker + // reviewers were inventing "clone missing" failures rather than + // running `git diff` themselves. + diffRes, err := preComputeDiffFn(totalCtx, multireview.PreComputeDiffParams{ + Host: mrOpenclawHost, + RemoteRepoPath: shipRes.RemoteRepoPath, + RemoteWorkdir: remoteWorkdir, + BaseRef: mrBaseRef, + HeadRef: mrHeadRef, + Timeout: time.Duration(mrTimeoutSeconds) * time.Second, + }) + if err != nil { + return fmt.Errorf("pre-compute diff on %s: %w", mrOpenclawHost, err) + } + + // 3. Build the task message (auto if not overridden). taskMessage := mrTaskMessage if taskMessage == "" { - taskMessage = buildDefaultTaskMessage(shipRes.RemoteRepoPath, repoName, mrBaseRef, mrHeadRef) + taskMessage = buildDefaultTaskMessage(shipRes.RemoteRepoPath, repoName, mrBaseRef, mrHeadRef, diffRes) } // 3. Invoke reviewers. Parallel lane first, then serial. @@ -288,19 +319,34 @@ func runMultiReview(cmd *cobra.Command, _ []string) error { return nil } -func buildDefaultTaskMessage(remoteRepo, repoName, base, head string) string { +func buildDefaultTaskMessage(remoteRepo, repoName, base, head string, diff multireview.PreComputeDiffResult) string { if head == "" { head = "HEAD" } var b strings.Builder b.WriteString("Code review.\n\n") - b.WriteString(fmt.Sprintf("Repository: a fresh clone of %s is on this host at %s/\n\n", repoName, remoteRepo)) - if base != "" { - b.WriteString(fmt.Sprintf("Range to review: %s..%s\n\n", base, head)) - b.WriteString("To see the diff:\n") - b.WriteString(fmt.Sprintf(" cd %s && git diff %s..%s\n\n", remoteRepo, base, head)) - } else { - b.WriteString(fmt.Sprintf("Working tree at %s — review the current state.\n\n", remoteRepo)) + b.WriteString(fmt.Sprintf("A pre-computed unified diff is at:\n %s\n", diff.DiffPath)) + b.WriteString(fmt.Sprintf("Size: %d bytes (%d lines). Range: %s..%s. Repo clone: %s/\n\n", + diff.SizeBytes, diff.LineCount, base, head, remoteRepo)) + b.WriteString(`INSTRUCTIONS — follow exactly: +1. Run ` + "`cat " + diff.DiffPath + "`" + ` ONCE. That is the diff you must review. +2. If the cat output looks empty or wrong, run ` + "`ls -la " + diff.DiffPath + "`" + ` and + ` + "`wc -l " + diff.DiffPath + "`" + ` and INCLUDE the literal output in your reply. +3. Do NOT report "repository missing", "clone failed", or "refs not found" + unless step 2 actually shows the file is absent. Hallucinating an + infrastructure failure when the file exists is worse than no review. +4. You may also ` + "`cd " + remoteRepo + "`" + ` and inspect individual files referenced + in the diff for context. The clone IS there. + +`) + // Large-diff hint: tell reviewers with tight context to start with --stat. + if diff.SizeBytes > 1_000_000 { + mb := float64(diff.SizeBytes) / 1_000_000 + b.WriteString(fmt.Sprintf(`NOTE: Diff is large (%.1f MB). If your context budget is tight, get the +file-level summary first with `+"`git -C %s diff --stat %s..%s`"+`, +then focus on files with the most changes. + +`, mb, remoteRepo, base, head)) } b.WriteString(`Produce your normal review report (verdict + severity-graded findings + what was done well + out-of-scope). Reply with the review body only — no preamble. diff --git a/internal/support/commands/multi_review_test.go b/internal/support/commands/multi_review_test.go index c065503..2cc5c97 100644 --- a/internal/support/commands/multi_review_test.go +++ b/internal/support/commands/multi_review_test.go @@ -68,6 +68,23 @@ func withMockShipBundle(t *testing.T) { t.Cleanup(func() { shipBundleFn = orig }) } +// withMockPreComputeDiff swaps preComputeDiffFn so tests get a deterministic +// diff result without invoking ssh. Default mock returns a 1234-byte / 50-line +// diff at /diff.txt. +func withMockPreComputeDiff(t *testing.T) { + t.Helper() + orig := preComputeDiffFn + preComputeDiffFn = func(ctx context.Context, p multireview.PreComputeDiffParams) (multireview.PreComputeDiffResult, error) { + return multireview.PreComputeDiffResult{ + DiffPath: p.RemoteWorkdir + "/diff.txt", + SizeBytes: 1234, + LineCount: 50, + Empty: false, + }, nil + } + t.Cleanup(func() { preComputeDiffFn = orig }) +} + func mockResultFor(agent string, tdLine string) multireview.InvokeReviewerResult { return multireview.InvokeReviewerResult{ AgentName: agent, @@ -84,6 +101,7 @@ func TestMultiReview_HappyPath(t *testing.T) { outDir := filepath.Join(t.TempDir(), "out") withMockShipBundle(t) + withMockPreComputeDiff(t) withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { return mockResultFor(p.AgentName, "MEDIUM|src/a.go:1|test problem|test fix|robustness"), nil }) @@ -94,6 +112,7 @@ func TestMultiReview_HappyPath(t *testing.T) { "--repo", repo, "--openclaw-host", "user@example.lan", "--output-dir", outDir, + "--base", "v1", "--timeout-seconds", "30", }) var stdout bytes.Buffer @@ -169,6 +188,7 @@ func TestMultiReview_TwoLane(t *testing.T) { outDir := filepath.Join(t.TempDir(), "out") withMockShipBundle(t) + withMockPreComputeDiff(t) withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil }) @@ -180,6 +200,7 @@ func TestMultiReview_TwoLane(t *testing.T) { "--repo", repo, "--openclaw-host", "user@example.lan", "--output-dir", outDir, + "--base", "v1", "--timeout-seconds", "30", }) cmd.SetOut(new(bytes.Buffer)) @@ -199,6 +220,7 @@ func TestMultiReview_PartialFailure(t *testing.T) { outDir := filepath.Join(t.TempDir(), "out") withMockShipBundle(t) + withMockPreComputeDiff(t) withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { if p.AgentName == "greta" { return multireview.InvokeReviewerResult{AgentName: p.AgentName}, fmt.Errorf("simulated failure") @@ -212,6 +234,7 @@ func TestMultiReview_PartialFailure(t *testing.T) { "--repo", repo, "--openclaw-host", "user@example.lan", "--output-dir", outDir, + "--base", "v1", "--timeout-seconds", "30", }) cmd.SetOut(new(bytes.Buffer)) @@ -254,6 +277,7 @@ func TestMultiReview_AllFail(t *testing.T) { outDir := filepath.Join(t.TempDir(), "out") withMockShipBundle(t) + withMockPreComputeDiff(t) withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { return multireview.InvokeReviewerResult{AgentName: p.AgentName}, fmt.Errorf("simulated failure") }) @@ -264,6 +288,7 @@ func TestMultiReview_AllFail(t *testing.T) { "--repo", repo, "--openclaw-host", "user@example.lan", "--output-dir", outDir, + "--base", "v1", "--timeout-seconds", "30", }) cmd.SetOut(new(bytes.Buffer)) @@ -294,6 +319,7 @@ func TestMultiReview_ShipFailureHardStops(t *testing.T) { "--repo", repo, "--openclaw-host", "user@example.lan", "--output-dir", outDir, + "--base", "v1", "--timeout-seconds", "30", }) cmd.SetOut(new(bytes.Buffer)) @@ -302,3 +328,196 @@ func TestMultiReview_ShipFailureHardStops(t *testing.T) { t.Fatal("expected hard-stop on ship failure") } } + +// ---- diff-precompute tests ---- + +func TestMultiReview_DiffFailureHardStops(t *testing.T) { + // preComputeDiffFn errors → run aborts before any reviewer is invoked. + repo := initFixtureRepoMR(t) + outDir := filepath.Join(t.TempDir(), "out") + + withMockShipBundle(t) + origDiff := preComputeDiffFn + preComputeDiffFn = func(ctx context.Context, p multireview.PreComputeDiffParams) (multireview.PreComputeDiffResult, error) { + return multireview.PreComputeDiffResult{}, fmt.Errorf("diff: git exit 128, stderr: fatal: bad revision 'badref'") + } + t.Cleanup(func() { preComputeDiffFn = origDiff }) + + // invokeReviewerFn should never be called — fail loud if it is. + withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { + t.Errorf("invokeReviewerFn called after diff failure — should have hard-stopped first") + return multireview.InvokeReviewerResult{AgentName: p.AgentName}, nil + }) + + cmd := newMultiReviewCmd() + cmd.SetArgs([]string{ + "--reviewers", "bruce,greta", + "--repo", repo, + "--openclaw-host", "user@example.lan", + "--output-dir", outDir, + "--base", "v1", + "--timeout-seconds", "30", + }) + cmd.SetOut(new(bytes.Buffer)) + cmd.SetErr(new(bytes.Buffer)) + if err := cmd.Execute(); err == nil { + t.Fatal("expected hard-stop when diff fails") + } + // Summary should NOT be written when we hard-stop before invoking. + if _, err := os.Stat(filepath.Join(outDir, "multi-review-summary.json")); err == nil { + t.Error("summary should not exist after pre-diff hard-stop") + } +} + +func TestMultiReview_EmptyBaseRefRejected(t *testing.T) { + repo := initFixtureRepoMR(t) + outDir := filepath.Join(t.TempDir(), "out") + + // We don't need any of the mocks — validation should reject before then. + // Ensure invokeReviewerFn and shipBundleFn are never called by failing loud. + origShip := shipBundleFn + shipBundleFn = func(ctx context.Context, p multireview.ShipBundleParams) (multireview.ShipBundleResult, error) { + t.Errorf("shipBundleFn called — should have validated --base before this") + return multireview.ShipBundleResult{}, nil + } + t.Cleanup(func() { shipBundleFn = origShip }) + + cmd := newMultiReviewCmd() + cmd.SetArgs([]string{ + "--reviewers", "bruce", + "--repo", repo, + "--openclaw-host", "user@example.lan", + "--output-dir", outDir, + // no --base passed + "--timeout-seconds", "30", + }) + cmd.SetOut(new(bytes.Buffer)) + cmd.SetErr(new(bytes.Buffer)) + if err := cmd.Execute(); err == nil { + t.Fatal("expected validation error when --base is missing") + } +} + +func TestMultiReview_DiffPathInTaskMessage(t *testing.T) { + // Verify the task message passed to reviewers includes `cat ` + // and the anti-hallucination clause. + repo := initFixtureRepoMR(t) + outDir := filepath.Join(t.TempDir(), "out") + + withMockShipBundle(t) + withMockPreComputeDiff(t) + + var capturedTaskMessage string + withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { + capturedTaskMessage = p.TaskMessage + return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil + }) + + cmd := newMultiReviewCmd() + cmd.SetArgs([]string{ + "--reviewers", "bruce", + "--repo", repo, + "--openclaw-host", "user@example.lan", + "--output-dir", outDir, + "--base", "v1", + "--timeout-seconds", "30", + }) + cmd.SetOut(new(bytes.Buffer)) + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + // The diff path should appear in the task message. The mock builds it as + // /diff.txt; RemoteWorkdir is `/tmp/multi-review-` + // in production, so we just check for the suffix. + if !strings.Contains(capturedTaskMessage, "/diff.txt") { + t.Errorf("task message missing diff path suffix `/diff.txt`. Got:\n%s", capturedTaskMessage) + } + if !strings.Contains(capturedTaskMessage, "/tmp/multi-review-") { + t.Errorf("task message missing workdir prefix `/tmp/multi-review-`. Got:\n%s", capturedTaskMessage) + } + // The cat instruction should be present + if !strings.Contains(capturedTaskMessage, "cat ") { + t.Errorf("task message missing `cat` instruction. Got:\n%s", capturedTaskMessage) + } + // The anti-hallucination clause should be present + if !strings.Contains(capturedTaskMessage, "Hallucinating") && !strings.Contains(capturedTaskMessage, "Do NOT report") { + t.Errorf("task message missing anti-hallucination clause. Got:\n%s", capturedTaskMessage) + } + // Old `git diff ..` instruction should NOT be present (reviewers shouldn't run git themselves anymore) + if strings.Contains(capturedTaskMessage, "git diff v1..") { + t.Errorf("task message should not include git diff instruction. Got:\n%s", capturedTaskMessage) + } +} + +func TestMultiReview_LargeDiffWarning(t *testing.T) { + // SizeBytes > 1_000_000 should append the --stat hint. + repo := initFixtureRepoMR(t) + outDir := filepath.Join(t.TempDir(), "out") + + withMockShipBundle(t) + origDiff := preComputeDiffFn + preComputeDiffFn = func(ctx context.Context, p multireview.PreComputeDiffParams) (multireview.PreComputeDiffResult, error) { + return multireview.PreComputeDiffResult{ + DiffPath: p.RemoteWorkdir + "/diff.txt", + SizeBytes: 2_500_000, // 2.5 MB + LineCount: 60000, + Empty: false, + }, nil + } + t.Cleanup(func() { preComputeDiffFn = origDiff }) + + var capturedTaskMessage string + withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { + capturedTaskMessage = p.TaskMessage + return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil + }) + + cmd := newMultiReviewCmd() + cmd.SetArgs([]string{ + "--reviewers", "bruce", + "--repo", repo, + "--openclaw-host", "user@example.lan", + "--output-dir", outDir, + "--base", "v1", + "--timeout-seconds", "30", + }) + cmd.SetOut(new(bytes.Buffer)) + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + if !strings.Contains(capturedTaskMessage, "--stat") { + t.Errorf("task message should include --stat hint for large diff. Got:\n%s", capturedTaskMessage) + } +} + +func TestMultiReview_SmallDiffNoWarning(t *testing.T) { + // SizeBytes < 1_000_000 should NOT append the --stat hint. + repo := initFixtureRepoMR(t) + outDir := filepath.Join(t.TempDir(), "out") + + withMockShipBundle(t) + withMockPreComputeDiff(t) // default mock: 1234 bytes + + var capturedTaskMessage string + withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) { + capturedTaskMessage = p.TaskMessage + return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil + }) + + cmd := newMultiReviewCmd() + cmd.SetArgs([]string{ + "--reviewers", "bruce", + "--repo", repo, + "--openclaw-host", "user@example.lan", + "--output-dir", outDir, + "--base", "v1", + "--timeout-seconds", "30", + }) + cmd.SetOut(new(bytes.Buffer)) + if err := cmd.Execute(); err != nil { + t.Fatalf("execute: %v", err) + } + if strings.Contains(capturedTaskMessage, "--stat") { + t.Errorf("task message should not include --stat hint for small diff. Got:\n%s", capturedTaskMessage) + } +} diff --git a/internal/support/multireview/diff.go b/internal/support/multireview/diff.go new file mode 100644 index 0000000..2a193d8 --- /dev/null +++ b/internal/support/multireview/diff.go @@ -0,0 +1,151 @@ +package multireview + +import ( + "context" + "fmt" + "path/filepath" + "strconv" + "strings" + "time" +) + +// PreComputeDiffParams configures pre-computing a diff inside the remote workdir. +// +// We do this so reviewer agents only need to `cat` a known file, not navigate +// the cloned repo and run git themselves. In real use we observed weaker +// reviewers hallucinate "clone missing" / "ref not found" failures rather +// than persisting through the diff fetch. Pre-computing removes that surface. +type PreComputeDiffParams struct { + // Host is the SSH target (e.g. "user@nucleus.lan"). + Host string + // RemoteRepoPath is the cloned repo on the remote (the `git -C` target). + RemoteRepoPath string + // RemoteWorkdir is the parent directory; diff.txt lands here so it's + // peers with the clone — keeps a single defer-clean dir. + RemoteWorkdir string + // BaseRef is required — no working-tree fallback (that mode produced + // the very failures this fix exists to prevent). + BaseRef string + // HeadRef defaults to "HEAD" when empty. + HeadRef string + // Timeout for the whole SSH round-trip. + Timeout time.Duration +} + +// PreComputeDiffResult reports the diff that was written. +type PreComputeDiffResult struct { + // DiffPath is the absolute path of the diff file inside the remote workdir. + DiffPath string + // SizeBytes is the byte count of the diff file (from wc -c). + SizeBytes int64 + // LineCount is the line count of the diff file (from wc -l). + LineCount int + // Empty is true when the diff has zero bytes (BaseRef == HeadRef or no + // changes between them). Callers can decide whether to proceed. + Empty bool +} + +// PreComputeDiff runs `git diff ..` on the remote, redirects the +// output to /diff.txt, and returns the path plus size/line counts. +// +// Failure semantics: +// - Missing required inputs → validation error, no SSH call +// - Non-zero git exit (bad ref, etc.) → wraps stderr in the error +// - Context deadline → propagated +// +// Zero-byte diff is NOT an error; the result has Empty=true and the caller +// chooses whether to proceed. +func PreComputeDiff(parent context.Context, p PreComputeDiffParams) (PreComputeDiffResult, error) { + if p.Host == "" { + return PreComputeDiffResult{}, fmt.Errorf("diff: host required") + } + if p.RemoteRepoPath == "" { + return PreComputeDiffResult{}, fmt.Errorf("diff: remote repo path required") + } + if p.RemoteWorkdir == "" { + return PreComputeDiffResult{}, fmt.Errorf("diff: remote workdir required") + } + if p.BaseRef == "" { + return PreComputeDiffResult{}, fmt.Errorf("diff: base ref required (working-tree mode is not supported — pass --base)") + } + if p.HeadRef == "" { + p.HeadRef = "HEAD" + } + if p.Timeout <= 0 { + p.Timeout = 5 * time.Minute + } + + diffPath := filepath.Join(p.RemoteWorkdir, "diff.txt") + + // Single composite remote command: write the diff, then emit wc results. + // `git diff` exits non-zero on bad refs which short-circuits the && + // chain and surfaces as a non-zero SSH exit + stderr we wrap in the + // returned error. + remoteCmd := fmt.Sprintf( + "git -C %s diff %s..%s > %s && wc -c %s && wc -l %s", + shellQuote(p.RemoteRepoPath), + shellQuote(p.BaseRef), + shellQuote(p.HeadRef), + shellQuote(diffPath), + shellQuote(diffPath), + shellQuote(diffPath), + ) + + res, err := SSHRun(parent, SSHParams{ + Host: p.Host, + Command: remoteCmd, + Timeout: p.Timeout, + }) + if err != nil { + return PreComputeDiffResult{}, fmt.Errorf("diff: ssh: %w", err) + } + if res.ExitCode != 0 { + return PreComputeDiffResult{}, fmt.Errorf("diff: git exit %d, stderr: %s", res.ExitCode, strings.TrimSpace(res.Stderr)) + } + + size, lines, err := parseWcOutput(res.Stdout) + if err != nil { + return PreComputeDiffResult{}, fmt.Errorf("diff: parse wc output: %w (stdout: %q)", err, res.Stdout) + } + + return PreComputeDiffResult{ + DiffPath: diffPath, + SizeBytes: size, + LineCount: lines, + Empty: size == 0, + }, nil +} + +// parseWcOutput extracts the byte and line counts from two consecutive `wc` +// outputs. Handles both BSD format (" 1234 path") and GNU format +// ("1234 path") by taking the first numeric token on each non-empty line. +func parseWcOutput(stdout string) (sizeBytes int64, lineCount int, err error) { + lines := strings.Split(strings.TrimSpace(stdout), "\n") + if len(lines) < 2 { + return 0, 0, fmt.Errorf("expected 2 lines (wc -c, wc -l), got %d", len(lines)) + } + size, err := firstNumericToken(lines[0]) + if err != nil { + return 0, 0, fmt.Errorf("wc -c line: %w", err) + } + count, err := firstNumericToken(lines[1]) + if err != nil { + return 0, 0, fmt.Errorf("wc -l line: %w", err) + } + return size, int(count), nil +} + +// firstNumericToken returns the first whitespace-delimited integer in line. +// Tolerates leading whitespace (BSD wc) and ignores anything after the number +// (path). +func firstNumericToken(line string) (int64, error) { + fields := strings.Fields(line) + if len(fields) == 0 { + return 0, fmt.Errorf("empty line") + } + n, err := strconv.ParseInt(fields[0], 10, 64) + if err != nil { + return 0, fmt.Errorf("not a number: %q", fields[0]) + } + return n, nil +} diff --git a/internal/support/multireview/diff_test.go b/internal/support/multireview/diff_test.go new file mode 100644 index 0000000..e7e382d --- /dev/null +++ b/internal/support/multireview/diff_test.go @@ -0,0 +1,261 @@ +package multireview + +import ( + "context" + "os/exec" + "strings" + "testing" + "time" +) + +func TestPreComputeDiff_Success(t *testing.T) { + // Mock the SSH call to return a clean diff output + wc results. + // The actual command we run is one composite shell pipeline; we mock the + // wrapping ssh call and return canned stdout that matches the format the + // production code expects. + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + // Production runs: `wc -c ; wc -l ` separated by newlines. + // First line: byte count + path. Second line: line count + path. + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo " 12345 /tmp/workdir/diff.txt"; echo " 250 /tmp/workdir/diff.txt"`) + } + + res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/workdir/myrepo", + RemoteWorkdir: "/tmp/workdir", + BaseRef: "v1.0.0", + HeadRef: "HEAD", + Timeout: 30 * time.Second, + }) + if err != nil { + t.Fatalf("PreComputeDiff: %v", err) + } + if res.DiffPath != "/tmp/workdir/diff.txt" { + t.Errorf("DiffPath=%q want /tmp/workdir/diff.txt", res.DiffPath) + } + if res.SizeBytes != 12345 { + t.Errorf("SizeBytes=%d want 12345", res.SizeBytes) + } + if res.LineCount != 250 { + t.Errorf("LineCount=%d want 250", res.LineCount) + } + if res.Empty { + t.Error("Empty should be false for non-zero diff") + } +} + +func TestPreComputeDiff_RequiresInputs(t *testing.T) { + ctx := context.Background() + cases := []struct { + name string + p PreComputeDiffParams + }{ + {"empty host", PreComputeDiffParams{RemoteRepoPath: "/r", RemoteWorkdir: "/w", BaseRef: "v1", HeadRef: "HEAD"}}, + {"empty repo path", PreComputeDiffParams{Host: "u@h", RemoteWorkdir: "/w", BaseRef: "v1", HeadRef: "HEAD"}}, + {"empty workdir", PreComputeDiffParams{Host: "u@h", RemoteRepoPath: "/r", BaseRef: "v1", HeadRef: "HEAD"}}, + {"empty base ref", PreComputeDiffParams{Host: "u@h", RemoteRepoPath: "/r", RemoteWorkdir: "/w", HeadRef: "HEAD"}}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + _, err := PreComputeDiff(ctx, c.p) + if err == nil { + t.Errorf("expected error for %s", c.name) + } + }) + } +} + +func TestPreComputeDiff_RefNotFound(t *testing.T) { + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo "fatal: bad revision 'badref'" >&2; exit 128`) + } + + _, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "badref", + HeadRef: "HEAD", + Timeout: 10 * time.Second, + }) + if err == nil { + t.Fatal("expected error for bad ref") + } + if !strings.Contains(err.Error(), "bad revision") && !strings.Contains(err.Error(), "exit") { + t.Errorf("error should reference git failure: %v", err) + } +} + +func TestPreComputeDiff_EmptyDiff(t *testing.T) { + // git diff produces an empty file when base==head. wc -c returns 0. + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo "0 /tmp/w/diff.txt"; echo "0 /tmp/w/diff.txt"`) + } + + res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "HEAD", + HeadRef: "HEAD", + Timeout: 10 * time.Second, + }) + if err != nil { + t.Fatalf("PreComputeDiff: %v", err) + } + if !res.Empty { + t.Error("Empty should be true for zero-byte diff") + } + if res.SizeBytes != 0 { + t.Errorf("SizeBytes=%d want 0", res.SizeBytes) + } +} + +func TestPreComputeDiff_LargeDiff(t *testing.T) { + // A 2MB diff has SizeBytes ~ 2000000. + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo "2000000 /tmp/w/diff.txt"; echo "50000 /tmp/w/diff.txt"`) + } + + res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "v0", + HeadRef: "HEAD", + Timeout: 10 * time.Second, + }) + if err != nil { + t.Fatalf("PreComputeDiff: %v", err) + } + if res.SizeBytes != 2000000 { + t.Errorf("SizeBytes=%d want 2000000", res.SizeBytes) + } + if res.LineCount != 50000 { + t.Errorf("LineCount=%d want 50000", res.LineCount) + } + if res.Empty { + t.Error("Empty should be false for large diff") + } +} + +func TestPreComputeDiff_WcOutputBSDFormat(t *testing.T) { + // BSD wc emits leading-space format: " 1234 path" + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo " 1234 /tmp/w/diff.txt"; echo " 50 /tmp/w/diff.txt"`) + } + + res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "v0", + HeadRef: "HEAD", + Timeout: 10 * time.Second, + }) + if err != nil { + t.Fatalf("PreComputeDiff: %v", err) + } + if res.SizeBytes != 1234 { + t.Errorf("BSD format: SizeBytes=%d want 1234", res.SizeBytes) + } + if res.LineCount != 50 { + t.Errorf("BSD format: LineCount=%d want 50", res.LineCount) + } +} + +func TestPreComputeDiff_WcOutputGNUFormat(t *testing.T) { + // GNU wc emits leading-digit format: "1234 path" + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo "1234 /tmp/w/diff.txt"; echo "50 /tmp/w/diff.txt"`) + } + + res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "v0", + HeadRef: "HEAD", + Timeout: 10 * time.Second, + }) + if err != nil { + t.Fatalf("PreComputeDiff: %v", err) + } + if res.SizeBytes != 1234 { + t.Errorf("GNU format: SizeBytes=%d want 1234", res.SizeBytes) + } + if res.LineCount != 50 { + t.Errorf("GNU format: LineCount=%d want 50", res.LineCount) + } +} + +func TestPreComputeDiff_Timeout(t *testing.T) { + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + return exec.CommandContext(ctx, "/bin/sh", "-c", `sleep 10`) + } + + _, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "v0", + HeadRef: "HEAD", + Timeout: 100 * time.Millisecond, + }) + if err == nil { + t.Fatal("expected timeout error") + } +} + +func TestPreComputeDiff_HeadRefDefaultsToHEAD(t *testing.T) { + // When HeadRef is empty, it should default to "HEAD" rather than passing + // an empty string to git. + origExec := execCommandContext + t.Cleanup(func() { execCommandContext = origExec }) + var capturedCmd string + execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd { + // Capture the remote command we sent (last arg) so we can assert HEAD. + if len(args) > 0 { + capturedCmd = args[len(args)-1] + } + return exec.CommandContext(ctx, "/bin/sh", "-c", + `echo "100 /tmp/w/diff.txt"; echo "5 /tmp/w/diff.txt"`) + } + + _, err := PreComputeDiff(context.Background(), PreComputeDiffParams{ + Host: "user@example.lan", + RemoteRepoPath: "/tmp/w/repo", + RemoteWorkdir: "/tmp/w", + BaseRef: "v0", + HeadRef: "", // explicitly empty + Timeout: 10 * time.Second, + }) + if err != nil { + t.Fatalf("PreComputeDiff: %v", err) + } + // The refs are shell-quoted, so look for the HEAD token in context: + // `'v0'..'HEAD'` should appear in the command. + if !strings.Contains(capturedCmd, "'HEAD'") { + t.Errorf("expected remote command to default HeadRef to HEAD (look for 'HEAD' in quoted form), got: %s", capturedCmd) + } +}