diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5425d85..2ffe796 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **`yaml.parseNumber` preserves version-like strings** — values whose float representation would lose information (e.g. `"4.30"` → `4.3` → `"4.3"`) now stay strings. Only coerces to float when the value round-trips through YAML marshal back to the exact input.
+- **`multi_review` reviewer diff visibility** — production runs revealed that reviewer agents (especially weaker models) hallucinate "clone missing" / "ref not found" failures rather than running `git diff ..
` themselves. The fix pre-computes the diff on the remote and writes it to `/diff.txt`; reviewers' task message now tells them to `cat` that file rather than navigate the repo and invoke git.
+ - New `PreComputeDiff` helper in `internal/support/multireview/diff.go` runs `git -C diff .. > /diff.txt && wc -c && wc -l` in a single SSH round-trip; returns the diff path plus size/line counts for the task message header.
+ - `runMultiReview` calls it after `ShipBundle` returns and hard-stops on failure before any reviewer is invoked.
+ - Rewritten task message includes the diff path, size, line count, an anti-hallucination clause (do NOT report "clone failed" unless `ls` proves it), and a `--stat` hint when diff size exceeds 1 MB.
+ - **`--base` is now required** — working-tree mode is intentionally unsupported (it produced the failure mode this fix exists to prevent).
+ - **`--per-reviewer-timeout-seconds` default bumped 600 → 1200** — production observation: Greta and Dax timed out at 600s on a real sprint. Aligns with the existing `--timeout-seconds` (total) default.
+
## [1.8.2] - 2026-01-27
### Fixed
diff --git a/docs/llm-support-commands.md b/docs/llm-support-commands.md
index 15d382f..9ed9120 100644
--- a/docs/llm-support-commands.md
+++ b/docs/llm-support-commands.md
@@ -2132,20 +2132,32 @@ llm-support multi_review [flags]
| `--repo` | Local repo path to bundle and ship |
| `--openclaw-host` | SSH target running `openclaw-gateway` (e.g. `user@nucleus.lan`) |
| `--output-dir` | Where per-reviewer artifacts and merged stream land |
+| `--base` | Base ref for the diff range. **REQUIRED** — we pre-compute `git diff ..` on the remote so reviewers only need to `cat` the resulting file. Working-tree mode is intentionally unsupported. |
**Optional flags:**
| Flag | Default | Description |
|------|---------|-------------|
| `--serial-reviewers` | (empty) | Subset that runs sequentially after the parallel lane (shared rate limits) |
-| `--base` | (empty) | Base ref for the diff range, included in the auto-built task message |
| `--head` | `HEAD` | Head ref |
| `--timeout-seconds` | `1200` | Total wall-clock budget for the entire fan-out |
-| `--per-reviewer-timeout-seconds` | `600` | Per-reviewer soft timeout |
+| `--per-reviewer-timeout-seconds` | `1200` | Per-reviewer soft timeout. Bumped from 600s after production observation that real sprints with reasoning-model reviewers can legitimately need 10-20 minutes. |
| `--gateway-container` | `openclaw-gateway` | Docker container running openclaw |
-| `--task-message` | (auto) | Override the auto-built task message |
+| `--task-message` | (auto) | Override the auto-built task message. Bypasses the pre-compute helper instructions — you take responsibility for telling reviewers where the diff is. |
| `--skip-cleanup` | `false` | Do not remove the remote workdir after running |
+**Pre-compute step:**
+
+After the bundle clone succeeds on the remote, `multi_review` runs:
+
+```
+git -C diff .. > /diff.txt
+wc -c /diff.txt
+wc -l /diff.txt
+```
+
+The diff file lands at `/diff.txt` — a sibling of the cloned repo, inside the same temp directory that gets garbage-collected when the run finishes. The task message tells each reviewer to `cat ` and review the contents. Reviewers don't need to `cd` into the repo or run git themselves. If the diff is >1MB, the task message appends a `git diff --stat` hint so reviewers can do a file-level pass first.
+
**Output layout under `--output-dir`:**
```
diff --git a/internal/support/commands/multi_review.go b/internal/support/commands/multi_review.go
index 7823188..00804c0 100644
--- a/internal/support/commands/multi_review.go
+++ b/internal/support/commands/multi_review.go
@@ -14,12 +14,13 @@ import (
"github.com/spf13/cobra"
)
-// invokeReviewerFn and shipBundleFn are package-level function variables that
-// tests swap to inject deterministic behavior. Production points them at the
-// real multireview package functions.
+// invokeReviewerFn, shipBundleFn, and preComputeDiffFn are package-level
+// function variables that tests swap to inject deterministic behavior.
+// Production points them at the real multireview package functions.
var (
invokeReviewerFn = multireview.InvokeReviewer
shipBundleFn = multireview.ShipBundle
+ preComputeDiffFn = multireview.PreComputeDiff
)
// Flag variables for the multi_review command.
@@ -63,10 +64,18 @@ func newMultiReviewCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "multi_review",
Short: "Fan out a code review to multiple openclaw reviewer agents",
- Long: `Bundle a local repo, ship it to an openclaw-hosting machine, and invoke
-several reviewer agents in parallel (or in a serial lane for those that
-share rate-limited providers). Collects per-reviewer findings and writes
-a merged TD stream the /code-review command can consume.
+ Long: `Bundle a local repo, ship it to an openclaw-hosting machine, pre-compute
+the diff once, and invoke several reviewer agents in parallel (or in a
+serial lane for those that share rate-limited providers). Collects
+per-reviewer findings and writes a merged TD stream the /code-review
+command can consume.
+
+Pre-compute step: --base is REQUIRED. After the bundle is shipped, we run
+'git diff ..' on the remote and write the result to
+/diff.txt. Reviewers are told to 'cat' that file rather than
+running git themselves — observed in production, weaker reviewers
+hallucinate "clone missing" failures rather than persist through a multi-step
+git invocation. Pre-computing eliminates that surface.
Output layout:
/raw//{review.md,td-stream.txt,status.json,response.json}
@@ -77,8 +86,11 @@ Output layout:
/multi-review-summary.json (per-reviewer status + counts)
Failure semantics:
+ - --base missing → validation error, no remote calls
- Bundle/ship failure → hard-stop (no point invoking reviewers without
the diff staged on the remote)
+ - Diff pre-compute failure (bad ref, etc.) → hard-stop before reviewers
+ are invoked; surfaces git stderr in the error message
- Per-reviewer failure → recorded as failed in summary; other reviewers
continue; exit 0 with partial: true
- All reviewers fail → exit 1 with summary of what failed`,
@@ -87,12 +99,12 @@ Failure semantics:
cmd.Flags().StringVar(&mrReviewers, "reviewers", "", "Comma-separated reviewer agent names (required)")
cmd.Flags().StringVar(&mrSerialReviewers, "serial-reviewers", "", "Comma-separated subset that runs serially after the parallel lane")
cmd.Flags().StringVar(&mrRepo, "repo", "", "Local repo path to bundle (required)")
- cmd.Flags().StringVar(&mrBaseRef, "base", "", "Base ref for the diff range (informational, included in task message)")
+ cmd.Flags().StringVar(&mrBaseRef, "base", "", "Base ref for the diff range — REQUIRED (we pre-compute git diff .. on the remote)")
cmd.Flags().StringVar(&mrHeadRef, "head", "HEAD", "Head ref for the diff range")
cmd.Flags().StringVar(&mrOpenclawHost, "openclaw-host", "", "SSH target running openclaw-gateway (required)")
cmd.Flags().StringVar(&mrOutputDir, "output-dir", "", "Where per-reviewer artifacts and merged stream land (required)")
cmd.Flags().IntVar(&mrTimeoutSeconds, "timeout-seconds", 1200, "Total wall-clock budget for the entire fan-out")
- cmd.Flags().IntVar(&mrPerReviewerTO, "per-reviewer-timeout-seconds", 600, "Per-reviewer soft timeout")
+ cmd.Flags().IntVar(&mrPerReviewerTO, "per-reviewer-timeout-seconds", 1200, "Per-reviewer soft timeout (default 1200s — observed 600s default timing out for real sprints)")
cmd.Flags().StringVar(&mrGatewayContainer, "gateway-container", "openclaw-gateway", "Docker container running openclaw")
cmd.Flags().StringVar(&mrTaskMessage, "task-message", "", "Override the task message sent to each reviewer; default is auto-built from --base/--head/--repo")
cmd.Flags().BoolVar(&mrSkipCleanup, "skip-cleanup", false, "Do not remove the remote workdir after running")
@@ -117,6 +129,9 @@ func runMultiReview(cmd *cobra.Command, _ []string) error {
if mrOutputDir == "" {
return fmt.Errorf("--output-dir required")
}
+ if mrBaseRef == "" {
+ return fmt.Errorf("--base required (working-tree mode is not supported — pass --base=[ so we can pre-compute the diff for reviewers)")
+ }
allReviewers := splitAndTrim(mrReviewers)
serial := splitAndTrim(mrSerialReviewers)
@@ -160,10 +175,26 @@ func runMultiReview(cmd *cobra.Command, _ []string) error {
}
}()
- // 2. Build the task message (auto if not overridden).
+ // 2. Pre-compute the diff on the remote so reviewers only need to
+ // `cat `. Removes the hallucination surface where weaker
+ // reviewers were inventing "clone missing" failures rather than
+ // running `git diff` themselves.
+ diffRes, err := preComputeDiffFn(totalCtx, multireview.PreComputeDiffParams{
+ Host: mrOpenclawHost,
+ RemoteRepoPath: shipRes.RemoteRepoPath,
+ RemoteWorkdir: remoteWorkdir,
+ BaseRef: mrBaseRef,
+ HeadRef: mrHeadRef,
+ Timeout: time.Duration(mrTimeoutSeconds) * time.Second,
+ })
+ if err != nil {
+ return fmt.Errorf("pre-compute diff on %s: %w", mrOpenclawHost, err)
+ }
+
+ // 3. Build the task message (auto if not overridden).
taskMessage := mrTaskMessage
if taskMessage == "" {
- taskMessage = buildDefaultTaskMessage(shipRes.RemoteRepoPath, repoName, mrBaseRef, mrHeadRef)
+ taskMessage = buildDefaultTaskMessage(shipRes.RemoteRepoPath, repoName, mrBaseRef, mrHeadRef, diffRes)
}
// 3. Invoke reviewers. Parallel lane first, then serial.
@@ -288,19 +319,34 @@ func runMultiReview(cmd *cobra.Command, _ []string) error {
return nil
}
-func buildDefaultTaskMessage(remoteRepo, repoName, base, head string) string {
+func buildDefaultTaskMessage(remoteRepo, repoName, base, head string, diff multireview.PreComputeDiffResult) string {
if head == "" {
head = "HEAD"
}
var b strings.Builder
b.WriteString("Code review.\n\n")
- b.WriteString(fmt.Sprintf("Repository: a fresh clone of %s is on this host at %s/\n\n", repoName, remoteRepo))
- if base != "" {
- b.WriteString(fmt.Sprintf("Range to review: %s..%s\n\n", base, head))
- b.WriteString("To see the diff:\n")
- b.WriteString(fmt.Sprintf(" cd %s && git diff %s..%s\n\n", remoteRepo, base, head))
- } else {
- b.WriteString(fmt.Sprintf("Working tree at %s — review the current state.\n\n", remoteRepo))
+ b.WriteString(fmt.Sprintf("A pre-computed unified diff is at:\n %s\n", diff.DiffPath))
+ b.WriteString(fmt.Sprintf("Size: %d bytes (%d lines). Range: %s..%s. Repo clone: %s/\n\n",
+ diff.SizeBytes, diff.LineCount, base, head, remoteRepo))
+ b.WriteString(`INSTRUCTIONS — follow exactly:
+1. Run ` + "`cat " + diff.DiffPath + "`" + ` ONCE. That is the diff you must review.
+2. If the cat output looks empty or wrong, run ` + "`ls -la " + diff.DiffPath + "`" + ` and
+ ` + "`wc -l " + diff.DiffPath + "`" + ` and INCLUDE the literal output in your reply.
+3. Do NOT report "repository missing", "clone failed", or "refs not found"
+ unless step 2 actually shows the file is absent. Hallucinating an
+ infrastructure failure when the file exists is worse than no review.
+4. You may also ` + "`cd " + remoteRepo + "`" + ` and inspect individual files referenced
+ in the diff for context. The clone IS there.
+
+`)
+ // Large-diff hint: tell reviewers with tight context to start with --stat.
+ if diff.SizeBytes > 1_000_000 {
+ mb := float64(diff.SizeBytes) / 1_000_000
+ b.WriteString(fmt.Sprintf(`NOTE: Diff is large (%.1f MB). If your context budget is tight, get the
+file-level summary first with `+"`git -C %s diff --stat %s..%s`"+`,
+then focus on files with the most changes.
+
+`, mb, remoteRepo, base, head))
}
b.WriteString(`Produce your normal review report (verdict + severity-graded findings + what was done well + out-of-scope).
Reply with the review body only — no preamble.
diff --git a/internal/support/commands/multi_review_test.go b/internal/support/commands/multi_review_test.go
index c065503..2cc5c97 100644
--- a/internal/support/commands/multi_review_test.go
+++ b/internal/support/commands/multi_review_test.go
@@ -68,6 +68,23 @@ func withMockShipBundle(t *testing.T) {
t.Cleanup(func() { shipBundleFn = orig })
}
+// withMockPreComputeDiff swaps preComputeDiffFn so tests get a deterministic
+// diff result without invoking ssh. Default mock returns a 1234-byte / 50-line
+// diff at /diff.txt.
+func withMockPreComputeDiff(t *testing.T) {
+ t.Helper()
+ orig := preComputeDiffFn
+ preComputeDiffFn = func(ctx context.Context, p multireview.PreComputeDiffParams) (multireview.PreComputeDiffResult, error) {
+ return multireview.PreComputeDiffResult{
+ DiffPath: p.RemoteWorkdir + "/diff.txt",
+ SizeBytes: 1234,
+ LineCount: 50,
+ Empty: false,
+ }, nil
+ }
+ t.Cleanup(func() { preComputeDiffFn = orig })
+}
+
func mockResultFor(agent string, tdLine string) multireview.InvokeReviewerResult {
return multireview.InvokeReviewerResult{
AgentName: agent,
@@ -84,6 +101,7 @@ func TestMultiReview_HappyPath(t *testing.T) {
outDir := filepath.Join(t.TempDir(), "out")
withMockShipBundle(t)
+ withMockPreComputeDiff(t)
withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
return mockResultFor(p.AgentName, "MEDIUM|src/a.go:1|test problem|test fix|robustness"), nil
})
@@ -94,6 +112,7 @@ func TestMultiReview_HappyPath(t *testing.T) {
"--repo", repo,
"--openclaw-host", "user@example.lan",
"--output-dir", outDir,
+ "--base", "v1",
"--timeout-seconds", "30",
})
var stdout bytes.Buffer
@@ -169,6 +188,7 @@ func TestMultiReview_TwoLane(t *testing.T) {
outDir := filepath.Join(t.TempDir(), "out")
withMockShipBundle(t)
+ withMockPreComputeDiff(t)
withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil
})
@@ -180,6 +200,7 @@ func TestMultiReview_TwoLane(t *testing.T) {
"--repo", repo,
"--openclaw-host", "user@example.lan",
"--output-dir", outDir,
+ "--base", "v1",
"--timeout-seconds", "30",
})
cmd.SetOut(new(bytes.Buffer))
@@ -199,6 +220,7 @@ func TestMultiReview_PartialFailure(t *testing.T) {
outDir := filepath.Join(t.TempDir(), "out")
withMockShipBundle(t)
+ withMockPreComputeDiff(t)
withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
if p.AgentName == "greta" {
return multireview.InvokeReviewerResult{AgentName: p.AgentName}, fmt.Errorf("simulated failure")
@@ -212,6 +234,7 @@ func TestMultiReview_PartialFailure(t *testing.T) {
"--repo", repo,
"--openclaw-host", "user@example.lan",
"--output-dir", outDir,
+ "--base", "v1",
"--timeout-seconds", "30",
})
cmd.SetOut(new(bytes.Buffer))
@@ -254,6 +277,7 @@ func TestMultiReview_AllFail(t *testing.T) {
outDir := filepath.Join(t.TempDir(), "out")
withMockShipBundle(t)
+ withMockPreComputeDiff(t)
withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
return multireview.InvokeReviewerResult{AgentName: p.AgentName}, fmt.Errorf("simulated failure")
})
@@ -264,6 +288,7 @@ func TestMultiReview_AllFail(t *testing.T) {
"--repo", repo,
"--openclaw-host", "user@example.lan",
"--output-dir", outDir,
+ "--base", "v1",
"--timeout-seconds", "30",
})
cmd.SetOut(new(bytes.Buffer))
@@ -294,6 +319,7 @@ func TestMultiReview_ShipFailureHardStops(t *testing.T) {
"--repo", repo,
"--openclaw-host", "user@example.lan",
"--output-dir", outDir,
+ "--base", "v1",
"--timeout-seconds", "30",
})
cmd.SetOut(new(bytes.Buffer))
@@ -302,3 +328,196 @@ func TestMultiReview_ShipFailureHardStops(t *testing.T) {
t.Fatal("expected hard-stop on ship failure")
}
}
+
+// ---- diff-precompute tests ----
+
+func TestMultiReview_DiffFailureHardStops(t *testing.T) {
+ // preComputeDiffFn errors → run aborts before any reviewer is invoked.
+ repo := initFixtureRepoMR(t)
+ outDir := filepath.Join(t.TempDir(), "out")
+
+ withMockShipBundle(t)
+ origDiff := preComputeDiffFn
+ preComputeDiffFn = func(ctx context.Context, p multireview.PreComputeDiffParams) (multireview.PreComputeDiffResult, error) {
+ return multireview.PreComputeDiffResult{}, fmt.Errorf("diff: git exit 128, stderr: fatal: bad revision 'badref'")
+ }
+ t.Cleanup(func() { preComputeDiffFn = origDiff })
+
+ // invokeReviewerFn should never be called — fail loud if it is.
+ withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
+ t.Errorf("invokeReviewerFn called after diff failure — should have hard-stopped first")
+ return multireview.InvokeReviewerResult{AgentName: p.AgentName}, nil
+ })
+
+ cmd := newMultiReviewCmd()
+ cmd.SetArgs([]string{
+ "--reviewers", "bruce,greta",
+ "--repo", repo,
+ "--openclaw-host", "user@example.lan",
+ "--output-dir", outDir,
+ "--base", "v1",
+ "--timeout-seconds", "30",
+ })
+ cmd.SetOut(new(bytes.Buffer))
+ cmd.SetErr(new(bytes.Buffer))
+ if err := cmd.Execute(); err == nil {
+ t.Fatal("expected hard-stop when diff fails")
+ }
+ // Summary should NOT be written when we hard-stop before invoking.
+ if _, err := os.Stat(filepath.Join(outDir, "multi-review-summary.json")); err == nil {
+ t.Error("summary should not exist after pre-diff hard-stop")
+ }
+}
+
+func TestMultiReview_EmptyBaseRefRejected(t *testing.T) {
+ repo := initFixtureRepoMR(t)
+ outDir := filepath.Join(t.TempDir(), "out")
+
+ // We don't need any of the mocks — validation should reject before then.
+ // Ensure invokeReviewerFn and shipBundleFn are never called by failing loud.
+ origShip := shipBundleFn
+ shipBundleFn = func(ctx context.Context, p multireview.ShipBundleParams) (multireview.ShipBundleResult, error) {
+ t.Errorf("shipBundleFn called — should have validated --base before this")
+ return multireview.ShipBundleResult{}, nil
+ }
+ t.Cleanup(func() { shipBundleFn = origShip })
+
+ cmd := newMultiReviewCmd()
+ cmd.SetArgs([]string{
+ "--reviewers", "bruce",
+ "--repo", repo,
+ "--openclaw-host", "user@example.lan",
+ "--output-dir", outDir,
+ // no --base passed
+ "--timeout-seconds", "30",
+ })
+ cmd.SetOut(new(bytes.Buffer))
+ cmd.SetErr(new(bytes.Buffer))
+ if err := cmd.Execute(); err == nil {
+ t.Fatal("expected validation error when --base is missing")
+ }
+}
+
+func TestMultiReview_DiffPathInTaskMessage(t *testing.T) {
+ // Verify the task message passed to reviewers includes `cat `
+ // and the anti-hallucination clause.
+ repo := initFixtureRepoMR(t)
+ outDir := filepath.Join(t.TempDir(), "out")
+
+ withMockShipBundle(t)
+ withMockPreComputeDiff(t)
+
+ var capturedTaskMessage string
+ withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
+ capturedTaskMessage = p.TaskMessage
+ return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil
+ })
+
+ cmd := newMultiReviewCmd()
+ cmd.SetArgs([]string{
+ "--reviewers", "bruce",
+ "--repo", repo,
+ "--openclaw-host", "user@example.lan",
+ "--output-dir", outDir,
+ "--base", "v1",
+ "--timeout-seconds", "30",
+ })
+ cmd.SetOut(new(bytes.Buffer))
+ if err := cmd.Execute(); err != nil {
+ t.Fatalf("execute: %v", err)
+ }
+ // The diff path should appear in the task message. The mock builds it as
+ // /diff.txt; RemoteWorkdir is `/tmp/multi-review-`
+ // in production, so we just check for the suffix.
+ if !strings.Contains(capturedTaskMessage, "/diff.txt") {
+ t.Errorf("task message missing diff path suffix `/diff.txt`. Got:\n%s", capturedTaskMessage)
+ }
+ if !strings.Contains(capturedTaskMessage, "/tmp/multi-review-") {
+ t.Errorf("task message missing workdir prefix `/tmp/multi-review-`. Got:\n%s", capturedTaskMessage)
+ }
+ // The cat instruction should be present
+ if !strings.Contains(capturedTaskMessage, "cat ") {
+ t.Errorf("task message missing `cat` instruction. Got:\n%s", capturedTaskMessage)
+ }
+ // The anti-hallucination clause should be present
+ if !strings.Contains(capturedTaskMessage, "Hallucinating") && !strings.Contains(capturedTaskMessage, "Do NOT report") {
+ t.Errorf("task message missing anti-hallucination clause. Got:\n%s", capturedTaskMessage)
+ }
+ // Old `git diff ..` instruction should NOT be present (reviewers shouldn't run git themselves anymore)
+ if strings.Contains(capturedTaskMessage, "git diff v1..") {
+ t.Errorf("task message should not include git diff instruction. Got:\n%s", capturedTaskMessage)
+ }
+}
+
+func TestMultiReview_LargeDiffWarning(t *testing.T) {
+ // SizeBytes > 1_000_000 should append the --stat hint.
+ repo := initFixtureRepoMR(t)
+ outDir := filepath.Join(t.TempDir(), "out")
+
+ withMockShipBundle(t)
+ origDiff := preComputeDiffFn
+ preComputeDiffFn = func(ctx context.Context, p multireview.PreComputeDiffParams) (multireview.PreComputeDiffResult, error) {
+ return multireview.PreComputeDiffResult{
+ DiffPath: p.RemoteWorkdir + "/diff.txt",
+ SizeBytes: 2_500_000, // 2.5 MB
+ LineCount: 60000,
+ Empty: false,
+ }, nil
+ }
+ t.Cleanup(func() { preComputeDiffFn = origDiff })
+
+ var capturedTaskMessage string
+ withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
+ capturedTaskMessage = p.TaskMessage
+ return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil
+ })
+
+ cmd := newMultiReviewCmd()
+ cmd.SetArgs([]string{
+ "--reviewers", "bruce",
+ "--repo", repo,
+ "--openclaw-host", "user@example.lan",
+ "--output-dir", outDir,
+ "--base", "v1",
+ "--timeout-seconds", "30",
+ })
+ cmd.SetOut(new(bytes.Buffer))
+ if err := cmd.Execute(); err != nil {
+ t.Fatalf("execute: %v", err)
+ }
+ if !strings.Contains(capturedTaskMessage, "--stat") {
+ t.Errorf("task message should include --stat hint for large diff. Got:\n%s", capturedTaskMessage)
+ }
+}
+
+func TestMultiReview_SmallDiffNoWarning(t *testing.T) {
+ // SizeBytes < 1_000_000 should NOT append the --stat hint.
+ repo := initFixtureRepoMR(t)
+ outDir := filepath.Join(t.TempDir(), "out")
+
+ withMockShipBundle(t)
+ withMockPreComputeDiff(t) // default mock: 1234 bytes
+
+ var capturedTaskMessage string
+ withMockInvoker(t, func(ctx context.Context, p multireview.InvokeReviewerParams) (multireview.InvokeReviewerResult, error) {
+ capturedTaskMessage = p.TaskMessage
+ return mockResultFor(p.AgentName, "LOW|f:1|p|x|c"), nil
+ })
+
+ cmd := newMultiReviewCmd()
+ cmd.SetArgs([]string{
+ "--reviewers", "bruce",
+ "--repo", repo,
+ "--openclaw-host", "user@example.lan",
+ "--output-dir", outDir,
+ "--base", "v1",
+ "--timeout-seconds", "30",
+ })
+ cmd.SetOut(new(bytes.Buffer))
+ if err := cmd.Execute(); err != nil {
+ t.Fatalf("execute: %v", err)
+ }
+ if strings.Contains(capturedTaskMessage, "--stat") {
+ t.Errorf("task message should not include --stat hint for small diff. Got:\n%s", capturedTaskMessage)
+ }
+}
diff --git a/internal/support/multireview/diff.go b/internal/support/multireview/diff.go
new file mode 100644
index 0000000..2a193d8
--- /dev/null
+++ b/internal/support/multireview/diff.go
@@ -0,0 +1,151 @@
+package multireview
+
+import (
+ "context"
+ "fmt"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// PreComputeDiffParams configures pre-computing a diff inside the remote workdir.
+//
+// We do this so reviewer agents only need to `cat` a known file, not navigate
+// the cloned repo and run git themselves. In real use we observed weaker
+// reviewers hallucinate "clone missing" / "ref not found" failures rather
+// than persisting through the diff fetch. Pre-computing removes that surface.
+type PreComputeDiffParams struct {
+ // Host is the SSH target (e.g. "user@nucleus.lan").
+ Host string
+ // RemoteRepoPath is the cloned repo on the remote (the `git -C` target).
+ RemoteRepoPath string
+ // RemoteWorkdir is the parent directory; diff.txt lands here so it's
+ // peers with the clone — keeps a single defer-clean dir.
+ RemoteWorkdir string
+ // BaseRef is required — no working-tree fallback (that mode produced
+ // the very failures this fix exists to prevent).
+ BaseRef string
+ // HeadRef defaults to "HEAD" when empty.
+ HeadRef string
+ // Timeout for the whole SSH round-trip.
+ Timeout time.Duration
+}
+
+// PreComputeDiffResult reports the diff that was written.
+type PreComputeDiffResult struct {
+ // DiffPath is the absolute path of the diff file inside the remote workdir.
+ DiffPath string
+ // SizeBytes is the byte count of the diff file (from wc -c).
+ SizeBytes int64
+ // LineCount is the line count of the diff file (from wc -l).
+ LineCount int
+ // Empty is true when the diff has zero bytes (BaseRef == HeadRef or no
+ // changes between them). Callers can decide whether to proceed.
+ Empty bool
+}
+
+// PreComputeDiff runs `git diff ..` on the remote, redirects the
+// output to /diff.txt, and returns the path plus size/line counts.
+//
+// Failure semantics:
+// - Missing required inputs → validation error, no SSH call
+// - Non-zero git exit (bad ref, etc.) → wraps stderr in the error
+// - Context deadline → propagated
+//
+// Zero-byte diff is NOT an error; the result has Empty=true and the caller
+// chooses whether to proceed.
+func PreComputeDiff(parent context.Context, p PreComputeDiffParams) (PreComputeDiffResult, error) {
+ if p.Host == "" {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: host required")
+ }
+ if p.RemoteRepoPath == "" {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: remote repo path required")
+ }
+ if p.RemoteWorkdir == "" {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: remote workdir required")
+ }
+ if p.BaseRef == "" {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: base ref required (working-tree mode is not supported — pass --base)")
+ }
+ if p.HeadRef == "" {
+ p.HeadRef = "HEAD"
+ }
+ if p.Timeout <= 0 {
+ p.Timeout = 5 * time.Minute
+ }
+
+ diffPath := filepath.Join(p.RemoteWorkdir, "diff.txt")
+
+ // Single composite remote command: write the diff, then emit wc results.
+ // `git diff` exits non-zero on bad refs which short-circuits the &&
+ // chain and surfaces as a non-zero SSH exit + stderr we wrap in the
+ // returned error.
+ remoteCmd := fmt.Sprintf(
+ "git -C %s diff %s..%s > %s && wc -c %s && wc -l %s",
+ shellQuote(p.RemoteRepoPath),
+ shellQuote(p.BaseRef),
+ shellQuote(p.HeadRef),
+ shellQuote(diffPath),
+ shellQuote(diffPath),
+ shellQuote(diffPath),
+ )
+
+ res, err := SSHRun(parent, SSHParams{
+ Host: p.Host,
+ Command: remoteCmd,
+ Timeout: p.Timeout,
+ })
+ if err != nil {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: ssh: %w", err)
+ }
+ if res.ExitCode != 0 {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: git exit %d, stderr: %s", res.ExitCode, strings.TrimSpace(res.Stderr))
+ }
+
+ size, lines, err := parseWcOutput(res.Stdout)
+ if err != nil {
+ return PreComputeDiffResult{}, fmt.Errorf("diff: parse wc output: %w (stdout: %q)", err, res.Stdout)
+ }
+
+ return PreComputeDiffResult{
+ DiffPath: diffPath,
+ SizeBytes: size,
+ LineCount: lines,
+ Empty: size == 0,
+ }, nil
+}
+
+// parseWcOutput extracts the byte and line counts from two consecutive `wc`
+// outputs. Handles both BSD format (" 1234 path") and GNU format
+// ("1234 path") by taking the first numeric token on each non-empty line.
+func parseWcOutput(stdout string) (sizeBytes int64, lineCount int, err error) {
+ lines := strings.Split(strings.TrimSpace(stdout), "\n")
+ if len(lines) < 2 {
+ return 0, 0, fmt.Errorf("expected 2 lines (wc -c, wc -l), got %d", len(lines))
+ }
+ size, err := firstNumericToken(lines[0])
+ if err != nil {
+ return 0, 0, fmt.Errorf("wc -c line: %w", err)
+ }
+ count, err := firstNumericToken(lines[1])
+ if err != nil {
+ return 0, 0, fmt.Errorf("wc -l line: %w", err)
+ }
+ return size, int(count), nil
+}
+
+// firstNumericToken returns the first whitespace-delimited integer in line.
+// Tolerates leading whitespace (BSD wc) and ignores anything after the number
+// (path).
+func firstNumericToken(line string) (int64, error) {
+ fields := strings.Fields(line)
+ if len(fields) == 0 {
+ return 0, fmt.Errorf("empty line")
+ }
+ n, err := strconv.ParseInt(fields[0], 10, 64)
+ if err != nil {
+ return 0, fmt.Errorf("not a number: %q", fields[0])
+ }
+ return n, nil
+}
diff --git a/internal/support/multireview/diff_test.go b/internal/support/multireview/diff_test.go
new file mode 100644
index 0000000..e7e382d
--- /dev/null
+++ b/internal/support/multireview/diff_test.go
@@ -0,0 +1,261 @@
+package multireview
+
+import (
+ "context"
+ "os/exec"
+ "strings"
+ "testing"
+ "time"
+)
+
+func TestPreComputeDiff_Success(t *testing.T) {
+ // Mock the SSH call to return a clean diff output + wc results.
+ // The actual command we run is one composite shell pipeline; we mock the
+ // wrapping ssh call and return canned stdout that matches the format the
+ // production code expects.
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ // Production runs: `wc -c ; wc -l ` separated by newlines.
+ // First line: byte count + path. Second line: line count + path.
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo " 12345 /tmp/workdir/diff.txt"; echo " 250 /tmp/workdir/diff.txt"`)
+ }
+
+ res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/workdir/myrepo",
+ RemoteWorkdir: "/tmp/workdir",
+ BaseRef: "v1.0.0",
+ HeadRef: "HEAD",
+ Timeout: 30 * time.Second,
+ })
+ if err != nil {
+ t.Fatalf("PreComputeDiff: %v", err)
+ }
+ if res.DiffPath != "/tmp/workdir/diff.txt" {
+ t.Errorf("DiffPath=%q want /tmp/workdir/diff.txt", res.DiffPath)
+ }
+ if res.SizeBytes != 12345 {
+ t.Errorf("SizeBytes=%d want 12345", res.SizeBytes)
+ }
+ if res.LineCount != 250 {
+ t.Errorf("LineCount=%d want 250", res.LineCount)
+ }
+ if res.Empty {
+ t.Error("Empty should be false for non-zero diff")
+ }
+}
+
+func TestPreComputeDiff_RequiresInputs(t *testing.T) {
+ ctx := context.Background()
+ cases := []struct {
+ name string
+ p PreComputeDiffParams
+ }{
+ {"empty host", PreComputeDiffParams{RemoteRepoPath: "/r", RemoteWorkdir: "/w", BaseRef: "v1", HeadRef: "HEAD"}},
+ {"empty repo path", PreComputeDiffParams{Host: "u@h", RemoteWorkdir: "/w", BaseRef: "v1", HeadRef: "HEAD"}},
+ {"empty workdir", PreComputeDiffParams{Host: "u@h", RemoteRepoPath: "/r", BaseRef: "v1", HeadRef: "HEAD"}},
+ {"empty base ref", PreComputeDiffParams{Host: "u@h", RemoteRepoPath: "/r", RemoteWorkdir: "/w", HeadRef: "HEAD"}},
+ }
+ for _, c := range cases {
+ t.Run(c.name, func(t *testing.T) {
+ _, err := PreComputeDiff(ctx, c.p)
+ if err == nil {
+ t.Errorf("expected error for %s", c.name)
+ }
+ })
+ }
+}
+
+func TestPreComputeDiff_RefNotFound(t *testing.T) {
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo "fatal: bad revision 'badref'" >&2; exit 128`)
+ }
+
+ _, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "badref",
+ HeadRef: "HEAD",
+ Timeout: 10 * time.Second,
+ })
+ if err == nil {
+ t.Fatal("expected error for bad ref")
+ }
+ if !strings.Contains(err.Error(), "bad revision") && !strings.Contains(err.Error(), "exit") {
+ t.Errorf("error should reference git failure: %v", err)
+ }
+}
+
+func TestPreComputeDiff_EmptyDiff(t *testing.T) {
+ // git diff produces an empty file when base==head. wc -c returns 0.
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo "0 /tmp/w/diff.txt"; echo "0 /tmp/w/diff.txt"`)
+ }
+
+ res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "HEAD",
+ HeadRef: "HEAD",
+ Timeout: 10 * time.Second,
+ })
+ if err != nil {
+ t.Fatalf("PreComputeDiff: %v", err)
+ }
+ if !res.Empty {
+ t.Error("Empty should be true for zero-byte diff")
+ }
+ if res.SizeBytes != 0 {
+ t.Errorf("SizeBytes=%d want 0", res.SizeBytes)
+ }
+}
+
+func TestPreComputeDiff_LargeDiff(t *testing.T) {
+ // A 2MB diff has SizeBytes ~ 2000000.
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo "2000000 /tmp/w/diff.txt"; echo "50000 /tmp/w/diff.txt"`)
+ }
+
+ res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "v0",
+ HeadRef: "HEAD",
+ Timeout: 10 * time.Second,
+ })
+ if err != nil {
+ t.Fatalf("PreComputeDiff: %v", err)
+ }
+ if res.SizeBytes != 2000000 {
+ t.Errorf("SizeBytes=%d want 2000000", res.SizeBytes)
+ }
+ if res.LineCount != 50000 {
+ t.Errorf("LineCount=%d want 50000", res.LineCount)
+ }
+ if res.Empty {
+ t.Error("Empty should be false for large diff")
+ }
+}
+
+func TestPreComputeDiff_WcOutputBSDFormat(t *testing.T) {
+ // BSD wc emits leading-space format: " 1234 path"
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo " 1234 /tmp/w/diff.txt"; echo " 50 /tmp/w/diff.txt"`)
+ }
+
+ res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "v0",
+ HeadRef: "HEAD",
+ Timeout: 10 * time.Second,
+ })
+ if err != nil {
+ t.Fatalf("PreComputeDiff: %v", err)
+ }
+ if res.SizeBytes != 1234 {
+ t.Errorf("BSD format: SizeBytes=%d want 1234", res.SizeBytes)
+ }
+ if res.LineCount != 50 {
+ t.Errorf("BSD format: LineCount=%d want 50", res.LineCount)
+ }
+}
+
+func TestPreComputeDiff_WcOutputGNUFormat(t *testing.T) {
+ // GNU wc emits leading-digit format: "1234 path"
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo "1234 /tmp/w/diff.txt"; echo "50 /tmp/w/diff.txt"`)
+ }
+
+ res, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "v0",
+ HeadRef: "HEAD",
+ Timeout: 10 * time.Second,
+ })
+ if err != nil {
+ t.Fatalf("PreComputeDiff: %v", err)
+ }
+ if res.SizeBytes != 1234 {
+ t.Errorf("GNU format: SizeBytes=%d want 1234", res.SizeBytes)
+ }
+ if res.LineCount != 50 {
+ t.Errorf("GNU format: LineCount=%d want 50", res.LineCount)
+ }
+}
+
+func TestPreComputeDiff_Timeout(t *testing.T) {
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ return exec.CommandContext(ctx, "/bin/sh", "-c", `sleep 10`)
+ }
+
+ _, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "v0",
+ HeadRef: "HEAD",
+ Timeout: 100 * time.Millisecond,
+ })
+ if err == nil {
+ t.Fatal("expected timeout error")
+ }
+}
+
+func TestPreComputeDiff_HeadRefDefaultsToHEAD(t *testing.T) {
+ // When HeadRef is empty, it should default to "HEAD" rather than passing
+ // an empty string to git.
+ origExec := execCommandContext
+ t.Cleanup(func() { execCommandContext = origExec })
+ var capturedCmd string
+ execCommandContext = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+ // Capture the remote command we sent (last arg) so we can assert HEAD.
+ if len(args) > 0 {
+ capturedCmd = args[len(args)-1]
+ }
+ return exec.CommandContext(ctx, "/bin/sh", "-c",
+ `echo "100 /tmp/w/diff.txt"; echo "5 /tmp/w/diff.txt"`)
+ }
+
+ _, err := PreComputeDiff(context.Background(), PreComputeDiffParams{
+ Host: "user@example.lan",
+ RemoteRepoPath: "/tmp/w/repo",
+ RemoteWorkdir: "/tmp/w",
+ BaseRef: "v0",
+ HeadRef: "", // explicitly empty
+ Timeout: 10 * time.Second,
+ })
+ if err != nil {
+ t.Fatalf("PreComputeDiff: %v", err)
+ }
+ // The refs are shell-quoted, so look for the HEAD token in context:
+ // `'v0'..'HEAD'` should appear in the command.
+ if !strings.Contains(capturedCmd, "'HEAD'") {
+ t.Errorf("expected remote command to default HeadRef to HEAD (look for 'HEAD' in quoted form), got: %s", capturedCmd)
+ }
+}
]