From 4ac51fe1992d15ab25caa9a5a8b467d3506dd145 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Wed, 10 Jun 2026 12:43:09 +0900 Subject: [PATCH 01/26] add agent brief for token diagnostics Entire-Checkpoint: f1dfd807cfdc --- cmd/entire/cli/session_tokens.go | 114 ++++++++++++++++++- cmd/entire/cli/sessions_test.go | 181 +++++++++++++++++++++++++++++++ 2 files changed, 292 insertions(+), 3 deletions(-) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index 44f18ed9e7..5b588232e5 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -69,6 +69,7 @@ type tokenRecommendationSignals struct { func newTokensCmd() *cobra.Command { var jsonFlag bool var currentFlag bool + var agentBriefFlag bool cmd := &cobra.Command{ Use: "tokens [session-id]", @@ -78,9 +79,16 @@ func newTokensCmd() *cobra.Command { When no session ID is provided, Entire reports on the most recently active session, preferring the current worktree and falling back to the newest session if no state matches this worktree. The report uses token and context data Entire -already captured for the session.`, +already captured for the session. 
+ +Use --agent-brief when an agent needs compact guidance for the next step, for +example: "Use Entire token tracking to check how this session is doing and +optimize next steps."`, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { + if jsonFlag && agentBriefFlag { + return errors.New("--json and --agent-brief are mutually exclusive") + } if currentFlag && len(args) > 0 { return errors.New("--current and session ID argument are mutually exclusive") } @@ -89,16 +97,17 @@ already captured for the session.`, if len(args) > 0 { sessionID = args[0] } - return runSessionTokens(cmd.Context(), cmd, sessionID, currentFlag, jsonFlag) + return runSessionTokens(cmd.Context(), cmd, sessionID, currentFlag, jsonFlag, agentBriefFlag) }, } cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") cmd.Flags().BoolVar(&currentFlag, "current", false, "Prefer the current worktree's most recent session") + cmd.Flags().BoolVar(&agentBriefFlag, "agent-brief", false, "Output compact next-step guidance for agents") return cmd } -func runSessionTokens(ctx context.Context, cmd *cobra.Command, sessionID string, current, jsonOutput bool) error { +func runSessionTokens(ctx context.Context, cmd *cobra.Command, sessionID string, current, jsonOutput, agentBrief bool) error { if sessionID == "" || current { sessionID = strategy.FindMostRecentSession(ctx) if sessionID == "" { @@ -121,6 +130,10 @@ func runSessionTokens(ctx context.Context, cmd *cobra.Command, sessionID string, if jsonOutput { return writeSessionTokensJSON(cmd.OutOrStdout(), report) } + if agentBrief { + writeSessionTokensAgentBrief(cmd.OutOrStdout(), report) + return nil + } writeSessionTokensText(cmd.OutOrStdout(), report) return nil } @@ -404,6 +417,101 @@ func writeSessionTokensText(w io.Writer, report sessionTokensReport) { writeTokenLimitations(w, report.Limitations) } +func writeSessionTokensAgentBrief(w io.Writer, report sessionTokensReport) { + fmt.Fprintln(w, "Session token brief") +
fmt.Fprintf(w, "Session: %s\n", report.SessionID) + fmt.Fprintln(w) + fmt.Fprintln(w, agentBriefUsageLine(report.Tokens)) + fmt.Fprintln(w) + fmt.Fprintln(w, "Next best action:") + fmt.Fprintln(w, agentBriefNextAction(report)) + + signals := agentBriefSignals(report) + if len(signals) > 0 { + fmt.Fprintln(w) + fmt.Fprintln(w, "Signals:") + for _, signal := range signals { + fmt.Fprintf(w, "- %s\n", signal) + } + } +} + +func agentBriefUsageLine(tokens *sessionTokensUsage) string { + if tokens == nil { + return "Token usage: unavailable." + } + if tokens.CacheRead > 0 { + return fmt.Sprintf( + "Token usage: %s total; %s cache/context replay; %s.", + formatTokenCount(tokens.Total), + formatPercent(tokenPercent(tokens.CacheRead, tokens.Total)), + formatAPICalls(tokens.APICalls), + ) + } + return fmt.Sprintf("Token usage: %s total; %s.", formatTokenCount(tokens.Total), formatAPICalls(tokens.APICalls)) +} + +func formatAPICalls(count int) string { + if count == 1 { + return "1 API call" + } + return fmt.Sprintf("%d API calls", count) +} + +func agentBriefNextAction(report sessionTokensReport) string { + switch { + case hasTokenRecommendation(report, "context-replay-hotspot") && hasTokenRecommendation(report, "api-call-amplification"): + return "Summarize the useful findings, then batch the next diagnostic step. Avoid more exploratory reads until you have a narrowed hypothesis." + case hasTokenRecommendation(report, "context-replay-hotspot"): + return "Summarize the current useful findings before continuing, and keep the next prompt narrow." + case hasTokenRecommendation(report, "no-token-data"): + return "Token usage is not available yet. Use this as a context check, not a spend diagnosis; continue after the next checkpoint captures usage." + case hasTokenRecommendation(report, "subagent-heavy"): + return "Keep the next agent or subagent task narrow with a concrete expected output; avoid broad parallel exploration." 
+ case hasTokenRecommendation(report, "high-context-pressure"): + return "Preserve the useful findings and compact or restart before adding more broad context." + case hasTokenRecommendation(report, "long-session"): + return "Compact or restart after summarizing useful findings if older context is no longer needed." + default: + return "Continue normally; no high-signal token optimization is available from this session yet." + } +} + +func agentBriefSignals(report sessionTokensReport) []string { + var signals []string + if hasTokenRecommendation(report, "context-replay-hotspot") { + signals = append(signals, "Cache/context replay dominates token volume.") + } + if hasTokenRecommendation(report, "api-call-amplification") { + signals = append(signals, "API call count is high for one session.") + } + if hasTokenRecommendation(report, "subagent-heavy") { + signals = append(signals, "Subagent usage is a meaningful part of total tokens.") + } + if hasTokenRecommendation(report, "high-context-pressure") { + signals = append(signals, "Context pressure is high.") + } + if hasTokenRecommendation(report, "long-session") { + signals = append(signals, "Session has crossed a long-session or checkpoint boundary.") + } + if hasTokenRecommendation(report, "no-token-data") { + signals = append([]string{"Token usage is unavailable for this session."}, signals...) 
+ } + if len(signals) == 0 && report.Tokens != nil { + signals = append(signals, "No high-signal token risk detected from captured usage.") + } + return signals +} + +func hasTokenRecommendation(report sessionTokensReport, id string) bool { + for _, rec := range report.Recommendations { + if rec.ID == id { + return true + } + } + return false +} + func writeTokenRecommendations(w io.Writer, recs []sessionTokensRecommendation) { fmt.Fprintln(w) fmt.Fprintln(w, "Recommendations") diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 28697b3c7b..5f4caa0bb5 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1213,6 +1213,139 @@ func reportHasSessionRecommendation(report sessionTokensReport, id string) bool return false } +func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { + setupStopTestRepo(t) + + ctx := context.Background() + state := makeSessionState("test-tokens-brief", session.PhaseActive) + state.AgentType = testAgentClaude + state.TokenUsage = &agent.TokenUsage{ + InputTokens: 94, + CacheCreationTokens: 122171, + CacheReadTokens: 6052424, + OutputTokens: 38956, + APICallCount: 70, + } + + if err := strategy.SaveSessionState(ctx, state); err != nil { + t.Fatalf("SaveSessionState() error = %v", err) + } + + cmd := newTokensCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"test-tokens-brief", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Session token brief", + "Session: test-tokens-brief", + "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", + "Next best action:", + "Summarize the useful findings, then batch the next diagnostic step.", + "Signals:", + "- Cache/context replay dominates token volume.", + "- API call count is high for one session.", + } + for _, check := range checks { + if !strings.Contains(out, 
check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + if strings.Contains(out, "Recommendations") { + t.Fatalf("expected agent brief to omit regular recommendations section, got:\n%s", out) + } + if strings.Contains(out, "Likely contributors") { + t.Fatalf("expected agent brief to omit contributor detail, got:\n%s", out) + } +} + +func TestTokensCmd_AgentBriefHighCacheReplayWithoutHighAPICalls(t *testing.T) { + setupStopTestRepo(t) + + ctx := context.Background() + state := makeSessionState("test-tokens-brief-cache-only", session.PhaseActive) + state.AgentType = testAgentClaude + state.TokenUsage = &agent.TokenUsage{ + InputTokens: 27_892, + CacheReadTokens: 608_896, + OutputTokens: 865, + APICallCount: 3, + } + + if err := strategy.SaveSessionState(ctx, state); err != nil { + t.Fatalf("SaveSessionState() error = %v", err) + } + + cmd := newTokensCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"test-tokens-brief-cache-only", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Token usage: 637.7k total; 95.5% cache/context replay; 3 API calls.", + "Summarize the current useful findings before continuing, and keep the next prompt narrow.", + "- Cache/context replay dominates token volume.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + if strings.Contains(out, "Continue normally") { + t.Fatalf("expected high cache replay to avoid continue-normally action, got:\n%s", out) + } +} + +func TestTokensCmd_AgentBriefNoTokenData(t *testing.T) { + setupStopTestRepo(t) + + ctx := context.Background() + state := makeSessionState("test-tokens-brief-missing", session.PhaseActive) + state.AgentType = testAgentGemini + state.ContextTokens = 9000 + state.ContextWindowSize = 10000 + + if err := 
strategy.SaveSessionState(ctx, state); err != nil { + t.Fatalf("SaveSessionState() error = %v", err) + } + + cmd := newTokensCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"test-tokens-brief-missing", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Session token brief", + "Session: test-tokens-brief-missing", + "Token usage: unavailable.", + "Next best action:", + "Token usage is not available yet.", + "Signals:", + "- Token usage is unavailable for this session.", + "- Context pressure is high.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + func TestSessionsCmd_TokensSubcommand(t *testing.T) { setupStopTestRepo(t) @@ -1245,6 +1378,39 @@ func TestSessionsCmd_TokensSubcommand(t *testing.T) { } } +func TestSessionsCmd_TokensSubcommandAgentBrief(t *testing.T) { + setupStopTestRepo(t) + + ctx := context.Background() + state := makeSessionState("test-tokens-subcommand-brief", session.PhaseActive) + state.TokenUsage = &agent.TokenUsage{ + InputTokens: 1200, + OutputTokens: 300, + APICallCount: 2, + } + + if err := strategy.SaveSessionState(ctx, state); err != nil { + t.Fatalf("SaveSessionState() error = %v", err) + } + + cmd := newSessionsCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "test-tokens-subcommand-brief", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + if !strings.Contains(out, "Session token brief") { + t.Fatalf("expected agent brief output, got:\n%s", out) + } + if !strings.Contains(out, "Token usage: 1.5k total") { + t.Fatalf("expected token summary in brief, got:\n%s", out) + } +} + func TestSessionsCmd_HelpIncludesTokensSubcommand(t *testing.T) { cmd := 
newSessionsCmd() var stdout bytes.Buffer @@ -1264,6 +1430,21 @@ func TestSessionsCmd_HelpIncludesTokensSubcommand(t *testing.T) { } } +func TestTokensCmd_JSONAndAgentBriefAreMutuallyExclusive(t *testing.T) { + setupStopTestRepo(t) + + cmd := newTokensCmd() + cmd.SetArgs([]string{"test-session", "--json", "--agent-brief"}) + + err := cmd.ExecuteContext(context.Background()) + if err == nil { + t.Fatal("expected error for --json with --agent-brief") + } + if !strings.Contains(err.Error(), "mutually exclusive") { + t.Fatalf("expected mutually exclusive error, got: %v", err) + } +} + func TestTokensCmd_PrioritizesContextReplayHotspot(t *testing.T) { setupStopTestRepo(t) From 19ca13eb9d41f00aa3e0eb4ae11d8ae868ea3733 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 11 Jun 2026 09:46:58 +0900 Subject: [PATCH 02/26] add checkpoint token comparison Entire-Checkpoint: cea04e74bd49 --- cmd/entire/cli/checkpoint_tokens.go | 195 +++++++++++++- cmd/entire/cli/sessions_test.go | 387 ++++++++++++++++++++++++++++ 2 files changed, 569 insertions(+), 13 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index d517b713fb..755a0df47c 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "strconv" "strings" "github.com/entireio/cli/cmd/entire/cli/agent" @@ -27,11 +28,39 @@ type checkpointTokensReport struct { Context *sessionTokensContext `json:"context,omitempty"` Contributors []sessionTokensContributor `json:"contributors,omitempty"` Recommendations []sessionTokensRecommendation `json:"recommendations,omitempty"` + Comparison *checkpointTokensComparison `json:"comparison,omitempty"` Limitations []string `json:"limitations,omitempty"` } +type checkpointTokensComparison struct { + BaselineCheckpointID string `json:"baseline_checkpoint_id"` + TargetCheckpointID string `json:"target_checkpoint_id"` + Status string `json:"status"` + Total 
*checkpointTokensMetricDelta `json:"total,omitempty"` + CacheRead *checkpointTokensMetricDelta `json:"cache_read,omitempty"` + APICalls *checkpointTokensMetricDelta `json:"api_calls,omitempty"` + Qualification string `json:"qualification"` + Limitations []string `json:"limitations,omitempty"` +} + +type checkpointTokensMetricDelta struct { + Baseline int `json:"baseline"` + Current int `json:"current"` + Change int `json:"change"` + ChangePercent *float64 `json:"change_percent,omitempty"` + Direction string `json:"direction"` +} + +const ( + checkpointComparisonStatusUnavailable = "unavailable" + checkpointComparisonStatusObservedReduction = "observed_reduction" + checkpointComparisonStatusObservedIncrease = "observed_increase" + checkpointComparisonStatusObservedNoChange = "observed_no_change" +) + func newCheckpointTokensCmd() *cobra.Command { var jsonFlag bool + var compareFlag string cmd := &cobra.Command{ Use: "tokens ", @@ -42,19 +71,23 @@ The report reads committed checkpoint metadata using the same checkpoint resolution path as 'entire checkpoint explain'. Checkpoint IDs may be abbreviated as long as the prefix is unambiguous; positional targets may also resolve from a commit ref with an Entire-Checkpoint trailer, and missing metadata may be fetched -from the checkpoint remote.`, +from the checkpoint remote. 
+ +Use --compare to compare this checkpoint against a previous +checkpoint and qualify observed token reduction or increase.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - return runCheckpointTokens(cmd.Context(), cmd, args[0], jsonFlag) + return runCheckpointTokens(cmd.Context(), cmd, args[0], jsonFlag, compareFlag) }, } cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") + cmd.Flags().StringVar(&compareFlag, "compare", "", "Compare against a baseline checkpoint ID") return cmd } -func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string, jsonOutput bool) error { - cpID, lookup, err := resolveExplainCheckpointID(ctx, cmd.ErrOrStderr(), explainExportOptions{target: checkpointIDPrefix}) +func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string, jsonOutput bool, comparePrefix string) error { + report, lookup, err := loadCheckpointTokensReport(ctx, cmd, checkpointIDPrefix) if lookup != nil { defer lookup.Close() } @@ -62,27 +95,46 @@ func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPr return tokenCommandError(err) } + if comparePrefix != "" { + baselineReport, baselineLookup, err := loadCheckpointTokensReport(ctx, cmd, comparePrefix) + if baselineLookup != nil { + defer baselineLookup.Close() + } + if err != nil { + return tokenCommandError(err) + } + report.Comparison = buildCheckpointTokensComparison(report, baselineReport) + } + + if jsonOutput { + return writeCheckpointTokensJSON(cmd.OutOrStdout(), report) + } + writeCheckpointTokensText(cmd.OutOrStdout(), report) + return nil +} + +func loadCheckpointTokensReport(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string) (checkpointTokensReport, *explainCheckpointLookup, error) { + cpID, lookup, err := resolveExplainCheckpointID(ctx, cmd.ErrOrStderr(), explainExportOptions{target: checkpointIDPrefix}) + if err != nil { + return checkpointTokensReport{}, 
lookup, err + } + summary, err := lookup.store.ReadCommitted(ctx, cpID) if err != nil { - return tokenCommandError(fmt.Errorf("failed to read checkpoint: %w", err)) + return checkpointTokensReport{}, lookup, fmt.Errorf("failed to read checkpoint: %w", err) } if summary == nil || len(summary.Sessions) == 0 { cmd.SilenceUsage = true fmt.Fprintln(cmd.ErrOrStderr(), "Checkpoint not found.") - return NewSilentError(fmt.Errorf("%w: %s", checkpoint.ErrCheckpointNotFound, checkpointIDPrefix)) + return checkpointTokensReport{}, lookup, NewSilentError(fmt.Errorf("%w: %s", checkpoint.ErrCheckpointNotFound, checkpointIDPrefix)) } metas, metadataWarnings, err := readCheckpointTokenSessionMetadata(ctx, lookup.store, cpID, len(summary.Sessions)) if err != nil { - return tokenCommandError(err) + return checkpointTokensReport{}, lookup, err } - report := buildCheckpointTokensReport(cpID, summary, metas, metadataWarnings) - if jsonOutput { - return writeCheckpointTokensJSON(cmd.OutOrStdout(), report) - } - writeCheckpointTokensText(cmd.OutOrStdout(), report) - return nil + return buildCheckpointTokensReport(cpID, summary, metas, metadataWarnings), lookup, nil } func readCheckpointTokenSessionMetadata(ctx context.Context, store checkpoint.CommittedListReader, cpID id.CheckpointID, sessionCount int) ([]*checkpoint.CommittedMetadata, int, error) { @@ -318,6 +370,78 @@ func tokenPluralSuffix(count int) string { return "s" } +func buildCheckpointTokensComparison(target, baseline checkpointTokensReport) *checkpointTokensComparison { + comparison := &checkpointTokensComparison{ + BaselineCheckpointID: baseline.CheckpointID, + TargetCheckpointID: target.CheckpointID, + } + if target.Tokens == nil || baseline.Tokens == nil { + comparison.Status = checkpointComparisonStatusUnavailable + comparison.Qualification = "Comparison unavailable because token usage is missing for one checkpoint." 
+ comparison.Limitations = append(comparison.Limitations, comparison.Qualification) + return comparison + } + + comparison.Total = buildCheckpointMetricDelta(baseline.Tokens.Total, target.Tokens.Total) + comparison.CacheRead = buildCheckpointMetricDelta(baseline.Tokens.CacheRead, target.Tokens.CacheRead) + comparison.APICalls = buildCheckpointMetricDelta(baseline.Tokens.APICalls, target.Tokens.APICalls) + comparison.Status = checkpointComparisonStatus(comparison.Total) + comparison.Qualification = checkpointComparisonQualification(comparison.Status) + return comparison +} + +func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDelta { + delta := &checkpointTokensMetricDelta{ + Baseline: baseline, + Current: current, + Change: current - baseline, + Direction: checkpointDeltaDirection(current - baseline), + } + if baseline != 0 { + percent := float64(delta.Change) * 100 / float64(baseline) + delta.ChangePercent = &percent + } + return delta +} + +func checkpointDeltaDirection(change int) string { + switch { + case change < 0: + return "down" + case change > 0: + return "up" + default: + return "unchanged" + } +} + +func checkpointComparisonStatus(total *checkpointTokensMetricDelta) string { + if total == nil { + return checkpointComparisonStatusUnavailable + } + switch total.Direction { + case "down": + return checkpointComparisonStatusObservedReduction + case "up": + return checkpointComparisonStatusObservedIncrease + default: + return checkpointComparisonStatusObservedNoChange + } +} + +func checkpointComparisonQualification(status string) string { + switch status { + case checkpointComparisonStatusObservedReduction: + return "Observed token use decreased for this checkpoint comparison. This does not prove quality was preserved; verify the task outcome or tests before treating it as a successful optimization." + case checkpointComparisonStatusObservedIncrease: + return "Observed token use increased for this checkpoint comparison. 
Check whether the extra context was necessary before treating it as waste." + case checkpointComparisonStatusObservedNoChange: + return "Observed token use was unchanged for this checkpoint comparison. Quality still depends on the task outcome, not token totals alone." + default: + return "Comparison unavailable because token usage is missing for one checkpoint." + } +} + func writeCheckpointTokensJSON(w io.Writer, report checkpointTokensReport) error { enc := json.NewEncoder(w) enc.SetIndent("", " ") @@ -356,9 +480,54 @@ func writeCheckpointTokensText(w io.Writer, report checkpointTokensReport) { } writeTokenUsageSection(w, report.Tokens) + writeCheckpointTokenComparison(w, report.Comparison) if len(report.Recommendations) > 0 { writeTokenRecommendations(w, report.Recommendations) } writeTokenContributors(w, report.Contributors, report.Context) writeTokenLimitations(w, report.Limitations) } + +func writeCheckpointTokenComparison(w io.Writer, comparison *checkpointTokensComparison) { + if comparison == nil { + return + } + + fmt.Fprintln(w) + fmt.Fprintln(w, "Comparison") + fmt.Fprintf(w, "Baseline: %s\n", comparison.BaselineCheckpointID) + if comparison.Status != checkpointComparisonStatusUnavailable { + fmt.Fprintf(w, "Total tokens: %s\n", formatCheckpointMetricDelta(comparison.Total, formatTokenCount)) + fmt.Fprintf(w, "Cache/context replay: %s\n", formatCheckpointMetricDelta(comparison.CacheRead, formatTokenCount)) + fmt.Fprintf(w, "API calls: %s\n", formatCheckpointMetricDelta(comparison.APICalls, formatPlainCount)) + } + fmt.Fprintln(w) + fmt.Fprintln(w, "Qualification") + fmt.Fprintln(w, comparison.Qualification) +} + +func formatCheckpointMetricDelta(delta *checkpointTokensMetricDelta, formatValue func(int) string) string { + if delta == nil { + return "unavailable" + } + from := formatValue(delta.Baseline) + to := formatValue(delta.Current) + if delta.Direction == "unchanged" { + return fmt.Sprintf("unchanged (%s -> %s)", from, to) + } + if 
delta.ChangePercent == nil { + return fmt.Sprintf("%s (%s -> %s)", delta.Direction, from, to) + } + return fmt.Sprintf("%s %s (%s -> %s)", delta.Direction, formatPercent(absFloat(*delta.ChangePercent)), from, to) +} + +func formatPlainCount(value int) string { + return strconv.Itoa(value) +} + +func absFloat(value float64) float64 { + if value < 0 { + return -value + } + return value +} diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 5f4caa0bb5..9d59ef2b72 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1857,6 +1857,393 @@ func TestAddCheckpointTokenUsageSaturatesOverflow(t *testing.T) { } } +func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("aaa111bbb222") + currentID := id.MustCheckpointID("bbb222ccc333") + + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: baselineID, + SessionID: "checkpoint-token-baseline", + Strategy: strategy.StrategyNameManualCommit, + Branch: "tokens-compare", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"baseline"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: &agent.TokenUsage{ + InputTokens: 200_000, + CacheCreationTokens: 50_000, + CacheReadTokens: 750_000, + APICallCount: 10, + }, + }); err != nil { + t.Fatalf("WriteCommitted() baseline error = %v", err) + } + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: currentID, + SessionID: "checkpoint-token-current", + Strategy: strategy.StrategyNameManualCommit, + Branch: "tokens-compare", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"current"}]}}` + "\n")), + AuthorName: 
"Test", + AuthorEmail: "test@example.com", + TokenUsage: &agent.TokenUsage{ + InputTokens: 150_000, + CacheCreationTokens: 50_000, + CacheReadTokens: 300_000, + APICallCount: 4, + }, + }); err != nil { + t.Fatalf("WriteCommitted() current error = %v", err) + } + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "bbb222", "--compare", "aaa111"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoint tokens", + "Checkpoint: bbb222ccc333", + "Token usage", + "Total: 500k tokens", + "Comparison", + "Baseline: aaa111bbb222", + "Total tokens: down 50% (1000k -> 500k)", + "Cache/context replay: down 60% (750k -> 300k)", + "API calls: down 60% (10 -> 4)", + "Qualification", + "Observed token use decreased for this checkpoint comparison.", + "This does not prove quality was preserved", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("abc111abc111") + currentID := id.MustCheckpointID("abc222abc222") + + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: baselineID, + SessionID: "checkpoint-token-json-baseline", + Strategy: strategy.StrategyNameManualCommit, + Agent: testAgentGemini, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"baseline json"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: &agent.TokenUsage{ + InputTokens: 100, + CacheReadTokens: 300, + OutputTokens: 100, + APICallCount: 5, + }, + }); err != nil { + t.Fatalf("WriteCommitted() baseline error = %v", 
err) + } + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: currentID, + SessionID: "checkpoint-token-json-current", + Strategy: strategy.StrategyNameManualCommit, + Agent: testAgentGemini, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"current json"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: &agent.TokenUsage{ + InputTokens: 120, + CacheReadTokens: 480, + OutputTokens: 200, + APICallCount: 8, + }, + }); err != nil { + t.Fatalf("WriteCommitted() current error = %v", err) + } + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "abc222", "--compare", "abc111", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result checkpointTokensReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) + } + if result.Comparison == nil { + t.Fatalf("expected comparison, got nil") + } + if result.Comparison.Status != "observed_increase" { + t.Fatalf("expected observed_increase status, got %q", result.Comparison.Status) + } + if result.Comparison.BaselineCheckpointID != "abc111abc111" { + t.Errorf("baseline checkpoint id = %q, want abc111abc111", result.Comparison.BaselineCheckpointID) + } + if result.Comparison.TargetCheckpointID != "abc222abc222" { + t.Errorf("target checkpoint id = %q, want abc222abc222", result.Comparison.TargetCheckpointID) + } + if result.Comparison.Total == nil { + t.Fatalf("expected total delta, got nil") + } + if result.Comparison.Total.Baseline != 500 || result.Comparison.Total.Current != 800 { + t.Fatalf("unexpected total delta: %+v", result.Comparison.Total) + } + if result.Comparison.Total.Change != 300 { + t.Fatalf("expected total change 300, got %+v", result.Comparison.Total) + } + 
if result.Comparison.Total.Direction != "up" { + t.Fatalf("expected total direction up, got %+v", result.Comparison.Total) + } + if result.Comparison.Total.ChangePercent == nil || *result.Comparison.Total.ChangePercent != 60 { + t.Fatalf("expected total change percent 60, got %+v", result.Comparison.Total) + } +} + +func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("111aaa222bbb") + currentID := id.MustCheckpointID("222bbb333ccc") + + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-no-change-baseline", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 2, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-no-change-current", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 2, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "222bbb", "--compare", "111aaa"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Total tokens: unchanged (200 -> 200)", + "Cache/context replay: unchanged (0 -> 0)", + "API calls: unchanged (2 -> 2)", + "Observed token use was unchanged for this checkpoint comparison.", + "Quality still depends on the task outcome", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestCheckpointTokensCmd_ComparisonUnavailableWhenBaselineTokenDataMissing(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("333ccc444ddd") + currentID := id.MustCheckpointID("444ddd555eee") + + writeCommittedTokenCheckpoint(ctx, t, 
store, baselineID, "checkpoint-token-missing-baseline", nil) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-current-with-data", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 50, + APICallCount: 1, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "444ddd", "--compare", "333ccc"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Comparison", + "Baseline: 333ccc444ddd", + "Qualification", + "Comparison unavailable because token usage is missing for one checkpoint.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + if strings.Contains(out, "Total tokens:") { + t.Fatalf("expected unavailable comparison to omit metric deltas, got:\n%s", out) + } +} + +func TestCheckpointTokensCmd_JSONComparisonUnavailableWhenCurrentTokenDataMissing(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("555eee666fff") + currentID := id.MustCheckpointID("666fff777aaa") + + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-baseline-with-data", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 50, + APICallCount: 1, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-missing-current", nil) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "666fff", "--compare", "555eee", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result checkpointTokensReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, 
stdout.String()) + } + if result.Comparison == nil { + t.Fatalf("expected comparison, got nil") + } + if result.Comparison.Status != "unavailable" { + t.Fatalf("expected unavailable status, got %q", result.Comparison.Status) + } + if result.Comparison.Total != nil { + t.Fatalf("expected no total delta when current token data is missing, got %+v", result.Comparison.Total) + } + if len(result.Comparison.Limitations) == 0 { + t.Fatalf("expected comparison limitation, got %+v", result.Comparison) + } +} + +func TestCheckpointTokensCmd_ComparisonUsesMultiSessionAggregates(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("777aaa888bbb") + currentID := id.MustCheckpointID("888bbb999ccc") + + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-baseline-one", &agent.TokenUsage{ + InputTokens: 1_000, + APICallCount: 1, + }) + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-baseline-two", &agent.TokenUsage{ + OutputTokens: 1_000, + APICallCount: 1, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-current-one", &agent.TokenUsage{ + InputTokens: 500, + APICallCount: 1, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-current-two", &agent.TokenUsage{ + OutputTokens: 500, + APICallCount: 1, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "888bbb", "--compare", "777aaa"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Sessions: 2", + "Total: 1k tokens", + "Baseline: 777aaa888bbb", + "Total tokens: down 50% (2k -> 1k)", + "API calls: unchanged (2 -> 2)", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) 
+ } + } +} + +func TestCheckpointTokensCmd_ComparisonOmitsPercentWhenBaselineMetricIsZero(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + baselineID := id.MustCheckpointID("999ccc000aaa") + currentID := id.MustCheckpointID("000aaa111bbb") + + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-zero-api-baseline", &agent.TokenUsage{ + InputTokens: 100, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-zero-api-current", &agent.TokenUsage{ + InputTokens: 100, + APICallCount: 3, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "000aaa", "--compare", "999ccc"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Total tokens: unchanged (100 -> 100)", + "API calls: up (0 -> 3)", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + if strings.Contains(out, "API calls: up ") && strings.Contains(out, "API calls: up %") { + t.Fatalf("expected zero-baseline API delta to omit percent, got:\n%s", out) + } +} + +func writeCommittedTokenCheckpoint(ctx context.Context, t *testing.T, store *checkpoint.GitStore, cpID id.CheckpointID, sessionID string, usage *agent.TokenUsage) { + t.Helper() + + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: sessionID, + Strategy: strategy.StrategyNameManualCommit, + Branch: "tokens-compare", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"compare"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: usage, + }); err != nil { + t.Fatalf("WriteCommitted(%s) error = %v", cpID, err) + } +} + 
func TestInfoCmd_EndedSession(t *testing.T) { setupStopTestRepo(t) From 83b933062ca236939f58de8fe0ae87dddc29ba7b Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Sat, 13 Jun 2026 18:21:36 +0900 Subject: [PATCH 03/26] add API-only token brief guidance Entire-Checkpoint: 210fe202c7ca --- cmd/entire/cli/session_tokens.go | 2 ++ cmd/entire/cli/sessions_test.go | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index 5b588232e5..caeabf8147 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -462,6 +462,8 @@ func agentBriefNextAction(report sessionTokensReport) string { switch { case hasTokenRecommendation(report, "context-replay-hotspot") && hasTokenRecommendation(report, "api-call-amplification"): return "Summarize the useful findings, then batch the next diagnostic step. Avoid more exploratory reads until you have a narrowed hypothesis." + case hasTokenRecommendation(report, "api-call-amplification"): + return "Batch the next diagnostic step around one narrowed hypothesis before making more tool calls." case hasTokenRecommendation(report, "context-replay-hotspot"): return "Summarize the current useful findings before continuing, and keep the next prompt narrow." 
case hasTokenRecommendation(report, "no-token-data"): diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 9d59ef2b72..f0e05be35d 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1306,6 +1306,47 @@ func TestTokensCmd_AgentBriefHighCacheReplayWithoutHighAPICalls(t *testing.T) { } } +func TestTokensCmd_AgentBriefHighAPICallsWithoutCacheReplay(t *testing.T) { + setupStopTestRepo(t) + + ctx := context.Background() + state := makeSessionState("test-tokens-brief-api-only", session.PhaseActive) + state.AgentType = testAgentClaude + state.TokenUsage = &agent.TokenUsage{ + InputTokens: 10_000, + OutputTokens: 1_000, + APICallCount: 25, + } + + if err := strategy.SaveSessionState(ctx, state); err != nil { + t.Fatalf("SaveSessionState() error = %v", err) + } + + cmd := newTokensCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"test-tokens-brief-api-only", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Token usage: 11k total; 25 API calls.", + "Batch the next diagnostic step around one narrowed hypothesis before making more tool calls.", + "- API call count is high for one session.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + if strings.Contains(out, "Continue normally") { + t.Fatalf("expected high API calls to avoid continue-normally action, got:\n%s", out) + } +} + func TestTokensCmd_AgentBriefNoTokenData(t *testing.T) { setupStopTestRepo(t) From 79f19d06f8fd4c68295ac0887956b3266dc599bb Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Sat, 13 Jun 2026 19:29:37 +0900 Subject: [PATCH 04/26] fix checkpoint comparison test store refs Entire-Checkpoint: abbcd842b704 --- cmd/entire/cli/sessions_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 
deletions(-) diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index f0e05be35d..b8eda50c62 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1901,7 +1901,7 @@ func TestAddCheckpointTokenUsageSaturatesOverflow(t *testing.T) { func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("aaa111bbb222") currentID := id.MustCheckpointID("bbb222ccc333") @@ -1976,7 +1976,7 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("abc111abc111") currentID := id.MustCheckpointID("abc222abc222") @@ -2060,7 +2060,7 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("111aaa222bbb") currentID := id.MustCheckpointID("222bbb333ccc") @@ -2102,7 +2102,7 @@ func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { func TestCheckpointTokensCmd_ComparisonUnavailableWhenBaselineTokenDataMissing(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("333ccc444ddd") currentID := id.MustCheckpointID("444ddd555eee") @@ -2142,7 +2142,7 @@ func 
TestCheckpointTokensCmd_ComparisonUnavailableWhenBaselineTokenDataMissing(t func TestCheckpointTokensCmd_JSONComparisonUnavailableWhenCurrentTokenDataMissing(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("555eee666fff") currentID := id.MustCheckpointID("666fff777aaa") @@ -2183,7 +2183,7 @@ func TestCheckpointTokensCmd_JSONComparisonUnavailableWhenCurrentTokenDataMissin func TestCheckpointTokensCmd_ComparisonUsesMultiSessionAggregates(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("777aaa888bbb") currentID := id.MustCheckpointID("888bbb999ccc") @@ -2231,7 +2231,7 @@ func TestCheckpointTokensCmd_ComparisonUsesMultiSessionAggregates(t *testing.T) func TestCheckpointTokensCmd_ComparisonOmitsPercentWhenBaselineMetricIsZero(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) baselineID := id.MustCheckpointID("999ccc000aaa") currentID := id.MustCheckpointID("000aaa111bbb") From 1735b020f12cbedb2b2784311af049dedc9b2d22 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 18 Jun 2026 00:54:42 -0400 Subject: [PATCH 05/26] fix checkpoint token metadata fallbacks Entire-Checkpoint: 7a1b83ebd234 --- cmd/entire/cli/checkpoint_tokens.go | 5 ++- cmd/entire/cli/review_context.go | 4 +- cmd/entire/cli/sessions_test.go | 66 +++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 755a0df47c..3145d2c3c0 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ 
b/cmd/entire/cli/checkpoint_tokens.go @@ -137,10 +137,13 @@ func loadCheckpointTokensReport(ctx context.Context, cmd *cobra.Command, checkpo return buildCheckpointTokensReport(cpID, summary, metas, metadataWarnings), lookup, nil } -func readCheckpointTokenSessionMetadata(ctx context.Context, store checkpoint.CommittedListReader, cpID id.CheckpointID, sessionCount int) ([]*checkpoint.CommittedMetadata, int, error) { +func readCheckpointTokenSessionMetadata(ctx context.Context, store checkpointSessionMetadataReader, cpID id.CheckpointID, sessionCount int) ([]*checkpoint.CommittedMetadata, int, error) { metas := make([]*checkpoint.CommittedMetadata, 0, sessionCount) var warnings int for i := range sessionCount { + if ctxErr := ctx.Err(); ctxErr != nil { + return nil, warnings, ctxErr //nolint:wrapcheck // Propagating context cancellation. + } meta, err := store.ReadSessionMetadata(ctx, cpID, i) if err != nil { if ctxErr := ctx.Err(); ctxErr != nil { diff --git a/cmd/entire/cli/review_context.go b/cmd/entire/cli/review_context.go index c98888bee6..70bf7a5b8b 100644 --- a/cmd/entire/cli/review_context.go +++ b/cmd/entire/cli/review_context.go @@ -28,7 +28,7 @@ const ( reviewContextCommitSeparator = "\x1e" ) -type reviewContextSessionMetadataReader interface { +type checkpointSessionMetadataReader interface { ReadSessionMetadata(ctx context.Context, checkpointID checkpointid.CheckpointID, sessionIndex int) (*checkpoint.CommittedMetadata, error) } @@ -311,7 +311,7 @@ func readReviewContextSessionMetadata( cpID checkpointid.CheckpointID, sessionIndex int, ) (*checkpoint.CommittedMetadata, error) { - if r, ok := reader.(reviewContextSessionMetadataReader); ok { + if r, ok := reader.(checkpointSessionMetadataReader); ok { return r.ReadSessionMetadata(ctx, cpID, sessionIndex) //nolint:wrapcheck // Best-effort prompt context. 
} content, err := reader.ReadSessionContent(ctx, cpID, sessionIndex) diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index b8eda50c62..42d836e2ac 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1898,6 +1898,72 @@ func TestAddCheckpointTokenUsageSaturatesOverflow(t *testing.T) { } } +func TestCheckpointTokensReport_UsesRootSummaryWhenNoSessionMetadataReadable(t *testing.T) { + t.Parallel() + + cpID := id.MustCheckpointID("abc123def456") + report := buildCheckpointTokensReport( + cpID, + &checkpoint.CheckpointSummary{ + CheckpointID: cpID, + Sessions: []checkpoint.SessionFilePaths{ + {Metadata: "0/metadata.json"}, + {Metadata: "1/metadata.json"}, + }, + TokenUsage: &agent.TokenUsage{ + InputTokens: 1000, + OutputTokens: 500, + APICallCount: 7, + }, + }, + nil, + 2, + ) + + if report.Tokens == nil { + t.Fatalf("expected token data, got nil") + } + if report.Tokens.Total != 1500 || report.Tokens.APICalls != 7 { + t.Fatalf("expected root summary tokens, got %+v", report.Tokens) + } +} + +type cancelingCheckpointMetadataReader struct { + cancel context.CancelFunc + calls int +} + +func (r *cancelingCheckpointMetadataReader) ReadSessionMetadata( + _ context.Context, + _ id.CheckpointID, + _ int, +) (*checkpoint.CommittedMetadata, error) { + r.calls++ + if r.calls == 1 { + r.cancel() + return &checkpoint.CommittedMetadata{SessionID: "read-before-cancel"}, nil + } + return &checkpoint.CommittedMetadata{SessionID: "read-after-cancel"}, nil +} + +func TestReadCheckpointTokenSessionMetadataStopsBetweenReadsWhenContextCanceled(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + reader := &cancelingCheckpointMetadataReader{cancel: cancel} + + metas, warnings, err := readCheckpointTokenSessionMetadata(ctx, reader, id.MustCheckpointID("abc123abc123"), 2) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got metas=%+v warnings=%d err=%v", 
metas, warnings, err) + } + if reader.calls != 1 { + t.Fatalf("expected one metadata read before cancellation, got %d", reader.calls) + } + if metas != nil || warnings != 0 { + t.Fatalf("expected canceled read to return no partial results, got metas=%+v warnings=%d", metas, warnings) + } +} + func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() From 0828cf518a072ffb4b2d8823b7efae09bf8e655a Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 18 Jun 2026 18:49:55 -0400 Subject: [PATCH 06/26] clamp checkpoint token delta overflow Entire-Checkpoint: 70741de008a8 --- cmd/entire/cli/checkpoint_tokens.go | 39 +++++++++++++++++++++++------ cmd/entire/cli/sessions_test.go | 25 +++++++++++++++++- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 3145d2c3c0..e18d7ec71f 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -56,6 +56,10 @@ const ( checkpointComparisonStatusObservedReduction = "observed_reduction" checkpointComparisonStatusObservedIncrease = "observed_increase" checkpointComparisonStatusObservedNoChange = "observed_no_change" + + checkpointDeltaDirectionDown = "down" + checkpointDeltaDirectionUp = "up" + checkpointDeltaDirectionUnchanged = "unchanged" ) func newCheckpointTokensCmd() *cobra.Command { @@ -394,11 +398,12 @@ func buildCheckpointTokensComparison(target, baseline checkpointTokensReport) *c } func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDelta { + change := saturatingIntSub(current, baseline) delta := &checkpointTokensMetricDelta{ Baseline: baseline, Current: current, - Change: current - baseline, - Direction: checkpointDeltaDirection(current - baseline), + Change: change, + Direction: checkpointDeltaDirection(change), } if baseline != 0 { percent := float64(delta.Change) * 100 / float64(baseline) @@ 
-407,14 +412,32 @@ func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDe return delta } +func saturatingIntSub(a, b int) int { + if b < 0 && a > maxInt()+b { + return maxInt() + } + if b > 0 && a < minInt()+b { + return minInt() + } + return a - b +} + +func maxInt() int { + return int(^uint(0) >> 1) +} + +func minInt() int { + return -maxInt() - 1 +} + func checkpointDeltaDirection(change int) string { switch { case change < 0: - return "down" + return checkpointDeltaDirectionDown case change > 0: - return "up" + return checkpointDeltaDirectionUp default: - return "unchanged" + return checkpointDeltaDirectionUnchanged } } @@ -423,9 +446,9 @@ func checkpointComparisonStatus(total *checkpointTokensMetricDelta) string { return checkpointComparisonStatusUnavailable } switch total.Direction { - case "down": + case checkpointDeltaDirectionDown: return checkpointComparisonStatusObservedReduction - case "up": + case checkpointDeltaDirectionUp: return checkpointComparisonStatusObservedIncrease default: return checkpointComparisonStatusObservedNoChange @@ -515,7 +538,7 @@ func formatCheckpointMetricDelta(delta *checkpointTokensMetricDelta, formatValue } from := formatValue(delta.Baseline) to := formatValue(delta.Current) - if delta.Direction == "unchanged" { + if delta.Direction == checkpointDeltaDirectionUnchanged { return fmt.Sprintf("unchanged (%s -> %s)", from, to) } if delta.ChangePercent == nil { diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 42d836e2ac..3a597a6c95 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -2115,7 +2115,7 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { if result.Comparison.Total.Change != 300 { t.Fatalf("expected total change 300, got %+v", result.Comparison.Total) } - if result.Comparison.Total.Direction != "up" { + if result.Comparison.Total.Direction != checkpointDeltaDirectionUp { t.Fatalf("expected total direction 
up, got %+v", result.Comparison.Total) } if result.Comparison.Total.ChangePercent == nil || *result.Comparison.Total.ChangePercent != 60 { @@ -2165,6 +2165,29 @@ func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { } } +func TestBuildCheckpointMetricDeltaClampsChangeOverflow(t *testing.T) { + t.Parallel() + + maxInt := int(^uint(0) >> 1) + minInt := -maxInt - 1 + + up := buildCheckpointMetricDelta(minInt, maxInt) + if up.Change != maxInt { + t.Fatalf("upward overflow change = %d, want %d", up.Change, maxInt) + } + if up.Direction != checkpointDeltaDirectionUp { + t.Fatalf("upward overflow direction = %q, want up", up.Direction) + } + + down := buildCheckpointMetricDelta(maxInt, minInt) + if down.Change != minInt { + t.Fatalf("downward overflow change = %d, want %d", down.Change, minInt) + } + if down.Direction != checkpointDeltaDirectionDown { + t.Fatalf("downward overflow direction = %q, want down", down.Direction) + } +} + func TestCheckpointTokensCmd_ComparisonUnavailableWhenBaselineTokenDataMissing(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() From 589d5b026f622b6bf74d35edc99720d8e2b76a6a Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 22 Jun 2026 17:00:18 -0400 Subject: [PATCH 07/26] clarify token delta overflow guard Entire-Checkpoint: 65d01316b402 --- cmd/entire/cli/checkpoint_tokens.go | 12 ++++++-- cmd/entire/cli/sessions_test.go | 44 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index e18d7ec71f..134fc97086 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -413,8 +413,16 @@ func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDe } func saturatingIntSub(a, b int) int { - if b < 0 && a > maxInt()+b { - return maxInt() + if b < 0 { + if b == minInt() { + if a >= 0 { + return maxInt() + } + return a - b + } + if a > 
maxInt()-(-b) { + return maxInt() + } } if b > 0 && a < minInt()+b { return minInt() diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 3a597a6c95..2cf0b3805f 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -2188,6 +2188,50 @@ func TestBuildCheckpointMetricDeltaClampsChangeOverflow(t *testing.T) { } } +func TestSaturatingIntSubHandlesMinIntSubtrahend(t *testing.T) { + t.Parallel() + + maxInt := int(^uint(0) >> 1) + minInt := -maxInt - 1 + + tests := []struct { + name string + a int + want int + }{ + { + name: "clamps non-negative minuend", + a: 0, + want: maxInt, + }, + { + name: "keeps max exact result", + a: -1, + want: maxInt, + }, + { + name: "keeps representable result", + a: -2, + want: maxInt - 1, + }, + { + name: "keeps zero exact result", + a: minInt, + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if got := saturatingIntSub(tt.a, minInt); got != tt.want { + t.Fatalf("saturatingIntSub(%d, minInt) = %d, want %d", tt.a, got, tt.want) + } + }) + } +} + func TestCheckpointTokensCmd_ComparisonUnavailableWhenBaselineTokenDataMissing(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() From 7fcdff6adc3ba79c0ce3f76a633f98d2f83bdb37 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 22 Jun 2026 17:59:10 -0400 Subject: [PATCH 08/26] avoid checkpoint delta percent overflow Entire-Checkpoint: 84e842d92745 --- cmd/entire/cli/checkpoint_tokens.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 134fc97086..08b6420811 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -406,7 +406,7 @@ func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDe Direction: checkpointDeltaDirection(change), } if baseline != 0 { - percent := float64(delta.Change) * 100 / 
float64(baseline) + percent := (float64(delta.Change) / float64(baseline)) * 100 delta.ChangePercent = &percent } return delta From dd4bc64008fcf342f27d576b6f0198062f2ece2c Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 10:31:56 -0400 Subject: [PATCH 09/26] Clarify token guidance heuristics --- cmd/entire/cli/checkpoint_tokens.go | 18 ++++++++++++--- cmd/entire/cli/session_tokens.go | 22 ++++++++++++++----- cmd/entire/cli/sessions_test.go | 34 +++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 11 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 08b6420811..4edab66ea1 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -39,6 +39,7 @@ type checkpointTokensComparison struct { Total *checkpointTokensMetricDelta `json:"total,omitempty"` CacheRead *checkpointTokensMetricDelta `json:"cache_read,omitempty"` APICalls *checkpointTokensMetricDelta `json:"api_calls,omitempty"` + CacheReadCaveat string `json:"cache_read_caveat,omitempty"` Qualification string `json:"qualification"` Limitations []string `json:"limitations,omitempty"` } @@ -392,6 +393,7 @@ func buildCheckpointTokensComparison(target, baseline checkpointTokensReport) *c comparison.Total = buildCheckpointMetricDelta(baseline.Tokens.Total, target.Tokens.Total) comparison.CacheRead = buildCheckpointMetricDelta(baseline.Tokens.CacheRead, target.Tokens.CacheRead) comparison.APICalls = buildCheckpointMetricDelta(baseline.Tokens.APICalls, target.Tokens.APICalls) + comparison.CacheReadCaveat = checkpointComparisonCacheReadCaveat(comparison.CacheRead) comparison.Status = checkpointComparisonStatus(comparison.Total) comparison.Qualification = checkpointComparisonQualification(comparison.Status) return comparison @@ -466,16 +468,23 @@ func checkpointComparisonStatus(total *checkpointTokensMetricDelta) string { func checkpointComparisonQualification(status string) string { switch status { case 
checkpointComparisonStatusObservedReduction: - return "Observed token use decreased for this checkpoint comparison. This does not prove quality was preserved; verify the task outcome or tests before treating it as a successful optimization." + return "Observed total token use decreased for this checkpoint comparison. This does not prove quality was preserved; verify the task outcome or tests before treating it as a successful optimization." case checkpointComparisonStatusObservedIncrease: - return "Observed token use increased for this checkpoint comparison. Check whether the extra context was necessary before treating it as waste." + return "Observed total token use increased for this checkpoint comparison. Check whether the extra context was necessary before treating it as waste." case checkpointComparisonStatusObservedNoChange: - return "Observed token use was unchanged for this checkpoint comparison. Quality still depends on the task outcome, not token totals alone." + return "Observed total token use was unchanged for this checkpoint comparison. Quality still depends on the task outcome, not token totals alone." default: return "Comparison unavailable because token usage is missing for one checkpoint." } } +func checkpointComparisonCacheReadCaveat(delta *checkpointTokensMetricDelta) string { + if delta == nil || (delta.Baseline == 0 && delta.Current == 0) { + return "" + } + return "Total tokens include cache/context replay; use the cache/context replay delta below before treating total direction as work saved or added." 
+} + func writeCheckpointTokensJSON(w io.Writer, report checkpointTokensReport) error { enc := json.NewEncoder(w) enc.SetIndent("", " ") @@ -530,6 +539,9 @@ func writeCheckpointTokenComparison(w io.Writer, comparison *checkpointTokensCom fmt.Fprintln(w) fmt.Fprintln(w, "Comparison") fmt.Fprintf(w, "Baseline: %s\n", comparison.BaselineCheckpointID) + if comparison.CacheReadCaveat != "" { + fmt.Fprintf(w, "Caveat: %s\n", comparison.CacheReadCaveat) + } if comparison.Status != checkpointComparisonStatusUnavailable { fmt.Fprintf(w, "Total tokens: %s\n", formatCheckpointMetricDelta(comparison.Total, formatTokenCount)) fmt.Fprintf(w, "Cache/context replay: %s\n", formatCheckpointMetricDelta(comparison.CacheRead, formatTokenCount)) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index caeabf8147..f4f6e72b80 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -66,6 +66,16 @@ type tokenRecommendationSignals struct { CheckpointCount int } +// Recommendation thresholds are coarse diagnostics for clear token hotspots, not a cost model or quality verdict. 
+const ( + recommendationHighCacheReadPercent = 80 + recommendationHighAPICalls = 20 + recommendationSubagentShareDenominator = 10 + recommendationHighContextPercent = 80 + recommendationLongSessionTurns = 10 + recommendationLongSessionCheckpoints = 5 +) + func newTokensCmd() *cobra.Command { var jsonFlag bool var currentFlag bool @@ -285,7 +295,7 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco cacheReadHotspot := false if signals.Tokens != nil && signals.Tokens.CacheRead > 0 { cacheReadPercent := tokenPercent(signals.Tokens.CacheRead, topLevelSessionTokenTotal(signals.Tokens)) - if cacheReadPercent >= 80 { + if cacheReadPercent >= recommendationHighCacheReadPercent { cacheReadHotspot = true recs = append(recs, sessionTokensRecommendation{ ID: "context-replay-hotspot", @@ -298,7 +308,7 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco }) } } - if signals.Tokens != nil && signals.Tokens.APICalls >= 20 { + if signals.Tokens != nil && signals.Tokens.APICalls >= recommendationHighAPICalls { message := fmt.Sprintf("API call count is high for one session: %d calls. 
Batch the next diagnosis and reduce iterative calls.", signals.Tokens.APICalls) if cacheReadHotspot { message = fmt.Sprintf("Large context was replayed across %d API calls; batch the next diagnosis and reduce iterative tool calls.", signals.Tokens.APICalls) @@ -318,7 +328,7 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco Signals: []string{"subagent_tokens"}, }) } - if signals.Context != nil && signals.Context.Percent >= 80 { + if signals.Context != nil && signals.Context.Percent >= recommendationHighContextPercent { recs = append(recs, sessionTokensRecommendation{ ID: "high-context-pressure", Severity: "medium", @@ -326,7 +336,7 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco Signals: []string{"context_tokens"}, }) } - if cacheReadHotspot && signals.Tokens != nil && signals.Tokens.APICalls >= 20 { + if cacheReadHotspot && signals.Tokens != nil && signals.Tokens.APICalls >= recommendationHighAPICalls { recs = append(recs, sessionTokensRecommendation{ ID: "summarize-before-boundary", Severity: "low", @@ -334,7 +344,7 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco Signals: []string{"cache_read_tokens", "api_call_count"}, }) } - if signals.TurnCount >= 10 || signals.CheckpointCount >= 5 { + if signals.TurnCount >= recommendationLongSessionTurns || signals.CheckpointCount >= recommendationLongSessionCheckpoints { recs = append(recs, sessionTokensRecommendation{ ID: "long-session", Severity: "low", @@ -350,7 +360,7 @@ func tokenShareAtLeastOneTenth(part, total int) bool { if part <= 0 || total <= 0 { return false } - return part >= (total-1)/10+1 + return part >= (total-1)/recommendationSubagentShareDenominator+1 } func tokenPercent(value, total int) float64 { diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 2cf0b3805f..f4babc2f56 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1095,6 
+1095,32 @@ func TestRecommendationRulesCacheReplayUsesTopLevelTokenTotal(t *testing.T) { } } +func TestRecommendationThresholdsDocumentCurrentHeuristics(t *testing.T) { + t.Parallel() + + checks := map[string]int{ + "cache read hotspot percent": recommendationHighCacheReadPercent, + "high API calls": recommendationHighAPICalls, + "subagent share denominator": recommendationSubagentShareDenominator, + "high context percent": recommendationHighContextPercent, + "long session turns": recommendationLongSessionTurns, + "long checkpoint count": recommendationLongSessionCheckpoints, + } + want := map[string]int{ + "cache read hotspot percent": 80, + "high API calls": 20, + "subagent share denominator": 10, + "high context percent": 80, + "long session turns": 10, + "long checkpoint count": 5, + } + for name, got := range checks { + if got != want[name] { + t.Fatalf("%s = %d, want %d", name, got, want[name]) + } + } +} + func TestTokensCmd_JSONOutputReportsLimitations(t *testing.T) { setupStopTestRepo(t) @@ -2025,11 +2051,12 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { "Total: 500k tokens", "Comparison", "Baseline: aaa111bbb222", + "Caveat: Total tokens include cache/context replay; use the cache/context replay delta below before treating total direction as work saved or added.", "Total tokens: down 50% (1000k -> 500k)", "Cache/context replay: down 60% (750k -> 300k)", "API calls: down 60% (10 -> 4)", "Qualification", - "Observed token use decreased for this checkpoint comparison.", + "Observed total token use decreased for this checkpoint comparison.", "This does not prove quality was preserved", } for _, check := range checks { @@ -2121,6 +2148,9 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { if result.Comparison.Total.ChangePercent == nil || *result.Comparison.Total.ChangePercent != 60 { t.Fatalf("expected total change percent 60, got %+v", result.Comparison.Total) } + if result.Comparison.CacheReadCaveat == "" { + 
t.Fatalf("expected cache read caveat, got %+v", result.Comparison) + } } func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { @@ -2155,7 +2185,7 @@ func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { "Total tokens: unchanged (200 -> 200)", "Cache/context replay: unchanged (0 -> 0)", "API calls: unchanged (2 -> 2)", - "Observed token use was unchanged for this checkpoint comparison.", + "Observed total token use was unchanged for this checkpoint comparison.", "Quality still depends on the task outcome", } for _, check := range checks { From 54766c1ef70474f1497a6b77d7a1228a88c7fad1 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 12:59:41 -0400 Subject: [PATCH 10/26] Align agent brief cache replay scope --- cmd/entire/cli/session_tokens.go | 2 +- cmd/entire/cli/sessions_test.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index f4f6e72b80..212b009d5b 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -454,7 +454,7 @@ func agentBriefUsageLine(tokens *sessionTokensUsage) string { return fmt.Sprintf( "Token usage: %s total; %s cache/context replay; %s.", formatTokenCount(tokens.Total), - formatPercent(tokenPercent(tokens.CacheRead, tokens.Total)), + formatPercent(tokenPercent(tokens.CacheRead, topLevelSessionTokenTotal(tokens))), formatAPICalls(tokens.APICalls), ) } diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index f4babc2f56..be956dbadc 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1290,6 +1290,25 @@ func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { } } +func TestAgentBriefUsageLineUsesTopLevelCacheReplayTotal(t *testing.T) { + t.Parallel() + + line := agentBriefUsageLine(&sessionTokensUsage{ + Total: 10000, + Input: 100, + CacheRead: 800, + CacheWrite: 50, + Output: 50, + APICalls: 20, + 
SubagentTotal: 9000, + }) + + want := "Token usage: 10k total; 80% cache/context replay; 20 API calls." + if line != want { + t.Fatalf("agentBriefUsageLine() = %q, want %q", line, want) + } +} + func TestTokensCmd_AgentBriefHighCacheReplayWithoutHighAPICalls(t *testing.T) { setupStopTestRepo(t) From 1531c238178e802c7242c9393c1ca68083e9acad Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 13:04:47 -0400 Subject: [PATCH 11/26] Check cancellation before token metadata reads --- cmd/entire/cli/checkpoint_tokens.go | 3 +++ cmd/entire/cli/sessions_test.go | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 4edab66ea1..eaa32bfd82 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -143,6 +143,9 @@ func loadCheckpointTokensReport(ctx context.Context, cmd *cobra.Command, checkpo } func readCheckpointTokenSessionMetadata(ctx context.Context, store checkpointSessionMetadataReader, cpID id.CheckpointID, sessionCount int) ([]*checkpoint.CommittedMetadata, int, error) { + if ctxErr := ctx.Err(); ctxErr != nil { + return nil, 0, ctxErr //nolint:wrapcheck // Propagating context cancellation. 
+ } metas := make([]*checkpoint.CommittedMetadata, 0, sessionCount) var warnings int for i := range sessionCount { diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index be956dbadc..69ff5bad40 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -2009,6 +2009,21 @@ func TestReadCheckpointTokenSessionMetadataStopsBetweenReadsWhenContextCanceled( } } +func TestReadCheckpointTokenSessionMetadataChecksCanceledContextBeforeAllocation(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + metas, warnings, err := readCheckpointTokenSessionMetadata(ctx, nil, id.MustCheckpointID("abc123abc123"), 0) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got metas=%+v warnings=%d err=%v", metas, warnings, err) + } + if metas != nil || warnings != 0 { + t.Fatalf("expected canceled read to return no results, got metas=%+v warnings=%d", metas, warnings) + } +} + func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() From b11488c2d95122395003271e05c01a59d75a39f7 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 13:40:14 -0400 Subject: [PATCH 12/26] Use checkpoint summary when metadata is partial --- cmd/entire/cli/checkpoint_tokens.go | 2 +- cmd/entire/cli/sessions_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index eaa32bfd82..83a424c142 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -197,7 +197,7 @@ func buildCheckpointTokensReport(cpID id.CheckpointID, summary *checkpoint.Check } usage := aggregateCheckpointTokenUsage(metas) - if usage == nil && summary != nil { + if summary != nil && summary.TokenUsage != nil && (usage == nil || metadataWarnings > 0) { usage = summary.TokenUsage } if tokens 
:= buildSessionTokensUsage(usage); tokens != nil { diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 69ff5bad40..8a4940eb5f 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1864,7 +1864,7 @@ func TestCheckpointTokensCmd_JSONOutput(t *testing.T) { } } -func TestCheckpointTokensReport_UsesReadableMetadataWhenSessionMetadataIncomplete(t *testing.T) { +func TestCheckpointTokensReport_UsesRootSummaryWhenSessionMetadataIncomplete(t *testing.T) { t.Parallel() cpID := id.MustCheckpointID("abc123abc123") @@ -1896,8 +1896,8 @@ func TestCheckpointTokensReport_UsesReadableMetadataWhenSessionMetadataIncomplet if report.Tokens == nil { t.Fatalf("expected token data, got nil") } - if report.Tokens.Total != 100 { - t.Fatalf("expected readable session metadata total 100, got %+v", report.Tokens) + if report.Tokens.Total != 1500 || report.Tokens.APICalls != 7 { + t.Fatalf("expected root summary tokens, got %+v", report.Tokens) } if len(report.Limitations) == 0 || !strings.Contains(report.Limitations[0], "1 checkpoint session metadata file could not be read") { t.Fatalf("expected incomplete metadata limitation, got %+v", report.Limitations) From 39be4214fffa53c02ce275c7c4acff7c2b6b1228 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 13:53:11 -0400 Subject: [PATCH 13/26] Avoid singular checkpoint metadata on partial reads --- cmd/entire/cli/checkpoint_tokens.go | 2 +- cmd/entire/cli/sessions_test.go | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 83a424c142..8c6e9bdcb7 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -180,7 +180,7 @@ func buildCheckpointTokensReport(cpID id.CheckpointID, summary *checkpoint.Check report.Agents = checkpointAgentLabels(metas) report.Models = checkpointModelLabels(metas) - if len(metas) == 1 && metas[0] != nil { + if 
report.SessionCount == 1 && len(metas) == 1 && metas[0] != nil { meta := metas[0] report.SessionID = meta.SessionID if len(report.Agents) > 0 { diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 8a4940eb5f..cd6b944ef5 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1885,6 +1885,8 @@ func TestCheckpointTokensReport_UsesRootSummaryWhenSessionMetadataIncomplete(t * []*checkpoint.CommittedMetadata{ { SessionID: "readable-session", + Agent: "Claude Code", + Model: "claude-opus-4-6", TokenUsage: &agent.TokenUsage{ InputTokens: 100, }, @@ -1899,6 +1901,9 @@ func TestCheckpointTokensReport_UsesRootSummaryWhenSessionMetadataIncomplete(t * if report.Tokens.Total != 1500 || report.Tokens.APICalls != 7 { t.Fatalf("expected root summary tokens, got %+v", report.Tokens) } + if report.SessionID != "" || report.Agent != "" || report.Model != "" { + t.Fatalf("expected multi-session checkpoint to omit singular session fields, got session_id=%q agent=%q model=%q", report.SessionID, report.Agent, report.Model) + } if len(report.Limitations) == 0 || !strings.Contains(report.Limitations[0], "1 checkpoint session metadata file could not be read") { t.Fatalf("expected incomplete metadata limitation, got %+v", report.Limitations) } From 0d55f5545e1dea2b66e988d4e93cf83043e3be5d Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 11 Jun 2026 15:18:04 +0900 Subject: [PATCH 14/26] add token profile diagnostics Entire-Checkpoint: cdebea3708fd --- cmd/entire/cli/root.go | 1 + cmd/entire/cli/tokens_profile.go | 413 ++++++++++++++++++++++++++ cmd/entire/cli/tokens_profile_test.go | 231 ++++++++++++++ 3 files changed, 645 insertions(+) create mode 100644 cmd/entire/cli/tokens_profile.go create mode 100644 cmd/entire/cli/tokens_profile_test.go diff --git a/cmd/entire/cli/root.go b/cmd/entire/cli/root.go index b4c26447b5..7145ae14e9 100644 --- a/cmd/entire/cli/root.go +++ b/cmd/entire/cli/root.go @@ -84,6 +84,7 @@ func 
NewRootCmd() *cobra.Command { // Noun groups (canonical homes for subcommands). cmd.AddCommand(newSessionsCmd()) // 'session' (with 'sessions' as Cobra alias) cmd.AddCommand(newCheckpointGroupCmd()) // 'checkpoint' / 'cp' / 'checkpoints' + cmd.AddCommand(newTokensGroupCmd()) // 'tokens' cmd.AddCommand(newAgentGroupCmd()) // 'agent' cmd.AddCommand(newAuthCmd()) // 'auth' cmd.AddCommand(newDoctorCmd()) // 'doctor' (group: trace/logs/bundle) diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go new file mode 100644 index 0000000000..7f6a358fbf --- /dev/null +++ b/cmd/entire/cli/tokens_profile.go @@ -0,0 +1,413 @@ +package cli + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/spf13/cobra" +) + +type tokensProfileReport struct { + Source string `json:"source"` + CheckpointsAvailable int `json:"checkpoints_available"` + CheckpointsAnalyzed int `json:"checkpoints_analyzed"` + CheckpointsWithTokenData int `json:"checkpoints_with_token_data"` + MissingTokenData int `json:"missing_token_data"` + MetadataReadWarnings int `json:"metadata_read_warnings,omitempty"` + Tokens *sessionTokensUsage `json:"tokens,omitempty"` + Signals []tokensProfileSignal `json:"signals,omitempty"` + Recommendations []sessionTokensRecommendation `json:"recommendations,omitempty"` + Limitations []string `json:"limitations,omitempty"` +} + +type tokensProfileSignal struct { + ID string `json:"id"` + Label string `json:"label"` + Count int `json:"count"` + Percent int `json:"percent"` + CheckpointIDs []string `json:"checkpoint_ids,omitempty"` +} + +type tokensProfileSignalDefinition struct { + id string + label string +} + +var tokensProfileSignalDefinitions = []tokensProfileSignalDefinition{ + {id: "context-replay-hotspot", label: "Cache/context replay hotspot"}, + {id: 
"api-call-amplification", label: "API call amplification"}, + {id: "subagent-heavy", label: "Subagent-heavy sessions"}, + {id: "missing-token-data", label: "Missing token data"}, +} + +func newTokensGroupCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "tokens", + Short: "Analyze token usage across sessions and checkpoints", + Long: `Analyze token usage across sessions and checkpoints. + +Commands: + profile Aggregate token usage across committed checkpoints + +Examples: + entire tokens profile + entire tokens profile --json`, + RunE: func(cmd *cobra.Command, _ []string) error { + return cmd.Help() + }, + } + + cmd.AddCommand(newTokensProfileCmd()) + return cmd +} + +func newTokensProfileCmd() *cobra.Command { + var jsonFlag bool + var limitFlag int + var allFlag bool + + cmd := &cobra.Command{ + Use: "profile", + Short: "Aggregate token usage and recommendations across checkpoint history", + Long: `Aggregate token usage and recommendations across committed checkpoint history. + +The profile reads committed checkpoint metadata only. It does not inspect +transcripts or source files, so it is deterministic and avoids adding token +cost while diagnosing token usage. 
By default it scans the latest 50 committed +checkpoints; use --limit or --all to change the scope.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + limit := limitFlag + if allFlag { + limit = 0 + } else if limit <= 0 { + return errors.New("--limit must be positive unless --all is used") + } + return runTokensProfile(cmd.Context(), cmd, jsonFlag, limit) + }, + } + + cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") + cmd.Flags().IntVar(&limitFlag, "limit", 50, "Maximum committed checkpoints to analyze") + cmd.Flags().BoolVar(&allFlag, "all", false, "Analyze all committed checkpoints") + return cmd +} + +func runTokensProfile(ctx context.Context, cmd *cobra.Command, jsonOutput bool, limit int) error { + repo, err := openRepository(ctx) + if err != nil { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Not a git repository.") + return NewSilentError(err) + } + defer repo.Close() + + store := checkpoint.NewCommittedReadStore(ctx, repo) + infos, err := store.ListCommitted(ctx) + if err != nil { + return fmt.Errorf("failed to list checkpoints: %w", err) + } + + report, err := buildTokensProfileReport(ctx, store, infos, limit) + if err != nil { + return err + } + + if jsonOutput { + return writeTokensProfileJSON(cmd.OutOrStdout(), report) + } + writeTokensProfileText(cmd.OutOrStdout(), report) + return nil +} + +func buildTokensProfileReport(ctx context.Context, store *checkpoint.GitStore, infos []checkpoint.CommittedInfo, limit int) (tokensProfileReport, error) { + checkpointsAvailable := len(infos) + infos = limitTokensProfileCheckpoints(infos, limit) + report := tokensProfileReport{ + Source: "committed_checkpoints", + CheckpointsAvailable: checkpointsAvailable, + CheckpointsAnalyzed: len(infos), + } + signals := make(map[string]*tokensProfileSignal, len(tokensProfileSignalDefinitions)) + var aggregate *agent.TokenUsage + + for _, info := range infos { + if err := ctx.Err(); err != nil { + return 
tokensProfileReport{}, err //nolint:wrapcheck // Propagating context cancellation. + } + + summary, err := store.ReadCommitted(ctx, info.CheckpointID) + if err != nil { + return tokensProfileReport{}, fmt.Errorf("failed to read checkpoint %s: %w", info.CheckpointID, err) + } + if summary == nil { + report.MissingTokenData++ + addTokensProfileSignal(signals, "missing-token-data", info.CheckpointID, report.CheckpointsAnalyzed) + continue + } + + usage, metadataReadWarning, err := tokensProfileCheckpointUsage(ctx, store, info.CheckpointID, summary) + if err != nil { + return tokensProfileReport{}, err + } + if metadataReadWarning { + report.MetadataReadWarnings++ + } + tokens := buildSessionTokensUsage(usage) + if tokens == nil { + report.MissingTokenData++ + addTokensProfileSignal(signals, "missing-token-data", info.CheckpointID, report.CheckpointsAnalyzed) + continue + } + + report.CheckpointsWithTokenData++ + aggregate = addCheckpointTokenUsage(aggregate, usage) + addTokensProfileTokenSignals(signals, info.CheckpointID, tokens, report.CheckpointsAnalyzed) + } + + report.Tokens = buildSessionTokensUsage(aggregate) + report.Signals = orderedTokensProfileSignals(signals) + report.Recommendations = tokensProfileRecommendations(report) + report.Limitations = tokensProfileLimitations(report) + return report, nil +} + +func limitTokensProfileCheckpoints(infos []checkpoint.CommittedInfo, limit int) []checkpoint.CommittedInfo { + if limit <= 0 || len(infos) <= limit { + return infos + } + return infos[:limit] +} + +func tokensProfileCheckpointUsage(ctx context.Context, store *checkpoint.GitStore, checkpointID id.CheckpointID, summary *checkpoint.CheckpointSummary) (*agent.TokenUsage, bool, error) { + if summary == nil { + return nil, false, nil + } + + metas := make([]*checkpoint.CommittedMetadata, 0, len(summary.Sessions)) + metadataReadWarning := false + for i := range len(summary.Sessions) { + meta, err := store.ReadSessionMetadata(ctx, checkpointID, i) + if err != nil { 
+ if ctxErr := ctx.Err(); ctxErr != nil { + return nil, false, ctxErr //nolint:wrapcheck // Propagating context cancellation. + } + metadataReadWarning = true + continue + } + metas = append(metas, meta) + } + sessionUsage := aggregateCheckpointTokenUsage(metas) + if !metadataReadWarning && sessionUsage != nil { + return sessionUsage, false, nil + } + if summary.TokenUsage != nil { + return summary.TokenUsage, metadataReadWarning, nil + } + return sessionUsage, metadataReadWarning, nil +} + +func addTokensProfileTokenSignals(signals map[string]*tokensProfileSignal, checkpointID id.CheckpointID, tokens *sessionTokensUsage, denominator int) { + if tokens == nil { + return + } + if tokens.Total > 0 && tokenPercent(tokens.CacheRead, tokens.Total) >= 80 { + addTokensProfileSignal(signals, "context-replay-hotspot", checkpointID, denominator) + } + if tokens.APICalls >= 20 { + addTokensProfileSignal(signals, "api-call-amplification", checkpointID, denominator) + } + if tokens.Total > 0 && tokens.SubagentTotal*100 >= tokens.Total*10 { + addTokensProfileSignal(signals, "subagent-heavy", checkpointID, denominator) + } +} + +func addTokensProfileSignal(signals map[string]*tokensProfileSignal, signalID string, checkpointID id.CheckpointID, denominator int) { + signal := signals[signalID] + if signal == nil { + definition := tokensProfileSignalDefinitionFor(signalID) + signal = &tokensProfileSignal{ + ID: definition.id, + Label: definition.label, + } + signals[signalID] = signal + } + signal.Count++ + if denominator > 0 { + signal.Percent = roundedPercent(signal.Count, denominator) + } + if checkpointID != "" { + signal.CheckpointIDs = append(signal.CheckpointIDs, checkpointID.String()) + } +} + +func tokensProfileSignalDefinitionFor(signalID string) tokensProfileSignalDefinition { + for _, definition := range tokensProfileSignalDefinitions { + if definition.id == signalID { + return definition + } + } + return tokensProfileSignalDefinition{id: signalID, label: signalID} +} + 
+func orderedTokensProfileSignals(signals map[string]*tokensProfileSignal) []tokensProfileSignal { + ordered := make([]tokensProfileSignal, 0, len(signals)) + for _, definition := range tokensProfileSignalDefinitions { + if signal := signals[definition.id]; signal != nil { + ordered = append(ordered, *signal) + } + } + return ordered +} + +func tokensProfileRecommendations(report tokensProfileReport) []sessionTokensRecommendation { + var recs []sessionTokensRecommendation + + if report.CheckpointsAnalyzed == 0 { + return []sessionTokensRecommendation{{ + ID: "no-checkpoints", + Severity: "low", + Message: "Create checkpoints first; token profiling needs committed checkpoint metadata to identify patterns.", + Signals: []string{"empty_checkpoint_history"}, + }} + } + + if tokensProfileSignalCount(report.Signals, "context-replay-hotspot") > 0 || + tokensProfileSignalCount(report.Signals, "api-call-amplification") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "search-before-reinvestigation", + Severity: "high", + Message: "Use `entire search` for prior decisions/checkpoints before broad re-investigation.", + Signals: []string{"cache_read_tokens", "api_call_count"}, + }) + } + if tokensProfileSignalCount(report.Signals, "api-call-amplification") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "batch-diagnostics", + Severity: "medium", + Message: "Batch diagnostic reads around one narrowed hypothesis when API call amplification repeats.", + Signals: []string{"api_call_count"}, + }) + } + if tokensProfileSignalCount(report.Signals, "context-replay-hotspot") > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "preserve-then-compact", + Severity: "medium", + Message: "Summarize useful findings before continuing large-context work; compact or restart only after preserving relevant context.", + Signals: []string{"cache_read_tokens"}, + }) + } + if tokensProfileSignalCount(report.Signals, "subagent-heavy") > 0 { + recs = append(recs, 
sessionTokensRecommendation{ + ID: "scope-subagents", + Severity: "medium", + Message: "Scope subagent tasks tightly with a narrow objective and expected output.", + Signals: []string{"subagent_tokens"}, + }) + } + if report.MissingTokenData > 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "improve-token-coverage", + Severity: "low", + Message: "Increase token coverage by using agents and checkpoints that report token usage.", + Signals: []string{"missing_token_usage"}, + }) + } + + if len(recs) == 0 { + recs = append(recs, sessionTokensRecommendation{ + ID: "no-repeated-hotspots", + Severity: "low", + Message: "No repeated token hotspots were visible in committed checkpoint metadata.", + Signals: []string{"checkpoint_token_metadata"}, + }) + } + return recs +} + +func tokensProfileSignalCount(signals []tokensProfileSignal, signalID string) int { + for _, signal := range signals { + if signal.ID == signalID { + return signal.Count + } + } + return 0 +} + +func tokensProfileLimitations(report tokensProfileReport) []string { + var limitations []string + if report.CheckpointsAvailable > report.CheckpointsAnalyzed { + limitations = append(limitations, fmt.Sprintf("Limited to latest %d of %d committed checkpoints; use --limit or --all to change scope.", report.CheckpointsAnalyzed, report.CheckpointsAvailable)) + } + if report.CheckpointsAnalyzed == 0 { + limitations = append(limitations, "No committed checkpoints found.") + } + if report.MissingTokenData > 0 { + limitations = append(limitations, fmt.Sprintf("%d checkpoint%s did not include token usage.", report.MissingTokenData, pluralSuffix(report.MissingTokenData))) + } + if report.MetadataReadWarnings > 0 { + limitations = append(limitations, fmt.Sprintf("%d checkpoint%s had incomplete session metadata; profile used root token summaries or readable sessions where available.", report.MetadataReadWarnings, pluralSuffix(report.MetadataReadWarnings))) + } + if report.CheckpointsAnalyzed > 0 { + limitations 
= append(limitations, "Tool-level search/read spend is not captured yet; this profile infers patterns from token totals, cache/context replay, API call counts, and subagent totals.") + } + return limitations +} + +func writeTokensProfileJSON(w io.Writer, report tokensProfileReport) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + if err := enc.Encode(report); err != nil { + return fmt.Errorf("failed to encode token profile report: %w", err) + } + return nil +} + +func writeTokensProfileText(w io.Writer, report tokensProfileReport) { + fmt.Fprintln(w, "Token profile") + fmt.Fprintln(w) + fmt.Fprintf(w, "Source: %s\n", report.Source) + fmt.Fprintf(w, "Checkpoints available: %d\n", report.CheckpointsAvailable) + fmt.Fprintf(w, "Checkpoints analyzed: %d\n", report.CheckpointsAnalyzed) + fmt.Fprintf(w, "With token data: %d\n", report.CheckpointsWithTokenData) + fmt.Fprintf(w, "Missing token data: %d\n", report.MissingTokenData) + if report.MetadataReadWarnings > 0 { + fmt.Fprintf(w, "Metadata warnings: %d\n", report.MetadataReadWarnings) + } + + writeTokenUsageSection(w, report.Tokens) + writeTokensProfileSignals(w, report.Signals) + if len(report.Recommendations) > 0 { + writeTokenRecommendations(w, report.Recommendations) + } + writeTokenLimitations(w, report.Limitations) +} + +func writeTokensProfileSignals(w io.Writer, signals []tokensProfileSignal) { + if len(signals) == 0 { + return + } + + fmt.Fprintln(w) + fmt.Fprintln(w, "Repeated signals") + for _, signal := range signals { + fmt.Fprintf(w, "- %s: %d checkpoint%s", signal.Label, signal.Count, pluralSuffix(signal.Count)) + if signal.Percent > 0 { + fmt.Fprintf(w, " (%d%%)", signal.Percent) + } + fmt.Fprintln(w) + } +} + +func pluralSuffix(count int) string { + if count == 1 { + return "" + } + return "s" +} diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go new file mode 100644 index 0000000000..ea03772b26 --- /dev/null +++ 
b/cmd/entire/cli/tokens_profile_test.go @@ -0,0 +1,231 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/strategy" + "github.com/entireio/cli/redact" +) + +func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000001", "profile-cache-hotspot", &agent.TokenUsage{ + InputTokens: 100, + CacheCreationTokens: 100, + CacheReadTokens: 800, + APICallCount: 5, + }) + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000002", "profile-api-heavy", &agent.TokenUsage{ + InputTokens: 400, + OutputTokens: 100, + APICallCount: 25, + }) + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000003", "profile-subagent-heavy", &agent.TokenUsage{ + InputTokens: 500, + OutputTokens: 500, + APICallCount: 3, + SubagentTokens: &agent.TokenUsage{ + InputTokens: 1_000, + }, + }) + writeProfileTokenCheckpoint(ctx, t, store, "100aaa000004", "profile-missing", nil) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Token profile", + "Checkpoints analyzed: 4", + "With token data: 3", + "Missing token data: 1", + "Token usage", + "Total: 3.5k tokens", + "Cache read: 800", + "API calls: 33", + "Repeated signals", + "Cache/context replay hotspot: 1 checkpoint", + "API call amplification: 1 checkpoint", + "Subagent-heavy sessions: 1 checkpoint", + "Missing token data: 1 checkpoint", + "Recommendations", + "Use `entire search` for prior decisions/checkpoints 
before broad re-investigation.", + "Tool-level search/read spend is not captured yet", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + + tokenUsageIndex := strings.Index(out, "Token usage") + recommendationsIndex := strings.Index(out, "Recommendations") + if tokenUsageIndex == -1 || recommendationsIndex == -1 { + t.Fatalf("expected token usage and recommendations sections, got:\n%s", out) + } + if tokenUsageIndex > recommendationsIndex { + t.Fatalf("expected token usage before recommendations, got:\n%s", out) + } +} + +func TestTokensProfileCmd_JSONOutput(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + + writeProfileTokenCheckpoint(ctx, t, store, "200bbb000001", "profile-json-cache", &agent.TokenUsage{ + InputTokens: 100, + CacheReadTokens: 900, + APICallCount: 2, + }) + writeProfileTokenCheckpoint(ctx, t, store, "200bbb000002", "profile-json-api", &agent.TokenUsage{ + InputTokens: 200, + OutputTokens: 100, + APICallCount: 22, + }) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result tokensProfileReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) + } + if result.CheckpointsAnalyzed != 2 { + t.Fatalf("checkpoints_analyzed = %d, want 2", result.CheckpointsAnalyzed) + } + if result.CheckpointsWithTokenData != 2 { + t.Fatalf("checkpoints_with_token_data = %d, want 2", result.CheckpointsWithTokenData) + } + if result.Tokens == nil || result.Tokens.Total != 1300 { + t.Fatalf("unexpected token total: %+v", result.Tokens) + } + if got := signalCount(result.Signals, "context-replay-hotspot"); got != 1 { + 
t.Fatalf("context-replay-hotspot signal count = %d, want 1", got) + } + if got := signalCount(result.Signals, "api-call-amplification"); got != 1 { + t.Fatalf("api-call-amplification signal count = %d, want 1", got) + } + if len(result.Recommendations) == 0 { + t.Fatalf("expected recommendations, got none") + } +} + +func TestTokensProfileCmd_LimitScopesAnalyzedCheckpoints(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + + writeProfileTokenCheckpoint(ctx, t, store, "300ccc000001", "profile-limit-one", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 1, + }) + writeProfileTokenCheckpoint(ctx, t, store, "300ccc000002", "profile-limit-two", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 1, + }) + writeProfileTokenCheckpoint(ctx, t, store, "300ccc000003", "profile-limit-three", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 100, + APICallCount: 1, + }) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile", "--limit", "2"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoints available: 3", + "Checkpoints analyzed: 2", + "Total: 400 tokens", + "Limited to latest 2 of 3 committed checkpoints", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestTokensProfileCmd_EmptyHistory(t *testing.T) { + runExplainAutoTestRepo(t) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile"}) + + if err := cmd.ExecuteContext(context.Background()); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Token profile", + "Checkpoints analyzed: 0", + "Token data: 
unavailable", + "No committed checkpoints found.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func signalCount(signals []tokensProfileSignal, id string) int { + for _, signal := range signals { + if signal.ID == id { + return signal.Count + } + } + return 0 +} + +func writeProfileTokenCheckpoint(ctx context.Context, t *testing.T, store *checkpoint.GitStore, checkpointID string, sessionID string, usage *agent.TokenUsage) { + t.Helper() + + if err := store.WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: id.MustCheckpointID(checkpointID), + SessionID: sessionID, + Strategy: strategy.StrategyNameManualCommit, + Branch: "tokens-profile", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"profile"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: usage, + }); err != nil { + t.Fatalf("WriteCommitted(%s) error = %v", checkpointID, err) + } +} From cc33b021d4a80ab21e5ba14f86116776013c8c17 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Sat, 13 Jun 2026 18:24:55 +0900 Subject: [PATCH 15/26] fix token profile review feedback Entire-Checkpoint: e945a42148c6 --- cmd/entire/cli/tokens_profile.go | 2 ++ cmd/entire/cli/tokens_profile_test.go | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index 7f6a358fbf..d7157ac240 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -96,6 +96,7 @@ checkpoints; use --limit or --all to change the scope.`, cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") cmd.Flags().IntVar(&limitFlag, "limit", 50, "Maximum committed checkpoints to analyze") cmd.Flags().BoolVar(&allFlag, "all", false, "Analyze all committed checkpoints") + cmd.MarkFlagsMutuallyExclusive("limit", "all") 
return cmd } @@ -109,6 +110,7 @@ func runTokensProfile(ctx context.Context, cmd *cobra.Command, jsonOutput bool, defer repo.Close() store := checkpoint.NewCommittedReadStore(ctx, repo) + store.SetBlobFetcher(FetchBlobsByHash) infos, err := store.ListCommitted(ctx) if err != nil { return fmt.Errorf("failed to list checkpoints: %w", err) diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go index ea03772b26..afce935667 100644 --- a/cmd/entire/cli/tokens_profile_test.go +++ b/cmd/entire/cli/tokens_profile_test.go @@ -177,6 +177,21 @@ func TestTokensProfileCmd_LimitScopesAnalyzedCheckpoints(t *testing.T) { } } +func TestTokensProfileCmd_LimitAndAllAreMutuallyExclusive(t *testing.T) { + runExplainAutoTestRepo(t) + + cmd := newTokensGroupCmd() + cmd.SetArgs([]string{"profile", "--limit", "2", "--all"}) + + err := cmd.ExecuteContext(context.Background()) + if err == nil { + t.Fatal("expected error for --limit with --all") + } + if !strings.Contains(err.Error(), "limit") || !strings.Contains(err.Error(), "all") { + t.Fatalf("expected error to mention limit and all, got: %v", err) + } +} + func TestTokensProfileCmd_EmptyHistory(t *testing.T) { runExplainAutoTestRepo(t) From 9291dac76a0b851fcbccc4639dbca8fc6629554c Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Sat, 13 Jun 2026 19:33:54 +0900 Subject: [PATCH 16/26] fix token profile API-call-only usage Entire-Checkpoint: c04a3bb165a4 --- cmd/entire/cli/tokens_profile.go | 2 +- cmd/entire/cli/tokens_profile_test.go | 42 +++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index d7157ac240..5b13c3e5c6 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -109,7 +109,7 @@ func runTokensProfile(ctx context.Context, cmd *cobra.Command, jsonOutput bool, } defer repo.Close() - store := checkpoint.NewCommittedReadStore(ctx, repo) + store := 
checkpoint.NewGitStore(repo, checkpoint.ResolveCommittedRefs(ctx)) store.SetBlobFetcher(FetchBlobsByHash) infos, err := store.ListCommitted(ctx) if err != nil { diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go index afce935667..c540451377 100644 --- a/cmd/entire/cli/tokens_profile_test.go +++ b/cmd/entire/cli/tokens_profile_test.go @@ -17,7 +17,7 @@ import ( func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) writeProfileTokenCheckpoint(ctx, t, store, "100aaa000001", "profile-cache-hotspot", &agent.TokenUsage{ InputTokens: 100, @@ -87,7 +87,7 @@ func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) func TestTokensProfileCmd_JSONOutput(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) writeProfileTokenCheckpoint(ctx, t, store, "200bbb000001", "profile-json-cache", &agent.TokenUsage{ InputTokens: 100, @@ -133,10 +133,46 @@ func TestTokensProfileCmd_JSONOutput(t *testing.T) { } } +func TestTokensProfileCmd_JSONOutputReportsAPICallOnlyCheckpoints(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + + writeProfileTokenCheckpoint(ctx, t, store, "250bbb000001", "profile-json-api-only", &agent.TokenUsage{ + APICallCount: 25, + }) + + cmd := newTokensGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"profile", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result tokensProfileReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + 
t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) + } + if result.CheckpointsWithTokenData != 1 { + t.Fatalf("checkpoints_with_token_data = %d, want 1", result.CheckpointsWithTokenData) + } + if result.MissingTokenData != 0 { + t.Fatalf("missing_token_data = %d, want 0", result.MissingTokenData) + } + if result.Tokens == nil || result.Tokens.Total != 0 || result.Tokens.APICalls != 25 { + t.Fatalf("unexpected token usage: %+v", result.Tokens) + } + if got := signalCount(result.Signals, "api-call-amplification"); got != 1 { + t.Fatalf("api-call-amplification signal count = %d, want 1", got) + } +} + func TestTokensProfileCmd_LimitScopesAnalyzedCheckpoints(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() - store := checkpoint.NewGitStore(repo) + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) writeProfileTokenCheckpoint(ctx, t, store, "300ccc000001", "profile-limit-one", &agent.TokenUsage{ InputTokens: 100, From ff1ae901cd4ebe5dff7ac5c42fabf203a1084d74 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 16 Jun 2026 19:53:11 -0400 Subject: [PATCH 17/26] clarify token profile usage scope Entire-Checkpoint: aa9d6a1ab8b0 --- CLAUDE.md | 20 +++++++++++++------- cmd/entire/cli/session_tokens.go | 6 +++++- cmd/entire/cli/tokens_profile.go | 9 ++++++++- cmd/entire/cli/tokens_profile_test.go | 12 ++++++++++-- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1a045922ef..4d7fe70473 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -22,18 +22,20 @@ This repo contains the CLI for Entire. ### Command Layout -The CLI is organized around five noun groups plus a small set of top-level -verbs. The groups are the canonical home for each verb; legacy top-level -shortcuts remain functional but hidden, and emit a deprecation hint pointing -at the canonical group form. - -- `session` (alias: `sessions`): `list`, `info`, `stop`, `attach`, `resume`, `current`. 
+The visible CLI is organized around five noun groups plus a small set of +top-level verbs. The groups are the canonical home for each verb; legacy +top-level shortcuts remain functional but hidden, and emit a deprecation hint +pointing at the canonical group form. Newer experimental command families are +discoverable through `entire labs` and may remain hidden from root help while +their canonical paths are still runnable. + +- `session` (alias: `sessions`): `list`, `info`, `tokens`, `stop`, `attach`, `resume`, `current`. `resume` with a branch arg switches to it and resumes its session; with no arg it opens an interactive picker of stopped sessions (across all worktrees), resolving each to its branch and pointing at the owning worktree when the branch is checked out elsewhere. Resume keeps an existing local session log as-is by default (`--force` overwrites it from the checkpoint). -- `checkpoint` (aliases: `cp`, `checkpoints`): `list`, `explain`, `search`, plus +- `checkpoint` (aliases: `cp`, `checkpoints`): `list`, `explain`, `tokens`, `search`, plus the deprecated `rewind` (functional, prints a cobra deprecation message, will be removed in a future release) - `agent`: bare opens the interactive agent selector, plus `list`, `add`, `remove` @@ -45,6 +47,10 @@ at the canonical group form. current one) and `--all-contexts` (log out of every saved login) - `doctor`: bare runs the scan-and-fix flow, plus `trace`, `logs`, `bundle` +Experimental command families advertised through `entire labs`: + +- `tokens`: `profile` (hidden from root help while token diagnostics mature) + Top-level lifecycle and standalone commands: `enable`, `disable`, `status`, `login`, `logout`, `clean`, `version`, `dispatch`, `activity`, `help`, `configure`. 
diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index 212b009d5b..f087631c59 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -533,8 +533,12 @@ func writeTokenRecommendations(w io.Writer, recs []sessionTokensRecommendation) } func writeTokenUsageSection(w io.Writer, tokens *sessionTokensUsage) { + writeTokenUsageSectionWithTitle(w, "Token usage", tokens) +} + +func writeTokenUsageSectionWithTitle(w io.Writer, title string, tokens *sessionTokensUsage) { fmt.Fprintln(w) - fmt.Fprintln(w, "Token usage") + fmt.Fprintln(w, title) if tokens != nil { fmt.Fprintf(w, "Total: %s tokens\n", formatTokenCount(tokens.Total)) parts := []string{ diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index 5b13c3e5c6..16813458c0 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -15,6 +15,7 @@ import ( type tokensProfileReport struct { Source string `json:"source"` + UsageScope string `json:"usage_scope"` CheckpointsAvailable int `json:"checkpoints_available"` CheckpointsAnalyzed int `json:"checkpoints_analyzed"` CheckpointsWithTokenData int `json:"checkpoints_with_token_data"` @@ -46,6 +47,8 @@ var tokensProfileSignalDefinitions = []tokensProfileSignalDefinition{ {id: "missing-token-data", label: "Missing token data"}, } +const tokensProfileUsageScopeCheckpointObserved = "checkpoint_observed" + func newTokensGroupCmd() *cobra.Command { cmd := &cobra.Command{ Use: "tokens", @@ -133,6 +136,7 @@ func buildTokensProfileReport(ctx context.Context, store *checkpoint.GitStore, i infos = limitTokensProfileCheckpoints(infos, limit) report := tokensProfileReport{ Source: "committed_checkpoints", + UsageScope: tokensProfileUsageScopeCheckpointObserved, CheckpointsAvailable: checkpointsAvailable, CheckpointsAnalyzed: len(infos), } @@ -356,6 +360,9 @@ func tokensProfileLimitations(report tokensProfileReport) []string { if report.MetadataReadWarnings > 0 { 
limitations = append(limitations, fmt.Sprintf("%d checkpoint%s had incomplete session metadata; profile used root token summaries or readable sessions where available.", report.MetadataReadWarnings, pluralSuffix(report.MetadataReadWarnings))) } + if report.Tokens != nil { + limitations = append(limitations, "Token totals are summed from analyzed checkpoints and may include overlapping checkpoint history; treat them as checkpoint-observed volume, not guaranteed unique session spend.") + } if report.CheckpointsAnalyzed > 0 { limitations = append(limitations, "Tool-level search/read spend is not captured yet; this profile infers patterns from token totals, cache/context replay, API call counts, and subagent totals.") } @@ -383,7 +390,7 @@ func writeTokensProfileText(w io.Writer, report tokensProfileReport) { fmt.Fprintf(w, "Metadata warnings: %d\n", report.MetadataReadWarnings) } - writeTokenUsageSection(w, report.Tokens) + writeTokenUsageSectionWithTitle(w, "Checkpoint-observed token usage", report.Tokens) writeTokensProfileSignals(w, report.Signals) if len(report.Recommendations) > 0 { writeTokenRecommendations(w, report.Recommendations) diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go index c540451377..83d29e45e6 100644 --- a/cmd/entire/cli/tokens_profile_test.go +++ b/cmd/entire/cli/tokens_profile_test.go @@ -55,7 +55,7 @@ func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) "Checkpoints analyzed: 4", "With token data: 3", "Missing token data: 1", - "Token usage", + "Checkpoint-observed token usage", "Total: 3.5k tokens", "Cache read: 800", "API calls: 33", @@ -66,6 +66,7 @@ func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) "Missing token data: 1 checkpoint", "Recommendations", "Use `entire search` for prior decisions/checkpoints before broad re-investigation.", + "Token totals are summed from analyzed checkpoints and may include overlapping checkpoint history", 
"Tool-level search/read spend is not captured yet", } for _, check := range checks { @@ -74,7 +75,7 @@ func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) } } - tokenUsageIndex := strings.Index(out, "Token usage") + tokenUsageIndex := strings.Index(out, "Checkpoint-observed token usage") recommendationsIndex := strings.Index(out, "Recommendations") if tokenUsageIndex == -1 || recommendationsIndex == -1 { t.Fatalf("expected token usage and recommendations sections, got:\n%s", out) @@ -113,6 +114,13 @@ func TestTokensProfileCmd_JSONOutput(t *testing.T) { if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { t.Fatalf("expected valid JSON, got parse error: %v\noutput: %s", err, stdout.String()) } + var raw map[string]interface{} + if err := json.Unmarshal(stdout.Bytes(), &raw); err != nil { + t.Fatalf("expected valid JSON object, got parse error: %v\noutput: %s", err, stdout.String()) + } + if raw["usage_scope"] != "checkpoint_observed" { + t.Fatalf("usage_scope = %v, want checkpoint_observed", raw["usage_scope"]) + } if result.CheckpointsAnalyzed != 2 { t.Fatalf("checkpoints_analyzed = %d, want 2", result.CheckpointsAnalyzed) } From c34ad59d0210c99324bba75777d45f2fc2586085 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Wed, 17 Jun 2026 19:08:45 -0400 Subject: [PATCH 18/26] Add cost-proxy token guidance Entire-Checkpoint: e46d5e136530 --- cmd/entire/cli/checkpoint_tokens.go | 92 +++++- cmd/entire/cli/session_tokens.go | 79 ++++- cmd/entire/cli/sessions_test.go | 368 ++++++++++++++++++++++- cmd/entire/cli/strategy/session_state.go | 42 ++- 4 files changed, 541 insertions(+), 40 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 8c6e9bdcb7..4293d3b564 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -3,6 +3,7 @@ package cli import ( "context" "encoding/json" + "errors" "fmt" "io" "strconv" @@ -37,7 +38,10 @@ type 
checkpointTokensComparison struct { TargetCheckpointID string `json:"target_checkpoint_id"` Status string `json:"status"` Total *checkpointTokensMetricDelta `json:"total,omitempty"` + Input *checkpointTokensMetricDelta `json:"input,omitempty"` CacheRead *checkpointTokensMetricDelta `json:"cache_read,omitempty"` + CacheWrite *checkpointTokensMetricDelta `json:"cache_write,omitempty"` + Output *checkpointTokensMetricDelta `json:"output,omitempty"` APICalls *checkpointTokensMetricDelta `json:"api_calls,omitempty"` CacheReadCaveat string `json:"cache_read_caveat,omitempty"` Qualification string `json:"qualification"` @@ -66,6 +70,7 @@ const ( func newCheckpointTokensCmd() *cobra.Command { var jsonFlag bool var compareFlag string + var agentBriefFlag bool cmd := &cobra.Command{ Use: "tokens ", @@ -82,16 +87,20 @@ Use --compare to compare this checkpoint against a previous checkpoint and qualify observed token reduction or increase.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - return runCheckpointTokens(cmd.Context(), cmd, args[0], jsonFlag, compareFlag) + if jsonFlag && agentBriefFlag { + return errors.New("--json and --agent-brief are mutually exclusive") + } + return runCheckpointTokens(cmd.Context(), cmd, args[0], jsonFlag, compareFlag, agentBriefFlag) }, } cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") cmd.Flags().StringVar(&compareFlag, "compare", "", "Compare against a baseline checkpoint ID") + cmd.Flags().BoolVar(&agentBriefFlag, "agent-brief", false, "Output compact next-step guidance for agents") return cmd } -func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string, jsonOutput bool, comparePrefix string) error { +func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPrefix string, jsonOutput bool, comparePrefix string, agentBrief bool) error { report, lookup, err := loadCheckpointTokensReport(ctx, cmd, checkpointIDPrefix) if lookup != nil { defer 
lookup.Close() @@ -114,6 +123,10 @@ func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPr if jsonOutput { return writeCheckpointTokensJSON(cmd.OutOrStdout(), report) } + if agentBrief { + writeCheckpointTokensAgentBrief(cmd.OutOrStdout(), report) + return nil + } writeCheckpointTokensText(cmd.OutOrStdout(), report) return nil } @@ -394,14 +407,47 @@ func buildCheckpointTokensComparison(target, baseline checkpointTokensReport) *c } comparison.Total = buildCheckpointMetricDelta(baseline.Tokens.Total, target.Tokens.Total) + comparison.Input = buildCheckpointMetricDelta(baseline.Tokens.Input, target.Tokens.Input) comparison.CacheRead = buildCheckpointMetricDelta(baseline.Tokens.CacheRead, target.Tokens.CacheRead) + comparison.CacheWrite = buildCheckpointMetricDelta(baseline.Tokens.CacheWrite, target.Tokens.CacheWrite) + comparison.Output = buildCheckpointMetricDelta(baseline.Tokens.Output, target.Tokens.Output) comparison.APICalls = buildCheckpointMetricDelta(baseline.Tokens.APICalls, target.Tokens.APICalls) comparison.CacheReadCaveat = checkpointComparisonCacheReadCaveat(comparison.CacheRead) comparison.Status = checkpointComparisonStatus(comparison.Total) comparison.Qualification = checkpointComparisonQualification(comparison.Status) + if classes := checkpointCostProxyPressureIncreased(comparison); len(classes) > 0 { + comparison.Qualification += fmt.Sprintf(" Cost-proxy pressure increased for %s even though total tokens decreased.", formatTokenClassList(classes)) + } return comparison } +func checkpointCostProxyPressureIncreased(comparison *checkpointTokensComparison) []string { + if comparison == nil || comparison.Total == nil || comparison.Total.Change >= 0 { + return nil + } + var classes []string + if comparison.CacheWrite != nil && comparison.CacheWrite.Change > 0 { + classes = append(classes, "cache write") + } + if comparison.Output != nil && comparison.Output.Change > 0 { + classes = append(classes, "output") + } + return classes 
+} + +func formatTokenClassList(classes []string) string { + switch len(classes) { + case 0: + return "" + case 1: + return classes[0] + case 2: + return classes[0] + " and " + classes[1] + default: + return strings.Join(classes[:len(classes)-1], ", ") + ", and " + classes[len(classes)-1] + } +} + func buildCheckpointMetricDelta(baseline, current int) *checkpointTokensMetricDelta { change := saturatingIntSub(current, baseline) delta := &checkpointTokensMetricDelta{ @@ -534,6 +580,45 @@ func writeCheckpointTokensText(w io.Writer, report checkpointTokensReport) { writeTokenLimitations(w, report.Limitations) } +func writeCheckpointTokensAgentBrief(w io.Writer, report checkpointTokensReport) { + fmt.Fprintln(w, "Checkpoint token brief") + fmt.Fprintf(w, "Checkpoint: %s\n", report.CheckpointID) + fmt.Fprintln(w) + fmt.Fprintln(w, agentBriefUsageLine(report.Tokens)) + fmt.Fprintln(w) + fmt.Fprintln(w, "Next best action:") + fmt.Fprintln(w, checkpointAgentBriefNextAction(report)) + + signals := agentBriefSignals(checkpointAgentBriefSessionReport(report)) + if len(signals) > 0 { + fmt.Fprintln(w) + fmt.Fprintln(w, "Signals:") + for _, signal := range signals { + fmt.Fprintf(w, "- %s\n", signal) + } + } +} + +func checkpointAgentBriefNextAction(report checkpointTokensReport) string { + sessionReport := checkpointAgentBriefSessionReport(report) + if hasTokenRecommendation(sessionReport, "no-token-data") { + return "Do not spend extra commands on token optimization for this checkpoint. Continue with the task and capture a newer checkpoint before rechecking tokens." + } + if action, ok := agentBriefOptimizationAction(sessionReport); ok { + return action + } + return "Continue normally; no high-signal token optimization is available from this checkpoint." 
+} + +func checkpointAgentBriefSessionReport(report checkpointTokensReport) sessionTokensReport { + return sessionTokensReport{ + Tokens: report.Tokens, + Context: report.Context, + Recommendations: report.Recommendations, + Limitations: report.Limitations, + } +} + func writeCheckpointTokenComparison(w io.Writer, comparison *checkpointTokensComparison) { if comparison == nil { return @@ -547,7 +632,10 @@ func writeCheckpointTokenComparison(w io.Writer, comparison *checkpointTokensCom } if comparison.Status != checkpointComparisonStatusUnavailable { fmt.Fprintf(w, "Total tokens: %s\n", formatCheckpointMetricDelta(comparison.Total, formatTokenCount)) + fmt.Fprintf(w, "Input: %s\n", formatCheckpointMetricDelta(comparison.Input, formatTokenCount)) fmt.Fprintf(w, "Cache/context replay: %s\n", formatCheckpointMetricDelta(comparison.CacheRead, formatTokenCount)) + fmt.Fprintf(w, "Cache write: %s\n", formatCheckpointMetricDelta(comparison.CacheWrite, formatTokenCount)) + fmt.Fprintf(w, "Output: %s\n", formatCheckpointMetricDelta(comparison.Output, formatTokenCount)) fmt.Fprintf(w, "API calls: %s\n", formatCheckpointMetricDelta(comparison.APICalls, formatPlainCount)) } fmt.Fprintln(w) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index f087631c59..965c1881ee 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -76,6 +76,8 @@ const ( recommendationLongSessionCheckpoints = 5 ) +const agentBriefCostProxyBatchAction = "Use at most 3 batched reads before answering. Continue only if a named file or test can change the verdict; otherwise answer now. Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight." 
+ func newTokensCmd() *cobra.Command { var jsonFlag bool var currentFlag bool @@ -118,8 +120,12 @@ optimize next steps."`, } func runSessionTokens(ctx context.Context, cmd *cobra.Command, sessionID string, current, jsonOutput, agentBrief bool) error { - if sessionID == "" || current { - sessionID = strategy.FindMostRecentSession(ctx) + if sessionID == "" { + if current { + sessionID = strategy.FindMostRecentSessionInCurrentWorktree(ctx) + } else { + sessionID = strategy.FindMostRecentSession(ctx) + } if sessionID == "" { fmt.Fprintln(cmd.OutOrStdout(), "No active session found in this worktree.") return nil @@ -328,6 +334,24 @@ func recommendationRules(signals tokenRecommendationSignals) []sessionTokensReco Signals: []string{"subagent_tokens"}, }) } + if signals.Tokens != nil && signals.Tokens.Total > 0 && + tokenClassPressure(signals.Tokens.CacheWrite, signals.Tokens.Total, 5000, 10, 50_000) { + recs = append(recs, sessionTokensRecommendation{ + ID: "cache-write-pressure", + Severity: "medium", + Message: "Cache write is elevated; avoid broad new context and narrow the next read before continuing.", + Signals: []string{"cache_write_tokens"}, + }) + } + if signals.Tokens != nil && signals.Tokens.Total > 0 && + tokenClassPressure(signals.Tokens.Output, signals.Tokens.Total, 3000, 2, 10_000) { + recs = append(recs, sessionTokensRecommendation{ + ID: "output-pressure", + Severity: "medium", + Message: "Output tokens are elevated; keep the next answer tight and avoid restating evidence.", + Signals: []string{"output_tokens"}, + }) + } if signals.Context != nil && signals.Context.Percent >= recommendationHighContextPercent { recs = append(recs, sessionTokensRecommendation{ ID: "high-context-pressure", @@ -363,6 +387,16 @@ func tokenShareAtLeastOneTenth(part, total int) bool { return part >= (total-1)/recommendationSubagentShareDenominator+1 } +func tokenClassPressure(value, total, minTokens int, minPercent float64, highTokens int) bool { + if value <= 0 || total <= 0 { 
+ return false + } + if value >= highTokens { + return true + } + return value >= minTokens && tokenPercent(value, total) >= minPercent +} + func tokenPercent(value, total int) float64 { if total <= 0 { return 0 @@ -469,23 +503,40 @@ func formatAPICalls(count int) string { } func agentBriefNextAction(report sessionTokensReport) string { + if hasTokenRecommendation(report, "no-token-data") { + return "Token usage is not available yet. Use this as a context check, not a spend diagnosis; continue after the next checkpoint captures usage." + } + if action, ok := agentBriefOptimizationAction(report); ok { + return action + } + return "Continue normally; no high-signal token optimization is available from this session yet." +} + +func agentBriefOptimizationAction(report sessionTokensReport) (string, bool) { switch { + case (hasTokenRecommendation(report, "cache-write-pressure") || hasTokenRecommendation(report, "output-pressure")) && + (hasTokenRecommendation(report, "context-replay-hotspot") || hasTokenRecommendation(report, "api-call-amplification")): + return agentBriefCostProxyBatchAction, true + case hasTokenRecommendation(report, "cache-write-pressure") && hasTokenRecommendation(report, "output-pressure"): + return agentBriefCostProxyBatchAction, true + case hasTokenRecommendation(report, "cache-write-pressure"): + return "Use at most 3 batched reads and avoid broad new context until you have one narrowed hypothesis.", true + case hasTokenRecommendation(report, "output-pressure"): + return "Keep the next answer tight; cite only necessary evidence and avoid restating prior context.", true case hasTokenRecommendation(report, "context-replay-hotspot") && hasTokenRecommendation(report, "api-call-amplification"): - return "Summarize the useful findings, then batch the next diagnostic step. Avoid more exploratory reads until you have a narrowed hypothesis." 
+ return agentBriefCostProxyBatchAction, true case hasTokenRecommendation(report, "api-call-amplification"): - return "Batch the next diagnostic step around one narrowed hypothesis before making more tool calls." + return agentBriefCostProxyBatchAction, true case hasTokenRecommendation(report, "context-replay-hotspot"): - return "Summarize the current useful findings before continuing, and keep the next prompt narrow." - case hasTokenRecommendation(report, "no-token-data"): - return "Token usage is not available yet. Use this as a context check, not a spend diagnosis; continue after the next checkpoint captures usage." + return "Use at most 2 focused reads after summarizing known findings, then answer. Avoid broad grep, broad diffs, and broad tests.", true case hasTokenRecommendation(report, "subagent-heavy"): - return "Keep the next agent or subagent task narrow with a concrete expected output; avoid broad parallel exploration." + return "Do not launch broad subagents. Use one narrowly scoped check with a concrete expected output.", true case hasTokenRecommendation(report, "high-context-pressure"): - return "Preserve the useful findings and compact or restart before adding more broad context." + return "Preserve useful findings, then answer with at most 2 focused reads if more evidence is required.", true case hasTokenRecommendation(report, "long-session"): - return "Compact or restart after summarizing useful findings if older context is no longer needed." + return "Summarize useful findings and stop unless one focused read can change the answer.", true default: - return "Continue normally; no high-signal token optimization is available from this session yet." 
+ return "", false } } @@ -497,6 +548,12 @@ func agentBriefSignals(report sessionTokensReport) []string { if hasTokenRecommendation(report, "api-call-amplification") { signals = append(signals, "API call count is high for one session.") } + if hasTokenRecommendation(report, "cache-write-pressure") { + signals = append(signals, "Cache write/new context pressure is elevated.") + } + if hasTokenRecommendation(report, "output-pressure") { + signals = append(signals, "Output pressure is elevated.") + } if hasTokenRecommendation(report, "subagent-heavy") { signals = append(signals, "Subagent usage is a meaningful part of total tokens.") } diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index cd6b944ef5..8ce4b6cdde 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1239,6 +1239,61 @@ func reportHasSessionRecommendation(report sessionTokensReport, id string) bool return false } +func TestRecommendationRules_CacheWritePressure(t *testing.T) { + t.Parallel() + + recs := recommendationRules(tokenRecommendationSignals{ + Tokens: &sessionTokensUsage{ + Total: 50_000, + CacheWrite: 6_000, + }, + }) + + if !recommendationsIncludeID(recs, "cache-write-pressure") { + t.Fatalf("expected cache-write-pressure recommendation, got %+v", recs) + } +} + +func TestRecommendationRules_OutputPressure(t *testing.T) { + t.Parallel() + + recs := recommendationRules(tokenRecommendationSignals{ + Tokens: &sessionTokensUsage{ + Total: 100_000, + Output: 3_500, + }, + }) + + if !recommendationsIncludeID(recs, "output-pressure") { + t.Fatalf("expected output-pressure recommendation, got %+v", recs) + } +} + +func TestRecommendationRules_OutputPressureWithLargeCacheReplay(t *testing.T) { + t.Parallel() + + recs := recommendationRules(tokenRecommendationSignals{ + Tokens: &sessionTokensUsage{ + Total: 10_000_000, + CacheRead: 9_800_000, + Output: 100_000, + }, + }) + + if !recommendationsIncludeID(recs, "output-pressure") { + 
t.Fatalf("expected output-pressure recommendation for high absolute output, got %+v", recs) + } +} + +func recommendationsIncludeID(recs []sessionTokensRecommendation, id string) bool { + for _, rec := range recs { + if rec.ID == id { + return true + } + } + return false +} + func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { setupStopTestRepo(t) @@ -1272,10 +1327,13 @@ func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { "Session: test-tokens-brief", "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", "Next best action:", - "Summarize the useful findings, then batch the next diagnostic step.", + "Use at most 3 batched reads before answering.", + "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", "Signals:", "- Cache/context replay dominates token volume.", "- API call count is high for one session.", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", } for _, check := range checks { if !strings.Contains(out, check) { @@ -1338,7 +1396,8 @@ func TestTokensCmd_AgentBriefHighCacheReplayWithoutHighAPICalls(t *testing.T) { out := stdout.String() checks := []string{ "Token usage: 637.7k total; 95.5% cache/context replay; 3 API calls.", - "Summarize the current useful findings before continuing, and keep the next prompt narrow.", + "Use at most 2 focused reads after summarizing known findings, then answer.", + "Avoid broad grep, broad diffs, and broad tests.", "- Cache/context replay dominates token volume.", } for _, check := range checks { @@ -1379,7 +1438,8 @@ func TestTokensCmd_AgentBriefHighAPICallsWithoutCacheReplay(t *testing.T) { out := stdout.String() checks := []string{ "Token usage: 11k total; 25 API calls.", - "Batch the next diagnostic step around one narrowed hypothesis before making more tool calls.", + "Use at most 3 batched reads before answering.", + "Avoid broad grep, broad diffs, broad tests, and repeated token 
diagnostics; keep the answer tight.", "- API call count is high for one session.", } for _, check := range checks { @@ -1432,6 +1492,110 @@ func TestTokensCmd_AgentBriefNoTokenData(t *testing.T) { } } +func TestSessionTokensAgentBriefClassAwareCostProxy(t *testing.T) { + t.Parallel() + + tokens := &sessionTokensUsage{ + Total: 50_000, + CacheWrite: 6_000, + Output: 3_500, + APICalls: 4, + } + report := sessionTokensReport{ + SessionID: "test-cost-proxy-brief", + Tokens: tokens, + Recommendations: recommendationRules(tokenRecommendationSignals{Tokens: tokens}), + } + + var stdout bytes.Buffer + writeSessionTokensAgentBrief(&stdout, report) + + out := stdout.String() + checks := []string{ + "Session token brief", + "Session: test-cost-proxy-brief", + "Use at most 3 batched reads", + "Avoid broad grep, broad diffs, broad tests", + "keep the answer tight", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + +func TestCheckpointTokensAgentBriefClassAwareCostProxy(t *testing.T) { + t.Parallel() + + tokens := &sessionTokensUsage{ + Total: 50_000, + CacheWrite: 6_000, + Output: 3_500, + APICalls: 4, + } + report := checkpointTokensReport{ + CheckpointID: "c05e500cafe0", + Tokens: tokens, + Recommendations: recommendationRules(tokenRecommendationSignals{Tokens: tokens}), + } + + var stdout bytes.Buffer + writeCheckpointTokensAgentBrief(&stdout, report) + + out := stdout.String() + checks := []string{ + "Checkpoint token brief", + "Checkpoint: c05e500cafe0", + "Use at most 3 batched reads", + "Avoid broad grep, broad diffs, broad tests", + "keep the answer tight", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + 
+func TestCheckpointTokensAgentBriefCombinesOutputAndReplayPressure(t *testing.T) { + t.Parallel() + + tokens := &sessionTokensUsage{ + Total: 10_000_000, + CacheRead: 9_800_000, + Output: 100_000, + APICalls: 25, + } + report := checkpointTokensReport{ + CheckpointID: "c05e501cafe0", + Tokens: tokens, + Recommendations: recommendationRules(tokenRecommendationSignals{Tokens: tokens}), + } + + var stdout bytes.Buffer + writeCheckpointTokensAgentBrief(&stdout, report) + + out := stdout.String() + checks := []string{ + "Use at most 3 batched reads", + "Avoid broad grep, broad diffs, broad tests", + "keep the answer tight", + "- Cache/context replay dominates token volume.", + "- API call count is high for one session.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + func TestSessionsCmd_TokensSubcommand(t *testing.T) { setupStopTestRepo(t) @@ -1531,6 +1695,21 @@ func TestTokensCmd_JSONAndAgentBriefAreMutuallyExclusive(t *testing.T) { } } +func TestCheckpointTokensCmd_JSONAndAgentBriefAreMutuallyExclusive(t *testing.T) { + t.Parallel() + + cmd := newCheckpointGroupCmd() + cmd.SetArgs([]string{"tokens", "abc123", "--json", "--agent-brief"}) + + err := cmd.ExecuteContext(context.Background()) + if err == nil { + t.Fatal("expected error for --json with --agent-brief") + } + if !strings.Contains(err.Error(), "mutually exclusive") { + t.Fatalf("expected mutually exclusive error, got: %v", err) + } +} + func TestTokensCmd_PrioritizesContextReplayHotspot(t *testing.T) { setupStopTestRepo(t) @@ -1742,6 +1921,96 @@ func TestCheckpointTokensCmd_TextOutputWithRealCheckpointShape(t *testing.T) { } } +func TestCheckpointTokensCmd_AgentBriefGivesOperationalBudget(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + cpID := id.MustCheckpointID("b1efbeefcafe") + if err := checkpoint.NewGitStore(repo, 
checkpoint.DefaultV1Refs()).WriteCommitted(ctx, checkpoint.WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "checkpoint-token-brief", + Strategy: strategy.StrategyNameManualCommit, + Branch: "e2e-triage-fix", + Agent: testAgentClaude, + Transcript: redact.AlreadyRedacted([]byte(`{"type":"user","message":{"content":[{"type":"text","text":"why is slack failing"}]}}` + "\n")), + AuthorName: "Test", + AuthorEmail: "test@example.com", + TokenUsage: &agent.TokenUsage{ + InputTokens: 94, + CacheCreationTokens: 122171, + CacheReadTokens: 6052424, + OutputTokens: 38956, + APICallCount: 70, + }, + }); err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "b1efbeef", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoint token brief", + "Checkpoint: b1efbeefcafe", + "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", + "Next best action:", + "Use at most 3 batched reads before answering.", + "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", + "Signals:", + "- Cache/context replay dominates token volume.", + "- API call count is high for one session.", + "- Cache write/new context pressure is elevated.", + "- Output pressure is elevated.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } + for _, verboseSection := range []string{"Recommendations", "Likely contributors", "Limitations"} { + if strings.Contains(out, verboseSection) { + t.Fatalf("expected agent brief to omit %s section, got:\n%s", verboseSection, out) + } + } +} + +func TestCheckpointTokensCmd_AgentBriefMissingTokenData(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := 
context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + cpID := id.MustCheckpointID("deadcafebeef") + writeCommittedTokenCheckpoint(ctx, t, store, cpID, "checkpoint-token-missing-brief", nil) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "deadcafe", "--agent-brief"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + out := stdout.String() + checks := []string{ + "Checkpoint token brief", + "Checkpoint: deadcafebeef", + "Token usage: unavailable.", + "Do not spend extra commands on token optimization for this checkpoint.", + "- Token usage is unavailable for this session.", + } + for _, check := range checks { + if !strings.Contains(out, check) { + t.Errorf("expected %q in output, got:\n%s", check, out) + } + } +} + func TestCheckpointTokensCmd_TextOutputWithMultipleSessionsUsesAggregateScope(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() @@ -2049,6 +2318,7 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { InputTokens: 200_000, CacheCreationTokens: 50_000, CacheReadTokens: 750_000, + OutputTokens: 10_000, APICallCount: 10, }, }); err != nil { @@ -2065,8 +2335,9 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { AuthorEmail: "test@example.com", TokenUsage: &agent.TokenUsage{ InputTokens: 150_000, - CacheCreationTokens: 50_000, + CacheCreationTokens: 25_000, CacheReadTokens: 300_000, + OutputTokens: 25_000, APICallCount: 4, }, }); err != nil { @@ -2091,12 +2362,16 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { "Comparison", "Baseline: aaa111bbb222", "Caveat: Total tokens include cache/context replay; use the cache/context replay delta below before treating total direction as work saved or added.", - "Total tokens: down 50% (1000k -> 500k)", + "Total tokens: down 50.5% (1010k -> 500k)", + "Input: down 25% 
(200k -> 150k)", "Cache/context replay: down 60% (750k -> 300k)", + "Cache write: down 50% (50k -> 25k)", + "Output: up 150% (10k -> 25k)", "API calls: down 60% (10 -> 4)", "Qualification", "Observed total token use decreased for this checkpoint comparison.", "This does not prove quality was preserved", + "Cost-proxy pressure increased for output", } for _, check := range checks { if !strings.Contains(out, check) { @@ -2121,10 +2396,11 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { AuthorName: "Test", AuthorEmail: "test@example.com", TokenUsage: &agent.TokenUsage{ - InputTokens: 100, - CacheReadTokens: 300, - OutputTokens: 100, - APICallCount: 5, + InputTokens: 100, + CacheCreationTokens: 50, + CacheReadTokens: 300, + OutputTokens: 100, + APICallCount: 5, }, }); err != nil { t.Fatalf("WriteCommitted() baseline error = %v", err) @@ -2138,10 +2414,11 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { AuthorName: "Test", AuthorEmail: "test@example.com", TokenUsage: &agent.TokenUsage{ - InputTokens: 120, - CacheReadTokens: 480, - OutputTokens: 200, - APICallCount: 8, + InputTokens: 120, + CacheCreationTokens: 80, + CacheReadTokens: 480, + OutputTokens: 200, + APICallCount: 8, }, }); err != nil { t.Fatalf("WriteCommitted() current error = %v", err) @@ -2175,11 +2452,11 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { if result.Comparison.Total == nil { t.Fatalf("expected total delta, got nil") } - if result.Comparison.Total.Baseline != 500 || result.Comparison.Total.Current != 800 { + if result.Comparison.Total.Baseline != 550 || result.Comparison.Total.Current != 880 { t.Fatalf("unexpected total delta: %+v", result.Comparison.Total) } - if result.Comparison.Total.Change != 300 { - t.Fatalf("expected total change 300, got %+v", result.Comparison.Total) + if result.Comparison.Total.Change != 330 { + t.Fatalf("expected total change 330, got %+v", result.Comparison.Total) } if 
result.Comparison.Total.Direction != checkpointDeltaDirectionUp { t.Fatalf("expected total direction up, got %+v", result.Comparison.Total) @@ -2190,6 +2467,65 @@ func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { if result.Comparison.CacheReadCaveat == "" { t.Fatalf("expected cache read caveat, got %+v", result.Comparison) } + if result.Comparison.Input == nil || result.Comparison.Input.Change != 20 { + t.Fatalf("expected input change 20, got %+v", result.Comparison.Input) + } + if result.Comparison.CacheWrite == nil || result.Comparison.CacheWrite.Change != 30 { + t.Fatalf("expected cache write change 30, got %+v", result.Comparison.CacheWrite) + } + if result.Comparison.Output == nil || result.Comparison.Output.Change != 100 { + t.Fatalf("expected output change 100, got %+v", result.Comparison.Output) + } +} + +func TestCheckpointTokensCmd_JSONComparisonQualifiesCostProxyPressure(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + baselineID := id.MustCheckpointID("c0a111c0a111") + currentID := id.MustCheckpointID("c0a222c0a222") + + writeCommittedTokenCheckpoint(ctx, t, store, baselineID, "checkpoint-token-cost-proxy-baseline", &agent.TokenUsage{ + InputTokens: 100_000, + CacheReadTokens: 100_000, + APICallCount: 6, + }) + writeCommittedTokenCheckpoint(ctx, t, store, currentID, "checkpoint-token-cost-proxy-current", &agent.TokenUsage{ + InputTokens: 50_000, + CacheCreationTokens: 30_000, + OutputTokens: 30_000, + APICallCount: 4, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "c0a222", "--compare", "c0a111", "--json"}) + + if err := cmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected no error, got: %v", err) + } + + var result checkpointTokensReport + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("expected valid JSON, got parse 
error: %v\noutput: %s", err, stdout.String()) + } + if result.Comparison == nil { + t.Fatalf("expected comparison, got nil") + } + if result.Comparison.Status != checkpointComparisonStatusObservedReduction { + t.Fatalf("expected observed reduction, got %q", result.Comparison.Status) + } + checks := []string{ + "Cost-proxy pressure increased", + "cache write", + "output", + } + for _, check := range checks { + if !strings.Contains(result.Comparison.Qualification, check) { + t.Fatalf("expected %q in qualification, got %q", check, result.Comparison.Qualification) + } + } } func TestCheckpointTokensCmd_ComparisonNoChange(t *testing.T) { diff --git a/cmd/entire/cli/strategy/session_state.go b/cmd/entire/cli/strategy/session_state.go index 4302c06c8a..84675c3302 100644 --- a/cmd/entire/cli/strategy/session_state.go +++ b/cmd/entire/cli/strategy/session_state.go @@ -138,20 +138,40 @@ func FindMostRecentSession(ctx context.Context) string { } // Scope to current worktree to prevent cross-worktree pollution. - worktreePath, wpErr := paths.WorktreeRoot(ctx) - if wpErr == nil && worktreePath != "" { - var filtered []*SessionState - for _, s := range states { - if s.WorktreePath == worktreePath { - filtered = append(filtered, s) - } - } - if len(filtered) > 0 { - states = filtered + if filtered := sessionStatesForCurrentWorktree(ctx, states); len(filtered) > 0 { + states = filtered + // If no sessions match the worktree, fall back to all sessions. + } + + return mostRecentSessionID(states) +} + +// FindMostRecentSessionInCurrentWorktree returns the most recently interacted +// session from the current worktree only. Unlike FindMostRecentSession, it does +// not fall back to sessions from other worktrees. 
+func FindMostRecentSessionInCurrentWorktree(ctx context.Context) string { + states, err := ListSessionStates(ctx) + if err != nil || len(states) == 0 { + return "" + } + return mostRecentSessionID(sessionStatesForCurrentWorktree(ctx, states)) +} + +func sessionStatesForCurrentWorktree(ctx context.Context, states []*SessionState) []*SessionState { + worktreePath, err := paths.WorktreeRoot(ctx) + if err != nil || worktreePath == "" { + return nil + } + filtered := make([]*SessionState, 0, len(states)) + for _, s := range states { + if s.WorktreePath == worktreePath { + filtered = append(filtered, s) } - // If no sessions match the worktree, fall back to all sessions } + return filtered +} +func mostRecentSessionID(states []*SessionState) string { var best *SessionState for _, s := range states { if s.LastInteractionTime == nil { From 53bff7759e019cd3fd1c7571ae565f1f0aeaaef8 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Wed, 17 Jun 2026 19:27:09 -0400 Subject: [PATCH 19/26] Tighten agent token brief stop gate Entire-Checkpoint: 1051aa7485b2 --- cmd/entire/cli/session_tokens.go | 2 +- cmd/entire/cli/sessions_test.go | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cmd/entire/cli/session_tokens.go b/cmd/entire/cli/session_tokens.go index 965c1881ee..b6223f150b 100644 --- a/cmd/entire/cli/session_tokens.go +++ b/cmd/entire/cli/session_tokens.go @@ -528,7 +528,7 @@ func agentBriefOptimizationAction(report sessionTokensReport) (string, bool) { case hasTokenRecommendation(report, "api-call-amplification"): return agentBriefCostProxyBatchAction, true case hasTokenRecommendation(report, "context-replay-hotspot"): - return "Use at most 2 focused reads after summarizing known findings, then answer. Avoid broad grep, broad diffs, and broad tests.", true + return "Use at most 2 focused reads only if a named file or test can change the answer; otherwise answer now. 
Avoid broad grep, broad diffs, and broad tests.", true case hasTokenRecommendation(report, "subagent-heavy"): return "Do not launch broad subagents. Use one narrowly scoped check with a concrete expected output.", true case hasTokenRecommendation(report, "high-context-pressure"): diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 8ce4b6cdde..603d2997e1 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1328,6 +1328,7 @@ func TestTokensCmd_AgentBriefPrioritizesNextAction(t *testing.T) { "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", "Next best action:", "Use at most 3 batched reads before answering.", + "Continue only if a named file or test can change the verdict; otherwise answer now.", "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", "Signals:", "- Cache/context replay dominates token volume.", @@ -1396,7 +1397,7 @@ func TestTokensCmd_AgentBriefHighCacheReplayWithoutHighAPICalls(t *testing.T) { out := stdout.String() checks := []string{ "Token usage: 637.7k total; 95.5% cache/context replay; 3 API calls.", - "Use at most 2 focused reads after summarizing known findings, then answer.", + "Use at most 2 focused reads only if a named file or test can change the answer; otherwise answer now.", "Avoid broad grep, broad diffs, and broad tests.", "- Cache/context replay dominates token volume.", } @@ -1516,6 +1517,7 @@ func TestSessionTokensAgentBriefClassAwareCostProxy(t *testing.T) { "Session: test-cost-proxy-brief", "Use at most 3 batched reads", "Avoid broad grep, broad diffs, broad tests", + "otherwise answer now", "keep the answer tight", "- Cache write/new context pressure is elevated.", "- Output pressure is elevated.", @@ -1551,6 +1553,7 @@ func TestCheckpointTokensAgentBriefClassAwareCostProxy(t *testing.T) { "Checkpoint: c05e500cafe0", "Use at most 3 batched reads", "Avoid broad grep, broad diffs, broad tests", + 
"otherwise answer now", "keep the answer tight", "- Cache write/new context pressure is elevated.", "- Output pressure is elevated.", @@ -1961,6 +1964,7 @@ func TestCheckpointTokensCmd_AgentBriefGivesOperationalBudget(t *testing.T) { "Token usage: 6213.6k total; 97.4% cache/context replay; 70 API calls.", "Next best action:", "Use at most 3 batched reads before answering.", + "Continue only if a named file or test can change the verdict; otherwise answer now.", "Avoid broad grep, broad diffs, broad tests, and repeated token diagnostics; keep the answer tight.", "Signals:", "- Cache/context replay dominates token volume.", From 9102855b2a2e5c6018ceef6035b71d60d4f8ab65 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 18 Jun 2026 00:58:47 -0400 Subject: [PATCH 20/26] dedupe token plural helper Entire-Checkpoint: 1869a59c64b1 --- cmd/entire/cli/tokens_profile.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index 16813458c0..5255ce21ba 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -355,10 +355,10 @@ func tokensProfileLimitations(report tokensProfileReport) []string { limitations = append(limitations, "No committed checkpoints found.") } if report.MissingTokenData > 0 { - limitations = append(limitations, fmt.Sprintf("%d checkpoint%s did not include token usage.", report.MissingTokenData, pluralSuffix(report.MissingTokenData))) + limitations = append(limitations, fmt.Sprintf("%d checkpoint%s did not include token usage.", report.MissingTokenData, tokenPluralSuffix(report.MissingTokenData))) } if report.MetadataReadWarnings > 0 { - limitations = append(limitations, fmt.Sprintf("%d checkpoint%s had incomplete session metadata; profile used root token summaries or readable sessions where available.", report.MetadataReadWarnings, pluralSuffix(report.MetadataReadWarnings))) + limitations = append(limitations, fmt.Sprintf("%d 
checkpoint%s had incomplete session metadata; profile used root token summaries or readable sessions where available.", report.MetadataReadWarnings, tokenPluralSuffix(report.MetadataReadWarnings))) } if report.Tokens != nil { limitations = append(limitations, "Token totals are summed from analyzed checkpoints and may include overlapping checkpoint history; treat them as checkpoint-observed volume, not guaranteed unique session spend.") @@ -406,17 +406,10 @@ func writeTokensProfileSignals(w io.Writer, signals []tokensProfileSignal) { fmt.Fprintln(w) fmt.Fprintln(w, "Repeated signals") for _, signal := range signals { - fmt.Fprintf(w, "- %s: %d checkpoint%s", signal.Label, signal.Count, pluralSuffix(signal.Count)) + fmt.Fprintf(w, "- %s: %d checkpoint%s", signal.Label, signal.Count, tokenPluralSuffix(signal.Count)) if signal.Percent > 0 { fmt.Fprintf(w, " (%d%%)", signal.Percent) } fmt.Fprintln(w) } } - -func pluralSuffix(count int) string { - if count == 1 { - return "" - } - return "s" -} From 956e0fb3d013945bc91a79e5217ba2a0f8dde950 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Thu, 18 Jun 2026 18:11:03 -0400 Subject: [PATCH 21/26] reject checkpoint token self-comparison Entire-Checkpoint: caff12e75277 --- cmd/entire/cli/checkpoint_tokens.go | 4 ++++ cmd/entire/cli/sessions_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 4293d3b564..2705129935 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -117,6 +117,10 @@ func runCheckpointTokens(ctx context.Context, cmd *cobra.Command, checkpointIDPr if err != nil { return tokenCommandError(err) } + if baselineReport.CheckpointID == report.CheckpointID { + cmd.SilenceUsage = true + return fmt.Errorf("cannot compare checkpoint %s to itself", report.CheckpointID) + } report.Comparison = buildCheckpointTokensComparison(report, baselineReport) } diff --git 
a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index 603d2997e1..fc1428cd4c 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -2384,6 +2384,35 @@ func TestCheckpointTokensCmd_TextOutputWithComparison(t *testing.T) { } } +func TestCheckpointTokensCmd_RejectsSelfComparison(t *testing.T) { + repo, _ := runExplainAutoTestRepo(t) + ctx := context.Background() + store := checkpoint.NewGitStore(repo, checkpoint.DefaultV1Refs()) + cpID := id.MustCheckpointID("abc222abc222") + + writeCommittedTokenCheckpoint(ctx, t, store, cpID, "checkpoint-token-self-compare", &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 50, + APICallCount: 1, + }) + + cmd := newCheckpointGroupCmd() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetArgs([]string{"tokens", "abc222", "--compare", "abc222abc222"}) + + err := cmd.ExecuteContext(ctx) + if err == nil { + t.Fatal("expected self-comparison error, got nil") + } + if !strings.Contains(err.Error(), "cannot compare checkpoint abc222abc222 to itself") { + t.Fatalf("expected self-comparison error, got: %v", err) + } + if stdout.Len() != 0 { + t.Fatalf("expected no report output for self-comparison, got:\n%s", stdout.String()) + } +} + func TestCheckpointTokensCmd_JSONOutputWithComparison(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() From 823823157f0d2e9fa1f28420695c81d27faa1b40 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Wed, 17 Jun 2026 13:05:17 -0400 Subject: [PATCH 22/26] Surface token diagnostics in labs Entire-Checkpoint: 642c25c2fd0c --- cmd/entire/cli/labs.go | 66 +++++++++++++++++++------------- cmd/entire/cli/labs_test.go | 30 +++++++++------ cmd/entire/cli/tokens_profile.go | 5 ++- 3 files changed, 61 insertions(+), 40 deletions(-) diff --git a/cmd/entire/cli/labs.go b/cmd/entire/cli/labs.go index 6143b9c633..3933515f07 100644 --- a/cmd/entire/cli/labs.go +++ b/cmd/entire/cli/labs.go @@ -9,51 +9,61 @@ import ( ) type 
experimentalCommandInfo struct { - Name string - Invocation string - Summary string + CommandPath []string + Invocation string + Summary string } var experimentalCommands = []experimentalCommandInfo{ { - Name: "review", - Invocation: "entire review", - Summary: "Run configured review skills against the current branch", + CommandPath: []string{"review"}, + Invocation: "entire review", + Summary: "Run configured review skills against the current branch", }, { - Name: "investigate", - Invocation: "entire investigate", - Summary: "Run a multi-agent investigation against a topic, issue, or seed doc", + CommandPath: []string{"investigate"}, + Invocation: "entire investigate", + Summary: "Run a multi-agent investigation against a topic, issue, or seed doc", }, { - Name: "org", - Invocation: "entire org", - Summary: "Manage Entire organizations (create, list)", + CommandPath: []string{"tokens"}, + Invocation: "entire tokens", + Summary: "Analyze experimental token usage diagnostics", }, { - Name: "project", - Invocation: "entire project", - Summary: "Manage Entire projects (create, list)", + CommandPath: []string{"tokens", "profile"}, + Invocation: "entire tokens profile", + Summary: "Aggregate token usage across committed checkpoints", }, { - Name: "repo", - Invocation: "entire repo", - Summary: "Manage Entire repositories (create, list, get, delete)", + CommandPath: []string{"org"}, + Invocation: "entire org", + Summary: "Manage Entire organizations (create, list)", }, { - Name: "grant", - Invocation: "entire grant", - Summary: "Manage access grants and org membership (org, project, repo)", + CommandPath: []string{"project"}, + Invocation: "entire project", + Summary: "Manage Entire projects (create, list)", }, { - Name: "blame", - Invocation: "entire blame", - Summary: "Show which lines came from Entire checkpoints", + CommandPath: []string{"repo"}, + Invocation: "entire repo", + Summary: "Manage Entire repositories (create, list, get, delete)", }, { - Name: "why", - 
Invocation: "entire why", - Summary: "Show why a line exists (commit, checkpoint, prompt, session)", + CommandPath: []string{"grant"}, + Invocation: "entire grant", + Summary: "Manage access grants and org membership (org, project, repo)", + }, + { + CommandPath: []string{"blame"}, + Invocation: "entire blame", + Summary: "Show which lines came from Entire checkpoints", + }, + { + CommandPath: []string{"why"}, + Invocation: "entire why", + Summary: "Show why a line exists (commit, checkpoint, prompt, session)", }, } @@ -97,6 +107,8 @@ Available experimental commands: Try: entire review --help entire investigate --help + entire tokens --help + entire tokens profile --help entire org --help entire project --help entire repo --help diff --git a/cmd/entire/cli/labs_test.go b/cmd/entire/cli/labs_test.go index 7ec74b7f70..8121fa9e7f 100644 --- a/cmd/entire/cli/labs_test.go +++ b/cmd/entire/cli/labs_test.go @@ -26,6 +26,9 @@ func TestLabsCmd_PrintsExperimentalCommandList(t *testing.T) { "Available experimental commands", "entire review", "entire review --help", + "entire tokens", + "entire tokens profile", + "entire tokens profile --help", } { if !strings.Contains(got, want) { t.Fatalf("entire labs output missing %q:\n%s", want, got) @@ -93,8 +96,13 @@ func TestRootHelp_ShowsLabsButHidesReview(t *testing.T) { if !strings.Contains(got, "labs") || !strings.Contains(got, "Explore experimental Entire workflows") { t.Fatalf("root help should include labs command, got:\n%s", got) } - if strings.Contains(got, "review") { - t.Fatalf("root help should not include review while it is listed in labs, got:\n%s", got) + for _, hiddenExperimentalCommand := range []string{ + "review", + "tokens Analyze token usage across sessions and checkpoints", + } { + if strings.Contains(got, hiddenExperimentalCommand) { + t.Fatalf("root help should not include %q while it is listed in labs, got:\n%s", hiddenExperimentalCommand, got) + } } } @@ -143,12 +151,12 @@ func 
TestRenderExperimentalCommands_ColumnWidthAdjustsToLongest(t *testing.T) { t.Parallel() short := []experimentalCommandInfo{ - {Name: "a", Invocation: "entire a", Summary: "first"}, - {Name: "b", Invocation: "entire b", Summary: "second"}, + {Invocation: "entire a", Summary: "first"}, + {Invocation: "entire b", Summary: "second"}, } long := []experimentalCommandInfo{ - {Name: "a", Invocation: "entire a", Summary: "first"}, - {Name: "verylongcommand", Invocation: "entire verylongcommand", Summary: "second"}, + {Invocation: "entire a", Summary: "first"}, + {Invocation: "entire verylongcommand", Summary: "second"}, } shortCol := summaryColumns(t, short)[0] @@ -173,8 +181,8 @@ func TestRenderExperimentalCommands_MultiByteInvocationAligns(t *testing.T) { // padding, len("entire ▶▶") == 13 >= 12 would skip padding and misalign the // row; rune-based padding correctly adds 3 spaces. commands := []experimentalCommandInfo{ - {Name: "long", Invocation: "entire aaaaa", Summary: "first"}, - {Name: "multibyte", Invocation: "entire ▶▶", Summary: "second"}, + {Invocation: "entire aaaaa", Summary: "first"}, + {Invocation: "entire ▶▶", Summary: "second"}, } if got := len("entire ▶▶"); got < 12 { @@ -192,12 +200,12 @@ func TestLabsRegistryCommandsExistAtCanonicalPaths(t *testing.T) { root := NewRootCmd() for _, info := range experimentalCommands { - cmd, _, err := root.Find([]string{info.Name}) + cmd, _, err := root.Find(info.CommandPath) if err != nil { - t.Fatalf("labs command %q should exist at canonical path: %v", info.Name, err) + t.Fatalf("labs command %q should exist at canonical path: %v", info.Invocation, err) } if cmd == nil { - t.Fatalf("labs command %q resolved to nil command", info.Name) + t.Fatalf("labs command %q resolved to nil command", info.Invocation) } } } diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index 5255ce21ba..c888db7506 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -51,8 +51,9 
@@ const tokensProfileUsageScopeCheckpointObserved = "checkpoint_observed" func newTokensGroupCmd() *cobra.Command { cmd := &cobra.Command{ - Use: "tokens", - Short: "Analyze token usage across sessions and checkpoints", + Use: "tokens", + Short: "Analyze token usage across sessions and checkpoints", + Hidden: true, Long: `Analyze token usage across sessions and checkpoints. Commands: From 9b84620cecfaa8ab58e12bd47849d66b51ce1ce4 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Mon, 22 Jun 2026 17:31:37 -0400 Subject: [PATCH 23/26] avoid token profile percentage overflow Entire-Checkpoint: 1bbc15069665 --- cmd/entire/cli/tokens_profile.go | 2 +- cmd/entire/cli/tokens_profile_test.go | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index c888db7506..9437670aac 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -230,7 +230,7 @@ func addTokensProfileTokenSignals(signals map[string]*tokensProfileSignal, check if tokens.APICalls >= 20 { addTokensProfileSignal(signals, "api-call-amplification", checkpointID, denominator) } - if tokens.Total > 0 && tokens.SubagentTotal*100 >= tokens.Total*10 { + if tokenShareAtLeastOneTenth(tokens.SubagentTotal, tokens.Total) { addTokensProfileSignal(signals, "subagent-heavy", checkpointID, denominator) } } diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go index 83d29e45e6..c8582d3122 100644 --- a/cmd/entire/cli/tokens_profile_test.go +++ b/cmd/entire/cli/tokens_profile_test.go @@ -14,6 +14,21 @@ import ( "github.com/entireio/cli/redact" ) +func TestAddTokensProfileTokenSignalsSubagentHeavyAvoidsOverflow(t *testing.T) { + t.Parallel() + + maxInt := int(^uint(0) >> 1) + signals := map[string]*tokensProfileSignal{} + addTokensProfileTokenSignals(signals, id.MustCheckpointID("999aaa000001"), &sessionTokensUsage{ + Total: maxInt, + SubagentTotal: maxInt, + }, 1) + 
+ if signals["subagent-heavy"] == nil { + t.Fatalf("expected subagent-heavy signal, got %+v", signals) + } +} + func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background() From 62c5cc25a4fc8fec932ed23ebf5e86170294367b Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 10:55:27 -0400 Subject: [PATCH 24/26] Align token profile diagnostics --- cmd/entire/cli/checkpoint_tokens.go | 16 ++++++++--- cmd/entire/cli/sessions_test.go | 41 +++++++++++++++++++++++++++++ cmd/entire/cli/tokens_profile.go | 9 +------ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 2705129935..94cd48e88d 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -213,10 +213,7 @@ func buildCheckpointTokensReport(cpID id.CheckpointID, summary *checkpoint.Check report.Branch = firstCheckpointBranch(metas) } - usage := aggregateCheckpointTokenUsage(metas) - if summary != nil && summary.TokenUsage != nil && (usage == nil || metadataWarnings > 0) { - usage = summary.TokenUsage - } + usage := checkpointTokenUsage(summary, metas, metadataWarnings > 0) if tokens := buildSessionTokensUsage(usage); tokens != nil { report.Tokens = tokens if tokens.SubagentTotal > 0 { @@ -349,6 +346,17 @@ func aggregateCheckpointTokenUsage(metas []*checkpoint.CommittedMetadata) *agent return total } +func checkpointTokenUsage(summary *checkpoint.CheckpointSummary, metas []*checkpoint.CommittedMetadata, metadataReadWarning bool) *agent.TokenUsage { + sessionUsage := aggregateCheckpointTokenUsage(metas) + if !metadataReadWarning && sessionUsage != nil { + return sessionUsage + } + if summary != nil && summary.TokenUsage != nil { + return summary.TokenUsage + } + return sessionUsage +} + func addCheckpointTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage { if a == nil && b == nil { return nil 
diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index fc1428cd4c..e6199e9dfc 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1815,6 +1815,47 @@ func TestTokensCmd_DefaultsToCurrentSession(t *testing.T) { } } +func TestTokensCmd_CurrentDoesNotFallbackToOtherWorktree(t *testing.T) { + setupStopTestRepo(t) + + ctx := context.Background() + now := time.Now() + other := makeSessionState("test-tokens-other-only", session.PhaseActive) + other.WorktreePath = testOtherWorktreePath + other.LastInteractionTime = &now + other.TokenUsage = &agent.TokenUsage{InputTokens: 9999} + + if err := strategy.SaveSessionState(ctx, other); err != nil { + t.Fatalf("SaveSessionState() error = %v", err) + } + + defaultCmd := newTokensCmd() + var defaultStdout bytes.Buffer + defaultCmd.SetOut(&defaultStdout) + defaultCmd.SetArgs([]string{}) + if err := defaultCmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected default command to fall back, got: %v", err) + } + if !strings.Contains(defaultStdout.String(), "Session: test-tokens-other-only") { + t.Fatalf("expected default command to fall back to other worktree session, got:\n%s", defaultStdout.String()) + } + + currentCmd := newTokensCmd() + var currentStdout bytes.Buffer + currentCmd.SetOut(&currentStdout) + currentCmd.SetArgs([]string{"--current"}) + if err := currentCmd.ExecuteContext(ctx); err != nil { + t.Fatalf("expected --current command to avoid fallback without error, got: %v", err) + } + out := currentStdout.String() + if !strings.Contains(out, "No active session found in this worktree.") { + t.Fatalf("expected --current command to report no current worktree session, got:\n%s", out) + } + if strings.Contains(out, "test-tokens-other-only") { + t.Fatalf("expected --current command not to fall back to other worktree, got:\n%s", out) + } +} + func TestTokensCmd_CurrentAndSessionIDAreMutuallyExclusive(t *testing.T) { setupStopTestRepo(t) diff --git
a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index 9437670aac..1d71e1d0cf 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -210,14 +210,7 @@ func tokensProfileCheckpointUsage(ctx context.Context, store *checkpoint.GitStor } metas = append(metas, meta) } - sessionUsage := aggregateCheckpointTokenUsage(metas) - if !metadataReadWarning && sessionUsage != nil { - return sessionUsage, false, nil - } - if summary.TokenUsage != nil { - return summary.TokenUsage, metadataReadWarning, nil - } - return sessionUsage, metadataReadWarning, nil + return checkpointTokenUsage(summary, metas, metadataReadWarning), metadataReadWarning, nil } func addTokensProfileTokenSignals(signals map[string]*tokensProfileSignal, checkpointID id.CheckpointID, tokens *sessionTokensUsage, denominator int) { From b7a0bf6f3c920c8d0db81b5dbebdbcaaabc47cac Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 15:49:01 -0400 Subject: [PATCH 25/26] Gate checkpoint context on true session count --- cmd/entire/cli/checkpoint_tokens.go | 2 +- cmd/entire/cli/sessions_test.go | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/checkpoint_tokens.go b/cmd/entire/cli/checkpoint_tokens.go index 8c6e9bdcb7..9342a00134 100644 --- a/cmd/entire/cli/checkpoint_tokens.go +++ b/cmd/entire/cli/checkpoint_tokens.go @@ -230,7 +230,7 @@ func buildCheckpointTokensReport(cpID id.CheckpointID, summary *checkpoint.Check var turnCount int var skillEvents []agent.SkillEvent - if len(metas) == 1 && metas[0] != nil { + if report.SessionCount == 1 && len(metas) == 1 && metas[0] != nil { meta := metas[0] if metrics := meta.SessionMetrics; metrics != nil { turnCount = metrics.TurnCount diff --git a/cmd/entire/cli/sessions_test.go b/cmd/entire/cli/sessions_test.go index cd6b944ef5..1089998f58 100644 --- a/cmd/entire/cli/sessions_test.go +++ b/cmd/entire/cli/sessions_test.go @@ -1890,6 +1890,10 @@ func 
TestCheckpointTokensReport_UsesRootSummaryWhenSessionMetadataIncomplete(t * TokenUsage: &agent.TokenUsage{ InputTokens: 100, }, + SessionMetrics: &checkpoint.SessionMetrics{ + ContextTokens: 9000, + ContextWindowSize: 10000, + }, }, }, 1, @@ -1904,6 +1908,14 @@ func TestCheckpointTokensReport_UsesRootSummaryWhenSessionMetadataIncomplete(t * if report.SessionID != "" || report.Agent != "" || report.Model != "" { t.Fatalf("expected multi-session checkpoint to omit singular session fields, got session_id=%q agent=%q model=%q", report.SessionID, report.Agent, report.Model) } + if report.Context != nil { + t.Fatalf("expected multi-session checkpoint to omit singular context, got %+v", report.Context) + } + for _, contributor := range report.Contributors { + if contributor.Kind == "context_pressure" { + t.Fatalf("expected multi-session checkpoint to omit singular context contributor, got %+v", report.Contributors) + } + } if len(report.Limitations) == 0 || !strings.Contains(report.Limitations[0], "1 checkpoint session metadata file could not be read") { t.Fatalf("expected incomplete metadata limitation, got %+v", report.Limitations) } From 20af48cbecf2d533be5fa7ba640e0df25ef43c81 Mon Sep 17 00:00:00 2001 From: Peyton Montei Date: Tue, 23 Jun 2026 16:40:17 -0400 Subject: [PATCH 26/26] Align token profile replay denominator --- cmd/entire/cli/tokens_profile.go | 3 ++- cmd/entire/cli/tokens_profile_test.go | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cmd/entire/cli/tokens_profile.go b/cmd/entire/cli/tokens_profile.go index 1d71e1d0cf..ad98990108 100644 --- a/cmd/entire/cli/tokens_profile.go +++ b/cmd/entire/cli/tokens_profile.go @@ -217,7 +217,8 @@ func addTokensProfileTokenSignals(signals map[string]*tokensProfileSignal, check if tokens == nil { return } - if tokens.Total > 0 && tokenPercent(tokens.CacheRead, tokens.Total) >= 80 { + topLevelTotal := topLevelSessionTokenTotal(tokens) + if topLevelTotal > 0 && 
tokenPercent(tokens.CacheRead, topLevelTotal) >= recommendationHighCacheReadPercent { addTokensProfileSignal(signals, "context-replay-hotspot", checkpointID, denominator) } if tokens.APICalls >= 20 { diff --git a/cmd/entire/cli/tokens_profile_test.go b/cmd/entire/cli/tokens_profile_test.go index c8582d3122..178ff7a854 100644 --- a/cmd/entire/cli/tokens_profile_test.go +++ b/cmd/entire/cli/tokens_profile_test.go @@ -29,6 +29,25 @@ func TestAddTokensProfileTokenSignalsSubagentHeavyAvoidsOverflow(t *testing.T) { } } +func TestAddTokensProfileTokenSignalsCacheReplayUsesTopLevelTokenTotal(t *testing.T) { + t.Parallel() + + signals := map[string]*tokensProfileSignal{} + addTokensProfileTokenSignals(signals, id.MustCheckpointID("999aaa000002"), &sessionTokensUsage{ + Total: 10000, + Input: 100, + CacheRead: 800, + CacheWrite: 50, + Output: 50, + APICalls: 20, + SubagentTotal: 9000, + }, 1) + + if signals["context-replay-hotspot"] == nil { + t.Fatalf("expected context-replay-hotspot signal, got %+v", signals) + } +} + func TestTokensProfileCmd_TextOutputAggregatesCommittedCheckpoints(t *testing.T) { repo, _ := runExplainAutoTestRepo(t) ctx := context.Background()