diff --git a/context-crystallizer/lib/context-analyzer.sh b/context-crystallizer/lib/context-analyzer.sh index 591d52d..09d4e4b 100755 --- a/context-crystallizer/lib/context-analyzer.sh +++ b/context-crystallizer/lib/context-analyzer.sh @@ -15,27 +15,65 @@ analyze_context() { echo '{"error": "transcript not found"}' return 1 fi - - # Get most recent MAIN AGENT message with usage - # CRITICAL: Filter for REAL model messages (claude-*), not synthetic placeholders - # Synthetic messages have model:"" with all-zero usage data - local USAGE_LINE USAGE - USAGE_LINE=$(tac "$TRANSCRIPT" 2>/dev/null | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "") - + + # --- Compact boundary handling ---------------------------------------- + # Claude Code writes a `{"type":"system","subtype":"compact_boundary", ...}` + # entry to the transcript every time the session is compacted. Entries + # before the boundary describe context that is no longer live, so using + # their usage numbers overstates the current window. + # + # Strategy: + # 1. Find the line number of the LAST compact_boundary (if any). + # 2. Scan only entries AFTER that line for the most recent claude-* + # usage — this is the authoritative post-compact total. + # 3. If no post-boundary claude-* usage exists yet (the first turn + # after a compact hasn't completed), fall back to the boundary's + # `compactMetadata.postTokens` as the effective TOTAL. + # 4. If no boundary entry exists, behaviour is unchanged: full-file + # scan for the most recent claude-* usage. + # --------------------------------------------------------------------- + local BOUNDARY_LINE USAGE_LINE USAGE + BOUNDARY_LINE=$(grep -n '"subtype":"compact_boundary"' "$TRANSCRIPT" 2>/dev/null | tail -n1 | cut -d: -f1) + + if [[ -n "$BOUNDARY_LINE" ]]; then + # Scan post-boundary entries, newest-first, for the latest claude-* usage. + # Fall back to postTokens from the boundary if none exists yet. 
+ USAGE_LINE=$(tail -n +$((BOUNDARY_LINE + 1)) "$TRANSCRIPT" 2>/dev/null | tac | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "") + else + USAGE_LINE=$(tac "$TRANSCRIPT" 2>/dev/null | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "") + fi + USAGE=$(echo "$USAGE_LINE" | jq -c '.message.usage // empty' 2>/dev/null || echo "") - - if [[ -z "$USAGE" ]]; then + + local INPUT CACHE_CREATE CACHE_READ OUTPUT TOTAL PERCENT ACTION + if [[ -n "$USAGE" ]]; then + INPUT=$(echo "$USAGE" | jq -r '.input_tokens // 0') + CACHE_CREATE=$(echo "$USAGE" | jq -r '.cache_creation_input_tokens // 0') + CACHE_READ=$(echo "$USAGE" | jq -r '.cache_read_input_tokens // 0') + OUTPUT=$(echo "$USAGE" | jq -r '.output_tokens // 0') + TOTAL=$((INPUT + CACHE_CREATE + CACHE_READ)) + elif [[ -n "$BOUNDARY_LINE" ]]; then + # No post-boundary assistant turn yet — use the boundary's postTokens. + # Synthetic breakdown: the post-compact context is a fresh summary (not + # a cache-hit), so attribute the whole total to INPUT rather than + # CACHE_READ. The per-field breakdown is approximate; only TOTAL is + # load-bearing for downstream callers. 
+ local POST_TOKENS + POST_TOKENS=$(sed -n "${BOUNDARY_LINE}p" "$TRANSCRIPT" 2>/dev/null | jq -r '.compactMetadata.postTokens // 0' 2>/dev/null) + if [[ -n "$POST_TOKENS" && "$POST_TOKENS" != "0" && "$POST_TOKENS" != "null" ]]; then + INPUT="$POST_TOKENS" + CACHE_CREATE=0 + CACHE_READ=0 + OUTPUT=0 + TOTAL="$POST_TOKENS" + else + echo '{"error": "no usage data", "tokens": 0, "percent": 0, "action": "none"}' + return 0 + fi + else echo '{"error": "no usage data", "tokens": 0, "percent": 0, "action": "none"}' return 0 fi - - local INPUT CACHE_CREATE CACHE_READ OUTPUT TOTAL PERCENT ACTION - INPUT=$(echo "$USAGE" | jq -r '.input_tokens // 0') - CACHE_CREATE=$(echo "$USAGE" | jq -r '.cache_creation_input_tokens // 0') - CACHE_READ=$(echo "$USAGE" | jq -r '.cache_read_input_tokens // 0') - OUTPUT=$(echo "$USAGE" | jq -r '.output_tokens // 0') - - TOTAL=$((INPUT + CACHE_CREATE + CACHE_READ)) RAW_PERCENT=$(echo "scale=1; ($TOTAL * 100) / $CONTEXT_LIMIT" | bc) # Apply calibration offset to align with Claude Code's native meter PERCENT=$(echo "scale=1; $RAW_PERCENT + $CALIBRATION_OFFSET" | bc) diff --git a/tests/regression/test_analyze_context_compact_boundary.sh b/tests/regression/test_analyze_context_compact_boundary.sh new file mode 100755 index 0000000..5e28105 --- /dev/null +++ b/tests/regression/test_analyze_context_compact_boundary.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# test_analyze_context_compact_boundary.sh — regression test for issue #567. +# +# After a `/compact`, Claude Code writes a `{"type":"system", +# "subtype":"compact_boundary", ...}` entry to the transcript. Entries before +# the boundary describe context that is no longer live; the analyzer must +# ignore them when computing current token usage. +# +# Four scenarios covered (1-3 listed here; 4, a boundary lacking postTokens with no post-boundary turn, is described at its fixture): +# 1. Compact boundary present + a post-boundary claude-* assistant turn: +# the analyzer uses the post-boundary turn's usage, NOT the last +# pre-boundary turn's usage. +# 2. 
Compact boundary present + NO post-boundary turn yet: +# the analyzer falls back to `compactMetadata.postTokens` from the +# boundary entry. +# 3. No compact boundary: +# behaviour is unchanged — last claude-* usage in the file wins. +# +# No python/node deps in the test itself — bash + the analyzer library, which itself invokes jq, bc and tac. + +set -uo pipefail + +REPO_DIR="$(cd "$(dirname "$0")/../.." && pwd)" +ANALYZER="$REPO_DIR/context-crystallizer/lib/context-analyzer.sh" + +FAILS=0 +TMPDIR_TEST=$(mktemp -d) +trap 'rm -rf "$TMPDIR_TEST"' EXIT + +fail() { + echo " [FAIL] $*" + FAILS=$((FAILS + 1)) +} +pass() { echo " [PASS] $*"; } + +echo "test_analyze_context_compact_boundary (#567)" +echo "──────────────────────────────────────────" + +if [[ ! -f "$ANALYZER" ]]; then + fail "analyzer not found at $ANALYZER" + exit 1 +fi + +# shellcheck source=/dev/null +source "$ANALYZER" + +# A synthetic assistant-turn line with a specific cache_read value. +# The analyzer sums input + cache_create + cache_read → TOTAL, so cache_read +# dominates for these fixtures. +make_usage_line() { + local cache_read="$1" + printf '{"type":"assistant","message":{"model":"claude-opus-4-7","usage":{"input_tokens":1,"cache_creation_input_tokens":0,"cache_read_input_tokens":%s,"output_tokens":10}}}\n' "$cache_read" +} + +make_boundary_line() { + local post_tokens="$1" + printf '{"type":"system","subtype":"compact_boundary","compactMetadata":{"trigger":"manual","preTokens":250000,"postTokens":%s,"durationMs":1000}}\n' "$post_tokens" +} + +# --- Scenario 1: boundary + post-boundary turn ------------------------------- +# Pre-boundary turn at 219k cache_read (the "stale" value); post-boundary turn +# at 115k. Analyzer MUST return ~115k, not ~219k. 
+T1="$TMPDIR_TEST/boundary_with_post_turn.jsonl" +{ + make_usage_line 219000 + make_boundary_line 21000 + make_usage_line 115000 +} >"$T1" + +RESULT=$(analyze_context "$T1" 200000 2>/dev/null | grep -o '"total": *[0-9]*' | head -n1 | tr -d ' ' | cut -d: -f2) +if [[ "$RESULT" -ge 110000 && "$RESULT" -le 120000 ]]; then + pass "boundary + post-turn → TOTAL=$RESULT (expected ~115k)" +else + fail "boundary + post-turn → TOTAL=$RESULT (expected ~115k; pre-boundary leak would be ~219k)" +fi + +# --- Scenario 2: boundary + no post-boundary turn ---------------------------- +# Pre-boundary turn at 219k, boundary's postTokens=21946, no turn after. +# Analyzer MUST fall back to postTokens (21946), not use the pre-boundary value. +T2="$TMPDIR_TEST/boundary_no_post_turn.jsonl" +{ + make_usage_line 219000 + make_boundary_line 21946 +} >"$T2" + +RESULT=$(analyze_context "$T2" 200000 2>/dev/null | grep -o '"total": *[0-9]*' | head -n1 | tr -d ' ' | cut -d: -f2) +if [[ "$RESULT" == "21946" ]]; then + pass "boundary + no post-turn → TOTAL=$RESULT (expected 21946 from postTokens)" +else + fail "boundary + no post-turn → TOTAL=$RESULT (expected 21946 from postTokens)" +fi + +# --- Scenario 3: no boundary ------------------------------------------------ +# No boundary entry — analyzer uses last claude-* usage in the file. +T3="$TMPDIR_TEST/no_boundary.jsonl" +{ + make_usage_line 50000 + make_usage_line 95000 +} >"$T3" + +RESULT=$(analyze_context "$T3" 200000 2>/dev/null | grep -o '"total": *[0-9]*' | head -n1 | tr -d ' ' | cut -d: -f2) +if [[ "$RESULT" -ge 90000 && "$RESULT" -le 100000 ]]; then + pass "no boundary → TOTAL=$RESULT (expected ~95k, last turn in file)" +else + fail "no boundary → TOTAL=$RESULT (expected ~95k)" +fi + +# --- Scenario 4: boundary + no post-turn + missing postTokens ---------------- +# Defensive: if postTokens is missing or 0, analyzer returns no-usage error +# rather than silently under-reporting. 
+T4="$TMPDIR_TEST/boundary_missing_posttokens.jsonl" +{ + make_usage_line 150000 + printf '{"type":"system","subtype":"compact_boundary","compactMetadata":{"trigger":"manual","preTokens":150000,"durationMs":1000}}\n' +} >"$T4" + +OUT=$(analyze_context "$T4" 200000 2>/dev/null) +if echo "$OUT" | grep -q '"error": *"no usage data"'; then + pass "boundary + no post-turn + missing postTokens → returns no-usage error" +else + fail "boundary + no post-turn + missing postTokens → unexpected: $OUT" +fi + +echo "" +if [[ "$FAILS" -gt 0 ]]; then + echo " $FAILS failure(s)" + exit 1 +fi +echo " all scenarios passed" +exit 0