Wave-Engineering · bakeb7j0 · May 2, 2026 · May 2, 2026
diff --git a/context-crystallizer/lib/context-analyzer.sh b/context-crystallizer/lib/context-analyzer.sh
@@ -15,27 +15,65 @@ analyze_context() {
         echo '{"error": "transcript not found"}'
         return 1
     fi
-
-    # Get most recent MAIN AGENT message with usage
-    # CRITICAL: Filter for REAL model messages (claude-*), not synthetic placeholders
-    # Synthetic messages have model:"<synthetic>" with all-zero usage data
-    local USAGE_LINE USAGE
-    USAGE_LINE=$(tac "$TRANSCRIPT" 2>/dev/null | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "")
-
+
+    # --- Compact boundary handling ----------------------------------------
+    # Claude Code writes a `{"type":"system","subtype":"compact_boundary", ...}`
+    # entry to the transcript every time the session is compacted. Entries
+    # before the boundary describe context that is no longer live, so using
+    # their usage numbers overstates the current window.
+    #
+    # Strategy:
+    #   1. Find the line number of the LAST compact_boundary (if any).
+    #   2. Scan only entries AFTER that line for the most recent claude-*
+    #      usage — this is the authoritative post-compact total.
+    #   3. If no post-boundary claude-* usage exists yet (the first turn
+    #      after a compact hasn't completed), fall back to the boundary's
+    #      `compactMetadata.postTokens` as the effective TOTAL.
+    #   4. If no boundary entry exists, behaviour is unchanged: full-file
+    #      scan for the most recent claude-* usage.
+    # ---------------------------------------------------------------------
+    local BOUNDARY_LINE USAGE_LINE USAGE
+    BOUNDARY_LINE=$(grep -n '"subtype":"compact_boundary"' "$TRANSCRIPT" 2>/dev/null | tail -n1 | cut -d: -f1)
+
+    if [[ -n "$BOUNDARY_LINE" ]]; then
+        # Scan post-boundary entries, newest-first, for the latest claude-* usage.
+        # Fall back to postTokens from the boundary if none exists yet.
+        USAGE_LINE=$(tail -n +$((BOUNDARY_LINE + 1)) "$TRANSCRIPT" 2>/dev/null | tac | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "")
+    else
+        USAGE_LINE=$(tac "$TRANSCRIPT" 2>/dev/null | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "")
+    fi
+
     USAGE=$(echo "$USAGE_LINE" | jq -c '.message.usage // empty' 2>/dev/null || echo "")
-
-    if [[ -z "$USAGE" ]]; then
+
+    local INPUT CACHE_CREATE CACHE_READ OUTPUT TOTAL PERCENT ACTION
+    if [[ -n "$USAGE" ]]; then
+        INPUT=$(echo "$USAGE" | jq -r '.input_tokens // 0')
+        CACHE_CREATE=$(echo "$USAGE" | jq -r '.cache_creation_input_tokens // 0')
+        CACHE_READ=$(echo "$USAGE" | jq -r '.cache_read_input_tokens // 0')
+        OUTPUT=$(echo "$USAGE" | jq -r '.output_tokens // 0')
+        TOTAL=$((INPUT + CACHE_CREATE + CACHE_READ))
+    elif [[ -n "$BOUNDARY_LINE" ]]; then
+        # No post-boundary assistant turn yet — use the boundary's postTokens.
+        # Synthetic breakdown: the post-compact context is a fresh summary (not
+        # a cache-hit), so attribute the whole total to INPUT rather than
+        # CACHE_READ. The per-field breakdown is approximate; only TOTAL is
+        # load-bearing for downstream callers.
+        local POST_TOKENS
+        POST_TOKENS=$(sed -n "${BOUNDARY_LINE}p" "$TRANSCRIPT" 2>/dev/null | jq -r '.compactMetadata.postTokens // 0' 2>/dev/null)
+        if [[ -n "$POST_TOKENS" && "$POST_TOKENS" != "0" && "$POST_TOKENS" != "null" ]]; then
+            INPUT="$POST_TOKENS"
+            CACHE_CREATE=0
+            CACHE_READ=0
+            OUTPUT=0
+            TOTAL="$POST_TOKENS"
+        else
+            echo '{"error": "no usage data", "tokens": 0, "percent": 0, "action": "none"}'
+            return 0
+        fi
+    else
         echo '{"error": "no usage data", "tokens": 0, "percent": 0, "action": "none"}'
         return 0
     fi
-
-    local INPUT CACHE_CREATE CACHE_READ OUTPUT TOTAL PERCENT ACTION
-    INPUT=$(echo "$USAGE" | jq -r '.input_tokens // 0')
-    CACHE_CREATE=$(echo "$USAGE" | jq -r '.cache_creation_input_tokens // 0')
-    CACHE_READ=$(echo "$USAGE" | jq -r '.cache_read_input_tokens // 0')
-    OUTPUT=$(echo "$USAGE" | jq -r '.output_tokens // 0')
-
-    TOTAL=$((INPUT + CACHE_CREATE + CACHE_READ))
     RAW_PERCENT=$(echo "scale=1; ($TOTAL * 100) / $CONTEXT_LIMIT" | bc)
     # Apply calibration offset to align with Claude Code's native meter
     PERCENT=$(echo "scale=1; $RAW_PERCENT + $CALIBRATION_OFFSET" | bc)

diff --git a/tests/regression/test_analyze_context_compact_boundary.sh b/tests/regression/test_analyze_context_compact_boundary.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# test_analyze_context_compact_boundary.sh — regression test for issue #567.
+#
+# After a `/compact`, Claude Code writes a `{"type":"system",
+# "subtype":"compact_boundary", ...}` entry to the transcript. Entries before
+# the boundary describe context that is no longer live; the analyzer must
+# ignore them when computing current token usage.
+#
+# Four scenarios covered:
+#   1. Compact boundary present + a post-boundary claude-* assistant turn:
+#      the analyzer uses the post-boundary turn's usage, NOT the last
+#      pre-boundary turn's usage.
+#   2. Compact boundary present + NO post-boundary turn yet:
+#      the analyzer falls back to `compactMetadata.postTokens` from the
+#      boundary entry.
+#   3. No compact boundary: unchanged — last claude-* usage in the file wins.
+#   4. Boundary, no post-boundary turn, no postTokens: "no usage data" error.
+#
+# No jq/python/node in the test itself; the analyzer it sources still needs jq and bc.
+
+set -uo pipefail # no -e: failed checks are tallied in FAILS and reported at the end
+# Repo root is two directories above this script; the analyzer library lives under it.
+REPO_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
+ANALYZER="$REPO_DIR/context-crystallizer/lib/context-analyzer.sh"
+# FAILS counts failed checks. Stop if mktemp fails, or fixtures would land in "/".
+FAILS=0
+TMPDIR_TEST=$(mktemp -d) || { echo "  [FAIL] mktemp -d failed" >&2; exit 1; }
+trap 'rm -rf -- "$TMPDIR_TEST"' EXIT
+
+fail() { # print a [FAIL] line for the given message and bump the FAILS tally
+	printf '  [FAIL] %s\n' "$*"
+	: "$((FAILS += 1))"
+}
+pass() { printf '  [PASS] %s\n' "$*"; } # print a [PASS] line for the given message
+
+printf '%s\n' "test_analyze_context_compact_boundary (#567)" \
+	"──────────────────────────────────────────"
+
+# Nothing can be tested without the library: record the failure and stop.
+[[ -f "$ANALYZER" ]] || {
+	fail "analyzer not found at $ANALYZER"
+	exit 1
+}
+# shellcheck source=/dev/null
+. "$ANALYZER" # brings analyze_context into this shell
+
+# Print one JSONL assistant entry for model claude-opus-4-7 whose usage block has
+# input_tokens=1, cache_creation_input_tokens=0, output_tokens=10 and
+# cache_read_input_tokens=$1 (the only field that varies between fixtures).
+make_usage_line() {
+	local prefix='{"type":"assistant","message":{"model":"claude-opus-4-7","usage":{"input_tokens":1,"cache_creation_input_tokens":0,"cache_read_input_tokens":'
+	printf '%s%s%s\n' "$prefix" "$1" ',"output_tokens":10}}}'
+}
+
+make_boundary_line() { # print a compact_boundary JSONL entry with postTokens=$1
+	local prefix='{"type":"system","subtype":"compact_boundary","compactMetadata":{"trigger":"manual","preTokens":250000,"postTokens":'
+	printf '%s%s%s\n' "$prefix" "$1" ',"durationMs":1000}}'
+}
+
+# --- Scenario 1: boundary + post-boundary turn -------------------------------
+# Fixture order: stale 219k turn, boundary (postTokens 21000), live 115k turn.
+# The reported total must land in 110000..120000; ~219k means a pre-boundary leak.
+T1="$TMPDIR_TEST/boundary_with_post_turn.jsonl"
+make_usage_line 219000 >"$T1"
+make_boundary_line 21000 >>"$T1"
+make_usage_line 115000 >>"$T1"
+
+# Pull the first "total" number out of the analyzer's output.
+RESULT=$(analyze_context "$T1" 200000 2>/dev/null | grep -o '"total": *[0-9]*' | head -n1 | tr -d ' ' | cut -d: -f2)
+
+if [[ "$RESULT" -lt 110000 || "$RESULT" -gt 120000 ]]; then
+	fail "boundary + post-turn → TOTAL=$RESULT (expected ~115k; pre-boundary leak would be ~219k)"
+else
+	pass "boundary + post-turn → TOTAL=$RESULT (expected ~115k)"
+fi
+
+# --- Scenario 2: boundary + no post-boundary turn ----------------------------
+# Fixture: a stale 219k turn followed by a boundary with postTokens=21946 and
+# nothing after it. The total must be exactly 21946, taken from postTokens.
+T2="$TMPDIR_TEST/boundary_no_post_turn.jsonl"
+make_usage_line 219000 >"$T2"
+make_boundary_line 21946 >>"$T2"
+
+# Pull the first "total" number out of the analyzer's output.
+RESULT=$(analyze_context "$T2" 200000 2>/dev/null | grep -o '"total": *[0-9]*' | head -n1 | tr -d ' ' | cut -d: -f2)
+
+# Exact match required here, unlike the range checks in scenarios 1 and 3.
+case "$RESULT" in
+21946) pass "boundary + no post-turn → TOTAL=$RESULT (expected 21946 from postTokens)" ;;
+*) fail "boundary + no post-turn → TOTAL=$RESULT (expected 21946 from postTokens)" ;;
+esac
+
+# --- Scenario 3: no boundary ------------------------------------------------
+# Two turns (50k, then 95k) and no boundary entry: the last turn in the file
+# must win, so the reported total has to land in 90000..100000.
+T3="$TMPDIR_TEST/no_boundary.jsonl"
+make_usage_line 50000 >"$T3"
+make_usage_line 95000 >>"$T3"
+
+# Pull the first "total" number out of the analyzer's output.
+RESULT=$(analyze_context "$T3" 200000 2>/dev/null | grep -o '"total": *[0-9]*' | head -n1 | tr -d ' ' | cut -d: -f2)
+if [[ "$RESULT" -lt 90000 || "$RESULT" -gt 100000 ]]; then
+	fail "no boundary → TOTAL=$RESULT (expected ~95k)"
+else
+	pass "no boundary → TOTAL=$RESULT (expected ~95k, last turn in file)"
+fi
+
+# --- Scenario 4: boundary + no post-turn + missing postTokens ----------------
+# Fixture: a 150k turn, then a boundary whose compactMetadata has no postTokens.
+# With nothing to fall back on, the analyzer must emit its "no usage data" error.
+T4="$TMPDIR_TEST/boundary_missing_posttokens.jsonl"
+make_usage_line 150000 >"$T4"
+printf '{"type":"system","subtype":"compact_boundary","compactMetadata":{"trigger":"manual","preTokens":150000,"durationMs":1000}}\n' >>"$T4"
+
+# Capture the whole output: this check looks at the error field, not a total.
+OUT=$(analyze_context "$T4" 200000 2>/dev/null)
+
+if ! grep -q '"error": *"no usage data"' <<<"$OUT"; then
+	fail "boundary + no post-turn + missing postTokens → unexpected: $OUT"
+else
+	pass "boundary + no post-turn + missing postTokens → returns no-usage error"
+fi
+
+echo "" # blank separator before the verdict; exit 1 on any failure, else 0
+if [[ "$FAILS" -le 0 ]]; then
+	echo "  all scenarios passed"
+	exit 0
+fi
+echo "  $FAILS failure(s)"
+exit 1