Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 55 additions & 17 deletions context-crystallizer/lib/context-analyzer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,65 @@ analyze_context() {
echo '{"error": "transcript not found"}'
return 1
fi

# Get most recent MAIN AGENT message with usage
# CRITICAL: Filter for REAL model messages (claude-*), not synthetic placeholders
# Synthetic messages have model:"<synthetic>" with all-zero usage data
local USAGE_LINE USAGE
USAGE_LINE=$(tac "$TRANSCRIPT" 2>/dev/null | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "")


# --- Compact boundary handling ----------------------------------------
# Claude Code writes a `{"type":"system","subtype":"compact_boundary", ...}`
# entry to the transcript every time the session is compacted. Entries
# before the boundary describe context that is no longer live, so using
# their usage numbers overstates the current window.
#
# Strategy:
# 1. Find the line number of the LAST compact_boundary (if any).
# 2. Scan only entries AFTER that line for the most recent claude-*
# usage — this is the authoritative post-compact total.
# 3. If no post-boundary claude-* usage exists yet (the first turn
# after a compact hasn't completed), fall back to the boundary's
# `compactMetadata.postTokens` as the effective TOTAL.
# 4. If no boundary entry exists, behaviour is unchanged: full-file
# scan for the most recent claude-* usage.
# ---------------------------------------------------------------------
local BOUNDARY_LINE USAGE_LINE USAGE
BOUNDARY_LINE=$(grep -n '"subtype":"compact_boundary"' "$TRANSCRIPT" 2>/dev/null | tail -n1 | cut -d: -f1)

if [[ -n "$BOUNDARY_LINE" ]]; then
# Scan post-boundary entries, newest-first, for the latest claude-* usage.
# Fall back to postTokens from the boundary if none exists yet.
USAGE_LINE=$(tail -n +$((BOUNDARY_LINE + 1)) "$TRANSCRIPT" 2>/dev/null | tac | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "")
else
USAGE_LINE=$(tac "$TRANSCRIPT" 2>/dev/null | grep -m1 '"model":"claude-[^"]*".*"usage"' || echo "")
fi

USAGE=$(echo "$USAGE_LINE" | jq -c '.message.usage // empty' 2>/dev/null || echo "")

if [[ -z "$USAGE" ]]; then

local INPUT CACHE_CREATE CACHE_READ OUTPUT TOTAL PERCENT ACTION
if [[ -n "$USAGE" ]]; then
INPUT=$(echo "$USAGE" | jq -r '.input_tokens // 0')
CACHE_CREATE=$(echo "$USAGE" | jq -r '.cache_creation_input_tokens // 0')
CACHE_READ=$(echo "$USAGE" | jq -r '.cache_read_input_tokens // 0')
OUTPUT=$(echo "$USAGE" | jq -r '.output_tokens // 0')
TOTAL=$((INPUT + CACHE_CREATE + CACHE_READ))
elif [[ -n "$BOUNDARY_LINE" ]]; then
# No post-boundary assistant turn yet — use the boundary's postTokens.
# Synthetic breakdown: the post-compact context is a fresh summary (not
# a cache-hit), so attribute the whole total to INPUT rather than
# CACHE_READ. The per-field breakdown is approximate; only TOTAL is
# load-bearing for downstream callers.
local POST_TOKENS
POST_TOKENS=$(sed -n "${BOUNDARY_LINE}p" "$TRANSCRIPT" 2>/dev/null | jq -r '.compactMetadata.postTokens // 0' 2>/dev/null)
if [[ -n "$POST_TOKENS" && "$POST_TOKENS" != "0" && "$POST_TOKENS" != "null" ]]; then
INPUT="$POST_TOKENS"
CACHE_CREATE=0
CACHE_READ=0
OUTPUT=0
TOTAL="$POST_TOKENS"
else
echo '{"error": "no usage data", "tokens": 0, "percent": 0, "action": "none"}'
return 0
fi
else
echo '{"error": "no usage data", "tokens": 0, "percent": 0, "action": "none"}'
return 0
fi

local INPUT CACHE_CREATE CACHE_READ OUTPUT TOTAL PERCENT ACTION
INPUT=$(echo "$USAGE" | jq -r '.input_tokens // 0')
CACHE_CREATE=$(echo "$USAGE" | jq -r '.cache_creation_input_tokens // 0')
CACHE_READ=$(echo "$USAGE" | jq -r '.cache_read_input_tokens // 0')
OUTPUT=$(echo "$USAGE" | jq -r '.output_tokens // 0')

TOTAL=$((INPUT + CACHE_CREATE + CACHE_READ))
RAW_PERCENT=$(echo "scale=1; ($TOTAL * 100) / $CONTEXT_LIMIT" | bc)
# Apply calibration offset to align with Claude Code's native meter
PERCENT=$(echo "scale=1; $RAW_PERCENT + $CALIBRATION_OFFSET" | bc)
Expand Down
130 changes: 130 additions & 0 deletions tests/regression/test_analyze_context_compact_boundary.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env bash
# test_analyze_context_compact_boundary.sh — regression test for issue #567.
#
# After a `/compact`, Claude Code writes a `{"type":"system",
# "subtype":"compact_boundary", ...}` entry to the transcript. Entries before
# the boundary describe context that is no longer live; the analyzer must
# ignore them when computing current token usage.
#
# Four scenarios covered:
#   1. Compact boundary present + a post-boundary claude-* assistant turn:
#      the analyzer uses the post-boundary turn's usage, NOT the last
#      pre-boundary turn's usage.
#   2. Compact boundary present + NO post-boundary turn yet:
#      the analyzer falls back to `compactMetadata.postTokens` from the
#      boundary entry.
#   3. No compact boundary:
#      behaviour is unchanged — last claude-* usage in the file wins.
#   4. Compact boundary present + NO post-boundary turn + postTokens missing:
#      the analyzer reports the "no usage data" error instead of a total.
#
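# The test itself needs no jq/python/node — only bash and standard Unix text
# tools. The analyzer library it sources does call jq and bc, so both must be
# installed for the scenarios to run.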

# Unset variables are errors and a pipeline fails if any stage fails.
# `-e` is not enabled: a failing command does not abort the script; failures
# are tallied in FAILS instead.
set -uo pipefail

# Repository root is two directories above this script; the analyzer library
# under test lives at a fixed path below it.
REPO_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
ANALYZER="$REPO_DIR/context-crystallizer/lib/context-analyzer.sh"

# Number of failed assertions; incremented by fail().
FAILS=0

# Scratch directory for the transcript fixtures. Abort if it cannot be
# created: with an empty TMPDIR_TEST the fixtures would be written directly
# under "/" and the cleanup trap would have nothing valid to remove.
if ! TMPDIR_TEST=$(mktemp -d) || [[ ! -d "$TMPDIR_TEST" ]]; then
  printf 'mktemp -d failed; cannot create fixture directory\n' >&2
  exit 1
fi
# Remove the scratch directory on every exit path.
trap 'rm -rf -- "$TMPDIR_TEST"' EXIT

# Report a failed assertion on stdout and bump the global FAILS counter.
# Arguments: the message words; they are joined into a single line.
fail() {
  local message="$*"
  printf ' [FAIL] %s\n' "$message"
  FAILS=$((FAILS + 1))
}
# Report a passed assertion on stdout.
# Arguments: the message words; they are joined into a single line.
pass() {
  local message="$*"
  printf ' [PASS] %s\n' "$message"
}

# Banner identifying this regression test in the output.
printf '%s\n' "test_analyze_context_compact_boundary (#567)"
printf '%s\n' "──────────────────────────────────────────"

# Nothing can be tested without the library: record the failure and stop.
[[ -f "$ANALYZER" ]] || {
  fail "analyzer not found at $ANALYZER"
  exit 1
}

# Load the analyzer into this shell so the scenarios below can call
# analyze_context directly.
# shellcheck source=/dev/null
source "$ANALYZER"

# Emit one synthetic assistant-turn transcript line (JSON + newline).
# Arguments: $1 - value placed in usage.cache_read_input_tokens.
# The other usage fields are fixed (input_tokens=1, cache_creation=0,
# output_tokens=10). The analyzer sums input + cache_create + cache_read
# into TOTAL, so cache_read dominates for these fixtures.
make_usage_line() {
  local tokens="$1"
  local head='{"type":"assistant","message":{"model":"claude-opus-4-7","usage":{"input_tokens":1,"cache_creation_input_tokens":0,"cache_read_input_tokens":'
  local tail=',"output_tokens":10}}}'
  printf '%s%s%s\n' "$head" "$tokens" "$tail"
}

# Emit one synthetic compact_boundary transcript line (JSON + newline).
# Arguments: $1 - value placed in compactMetadata.postTokens.
# trigger, preTokens (250000) and durationMs (1000) are fixed.
make_boundary_line() {
  local tokens="$1"
  local head='{"type":"system","subtype":"compact_boundary","compactMetadata":{"trigger":"manual","preTokens":250000,"postTokens":'
  local tail=',"durationMs":1000}}'
  printf '%s%s%s\n' "$head" "$tokens" "$tail"
}

# --- Scenario 1: boundary + post-boundary turn -------------------------------
# Fixture order: a stale 219k turn, the compact boundary, then a 115k turn.
# The reported total must land near 115k; a value near 219k would mean the
# pre-boundary usage leaked through.
T1="$TMPDIR_TEST/boundary_with_post_turn.jsonl"
make_usage_line 219000 >"$T1"
make_boundary_line 21000 >>"$T1"
make_usage_line 115000 >>"$T1"

# Pull the first `"total": <digits>` pair out of the analyzer output and keep
# only the number.
RESULT=$(
  analyze_context "$T1" 200000 2>/dev/null \
    | grep -o '"total": *[0-9]*' \
    | head -n1 \
    | tr -d ' ' \
    | cut -d: -f2
)
if [[ "$RESULT" -ge 110000 && "$RESULT" -le 120000 ]]; then
  pass "boundary + post-turn → TOTAL=$RESULT (expected ~115k)"
else
  fail "boundary + post-turn → TOTAL=$RESULT (expected ~115k; pre-boundary leak would be ~219k)"
fi

# --- Scenario 2: boundary + no post-boundary turn ----------------------------
# Fixture order: a stale 219k turn followed by a boundary whose
# postTokens=21946, and nothing after it. The reported total must equal the
# boundary's postTokens exactly, not the pre-boundary value.
T2="$TMPDIR_TEST/boundary_no_post_turn.jsonl"
make_usage_line 219000 >"$T2"
make_boundary_line 21946 >>"$T2"

# Pull the first `"total": <digits>` pair out of the analyzer output and keep
# only the number.
RESULT=$(
  analyze_context "$T2" 200000 2>/dev/null \
    | grep -o '"total": *[0-9]*' \
    | head -n1 \
    | tr -d ' ' \
    | cut -d: -f2
)
if [[ "$RESULT" == "21946" ]]; then
  pass "boundary + no post-turn → TOTAL=$RESULT (expected 21946 from postTokens)"
else
  fail "boundary + no post-turn → TOTAL=$RESULT (expected 21946 from postTokens)"
fi

# --- Scenario 3: no boundary ------------------------------------------------
# Two assistant turns and no boundary entry: the later turn (95k) is the one
# whose usage must be reported.
T3="$TMPDIR_TEST/no_boundary.jsonl"
make_usage_line 50000 >"$T3"
make_usage_line 95000 >>"$T3"

# Pull the first `"total": <digits>` pair out of the analyzer output and keep
# only the number.
RESULT=$(
  analyze_context "$T3" 200000 2>/dev/null \
    | grep -o '"total": *[0-9]*' \
    | head -n1 \
    | tr -d ' ' \
    | cut -d: -f2
)
if [[ "$RESULT" -ge 90000 && "$RESULT" -le 100000 ]]; then
  pass "no boundary → TOTAL=$RESULT (expected ~95k, last turn in file)"
else
  fail "no boundary → TOTAL=$RESULT (expected ~95k)"
fi

# --- Scenario 4: boundary + no post-turn + missing postTokens ----------------
# Defensive case: the boundary entry carries no postTokens field and no turn
# follows it. The analyzer is expected to emit its "no usage data" error
# rather than silently under-reporting.
T4="$TMPDIR_TEST/boundary_missing_posttokens.jsonl"
make_usage_line 150000 >"$T4"
printf '{"type":"system","subtype":"compact_boundary","compactMetadata":{"trigger":"manual","preTokens":150000,"durationMs":1000}}\n' >>"$T4"

# Keep the full analyzer output so it can be shown if the check fails.
OUT=$(analyze_context "$T4" 200000 2>/dev/null)
if grep -q '"error": *"no usage data"' <<<"$OUT"; then
  pass "boundary + no post-turn + missing postTokens → returns no-usage error"
else
  fail "boundary + no post-turn + missing postTokens → unexpected: $OUT"
fi

# Final verdict: exit 0 when no assertion failed, otherwise print the failure
# count and exit 1.
printf '\n'
[[ "$FAILS" -gt 0 ]] || {
  printf '%s\n' " all scenarios passed"
  exit 0
}
printf '%s\n' " $FAILS failure(s)"
exit 1
Loading