diff --git a/AGENTS.md b/AGENTS.md
index 3e7c825..3cbfbf8 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -21,7 +21,7 @@ bun run generate-report.ts
 bun run generate-report.ts results/result-2024-12-07-14-30-45.json
 
 # Run unit tests for lib modules
-bun run test:self
+bun test
 
 # Run TypeScript type checking
 bun tsc --noEmit
@@ -68,6 +68,9 @@ MCP integration is configured via the interactive CLI at runtime. Options:
 │   ├── report.ts               # Report generation orchestration
 │   ├── report-template.ts      # HTML report template generation
 │   ├── report-styles.ts        # CSS styles for HTML reports
+│   ├── token-cache.ts          # Token cache simulation for cost estimation
+│   ├── utils.ts                # Utility functions (sanitization, cost calculation, etc.)
+│   ├── utils.test.ts           # Unit tests for utility functions
 │   └── tools/
 │       ├── index.ts            # Tool exports
 │       ├── result-write.ts     # ResultWrite tool for final output
@@ -149,8 +152,75 @@ Key functions:
 
 - `extractPricingFromGatewayModel()`: Parse gateway model pricing
 - `buildPricingMap()`: Build lookup map from gateway models
+- `lookupPricingFromMap()`: Find pricing for a specific model
 - `calculateCost()`: Calculate total cost from token usage
 - `formatCost()` / `formatMTokCost()`: Format costs for display
+- `getModelPricingDisplay()`: Convert per-token costs to per-MTok for display
+
+### Token Cache Simulation
+
+The `lib/token-cache.ts` module simulates prompt caching behavior:
+
+**TokenCache Class:**
+
+- Models growing prefix cache across multiple API calls
+- Tracks cache hits, cache writes, and output tokens
+- Calculates simulated costs using cache read/write rates
+- Default rates when pricing omits them: cache reads at 10% and cache writes at 125% of the input token rate
+
+**Cache Behavior Model:**
+
+1. Each test runs in its own context (cache resets between tests)
+2. Step 1's input is written to cache (pays cache creation rate)
+3. Each subsequent step:
+   - Previous step's full input is cached (pays cache read rate)
+   - New tokens extend the cache (pays cache creation rate)
+4. The cache prefix grows with each step
+
+**simulateCacheSavings()** (in `lib/utils.ts`):
+
+- Estimates cost savings with prompt caching enabled
+- Returns `simulatedCostWithCache`, `cacheHits`, and `cacheWriteTokens`
+- Results displayed in HTML report as "Cache Simulation" section
+- Shows potential savings compared to actual cost without caching
+
+### Utility Functions
+
+The `lib/utils.ts` module provides core utilities:
+
+- `sanitizeModelName()`: Convert model IDs to filesystem-safe names
+- `getTimestampedFilename()`: Generate timestamped filenames with optional model suffix
+- `isHttpUrl()`: Check if string is HTTP/HTTPS URL
+- `extractResultWriteContent()`: Extract component code from agent steps
+- `calculateTotalCost()`: Aggregate token usage and costs across all tests
+- `buildAgentPrompt()`: Build user message array from test definition
+- `simulateCacheSavings()`: Simulate cache savings using growing prefix model
+
+### Reference Verification
+
+The `lib/verify-references.ts` module verifies reference implementations:
+
+**Key Functions:**
+
+- `loadTestDefinitions()`: Discover test suites in `tests/` directory
+- `copyReferenceToComponent()`: Copy Reference.svelte to Component.svelte temporarily
+- `cleanupComponent()`: Remove temporary Component.svelte file
+- `runTest()`: Execute tests and collect detailed results
+- `printSummary()`: Display verification results summary
+- `verifyAllReferences()`: Main function that orchestrates entire verification workflow
+
+**Workflow:**
+
+1. Discover all test suites with Reference.svelte
+2. For each test:
+   - Copy Reference.svelte → Component.svelte
+   - Run vitest against the test
+   - Collect pass/fail results
+   - Cleanup Component.svelte
+3. Print summary of all results
+4. Return exit code (0 for success, 1 for failures)
+
+Used by the `verify-references.ts` script, which is run via `bun run verify-tests`.
 
 ### Key Technologies
 
@@ -185,9 +255,10 @@ The project uses `@ai-sdk/mcp` with a custom patch applied via `patch-package`:
    f. Test results are collected (pass/fail, error details)
    g. Output directory is cleaned up
 5. Results aggregated with pricing calculations
-6. Results written to `results/result-YYYY-MM-DD-HH-MM-SS.json`
-7. HTML report generated at `results/result-YYYY-MM-DD-HH-MM-SS.html`
-8. Report automatically opens in default browser
+6. Cache simulation estimates potential savings
+7. Results written to `results/result-YYYY-MM-DD-HH-MM-SS.json`
+8. HTML report generated at `results/result-YYYY-MM-DD-HH-MM-SS.html`
+9. Report automatically opens in default browser
 
 ### Output Files
 
@@ -227,7 +298,8 @@ All results are saved in the `results/` directory with timestamped filenames:
     "pricing": {
       "inputCostPerMTok": 3,
       "outputCostPerMTok": 15,
-      "cacheReadCostPerMTok": 0.3
+      "cacheReadCostPerMTok": 0.3,
+      "cacheCreationCostPerMTok": 3.75
     },
     "totalCost": {
       "inputCost": 0.003,
@@ -237,6 +309,11 @@ All results are saved in the `results/` directory with timestamped filenames:
       "inputTokens": 1000,
       "outputTokens": 1000,
       "cachedInputTokens": 1000
+    },
+    "cacheSimulation": {
+      "simulatedCostWithCache": 0.015,
+      "cacheHits": 2000,
+      "cacheWriteTokens": 1500
     }
   }
 }
@@ -251,8 +328,9 @@ Unit tests for library modules are in `lib/*.test.ts`:
 - `lib/output-test-runner.test.ts` - Output directory management
 - `lib/tools/result-write.test.ts` - ResultWrite tool behavior
 - `lib/tools/test-component.test.ts` - TestComponent tool behavior
+- `lib/utils.test.ts` - Utility functions, cost calculation, cache simulation
 
-Run unit tests with: `bun run test:self`
+Run unit tests with: `bun test`
 
 ## TypeScript Configuration
 
@@ -277,5 +355,10 @@ Run unit tests with: `bun run test:self`
 - All result files are saved with timestamps to preserve historical benchmarks
 - MCP integration can be configured via interactive CLI without code changes
 - MCP status is clearly indicated in both the JSON metadata and HTML report with a visual badge
+- Cache simulation shows estimated savings if prompt caching were enabled
 - Exit code is 0 if all tests pass, 1 if any tests fail
 - Pricing is fetched from Vercel AI Gateway model metadata at runtime
+
+## Important notes
+
+Always run `bun tsc --noEmit` and `bun test` before completing work to confirm that type checking and the unit tests pass.
diff --git a/index.ts b/index.ts
index b4a4bb3..99c68b0 100644
--- a/index.ts
+++ b/index.ts
@@ -2,21 +2,16 @@ import { Experimental_Agent as Agent, hasToolCall, stepCountIs } from "ai";
 import { experimental_createMCPClient as createMCPClient } from "./node_modules/@ai-sdk/mcp/dist/index.mjs";
 import { Experimental_StdioMCPTransport as StdioMCPTransport } from "./node_modules/@ai-sdk/mcp/dist/mcp-stdio/index.mjs";
 import { writeFileSync, mkdirSync, existsSync } from "node:fs";
-import {
-  generateReport,
-  type SingleTestResult,
-} from "./lib/report.ts";
+import { generateReport, type SingleTestResult } from "./lib/report.ts";
 import {
   getTimestampedFilename,
   isHttpUrl,
   extractResultWriteContent,
   calculateTotalCost,
-} from "./lib/utils.ts";
-import {
-  discoverTests,
   buildAgentPrompt,
-  type TestDefinition,
-} from "./lib/test-discovery.ts";
+  simulateCacheSavings,
+} from "./lib/utils.ts";
+import { discoverTests, type TestDefinition } from "./lib/test-discovery.ts";
 import {
   setupOutputsDirectory,
   cleanupOutputsDirectory,
@@ -30,8 +25,6 @@ import {
   getModelPricingDisplay,
   formatCost,
   formatMTokCost,
-  type ModelPricingLookup,
-  type GatewayModel,
 } from "./lib/pricing.ts";
 import type { LanguageModel } from "ai";
 import {
@@ -48,9 +41,9 @@ import { gateway } from "ai";
 
 async function validateAndConfirmPricing(
   models: string[],
-  pricingMap: Map<string, ModelPricingLookup | null>,
+  pricingMap: ReturnType<typeof buildPricingMap>,
 ) {
-  const lookups = new Map<string, ModelPricingLookup | null>();
+  const lookups = new Map<string, ReturnType<typeof lookupPricingFromMap>>();
 
   for (const modelId of models) {
     const lookup = lookupPricingFromMap(modelId, pricingMap);
@@ -64,7 +57,15 @@ async function validateAndConfirmPricing(
     const pricingLines = models.map((modelId) => {
       const lookup = lookups.get(modelId)!;
       const display = getModelPricingDisplay(lookup.pricing);
-      return `${modelId}\n  → ${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out`;
+      const cacheReadText =
+        display.cacheReadCostPerMTok !== undefined
+          ? `, ${formatMTokCost(display.cacheReadCostPerMTok)}/MTok cache read`
+          : "";
+      const cacheWriteText =
+        display.cacheCreationCostPerMTok !== undefined
+          ? `, ${formatMTokCost(display.cacheCreationCostPerMTok)}/MTok cache write`
+          : "";
+      return `${modelId}\n  → ${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out${cacheReadText}${cacheWriteText}`;
     });
 
     note(pricingLines.join("\n\n"), "💰 Pricing Found");
@@ -96,8 +97,16 @@ async function validateAndConfirmPricing(
       for (const modelId of modelsWithPricing) {
         const lookup = lookups.get(modelId)!;
         const display = getModelPricingDisplay(lookup.pricing);
+        const cacheReadText =
+          display.cacheReadCostPerMTok !== undefined
+            ? `, ${formatMTokCost(display.cacheReadCostPerMTok)}/MTok cache read`
+            : "";
+        const cacheWriteText =
+          display.cacheCreationCostPerMTok !== undefined
+            ? `, ${formatMTokCost(display.cacheCreationCostPerMTok)}/MTok cache write`
+            : "";
         lines.push(
-          `  ✓ ${modelId} (${formatMTokCost(display.inputCostPerMTok)}/MTok in)`,
+          `  ✓ ${modelId} (${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out${cacheReadText}${cacheWriteText})`,
         );
       }
     }
@@ -126,8 +135,7 @@ async function selectOptions() {
 
   const available_models = await gateway.getAvailableModels();
 
-  const gatewayModels = available_models.models as GatewayModel[];
-  const pricingMap = buildPricingMap(gatewayModels);
+  const pricingMap = buildPricingMap(available_models.models);
 
   const models = await multiselect({
     message: "Select model(s) to benchmark",
@@ -171,6 +179,7 @@ async function selectOptions() {
       { value: "http", label: "MCP over HTTP" },
       { value: "stdio", label: "MCP over StdIO" },
     ],
+    initialValue: "http",
   });
 
   if (isCancel(mcp_integration)) {
@@ -248,7 +257,7 @@ async function runSingleTest(
   console.log(`\n[${testIndex + 1}/${totalTests}] Running test: ${test.name}`);
   console.log("─".repeat(50));
 
-  const prompt = buildAgentPrompt(test);
+  const messages = buildAgentPrompt(test);
 
   try {
     const tools = {
@@ -301,7 +310,7 @@ async function runSingleTest(
     if (testComponentEnabled) {
       console.log("  📋 TestComponent tool is available");
     }
-    const result = await agent.generate({ prompt });
+    const result = await agent.generate({ messages });
 
     const resultWriteContent = extractResultWriteContent(result.steps);
 
@@ -385,9 +394,17 @@ async function main() {
     const lookup = pricing.lookups.get(modelId);
     if (pricing.enabled && lookup) {
       const display = getModelPricingDisplay(lookup.pricing);
+      const cacheReadText =
+        display.cacheReadCostPerMTok !== undefined
+          ? `, ${formatMTokCost(display.cacheReadCostPerMTok)}/MTok cache read`
+          : "";
+      const cacheWriteText =
+        display.cacheCreationCostPerMTok !== undefined
+          ? `, ${formatMTokCost(display.cacheCreationCostPerMTok)}/MTok cache write`
+          : "";
       console.log(`   ${modelId}`);
       console.log(
-        `      💰 ${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out`,
+        `      💰 ${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out${cacheReadText}${cacheWriteText}`,
       );
     } else {
       console.log(`   ${modelId}`);
@@ -456,8 +473,16 @@ async function main() {
 
     if (pricingLookup) {
       const display = getModelPricingDisplay(pricingLookup.pricing);
+      const cacheReadText =
+        display.cacheReadCostPerMTok !== undefined
+          ? `, ${formatMTokCost(display.cacheReadCostPerMTok)}/MTok cache read`
+          : "";
+      const cacheWriteText =
+        display.cacheCreationCostPerMTok !== undefined
+          ? `, ${formatMTokCost(display.cacheCreationCostPerMTok)}/MTok cache write`
+          : "";
       console.log(
-        `💰 Pricing: ${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out`,
+        `💰 Pricing: ${formatMTokCost(display.inputCostPerMTok)}/MTok in, ${formatMTokCost(display.outputCostPerMTok)}/MTok out${cacheReadText}${cacheWriteText}`,
       );
     }
 
@@ -514,6 +539,7 @@ async function main() {
 
     let totalCost = null;
     let pricingInfo = null;
+    let cacheSimulation = null;
 
     if (pricingLookup) {
       totalCost = calculateTotalCost(testResults, pricingLookup.pricing);
@@ -522,6 +548,7 @@ async function main() {
         inputCostPerMTok: pricingDisplay.inputCostPerMTok,
         outputCostPerMTok: pricingDisplay.outputCostPerMTok,
         cacheReadCostPerMTok: pricingDisplay.cacheReadCostPerMTok,
+        cacheCreationCostPerMTok: pricingDisplay.cacheCreationCostPerMTok,
       };
 
       console.log("\n💰 Cost Summary");
@@ -534,10 +561,45 @@ async function main() {
       );
       if (totalCost.cachedInputTokens > 0) {
         console.log(
-          `Cached tokens: ${totalCost.cachedInputTokens.toLocaleString()} (${formatCost(totalCost.cacheReadCost)})`,
+          `Cached tokens: ${totalCost.cachedInputTokens.toLocaleString()}`,
         );
       }
       console.log(`Total cost: ${formatCost(totalCost.totalCost)}`);
+
+      // Simulate cache savings
+      cacheSimulation = simulateCacheSavings(
+        testResults,
+        pricingLookup.pricing,
+      );
+      if (
+        cacheSimulation.cacheHits > 0 ||
+        cacheSimulation.cacheWriteTokens > 0
+      ) {
+        console.log("\n📊 Cache Simulation (estimated with prompt caching):");
+        console.log("─".repeat(50));
+        const totalCacheTokens =
+          cacheSimulation.cacheHits + cacheSimulation.cacheWriteTokens;
+        console.log(
+          `Cache reads: ${cacheSimulation.cacheHits.toLocaleString()} tokens`,
+        );
+        console.log(
+          `Cache writes: ${cacheSimulation.cacheWriteTokens.toLocaleString()} tokens`,
+        );
+        console.log(
+          `Total input tokens: ${totalCacheTokens.toLocaleString()} (reads + writes)`,
+        );
+        console.log(
+          `Estimated cost with cache: ${formatCost(cacheSimulation.simulatedCostWithCache)}`,
+        );
+        const savings =
+          totalCost.totalCost - cacheSimulation.simulatedCostWithCache;
+        const savingsPercent = (savings / totalCost.totalCost) * 100;
+        if (savings > 0) {
+          console.log(
+            `Potential savings: ${formatCost(savings)} (${savingsPercent.toFixed(1)}%)`,
+          );
+        }
+      }
     }
 
     const resultsDir = "results";
@@ -561,6 +623,7 @@ async function main() {
         pricingKey: pricingLookup?.matchedKey ?? null,
         pricing: pricingInfo,
         totalCost,
+        cacheSimulation,
       },
     };
 
diff --git a/lib/pricing.test.ts b/lib/pricing.test.ts
index c5edfc8..151689c 100644
--- a/lib/pricing.test.ts
+++ b/lib/pricing.test.ts
@@ -7,13 +7,12 @@ import {
   formatCost,
   formatMTokCost,
   getModelPricingDisplay,
-  type ModelPricing,
-  type GatewayModel,
 } from "./pricing.ts";
+import type { GatewayLanguageModelEntry } from "@ai-sdk/gateway";
 
 describe("extractPricingFromGatewayModel", () => {
   it("should extract pricing from a gateway model with all fields", () => {
-    const model: GatewayModel = {
+    const model: GatewayLanguageModelEntry = {
       id: "anthropic/claude-opus-4.5",
       name: "Claude Opus 4.5",
       pricing: {
@@ -22,6 +21,11 @@ describe("extractPricingFromGatewayModel", () => {
         cachedInputTokens: "0.0000005",
         cacheCreationInputTokens: "0.00000625",
       },
+      specification: {
+        specificationVersion: "v2",
+        provider: "anthropic",
+        modelId: "claude-opus-4.5",
+      },
       modelType: "language",
     };
 
@@ -35,13 +39,18 @@ describe("extractPricingFromGatewayModel", () => {
   });
 
   it("should extract pricing with only input and output", () => {
-    const model: GatewayModel = {
+    const model: GatewayLanguageModelEntry = {
       id: "openai/gpt-4o",
       name: "GPT-4o",
       pricing: {
         input: "0.000003",
         output: "0.000015",
       },
+      specification: {
+        specificationVersion: "v2",
+        provider: "openai",
+        modelId: "gpt-4o",
+      },
       modelType: "language",
     };
 
@@ -51,12 +60,18 @@ describe("extractPricingFromGatewayModel", () => {
     expect(pricing!.inputCostPerToken).toBe(0.000003);
     expect(pricing!.outputCostPerToken).toBe(0.000015);
     expect(pricing!.cacheReadInputTokenCost).toBeUndefined();
+    expect(pricing!.cacheCreationInputTokenCost).toBeUndefined();
   });
 
   it("should return null for model without pricing", () => {
-    const model: GatewayModel = {
+    const model: GatewayLanguageModelEntry = {
       id: "local/model",
       name: "Local Model",
+      specification: {
+        specificationVersion: "v2",
+        provider: "local",
+        modelId: "model",
+      },
       modelType: "language",
     };
 
@@ -65,12 +80,17 @@ describe("extractPricingFromGatewayModel", () => {
   });
 
   it("should throw error for model with empty pricing object", () => {
-    const model: GatewayModel = {
+    const model = {
       id: "local/model",
       name: "Local Model",
-      pricing: {},
+      pricing: {} as any,
+      specification: {
+        specificationVersion: "v2",
+        provider: "local",
+        modelId: "model",
+      },
       modelType: "language",
-    };
+    } as GatewayLanguageModelEntry;
 
     expect(() => extractPricingFromGatewayModel(model)).toThrowError(
       /Invalid pricing/,
@@ -78,13 +98,18 @@ describe("extractPricingFromGatewayModel", () => {
   });
 
   it("should throw error for invalid pricing values", () => {
-    const model: GatewayModel = {
+    const model: GatewayLanguageModelEntry = {
       id: "test/model",
       name: "Test Model",
       pricing: {
         input: "invalid",
         output: "0.000015",
       },
+      specification: {
+        specificationVersion: "v2",
+        provider: "test",
+        modelId: "model",
+      },
       modelType: "language",
     };
 
@@ -96,22 +121,37 @@ describe("extractPricingFromGatewayModel", () => {
 
 describe("buildPricingMap", () => {
   it("should build a map from gateway models", () => {
-    const models: GatewayModel[] = [
+    const models: GatewayLanguageModelEntry[] = [
       {
         id: "anthropic/claude-sonnet-4",
         name: "Claude Sonnet 4",
         pricing: { input: "0.000003", output: "0.000015" },
+        specification: {
+          specificationVersion: "v2",
+          provider: "anthropic",
+          modelId: "claude-sonnet-4",
+        },
         modelType: "language",
       },
       {
         id: "openai/gpt-4o",
         name: "GPT-4o",
         pricing: { input: "0.000005", output: "0.000015" },
+        specification: {
+          specificationVersion: "v2",
+          provider: "openai",
+          modelId: "gpt-4o",
+        },
         modelType: "language",
       },
       {
         id: "local/model",
         name: "Local Model",
+        specification: {
+          specificationVersion: "v2",
+          provider: "local",
+          modelId: "model",
+        },
         modelType: "language",
       },
     ];
@@ -127,11 +167,16 @@ describe("buildPricingMap", () => {
 
 describe("lookupPricingFromMap", () => {
   it("should return pricing lookup for existing model", () => {
-    const models: GatewayModel[] = [
+    const models: GatewayLanguageModelEntry[] = [
       {
         id: "anthropic/claude-sonnet-4",
         name: "Claude Sonnet 4",
         pricing: { input: "0.000003", output: "0.000015" },
+        specification: {
+          specificationVersion: "v2",
+          provider: "anthropic",
+          modelId: "claude-sonnet-4",
+        },
         modelType: "language",
       },
     ];
@@ -152,82 +197,40 @@ describe("lookupPricingFromMap", () => {
 });
 
 describe("calculateCost", () => {
-  const basePricing: ModelPricing = {
+  const basePricing = {
     inputCostPerToken: 0.000003, // $3 per MTok
     outputCostPerToken: 0.000015, // $15 per MTok
-  };
+  } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
-  const pricingWithCache: ModelPricing = {
+  const pricingWithCache = {
     ...basePricing,
     cacheReadInputTokenCost: 0.0000003, // $0.30 per MTok (10% of input)
-  };
+  } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
   describe("basic cost calculation", () => {
-    it("should calculate cost with no cached tokens", () => {
-      const result = calculateCost(basePricing, 1000, 500, 0);
+    it("should calculate cost correctly", () => {
+      const result = calculateCost(basePricing, 1000, 500);
 
       expect(result.inputTokens).toBe(1000);
       expect(result.outputTokens).toBe(500);
-      expect(result.cachedInputTokens).toBe(0);
       expect(result.inputCost).toBe(0.003); // 1000 * $3/MTok
       expect(result.outputCost).toBeCloseTo(0.0075); // 500 * $15/MTok
-      expect(result.cacheReadCost).toBe(0);
       expect(result.totalCost).toBe(0.0105);
     });
-
-    it("should default cachedInputTokens to 0", () => {
-      const result = calculateCost(basePricing, 1000, 500);
-
-      expect(result.cachedInputTokens).toBe(0);
-      expect(result.inputCost).toBe(0.003);
-    });
   });
 
-  describe("cached token billing", () => {
-    it("should bill cached tokens at reduced rate", () => {
-      // 1000 input tokens, 800 are cached
-      const result = calculateCost(pricingWithCache, 1000, 500, 800);
-
-      expect(result.inputTokens).toBe(1000);
-      expect(result.cachedInputTokens).toBe(800);
-      // Uncached: 200 tokens * $3/MTok = $0.0006
-      expect(result.inputCost).toBeCloseTo(0.0006);
-      // Cached: 800 tokens * $0.30/MTok = $0.00024
-      expect(result.cacheReadCost).toBeCloseTo(0.00024);
-      // Output: 500 * $15/MTok = $0.0075
-      expect(result.outputCost).toBeCloseTo(0.0075);
-      expect(result.totalCost).toBeCloseTo(0.00834);
-    });
-
-    it("should treat cached tokens as free when no cache rate specified", () => {
-      // Using basePricing which has no cacheReadInputTokenCost
-      const result = calculateCost(basePricing, 1000, 500, 800);
-
-      // Only 200 uncached tokens should be billed
-      expect(result.inputCost).toBeCloseTo(0.0006);
-      expect(result.cacheReadCost).toBe(0);
-    });
-
-    it("should handle all tokens being cached", () => {
-      const result = calculateCost(pricingWithCache, 1000, 500, 1000);
-
-      expect(result.inputCost).toBe(0);
-      expect(result.cacheReadCost).toBe(0.0003); // 1000 * $0.30/MTok
-    });
-  });
 
   describe("edge cases", () => {
     it("should handle zero tokens", () => {
-      const result = calculateCost(basePricing, 0, 0, 0);
+      const result = calculateCost(basePricing, 0, 0);
 
       expect(result.inputCost).toBe(0);
       expect(result.outputCost).toBe(0);
-      expect(result.cacheReadCost).toBe(0);
       expect(result.totalCost).toBe(0);
     });
 
     it("should handle large token counts", () => {
-      const result = calculateCost(basePricing, 1_000_000, 500_000, 0);
+      const result = calculateCost(basePricing, 1_000_000, 500_000);
 
       expect(result.inputCost).toBe(3); // 1M * $3/MTok
       expect(result.outputCost).toBe(7.5); // 500K * $15/MTok
@@ -235,11 +238,11 @@ describe("calculateCost", () => {
     });
 
     it("should handle pricing with zero costs", () => {
-      const freePricing: ModelPricing = {
+      const freePricing = {
         inputCostPerToken: 0,
         outputCostPerToken: 0,
-      };
-      const result = calculateCost(freePricing, 1000, 500, 0);
+      } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
+      const result = calculateCost(freePricing, 1000, 500);
 
       expect(result.totalCost).toBe(0);
     });
@@ -291,37 +294,55 @@ describe("formatMTokCost", () => {
 
 describe("getModelPricingDisplay", () => {
   it("should convert per-token costs to per-MTok", () => {
-    const pricing: ModelPricing = {
+    const pricing = {
       inputCostPerToken: 0.000003, // $3 per MTok
       outputCostPerToken: 0.000015, // $15 per MTok
-    };
+    } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
     const display = getModelPricingDisplay(pricing);
 
     expect(display.inputCostPerMTok).toBe(3);
     expect(display.outputCostPerMTok).toBe(15);
     expect(display.cacheReadCostPerMTok).toBeUndefined();
+    expect(display.cacheCreationCostPerMTok).toBeUndefined();
   });
 
   it("should include cache read cost when available", () => {
-    const pricing: ModelPricing = {
+    const pricing = {
       inputCostPerToken: 0.000003,
       outputCostPerToken: 0.000015,
       cacheReadInputTokenCost: 0.0000003, // $0.30 per MTok
-    };
+    } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
+
+    const display = getModelPricingDisplay(pricing);
+
+    expect(display.inputCostPerMTok).toBe(3);
+    expect(display.outputCostPerMTok).toBe(15);
+    expect(display.cacheReadCostPerMTok).toBe(0.3);
+    expect(display.cacheCreationCostPerMTok).toBeUndefined();
+  });
+
+  it("should include cache creation cost when available", () => {
+    const pricing = {
+      inputCostPerToken: 0.000003,
+      outputCostPerToken: 0.000015,
+      cacheReadInputTokenCost: 0.0000003, // $0.30 per MTok
+      cacheCreationInputTokenCost: 0.00000375, // $3.75 per MTok
+    } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
     const display = getModelPricingDisplay(pricing);
 
     expect(display.inputCostPerMTok).toBe(3);
     expect(display.outputCostPerMTok).toBe(15);
     expect(display.cacheReadCostPerMTok).toBe(0.3);
+    expect(display.cacheCreationCostPerMTok).toBe(3.75);
   });
 
   it("should handle zero costs", () => {
-    const pricing: ModelPricing = {
+    const pricing = {
       inputCostPerToken: 0,
       outputCostPerToken: 0,
-    };
+    } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
     const display = getModelPricingDisplay(pricing);
 
@@ -330,11 +351,11 @@ describe("getModelPricingDisplay", () => {
   });
 
   it("should preserve explicit zero cost for cache read", () => {
-    const pricing: ModelPricing = {
+    const pricing = {
       inputCostPerToken: 0.000003,
       outputCostPerToken: 0.000015,
       cacheReadInputTokenCost: 0,
-    };
+    } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
     const display = getModelPricingDisplay(pricing);
 
diff --git a/lib/pricing.ts b/lib/pricing.ts
index 46429fa..fbb314a 100644
--- a/lib/pricing.ts
+++ b/lib/pricing.ts
@@ -1,53 +1,7 @@
-export interface ModelPricing {
-  inputCostPerToken: number;
-  outputCostPerToken: number;
-  cacheReadInputTokenCost?: number;
-  cacheCreationInputTokenCost?: number;
-}
-
-export interface CostCalculation {
-  inputCost: number;
-  outputCost: number;
-  cacheReadCost: number;
-  totalCost: number;
-  inputTokens: number;
-  outputTokens: number;
-  cachedInputTokens: number;
-}
-
-export interface ModelPricingDisplay {
-  inputCostPerMTok: number;
-  outputCostPerMTok: number;
-  cacheReadCostPerMTok?: number;
-}
-
-export interface ModelPricingLookup {
-  pricing: ModelPricing;
-  matchedKey: string;
-}
-
-export interface GatewayPricing {
-  input?: string;
-  output?: string;
-  cachedInputTokens?: string;
-  cacheCreationInputTokens?: string;
-}
-
-export interface GatewayModel {
-  id: string;
-  name: string;
-  description?: string;
-  pricing?: GatewayPricing;
-  specification?: {
-    specificationVersion: string;
-    provider: string;
-    modelId: string;
-  };
-  modelType: string;
-}
+import type { GatewayLanguageModelEntry } from "@ai-sdk/gateway";
 
 export function extractPricingFromGatewayModel(
-  model: GatewayModel,
+  model: GatewayLanguageModelEntry,
 ) {
   if (!model.pricing) {
     return null;
@@ -64,9 +18,14 @@ export function extractPricingFromGatewayModel(
     );
   }
 
-  const result: ModelPricing = {
+  const result = {
     inputCostPerToken: inputCost,
     outputCostPerToken: outputCost,
+  } as {
+    inputCostPerToken: number;
+    outputCostPerToken: number;
+    cacheReadInputTokenCost?: number;
+    cacheCreationInputTokenCost?: number;
   };
 
   if (pricing.cachedInputTokens) {
@@ -86,10 +45,14 @@ export function extractPricingFromGatewayModel(
   return result;
 }
 
-export function buildPricingMap(
-  models: GatewayModel[],
-) {
-  const map = new Map<string, ModelPricingLookup | null>();
+export function buildPricingMap(models: GatewayLanguageModelEntry[]) {
+  const map = new Map<
+    string,
+    {
+      pricing: NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
+      matchedKey: string;
+    } | null
+  >();
 
   for (const model of models) {
     const pricing = extractPricingFromGatewayModel(model);
@@ -108,13 +71,13 @@ export function buildPricingMap(
 
 export function lookupPricingFromMap(
   modelId: string,
-  pricingMap: Map<string, ModelPricingLookup | null>,
+  pricingMap: ReturnType<typeof buildPricingMap>,
 ) {
   return pricingMap.get(modelId) ?? null;
 }
 
 export function getModelPricingDisplay(
-  pricing: ModelPricing,
+  pricing: NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>,
 ) {
   return {
     inputCostPerMTok: pricing.inputCostPerToken * 1_000_000,
@@ -123,31 +86,27 @@ export function getModelPricingDisplay(
       pricing.cacheReadInputTokenCost !== undefined
         ? pricing.cacheReadInputTokenCost * 1_000_000
         : undefined,
+    cacheCreationCostPerMTok:
+      pricing.cacheCreationInputTokenCost !== undefined
+        ? pricing.cacheCreationInputTokenCost * 1_000_000
+        : undefined,
   };
 }
 
 export function calculateCost(
-  pricing: ModelPricing,
+  pricing: NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>,
   inputTokens: number,
   outputTokens: number,
-  cachedInputTokens: number = 0,
 ) {
-  const uncachedInputTokens = inputTokens - cachedInputTokens;
-  const inputCost = uncachedInputTokens * pricing.inputCostPerToken;
-
+  const inputCost = inputTokens * pricing.inputCostPerToken;
   const outputCost = outputTokens * pricing.outputCostPerToken;
 
-  const cacheReadCost =
-    cachedInputTokens * (pricing.cacheReadInputTokenCost ?? 0);
-
   return {
     inputCost,
     outputCost,
-    cacheReadCost,
-    totalCost: inputCost + outputCost + cacheReadCost,
+    totalCost: inputCost + outputCost,
     inputTokens,
     outputTokens,
-    cachedInputTokens,
   };
 }
 
diff --git a/lib/report-template.ts b/lib/report-template.ts
index 2ecb63a..b5b0d42 100644
--- a/lib/report-template.ts
+++ b/lib/report-template.ts
@@ -98,9 +98,7 @@ function renderContentBlock(block: ContentBlock) {
   return "";
 }
 
-function renderVerificationResult(
-  verification: TestVerificationResult | null,
-) {
+function renderVerificationResult(verification: TestVerificationResult | null) {
   if (!verification) {
     return `<div class="verification-result skipped">
       <span class="verification-icon">⊘</span>
@@ -259,6 +257,16 @@ function renderPricingSection(data: MultiTestResultData) {
       ? `<span class="pricing-key" title="Key matched in model-pricing.json">${escapeHtml(pricingKey)}</span>`
       : "";
 
+    const cacheReadText =
+      pricing.cacheReadCostPerMTok !== undefined
+        ? `<span class="rate-separator">·</span><span class="rate-value">${formatMTokCost(pricing.cacheReadCostPerMTok)}/MTok cache read</span>`
+        : "";
+
+    const cacheWriteText =
+      pricing.cacheCreationCostPerMTok !== undefined
+        ? `<span class="rate-separator">·</span><span class="rate-value">${formatMTokCost(pricing.cacheCreationCostPerMTok)}/MTok cache write</span>`
+        : "";
+
     pricingInfoHtml = `
       <div class="pricing-rates">
         <span class="rate-label">Model Pricing:</span>
@@ -266,21 +274,33 @@ function renderPricingSection(data: MultiTestResultData) {
         <span class="rate-value">${formatMTokCost(pricing.inputCostPerMTok)}/MTok in</span>
         <span class="rate-separator">·</span>
         <span class="rate-value">${formatMTokCost(pricing.outputCostPerMTok)}/MTok out</span>
-        ${pricing.cacheReadCostPerMTok !== undefined ? `<span class="rate-separator">·</span><span class="rate-value">${formatMTokCost(pricing.cacheReadCostPerMTok)}/MTok cached</span>` : ""}
+        ${cacheReadText}
+        ${cacheWriteText}
       </div>
     `;
   }
 
   let costBreakdownHtml = "";
   if (totalCost) {
-    const uncachedInputTokens =
-      totalCost.inputTokens - totalCost.cachedInputTokens;
+    const cacheSimRow =
+      data.metadata.cacheSimulation &&
+      pricing?.cacheReadCostPerMTok !== undefined &&
+      (data.metadata.cacheSimulation.cacheHits > 0 ||
+        data.metadata.cacheSimulation.cacheWriteTokens > 0)
+        ? `
+        <div class="cost-row simulated">
+          <span class="cost-label">Estimated cost with prompt cache:</span>
+          <span class="cost-tokens">${data.metadata.cacheSimulation.cacheHits.toLocaleString()} reads + ${data.metadata.cacheSimulation.cacheWriteTokens.toLocaleString()} writes = ${(data.metadata.cacheSimulation.cacheHits + data.metadata.cacheSimulation.cacheWriteTokens).toLocaleString()} tokens</span>
+          <span class="cost-value">${formatCost(data.metadata.cacheSimulation.simulatedCostWithCache)}</span>
+        </div>
+        `
+        : "";
 
     costBreakdownHtml = `
       <div class="cost-breakdown">
         <div class="cost-row">
           <span class="cost-label">Input tokens:</span>
-          <span class="cost-tokens">${uncachedInputTokens.toLocaleString()}</span>
+          <span class="cost-tokens">${totalCost.inputTokens.toLocaleString()}</span>
           <span class="cost-value">${formatCost(totalCost.inputCost)}</span>
         </div>
         <div class="cost-row">
@@ -292,9 +312,9 @@ function renderPricingSection(data: MultiTestResultData) {
           totalCost.cachedInputTokens > 0
             ? `
         <div class="cost-row cached">
-          <span class="cost-label">Cached tokens:</span>
+          <span class="cost-label">Cached tokens (from usage):</span>
           <span class="cost-tokens">${totalCost.cachedInputTokens.toLocaleString()} ⚡</span>
-          <span class="cost-value">${formatCost(totalCost.cacheReadCost)}</span>
+          <span class="cost-value">-</span>
         </div>
         `
             : ""
@@ -304,6 +324,7 @@ function renderPricingSection(data: MultiTestResultData) {
           <span class="cost-tokens"></span>
           <span class="cost-value">${formatCost(totalCost.totalCost)}</span>
         </div>
+        ${cacheSimRow}
       </div>
     `;
   }
@@ -388,7 +409,7 @@ function getPricingStyles() {
 
     .cost-row {
       display: grid;
-      grid-template-columns: 120px 1fr auto;
+      grid-template-columns: 200px 1fr auto;
       gap: 8px;
       align-items: center;
       font-size: 13px;
@@ -398,6 +419,14 @@ function getPricingStyles() {
       color: var(--text-muted);
     }
 
+    .cost-row.simulated {
+      margin-top: 8px;
+      padding-top: 8px;
+      border-top: 1px dashed var(--border);
+      color: var(--text-muted);
+      font-style: italic;
+    }
+
     .cost-row.total {
       margin-top: 8px;
       padding-top: 8px;
@@ -429,6 +458,10 @@ function getPricingStyles() {
       color: var(--success);
       font-size: 15px;
     }
+
+    .cost-row.simulated .cost-value {
+      color: var(--mcp-enabled);
+    }
   `;
 }
 
diff --git a/lib/report.ts b/lib/report.ts
index c7e50bf..a2ff3c2 100644
--- a/lib/report.ts
+++ b/lib/report.ts
@@ -1,6 +1,7 @@
 import { readFile, writeFile } from "node:fs/promises";
 import type { TestVerificationResult } from "./output-test-runner.ts";
 import { generateMultiTestHtml } from "./report-template.ts";
+import type { simulateCacheSavings } from "./utils.ts";
 
 interface TextBlock {
   type: "text";
@@ -69,12 +70,12 @@ export interface PricingInfo {
   inputCostPerMTok: number;
   outputCostPerMTok: number;
   cacheReadCostPerMTok?: number;
+  cacheCreationCostPerMTok?: number;
 }
 
 export interface TotalCostInfo {
   inputCost: number;
   outputCost: number;
-  cacheReadCost: number;
   totalCost: number;
   inputTokens: number;
   outputTokens: number;
@@ -90,6 +91,7 @@ interface Metadata {
   pricingKey?: string | null;
   pricing?: PricingInfo | null;
   totalCost?: TotalCostInfo | null;
+  cacheSimulation?: ReturnType<typeof simulateCacheSavings> | null;
 }
 
 export interface SingleTestResult {
diff --git a/lib/test-discovery.ts b/lib/test-discovery.ts
index 414292b..826f619 100644
--- a/lib/test-discovery.ts
+++ b/lib/test-discovery.ts
@@ -61,9 +61,3 @@ export function discoverTests() {
 
   return definitions;
 }
-
-export function buildAgentPrompt(test: TestDefinition) {
-  return `${test.prompt}
-
-IMPORTANT: When you have finished implementing the component, use the ResultWrite tool to output your final Svelte component code. Only output the component code itself, no explanations or markdown formatting.`;
-}
diff --git a/lib/token-cache.ts b/lib/token-cache.ts
new file mode 100644
index 0000000..40721a0
--- /dev/null
+++ b/lib/token-cache.ts
@@ -0,0 +1,107 @@
+import type { extractPricingFromGatewayModel } from "./pricing.ts";
+
+/**
+ * Models a growing prompt-cache prefix for a single conversation.
+ *
+ * Each `addMessage` call counts the context accumulated so far as tokens read
+ * from cache and then extends that context; `calculateSimulatedCost` prices
+ * the recorded traffic.
+ */
+export class TokenCache {
+  private currentTokens: number;
+  private totalCachedTokens: number = 0;
+  private messages: Array<{ message: string; tokens: number }> = [];
+  private pricing: NonNullable<
+    ReturnType<typeof extractPricingFromGatewayModel>
+  > | null;
+  private totalOutputTokens: number = 0;
+
+  /**
+   * @param tokens - Context size, in tokens, before any message is added.
+   * @param pricing - Per-token rates; when omitted or null, all simulated
+   *   costs are reported as zero.
+   */
+  constructor(
+    tokens: number,
+    pricing?: NonNullable<
+      ReturnType<typeof extractPricingFromGatewayModel>
+    > | null,
+  ) {
+    this.currentTokens = tokens;
+    this.pricing = pricing ?? null;
+  }
+
+  /**
+   * Records one API call.
+   *
+   * @param message - Label stored for the call; only the count is reported.
+   * @param tokens - New input tokens this call appends to the context.
+   * @param outputTokens - Output tokens produced by this call.
+   */
+  addMessage(message: string, tokens: number, outputTokens: number = 0): void {
+    // Count the read before growing the context, so tokens introduced by this
+    // call are not treated as already cached.
+    this.totalCachedTokens += this.currentTokens;
+
+    this.currentTokens += tokens;
+    this.totalOutputTokens += outputTokens;
+    this.messages.push({ message, tokens });
+  }
+
+  /** Returns cumulative cache reads, current context size and message count. */
+  getCacheStats() {
+    return {
+      totalCachedTokens: this.totalCachedTokens,
+      currentContextTokens: this.currentTokens,
+      messageCount: this.messages.length,
+    };
+  }
+
+  /**
+   * Prices the recorded traffic: cumulative reads at the cache-read rate, the
+   * whole current context at the cache-creation rate, and output tokens at the
+   * output rate.
+   *
+   * A cache rate absent from `pricing` falls back to 10% (read) or 125%
+   * (creation) of the input rate. `??` is used instead of a truthiness check
+   * so an explicit rate of 0 is honored rather than zeroing the whole result.
+   *
+   * @returns The cost components and their sum; all zeros without pricing.
+   */
+  calculateSimulatedCost(): {
+    simulatedCost: number;
+    cacheReadCost: number;
+    cacheWriteCost: number;
+    outputCost: number;
+  } {
+    if (!this.pricing) {
+      return {
+        simulatedCost: 0,
+        cacheReadCost: 0,
+        cacheWriteCost: 0,
+        outputCost: 0,
+      };
+    }
+
+    const { inputCostPerToken, outputCostPerToken } = this.pricing;
+    const cacheReadRate =
+      this.pricing.cacheReadInputTokenCost ?? inputCostPerToken * 0.1;
+    const cacheWriteRate =
+      this.pricing.cacheCreationInputTokenCost ?? inputCostPerToken * 1.25;
+
+    // Tokens read from cache across all API calls
+    const cacheReadCost = this.totalCachedTokens * cacheReadRate;
+
+    // The model assumes every token in the current context was written once
+    const cacheWriteCost = this.currentTokens * cacheWriteRate;
+
+    const outputCost = this.totalOutputTokens * outputCostPerToken;
+
+    return {
+      simulatedCost: cacheReadCost + cacheWriteCost + outputCost,
+      cacheReadCost,
+      cacheWriteCost,
+      outputCost,
+    };
+  }
+}
diff --git a/lib/utils.test.ts b/lib/utils.test.ts
index c3ad8df..6de99ad 100644
--- a/lib/utils.test.ts
+++ b/lib/utils.test.ts
@@ -3,8 +3,10 @@ import {
   sanitizeModelName,
   getTimestampedFilename,
   calculateTotalCost,
+  simulateCacheSavings,
 } from "./utils.ts";
-import type { ModelPricing } from "./pricing.ts";
+import { TokenCache } from "./token-cache.ts";
+import { extractPricingFromGatewayModel } from "./pricing.ts";
 import type { SingleTestResult } from "./report.ts";
 
 describe("sanitizeModelName", () => {
@@ -104,11 +106,11 @@ describe("getTimestampedFilename", () => {
 });
 
 describe("calculateTotalCost", () => {
-  const pricing: ModelPricing = {
+  const pricing = {
     inputCostPerToken: 1.0 / 1_000_000,
     outputCostPerToken: 2.0 / 1_000_000,
     cacheReadInputTokenCost: 0.1 / 1_000_000,
-  };
+  } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
 
   it("calculates zero cost for empty results", () => {
     const tests: SingleTestResult[] = [];
@@ -117,7 +119,6 @@ describe("calculateTotalCost", () => {
     expect(result).toEqual({
       inputCost: 0,
       outputCost: 0,
-      cacheReadCost: 0,
       totalCost: 0,
       inputTokens: 0,
       outputTokens: 0,
@@ -169,24 +170,496 @@ describe("calculateTotalCost", () => {
     // Total Input: 100 + 200 + 300 = 600
     // Total Output: 50 + 100 + 150 = 300
     // Total Cached: 10 + 0 + 20 = 30
-    // Uncached Input: 600 - 30 = 570
 
-    // Costs (per Token):
-    // Input: 570 * (1.0 / 1e6) = 0.00057
+    // Costs (per Token) - calculateCost bills all input at full rate:
+    // Input: 600 * (1.0 / 1e6) = 0.0006
     // Output: 300 * (2.0 / 1e6) = 0.0006
-    // Cache: 30 * (0.1 / 1e6) = 0.000003
-    // Total: 0.00057 + 0.0006 + 0.000003 = 0.001173
+    // Total: 0.0006 + 0.0006 = 0.0012
 
     const result = calculateTotalCost(tests, pricing);
 
     expect(result).toEqual({
-      inputCost: 0.00057,
+      inputCost: 0.0006,
       outputCost: 0.0006,
-      cacheReadCost: 0.000003,
-      totalCost: 0.001173,
+      totalCost: 0.0012,
       inputTokens: 600,
       outputTokens: 300,
       cachedInputTokens: 30,
     });
   });
 });
+
+describe("TokenCache", () => {
+  const pricing = {
+    inputCostPerToken: 1.0 / 1_000_000,
+    outputCostPerToken: 2.0 / 1_000_000,
+    cacheCreationInputTokenCost: 1.25 / 1_000_000,
+    cacheReadInputTokenCost: 0.1 / 1_000_000,
+  } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
+
+  it("initializes with correct values", () => {
+    const cache = new TokenCache(100, pricing);
+    const stats = cache.getCacheStats();
+
+    expect(stats.totalCachedTokens).toBe(0);
+    expect(stats.currentContextTokens).toBe(100);
+    expect(stats.messageCount).toBe(0);
+  });
+
+  it("accumulates cached tokens correctly", () => {
+    const cache = new TokenCache(100, pricing);
+
+    cache.addMessage("What is JavaScript?", 50);
+    let stats = cache.getCacheStats();
+    expect(stats.totalCachedTokens).toBe(100); // 100 from initial
+    expect(stats.currentContextTokens).toBe(150); // 100 + 50
+    expect(stats.messageCount).toBe(1);
+
+    cache.addMessage("JavaScript is...", 200);
+    stats = cache.getCacheStats();
+    expect(stats.totalCachedTokens).toBe(250); // 100 + 150
+    expect(stats.currentContextTokens).toBe(350); // 150 + 200
+    expect(stats.messageCount).toBe(2);
+
+    cache.addMessage("Can you give an example?", 30);
+    stats = cache.getCacheStats();
+    expect(stats.totalCachedTokens).toBe(600); // 100 + 150 + 350
+    expect(stats.currentContextTokens).toBe(380); // 350 + 30
+    expect(stats.messageCount).toBe(3);
+  });
+
+  it("tracks output tokens separately", () => {
+    const cache = new TokenCache(100, pricing);
+    cache.addMessage("msg1", 50, 200);
+    cache.addMessage("msg2", 30, 150);
+    const stats = cache.getCacheStats();
+    expect(stats.totalCachedTokens).toBe(250); // 100 + 150
+    expect(stats.currentContextTokens).toBe(180); // 100 + 50 + 30
+    // Output never enters the context; it is billed alone: 350 * 2e-6 = 0.0007
+    expect(cache.calculateSimulatedCost().outputCost).toBeCloseTo(0.0007, 6);
+  });
+
+  it("calculates cost with pricing", () => {
+    const cache = new TokenCache(100, pricing);
+
+    cache.addMessage("msg1", 50, 200);
+    cache.addMessage("msg2", 100, 300);
+
+    const cost = cache.calculateSimulatedCost();
+
+    // totalCachedTokens = 100 + 150 = 250 (tokens read from cache across calls)
+    // currentTokens = 100 + 50 + 100 = 250 (all tokens written to cache)
+    // totalOutputTokens = 200 + 300 = 500
+
+    // cacheReadCost = 250 * 0.1e-6 = 0.000025
+    // cacheWriteCost = 250 * 1.25e-6 = 0.0003125 (cache write rate is 1.25x)
+    // outputCost = 500 * 2e-6 = 0.001
+    // simulatedCost = 0.000025 + 0.0003125 + 0.001 = 0.0013375
+
+    expect(cost.cacheReadCost).toBeCloseTo(0.000025, 6);
+    expect(cost.cacheWriteCost).toBeCloseTo(0.0003125, 6);
+    expect(cost.outputCost).toBeCloseTo(0.001, 6);
+    expect(cost.simulatedCost).toBeCloseTo(0.0013375, 6);
+  });
+
+  it("calculates zero cost without pricing", () => {
+    const cache = new TokenCache(100);
+
+    cache.addMessage("msg1", 50, 200);
+
+    const cost = cache.calculateSimulatedCost();
+
+    expect(cost.cacheReadCost).toBe(0);
+    expect(cost.cacheWriteCost).toBe(0);
+    expect(cost.outputCost).toBe(0);
+    expect(cost.simulatedCost).toBe(0);
+  });
+
+  it("handles zero tokens", () => {
+    const cache = new TokenCache(0, pricing);
+    const stats = cache.getCacheStats();
+
+    expect(stats.totalCachedTokens).toBe(0);
+    expect(stats.currentContextTokens).toBe(0);
+    expect(stats.messageCount).toBe(0);
+
+    const cost = cache.calculateSimulatedCost();
+    expect(cost.simulatedCost).toBe(0);
+  });
+});
+
+describe("simulateCacheSavings - growing prefix model", () => {
+  // basicPricing sets input=$1/MTok and output=$2/MTok but no cache rates, so
+  // simulateCacheSavings falls back to its defaults:
+  // cache read = 10% of input ($0.10/MTok), cache write = 125% ($1.25/MTok)
+  const basicPricing = {
+    inputCostPerToken: 1.0 / 1_000_000,
+    outputCostPerToken: 2.0 / 1_000_000,
+  } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
+
+  it("returns zeros for empty tests array", () => {
+    const tests: SingleTestResult[] = [];
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    expect(result).toEqual({
+      simulatedCostWithCache: 0,
+      cacheHits: 0,
+      cacheWriteTokens: 0,
+    });
+  });
+
+  it("handles single test with single step (no cache hits)", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 1000,
+              outputTokens: 500,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    // Step 1: 1000 input tokens at cache write rate (1.25/MTok) + 500 output at $2/MTok
+    // Simulated cost = 1000 * 1.25e-6 + 500 * 2e-6 = 0.00125 + 0.001 = 0.00225
+    expect(result.cacheHits).toBe(0);
+    expect(result.cacheWriteTokens).toBe(1000);
+    expect(result.simulatedCostWithCache).toBeCloseTo(0.00225, 6);
+  });
+
+  it("calculates savings for single test with multiple steps - growing prefix", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 1000,
+              outputTokens: 200,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 1500,
+              outputTokens: 300,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 2000,
+              outputTokens: 400,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    // Growing prefix model:
+    // Step 1: 1000 tokens → write all to cache
+    //   Cost: 1000 * 1.25e-6 + 200 * 2e-6 = 0.00125 + 0.0004 = 0.00165
+    // Step 2: 1500 tokens → 1000 cached (read), 500 new (write)
+    //   Cost: 1000 * 0.1e-6 + 500 * 1.25e-6 + 300 * 2e-6 = 0.0001 + 0.000625 + 0.0006 = 0.001325
+    // Step 3: 2000 tokens → 1500 cached (read), 500 new (write)
+    //   Cost: 1500 * 0.1e-6 + 500 * 1.25e-6 + 400 * 2e-6 = 0.00015 + 0.000625 + 0.0008 = 0.001575
+    // Total simulated: 0.00165 + 0.001325 + 0.001575 = 0.00455
+
+    expect(result.cacheHits).toBe(1000 + 1500); // 1000 from step 2 + 1500 from step 3
+    expect(result.cacheWriteTokens).toBe(1000 + 500 + 500); // 1000 step1 + 500 step2 + 500 step3
+    expect(result.simulatedCostWithCache).toBeCloseTo(0.00455, 6);
+  });
+
+  it("aggregates across multiple tests with cache reset per test", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 500,
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 800,
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+      {
+        testName: "test2",
+        prompt: "p2",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 600,
+              outputTokens: 200,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 900,
+              outputTokens: 200,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 1200,
+              outputTokens: 200,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    // Test 1:
+    //   Step 1: 500 write, 100 output
+    //   Step 2: 500 read, 300 write, 100 output
+    //   Hits: 500, Writes: 500 + 300 = 800
+    //
+    // Test 2:
+    //   Step 1: 600 write, 200 output
+    //   Step 2: 600 read, 300 write, 200 output
+    //   Step 3: 900 read, 300 write, 200 output
+    //   Hits: 600 + 900 = 1500, Writes: 600 + 300 + 300 = 1200
+
+    // Total: hits = 500 + 1500 = 2000, writes = 800 + 1200 = 2000
+
+    expect(result.cacheHits).toBe(2000);
+    expect(result.cacheWriteTokens).toBe(2000);
+
+    // Calculate expected cost manually:
+    // Test 1 Step 1: 500 * 1.25e-6 + 100 * 2e-6 = 0.000625 + 0.0002 = 0.000825
+    // Test 1 Step 2: 500 * 0.1e-6 + 300 * 1.25e-6 + 100 * 2e-6 = 0.00005 + 0.000375 + 0.0002 = 0.000625
+    // Test 2 Step 1: 600 * 1.25e-6 + 200 * 2e-6 = 0.00075 + 0.0004 = 0.00115
+    // Test 2 Step 2: 600 * 0.1e-6 + 300 * 1.25e-6 + 200 * 2e-6 = 0.00006 + 0.000375 + 0.0004 = 0.000835
+    // Test 2 Step 3: 900 * 0.1e-6 + 300 * 1.25e-6 + 200 * 2e-6 = 0.00009 + 0.000375 + 0.0004 = 0.000865
+    // Total: 0.000825 + 0.000625 + 0.00115 + 0.000835 + 0.000865 = 0.0043
+
+    expect(result.simulatedCostWithCache).toBeCloseTo(0.0043, 6);
+  });
+
+  it("skips tests with empty steps array", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [],
+      },
+      {
+        testName: "test2",
+        prompt: "p2",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 1000,
+              outputTokens: 500,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    // Only test2 should be counted
+    expect(result.cacheHits).toBe(0);
+    expect(result.cacheWriteTokens).toBe(1000);
+  });
+
+  it("uses custom cache pricing when provided", () => {
+    const customPricing = {
+      inputCostPerToken: 1.0 / 1_000_000,
+      outputCostPerToken: 2.0 / 1_000_000,
+      cacheReadInputTokenCost: 0.05 / 1_000_000, // 5% instead of default 10%
+      cacheCreationInputTokenCost: 1.5 / 1_000_000, // 150% instead of default 125%
+    } satisfies NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>;
+
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 1000,
+              outputTokens: 500,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 1500,
+              outputTokens: 500,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, customPricing);
+
+    // Step 1: 1000 write at $1.50/MTok + 500 output at $2/MTok
+    //   = 1000 * 1.5e-6 + 500 * 2e-6 = 0.0015 + 0.001 = 0.0025
+    // Step 2: 1000 read at $0.05/MTok + 500 write at $1.50/MTok + 500 output at $2/MTok
+    //   = 1000 * 0.05e-6 + 500 * 1.5e-6 + 500 * 2e-6 = 0.00005 + 0.00075 + 0.001 = 0.0018
+    // Total: 0.0025 + 0.0018 = 0.0043
+
+    expect(result.cacheHits).toBe(1000);
+    expect(result.cacheWriteTokens).toBe(1000 + 500);
+    expect(result.simulatedCostWithCache).toBeCloseTo(0.0043, 6);
+  });
+
+  it("handles input tokens decreasing between steps (edge case)", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 1000,
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 800, // Less than step 1 (unusual but possible)
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    // Step 1: 1000 write
+    // Step 2: 1000 read (previous step), 0 new write (800 - 1000 = -200 → clamped to 0)
+    // This tests the Math.max(0, ...) clamp applied to newly written tokens
+
+    expect(result.cacheHits).toBe(1000); // Still reads full previous prefix
+    expect(result.cacheWriteTokens).toBe(1000); // Only step 1 writes
+  });
+
+  it("handles zero actual cost edge case", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 0,
+              outputTokens: 0,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    expect(result.simulatedCostWithCache).toBe(0);
+    expect(result.cacheHits).toBe(0);
+    expect(result.cacheWriteTokens).toBe(0);
+  });
+
+  it("compares favorably to actual cost for multi-step tests", () => {
+    const tests: SingleTestResult[] = [
+      {
+        testName: "test1",
+        prompt: "p1",
+        resultWriteContent: null,
+        verification: {} as any,
+        steps: [
+          {
+            usage: {
+              inputTokens: 1000,
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 1200,
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+          {
+            usage: {
+              inputTokens: 1400,
+              outputTokens: 100,
+              cachedInputTokens: 0,
+            },
+          } as any,
+        ],
+      },
+    ];
+
+    const result = simulateCacheSavings(tests, basicPricing);
+
+    // Actual cost (no caching):
+    // Input: (1000 + 1200 + 1400) * 1e-6 = 3600 * 1e-6 = 0.0036
+    // Output: (100 + 100 + 100) * 2e-6 = 300 * 2e-6 = 0.0006
+    // Total actual: 0.0042
+
+    const actualCost = 0.0042;
+
+    // Simulated should be less than actual for multi-step scenarios
+    expect(result.simulatedCostWithCache).toBeLessThan(actualCost);
+
+    // Calculate savings
+    const savings = actualCost - result.simulatedCostWithCache;
+    const savingsPercent = (savings / actualCost) * 100;
+
+    // Should have meaningful savings (>10% for this scenario)
+    expect(savingsPercent).toBeGreaterThan(10);
+  });
+});
diff --git a/lib/utils.ts b/lib/utils.ts
index e3cb01c..63c0efc 100644
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -1,5 +1,8 @@
-import { calculateCost, type ModelPricing } from "./pricing.ts";
-import type { SingleTestResult, TotalCostInfo } from "./report.ts";
+import { calculateCost, extractPricingFromGatewayModel } from "./pricing.ts";
+import type { SingleTestResult } from "./report.ts";
+import type { ModelMessage } from "@ai-sdk/provider-utils";
+import type { TestDefinition } from "./test-discovery.ts";
+import { TokenCache } from "./token-cache.ts";
 
 export function sanitizeModelName(modelName: string) {
   return modelName.replace(/[^a-zA-Z0-9.]/g, "-");
@@ -53,7 +56,7 @@ export function extractResultWriteContent(steps: unknown[]) {
 
 export function calculateTotalCost(
   tests: SingleTestResult[],
-  pricing: ModelPricing,
+  pricing: NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>,
 ) {
   let totalInputTokens = 0;
   let totalOutputTokens = 0;
@@ -71,16 +74,102 @@ export function calculateTotalCost(
     pricing,
     totalInputTokens,
     totalOutputTokens,
-    totalCachedInputTokens,
   );
 
   return {
     inputCost: costResult.inputCost,
     outputCost: costResult.outputCost,
-    cacheReadCost: costResult.cacheReadCost,
     totalCost: costResult.totalCost,
     inputTokens: totalInputTokens,
     outputTokens: totalOutputTokens,
     cachedInputTokens: totalCachedInputTokens,
   };
 }
+
+/**
+ * Builds the single user message sent to the agent: the test prompt followed
+ * by the instruction to emit the final component through the ResultWrite tool.
+ */
+export function buildAgentPrompt(test: TestDefinition): ModelMessage[] {
+  const content = `${test.prompt}
+
+IMPORTANT: When you have finished implementing the component, use the ResultWrite tool to output your final Svelte component code. Only output the component code itself, no explanations or markdown formatting.`;
+  return [{ role: "user", content }];
+}
+
+/**
+ * Estimates the cost of the given test runs as if prompt caching were on,
+ * using a growing-prefix model.
+ *
+ * Assumptions of the model:
+ * - Every test starts with an empty cache.
+ * - The first step writes its whole input to the cache (creation rate).
+ * - Each later step reads the prefix accumulated so far (read rate) and
+ *   writes only the input tokens beyond it (creation rate, never negative).
+ * - Output tokens are billed at the regular output rate.
+ *
+ * Rates missing from `pricing` default to 10% (read) and 125% (creation) of
+ * the input rate.
+ *
+ * Example, inputs 1000 → 1500 → 2000: step 1 writes 1000; step 2 reads 1000
+ * and writes 500; step 3 reads 1500 and writes 500.
+ *
+ * @returns the simulated total cost plus the cache-read and cache-write
+ *   token totals.
+ */
+export function simulateCacheSavings(
+  tests: SingleTestResult[],
+  pricing: NonNullable<ReturnType<typeof extractPricingFromGatewayModel>>,
+) {
+  // Cache rates are optional in pricing; `??` keeps an explicit 0 rate and
+  // only falls back when the field is null or undefined.
+  const { inputCostPerToken, outputCostPerToken } = pricing;
+  const readRate = pricing.cacheReadInputTokenCost ?? inputCostPerToken * 0.1;
+  const writeRate =
+    pricing.cacheCreationInputTokenCost ?? inputCostPerToken * 1.25;
+
+  // Running totals across all tests.
+  let cost = 0;
+  let hits = 0;
+  let writes = 0;
+
+  for (const { steps } of tests) {
+    // An empty steps array yields undefined here, so one guard covers both.
+    const opening = steps[0];
+    if (!opening) continue;
+
+    // The opening step seeds the prefix: all of its input is a cache write.
+    const { inputTokens: seed, outputTokens: seedOutput } = opening.usage;
+    const cache = new TokenCache(seed, pricing);
+    writes += seed;
+    cost += seed * writeRate;
+    cost += seedOutput * outputCostPerToken;
+
+    // Register the opening step's output without growing the prefix.
+    cache.addMessage("step-0", 0, seedOutput);
+
+    for (const [index, step] of steps.entries()) {
+      if (index === 0 || !step) continue;
+
+      const prefix = cache.getCacheStats().currentContextTokens;
+      const fresh = Math.max(0, step.usage.inputTokens - prefix);
+      const { outputTokens } = step.usage;
+
+      hits += prefix;
+      writes += fresh;
+
+      // Accumulate each cost term separately, in read → write → output order.
+      cost += prefix * readRate;
+      cost += fresh * writeRate;
+      cost += outputTokens * outputCostPerToken;
+
+      cache.addMessage(`step-${index}`, fresh, outputTokens);
+    }
+  }
+
+  return {
+    simulatedCostWithCache: cost,
+    cacheHits: hits,
+    cacheWriteTokens: writes,
+  };
+}