diff --git a/evaluation/trace_viewer.py b/evaluation/trace_viewer.py
index d12f189f..c509023d 100644
--- a/evaluation/trace_viewer.py
+++ b/evaluation/trace_viewer.py
@@ -1,662 +1,16 @@
-"""Self-contained HTML viewer for RLM rollout traces.
+"""Compatibility shim — the rollout viewer now lives in :mod:`kai.viewer`.
 
-Reads a rollout directory (the per-agent ``<agent>.jsonl`` files an RLM run
-writes via :mod:`kai.state.hooks`, plus the optional ``score.json`` /
-``run.json`` siblings) and renders a single offline HTML file.
-
-The view follows **causality, not wall-clock**. The root agent (``exploit``)
-is an orchestrator: it reasons, then runs Python, and that Python calls
-``spawn_analyzer(...)`` / ``spawn_researcher(...)`` / ``spawn_verifier(...)``
-etc. to delegate a subtask. The sub-agent runs to completion *inside* that
-code call and its ``final_answer`` comes back as the call's return value --
-which is why a naive timestamp sort is misleading: the parent iteration is
-stamped when it *finishes*, i.e. after the child it spawned has already run,
-so the child appears to precede its own cause.
-
-So we read the root top-to-bottom by iteration number -- reason -> run code
--> observe output -- and attach each spawned sub-agent's full sub-transcript
-under the exact ``spawn_*()`` call that caused it (matched per agent in call
-order), with the value it returned surfaced at the call site. You can expand
-a delegation to see *how* the sub-agent reached its answer.
-
-No external dependencies, no server, no spans -- just the rollouts on disk.
-Pulled smoke dirs are flat (``*.jsonl`` next to ``score.json``); a fresh run
-nests them under ``state/<hash>/rollouts/``. Both work: we glob for
-``*.jsonl`` and skip any file whose lines aren't valid JSON (empty files, or
-``cat: ... No such file`` stubs from a partial ``railway ssh`` pull).
+The viewer was lifted out of the benchmark harness into the core package so
+``kai view`` can render any pipeline run (findings + agent trace), not just
+benchmark rollouts. This module is kept so ``evaluation`` keeps importing
+``load_rollout_dir`` / ``render_html`` / ``write_html`` from here; new code
+should import from :mod:`kai.viewer` directly.
 """
 
 from __future__ import annotations
 
-import json
-import re
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any
-
-ROOT_AGENT = "exploit"
-SPAWN_RE = re.compile(r"\bspawn_([a-z][a-z_]*)\s*\(")
-
-# Per-agent tints, assigned in first-appearance order.
-PALETTE = [
-    "#7fdbca",
-    "#c792ea",
-    "#f78c6c",
-    "#82aaff",
-    "#ffcb6b",
-    "#f07178",
-    "#addb67",
-    "#89ddff",
-]
-
-
-@dataclass
-class Iteration:
-    """One reason -> act -> observe step of an agent."""
-
-    n: int
-    timestamp: str
-    reasoning: str
-    blocks: list[dict[str, str]] = field(default_factory=list)
-
-
-@dataclass
-class AgentTrace:
-    """A single (sub-)agent's rollout: its metadata + iterations + result."""
-
-    name: str
-    depth: int
-    model: str
-    backend: str
-    iterations: list[Iteration]
-    result: str | None
-    first_ts: str
-    color: str = ""
-
-    def legend_dict(self) -> dict[str, Any]:
-        return {
-            "name": self.name,
-            "depth": self.depth,
-            "model": self.model,
-            "iters": len(self.iterations),
-            "color": self.color,
-        }
-
-
-@dataclass
-class RunTrace:
-    """A whole run: the causal root spine plus run-level header fields."""
-
-    title: str
-    benchmark: str
-    task_id: str
-    success: bool | None
-    failure_reason: str | None
-    poc_source: str | None
-    models: list[str]
-    agents: list[AgentTrace]
-    root_name: str
-    root_result: str | None
-    root_steps: list[dict[str, Any]]
-    unlinked: list[dict[str, Any]]
-
-    def as_dict(self) -> dict[str, Any]:
-        return {
-            "title": self.title,
-            "benchmark": self.benchmark,
-            "task_id": self.task_id,
-            "success": self.success,
-            "failure_reason": self.failure_reason,
-            "poc_source": self.poc_source,
-            "models": self.models,
-            "legend": [a.legend_dict() for a in self.agents],
-            "root_name": self.root_name,
-            "root_result": self.root_result,
-            "root_steps": self.root_steps,
-            "unlinked": self.unlinked,
-        }
-
-
-def _load_jsonl(path: Path) -> list[dict[str, Any]]:
-    """Parse a ``.jsonl`` file, skipping any line that isn't valid JSON.
-
-    Pulled rollout dirs can contain empty files or a ``cat: ... No such
-    file`` stub where an agent never ran; those simply yield no records.
-    """
-
-    records: list[dict[str, Any]] = []
-    try:
-        text = path.read_text(encoding="utf-8", errors="replace")
-    except OSError:
-        return records
-    for line in text.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            obj = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if isinstance(obj, dict):
-            records.append(obj)
-    return records
-
-
-def _agent_from_records(
-    fallback_name: str, records: list[dict[str, Any]]
-) -> AgentTrace | None:
-    """Fold a file's records into one :class:`AgentTrace` (or ``None``)."""
-
-    meta = next((r for r in records if r.get("type") == "metadata"), {})
-    iters = [
-        Iteration(
-            n=int(r.get("iteration", 0)),
-            timestamp=str(r.get("timestamp", "")),
-            reasoning=str(r.get("response", "")),
-            blocks=[b for b in (r.get("code_blocks") or []) if isinstance(b, dict)],
-        )
-        for r in records
-        if r.get("type") == "iteration"
-    ]
-    if not iters and not meta:
-        return None
-    result_rec = next((r for r in records if r.get("type") == "result"), None)
-    result = str(result_rec.get("final_answer", "")) if result_rec is not None else None
-    first_ts = str(meta.get("timestamp", "")) or (iters[0].timestamp if iters else "")
-    return AgentTrace(
-        name=str(meta.get("agent") or fallback_name),
-        depth=int(meta.get("depth", 0)),
-        model=str(meta.get("model", "")),
-        backend=str(meta.get("backend", "")),
-        iterations=iters,
-        result=result,
-        first_ts=first_ts,
-    )
-
-
-def _read_json(path: Path) -> dict[str, Any]:
-    if not path.exists():
-        return {}
-    try:
-        obj = json.loads(path.read_text(encoding="utf-8", errors="replace"))
-    except (OSError, json.JSONDecodeError):
-        return {}
-    return obj if isinstance(obj, dict) else {}
-
-
-def _spawn_sessions(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """Split a sub-agent's records into one entry per spawn, time-ordered.
-
-    The root re-invokes a sub-agent many times; each invocation is a distinct
-    ``spawn_id`` whose iteration counter restarts at 1. One session == one
-    delegation the root can match a ``spawn_*()`` call to.
-    """
-
-    order: list[str] = []
-    sess: dict[str, dict[str, Any]] = {}
-    for r in records:
-        sid = str(r.get("spawn_id", ""))
-        kind = r.get("type")
-        if kind == "iteration":
-            if sid not in sess:
-                sess[sid] = {
-                    "first_ts": str(r.get("timestamp", "")),
-                    "returned": None,
-                    "iters": [],
-                }
-                order.append(sid)
-            sess[sid]["iters"].append(
-                {
-                    "iter": int(r.get("iteration", 0)),
-                    "ts": str(r.get("timestamp", "")),
-                    "reasoning": str(r.get("response", "")),
-                    "blocks": [
-                        b for b in (r.get("code_blocks") or []) if isinstance(b, dict)
-                    ],
-                }
-            )
-        elif kind == "result" and sid in sess:
-            sess[sid]["returned"] = str(r.get("final_answer", ""))
-    out = [sess[s] for s in order]
-    out.sort(key=lambda s: s["first_ts"])
-    return out
-
-
-def _child(name: str, color: str, session: dict[str, Any] | None) -> dict[str, Any]:
-    if session is None:
-        return {"agent": name, "color": color, "missing": True, "iters": []}
-    return {
-        "agent": name,
-        "color": color,
-        "returned": session.get("returned"),
-        "iters": session["iters"],
-    }
-
-
-def _build_root_spine(
-    root: AgentTrace,
-    sessions_by_agent: dict[str, list[dict[str, Any]]],
-    color_of: dict[str, str],
-) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
-    """Walk the root's iterations and hang each spawned sub-agent under the
-    ``spawn_*()`` call that produced it (FIFO per agent name).
-
-    Returns ``(root_steps, unlinked_children)``. ``unlinked`` holds sub-agent
-    sessions we couldn't tie to a call (count mismatch) so nothing is lost.
-    """
-
-    cursor = {name: 0 for name in sessions_by_agent if name != root.name}
-    steps: list[dict[str, Any]] = []
-    for it in root.iterations:
-        code = "\n".join(b.get("code", "") for b in it.blocks)
-        children: list[dict[str, Any]] = []
-        for name in SPAWN_RE.findall(code):
-            sessions = sessions_by_agent.get(name)
-            session = None
-            if sessions is not None and cursor.get(name, 0) < len(sessions):
-                session = sessions[cursor[name]]
-                cursor[name] += 1
-            children.append(_child(name, color_of.get(name, "#8a99ad"), session))
-        steps.append(
-            {
-                "iter": it.n,
-                "ts": it.timestamp,
-                "reasoning": it.reasoning,
-                "blocks": it.blocks,
-                "delegated": [c["agent"] for c in children],
-                "children": children,
-            }
-        )
-
-    unlinked: list[dict[str, Any]] = []
-    for name, sessions in sessions_by_agent.items():
-        if name == root.name:
-            continue
-        for session in sessions[cursor.get(name, 0) :]:
-            unlinked.append(_child(name, color_of.get(name, "#8a99ad"), session))
-    return steps, unlinked
+from ra.viewer.trace import load_rollout_dir
 
+from kai.viewer.html import render_html, write_html
 
-def load_rollout_dir(path: Path) -> RunTrace:
-    """Build a :class:`RunTrace` (root spine + causal nesting) from a dir."""
-
-    path = Path(path)
-    if not path.is_dir():
-        raise NotADirectoryError(f"{path} is not a directory")
-
-    agents: list[AgentTrace] = []
-    records_by_agent: dict[str, list[dict[str, Any]]] = {}
-    for jf in sorted(path.rglob("*.jsonl")):
-        if jf.name == "status_updates.jsonl":
-            continue
-        records = _load_jsonl(jf)
-        agent = _agent_from_records(jf.stem, records)
-        if agent is not None and agent.iterations:
-            agents.append(agent)
-            records_by_agent[agent.name] = records
-
-    agents.sort(key=lambda a: (a.depth, a.first_ts, a.name))
-    color_of = {a.name: PALETTE[i % len(PALETTE)] for i, a in enumerate(agents)}
-    for a in agents:
-        a.color = color_of[a.name]
-
-    root = _pick_root(agents)
-    sessions_by_agent = {
-        name: _spawn_sessions(records) for name, records in records_by_agent.items()
-    }
-    if root is not None:
-        root_steps, unlinked = _build_root_spine(root, sessions_by_agent, color_of)
-    else:
-        root_steps, unlinked = [], []
-
-    score = _read_json(path / "score.json")
-    details = score.get("details") or {}
-    task_ref = score.get("task_ref") or {}
-    run = _read_json(path / "run.json")
-
-    benchmark = str(task_ref.get("benchmark") or _guess_benchmark(path.name))
-    task_id = str(task_ref.get("task_id") or details.get("task_id") or path.name)
-    models = sorted({a.model for a in agents if a.model})
-    if not models and run.get("root_model"):
-        models = [str(run["root_model"])]
-
-    return RunTrace(
-        title=path.name,
-        benchmark=benchmark,
-        task_id=task_id,
-        success=score.get("success"),
-        failure_reason=score.get("failure_reason"),
-        poc_source=details.get("poc_source"),
-        models=models,
-        agents=agents,
-        root_name=root.name if root else "",
-        root_result=root.result if root else None,
-        root_steps=root_steps,
-        unlinked=unlinked,
-    )
-
-
-def _pick_root(agents: list[AgentTrace]) -> AgentTrace | None:
-    """The depth-0 orchestrator (prefer the conventional ``exploit``)."""
-
-    if not agents:
-        return None
-    named = next((a for a in agents if a.name == ROOT_AGENT and a.depth == 0), None)
-    if named is not None:
-        return named
-    return min(agents, key=lambda a: (a.depth, a.first_ts))
-
-
-def _guess_benchmark(dir_name: str) -> str:
-    for known in ("cybergym", "bountybench", "evmbench", "noop"):
-        if dir_name.startswith(known):
-            return known
-    return "rollout"
-
-
-def render_html(run: RunTrace) -> str:
-    """Render a self-contained HTML page (inline data + CSS + JS)."""
-
-    # ``</`` would prematurely close the <script>; escape it in the blob.
-    data_json = json.dumps(run.as_dict()).replace("</", "<\\/")
-    return _HTML_TEMPLATE.replace("__DATA__", data_json)
-
-
-def write_html(rollout_dir: Path, out: Path | None = None) -> Path:
-    """Load ``rollout_dir`` and write ``trace.html`` (or ``out``)."""
-
-    run = load_rollout_dir(rollout_dir)
-    target = out or (Path(rollout_dir) / "trace.html")
-    target.write_text(render_html(run), encoding="utf-8")
-    return target
-
-
-_HTML_TEMPLATE = r"""<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>RLM rollout trace</title>
-<style>
-  :root { color-scheme: dark; }
-  * { box-sizing: border-box; }
-  body {
-    margin: 0; font: 14px/1.55 -apple-system, BlinkMacSystemFont, "Segoe UI",
-    Roboto, sans-serif; background: #0e1116; color: #d6deeb;
-  }
-  code, pre, .mono { font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace; }
-  header {
-    padding: 13px 18px; background: #11151c; border-bottom: 1px solid #232b36;
-    position: sticky; top: 0; z-index: 5;
-  }
-  header h1 { margin: 0 0 6px; font-size: 15px; }
-  header h1 .task { color: #7fdbca; }
-  .badges { display: flex; flex-wrap: wrap; gap: 8px; font-size: 12px; }
-  .badge {
-    padding: 2px 9px; border-radius: 999px; background: #1c2430;
-    border: 1px solid #2a3543; white-space: nowrap;
-  }
-  .ok { background: #10331f; border-color: #1d6b3a; color: #7ee2a8; }
-  .fail { background: #3a1717; border-color: #7a2b2b; color: #ff9b9b; }
-  .layout { display: flex; align-items: flex-start; }
-  nav {
-    width: 232px; flex: 0 0 232px; border-right: 1px solid #232b36;
-    padding: 12px 10px; background: #0c1219;
-    position: sticky; top: 56px; max-height: calc(100vh - 56px); overflow: auto;
-  }
-  nav .guide {
-    font-size: 12px; color: #9fb0c3; background: #11161f;
-    border: 1px solid #232b36; border-radius: 8px; padding: 10px; margin-bottom: 12px;
-  }
-  nav .guide b { color: #cdd9e5; }
-  nav .guide .k { color: #ffcb6b; }
-  nav .lt { font-size: 11px; text-transform: uppercase; letter-spacing: .05em;
-    color: #6f7e92; margin: 4px 6px; }
-  nav .agent {
-    display: block; width: 100%; text-align: left; cursor: pointer;
-    background: none; border: 0; color: inherit; padding: 5px 8px;
-    border-radius: 6px; line-height: 1.35;
-  }
-  nav .agent:hover { background: #161d27; }
-  nav .agent .swatch {
-    display: inline-block; width: 9px; height: 9px; border-radius: 2px;
-    margin-right: 6px; vertical-align: middle;
-  }
-  nav .agent .name { font-weight: 600; }
-  nav .agent .meta { font-size: 11px; color: #8a99ad; }
-  main { flex: 1 1 auto; padding: 16px 22px; min-width: 0; max-width: 980px; }
-  .step { border-left: 3px solid #7fdbca; padding: 2px 0 2px 14px; margin: 0 0 18px; }
-  .shead {
-    display: flex; flex-wrap: wrap; gap: 9px; align-items: baseline;
-    font-size: 12px; margin-bottom: 6px;
-  }
-  .shead .idx { color: #58657a; }
-  .shead .agent { font-weight: 700; color: #7fdbca; }
-  .shead .ts { color: #58657a; margin-left: auto; }
-  .shead .deleg {
-    color: #ffcb6b; border: 1px solid #5a4a1f; border-radius: 4px;
-    padding: 0 6px; font-size: 11px;
-  }
-  .prose { white-space: pre-wrap; margin: 0 0 9px; }
-  pre.code, pre.output {
-    margin: 0 0 9px; padding: 9px 11px; border-radius: 6px;
-    overflow: auto; font-size: 12.5px; white-space: pre-wrap; word-break: break-word;
-  }
-  pre.code { background: #0b1f2a; border: 1px solid #16384a; }
-  pre.output {
-    background: #15110b; border: 1px solid #3a2c16; color: #e8d8b0; max-height: 360px;
-  }
-  details.spawn {
-    margin: 2px 0 11px; border-left: 2px dashed #4a5468; padding-left: 12px;
-  }
-  details.spawn > summary {
-    cursor: pointer; font-size: 12.5px; padding: 4px 0; color: #cdd9e5;
-  }
-  details.spawn > summary .who { font-weight: 700; }
-  details.spawn > summary .ret { color: #9fb0c3; }
-  details.spawn[open] > summary .ret { display: none; }
-  .childhead { font-size: 11px; color: #8a99ad; margin: 9px 0 4px; }
-  .childit { padding-left: 10px; border-left: 1px solid #222a35; margin-bottom: 10px; }
-  .missing { color: #a06a6a; font-size: 12px; padding: 4px 0; }
-  .ret-box {
-    border: 1px solid #3a4a2c; background: #14180e; border-radius: 6px;
-    padding: 8px 10px; margin: 4px 0 9px; white-space: pre-wrap;
-    font-size: 12.5px; color: #d6e6b8; max-height: 220px; overflow: auto;
-  }
-  .result {
-    border: 1px solid #1d6b3a; border-radius: 8px; padding: 11px;
-    background: #0f1c14; white-space: pre-wrap; margin: 2px 0 0;
-  }
-  .sec { font-size: 12px; color: #8a99ad; margin: 26px 0 10px; border-top: 1px solid #232b36;
-    padding-top: 12px; }
-  .empty { color: #7e8da1; padding: 30px; }
-</style>
-</head>
-<body>
-<header>
-  <h1>Trace: <span class="task" id="title"></span></h1>
-  <div class="badges" id="badges"></div>
-</header>
-<div class="layout">
-  <nav id="nav"></nav>
-  <main id="main"></main>
-</div>
-<script id="data" type="application/json">__DATA__</script>
-<script>
-// Every dynamic value is inserted via textContent / DOM nodes (never
-// innerHTML), so unsanitised rollout text can't inject markup.
-const RUN = JSON.parse(document.getElementById("data").textContent);
-
-function el(tag, cls, text) {
-  const n = document.createElement(tag);
-  if (cls) n.className = cls;
-  if (text != null) n.textContent = String(text);
-  return n;
-}
-
-function proseNode(text) {
-  // Drop ``` fenced segments (code is rendered from blocks); keep prose.
-  const parts = String(text || "").split("```");
-  const prose = parts
-    .filter((_, i) => i % 2 === 0)
-    .join("\n")
-    .replace(/<br>/g, "")
-    .replace(/\n{3,}/g, "\n\n")
-    .trim();
-  return prose ? el("div", "prose", prose) : null;
-}
-
-function blockNodes(blocks) {
-  const nodes = [];
-  (blocks || []).forEach((b) => {
-    if (b.code && b.code.trim()) nodes.push(el("pre", "code", b.code));
-    if (b.output && b.output.trim()) nodes.push(el("pre", "output", b.output));
-  });
-  return nodes;
-}
-
-function head(s) {
-  return (s || "").replace(/\s+/g, " ").trim().slice(0, 130);
-}
-
-function childNode(child) {
-  const det = el("details", "spawn");
-  det.dataset.agent = child.agent;
-  det.style.borderLeftColor = child.color || "#4a5468";
-  const sum = el("summary");
-  const who = el("span", "who", "⤷ spawned " + child.agent);
-  who.style.color = child.color || "inherit";
-  sum.append(who);
-  if (child.missing) {
-    sum.append(el("span", "ret", " — no rollout captured in this dir"));
-    det.append(sum, el("div", "missing", "(sub-agent file absent or empty)"));
-    return det;
-  }
-  if (child.returned != null && child.returned !== "")
-    sum.append(el("span", "ret", " — returned: " + head(child.returned)));
-  else sum.append(el("span", "ret", " — (no return value recorded)"));
-  det.append(sum);
-
-  if (child.returned != null && child.returned !== "")
-    det.append(el("div", "ret-box", child.returned));
-  (child.iters || []).forEach((it) => {
-    const wrap = el("div", "childit");
-    wrap.append(el("div", "childhead", child.agent + " · iter " + it.iter));
-    const p = proseNode(it.reasoning);
-    if (p) wrap.append(p);
-    blockNodes(it.blocks).forEach((n) => wrap.append(n));
-    det.append(wrap);
-  });
-  return det;
-}
-
-function stepNode(step) {
-  const wrap = el("div", "step");
-  const h = el("div", "shead");
-  h.append(el("span", "idx", "#" + step.iter));
-  h.append(el("span", "agent", RUN.root_name));
-  if (step.delegated && step.delegated.length)
-    h.append(el("span", "deleg", "⤷ " + step.delegated.join(", ")));
-  h.append(el("span", "ts", (step.ts || "").replace("T", " ").slice(0, 19)));
-  wrap.append(h);
-
-  const prose = proseNode(step.reasoning);
-  if (prose) wrap.append(prose);
-  // Causal order: the spawn call's reasoning/code, then dive into the child,
-  // then the output (which contains what the child returned to the root).
-  (step.blocks || []).forEach((b) => {
-    if (b.code && b.code.trim()) wrap.append(el("pre", "code", b.code));
-  });
-  (step.children || []).forEach((c) => wrap.append(childNode(c)));
-  (step.blocks || []).forEach((b) => {
-    if (b.output && b.output.trim()) wrap.append(el("pre", "output", b.output));
-  });
-  return wrap;
-}
-
-function renderMain() {
-  const main = document.getElementById("main");
-  const nodes = [];
-  if (!RUN.root_steps.length) nodes.push(el("div", "empty", "No root agent found."));
-  RUN.root_steps.forEach((s) => nodes.push(stepNode(s)));
-  if (RUN.root_result) {
-    nodes.push(el("div", "sec", RUN.root_name + " — final answer"));
-    nodes.push(el("div", "result", RUN.root_result));
-  }
-  if (RUN.unlinked && RUN.unlinked.length) {
-    nodes.push(
-      el("div", "sec", "Sub-agent runs not tied to a spawn call (" +
-        RUN.unlinked.length + ")")
-    );
-    RUN.unlinked.forEach((c) => nodes.push(childNode(c)));
-  }
-  main.replaceChildren(...nodes);
-}
-
-function openAgent(name) {
-  let first = null;
-  document.querySelectorAll("details.spawn").forEach((d) => {
-    if (d.dataset.agent === name) {
-      d.open = true;
-      if (!first) first = d;
-    }
-  });
-  if (first) first.scrollIntoView({ block: "center" });
-}
-
-function badge(cls, text) {
-  return el("span", cls ? "badge " + cls : "badge", text);
-}
-
-function buildNav() {
-  const nav = document.getElementById("nav");
-  const guide = el("div", "guide");
-  guide.append(
-    document.createTextNode("Read "),
-    el("b", null, RUN.root_name),
-    document.createTextNode(" top-to-bottom: each step is reason → run code "),
-    document.createTextNode("→ observe output. A "),
-    el("span", "k", "spawn_*()"),
-    document.createTextNode(
-      " call delegates a subtask; its answer is in that step's output. "
-    ),
-    document.createTextNode("Expand "),
-    el("span", "k", "↳"),
-    document.createTextNode(" to see how the sub-agent got there.")
-  );
-  nav.append(guide, el("div", "lt", "agents — click to expand"));
-  RUN.legend.forEach((a) => {
-    const btn = el("button", "agent");
-    const sw = el("span", "swatch");
-    sw.style.background = a.color;
-    btn.append(
-      sw,
-      el("span", "name", a.name),
-      el("span", "meta", "  d" + a.depth + " · " + a.iters + " it")
-    );
-    btn.addEventListener("click", () => openAgent(a.name));
-    nav.append(btn);
-  });
-}
-
-function init() {
-  document.getElementById("title").textContent =
-    RUN.benchmark + " / " + RUN.task_id;
-  const badges = [];
-  if (RUN.success === true) badges.push(badge("ok", "✅ success"));
-  else if (RUN.success === false) badges.push(badge("fail", "❌ fail"));
-  if (RUN.failure_reason) badges.push(badge("fail", RUN.failure_reason));
-  if (RUN.poc_source) badges.push(badge("", "poc: " + RUN.poc_source));
-  badges.push(badge("", RUN.legend.length + " agents"));
-  (RUN.models || []).forEach((m) => badges.push(badge("mono", m)));
-  document.getElementById("badges").replaceChildren(...badges);
-
-  buildNav();
-  renderMain();
-}
-
-init();
-</script>
-</body>
-</html>
-"""
+__all__ = ["load_rollout_dir", "render_html", "write_html"]
diff --git a/src/kai/viewer/__init__.py b/src/kai/viewer/__init__.py
new file mode 100644
index 00000000..508a1756
--- /dev/null
+++ b/src/kai/viewer/__init__.py
@@ -0,0 +1,23 @@
+"""Self-contained HTML viewer for kai runs (findings + agent trace).
+
+Reads a run directory written by the pipeline -- ``exploits.json`` for the
+security findings and ``rollouts/*.jsonl`` (or flat ``*.jsonl``) for the
+agent trace -- and renders a single offline HTML file. No server, no
+external requests, no live state backend required.
+"""
+
+from __future__ import annotations
+
+from ra.viewer.trace import RunTrace, load_rollout_dir
+
+from kai.viewer.findings import Finding, load_findings
+from kai.viewer.html import render_html, write_html
+
+__all__ = [
+    "Finding",
+    "RunTrace",
+    "load_findings",
+    "load_rollout_dir",
+    "render_html",
+    "write_html",
+]
diff --git a/src/kai/viewer/__main__.py b/src/kai/viewer/__main__.py
new file mode 100644
index 00000000..8affaecf
--- /dev/null
+++ b/src/kai/viewer/__main__.py
@@ -0,0 +1,52 @@
+"""CLI entry point: ``python -m kai.viewer <run_dir> [-o OUT] [--open]``.
+
+Renders a run directory into a single self-contained HTML file. This is the
+implementation the ``kai view`` subcommand wraps; it also works standalone.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+import webbrowser
+from pathlib import Path
+
+from kai.viewer.html import write_html
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Render ``run_dir`` to HTML and print the path of the written file.
+
+    Returns ``0`` on success, or ``2`` (after an ``error:`` line on stderr)
+    when ``run_dir`` is not a directory. With ``argv=None`` ``argparse``
+    falls back to ``sys.argv[1:]``; passing an explicit list makes the
+    function callable from other code.
+    """
+
+    cli = argparse.ArgumentParser(
+        prog="python -m kai.viewer",
+        description="Render a kai run (findings + agent trace) to a single HTML file.",
+    )
+    cli.add_argument(
+        "run_dir",
+        help="run directory (a state/<run_id>/ dir with exploits.json and/or rollouts/)",
+    )
+    cli.add_argument("-o", "--output", help="output HTML path (default: <run_dir>/trace.html)")
+    cli.add_argument("--open", action="store_true", help="open the rendered file in a browser")
+    opts = cli.parse_args(argv)
+
+    source = Path(opts.run_dir)
+    if not source.is_dir():
+        print(f"error: {source} is not a directory", file=sys.stderr)
+        return 2
+
+    # ``None`` leaves the choice of output path to ``write_html``.
+    rendered = write_html(source, Path(opts.output) if opts.output else None)
+    print(rendered)
+    if opts.open:
+        webbrowser.open(rendered.resolve().as_uri())
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/kai/viewer/findings.py b/src/kai/viewer/findings.py
new file mode 100644
index 00000000..2ae1bbd8
--- /dev/null
+++ b/src/kai/viewer/findings.py
@@ -0,0 +1,210 @@
+"""Load security findings from a run's ``exploits.json``.
+
+A normal pipeline run persists its findings as a JSON array of
+:class:`kai.state.models.ExploitRecord` dicts at
+``<state_dir>/<run_id>/exploits.json``. This module folds those into the
+flat :class:`Finding` view-model the HTML renderer draws, deriving display
+helpers (a one-line title, a severity bucket, a human-readable CVSS vector)
+without needing a live state backend.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from kai import cvss
+
+# Human-readable expansions for CVSS 3.1 vector codes, by metric.
+_CVSS_LABELS: dict[str, dict[str, str]] = {
+    "AV": {"N": "Network", "A": "Adjacent", "L": "Local", "P": "Physical"},
+    "AC": {"L": "Low", "H": "High"},
+    "PR": {"N": "None", "L": "Low", "H": "High"},
+    "UI": {"N": "None", "R": "Required"},
+    "S": {"U": "Unchanged", "C": "Changed"},
+    "C": {"H": "High", "L": "Low", "N": "None"},
+    "I": {"H": "High", "L": "Low", "N": "None"},
+    "A": {"H": "High", "L": "Low", "N": "None"},
+}
+_CVSS_ORDER = ("AV", "AC", "PR", "UI", "S", "C", "I", "A")  # row order used by ``_cvss_rows``
+
+# Recognised severity labels and their rank for sorting (higher = more severe).
+_SEVERITY_RANK = {"critical": 4, "high": 3, "medium": 2, "low": 1, "none": 0}
+
+# Internal bookkeeping records that aren't user-facing findings: when the
+# pipeline merges duplicate hypotheses it keeps the merged-away ones as
+# ``deduplicated`` shells (no severity/PoC). The report and viewer hide them.
+_HIDDEN_STATUSES = {"deduplicated"}
+
+
+@dataclass
+class Finding:
+    """One vulnerability finding, flattened for display."""
+
+    exploit_id: str
+    title: str  # one-line headline; ``_finding_from_record`` fills it via ``_title_of``
+    hypothesis: str
+    exploit_sketch: str
+    file: str
+    function: str
+    category: str
+    status: str
+    confirmed: bool | None  # the record's raw ``confirmed`` value; ``None`` when absent
+    severity: str  # lower-cased label; ``_finding_from_record`` fills it via ``_severity_of``
+    cvss_score: float | None  # the record's raw ``cvss_score`` value, not coerced
+    cvss_vector: str
+    cvss_rows: list[dict[str, str]] = field(default_factory=list)  # see ``_cvss_rows``
+    poc_code: str = ""
+    patch: str = ""
+    attacker_role: str = ""
+    prerequisite: str = ""  # record's ``prerequisite``, else its ``required_privileges``
+    adversarial_viability: str = ""
+    profit_model: str = ""
+    critic_summary: str = ""
+
+    def as_dict(self) -> dict[str, Any]:  # every field, keyed by its attribute name
+        return {
+            "exploit_id": self.exploit_id,
+            "title": self.title,
+            "hypothesis": self.hypothesis,
+            "exploit_sketch": self.exploit_sketch,
+            "file": self.file,
+            "function": self.function,
+            "category": self.category,
+            "status": self.status,
+            "confirmed": self.confirmed,
+            "severity": self.severity,
+            "cvss_score": self.cvss_score,
+            "cvss_vector": self.cvss_vector,
+            "cvss_rows": self.cvss_rows,
+            "poc_code": self.poc_code,
+            "patch": self.patch,
+            "attacker_role": self.attacker_role,
+            "prerequisite": self.prerequisite,
+            "adversarial_viability": self.adversarial_viability,
+            "profit_model": self.profit_model,
+            "critic_summary": self.critic_summary,
+        }
+
+
+def _title_of(record: dict[str, Any]) -> str:
+    """A one-line headline: the first sentence of the hypothesis, else a
+    ``<category> in <function>`` fallback (also used when that sentence is
+    empty, e.g. a hypothesis of just ``"."``, so a title is never blank)."""
+
+    hypothesis = str(record.get("hypothesis") or "").strip()
+    first = hypothesis.replace("\n", " ").split(". ")[0].strip().rstrip(".")
+    # Cut at a word boundary so a long first sentence stays a scannable headline.
+    if len(first) > 64:
+        first = first[:64].rsplit(" ", 1)[0] + "…"
+    if first:
+        return first
+    category = str(record.get("category") or "finding").replace("_", " ")
+    fn = str(record.get("function") or "").strip()
+    return f"{category} in {fn}" if fn else category
+
+
+def _cvss_rows(vector: str, justification: dict[str, str] | None) -> list[dict[str, str]]:
+    """Expand a CVSS vector into ordered ``{metric, value, why}`` rows.
+
+    Returns ``[]`` for an empty or unparseable vector. ``justification`` is
+    read straight from JSON, so a non-dict value is treated as absent.
+    """
+
+    if not vector:
+        return []
+    try:
+        metrics = cvss.parse_vector(vector)
+    except Exception:  # best-effort display helper: a bad vector yields no rows
+        return []
+    why_of = justification if isinstance(justification, dict) else {}
+    return [
+        {
+            "metric": code,
+            "value": _CVSS_LABELS.get(code, {}).get(metrics[code], metrics[code]),
+            "why": str(why_of.get(code, "")),
+        }
+        for code in _CVSS_ORDER
+        if code in metrics
+    ]
+
+
+def _severity_of(record: dict[str, Any]) -> str:
+    """Lower-cased severity label for ``record``; when the label is missing or
+    unrecognised, fall back to the bucket implied by its CVSS score."""
+
+    label = str(record.get("severity") or "").strip().lower()
+    score = record.get("cvss_score")
+    if label in _SEVERITY_RANK:
+        return label
+    if not isinstance(score, (int, float)):
+        return "none"
+    return cvss.score_to_severity(float(score)).lower()
+
+
+def _finding_from_record(record: dict[str, Any]) -> Finding:  # one raw record -> Finding
+    return Finding(
+        exploit_id=str(record.get("exploit_id") or ""),  # missing / falsy -> ""
+        title=_title_of(record),
+        hypothesis=str(record.get("hypothesis") or ""),
+        exploit_sketch=str(record.get("exploit_sketch") or ""),
+        file=str(record.get("file") or ""),
+        function=str(record.get("function") or ""),
+        category=str(record.get("category") or ""),
+        status=str(record.get("status") or ""),
+        confirmed=record.get("confirmed"),  # passed through as-is (``None`` if missing)
+        severity=_severity_of(record),
+        cvss_score=record.get("cvss_score"),  # passed through as-is, not coerced to float
+        cvss_vector=str(record.get("cvss_vector") or ""),
+        cvss_rows=_cvss_rows(
+            str(record.get("cvss_vector") or ""), record.get("cvss_justification")
+        ),
+        poc_code=str(record.get("poc_code") or ""),
+        patch=str(record.get("patch") or ""),
+        attacker_role=str(record.get("attacker_role") or ""),
+        prerequisite=str(record.get("prerequisite") or record.get("required_privileges") or ""),
+        adversarial_viability=str(record.get("adversarial_viability") or ""),
+        profit_model=str(record.get("profit_model") or ""),
+        critic_summary=str(record.get("critic_summary") or ""),
+    )
+
+
+def _sort_key(f: Finding) -> tuple[int, int, float]:
+    """Rank tuple for sorting: confirmed flag, severity rank, then CVSS score.
+
+    Severity comes before the numeric score so that a labelled high/critical
+    finding with no usable CVSS score (the fixer can emit one without a
+    vector) still ranks above a low finding that does carry a number.
+    """
+
+    numeric = isinstance(f.cvss_score, (int, float))
+    # -1.0 is the placeholder for a missing / non-numeric score.
+    score = f.cvss_score if numeric else -1.0
+    return (int(bool(f.confirmed)), _SEVERITY_RANK.get(f.severity, 0), score)
+
+
+def load_findings(run_dir: Path) -> list[Finding]:
+    """Read ``<run_dir>/exploits.json`` into sorted :class:`Finding` objects.
+
+    Returns an empty list when the file is absent, unreadable, or unparseable
+    (e.g. a benchmark rollout dir, which carries ``score.json`` but no findings).
+    """
+
+    path = Path(run_dir) / "exploits.json"
+    try:
+        # EAFP: a missing file raises FileNotFoundError, which is an OSError.
+        data = json.loads(path.read_text(encoding="utf-8", errors="replace"))
+    except (OSError, json.JSONDecodeError):
+        return []
+    if not isinstance(data, list):
+        return []
+    findings = [
+        _finding_from_record(r)
+        for r in data
+        # ``str()`` keeps a malformed (unhashable) status from raising here.
+        if isinstance(r, dict) and str(r.get("status")) not in _HIDDEN_STATUSES
+    ]
+    findings.sort(key=_sort_key, reverse=True)
+    return findings
diff --git a/src/kai/viewer/html.py b/src/kai/viewer/html.py
new file mode 100644
index 00000000..64a8db0d
--- /dev/null
+++ b/src/kai/viewer/html.py
@@ -0,0 +1,162 @@
+"""Render a kai run as a single self-contained HTML page.
+
+Composes kai's security **Findings** panel (severity, CVSS, PoC, patch) onto
+the reusable viewer in :mod:`ra.viewer` — which supplies the tabbed shell, the
+shared design system, and the built-in **Trace** panel. Findings stay here
+because they're domain concepts (CVSS, exploits); the trace viewer and styling
+live in ``ra`` so any ra agent can reuse them.
+
+Every dynamic value is written via ``textContent`` / DOM nodes, so unsanitised
+rollout text cannot inject markup.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ra.viewer.html import Panel, render_page, trace_panel
+from ra.viewer.trace import RunTrace, load_rollout_dir
+
+from kai.viewer.findings import Finding, load_findings
+
+# Findings-panel layout: the master-detail split + interactive table rows +
+# the detail pane. Shared tokens/primitives come from ra.viewer.style.
+_FINDINGS_CSS = """\
+  .split { display: grid; grid-template-columns: minmax(360px, 1fr) minmax(420px, 1.3fr); }
+  @media (max-width: 880px) { .split { grid-template-columns: 1fr; } }
+  tbody tr { cursor: pointer; }
+  tbody tr:hover { background: color-mix(in srgb, var(--accent) 5%, transparent); }
+  tbody tr.sel { background: color-mix(in srgb, var(--accent) 9%, transparent); }
+  .detail { border-left: 1px solid var(--rule-2); padding: 18px 22px; min-width: 0; }
+  .detail h2 { margin: 0 0 4px; font-size: 18px; font-weight: 600; line-height: 1.3; }
+  .detail .where { font-size: 12.5px; color: var(--muted); margin-bottom: 16px; }
+"""
+
+_FINDINGS_SECTION = """\
+<section class="view" id="view-findings">
+  <div class="split">
+    <div><table><thead><tr>
+      <th class="num">CVSS</th><th>Finding</th><th>Category</th><th>Location</th>
+    </tr></thead><tbody id="rows"></tbody></table></div>
+    <div class="detail" id="detail"></div>
+  </div>
+</section>"""
+
+_FINDINGS_JS = r"""
+const FINDINGS = DATA.findings || [];
+const pct = s => Math.max(0, Math.min(100, Math.round((s || 0) / 10 * 100)));
+function fRow(f, i) {
+  const tr = el("tr", "sev-" + (f.severity || "none") + (f.confirmed ? "" : " unconf"));
+  tr.dataset.i = i;
+  const c = el("td", "cvss");
+  c.append(el("span", "dot"));
+  c.append(el("span", "score", f.cvss_score != null ? Number(f.cvss_score).toFixed(1) : "—"));
+  if (f.cvss_score != null) {
+    const bar = el("span", "bar"), fill = el("i");
+    fill.style.width = pct(f.cvss_score) + "%"; bar.append(fill); c.append(bar);
+  }
+  const t = el("td"); t.append(el("div", "ftitle", f.title));
+  tr.append(c, t, el("td", "cat", (f.category || "").replace(/_/g, " ")),
+    el("td", "loc", (f.file ? f.file.split("/").pop() : "") + (f.function ? ":" + f.function : "")));
+  tr.addEventListener("click", () => fSelect(i));
+  return tr;
+}
+function diffNode(patch) {
+  const pre = el("pre", "diff");
+  String(patch).split("\n").forEach(line => {
+    const k = line.startsWith("+") ? "add" : line.startsWith("-") ? "del" : null;
+    pre.append(el("span", k, line + "\n"));
+  });
+  return pre;
+}
+function kvRow(dl, k, v) { if (v) { dl.append(el("dt", null, k), el("dd", null, v)); } }
+function fSelect(i) {
+  document.querySelectorAll("#rows tr").forEach(r => r.classList.toggle("sel", +r.dataset.i === i));
+  const f = FINDINGS[i], d = document.getElementById("detail"); d.replaceChildren();
+  d.append(el("h2", "serif", f.title));
+  d.append(el("div", "where", f.file + (f.function ? " · " + f.function + "()" : "")));
+  const kv = el("dl", "kv");
+  const sevLine = (f.severity || "—") + (f.cvss_score != null ? " · CVSS " + Number(f.cvss_score).toFixed(1) : "");
+  kvRow(kv, "Severity", sevLine);
+  kvRow(kv, "Status", f.status + (f.confirmed ? " · confirmed" : ""));
+  kvRow(kv, "Category", (f.category || "").replace(/_/g, " "));
+  kvRow(kv, "Attacker", f.attacker_role);
+  kvRow(kv, "Precondition", f.prerequisite);
+  if (kv.children.length) d.append(kv);
+
+  if (f.hypothesis) { d.append(el("div", "sec-label", "Why it's exploitable")); d.append(el("p", "prose", f.hypothesis)); }
+  if (f.exploit_sketch) { d.append(el("div", "sec-label", "Exploit sketch")); d.append(el("p", "prose", f.exploit_sketch)); }
+
+  if (f.cvss_rows && f.cvss_rows.length) {
+    d.append(el("div", "sec-label", "CVSS 3.1 vector"));
+    if (f.cvss_vector) d.append(el("div", "vector mono", f.cvss_vector));
+    const g = el("div", "cvss-grid");
+    f.cvss_rows.forEach(r => { g.append(el("span", "m", r.metric), el("span", "v", r.value), el("span", "why", r.why)); });
+    d.append(g);
+  }
+  if (f.poc_code) { d.append(el("div", "sec-label", "Proof of concept")); d.append(el("pre", "code", f.poc_code)); }
+  if (f.patch) { d.append(el("div", "sec-label", "Suggested patch")); d.append(diffNode(f.patch)); }
+  if (f.critic_summary) { d.append(el("div", "sec-label", "Critic")); d.append(el("p", "prose", f.critic_summary)); }
+}
+function renderFindings() {
+  // Contribute the findings tally to the header facts (ahead of the models).
+  const facts = document.getElementById("facts"), ref = facts.firstChild;
+  const tally = el("span"); tally.append(el("b", null, FINDINGS.length), document.createTextNode(" findings"));
+  facts.insertBefore(tally, ref);
+  const crit = FINDINGS.filter(f => f.severity === "critical").length;
+  if (crit) { const c = el("span", "crit"); c.append(el("b", null, crit), document.createTextNode(" critical")); facts.insertBefore(c, ref); }
+
+  const rows = document.getElementById("rows");
+  if (!FINDINGS.length) {
+    document.getElementById("view-findings").querySelector(".split")
+      .replaceChildren(el("div", "empty", "No findings recorded for this run."));
+    return;
+  }
+  FINDINGS.forEach((f, i) => rows.append(fRow(f, i)));
+  fSelect(0);
+}
+"""
+
+
+def _findings_panel() -> Panel:
+    return Panel(
+        "findings", "Findings", _FINDINGS_SECTION, _FINDINGS_CSS, _FINDINGS_JS, "renderFindings();"
+    )
+
+
+def render_html(run: RunTrace, findings: list[Finding] | None = None) -> str:
+    """Render the full kai page (Findings + Trace) from a trace + findings list.
+
+    ``findings`` defaults to empty (e.g. a benchmark rollout dir has a trace but
+    no ``exploits.json``); the Findings tab then shows an empty state and the
+    Trace tab opens first.
+    """
+
+    findings = findings or []
+    data = {
+        "title": run.title,
+        "benchmark": run.benchmark,
+        "task_id": run.task_id,
+        "models": run.models,
+        "run": run.as_dict(),
+        "findings": [f.as_dict() for f in findings],
+    }
+    default_view = "findings" if findings else "trace"
+    return render_page(
+        data, [_findings_panel(), trace_panel()], brand="kai", default_view=default_view
+    )
+
+
+def write_html(run_dir: Path, out: Path | None = None) -> Path:
+    """Load ``run_dir`` (trace + findings) and write a single HTML file.
+
+    Defaults to ``<run_dir>/trace.html`` so existing callers that link to that
+    name keep working.
+    """
+
+    run = load_rollout_dir(run_dir)
+    findings = load_findings(run_dir)
+    target = out or (Path(run_dir) / "trace.html")
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(render_html(run, findings), encoding="utf-8")
+    return target
diff --git a/src/ra/viewer/__init__.py b/src/ra/viewer/__init__.py
new file mode 100644
index 00000000..a38cc784
--- /dev/null
+++ b/src/ra/viewer/__init__.py
@@ -0,0 +1,29 @@
+"""Reusable HTML viewer for ``ra`` agent runs.
+
+Renders a run directory's ``*.jsonl`` rollouts into a single offline HTML page
+— a tabbed shell with a built-in causal **Trace** panel and a shared design
+system (:mod:`ra.viewer.style`). Domain layers compose extra panels on top via
+:func:`ra.viewer.html.render_page`; kai-security, for example, adds a security
+**Findings** panel.
+"""
+
+from __future__ import annotations
+
+from ra.viewer.html import (
+    Panel,
+    render_page,
+    render_trace_html,
+    trace_panel,
+    write_trace_html,
+)
+from ra.viewer.trace import RunTrace, load_rollout_dir
+
+__all__ = [
+    "Panel",
+    "RunTrace",
+    "load_rollout_dir",
+    "render_page",
+    "render_trace_html",
+    "trace_panel",
+    "write_trace_html",
+]
diff --git a/src/ra/viewer/__main__.py b/src/ra/viewer/__main__.py
new file mode 100644
index 00000000..09c72f28
--- /dev/null
+++ b/src/ra/viewer/__main__.py
@@ -0,0 +1,47 @@
+"""CLI entry point: ``python -m ra.viewer <run_dir> [-o OUT] [--open]``.
+
+Renders any ``ra`` run's agent trace into a single self-contained HTML file.
+Domain tools (e.g. ``kai view``) wrap a richer page on top of this.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+import webbrowser
+from pathlib import Path
+
+from ra.viewer.html import write_trace_html
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(
+        prog="python -m ra.viewer",
+        description="Render an ra run's agent trace to a single HTML file.",
+    )
+    parser.add_argument(
+        "run_dir",
+        help="run directory (a dir with *.jsonl rollouts, or state/<id>/rollouts/)",
+    )
+    parser.add_argument(
+        "-o", "--output", help="output HTML path (default: <run_dir>/trace.html)"
+    )
+    parser.add_argument(
+        "--open", action="store_true", help="open the rendered file in a browser"
+    )
+    args = parser.parse_args(argv)
+
+    run_dir = Path(args.run_dir)
+    if not run_dir.is_dir():
+        print(f"error: {run_dir} is not a directory", file=sys.stderr)
+        return 2
+
+    target = write_trace_html(run_dir, Path(args.output) if args.output else None)
+    print(target)
+    if args.open:
+        webbrowser.open(target.resolve().as_uri())
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/ra/viewer/html.py b/src/ra/viewer/html.py
new file mode 100644
index 00000000..ef29e898
--- /dev/null
+++ b/src/ra/viewer/html.py
@@ -0,0 +1,289 @@
+"""Reusable, self-contained HTML viewer for any ``ra`` agent run.
+
+This is framework-level: any agent built on ``ra`` writes per-agent
+``*.jsonl`` rollouts, and this module renders them into a single offline HTML
+page (no server, no external requests). The page is built from **panels** — a
+tabbed shell plus one or more views — so a domain layer can add its own panel
+(e.g. kai adds a security **Findings** panel) on top of the built-in
+**Trace** panel.
+
+Every dynamic value is written via ``textContent`` / DOM nodes, never
+``innerHTML``, so unsanitised rollout text cannot inject markup. The palette
+and shared primitives come from :mod:`ra.viewer.style`.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from pathlib import Path
+
+from ra.viewer import style
+from ra.viewer.trace import RunTrace, load_rollout_dir
+
+
+@dataclass(frozen=True)
+class Panel:
+    """One tab in the viewer.
+
+    ``js`` defines a render function over the embedded ``DATA``/``RUN`` globals
+    (and the shared ``el()`` helper); ``render_call`` invokes it at init.
+    """
+
+    id: str
+    label: str
+    section: str  # the <section class="view" id="view-<id>"> … </section> block
+    css: str
+    js: str
+    render_call: str
+
+
+# Shell chrome only (header / tabs / theme toggle / view switching). Panel- and
+# domain-specific styling lives on each Panel; shared tokens + primitives come
+# from style.base_css().
+_SHELL_CSS = """\
+  header { display: flex; align-items: baseline; gap: 18px; flex-wrap: wrap;
+    padding: 12px 22px; border-bottom: 1px solid var(--rule-2);
+    position: sticky; top: 0; background: var(--paper); z-index: 5; }
+  header h1 { margin: 0; font-size: 16px; font-weight: 600; }
+  header h1 .sub { color: var(--muted); font-weight: 400; }
+  .facts { display: flex; gap: 16px; font-size: 12px; color: var(--muted-2); }
+  .facts b { color: var(--ink); font-weight: 600; }
+  .facts .crit b { color: var(--accent); }
+  .spacer { flex: 1 1 auto; }
+  .tabs { display: flex; gap: 2px; }
+  .tab { border: 0; background: none; color: var(--muted-2); cursor: pointer;
+    font: inherit; font-size: 13px; padding: 4px 10px; border-bottom: 2px solid transparent; }
+  .tab.active { color: var(--ink); border-bottom-color: var(--accent); }
+  .toggle { border: 1px solid var(--rule-2); background: none; color: var(--muted-2);
+    border-radius: 5px; cursor: pointer; font-size: 12px; padding: 3px 8px; }
+  .view { display: none; }
+  .view.active { display: block; }
+"""
+
+# ---------------------------------------------------------------------------
+# Built-in Trace panel: the causal agent spine.
+# ---------------------------------------------------------------------------
+_TRACE_CSS = """\
+  .trace { padding: 14px 22px; max-width: 920px; }
+  .legend { display: flex; flex-wrap: wrap; gap: 12px; font-size: 12px; color: var(--muted-2);
+    margin-bottom: 16px; padding-bottom: 12px; border-bottom: 1px solid var(--rule); }
+  .legend .a { display: inline-flex; align-items: center; gap: 6px; }
+  .legend .sw { width: 9px; height: 9px; border-radius: 2px; display: inline-block; }
+  .step { border-left: 2px solid var(--rule-2); padding: 0 0 2px 14px; margin: 0 0 16px; }
+  .step .h { display: flex; gap: 10px; align-items: baseline; font-size: 12px; color: var(--muted-2); margin-bottom: 5px; }
+  .step .h .who { color: var(--ink); font-weight: 600; }
+  .step .h .deleg { color: var(--accent); }
+  .step .h .ts { margin-left: auto; }
+  .reason { white-space: pre-wrap; margin: 0 0 8px; }
+  details.spawn { border-left: 2px dashed var(--rule-2); padding-left: 12px; margin: 4px 0 10px; }
+  details.spawn > summary { cursor: pointer; font-size: 12.5px; color: var(--muted-2); padding: 3px 0; }
+  details.spawn > summary .who { color: var(--ink); font-weight: 600; }
+  details.spawn[open] > summary .ret { display: none; }
+  .ret-box { border: 1px solid var(--rule-2); background: var(--code-bg); border-radius: 6px;
+    padding: 8px 10px; margin: 4px 0 9px; white-space: pre-wrap; font-size: 12.5px; max-height: 220px; overflow: auto; }
+  .childit { padding-left: 10px; border-left: 1px solid var(--rule); margin-bottom: 10px; }
+  .childhead { font-size: 11px; color: var(--muted-2); margin: 9px 0 4px; }
+  .missing { color: var(--del); font-size: 12px; padding: 4px 0; }
+  .sec { font-size: 12px; color: var(--muted-2); margin: 26px 0 10px; border-top: 1px solid var(--rule); padding-top: 12px; }
+  .result { border: 1px solid var(--rule-2); border-radius: 8px; padding: 11px; background: var(--panel); white-space: pre-wrap; margin: 2px 0 0; }
+"""
+
+_TRACE_SECTION = """\
+<section class="view" id="view-trace">
+  <div class="trace" id="trace"></div>
+</section>"""
+
+_TRACE_JS = r"""
+function head(s) { return (s || "").replace(/\s+/g, " ").trim().slice(0, 130); }
+function proseNode(text) {
+  const parts = String(text || "").split("```");
+  const prose = parts.filter((_, i) => i % 2 === 0).join("\n")
+    .replace(/<br>/g, "").replace(/\n{3,}/g, "\n\n").trim();
+  return prose ? el("div", "reason", prose) : null;
+}
+function childNode(child) {
+  const det = el("details", "spawn");
+  det.dataset.agent = child.agent;
+  if (child.color) det.style.borderLeftColor = child.color;
+  const sum = el("summary");
+  const who = el("span", "who", "⤷ spawned " + child.agent);
+  if (child.color) who.style.color = child.color;
+  sum.append(who);
+  if (child.missing) {
+    sum.append(el("span", "ret", " — no rollout captured"));
+    det.append(sum, el("div", "missing", "(sub-agent file absent or empty)"));
+    return det;
+  }
+  sum.append(el("span", "ret", child.returned ? " — returned: " + head(child.returned) : " — (no return value recorded)"));
+  det.append(sum);
+  if (child.returned) det.append(el("div", "ret-box", child.returned));
+  (child.iters || []).forEach(it => {
+    const wrap = el("div", "childit");
+    wrap.append(el("div", "childhead", child.agent + " · iter " + it.iter));
+    const p = proseNode(it.reasoning); if (p) wrap.append(p);
+    (it.blocks || []).forEach(b => {
+      if (b.code && b.code.trim()) wrap.append(el("pre", "code", b.code));
+      if (b.output && b.output.trim()) wrap.append(el("pre", "output", b.output));
+    });
+    det.append(wrap);
+  });
+  return det;
+}
+function stepNode(step) {
+  const wrap = el("div", "step");
+  const h = el("div", "h");
+  h.append(el("span", "who", RUN.root_name), el("span", null, "#" + step.iter));
+  if (step.delegated && step.delegated.length) h.append(el("span", "deleg", "⤷ " + step.delegated.join(", ")));
+  h.append(el("span", "ts", (step.ts || "").replace("T", " ").slice(0, 19)));
+  wrap.append(h);
+  const p = proseNode(step.reasoning); if (p) wrap.append(p);
+  (step.blocks || []).forEach(b => { if (b.code && b.code.trim()) wrap.append(el("pre", "code", b.code)); });
+  (step.children || []).forEach(c => wrap.append(childNode(c)));
+  (step.blocks || []).forEach(b => { if (b.output && b.output.trim()) wrap.append(el("pre", "output", b.output)); });
+  return wrap;
+}
+function renderTrace() {
+  const t = document.getElementById("trace");
+  if (!RUN.root_steps || !RUN.root_steps.length) {
+    t.replaceChildren(el("div", "empty", "No agent rollouts found for this run."));
+    return;
+  }
+  const legend = el("div", "legend");
+  (RUN.legend || []).forEach(a => {
+    const span = el("span", "a");
+    const sw = el("span", "sw"); sw.style.background = a.color; span.append(sw);
+    span.append(el("span", null, a.name + " · d" + a.depth + " · " + a.iters + " it"));
+    legend.append(span);
+  });
+  const nodes = [legend];
+  RUN.root_steps.forEach(s => nodes.push(stepNode(s)));
+  if (RUN.root_result) { nodes.push(el("div", "sec", RUN.root_name + " — final answer")); nodes.push(el("div", "result", RUN.root_result)); }
+  (RUN.unlinked || []).forEach(c => nodes.push(childNode(c)));
+  t.replaceChildren(...nodes);
+}
+"""
+
+
+def trace_panel() -> Panel:
+    """The built-in causal-trace panel, reusable by any ``ra`` agent."""
+
+    return Panel("trace", "Trace", _TRACE_SECTION, _TRACE_CSS, _TRACE_JS, "renderTrace();")
+
+
+_SHELL = r"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>__TITLE__</title>
+<style>
+__STYLE__
+</style>
+</head>
+<body>
+<header>
+  <h1 class="serif">__BRAND__ <span class="sub" id="sub"></span></h1>
+  <div class="facts" id="facts"></div>
+  <div class="spacer"></div>
+  <div class="tabs" id="tabs"></div>
+  <button class="toggle" id="themeBtn">◐ theme</button>
+</header>
+
+__SECTIONS__
+
+<script id="data" type="application/json">__DATA__</script>
+<script>
+const DATA = JSON.parse(document.getElementById("data").textContent);
+const RUN = DATA.run || {};
+const PANELS = __PANELS_META__;
+
+function el(tag, cls, text) {
+  const n = document.createElement(tag);
+  if (cls) n.className = cls;
+  if (text != null) n.textContent = String(text);
+  return n;
+}
+
+__PANEL_JS__
+
+function activate(name) {
+  document.querySelectorAll(".tab").forEach(x => x.classList.toggle("active", x.dataset.view === name));
+  document.querySelectorAll(".view").forEach(x => x.classList.toggle("active", x.id === "view-" + name));
+}
+function init() {
+  document.getElementById("sub").textContent = "— " + (DATA.title || DATA.task_id || "");
+  const facts = document.getElementById("facts");
+  (DATA.models || []).slice(0, 3).forEach(m => facts.append(el("span", "mono", m)));
+  const tabs = document.getElementById("tabs");
+  PANELS.forEach(p => {
+    const b = el("button", "tab", p.label); b.dataset.view = p.id;
+    b.addEventListener("click", () => activate(p.id)); tabs.append(b);
+  });
+  __RENDER_CALLS__
+  activate(__DEFAULT_VIEW__);
+  document.getElementById("themeBtn").addEventListener("click", () => {
+    document.documentElement.dataset.theme = document.documentElement.dataset.theme === "dark" ? "" : "dark";
+  });
+}
+init();
+</script>
+</body>
+</html>
+"""
+
+
+def render_page(
+    data: dict,
+    panels: list[Panel],
+    *,
+    brand: str = "ra",
+    default_view: str | None = None,
+) -> str:
+    """Assemble a self-contained page from ``data`` + an ordered list of panels.
+
+    ``data`` is embedded as JSON (the panels' JS reads it via the ``DATA`` /
+    ``RUN`` globals). ``default_view`` is the panel id shown first; it defaults
+    to the first panel.
+    """
+
+    blob = json.dumps(data).replace("</", "<\\/")  # don't let </ close <script>
+    css = style.base_css() + _SHELL_CSS + "".join(p.css for p in panels)
+    sections = "\n".join(p.section for p in panels)
+    panel_js = "\n".join(p.js for p in panels)
+    render_calls = "\n  ".join(p.render_call for p in panels)
+    panels_meta = json.dumps([{"id": p.id, "label": p.label} for p in panels])
+    default = json.dumps(default_view or (panels[0].id if panels else ""))
+    return (
+        _SHELL.replace("__TITLE__", f"{brand} — run view")
+        .replace("__BRAND__", brand)
+        .replace("__STYLE__", css)
+        .replace("__SECTIONS__", sections)
+        .replace("__PANELS_META__", panels_meta)
+        .replace("__PANEL_JS__", panel_js)
+        .replace("__RENDER_CALLS__", render_calls)
+        .replace("__DEFAULT_VIEW__", default)
+        .replace("__DATA__", blob)
+    )
+
+
+def render_trace_html(run: RunTrace, *, brand: str = "ra") -> str:
+    """Render a run's causal agent trace as a standalone single-page viewer."""
+
+    data = {
+        "title": run.title,
+        "task_id": run.task_id,
+        "models": run.models,
+        "run": run.as_dict(),
+    }
+    return render_page(data, [trace_panel()], brand=brand, default_view="trace")
+
+
+def write_trace_html(run_dir: Path, out: Path | None = None) -> Path:
+    """Load ``run_dir`` and write the standalone trace viewer to ``out``."""
+
+    run = load_rollout_dir(run_dir)
+    target = out or (Path(run_dir) / "trace.html")
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(render_trace_html(run), encoding="utf-8")
+    return target
diff --git a/src/ra/viewer/style.py b/src/ra/viewer/style.py
new file mode 100644
index 00000000..e1e0ca5a
--- /dev/null
+++ b/src/ra/viewer/style.py
@@ -0,0 +1,84 @@
+"""Shared design system for kai's HTML surfaces.
+
+One palette, one set of primitives, used by the reusable viewer shell
+(:mod:`ra.viewer.html`), kai's interactive viewer (:mod:`kai.viewer.html`)
+and the static report document (``kai report --format html``). Each surface
+concatenates ``TOKENS + COMPONENTS`` with its own layout CSS, so the look
+(colours, severity treatment, code/diff blocks) can never drift between them.
+"""
+
+from __future__ import annotations
+
+# Design tokens: the palette + the single accent. Dark theme overrides the
+# same variables, so every component below is theme-aware for free.
+TOKENS = """\
+  :root {
+    --paper:#fafaf7; --panel:#fff; --ink:#1a1a1a; --rule:#e3dfd6; --rule-2:#d8d4cc;
+    --muted:#8a857c; --muted-2:#6b665d; --accent:#b3261e; --add:#2f6f43; --del:#9a2a22;
+    --gray-bar:#c8c2b5; --code-bg:#f4f1ea;
+  }
+  [data-theme="dark"] {
+    --paper:#14171b; --panel:#1b1f25; --ink:#e7e3da; --rule:#2a3038; --rule-2:#343b44;
+    --muted:#9aa3ad; --muted-2:#7f8893; --accent:#e5675d; --add:#7ec99a; --del:#e79a92;
+    --gray-bar:#3a424c; --code-bg:#11151b;
+  }
+"""
+
+# Shared component primitives: base type, the findings table, the severity
+# encoding (dot + score + 0-10 bar), the key/value + CVSS detail blocks, and
+# code / diff / output panes.
+COMPONENTS = """\
+  * { box-sizing: border-box; }
+  html, body { margin: 0; }
+  body { background: var(--paper); color: var(--ink);
+    font: 14px/1.55 -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; }
+  .serif { font-family: Charter, "Iowan Old Style", Georgia, serif; }
+  code, pre, .mono { font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace; }
+
+  table { border-collapse: collapse; width: 100%; }
+  thead th { text-align: left; font-size: 10px; letter-spacing: .07em; text-transform: uppercase;
+    color: var(--muted-2); font-weight: 600; padding: 10px 14px 8px; border-bottom: 1px solid var(--rule-2); }
+  thead th.num { text-align: right; }
+  tbody tr { border-bottom: 1px solid var(--rule); }
+  td { padding: 11px 14px; vertical-align: top; }
+  td.cvss { white-space: nowrap; }
+
+  .dot { display:inline-block; width:8px; height:8px; border-radius:50%; vertical-align: middle; margin-right: 7px; background: var(--gray-bar); }
+  .sev-critical .dot, .sev-high .dot { background: var(--accent); }
+  .sev-medium .dot { background: var(--muted-2); }
+  .score { font-family: ui-monospace, monospace; font-weight: 600; font-size: 13px; }
+  .bar { display:block; height: 3px; width: 64px; background: var(--gray-bar); margin-top: 6px; border-radius: 2px; }
+  .bar > i { display:block; height: 100%; background: var(--muted-2); border-radius: 2px; }
+  .sev-critical .bar > i, .sev-high .bar > i { background: var(--accent); }
+  .ftitle { font-weight: 600; }
+  .cat { font-size: 11px; color: var(--muted-2); }
+  .loc { font-size: 12px; color: var(--muted); }
+  .unconf { opacity: .62; }
+
+  .kv { display: grid; grid-template-columns: 130px 1fr; gap: 5px 14px; font-size: 13px; margin: 0; }
+  .kv dt { color: var(--muted-2); }
+  .kv dd { margin: 0; }
+  .sec-label { font-size: 11px; letter-spacing: .07em; text-transform: uppercase; color: var(--muted-2);
+    margin: 18px 0 8px; border-top: 1px solid var(--rule); padding-top: 12px; }
+  .prose { white-space: pre-wrap; margin: 0; }
+  .cvss-grid { display: grid; grid-template-columns: max-content max-content 1fr; gap: 5px 14px;
+    font-size: 12.5px; align-items: baseline; }
+  .cvss-grid .m { color: var(--muted-2); font-family: ui-monospace, monospace; }
+  .cvss-grid .v { font-weight: 500; }
+  .cvss-grid .why { color: var(--muted); font-size: 12px; }
+  .vector { font-size: 12px; color: var(--muted); margin: 0 0 10px; }
+
+  pre.code, pre.diff, pre.output { margin: 0 0 4px; padding: 11px 13px; border: 1px solid var(--rule-2);
+    border-radius: 6px; background: var(--code-bg); overflow: auto; font-size: 12.5px; line-height: 1.5; }
+  pre.code, pre.diff { white-space: pre; }
+  pre.output { white-space: pre-wrap; color: var(--muted-2); max-height: 320px; }
+  pre.diff .add { color: var(--add); }
+  pre.diff .del { color: var(--del); }
+  .empty { color: var(--muted); padding: 40px 22px; }
+"""
+
+
+def base_css() -> str:
+    """The shared stylesheet: tokens + component primitives."""
+
+    return TOKENS + COMPONENTS
diff --git a/src/ra/viewer/trace.py b/src/ra/viewer/trace.py
new file mode 100644
index 00000000..4e676b76
--- /dev/null
+++ b/src/ra/viewer/trace.py
@@ -0,0 +1,353 @@
+"""Load RLM rollout traces from a run directory.
+
+Reads the per-agent ``<agent>.jsonl`` files an RLM run writes via
+:mod:`kai.state.hooks` (plus the optional ``score.json`` / ``run.json``
+siblings) and folds them into a :class:`RunTrace` the HTML renderer can draw.
+
+The view follows **causality, not wall-clock**. The root agent (``exploit``)
+is an orchestrator: it reasons, then runs Python, and that Python calls
+``spawn_analyzer(...)`` / ``spawn_researcher(...)`` / ``spawn_verifier(...)``
+etc. to delegate a subtask. The sub-agent runs to completion *inside* that
+code call and its ``final_answer`` comes back as the call's return value --
+which is why a naive timestamp sort is misleading: the parent iteration is
+stamped when it *finishes*, i.e. after the child it spawned has already run,
+so the child appears to precede its own cause.
+
+So we read the root top-to-bottom by iteration number -- reason -> run code
+-> observe output -- and attach each spawned sub-agent's full sub-transcript
+under the exact ``spawn_*()`` call that caused it (matched per agent in call
+order), with the value it returned surfaced at the call site.
+
+No external dependencies, no server, no spans -- just the rollouts on disk.
+Pulled smoke dirs are flat (``*.jsonl`` next to ``score.json``); a fresh run
+nests them under ``state/<hash>/rollouts/``. Both work: we glob for
+``*.jsonl`` and skip any file whose lines aren't valid JSON (empty files, or
+``cat: ... No such file`` stubs from a partial ``railway ssh`` pull).
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+ROOT_AGENT = "exploit"  # conventional name of the depth-0 orchestrator
+SPAWN_RE = re.compile(r"\bspawn_([a-z][a-z_]*)\s*\(")  # group 1 = spawned agent name
+
+# Per-agent tints, cycled over agents sorted by (depth, first_ts, name).
+PALETTE = [
+    "#7fdbca",
+    "#c792ea",
+    "#f78c6c",
+    "#82aaff",
+    "#ffcb6b",
+    "#f07178",
+    "#addb67",
+    "#89ddff",
+]
+
+
+@dataclass
+class Iteration:
+    """One reason -> act -> observe step of an agent."""
+
+    n: int  # the record's ``iteration`` counter (0 when absent)
+    timestamp: str  # the record's ``timestamp`` field, kept as a raw string
+    reasoning: str  # the record's ``response`` text
+    blocks: list[dict[str, str]] = field(default_factory=list)  # ``code_blocks`` dicts
+
+
+@dataclass
+class AgentTrace:
+    """A single (sub-)agent's rollout: its metadata + iterations + result."""
+
+    name: str  # metadata ``agent``, else the loader's fallback name
+    depth: int  # metadata ``depth`` (0 when absent)
+    model: str  # metadata ``model`` ("" when absent)
+    backend: str  # metadata ``backend`` ("" when absent)
+    iterations: list[Iteration]  # one entry per ``iteration`` record, in file order
+    result: str | None  # first result record's ``final_answer``; None if no result
+    first_ts: str  # metadata timestamp, else the first iteration's timestamp
+    color: str = ""  # tint from PALETTE, assigned by load_rollout_dir
+
+    def legend_dict(self) -> dict[str, Any]:  # summary row used under "legend"
+        return {
+            "name": self.name,
+            "depth": self.depth,
+            "model": self.model,
+            "iters": len(self.iterations),
+            "color": self.color,
+        }
+
+
+@dataclass
+class RunTrace:
+    """A whole run: the causal root spine plus run-level header fields."""
+    # Field notes describe what load_rollout_dir() stores in each slot.
+    title: str  # run directory name
+    benchmark: str  # score.json task_ref, else guessed from the directory name
+    task_id: str  # score.json task_ref/details, else the directory name
+    success: bool | None  # score.json ``success`` (None when absent)
+    failure_reason: str | None  # score.json ``failure_reason``
+    poc_source: str | None  # score.json ``details.poc_source``
+    models: list[str]  # distinct agent models, sorted (run.json root_model as fallback)
+    agents: list[AgentTrace]  # sorted by (depth, first_ts, name)
+    root_name: str  # "" when no root agent was found
+    root_result: str | None  # the root's final answer, if any
+    root_steps: list[dict[str, Any]]  # one dict per root iteration, children nested
+    unlinked: list[dict[str, Any]]  # sessions no spawn call could be matched to
+
+    def as_dict(self) -> dict[str, Any]:  # plain-dict view; agents become "legend"
+        return {
+            "title": self.title,
+            "benchmark": self.benchmark,
+            "task_id": self.task_id,
+            "success": self.success,
+            "failure_reason": self.failure_reason,
+            "poc_source": self.poc_source,
+            "models": self.models,
+            "legend": [a.legend_dict() for a in self.agents],
+            "root_name": self.root_name,
+            "root_result": self.root_result,
+            "root_steps": self.root_steps,
+            "unlinked": self.unlinked,
+        }
+
+
+def _load_jsonl(path: Path) -> list[dict[str, Any]]:
+    """Read a ``.jsonl`` file into a list of its JSON-object records.
+
+    Best-effort: an unreadable file gives ``[]``; blank lines, lines that are
+    not valid JSON, and JSON values that are not objects are all skipped.
+    """
+
+    try:
+        raw = path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return []
+    parsed: list[dict[str, Any]] = []
+    for candidate in map(str.strip, raw.splitlines()):
+        if not candidate:
+            continue
+        # Stubs such as ``cat: ... No such file`` fail to parse and drop out here.
+        try:
+            value = json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+        if isinstance(value, dict):
+            parsed.append(value)
+    return parsed
+
+
+def _agent_from_records(
+    fallback_name: str, records: list[dict[str, Any]]
+) -> AgentTrace | None:
+    """Collapse one file's records into a single :class:`AgentTrace`.
+
+    Uses the first ``metadata`` and ``result`` records and every ``iteration``
+    record; returns ``None`` if the file has neither metadata nor iterations.
+    """
+
+    header = next((rec for rec in records if rec.get("type") == "metadata"), {})
+    steps: list[Iteration] = []
+    for rec in (r for r in records if r.get("type") == "iteration"):
+        number, stamp = int(rec.get("iteration", 0)), str(rec.get("timestamp", ""))
+        text = str(rec.get("response", ""))
+        code = [blk for blk in (rec.get("code_blocks") or []) if isinstance(blk, dict)]
+        steps.append(Iteration(n=number, timestamp=stamp, reasoning=text, blocks=code))
+    if not (steps or header):
+        return None
+    outcome = next((rec for rec in records if rec.get("type") == "result"), None)
+    # Prefer the metadata timestamp; fall back to the first iteration's.
+    opened = str(header.get("timestamp", "")) or (steps[0].timestamp if steps else "")
+    return AgentTrace(
+        name=str(header.get("agent") or fallback_name),
+        depth=int(header.get("depth", 0)),
+        model=str(header.get("model", "")),
+        backend=str(header.get("backend", "")),
+        iterations=steps,
+        result=None if outcome is None else str(outcome.get("final_answer", "")),
+        first_ts=opened,
+    )
+
+
+def _read_json(path: Path) -> dict[str, Any]:
+    """Return the JSON object stored at *path*, or ``{}`` when it can't be had."""
+    try:
+        # EAFP: a missing file is just an OSError here, so no exists() pre-check.
+        payload = json.loads(path.read_text(encoding="utf-8", errors="replace"))
+    except (OSError, ValueError):  # JSONDecodeError is a ValueError subclass
+        return {}
+    return payload if isinstance(payload, dict) else {}
+
+
+def _spawn_sessions(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Group one sub-agent's records into per-spawn sessions, oldest first.
+
+    Records sharing a ``spawn_id`` form one session, i.e. one delegation the
+    root can match a ``spawn_*()`` call to. A session opens at its first
+    ``iteration`` record; a ``result`` record only fills ``returned`` for a
+    session that is already open.
+    """
+
+    # Dicts keep insertion order, so this mapping doubles as the record of
+    # the order in which spawn ids were first seen.
+    by_spawn: dict[str, dict[str, Any]] = {}
+    for rec in records:
+        spawn_id = str(rec.get("spawn_id", ""))
+        rec_type = rec.get("type")
+        if rec_type == "result":
+            if spawn_id in by_spawn:
+                by_spawn[spawn_id]["returned"] = str(rec.get("final_answer", ""))
+            continue
+        if rec_type != "iteration":
+            continue
+        stamp = str(rec.get("timestamp", ""))
+        if spawn_id not in by_spawn:
+            by_spawn[spawn_id] = {"first_ts": stamp, "returned": None, "iters": []}
+        code_blocks = rec.get("code_blocks") or []
+        by_spawn[spawn_id]["iters"].append(
+            {
+                "iter": int(rec.get("iteration", 0)),
+                "ts": stamp,
+                "reasoning": str(rec.get("response", "")),
+                "blocks": [blk for blk in code_blocks if isinstance(blk, dict)],
+            }
+        )
+    # sorted() is stable, so sessions with equal first timestamps keep their
+    # first-seen order.
+    return sorted(by_spawn.values(), key=lambda entry: entry["first_ts"])
+
+
+def _child(name: str, color: str, session: dict[str, Any] | None) -> dict[str, Any]:
+    """Build the node for one delegation; ``missing`` marks an unmatched call."""
+    node: dict[str, Any] = {"agent": name, "color": color}
+    if session is None:
+        # No recorded session for this call: an empty, flagged placeholder.
+        node.update(missing=True, iters=[])
+    else:
+        node.update(returned=session.get("returned"), iters=session["iters"])
+    return node
+
+
+def _build_root_spine(
+    root: AgentTrace,
+    sessions_by_agent: dict[str, list[dict[str, Any]]],
+    color_of: dict[str, str],
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Walk the root's iterations and hang each spawned sub-agent under the
+    ``spawn_*()`` call that produced it (FIFO per agent name).
+
+    Returns ``(root_steps, unlinked)``: ``unlinked`` keeps sessions no call
+    claimed. Calls with no session left (or naming the root) are ``missing``.
+    """
+
+    cursor = {name: 0 for name in sessions_by_agent if name != root.name}
+    steps: list[dict[str, Any]] = []
+    for it in root.iterations:
+        code = "\n".join(str(b.get("code") or "") for b in it.blocks)
+        children: list[dict[str, Any]] = []
+        for name in SPAWN_RE.findall(code):
+            session = None
+            # Root is absent from ``cursor``: a self-spawn is "missing", not a KeyError.
+            if name in cursor and cursor[name] < len(sessions_by_agent[name]):
+                session = sessions_by_agent[name][cursor[name]]
+                cursor[name] += 1
+            children.append(_child(name, color_of.get(name, "#8a99ad"), session))
+        steps.append(
+            {
+                "iter": it.n,
+                "ts": it.timestamp,
+                "reasoning": it.reasoning,
+                "blocks": it.blocks,
+                "delegated": [c["agent"] for c in children],
+                "children": children,
+            }
+        )
+
+    unlinked: list[dict[str, Any]] = []
+    for name, sessions in sessions_by_agent.items():
+        if name == root.name:
+            continue
+        for session in sessions[cursor.get(name, 0) :]:
+            unlinked.append(_child(name, color_of.get(name, "#8a99ad"), session))
+    return steps, unlinked
+
+
+def load_rollout_dir(path: Path) -> RunTrace:
+    """Build a :class:`RunTrace` from rollout dir *path* (else NotADirectoryError)."""
+
+    path = Path(path)
+    if not path.is_dir():
+        raise NotADirectoryError(f"{path} is not a directory")
+    # Keep one AgentTrace per rollout file that has at least one iteration.
+    agents: list[AgentTrace] = []
+    records_by_agent: dict[str, list[dict[str, Any]]] = {}
+    for jf in sorted(path.rglob("*.jsonl")):
+        if jf.name == "status_updates.jsonl":
+            continue
+        records = _load_jsonl(jf)
+        agent = _agent_from_records(jf.stem, records)
+        if agent is not None and agent.iterations:
+            agents.append(agent)
+            records_by_agent[agent.name] = records
+    # NOTE(review): files sharing an agent name overwrite each other's records above.
+    agents.sort(key=lambda a: (a.depth, a.first_ts, a.name))
+    color_of = {a.name: PALETTE[i % len(PALETTE)] for i, a in enumerate(agents)}
+    for a in agents:
+        a.color = color_of[a.name]
+
+    root = _pick_root(agents)
+    sessions_by_agent = {
+        name: _spawn_sessions(records) for name, records in records_by_agent.items()
+    }
+    if root is not None:
+        root_steps, unlinked = _build_root_spine(root, sessions_by_agent, color_of)
+    else:
+        root_steps, unlinked = [], []
+    # Optional siblings: a missing or malformed file reads as {}.
+    score = _read_json(path / "score.json")
+    details = score.get("details") or {}
+    task_ref = score.get("task_ref") or {}
+    run = _read_json(path / "run.json")
+
+    benchmark = str(task_ref.get("benchmark") or _guess_benchmark(path.name))
+    task_id = str(task_ref.get("task_id") or details.get("task_id") or path.name)
+    models = sorted({a.model for a in agents if a.model})
+    if not models and run.get("root_model"):
+        models = [str(run["root_model"])]
+
+    return RunTrace(
+        title=path.name,
+        benchmark=benchmark,
+        task_id=task_id,
+        success=score.get("success"),
+        failure_reason=score.get("failure_reason"),
+        poc_source=details.get("poc_source"),
+        models=models,
+        agents=agents,
+        root_name=root.name if root else "",
+        root_result=root.result if root else None,
+        root_steps=root_steps,
+        unlinked=unlinked,
+    )
+
+
+def _pick_root(agents: list[AgentTrace]) -> AgentTrace | None:
+    """Pick the root: a depth-0 ``exploit``, else the shallowest, earliest agent."""
+
+    for candidate in agents:
+        if candidate.name == ROOT_AGENT and candidate.depth == 0:
+            return candidate
+    if agents:
+        return min(agents, key=lambda a: (a.depth, a.first_ts))
+    return None
+
+
+def _guess_benchmark(dir_name: str) -> str:
+    """Map a run-dir name to its known benchmark prefix, else ``"rollout"``."""
+    prefixes = ("cybergym", "bountybench", "evmbench", "noop")
+    match = next((p for p in prefixes if dir_name.startswith(p)), None)
+    return "rollout" if match is None else match
diff --git a/tests/test_ra_viewer.py b/tests/test_ra_viewer.py
new file mode 100644
index 00000000..90e7bfb1
--- /dev/null
+++ b/tests/test_ra_viewer.py
@@ -0,0 +1,59 @@
+"""Tests for the reusable ra.viewer (framework-level trace viewer + composer).
+
+These exercise the viewer with NO kai/findings involvement, proving any ra
+agent can render its run trace.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from ra.viewer import Panel, load_rollout_dir, render_page, render_trace_html
+from ra.viewer.trace import RunTrace
+
+
+def _write_rollout(dir_path: Path) -> None:
+    """Write a one-agent rollout (metadata, iteration, result) as ``root.jsonl``."""
+    rollout_dir = dir_path / "rollouts"
+    rollout_dir.mkdir()
+    records = [
+        {"type": "metadata", "agent": "root", "depth": 0, "spawn_id": "r1",
+         "timestamp": "2026-06-03T00:00:00+00:00", "model": "some/model"},
+        {"type": "iteration", "agent": "root", "iteration": 1, "spawn_id": "r1",
+         "timestamp": "2026-06-03T00:01:00+00:00", "response": "thinking", "code_blocks": []},
+        {"type": "result", "agent": "root", "iteration": 1, "spawn_id": "r1",
+         "timestamp": "2026-06-03T00:02:00+00:00", "final_answer": "done"},
+    ]
+    payload = "\n".join(json.dumps(rec) for rec in records)
+    (rollout_dir / "root.jsonl").write_text(payload, encoding="utf-8")
+
+
+def test_render_trace_html_is_self_contained(tmp_path: Path) -> None:
+    _write_rollout(tmp_path)
+    html = render_trace_html(load_rollout_dir(tmp_path))
+    # Self-contained: a complete HTML document that references no external URL.
+    assert html.startswith("<!DOCTYPE html>")
+    assert "http://" not in html and "https://" not in html
+    # Trace tab present; no kai Findings tab when used standalone.
+    assert 'id="view-trace"' in html
+    assert 'id="view-findings"' not in html
+    assert "renderTrace();" in html
+
+
+def test_render_page_composes_arbitrary_panels() -> None:
+    run = RunTrace(  # hand-built minimal trace; ``benchmark`` is typed ``str``, not None
+        title="t", benchmark="rollout", task_id="t", success=None, failure_reason=None,
+        poc_source=None, models=["m"], agents=[], root_name="root",
+        root_result=None, root_steps=[], unlinked=[],
+    )
+    custom = Panel(  # a panel unrelated to the trace view
+        id="notes", label="Notes",
+        section='<section class="view" id="view-notes"><p id="n"></p></section>',
+        css=".notes{}", js="function renderNotes(){document.getElementById('n').textContent='hi';}",
+        render_call="renderNotes();",
+    )
+    html = render_page({"title": "t", "run": run.as_dict()}, [custom], default_view="notes")
+    assert 'id="view-notes"' in html
+    assert "renderNotes();" in html
+    assert '"id": "notes"' in html or '"id":"notes"' in html
diff --git a/tests/test_viewer.py b/tests/test_viewer.py
new file mode 100644
index 00000000..b0986ddc
--- /dev/null
+++ b/tests/test_viewer.py
@@ -0,0 +1,147 @@
+"""Tests for the kai run viewer (findings loader + HTML render)."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from ra.viewer.trace import RunTrace, load_rollout_dir
+
+from kai.viewer import load_findings, render_html, write_html
+
+_EXPLOITS = [  # listed out of display order: rejected e2 before verified e1
+    {
+        "exploit_id": "e2",
+        "status": "rejected",
+        "confirmed": False,
+        "hypothesis": "Fee truncation rounds small trades to zero.",
+        "file": "contracts/Fees.sol",
+        "function": "calcFee",
+        "category": "theoretical_bounds",
+        "cvss_score": 4.3,  # no "severity" key: tests expect "medium" derived from this
+    },
+    {
+        "exploit_id": "e1",
+        "status": "verified",
+        "confirmed": True,
+        "hypothesis": (
+            "Reentrancy in withdraw drains the vault. The external call "
+            "precedes the balance update and there is no guard."
+        ),
+        "file": "contracts/Vault.sol",
+        "function": "withdraw",
+        "category": "active_exploit",
+        "severity": "critical",
+        "cvss_score": 9.1,
+        "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
+        "cvss_justification": {"AV": "remote attacker", "AC": "no special conditions"},
+        "poc_code": "contract Attacker { function pwn() external {} }",
+        "patch": "-        msg.sender.call{value: amount}(\"\");\n+        balances[msg.sender] -= amount;",
+        "attacker_role": "anyone",
+        "prerequisite": "a non-zero deposit",
+    },
+]
+
+
+def _write_run(dir_path: Path) -> None:
+    """Write ``exploits.json`` plus a one-iteration ``rollouts/exploit.jsonl``."""
+    (dir_path / "exploits.json").write_text(json.dumps(_EXPLOITS), encoding="utf-8")
+    rollout_dir = dir_path / "rollouts"
+    rollout_dir.mkdir()
+    records = [
+        {"type": "metadata", "agent": "exploit", "depth": 0, "spawn_id": "r1",
+         "timestamp": "2026-06-03T00:00:00+00:00", "model": "anthropic/claude-opus-4.8"},
+        {"type": "iteration", "agent": "exploit", "iteration": 1, "spawn_id": "r1",
+         "timestamp": "2026-06-03T00:01:00+00:00",
+         "response": "Analyzing the vault.", "code_blocks": []},
+        {"type": "result", "agent": "exploit", "iteration": 1, "spawn_id": "r1",
+         "timestamp": "2026-06-03T00:02:00+00:00", "final_answer": "done"},
+    ]
+    payload = "\n".join(json.dumps(rec) for rec in records)
+    (rollout_dir / "exploit.jsonl").write_text(payload, encoding="utf-8")
+
+
+def test_load_findings_drops_deduplicated(tmp_path: Path) -> None:
+    records = [
+        {"exploit_id": "keep", "status": "verified_and_fixed", "confirmed": True,
+         "hypothesis": "real bug", "file": "A.sol", "function": "f",
+         "category": "active_exploit", "severity": "high", "cvss_score": 8.0},
+        {"exploit_id": "dup", "status": "deduplicated", "confirmed": None,
+         "hypothesis": "merged duplicate", "file": "A.sol", "function": "f",
+         "category": "active_exploit"},
+    ]
+    (tmp_path / "exploits.json").write_text(json.dumps(records), encoding="utf-8")
+    findings = load_findings(tmp_path)
+    # The status == "deduplicated" bookkeeping shell is hidden; the real finding remains.
+    assert [f.exploit_id for f in findings] == ["keep"]
+
+
+def test_load_findings_sorts_severity_over_missing_score(tmp_path: Path) -> None:
+    # The severity label must outrank the numeric score when ordering: a critical
+    # finding with no CVSS score still sorts ahead of a low finding that has one.
+    records = [
+        {"exploit_id": "low_scored", "status": "verified", "confirmed": True,
+         "hypothesis": "low but scored", "severity": "low", "cvss_score": 3.1},
+        {"exploit_id": "crit_unscored", "status": "verified", "confirmed": True,
+         "hypothesis": "critical, no vector", "severity": "critical"},
+    ]
+    (tmp_path / "exploits.json").write_text(json.dumps(records), encoding="utf-8")
+    assert [f.exploit_id for f in load_findings(tmp_path)] == ["crit_unscored", "low_scored"]
+
+
+def test_load_findings_sorts_and_derives(tmp_path: Path) -> None:
+    _write_run(tmp_path)
+    findings = load_findings(tmp_path)
+
+    # Confirmed critical sorts ahead of the unconfirmed lower-severity finding.
+    assert [f.exploit_id for f in findings] == ["e1", "e2"]
+    e1, e2 = findings
+    assert e1.severity == "critical"
+    assert e1.title.startswith("Reentrancy in withdraw")
+    # Severity is derived from the CVSS score when the field is absent (e2: 4.3).
+    assert e2.severity == "medium"
+    # The CVSS vector is expanded into ordered, human-readable rows.
+    assert [r["metric"] for r in e1.cvss_rows] == ["AV", "AC", "PR", "UI", "S", "C", "I", "A"]
+    assert e1.cvss_rows[0] == {"metric": "AV", "value": "Network", "why": "remote attacker"}
+
+
+def test_load_findings_missing_file_is_empty(tmp_path: Path) -> None:
+    assert load_findings(tmp_path) == []  # fresh tmp dir: no exploits.json written
+
+
+def test_render_is_self_contained_and_has_findings(tmp_path: Path) -> None:
+    _write_run(tmp_path)
+    html = render_html(load_rollout_dir(tmp_path), load_findings(tmp_path))
+
+    assert html.startswith("<!DOCTYPE html>")
+    # Fully offline: no external resources.
+    assert "http://" not in html and "https://" not in html
+    for needle in (  # values taken from finding e1 in _EXPLOITS
+        "Reentrancy in withdraw",
+        "contracts/Vault.sol",
+        "active_exploit",
+        "critical",
+        "Attacker",  # poc_code
+        "balances[msg.sender]",  # patch diff body
+    ):
+        assert needle in html
+
+
+def test_render_without_findings_still_renders(tmp_path: Path) -> None:
+    # A benchmark-style dir: a trace but no exploits.json.
+    (tmp_path / "rollouts").mkdir()
+    run = RunTrace(
+        title="t", benchmark="rollout", task_id="t", success=None,
+        failure_reason=None, poc_source=None, models=[], agents=[],
+        root_name="", root_result=None, root_steps=[], unlinked=[],
+    )
+    html = render_html(run)  # findings argument deliberately omitted
+    assert html.startswith("<!DOCTYPE html>")
+    assert "No findings recorded" in html or "view-findings" in html
+
+
+def test_write_html_creates_file(tmp_path: Path) -> None:
+    _write_run(tmp_path)
+    out = write_html(tmp_path)  # no explicit output path is passed
+    assert out == tmp_path / "trace.html"  # expected location: inside the run dir
+    assert "Reentrancy in withdraw" in out.read_text(encoding="utf-8")