firstbatchxyz · aktasbatuhan · Jun 8, 2026 · Jun 8, 2026 · Jun 9, 2026 · Jun 8, 2026
diff --git a/README.md b/README.md
@@ -8,6 +8,28 @@ Kai runs a multi-stage pipeline: a **setup agent** prepares and builds the targe
 
 Built on [ra](src/ra/), a recursive language model framework where LLMs write code that launches other LLMs.
 
+## Quickstart
+
+```bash
+git clone https://github.com/firstbatchxyz/kai-security.git
+cd kai-security
+uv sync
+cp .env.example .env          # add OPENROUTER_API_KEY (or OPENAI_API_KEY)
+
+# Audit the bundled, intentionally-vulnerable example target
+uv run kai audit --repo-path examples/vulnerable-vault --verbose
+
+# Explore the findings + the agent's reasoning in your browser...
+uv run kai view output/state/<run_id> --open
+# ...or print a Markdown report (or a styled HTML one)
+uv run kai report output/state/<run_id>
+```
+
+`<run_id>` is printed during the run; it is the name of the directory created
+under `output/state/`. Point `--repo-path` at any local checkout you're authorized
+to test. See [`examples/`](examples/) for more, and the
+[Command-line interface](#command-line-interface) and [Usage](#usage) sections below for every option.
+
 ## Installation
 
 Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/).
@@ -24,6 +46,9 @@ uv sync
 cp .env.example .env
 ```
 
+`uv sync` installs the `kai` command (the distribution is published as
+`kai-security`; the command and import package are `kai`).
+
 Common developer commands are available through `make`:
 
 ```bash
@@ -33,6 +58,33 @@ make typecheck
 make run REPO_PATH=/path/to/target
 ```
 
+## Command-line interface
+
+```bash
+# Audit a repository you're authorized to test (setup → exploit pipeline)
+uv run kai audit --repo-path /path/to/target --verbose
+
+# Open a finished run as an interactive HTML report (findings + agent trace)
+uv run kai view output/state/<run_id> --open
+
+# Render a run's findings — Markdown to stdout, or a styled HTML document
+uv run kai report output/state/<run_id>
+uv run kai report output/state/<run_id> --format html -o report.html
+```
+
+`kai audit` is the friendly alias for the full pipeline; `kai pipeline` and
+`kai agent` expose the complete interface documented under [Usage](#usage)
+(equivalently `uv run python -m kai.main ...`). Run `kai <command> -h` for
+per-command options.
+
+## Examples
+
+The [`examples/`](examples/) directory has small, self-contained,
+intentionally-vulnerable targets you can audit end to end without a private
+repo or large spend — start with
+[`vulnerable-vault`](examples/vulnerable-vault/) (a Solidity vault with a
+reentrancy and an unchecked-transfer bug, plus a ready-made threat context).
+
 ### API keys
 
 | Key | Required | Used by |
@@ -379,6 +431,14 @@ make lint
 make typecheck
 ```
 
+## Benchmarking
+
+Kai ships an optional harness for scoring it against external security
+benchmarks (CyberGym, BountyBench, EVMBench) and for running fleets of audits
+in parallel. It drives `kai` as a subprocess and lives entirely in
+[`evaluation/`](evaluation/) — see [`evaluation/README.md`](evaluation/README.md).
+Most users don't need it; it's for measuring and improving Kai itself.
+
 ## Related Work
 
 Kai uses ideas from the Recursive Language Models paper. To cite that

diff --git a/examples/README.md b/examples/README.md
@@ -0,0 +1,21 @@
+# Examples
+
+Runnable targets for trying `kai` end to end — no private repos, minimal API
+spend.
+
+| Example | What it is | Highlights |
+|---------|------------|------------|
+| [`vulnerable-vault/`](vulnerable-vault/) | A tiny Solidity vault with two planted bugs | reentrancy + unchecked ERC-20 return; ships a `threat_context.yaml` |
+
+Every example is **intentionally vulnerable** and is for authorized
+demonstration only — do not deploy any of them.
+
+Quick run (see each example's README for details):
+
+```bash
+uv run kai audit --repo-path examples/vulnerable-vault --verbose
+uv run kai view output/state/<run_id> --open
+```
+
+Running an audit makes real LLM calls, so it needs an API key configured (see
+the project [README](../README.md#api-keys)) and incurs some cost.
diff --git a/examples/vulnerable-vault/README.md b/examples/vulnerable-vault/README.md
@@ -0,0 +1,70 @@
+# vulnerable-vault
+
+A tiny, self-contained, **intentionally vulnerable** Solidity project — a
+target you can point `kai audit` at to see the whole pipeline run end to end
+without a private repo or a large API spend.
+
+> ⚠️ Intentionally insecure. Do not deploy. For authorized demonstration only.
+
+## Planted bugs
+
+| # | Bug | Location |
+|---|-----|----------|
+| 1 | **Reentrancy** — the caller's balance is zeroed *after* the external call, no guard (a re-entrant caller drains the contract) | `src/Vault.sol` · `withdraw()` |
+| 2 | **Unchecked ERC-20 return** — `transfer()`'s boolean result is ignored | `src/Vault.sol` · `sweepToken()` |
+
+## Run it
+
+```bash
+# From the kai-security repo root
+uv run kai audit --repo-path examples/vulnerable-vault \
+  --threat-context examples/vulnerable-vault/threat_context.yaml --verbose
+```
+
+Then look at the results:
+
+```bash
+# Interactive HTML (findings + the agent's reasoning trace)
+uv run kai view output/state/<run_id> --open
+
+# Or a Markdown report (stdout), or a styled HTML document
+uv run kai report output/state/<run_id>
+uv run kai report output/state/<run_id> --format html -o report.html
+```
+
+`<run_id>` is printed during the run and is the directory name under
+`output/state/`.
+
+## What a real run produced
+
+This isn't hypothetical — here's an actual result. With the reentrancy bug,
+Kai built a Foundry proof-of-concept, confirmed the drain, and proposed a fix:
+
+```
+| CVSS | Severity | Finding                                   | Location            | Status              |
+| 9.8  | critical | Reentrancy in withdraw() (CEI violation)  | Vault.sol:withdraw  | verified_and_fixed ✓ |
+```
+
+with the correct Checks-Effects-Interactions patch (move the balance update
+*before* the external call):
+
+```diff
+ function withdraw() external {
+     uint256 amount = balances[msg.sender];
+     require(amount > 0, "nothing to withdraw");
++    balances[msg.sender] = 0;
+     (bool ok, ) = msg.sender.call{value: amount}("");
+     require(ok, "transfer failed");
+-    balances[msg.sender] = 0;
+ }
+```
+
+> **Kai is an agentic system, so runs are not deterministic.** Which bugs get
+> confirmed, their CVSS scores, and the exact wording vary by run and by the
+> models you configure. In one run Kai confirmed the reentrancy as Critical
+> (above); in another it confirmed the unchecked-return in `sweepToken()` as
+> Medium instead. It also reasons about *exploitability*, not just patterns —
+> given a `withdraw()` that used a checked `-= amount`, it correctly **disproved**
+> a textbook-looking reentrancy because the subtraction underflows and reverts
+> under Solidity 0.8.x. Treat the output as a strong signal to investigate, not
+> a fixed checklist.
diff --git a/examples/vulnerable-vault/foundry.toml b/examples/vulnerable-vault/foundry.toml
@@ -0,0 +1,5 @@
+[profile.default]
+src = "src"
+out = "out"
+libs = ["lib"]
+solc = "0.8.20"
diff --git a/examples/vulnerable-vault/src/Vault.sol b/examples/vulnerable-vault/src/Vault.sol
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.20;
+
+interface IERC20 { // minimal ERC-20 surface: only the one call Vault makes
+    function transfer(address to, uint256 amount) external returns (bool); // may return false instead of reverting
+}
+
+/// @title Vault
+/// @notice INTENTIONALLY VULNERABLE example target for kai-security demos.
+///         Do NOT deploy. The bugs below are planted so `kai audit` has
+///         something real to find on a tiny, self-contained codebase.
+contract Vault {
+    mapping(address => uint256) public balances; // wei credited per caller; raised by deposit(), zeroed by withdraw()
+    /// @notice Credits the ETH sent with the call to the caller's balance.
+    function deposit() external payable {
+        balances[msg.sender] += msg.value;
+    }
+
+    /// @notice Sends the caller their whole recorded balance in one call.
+    /// @dev BUG 1 — reentrancy: the balance is zeroed *after* the external call
+    /// and there is no guard, so a malicious receiver can re-enter withdraw()
+    /// from its receive/fallback and drain the contract (the balance is still
+    /// non-zero on each re-entry). Zeroing with `= 0` rather than a checked `-=`
+    /// keeps this exploitable on 0.8.x: a checked subtraction would revert.
+    function withdraw() external {
+        uint256 amount = balances[msg.sender];
+        require(amount > 0, "nothing to withdraw");
+        (bool ok, ) = msg.sender.call{value: amount}(""); // external call happens BEFORE the state update (planted)
+        require(ok, "transfer failed");
+        balances[msg.sender] = 0; // effect applied too late — this ordering is the planted bug
+    }
+
+    /// @notice Calls `token.transfer(to, amount)` from the vault; any caller may invoke it.
+    /// @dev BUG 2 — unchecked return value: ERC-20 transfer() can return false
+    /// instead of reverting; the result is discarded, so a failure looks like success.
+    function sweepToken(IERC20 token, address to, uint256 amount) external {
+        token.transfer(to, amount); // boolean result intentionally ignored (planted)
+    }
+}
diff --git a/examples/vulnerable-vault/threat_context.yaml b/examples/vulnerable-vault/threat_context.yaml
@@ -0,0 +1,22 @@
+# Threat context for the vulnerable-vault example.
+# Tells kai who can interact with the contract and what the trust boundaries
+# are, so it ranks an unprivileged-attacker drain above admin-only concerns.
+
+deployment_type: smart-contract
+environment: on-chain
+
+access_roles:
+  - name: anyone
+    trust: none
+    description: "Permissionless caller — any EOA or contract"
+  - name: depositor
+    trust: none
+    description: "An account that has deposited ETH into the vault"
+
+boundaries:
+  - "User input → contract storage (deposit/withdraw accounting)"
+  - "External call to msg.sender during withdraw (reentrancy boundary)"
+
+known_constraints:
+  - "No admin or owner role exists; every function is permissionless"
+  - "ERC-20 tokens passed to sweepToken may be non-reverting (return false)"
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,8 +2,17 @@
 requires = ["uv_build>=0.8.17,<0.9.0"]
 build-backend = "uv_build"
 
+# The published distribution is `kai-security`, but the import packages stay
+# `kai` (domain) and `ra` (framework). Ship BOTH — `kai` imports `ra`, so a
+# wheel with only `kai` is broken. Listing them also decouples the wheel from
+# the hyphenated distribution name. The bare `kai` name on PyPI is reserved for
+# the future umbrella dispatcher.
+[tool.uv.build-backend]
+module-name = ["kai", "ra"]
+module-root = "src"
+
 [project]
-name = "kai"
+name = "kai-security"
 version = "0.1.0"
 description = "Automated vulnerability discovery, verification, and patching using recursive language models."
 readme = "README.md"
@@ -51,6 +60,16 @@ Homepage = "https://github.com/firstbatchxyz/kai-security"
 Repository = "https://github.com/firstbatchxyz/kai-security"
 Issues = "https://github.com/firstbatchxyz/kai-security/issues"
 
+[project.scripts]
+kai = "kai.cli:main"
+
+# Umbrella contract: register kai-security under the shared `kai.plugins`
+# group so a dispatcher (this CLI today, a dedicated `kai` package later) can
+# offer it as `kai security …`. Sibling tools register their own namespace the
+# same way (e.g. kai-evolve → `evolve`). See docs/umbrella.md.
+[project.entry-points."kai.plugins"]
+security = "kai.cli:main"
+
 [project.optional-dependencies]
 dev = [
     "pytest>=9.0.2",