diff --git a/README.md b/README.md index 9a71461..6156836 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,28 @@ Kai runs a multi-stage pipeline: a **setup agent** prepares and builds the targe Built on [ra](src/ra/), a recursive language model framework where LLMs write code that launches other LLMs. +## Quickstart + +```bash +git clone https://github.com/firstbatchxyz/kai-security.git +cd kai-security +uv sync +cp .env.example .env # add OPENROUTER_API_KEY (or OPENAI_API_KEY) + +# Audit the bundled, intentionally-vulnerable example target +uv run kai audit --repo-path examples/vulnerable-vault --verbose + +# Explore the findings + the agent's reasoning in your browser... +uv run kai view output/state/ --open +# ...or print a Markdown report (or a styled HTML one) +uv run kai report output/state/ +``` + +`` is printed during the run (the directory created under +`output/state/`). Point `--repo-path` at any local checkout you're authorized +to test. See [`examples/`](examples/) for more, and the [full CLI](#command-line-interface) +and [Usage](#usage) below for every option. + ## Installation Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). @@ -24,6 +46,9 @@ uv sync cp .env.example .env ``` +`uv sync` installs the `kai` command (the distribution is published as +`kai-security`; the command and import package are `kai`). + Common developer commands are available through `make`: ```bash @@ -33,6 +58,33 @@ make typecheck make run REPO_PATH=/path/to/target ``` +## Command-line interface + +```bash +# Audit a repository you're authorized to test (setup → exploit pipeline) +uv run kai audit --repo-path /path/to/target --verbose + +# Open a finished run as an interactive HTML report (findings + agent trace) +uv run kai view output/state/ --open + +# Render a run's findings — Markdown to stdout, or a styled HTML document +uv run kai report output/state/ +uv run kai report output/state/ --format html -o report.html +``` + +`kai audit` is the friendly alias for the full pipeline; `kai pipeline` and +`kai agent` expose the complete interface documented under [Usage](#usage) +(equivalently `uv run python -m kai.main ...`). Run `kai -h` for +per-command options. + +## Examples + +The [`examples/`](examples/) directory has small, self-contained, +intentionally-vulnerable targets you can audit end to end without a private +repo or large spend — start with +[`vulnerable-vault`](examples/vulnerable-vault/) (a Solidity vault with a +reentrancy and an unchecked-transfer bug, plus a ready-made threat context). + ### API keys | Key | Required | Used by | @@ -379,6 +431,14 @@ make lint make typecheck ``` +## Benchmarking + +Kai ships an optional harness for scoring it against external security +benchmarks (CyberGym, BountyBench, EVMBench) and for running fleets of audits +in parallel. It drives `kai` as a subprocess and lives entirely in +[`evaluation/`](evaluation/) — see [`evaluation/README.md`](evaluation/README.md). +Most users don't need it; it's for measuring and improving Kai itself. + ## Related Work Kai uses ideas from the Recursive Language Models paper. To cite that diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..a5d5dbb --- /dev/null +++ b/examples/README.md @@ -0,0 +1,21 @@ +# Examples + +Runnable targets for trying `kai` end to end — no private repos, minimal API +spend. + +| Example | What it is | Highlights | +|---------|------------|------------| +| [`vulnerable-vault/`](vulnerable-vault/) | A tiny Solidity vault with two planted bugs | reentrancy + unchecked ERC-20 return; ships a `threat_context.yaml` | + +Each example is **intentionally vulnerable** and is for authorized +demonstration only — do not deploy them. + +Quick run (see each example's README for details): + +```bash +uv run kai audit --repo-path examples/vulnerable-vault --verbose +uv run kai view output/state/ --open +``` + +Running an audit makes real LLM calls, so it needs an API key configured (see +the project [README](../README.md#api-keys)) and incurs some cost. diff --git a/examples/vulnerable-vault/README.md b/examples/vulnerable-vault/README.md new file mode 100644 index 0000000..c88ea88 --- /dev/null +++ b/examples/vulnerable-vault/README.md @@ -0,0 +1,70 @@ +# vulnerable-vault + +A tiny, self-contained, **intentionally vulnerable** Solidity project — a +target you can point `kai audit` at to see the whole pipeline run end to end +without a private repo or a large API spend. + +> ⚠️ Intentionally insecure. Do not deploy. For authorized demonstration only. + +## Planted bugs + +| # | Bug | Location | +|---|-----|----------| +| 1 | **Reentrancy** — the caller's balance is zeroed *after* the external call, no guard (a re-entrant caller drains the contract) | `src/Vault.sol` · `withdraw()` | +| 2 | **Unchecked ERC-20 return** — `transfer()`'s boolean result is ignored | `src/Vault.sol` · `sweepToken()` | + +## Run it + +```bash +# From the kai-security repo root +uv run kai audit --repo-path examples/vulnerable-vault \ + --threat-context examples/vulnerable-vault/threat_context.yaml --verbose +``` + +Then look at the results: + +```bash +# Interactive HTML (findings + the agent's reasoning trace) +uv run kai view output/state/ --open + +# Or a Markdown report (stdout), or a styled HTML document +uv run kai report output/state/ +uv run kai report output/state/ --format html -o report.html +``` + +`` is printed during the run and is the directory name under +`output/state/`. + +## What a real run produced + +This isn't hypothetical — here's an actual result. With the reentrancy bug, +Kai built a Foundry proof-of-concept, confirmed the drain, and proposed a fix: + +``` +| CVSS | Severity | Finding | Location | Status | +| 9.8 | critical | Reentrancy in withdraw() (CEI violation) | Vault.sol:withdraw | verified_and_fixed ✓ | +``` + +with the correct Check-Effects-Interaction patch (move the balance update +*before* the external call): + +```diff + function withdraw() external { + uint256 amount = balances[msg.sender]; + require(amount > 0, "nothing to withdraw"); ++ balances[msg.sender] = 0; + (bool ok, ) = msg.sender.call{value: amount}(""); + require(ok, "transfer failed"); +- balances[msg.sender] = 0; + } +``` + +> **Kai is an agentic system, so runs are not deterministic.** Which bugs get +> confirmed, their CVSS scores, and the exact wording vary by run and by the +> models you configure. In one run Kai confirmed the reentrancy as Critical +> (above); in another it confirmed the unchecked-return in `sweepToken()` as +> Medium instead. It also reasons about *exploitability*, not just patterns — +> given a `withdraw()` that used a checked `-= amount`, it correctly **disproved** +> a textbook-looking reentrancy because the subtraction underflows and reverts +> under Solidity 0.8.x. Treat the output as a strong signal to investigate, not +> a fixed checklist. diff --git a/examples/vulnerable-vault/foundry.toml b/examples/vulnerable-vault/foundry.toml new file mode 100644 index 0000000..cdbe10f --- /dev/null +++ b/examples/vulnerable-vault/foundry.toml @@ -0,0 +1,5 @@ +[profile.default] +src = "src" +out = "out" +libs = ["lib"] +solc = "0.8.20" diff --git a/examples/vulnerable-vault/src/Vault.sol b/examples/vulnerable-vault/src/Vault.sol new file mode 100644 index 0000000..5bf3b0f --- /dev/null +++ b/examples/vulnerable-vault/src/Vault.sol @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +pragma solidity ^0.8.20; + +interface IERC20 { + function transfer(address to, uint256 amount) external returns (bool); +} + +/// @title Vault +/// @notice INTENTIONALLY VULNERABLE example target for kai-security demos. +/// Do NOT deploy. The bugs below are planted so `kai audit` has +/// something real to find on a tiny, self-contained codebase. +contract Vault { + mapping(address => uint256) public balances; + + function deposit() external payable { + balances[msg.sender] += msg.value; + } + + /// BUG 1 — reentrancy: the caller's balance is zeroed *after* the external + /// call, with no reentrancy guard. A malicious receiver can re-enter + /// withdraw() from its fallback and drain the contract, because the balance + /// is still non-zero on each re-entry. (Zeroing with `= 0` rather than a + /// checked `-=` is what makes this genuinely exploitable under Solidity + /// 0.8.x — a checked subtraction would underflow and revert the drain.) + function withdraw() external { + uint256 amount = balances[msg.sender]; + require(amount > 0, "nothing to withdraw"); + (bool ok, ) = msg.sender.call{value: amount}(""); + require(ok, "transfer failed"); + balances[msg.sender] = 0; + } + + /// BUG 2 — unchecked return value: ERC-20 transfer() can return false + /// instead of reverting; ignoring it lets a failed transfer look like a + /// success. + function sweepToken(IERC20 token, address to, uint256 amount) external { + token.transfer(to, amount); + } +} diff --git a/examples/vulnerable-vault/threat_context.yaml b/examples/vulnerable-vault/threat_context.yaml new file mode 100644 index 0000000..61fe952 --- /dev/null +++ b/examples/vulnerable-vault/threat_context.yaml @@ -0,0 +1,22 @@ +# Threat context for the vulnerable-vault example. +# Tells kai who can interact with the contract and what the trust boundaries +# are, so it ranks an unprivileged-attacker drain above admin-only concerns. + +deployment_type: smart-contract +environment: on-chain + +access_roles: + - name: anyone + trust: none + description: "Permissionless caller — any EOA or contract" + - name: depositor + trust: none + description: "An account that has deposited ETH into the vault" + +boundaries: + - "User input → contract storage (deposit/withdraw accounting)" + - "External call to msg.sender during withdraw (reentrancy boundary)" + +known_constraints: + - "No admin or owner role exists; every function is permissionless" + - "ERC-20 tokens passed to sweepToken may be non-reverting (return false)" diff --git a/pyproject.toml b/pyproject.toml index d185f07..3d6dd39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,17 @@ requires = ["uv_build>=0.8.17,<0.9.0"] build-backend = "uv_build" +# The published distribution is `kai-security`, but the import packages stay +# `kai` (domain) and `ra` (framework). Ship BOTH — `kai` imports `ra`, so a +# wheel with only `kai` is broken. Listing them also decouples the wheel from +# the dotted distribution name. The bare `kai` name on PyPI is reserved for the +# future umbrella dispatcher. +[tool.uv.build-backend] +module-name = ["kai", "ra"] +module-root = "src" + [project] -name = "kai" +name = "kai-security" version = "0.1.0" description = "Automated vulnerability discovery, verification, and patching using recursive language models." readme = "README.md" @@ -51,6 +60,16 @@ Homepage = "https://github.com/firstbatchxyz/kai-security" Repository = "https://github.com/firstbatchxyz/kai-security" Issues = "https://github.com/firstbatchxyz/kai-security/issues" +[project.scripts] +kai = "kai.cli:main" + +# Umbrella contract: register kai-security under the shared `kai.plugins` +# group so a dispatcher (this CLI today, a dedicated `kai` package later) can +# offer it as `kai security …`. Sibling tools register their own namespace the +# same way (e.g. kai-evolve → `evolve`). See docs/umbrella.md. +[project.entry-points."kai.plugins"] +security = "kai.cli:main" + [project.optional-dependencies] dev = [ "pytest>=9.0.2", diff --git a/src/kai/cli.py b/src/kai/cli.py new file mode 100644 index 0000000..3d3769f --- /dev/null +++ b/src/kai/cli.py @@ -0,0 +1,127 @@ +"""The ``kai`` command-line entry point. + +A thin dispatcher over the existing modules, giving the friendly verbs the +docs promise: + + kai audit analyze a repository (setup → exploit pipeline) + kai view open a finished run as interactive HTML + kai report render a run's findings (Markdown, or --format html) + +``kai pipeline`` / ``kai agent`` remain available as direct aliases into the +full :mod:`kai.main` interface. + +**Umbrella plugins.** Beyond its built-in verbs, ``kai`` discovers commands +registered by other installed packages under the ``kai.plugins`` entry-point +group, so a sibling tool can plug in a namespace — e.g. ``kai evolve …`` once +``kai-evolve`` is installed. kai-security registers itself as the ``security`` +plugin, so ``kai security audit`` is equivalent to ``kai audit``. See +``docs/umbrella.md``. + +The distribution is published as ``kai-security``; the command and the import +package stay ``kai``. +""" + +from __future__ import annotations + +import sys +from importlib.metadata import EntryPoint, entry_points + +_PLUGIN_GROUP = "kai.plugins" + +# Verbs handled directly by this module (a plugin can't shadow them). +_BUILTINS = ("audit", "view", "report", "pipeline", "agent") + +_USAGE_HEAD = """\ +kai — automated vulnerability discovery, verification, and patching + +usage: kai [options] + +commands: + audit Analyze a repository for vulnerabilities (setup → exploit) + view Open a finished run as interactive HTML (findings + trace) + report Render a run's findings as Markdown (default) or HTML + + pipeline Full pipeline interface (kai audit is the friendly alias) + agent Run a single agent +""" + +_USAGE_TAIL = "\nRun `kai -h` for command-specific options.\n" + + +def _plugins() -> dict[str, EntryPoint]: + """Commands registered by other packages under ``kai.plugins``. + + Built-in verbs always win, so a plugin can never shadow ``audit`` etc. + """ + + found = {ep.name: ep for ep in entry_points(group=_PLUGIN_GROUP)} + return {name: ep for name, ep in found.items() if name not in _BUILTINS} + + +def _usage(plugins: dict[str, EntryPoint]) -> str: + if not plugins: + return _USAGE_HEAD + _USAGE_TAIL + lines = "".join(f" {name:<17}(plugin)\n" for name in sorted(plugins)) + return f"{_USAGE_HEAD}\nplugins:\n{lines}{_USAGE_TAIL}" + + +def main(argv: list[str] | None = None) -> int: + """Dispatch a ``kai`` subcommand. Returns a process exit code.""" + + argv = list(sys.argv[1:] if argv is None else argv) + plugins = _plugins() + if not argv or argv[0] in ("-h", "--help", "help"): + sys.stdout.write(_usage(plugins)) + return 0 + + command, rest = argv[0], argv[1:] + + if command in ("audit", "pipeline"): + from kai.main import main as kai_main + + kai_main(["pipeline", *rest]) + return 0 + if command == "agent": + from kai.main import main as kai_main + + kai_main(["agent", *rest]) + return 0 + if command == "view": + from kai.viewer.__main__ import main as view_main + + return view_main(rest) + if command == "report": + from kai.report import main as report_main + + return report_main(rest) + + if command in plugins: + return _run_plugin(command, plugins[command], rest) + + sys.stderr.write(f"kai: unknown command {command!r}\n\n") + sys.stdout.write(_usage(plugins)) + return 2 + + +def _run_plugin(name: str, ep: EntryPoint, rest: list[str]) -> int: + """Invoke a plugin exactly like its own console script. + + Plugins follow the standard console-script convention: a zero-arg callable + that reads ``sys.argv`` and returns an exit code (or ``None``). We point + ``sys.argv`` at ``kai `` and call it, so any package that + already ships a ``[project.scripts]`` entry point works as a kai plugin + unchanged — no kai-specific signature required. + """ + + handler = ep.load() + saved_argv = sys.argv + sys.argv = [f"kai {name}", *rest] + try: + result = handler() + finally: + sys.argv = saved_argv + return result if isinstance(result, int) else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/kai/report.py b/src/kai/report.py new file mode 100644 index 0000000..cdd6de9 --- /dev/null +++ b/src/kai/report.py @@ -0,0 +1,341 @@ +"""Render a run's findings as a Markdown security report. + +The no-browser companion to :mod:`kai.viewer`: same on-disk source +(``/exploits.json``), but a plain-text report you can pipe into CI, +paste into a PR, or read over SSH. Markdown renders on GitHub and stays +legible in a terminal, so one format serves both. + + python -m kai.report [--format md|html] [-o OUT] + +``--format html`` renders a styled single-page document using the viewer's +design system (:mod:`kai.viewer.style`), so it matches ``kai view``. +""" + +from __future__ import annotations + +import argparse +import sys +from html import escape +from pathlib import Path + +from ra.viewer import style + +from kai.viewer.findings import Finding, load_findings + +_SEVERITY_ORDER = ("critical", "high", "medium", "low", "none") + + +def _cell(text: str) -> str: + """Make a value safe for a single Markdown table cell.""" + + return str(text).replace("|", r"\|").replace("\n", " ").strip() + + +def _score(finding: Finding) -> str: + return f"{finding.cvss_score:.1f}" if finding.cvss_score is not None else "—" + + +def _location(finding: Finding) -> str: + file = finding.file.split("/")[-1] if finding.file else "" + return f"{file}:{finding.function}" if finding.function else file + + +def _summary_line(findings: list[Finding]) -> str: + counts: dict[str, int] = {} + for f in findings: + counts[f.severity] = counts.get(f.severity, 0) + 1 + parts = [f"{counts[s]} {s}" for s in _SEVERITY_ORDER if counts.get(s)] + n = len(findings) + head = f"**{n} finding{'s' if n != 1 else ''}**" + return f"{head} · {' · '.join(parts)}" if parts else head + + +def _summary_table(findings: list[Finding]) -> list[str]: + rows = [ + "| CVSS | Severity | Finding | Category | Location | Status |", + "|---|---|---|---|---|---|", + ] + for f in findings: + status = f.status + (" ✓" if f.confirmed else "") + rows.append( + f"| {_score(f)} | {f.severity} | {_cell(f.title)} | " + f"{_cell(f.category.replace('_', ' '))} | {_cell(_location(f))} | " + f"{_cell(status)} |" + ) + return rows + + +def _finding_section(idx: int, f: Finding) -> list[str]: + out: list[str] = [] + sev = f" ({f.severity})" if f.severity != "none" else "" + out.append(f"## {idx}. {f.title} · CVSS {_score(f)}{sev}") + out.append("") + + facts = [ + ("Location", f"{f.file} · `{f.function}()`" if f.function else f.file), + ("Category", f.category.replace("_", " ")), + ("Attacker", f.attacker_role), + ("Precondition", f.prerequisite), + ("Status", f.status + (" · confirmed" if f.confirmed else "")), + ] + for label, value in facts: + if value: + out.append(f"- **{label}:** {value}") + out.append("") + + if f.hypothesis: + out += ["**Why it's exploitable**", "", f.hypothesis, ""] + if f.exploit_sketch: + out += ["**Exploit sketch**", "", f.exploit_sketch, ""] + + if f.cvss_rows: + out += ["**CVSS 3.1**" + (f" — `{f.cvss_vector}`" if f.cvss_vector else ""), ""] + out += ["| Metric | Value | Justification |", "|---|---|---|"] + for r in f.cvss_rows: + out.append(f"| {r['metric']} | {r['value']} | {_cell(r['why'])} |") + out.append("") + + if f.poc_code: + out += ["**Proof of concept**", "", "```", f.poc_code, "```", ""] + if f.patch: + out += ["**Suggested patch**", "", "```diff", f.patch, "```", ""] + if f.critic_summary: + out += ["**Critic**", "", f.critic_summary, ""] + return out + + +def render_markdown(findings: list[Finding], title: str = "") -> str: + """Render a sorted findings list into a Markdown report.""" + + lines = [f"# Security findings{f' — {title}' if title else ''}", ""] + if not findings: + lines += ["No findings recorded for this run.", ""] + return "\n".join(lines) + + lines += [_summary_line(findings), ""] + lines += _summary_table(findings) + lines += ["", "---", ""] + for idx, f in enumerate(findings, start=1): + lines += _finding_section(idx, f) + return "\n".join(lines).rstrip() + "\n" + + +def render_run(run_dir: Path) -> str: + """Load ``/exploits.json`` and render the Markdown report.""" + + run_dir = Path(run_dir) + return render_markdown(load_findings(run_dir), title=run_dir.name) + + +# --------------------------------------------------------------------------- +# HTML (--format html): a styled single-page report document. +# +# Shares kai.viewer.style so it matches `kai view` exactly. Unlike the +# interactive viewer (master-detail + trace tabs), this is a linear, fully +# expanded document meant to be printed, attached, or shared. Static HTML, +# so every dynamic value is escaped server-side. +# --------------------------------------------------------------------------- + +_REPORT_LAYOUT = """\ + header.doc { max-width: 820px; margin: 0 auto; padding: 22px 24px 14px; } + header.doc h1 { margin: 0 0 6px; font-size: 22px; font-weight: 600; } + header.doc .summary { font-size: 13px; color: var(--muted-2); } + header.doc .summary b { color: var(--ink); } + header.doc .summary .crit { color: var(--accent); font-weight: 600; } + .toggle { float: right; border: 1px solid var(--rule-2); background: none; color: var(--muted-2); + border-radius: 5px; cursor: pointer; font-size: 12px; padding: 3px 8px; } + main.report { max-width: 820px; margin: 0 auto; padding: 8px 24px 64px; } + .summary-table { margin: 18px 0 4px; } + .finding { border-top: 1px solid var(--rule); padding-top: 22px; margin-top: 26px; } + .finding:first-of-type { border-top: 0; margin-top: 14px; } + .finding h2 { font-size: 18px; margin: 0 0 4px; font-weight: 600; line-height: 1.35; } + .finding .where { font-size: 12.5px; color: var(--muted); margin-bottom: 14px; } +""" + +_REPORT_CSS = style.base_css() + _REPORT_LAYOUT + +_THEME_TOGGLE = ( + '" +) + + +def _bar(finding: Finding) -> str: + if finding.cvss_score is None: + return "" + pct = max(0, min(100, round(finding.cvss_score / 10 * 100))) + return f'' + + +def _summary_row(f: Finding) -> str: + status = escape(f.status + (" ✓" if f.confirmed else "")) + return ( + f'' + f'{_score(f)}{_bar(f)}' + f'{escape(f.title)}' + f'{escape(f.category.replace("_", " "))}' + f'{escape(_location(f))}{status}' + ) + + +def _html_diff(patch: str) -> str: + lines = [] + for line in patch.split("\n"): + cls = "add" if line.startswith("+") else "del" if line.startswith("-") else "" + lines.append(f'{escape(line)}' if cls else escape(line)) + return '
' + "\n".join(lines) + "
" + + +def _html_finding(idx: int, f: Finding) -> str: + sev = f" ({escape(f.severity)})" if f.severity != "none" else "" + where = escape(f.file) + (f" · {escape(f.function)}()" if f.function else "") + out = [ + f'
', + f'

{idx}. {escape(f.title)} · CVSS {_score(f)}{sev}

', + f'
{where}
', + ] + + kv = [] + for label, value in ( + ("Category", f.category.replace("_", " ")), + ("Attacker", f.attacker_role), + ("Precondition", f.prerequisite), + ("Status", f.status + (" · confirmed" if f.confirmed else "")), + ): + if value: + kv.append(f"
{escape(label)}
{escape(value)}
") + if kv: + out.append('
' + "".join(kv) + "
") + + if f.hypothesis: + out += ['
Why it\'s exploitable
', + f'

{escape(f.hypothesis)}

'] + if f.exploit_sketch: + out += ['
Exploit sketch
', + f'

{escape(f.exploit_sketch)}

'] + if f.cvss_rows: + out.append('
CVSS 3.1 vector
') + if f.cvss_vector: + out.append(f'
{escape(f.cvss_vector)}
') + rows = "".join( + f'{escape(r["metric"])}' + f'{escape(r["value"])}' + f'{escape(r["why"])}' + for r in f.cvss_rows + ) + out.append(f'
{rows}
') + if f.poc_code: + out += ['
Proof of concept
', + f'
{escape(f.poc_code)}
'] + if f.patch: + out += ['
Suggested patch
', _html_diff(f.patch)] + if f.critic_summary: + out += ['
Critic
', + f'

{escape(f.critic_summary)}

'] + out.append("
") + return "\n".join(out) + + +def render_html(findings: list[Finding], title: str = "") -> str: + """Render a styled, self-contained single-page HTML report document.""" + + crit = sum(1 for f in findings if f.severity == "critical") + n = len(findings) + summary = f"{n} finding{'s' if n != 1 else ''}" + if crit: + summary += f' · {crit} critical' + + body = [ + '
', + _THEME_TOGGLE, + f'

Security findings{" — " + escape(title) if title else ""}

', + f'
{summary}
', + "
", + '
', + ] + if not findings: + body.append('
No findings recorded for this run.
') + else: + body.append( + '' + "" + "" + + "".join(_summary_row(f) for f in findings) + + "
CVSSFindingCategoryLocationStatus
" + ) + body += [_html_finding(i, f) for i, f in enumerate(findings, start=1)] + body.append("
") + + return ( + "\n" + '' + '' + f"kai — {escape(title) or 'findings'}\n" + f"\n\n" + + "\n".join(body) + + "\n\n" + ) + + +def render_run_html(run_dir: Path) -> str: + """Load ``/exploits.json`` and render the HTML report document.""" + + run_dir = Path(run_dir) + return render_html(load_findings(run_dir), title=run_dir.name) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + prog="python -m kai.report", + description="Render a run's findings as a security report.", + ) + parser.add_argument("run_dir", help="run directory (a state// dir)") + parser.add_argument( + "-f", + "--format", + choices=("md", "html"), + default="md", + help="md (Markdown, default) or html (styled single-page document)", + ) + parser.add_argument( + "-o", + "--output", + help="write to PATH (md: default stdout; html: default /report.html)", + ) + parser.add_argument( + "--open", + action="store_true", + help="open the rendered file in a browser (html only)", + ) + args = parser.parse_args(argv) + + run_dir = Path(args.run_dir) + if not run_dir.is_dir(): + print(f"error: {run_dir} is not a directory", file=sys.stderr) + return 2 + + if args.format == "md": + markdown = render_run(run_dir) + if args.output: + out = Path(args.output) + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(markdown, encoding="utf-8") + print(out) + else: + sys.stdout.write(markdown) + return 0 + + target = Path(args.output) if args.output else run_dir / "report.html" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(render_run_html(run_dir), encoding="utf-8") + print(target) + if args.open: + import webbrowser + + webbrowser.open(target.resolve().as_uri()) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..39a7c68 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,136 @@ +"""Tests for the unified ``kai`` CLI dispatcher.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +from kai import cli + + +def _write_run(dir_path: Path) -> None: + exploits = [ + { + "exploit_id": "e1", "status": "verified", "confirmed": True, + "hypothesis": "Reentrancy in withdraw drains the vault.", + "file": "Vault.sol", "function": "withdraw", "category": "active_exploit", + "severity": "critical", "cvss_score": 9.1, + } + ] + (dir_path / "exploits.json").write_text(json.dumps(exploits), encoding="utf-8") + + +def test_help_and_no_args_print_usage(capsys: pytest.CaptureFixture[str]) -> None: + assert cli.main([]) == 0 + assert "usage: kai " in capsys.readouterr().out + assert cli.main(["--help"]) == 0 + assert "audit" in capsys.readouterr().out + + +def test_unknown_command_returns_2(capsys: pytest.CaptureFixture[str]) -> None: + assert cli.main(["bogus"]) == 2 + err = capsys.readouterr().err + assert "unknown command 'bogus'" in err + + +@pytest.mark.parametrize( + "command,expected", + [ + (["audit", "/repo", "--verbose"], ["pipeline", "/repo", "--verbose"]), + (["pipeline", "--recipe", "r.json"], ["pipeline", "--recipe", "r.json"]), + (["agent", "setup", "--input", "{}"], ["agent", "setup", "--input", "{}"]), + ], +) +def test_audit_pipeline_agent_delegate_to_kai_main( + command: list[str], expected: list[str], monkeypatch: pytest.MonkeyPatch +) -> None: + captured: list[list[str]] = [] + monkeypatch.setattr("kai.main.main", lambda argv: captured.append(argv)) + assert cli.main(command) == 0 + assert captured == [expected] + + +def test_view_delegates_and_writes_html(tmp_path: Path) -> None: + _write_run(tmp_path) + out = tmp_path / "v.html" + assert cli.main(["view", str(tmp_path), "-o", str(out)]) == 0 + assert out.exists() and out.read_text(encoding="utf-8").startswith("") + + +def test_report_delegates(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + _write_run(tmp_path) + assert cli.main(["report", str(tmp_path)]) == 0 + assert "Security findings" in capsys.readouterr().out + + out = tmp_path / "r.html" + assert cli.main(["report", str(tmp_path), "--format", "html", "-o", str(out)]) == 0 + assert out.exists() + + +class _FakeEntryPoint: + def __init__(self, handler: object, name: str = "evolve") -> None: + self._handler = handler + self.name = name + + def load(self) -> object: + return self._handler + + +def test_plugin_invoked_console_script_style(monkeypatch: pytest.MonkeyPatch) -> None: + # Plugins are zero-arg callables that read sys.argv (the console-script + # convention) — NOT functions taking an argv list. + seen: dict[str, list[str]] = {} + + def handler() -> int: + seen["argv"] = list(sys.argv) + return 7 + + monkeypatch.setattr(cli, "_plugins", lambda: {"evolve": _FakeEntryPoint(handler)}) + before = list(sys.argv) + + assert cli.main(["evolve", "run", "--x", "1"]) == 7 + # The dispatcher pointed sys.argv at the plugin's invocation, then restored. + assert seen["argv"] == ["kai evolve", "run", "--x", "1"] + assert sys.argv == before + + +def test_plugin_none_return_is_zero(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(cli, "_plugins", lambda: {"evolve": _FakeEntryPoint(lambda: None)}) + assert cli.main(["evolve"]) == 0 + + +def test_plugin_argv_restored_on_exception(monkeypatch: pytest.MonkeyPatch) -> None: + def boom() -> int: + raise RuntimeError("plugin crashed") + + monkeypatch.setattr(cli, "_plugins", lambda: {"evolve": _FakeEntryPoint(boom)}) + before = list(sys.argv) + with pytest.raises(RuntimeError): + cli.main(["evolve", "x"]) + assert sys.argv == before # restored even when the plugin raises + + +def test_builtin_wins_over_plugin(monkeypatch: pytest.MonkeyPatch) -> None: + # A plugin can't shadow a built-in verb: _plugins() filters them out. + fakes = [_FakeEntryPoint(None, name="audit"), _FakeEntryPoint(None, name="evolve")] + monkeypatch.setattr("kai.cli.entry_points", lambda group: fakes) + plugins = cli._plugins() + assert "audit" not in plugins + assert "evolve" in plugins + + +def test_usage_lists_plugins( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setattr(cli, "_plugins", lambda: {"evolve": _FakeEntryPoint(None)}) + assert cli.main([]) == 0 + out = capsys.readouterr().out + assert "plugins:" in out and "evolve" in out + + +def test_security_plugin_is_registered() -> None: + # kai-security registers itself under kai.plugins, so `kai security …` works. + assert "security" in cli._plugins() diff --git a/tests/test_report.py b/tests/test_report.py new file mode 100644 index 0000000..38fd197 --- /dev/null +++ b/tests/test_report.py @@ -0,0 +1,110 @@ +"""Tests for the Markdown findings report.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from kai.report import main, render_html, render_markdown, render_run +from kai.viewer.findings import load_findings + +_EXPLOITS = [ + { + "exploit_id": "e2", + "status": "rejected", + "confirmed": False, + "hypothesis": "Fee truncation rounds small trades to zero.", + "file": "contracts/Fees.sol", + "function": "calcFee", + "category": "theoretical_bounds", + "cvss_score": 4.3, + }, + { + "exploit_id": "e1", + "status": "verified", + "confirmed": True, + "hypothesis": "Reentrancy in withdraw drains the vault.", + "file": "contracts/Vault.sol", + "function": "withdraw", + "category": "active_exploit", + "severity": "critical", + "cvss_score": 9.1, + "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "cvss_justification": {"AV": "remote attacker"}, + "poc_code": "contract Attacker { function pwn() external {} }", + "patch": "- call_before_update;\n+ update_before_call;", + "attacker_role": "anyone", + "prerequisite": "a non-zero deposit", + }, +] + + +def _write_run(dir_path: Path) -> None: + (dir_path / "exploits.json").write_text(json.dumps(_EXPLOITS), encoding="utf-8") + + +def test_report_summary_table_and_order(tmp_path: Path) -> None: + _write_run(tmp_path) + md = render_markdown(load_findings(tmp_path), title="myrepo") + + assert md.startswith("# Security findings — myrepo") + assert "**2 findings** · 1 critical · 1 medium" in md + # Summary table header + both findings, confirmed-critical sorted first. + assert "| CVSS | Severity | Finding | Category | Location | Status |" in md + crit_at = md.index("Reentrancy in withdraw") + med_at = md.index("Fee truncation") + assert crit_at < med_at + + +def test_report_sections_and_code_fences(tmp_path: Path) -> None: + _write_run(tmp_path) + md = render_run(tmp_path) + + assert "## 1. Reentrancy in withdraw" in md + assert "CVSS 9.1 (critical)" in md + assert "- **Attacker:** anyone" in md + # PoC fenced, patch fenced as a diff, CVSS breakdown table present. + assert "```\ncontract Attacker" in md + assert "```diff\n- call_before_update;" in md + assert "| AV | Network | remote attacker |" in md + + +def test_report_empty(tmp_path: Path) -> None: + md = render_markdown([], title="empty") + assert "No findings recorded for this run." in md + + +def test_main_writes_file(tmp_path: Path) -> None: + _write_run(tmp_path) + out = tmp_path / "report.md" + rc = main([str(tmp_path), "-o", str(out)]) + assert rc == 0 + assert "Reentrancy in withdraw" in out.read_text(encoding="utf-8") + + +def test_main_rejects_non_dir(tmp_path: Path) -> None: + assert main([str(tmp_path / "nope")]) == 2 + + +def test_html_report_is_self_contained_and_styled(tmp_path: Path) -> None: + _write_run(tmp_path) + html = render_html(load_findings(tmp_path), title="myrepo") + + assert html.startswith("") + # Fully offline, and shares the viewer's design tokens (one design system). + assert "http://" not in html and "https://" not in html + assert "--accent:" in html # tokens from kai.viewer.style + assert 'class="finding sev-critical' in html + assert "Reentrancy in withdraw" in html + assert '
' in html
+    # The patch diff classes drive the +/- colouring.
+    assert '' in html and '' in html
+
+
+def test_main_format_html_writes_file(tmp_path: Path) -> None:
+    _write_run(tmp_path)
+    rc = main([str(tmp_path), "--format", "html"])
+    assert rc == 0
+    out = tmp_path / "report.html"
+    assert out.exists()
+    assert "Reentrancy in withdraw" in out.read_text(encoding="utf-8")
diff --git a/uv.lock b/uv.lock
index 73752a4..ce4ac16 100644
--- a/uv.lock
+++ b/uv.lock
@@ -691,7 +691,7 @@ wheels = [
 ]
 
 [[package]]
-name = "kai"
+name = "kai-security"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [