diff --git a/.github/actions/mayros-review/action.yml b/.github/actions/mayros-review/action.yml new file mode 100644 index 00000000..8a325f56 --- /dev/null +++ b/.github/actions/mayros-review/action.yml @@ -0,0 +1,213 @@ +name: "Mayros PR Review" +description: "Automated code review using Mayros CLI" +branding: + icon: "code" + color: "blue" + +inputs: + mayros-version: + description: "Mayros CLI version to install" + required: false + default: "latest" + prompt: + description: "Custom review prompt" + required: false + default: "Review this pull request. Focus on: code quality, security issues, performance concerns, and adherence to project conventions. Be concise and actionable." + model: + description: "LLM model identifier" + required: false + default: "anthropic/claude-sonnet-4-20250514" + github-token: + description: "GitHub token for posting comments" + required: true + anthropic-api-key: + description: "Anthropic API key for LLM calls" + required: true + max-diff-lines: + description: "Maximum diff lines to include in the review prompt (0 = unlimited)" + required: false + default: "3000" + node-version: + description: "Node.js version to use" + required: false + default: "22" + +outputs: + review-posted: + description: "Whether a review comment was posted (true/false)" + value: ${{ steps.post-review.outputs.posted }} + review-length: + description: "Character count of the generated review" + value: ${{ steps.run-review.outputs.review-length }} + diff-lines: + description: "Number of diff lines analyzed" + value: ${{ steps.get-diff.outputs.diff-lines }} + diff-truncated: + description: "Whether the diff was truncated (true/false)" + value: ${{ steps.get-diff.outputs.truncated }} + +runs: + using: "composite" + steps: + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ inputs.node-version }} + + - name: Install Mayros + id: install-mayros + shell: bash + run: | + MAYROS_VERSION="${{ inputs.mayros-version }}" + # Sanitize version string: only allow semver chars and "latest" + if [[ ! "$MAYROS_VERSION" =~ ^[a-zA-Z0-9.\-]+$ ]]; then + echo "::error::Invalid mayros-version format: ${MAYROS_VERSION}" + exit 1 + fi + + echo "Installing mayros@${MAYROS_VERSION}..." + if npm install -g "mayros@${MAYROS_VERSION}"; then + echo "installed=global" >> "$GITHUB_OUTPUT" + echo "Mayros installed globally" + mayros --version || true + else + echo "::warning::Global install failed, will fall back to npx" + echo "installed=npx" >> "$GITHUB_OUTPUT" + fi + + - name: Get PR diff + id: get-diff + shell: bash + env: + GH_TOKEN: ${{ inputs.github-token }} + MAX_DIFF_LINES: ${{ inputs.max-diff-lines }} + run: | + PR_NUMBER="${{ github.event.pull_request.number }}" + if [ -z "$PR_NUMBER" ] || [ "$PR_NUMBER" = "" ]; then + echo "::error::This action must run on a pull_request event" + exit 1 + fi + + # Use -- to prevent argument injection + if ! gh pr diff -- "$PR_NUMBER" > /tmp/pr-diff-raw.txt 2>/tmp/pr-diff-err.txt; then + echo "::error::Failed to fetch diff for PR #${PR_NUMBER}: $(cat /tmp/pr-diff-err.txt)" + exit 1 + fi + + TOTAL_LINES=$(wc -l < /tmp/pr-diff-raw.txt | tr -d ' ') + echo "diff-lines=${TOTAL_LINES}" >> "$GITHUB_OUTPUT" + + if [ "$TOTAL_LINES" -eq 0 ]; then + echo "::warning::PR #${PR_NUMBER} has an empty diff, skipping review" + echo "truncated=false" >> "$GITHUB_OUTPUT" + echo "empty=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Truncate if needed + TRUNCATED="false" + if [ "$MAX_DIFF_LINES" -gt 0 ] && [ "$TOTAL_LINES" -gt "$MAX_DIFF_LINES" ]; then + head -n "$MAX_DIFF_LINES" /tmp/pr-diff-raw.txt > /tmp/pr-diff.txt + TRUNCATED="true" + echo "::notice::Diff truncated from ${TOTAL_LINES} to ${MAX_DIFF_LINES} lines" + else + cp /tmp/pr-diff-raw.txt /tmp/pr-diff.txt + fi + + # Hard cap at 50K characters to avoid token limits + CHAR_COUNT=$(wc -c < /tmp/pr-diff.txt | tr -d ' ') + if [ "$CHAR_COUNT" -gt 50000 ]; then + head -c 50000 /tmp/pr-diff.txt > /tmp/pr-diff-capped.txt + mv /tmp/pr-diff-capped.txt /tmp/pr-diff.txt + TRUNCATED="true" + echo "::notice::Diff truncated to 50K characters (was ${CHAR_COUNT} chars)" + fi + + echo "truncated=${TRUNCATED}" >> "$GITHUB_OUTPUT" + echo "empty=false" >> "$GITHUB_OUTPUT" + FINAL_LINES=$(wc -l < /tmp/pr-diff.txt | tr -d ' ') + echo "PR #${PR_NUMBER} diff: ${TOTAL_LINES} total lines, ${FINAL_LINES} included (truncated=${TRUNCATED})" + + - name: Run Mayros review + id: run-review + if: steps.get-diff.outputs.empty != 'true' + shell: bash + env: + ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }} + INPUT_PROMPT: ${{ inputs.prompt }} + INPUT_MODEL: ${{ inputs.model }} + run: | + # Build prompt from file to avoid shell quoting issues + { + echo "${INPUT_PROMPT}" + echo "" + echo "Here is the PR diff to review:" + echo "" + echo '```diff' + cat /tmp/pr-diff.txt + echo '```' + } > /tmp/review-prompt.txt + + if [ "${{ steps.get-diff.outputs.truncated }}" = "true" ]; then + echo "" >> /tmp/review-prompt.txt + echo "NOTE: This diff was truncated. Focus your review on the code shown above." >> /tmp/review-prompt.txt + fi + + PROMPT_CONTENT=$(cat /tmp/review-prompt.txt) + + # Run review with proper argument separation + set +e + REVIEW=$(npx mayros -p "$PROMPT_CONTENT" --model "$INPUT_MODEL" 2>/tmp/mayros-stderr.txt) + EXIT_CODE=$? + set -e + + if [ $EXIT_CODE -ne 0 ]; then + STDERR_MSG=$(cat /tmp/mayros-stderr.txt 2>/dev/null || echo "unknown error") + echo "::error::Mayros review failed (exit ${EXIT_CODE}): ${STDERR_MSG}" + echo "review-length=0" >> "$GITHUB_OUTPUT" + echo "failed=true" >> "$GITHUB_OUTPUT" + exit 1 + fi + + if [ -z "$REVIEW" ]; then + echo "::error::Mayros returned an empty review" + echo "review-length=0" >> "$GITHUB_OUTPUT" + echo "failed=true" >> "$GITHUB_OUTPUT" + exit 1 + fi + + echo "$REVIEW" > /tmp/review-output.txt + REVIEW_LEN=${#REVIEW} + echo "review-length=${REVIEW_LEN}" >> "$GITHUB_OUTPUT" + echo "failed=false" >> "$GITHUB_OUTPUT" + echo "Review generated (${REVIEW_LEN} characters)" + + - name: Post review comment + id: post-review + if: steps.run-review.outputs.failed != 'true' && steps.get-diff.outputs.empty != 'true' + shell: bash + env: + GH_TOKEN: ${{ inputs.github-token }} + run: | + PR_NUMBER="${{ github.event.pull_request.number }}" + REVIEW=$(cat /tmp/review-output.txt) + + # Build comment body via file to avoid HEREDOC quoting issues + { + echo "## Mayros Code Review" + echo "" + echo "$REVIEW" + echo "" + echo "---" + echo "*Automated review by [Mayros](https://mayros.apilium.com)*" + } > /tmp/comment-body.txt + + # Post comment using -- to prevent argument injection + if gh pr comment -- "$PR_NUMBER" --body-file /tmp/comment-body.txt; then + echo "posted=true" >> "$GITHUB_OUTPUT" + echo "Review posted to PR #${PR_NUMBER}" + else + echo "::error::Failed to post review comment to PR #${PR_NUMBER}" + echo "posted=false" >> "$GITHUB_OUTPUT" + exit 1 + fi diff --git a/.github/workflows/mayros-review.yml b/.github/workflows/mayros-review.yml new file mode 100644 index 00000000..e7763f26 --- /dev/null +++ b/.github/workflows/mayros-review.yml @@ -0,0 +1,33 @@ +name: Mayros PR Review + +on: + pull_request: + types: [opened, synchronize] + paths-ignore: + - "docs/**" + - "*.md" + - "LICENSE" + - ".gitignore" + +permissions: + contents: read + pull-requests: write + +concurrency: + group: mayros-review-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + review: + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: ./.github/actions/mayros-review + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/.gitignore b/.gitignore index ae9ff4e7..008583cd 100644 --- a/.gitignore +++ b/.gitignore @@ -136,6 +136,9 @@ CLAUDE.md .claude/RULES.md docs/evolution/ +docs/experiments/ +docs/refactor/ +secret/ # JetBrains plugin build artifacts tools/jetbrains-plugin/.gradle/ diff --git a/README.md b/README.md index 77423637..04902088 100644 --- a/README.md +++ b/README.md @@ -1,177 +1,220 @@ -# ⚡🛡️ Mayros — Personal AI Assistant +# ⚡🛡️ Mayros
+ AI agent framework · Coding CLI · Personal assistant
+ One platform. Your terminal, your channels, your devices.
+
+ Product · Download · Docs · Getting Started · Vision · Discord +
-If you want a personal, single-user assistant that feels local, fast, and always-on, this is it. +--- -[Product](https://apilium.com/en/products/mayros) · [Download](https://mayros.apilium.com) · [Docs](https://apilium.com/en/doc/mayros) · [Vision](VISION.md) · [Getting Started](https://apilium.com/en/doc/mayros/start/getting-started) · [Updating](https://apilium.com/en/doc/mayros/install/updating) · [Docker](https://apilium.com/en/doc/mayros/install/docker) +**Mayros** is an open-source AI agent framework that runs on your own devices. It ships with an interactive **coding CLI** (`mayros code`), connects to **17 messaging channels** (WhatsApp, Telegram, Slack, Discord, Signal, iMessage, Teams, and more), speaks and listens on **macOS/iOS/Android**, and has a **knowledge graph** that remembers everything across sessions. All backed by a local-first Gateway and an 18-layer security architecture. + +> **55 extensions · 9,200+ tests · 29 hooks · MCP support · Multi-model · Multi-agent** + +```bash +npm install -g @apilium/mayros@latest +mayros onboard +mayros code # interactive coding CLI +``` -Preferred setup: run the onboarding wizard (`mayros onboard`) in your terminal. -The wizard guides you step by step through setting up the gateway, workspace, channels, and skills. The CLI wizard is the recommended path and works on **macOS, Linux, and Windows (via WSL2; strongly recommended)**. -Works with npm, pnpm, or bun. -New install? Start here: [Getting started](https://apilium.com/en/doc/mayros/start/getting-started) +--- -## Models (selection + auth) +## Why Mayros? -- Models config + CLI: [Models](https://apilium.com/en/doc/mayros/concepts/models) -- Auth profile rotation (OAuth vs API keys) + fallbacks: [Model failover](https://apilium.com/en/doc/mayros/concepts/model-failover) +| | Mayros | Others | +| ---------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------- | +| 🧠 **Knowledge Graph** | AIngle Cortex — persistent memory across sessions, projects, and agents | Flat conversation history | +| 🤖 **Multi-Agent** | Teams, workflows, mailbox, background tasks, git worktree isolation | Single agent | +| 📱 **Multi-Channel** | 17 channels — WhatsApp, Telegram, Slack, Discord, Signal, iMessage, Teams, Matrix, WebChat, and more | Terminal only | +| 🔒 **Security** | 18 layers — WASM sandbox, bash scanner, interactive permissions, namespace isolation, rate limiter | Basic sandboxing | +| 🎙️ **Voice** | Always-on Voice Wake + Talk Mode on macOS, iOS, Android | None | +| 🖥️ **IDE** | VSCode + JetBrains plugins with chat, plan, traces, KG | VSCode only | +| 📊 **Observability** | Full trace system, decision graph, session fork/rewind | Basic logging | +| 🔌 **Extensions** | 55 plugin extensions, 29 hook types, MCP client (4 transports) | Limited plugins | +| 🗺️ **Plan Mode** | Cortex-backed semantic planning: explore → assert → approve → execute | Simple plan files | -## Install (recommended) +--- -Runtime: **Node ≥22**. +## Install + +Runtime: **Node ≥ 22**. Works with npm, pnpm, or bun. ```bash -npm install -g mayros@latest -# or: pnpm add -g mayros@latest +npm install -g @apilium/mayros@latest +# or: pnpm add -g @apilium/mayros@latest mayros onboard --install-daemon ``` -The wizard installs the Gateway daemon (launchd/systemd user service) so it stays running. +The wizard sets up the Gateway, workspace, channels, and skills. It installs the Gateway as a background daemon (launchd/systemd) so it stays running. + +New install? Start here: **[Getting Started](https://apilium.com/en/doc/mayros/start/getting-started)** · Upgrading? **[Updating guide](https://apilium.com/en/doc/mayros/install/updating)** (and run `mayros doctor`) + +--- + +## Coding CLI + +`mayros code` is an interactive terminal UI for coding, conversation, and agent-driven workflows. + +
+
+
`. Public DMs require explicit opt-in.
-Mayros connects to real messaging surfaces. Treat inbound DMs as **untrusted input**.
+---
-Full security guide: [Security](https://apilium.com/en/doc/mayros/gateway/security)
+## Knowledge Graph (AIngle Cortex)
-Default behavior on Telegram/WhatsApp/Signal/iMessage/Microsoft Teams/Discord/Google Chat/Slack:
+Mayros remembers. Not just conversation history — semantic knowledge stored as RDF triples in [AIngle Cortex](https://github.com/ApiliumCode/aingle).
-- **DM pairing** (`dmPolicy="pairing"` / `channels.discord.dmPolicy="pairing"` / `channels.slack.dmPolicy="pairing"`; legacy: `channels.discord.dm.policy`, `channels.slack.dm.policy`): unknown senders receive a short pairing code and the bot does not process their message.
-- Approve with: `mayros pairing approve ` (then the sender is added to a local allowlist store).
-- Public inbound DMs require an explicit opt-in: set `dmPolicy="open"` and include `"*"` in the channel allowlist (`allowFrom` / `channels.discord.allowFrom` / `channels.slack.allowFrom`; legacy: `channels.discord.dm.allowFrom`, `channels.slack.dm.allowFrom`).
+**Three-tier memory:**
-Run `mayros doctor` to surface risky/misconfigured DM policies.
+1. **MAYROS.md** — flat-file persona and instructions, always loaded into the system prompt
+2. **AIngle Cortex** — RDF triple store (`subject → predicate → object`) scoped by namespace. Optional: falls back to file-based memory when unavailable
+3. **Titans STM/LTM** — short-term and long-term memory with temporal recall
-## Highlights
+**Built on top:**
-- **[Local-first Gateway](https://apilium.com/en/doc/mayros/gateway)** — single control plane for sessions, channels, tools, and events.
-- **[Multi-channel inbox](https://apilium.com/en/doc/mayros/channels)** — WhatsApp, Telegram, Slack, Discord, Google Chat, Signal, BlueBubbles (iMessage), iMessage (legacy), Microsoft Teams, Matrix, Zalo, Zalo Personal, WebChat, macOS, iOS/Android.
-- **[Multi-agent routing](https://apilium.com/en/doc/mayros/gateway/configuration)** — route inbound channels/accounts/peers to isolated agents (workspaces + per-agent sessions).
-- **[Voice Wake](https://apilium.com/en/doc/mayros/nodes/voicewake) + [Talk Mode](https://apilium.com/en/doc/mayros/nodes/talk)** — always-on speech for macOS/iOS/Android with ElevenLabs.
-- **[Live Canvas](https://apilium.com/en/doc/mayros/platforms/mac/canvas)** — agent-driven visual workspace with [A2UI](https://apilium.com/en/doc/mayros/platforms/mac/canvas#canvas-a2ui).
-- **[First-class tools](https://apilium.com/en/doc/mayros/tools)** — browser, canvas, nodes, cron, sessions, and Discord/Slack actions.
-- **[Companion apps](https://apilium.com/en/doc/mayros/platforms/macos)** — macOS menu bar app + iOS/Android [nodes](https://apilium.com/en/doc/mayros/nodes).
-- **[Onboarding](https://apilium.com/en/doc/mayros/start/wizard) + [skills](https://apilium.com/en/doc/mayros/tools/skills)** — wizard-driven setup with bundled/managed/workspace skills.
-- **Terminal UI** — interactive TUI with themes, vim mode, image paste, slash commands.
-- **IDE plugins** — VSCode + JetBrains extensions connected via Gateway WebSocket.
-- **Knowledge Graph** — project memory, code indexer, cross-session recall via Cortex.
-- **Multi-agent mesh** — teams, workflows, agent mailbox, background tasks.
-- **Semantic plan mode** — explore, assert, approve, execute with Cortex backing.
-- **50+ extensions** — security sandbox, permissions, MCP client, observability, 17 channels.
+- **Code indexer** — scans your codebase → RDF triples in Cortex (incremental, only re-indexes changed files)
+- **Project memory** — persists conventions, findings, and architecture decisions across sessions
+- **Smart compaction** — extracts key information before context pruning
+- **Cross-session recall** — injects relevant knowledge from previous sessions into new prompts
-## Terminal UI
+**Design principles:** namespace isolation (no cross-namespace reads), graceful degradation (Cortex is a sidecar, not an FFI binding), circuit breaker with exponential backoff.
-Mayros includes an interactive terminal interface for direct coding and conversation.
+CLI: `mayros kg search|explore|query|stats|triples|namespaces|export|import`
-**Entry points:**
+---
-- `mayros code` — main interactive TUI session
-- `mayros tui` — alias for `mayros code`
-- `mayros -p "query"` — headless mode (non-interactive, streams response to stdout)
+## Multi-Agent Mesh
-**Features:**
+Agents that work together. Mayros supports coordinated multi-agent workflows with shared knowledge.
-- Welcome screen with shield mascot and two-column info panel
-- 3 themes (dark, light, high-contrast) — switch with `/theme`
-- 3 output styles (standard, explanatory, learning) — switch with `/style`
-- Vim mode with motions, operators, and undo — toggle with `/vim`
-- `Ctrl+V` image paste from clipboard
-- `/copy` pipes last response to system clipboard; `/export [file]` writes to file
-- `/diff` inline diff viewer with stats
-- `/context` token usage bar chart
-- `/plan` semantic plan mode (Cortex-backed)
-
-**Key slash commands:**
-
-| Command | Description |
-| ---------------- | --------------------------- |
-| `/help` | List all available commands |
-| `/new`, `/reset` | Reset session |
-| `/compact` | Compact session context |
-| `/think ` | Set thinking level |
-| `/model ` | Switch model |
-| `/plan` | Enter semantic plan mode |
-| `/diff` | Show pending changes |
-| `/context` | Visualize token usage |
-| `/theme` | Cycle themes |
-| `/style` | Cycle output styles |
-| `/vim` | Toggle vim mode |
-| `/copy` | Copy last response |
-| `/export [file]` | Export session |
-| `/permission` | Set permission mode |
-| `/fast` | Toggle fast mode |
+- **Team manager** — Cortex-backed lifecycle: create, assign roles, merge results, disband
+- **Workflow orchestrator** — built-in workflows (code-review, research, refactor) + custom definitions
+- **Agent mailbox** — persistent inter-agent messaging (send/inbox/outbox/archive)
+- **Background task tracker** — long-running tasks with status and cancellation
+- **Git worktree isolation** — each agent works in its own worktree to avoid conflicts
+- **Session fork/rewind** — checkpoint-based exploration with rewind capability
-**Markdown-driven extensibility:**
+CLI: `mayros workflow run|list` · `mayros dashboard team|summary|agent` · `mayros tasks list|status|cancel|summary` · `mayros mailbox list|read|send|archive|stats`
-- Custom agents: `~/.mayros/agents/*.md` — define persona, tools, and behavior in markdown
-- Custom commands: `~/.mayros/commands/*.md` — define slash commands as markdown templates
-- Interactive selectors when commands run without required arguments
+---
## IDE Plugins
-Mayros provides IDE extensions that connect to the running Gateway via WebSocket.
+Mayros lives inside your editor, connected via Gateway WebSocket.
**VSCode** (`tools/vscode-extension/`):
@@ -184,63 +227,23 @@ Mayros provides IDE extensions that connect to the running Gateway via WebSocket
- Unified tabbed panel with the same feature set
- Protocol v3 compatibility
-Both plugins connect via WebSocket to `ws://127.0.0.1:18789` (the Gateway).
+Both connect to `ws://127.0.0.1:18789`.
-## Semantic Memory (AIngle Cortex)
+---
-Mayros includes a three-tier memory architecture so the assistant remembers context across conversations and channels:
+## Voice & Companion Apps
-1. **MAYROS.md** — flat-file persona and instructions, always loaded into the system prompt.
-2. **[AIngle Cortex](https://github.com/ApiliumCode/aingle)** — an RDF triple store that runs as an HTTP sidecar. Skills and the agent read/write semantic triples (`subject → predicate → object`) scoped by namespace. Cortex is optional: when unavailable the assistant falls back to markdown-based memory.
-3. **Titans STM/LTM** — short-term and long-term memory layers that complement the graph with temporal recall.
+- **[Voice Wake](https://apilium.com/en/doc/mayros/nodes/voicewake) + [Talk Mode](https://apilium.com/en/doc/mayros/nodes/talk)** — always-on speech for macOS/iOS/Android with ElevenLabs
+- **[Live Canvas](https://apilium.com/en/doc/mayros/platforms/mac/canvas)** — agent-driven visual workspace with [A2UI](https://apilium.com/en/doc/mayros/platforms/mac/canvas#canvas-a2ui)
+- **[macOS app](https://apilium.com/en/doc/mayros/platforms/macos)** — menu bar control, Voice Wake, Talk Mode overlay, WebChat, debug tools
+- **[iOS node](https://apilium.com/en/doc/mayros/platforms/ios)** — Canvas, Voice Wake, Talk Mode, camera, screen recording, Bonjour pairing
+- **[Android node](https://apilium.com/en/doc/mayros/platforms/android)** — Canvas, Talk Mode, camera, screen recording, optional SMS
-Key design points:
-
-- **Namespace isolation** — every query is forced to `{ns}:` prefix; no cross-namespace reads.
-- **Graceful degradation** — Cortex is an HTTP sidecar, not an FFI binding. If the sidecar is down, the gateway continues working with file-based memory.
-- **Circuit breaker** — `cortex-resilience.ts` wraps all Cortex calls with a 3-state circuit breaker and exponential backoff.
-- **Skill access** — skills interact with memory through 6 semantic tools (`skill_graph_query`, `skill_assert`, `skill_memory_context`, etc.) inside the QuickJS WASM sandbox.
-
-Cortex version: **aingle_cortex 0.2.6** · AIngle crate: **0.0.101** · Zome types: **0.0.4**
-
-## Knowledge Graph & Code Indexer
-
-The code indexer scans your codebase and maps it to RDF triples stored in Cortex. Combined with project memory, this gives the assistant deep, persistent understanding of your project.
-
-- **Code indexer** — scans source files → RDF triples in Cortex (incremental, only re-indexes changed files)
-- **Project memory** — persists conventions, findings, and architecture decisions across sessions
-- **Smart compaction** — extracts key information before context pruning so nothing important is lost
-- **Cross-session recall** — injects relevant knowledge from previous sessions into new prompts
-
-CLI: `mayros kg search|explore|query|stats|triples|namespaces|export|import`
-
-## Multi-Agent Mesh
-
-Mayros supports coordinated multi-agent workflows where agents can form teams, delegate work, and communicate asynchronously.
-
-- **Team manager** — Cortex-backed lifecycle: create, assign roles, disband
-- **Workflow orchestrator** — built-in workflow definitions (code-review, research, refactor) + custom definitions via registry
-- **Agent mailbox** — persistent inter-agent messaging (send/inbox/outbox/archive)
-- **Background task tracker** — track long-running agent tasks with status and cancellation
-- **Git worktree isolation** — each agent can work in its own worktree to avoid conflicts
-
-CLI: `mayros workflow run|list`, `mayros dashboard team|summary|agent`, `mayros tasks list|status|cancel|summary`, `mayros mailbox list|read|send|archive|stats`
-
-## Plan Mode
-
-Cortex-backed semantic planning for complex multi-step tasks.
-
-- **Explore** — gather context from the codebase and Cortex graph
-- **Assert** — declare facts and constraints the plan must satisfy
-- **Approve** — review the plan before execution
-- **Execute** — run the approved plan with progress tracking
-
-CLI: `mayros plan start|explore|assert|show|approve|execute|done|list|status`
-TUI: `/plan` slash command
+---
## Extensions Ecosystem
-Mayros ships with 50+ extensions organized by category:
+55 extensions loaded as plugins at startup:
| Category | Extension | Purpose |
| ------------- | ------------------------- | ------------------------------------------------------------------------- |
@@ -252,386 +255,248 @@ Mayros ships with 50+ extensions organized by category:
| Security | `bash-sandbox` | Command parsing, domain checker, blocklist, audit log |
| Permissions | `interactive-permissions` | Runtime permission dialogs, intent classification, policy store |
| Hooks | `llm-hooks` | Markdown-defined hook evaluation with safe condition parser |
-| MCP | `mcp-client` | Model Context Protocol client (stdio, SSE, WebSocket, HTTP transports) |
+| MCP | `mcp-client` | Model Context Protocol client (stdio, SSE, WebSocket, HTTP) |
| Economy | `token-economy` | Budget tracking, prompt cache optimization |
| Hub | `skill-hub` | Apilium Hub marketplace, Ed25519 signing, dependency audit |
| IoT | `iot-bridge` | IoT node fleet management |
-| Channels | 17 channel plugins | Discord, Telegram, WhatsApp, Slack, Signal, iMessage, Teams, Matrix, etc. |
+| Channels | 17 plugins | Discord, Telegram, WhatsApp, Slack, Signal, iMessage, Teams, Matrix, etc. |
-Extensions live in `extensions/` and are loaded as plugins at startup.
+---
## Hooks System
-Mayros exposes 29 hook types across the assistant lifecycle:
-
-- **Lifecycle hooks** — `before_prompt_build`, `after_response`, `before_compaction`, `agent_end`, etc.
-- **Security hooks** — `permission_request` (modifying: allow/deny/ask), `config_change`
-- **Coordination hooks** — `teammate_idle`, `task_completed`, `notification` (info/warn/error)
-- **HTTP webhook dispatcher** — POST delivery with HMAC-SHA256 signatures, retry + exponential backoff
-- **Async hook queue** — background execution with concurrency limits and dead-letter queue
-- **Markdown-defined hooks** — place `.md` files in `~/.mayros/hooks/` for custom hook logic
-
-## Everything we built so far
-
-### Core platform
-
-- [Gateway WS control plane](https://apilium.com/en/doc/mayros/gateway) with sessions, presence, config, cron, webhooks, [Control UI](https://apilium.com/en/doc/mayros/web), and [Canvas host](https://apilium.com/en/doc/mayros/platforms/mac/canvas#canvas-a2ui).
-- [CLI surface](https://apilium.com/en/doc/mayros/tools/agent-send): gateway, agent, send, [wizard](https://apilium.com/en/doc/mayros/start/wizard), and [doctor](https://apilium.com/en/doc/mayros/gateway/doctor).
-- [Pi agent runtime](https://apilium.com/en/doc/mayros/concepts/agent) in RPC mode with tool streaming and block streaming.
-- [Session model](https://apilium.com/en/doc/mayros/concepts/session): `main` for direct chats, group isolation, activation modes, queue modes, reply-back..
-- [Media pipeline](https://apilium.com/en/doc/mayros/nodes/images): images/audio/video, transcription hooks, size caps, temp file lifecycle. Audio details: [Audio](https://apilium.com/en/doc/mayros/nodes/audio).
-
-### Channels
-
-- [Channels](https://apilium.com/en/doc/mayros/channels): [WhatsApp](https://apilium.com/en/doc/mayros/channels/whatsapp) (Baileys), [Telegram](https://apilium.com/en/doc/mayros/channels/telegram) (grammY), [Slack](https://apilium.com/en/doc/mayros/channels/slack) (Bolt), [Discord](https://apilium.com/en/doc/mayros/channels/discord) (discord.js), [Google Chat](https://apilium.com/en/doc/mayros/channels/googlechat) (Chat API), [Signal](https://apilium.com/en/doc/mayros/channels/signal) (signal-cli), [BlueBubbles](https://apilium.com/en/doc/mayros/channels/bluebubbles) (iMessage, recommended), [iMessage](https://apilium.com/en/doc/mayros/channels/imessage) (legacy imsg), [Microsoft Teams](https://apilium.com/en/doc/mayros/channels/msteams) (extension), [Matrix](https://apilium.com/en/doc/mayros/channels/matrix) (extension), [Zalo](https://apilium.com/en/doc/mayros/channels/zalo) (extension), [Zalo Personal](https://apilium.com/en/doc/mayros/channels/zalouser) (extension), [WebChat](https://apilium.com/en/doc/mayros/web/webchat).
-- Mention gating, reply tags, per-channel chunking and routing. Channel rules: [Channels](https://apilium.com/en/doc/mayros/channels).
-
-### Apps + nodes
-
-- [macOS app](https://apilium.com/en/doc/mayros/platforms/macos): menu bar control plane, [Voice Wake](https://apilium.com/en/doc/mayros/nodes/voicewake)/PTT, [Talk Mode](https://apilium.com/en/doc/mayros/nodes/talk) overlay, [WebChat](https://apilium.com/en/doc/mayros/web/webchat), debug tools, [remote gateway](https://apilium.com/en/doc/mayros/gateway/remote) control.
-- [iOS node](https://apilium.com/en/doc/mayros/platforms/ios): [Canvas](https://apilium.com/en/doc/mayros/platforms/mac/canvas), [Voice Wake](https://apilium.com/en/doc/mayros/nodes/voicewake), [Talk Mode](https://apilium.com/en/doc/mayros/nodes/talk), camera, screen recording, Bonjour pairing.
-- [Android node](https://apilium.com/en/doc/mayros/platforms/android): [Canvas](https://apilium.com/en/doc/mayros/platforms/mac/canvas), [Talk Mode](https://apilium.com/en/doc/mayros/nodes/talk), camera, screen recording, optional SMS.
-- [macOS node mode](https://apilium.com/en/doc/mayros/nodes): system.run/notify + canvas/camera exposure.
-
-### Tools + automation
+29 hook types across the assistant lifecycle:
-- [Browser control](https://apilium.com/en/doc/mayros/tools/browser): dedicated mayros Chrome/Chromium, snapshots, actions, uploads, profiles.
-- [Canvas](https://apilium.com/en/doc/mayros/platforms/mac/canvas): [A2UI](https://apilium.com/en/doc/mayros/platforms/mac/canvas#canvas-a2ui) push/reset, eval, snapshot.
-- [Nodes](https://apilium.com/en/doc/mayros/nodes): camera snap/clip, screen record, [location.get](https://apilium.com/en/doc/mayros/nodes/location-command), notifications.
-- [Cron + wakeups](https://apilium.com/en/doc/mayros/automation/cron-jobs); [webhooks](https://apilium.com/en/doc/mayros/automation/webhook); [Gmail Pub/Sub](https://apilium.com/en/doc/mayros/automation/gmail-pubsub).
-- [Skills platform](https://apilium.com/en/doc/mayros/tools/skills): bundled, managed, and workspace skills with install gating + UI.
+- **Lifecycle** — `before_prompt_build`, `after_response`, `before_compaction`, `agent_end`, etc.
+- **Security** — `permission_request` (modifying: allow/deny/ask), `config_change`
+- **Coordination** — `teammate_idle`, `task_completed`, `notification`
+- **HTTP webhooks** — POST delivery with HMAC-SHA256 signatures, retry + exponential backoff
+- **Async queue** — background execution with concurrency limits and dead-letter queue
+- **Markdown hooks** — place `.md` files in `~/.mayros/hooks/` for custom logic
-### Runtime + safety
+---
-- [Retry policy](https://apilium.com/en/doc/mayros/concepts/retry) and [streaming/chunking](https://apilium.com/en/doc/mayros/concepts/streaming).
-- [Presence](https://apilium.com/en/doc/mayros/concepts/presence), [typing indicators](https://apilium.com/en/doc/mayros/concepts/typing-indicators), and [usage tracking](https://apilium.com/en/doc/mayros/concepts/usage-tracking).
-- [Models](https://apilium.com/en/doc/mayros/concepts/models), [model failover](https://apilium.com/en/doc/mayros/concepts/model-failover), and [session pruning](https://apilium.com/en/doc/mayros/concepts/session-pruning).
-- [Security](https://apilium.com/en/doc/mayros/gateway/security) and [troubleshooting](https://apilium.com/en/doc/mayros/channels/troubleshooting).
+## Security (18 layers)
-### Ops + packaging
+Mayros takes security seriously. 18 layers of defense:
-- [Control UI](https://apilium.com/en/doc/mayros/web) + [WebChat](https://apilium.com/en/doc/mayros/web/webchat) served directly from the Gateway.
-- [Tailscale Serve/Funnel](https://apilium.com/en/doc/mayros/gateway/tailscale) or [SSH tunnels](https://apilium.com/en/doc/mayros/gateway/remote) with token/password auth.
-- [Docker](https://apilium.com/en/doc/mayros/install/docker)-based installs.
-- [Doctor](https://apilium.com/en/doc/mayros/gateway/doctor) migrations, [logging](https://apilium.com/en/doc/mayros/logging).
+| Layer | Description |
+| --------------------------- | --------------------------------------------------------------- |
+| QuickJS WASM Sandbox | Skills run in isolated WASM — no fs, net, process, eval |
+| Static Scanner | 16 rules + anti-evasion preprocessing |
+| Enrichment Sanitizer | Unicode normalization, injection detection, depth limits |
+| Bash Sandbox | Command parsing, domain blocklist, audit logging |
+| Interactive Permissions | Runtime dialogs, intent classification, policy store |
+| Namespace Isolation | All queries forced to `{ns}:` prefix — no cross-namespace reads |
+| Tool Allowlist | Intersection model — ALL active skills must allow a tool |
+| Rate Limiter | Sliding window per skill (default: 60 calls/min) |
+| Query/Write Limits | Per-skill caps on graph reads and writes |
+| Enrichment Timeout | 2s timeout prevents DoS via slow enrichment |
+| Hot-Reload Validation | Atomic swap, manifest validation, downgrade blocking |
+| Path Traversal Protection | Reject `..` + `isPathInside()` double-check |
+| Verify-then-Promote | Temp extract → verify hashes → atomic promote |
+| Circuit Breaker | 3-state (closed/open/half-open) + exponential backoff |
+| DM Pairing | Unknown senders get pairing code, not access |
+| Audit Logging | Skill name + operation tagged on all sandbox writes |
+| Docker Sandboxing | Per-session Docker containers for non-main sessions |
+| Enterprise Managed Settings | Enforced config overrides with locked keys |
-### Developer tools
+---
-- Terminal UI (`mayros code`) with themes, vim mode, slash commands, image paste, and headless mode (`mayros -p`).
-- VSCode and JetBrains IDE plugins connected via Gateway WebSocket.
-- Trace CLI (`mayros trace`), plan CLI (`mayros plan`), knowledge graph CLI (`mayros kg`).
+## Models
-### Agent coordination
+Mayros is multi-model. Bring any provider.
-- Teams, workflows, agent mailbox, background task tracker.
-- Session fork/rewind for checkpoint-based exploration.
-- Rules engine with hierarchical Cortex-backed rules.
-- Agent persistent memory and contextual awareness notifications.
+- Models config + CLI: **[Models](https://apilium.com/en/doc/mayros/concepts/models)**
+- Auth profile rotation (OAuth vs API keys) + fallbacks: **[Model failover](https://apilium.com/en/doc/mayros/concepts/model-failover)**
-### Security layers
+Minimal config:
-- 18-layer security architecture: QuickJS WASM sandbox, static scanner (16 rules), enrichment sanitizer, bash sandbox, interactive permissions, namespace isolation, tool allowlist (intersection model), rate limiter, query/write limits, enrichment timeout, hot-reload validation, path traversal protection, verify-then-promote, circuit breaker, audit logging, and more.
-
-## How it works (short)
-
-```
-WhatsApp / Telegram / Slack / Discord / Google Chat / Signal / iMessage / BlueBubbles / Microsoft Teams / Matrix / Zalo / Zalo Personal / WebChat
- │
- ▼
-┌───────────────────────────────┐
-│ Gateway │
-│ (control plane) │
-│ ws://127.0.0.1:18789 │
-└──────────────┬────────────────┘
- │
- ├─ TUI (mayros code)
- ├─ VSCode / JetBrains
- ├─ Pi agent (RPC)
- ├─ CLI (mayros …)
- ├─ WebChat UI
- ├─ macOS app
- └─ iOS / Android nodes
+```json5
+{
+ agent: {
+ model: "anthropic/claude-opus-4-6",
+ },
+}
```
-## Key subsystems
+Full reference: **[Configuration](https://apilium.com/en/doc/mayros/gateway/configuration)**
-- **[Gateway WebSocket network](https://apilium.com/en/doc/mayros/concepts/architecture)** — single WS control plane for clients, tools, and events (plus ops: [Gateway runbook](https://apilium.com/en/doc/mayros/gateway)).
-- **[Tailscale exposure](https://apilium.com/en/doc/mayros/gateway/tailscale)** — Serve/Funnel for the Gateway dashboard + WS (remote access: [Remote](https://apilium.com/en/doc/mayros/gateway/remote)).
-- **[Browser control](https://apilium.com/en/doc/mayros/tools/browser)** — mayros‑managed Chrome/Chromium with CDP control.
-- **[Canvas + A2UI](https://apilium.com/en/doc/mayros/platforms/mac/canvas)** — agent‑driven visual workspace (A2UI host: [Canvas/A2UI](https://apilium.com/en/doc/mayros/platforms/mac/canvas#canvas-a2ui)).
-- **[Voice Wake](https://apilium.com/en/doc/mayros/nodes/voicewake) + [Talk Mode](https://apilium.com/en/doc/mayros/nodes/talk)** — always‑on speech and continuous conversation.
-- **[Nodes](https://apilium.com/en/doc/mayros/nodes)** — Canvas, camera snap/clip, screen record, `location.get`, notifications, plus macOS‑only `system.run`/`system.notify`.
+---
-## Tailscale access (Gateway dashboard)
-
-Mayros can auto-configure Tailscale **Serve** (tailnet-only) or **Funnel** (public) while the Gateway stays bound to loopback. Configure `gateway.tailscale.mode`:
-
-- `off`: no Tailscale automation (default).
-- `serve`: tailnet-only HTTPS via `tailscale serve` (uses Tailscale identity headers by default).
-- `funnel`: public HTTPS via `tailscale funnel` (requires shared password auth).
-
-Notes:
-
-- `gateway.bind` must stay `loopback` when Serve/Funnel is enabled (Mayros enforces this).
-- Serve can be forced to require a password by setting `gateway.auth.mode: "password"` or `gateway.auth.allowTailscale: false`.
-- Funnel refuses to start unless `gateway.auth.mode: "password"` is set.
-- Optional: `gateway.tailscale.resetOnExit` to undo Serve/Funnel on shutdown.
-
-Details: [Tailscale guide](https://apilium.com/en/doc/mayros/gateway/tailscale) · [Web surfaces](https://apilium.com/en/doc/mayros/web)
-
-## Remote Gateway (Linux is great)
+## Plan Mode
-It’s perfectly fine to run the Gateway on a small Linux instance. Clients (macOS app, CLI, WebChat) can connect over **Tailscale Serve/Funnel** or **SSH tunnels**, and you can still pair device nodes (macOS/iOS/Android) to execute device‑local actions when needed.
+Cortex-backed semantic planning for complex multi-step tasks.
-- **Gateway host** runs the exec tool and channel connections by default.
-- **Device nodes** run device‑local actions (`system.run`, camera, screen recording, notifications) via `node.invoke`.
- In short: exec runs where the Gateway lives; device actions run where the device lives.
+- **Explore** — gather context from the codebase and Cortex graph
+- **Assert** — declare facts and constraints the plan must satisfy
+- **Approve** — review the plan before execution
+- **Execute** — run the approved plan with progress tracking
-Details: [Remote access](https://apilium.com/en/doc/mayros/gateway/remote) · [Nodes](https://apilium.com/en/doc/mayros/nodes) · [Security](https://apilium.com/en/doc/mayros/gateway/security)
+CLI: `mayros plan start|explore|assert|show|approve|execute|done|list|status` · TUI: `/plan`
-## macOS permissions via the Gateway protocol
+---
-The macOS app can run in **node mode** and advertises its capabilities + permission map over the Gateway WebSocket (`node.list` / `node.describe`). Clients can then execute local actions via `node.invoke`:
+## Remote Gateway
-- `system.run` runs a local command and returns stdout/stderr/exit code; set `needsScreenRecording: true` to require screen-recording permission (otherwise you’ll get `PERMISSION_MISSING`).
-- `system.notify` posts a user notification and fails if notifications are denied.
-- `canvas.*`, `camera.*`, `screen.record`, and `location.get` are also routed via `node.invoke` and follow TCC permission status.
+Run the Gateway on a small Linux instance. Clients connect over **Tailscale Serve/Funnel** or **SSH tunnels**, and device nodes (macOS/iOS/Android) handle local actions via `node.invoke`.
-Elevated bash (host permissions) is separate from macOS TCC:
+Tailscale modes: `off` (default) · `serve` (tailnet-only HTTPS) · `funnel` (public HTTPS, requires password auth).
-- Use `/elevated on|off` to toggle per‑session elevated access when enabled + allowlisted.
-- Gateway persists the per‑session toggle via `sessions.patch` (WS method) alongside `thinkingLevel`, `verboseLevel`, `model`, `sendPolicy`, and `groupActivation`.
+Details: **[Remote access](https://apilium.com/en/doc/mayros/gateway/remote)** · **[Tailscale guide](https://apilium.com/en/doc/mayros/gateway/tailscale)** · **[Docker](https://apilium.com/en/doc/mayros/install/docker)**
-Details: [Nodes](https://apilium.com/en/doc/mayros/nodes) · [macOS app](https://apilium.com/en/doc/mayros/platforms/macos) · [Gateway protocol](https://apilium.com/en/doc/mayros/concepts/architecture)
+---
-## Agent to Agent (sessions\_\* tools)
+## Chat Commands (Channels)
-- Use these to coordinate work across sessions without jumping between chat surfaces.
-- `sessions_list` — discover active sessions (agents) and their metadata.
-- `sessions_history` — fetch transcript logs for a session.
-- `sessions_send` — message another session; optional reply‑back ping‑pong + announce step (`REPLY_SKIP`, `ANNOUNCE_SKIP`).
+Send these in WhatsApp/Telegram/Slack/Discord/Google Chat/Microsoft Teams/WebChat:
-Details: [Session tools](https://apilium.com/en/doc/mayros/concepts/session-tool)
+| Command | Description |
+| ----------------------------- | ------------------------------------------------------ |
+| `/status` | Session status (model, tokens, cost) |
+| `/new`, `/reset` | Reset the session |
+| `/compact` | Compact session context |
+| `/think ` | Set thinking level (off/minimal/low/medium/high/xhigh) |
+| `/verbose on\|off` | Toggle verbose mode |
+| `/usage off\|tokens\|full` | Per-response usage footer |
+| `/restart` | Restart the gateway (owner-only) |
+| `/activation mention\|always` | Group activation (groups only) |
-## Skills registry (Skills Hub)
+---
-Skills Hub is a minimal skill registry. With Skills Hub enabled, the agent can search for skills automatically and pull in new ones as needed.
+## From Source
-[Skills Hub](https://hub.apilium.com)
+```bash
+git clone https://github.com/ApiliumCode/mayros.git
+cd mayros
-## Chat commands
+pnpm install
+pnpm ui:build # auto-installs UI deps on first run
+pnpm build
-The Terminal UI (`mayros code`) supports 30+ slash commands — run `/help` for the full list.
+pnpm mayros onboard --install-daemon
-Send these in WhatsApp/Telegram/Slack/Google Chat/Microsoft Teams/WebChat (group commands are owner-only):
+# Dev loop (auto-reload)
+pnpm gateway:watch
+```
-- `/status` — compact session status (model + tokens, cost when available)
-- `/new` or `/reset` — reset the session
-- `/compact` — compact session context (summary)
-- `/think ` — off|minimal|low|medium|high|xhigh (GPT-5.2 + Codex models only)
-- `/verbose on|off`
-- `/usage off|tokens|full` — per-response usage footer
-- `/restart` — restart the gateway (owner-only in groups)
-- `/activation mention|always` — group activation toggle (groups only)
+`pnpm mayros ...` runs TypeScript directly (via `tsx`). `pnpm build` produces `dist/`.
-## Apps (optional)
+**Development channels:**
-The Gateway alone delivers a great experience. All apps are optional and add extra features.
+- **stable** — tagged releases, npm dist-tag `latest`
+- **beta** — prerelease tags, npm dist-tag `beta`
+- **dev** — moving head of `main`, npm dist-tag `dev`
-If you plan to build/run companion apps, follow the platform runbooks below.
+Switch: `mayros update --channel stable|beta|dev`. Details: **[Development channels](https://apilium.com/en/doc/mayros/install/development-channels)**
-### macOS (Mayros.app) (optional)
+---
-- Menu bar control for the Gateway and health.
-- Voice Wake + push-to-talk overlay.
-- WebChat + debug tools.
-- Remote gateway control over SSH.
+## Skills Hub
-Note: signed builds required for macOS permissions to stick across rebuilds (see `docs/mac/permissions.md`).
+[Skills Hub](https://hub.apilium.com) is a skill marketplace. With it enabled, the agent can search for skills automatically and pull in new ones.
-### iOS node (optional)
+- Workspace root: `~/.mayros/workspace`
+- Skills: `~/.mayros/workspace/skills//SKILL.md`
+- Injected prompt files: `AGENTS.md`, `SOUL.md`, `TOOLS.md`
-- Pairs as a node via the Bridge.
-- Voice trigger forwarding + Canvas surface.
-- Controlled via `mayros nodes …`.
+---
-Runbook: [iOS connect](https://apilium.com/en/doc/mayros/platforms/ios).
+## Channel Setup
-### Android node (optional)
+
+WhatsApp
-- Pairs via the same Bridge + pairing flow as iOS.
-- Exposes Canvas, Camera, and Screen capture commands.
-- Runbook: [Android connect](https://apilium.com/en/doc/mayros/platforms/android).
+- Link the device: `pnpm mayros channels login` (stores creds in `~/.mayros/credentials`)
+- Allowlist: `channels.whatsapp.allowFrom`
+- Groups: `channels.whatsapp.groups` (include `"*"` to allow all)
-## Agent workspace + skills
+[Full guide →](https://apilium.com/en/doc/mayros/channels/whatsapp)
-- Workspace root: `~/.mayros/workspace` (configurable via `agents.defaults.workspace`).
-- Injected prompt files: `AGENTS.md`, `SOUL.md`, `TOOLS.md`.
-- Skills: `~/.mayros/workspace/skills//SKILL.md`.
+
-## Configuration
+
+Telegram
-Minimal `~/.mayros/mayros.json` (model + defaults):
+Set `TELEGRAM_BOT_TOKEN` or `channels.telegram.botToken`:
```json5
-{
- agent: {
- model: "anthropic/claude-opus-4-6",
- },
-}
+{ channels: { telegram: { botToken: "123456:ABCDEF" } } }
```
-[Full configuration reference (all keys + examples).](https://apilium.com/en/doc/mayros/gateway/configuration)
+[Full guide →](https://apilium.com/en/doc/mayros/channels/telegram)
-## Security model (important)
+
-- **Default:** tools run on the host for the **main** session, so the agent has full access when it’s just you.
-- **Group/channel safety:** set `agents.defaults.sandbox.mode: "non-main"` to run **non‑main sessions** (groups/channels) inside per‑session Docker sandboxes; bash then runs in Docker for those sessions.
-- **Sandbox defaults:** allowlist `bash`, `process`, `read`, `write`, `edit`, `sessions_list`, `sessions_history`, `sessions_send`, `sessions_spawn`; denylist `browser`, `canvas`, `nodes`, `cron`, `discord`, `gateway`.
+
+Slack
-Details: [Security guide](https://apilium.com/en/doc/mayros/gateway/security) · [Docker + sandboxing](https://apilium.com/en/doc/mayros/install/docker) · [Sandbox config](https://apilium.com/en/doc/mayros/gateway/configuration)
+Set `SLACK_BOT_TOKEN` + `SLACK_APP_TOKEN` (or config equivalents).
-### [WhatsApp](https://apilium.com/en/doc/mayros/channels/whatsapp)
+[Full guide →](https://apilium.com/en/doc/mayros/channels/slack)
-- Link the device: `pnpm mayros channels login` (stores creds in `~/.mayros/credentials`).
-- Allowlist who can talk to the assistant via `channels.whatsapp.allowFrom`.
-- If `channels.whatsapp.groups` is set, it becomes a group allowlist; include `"*"` to allow all.
+
-### [Telegram](https://apilium.com/en/doc/mayros/channels/telegram)
+
+Discord
-- Set `TELEGRAM_BOT_TOKEN` or `channels.telegram.botToken` (env wins).
-- Optional: set `channels.telegram.groups` (with `channels.telegram.groups."*".requireMention`); when set, it is a group allowlist (include `"*"` to allow all). Also `channels.telegram.allowFrom` or `channels.telegram.webhookUrl` + `channels.telegram.webhookSecret` as needed.
+Set `DISCORD_BOT_TOKEN` or `channels.discord.token`:
```json5
-{
- channels: {
- telegram: {
- botToken: "123456:ABCDEF",
- },
- },
-}
+{ channels: { discord: { token: "1234abcd" } } }
```
-### [Slack](https://apilium.com/en/doc/mayros/channels/slack)
+[Full guide →](https://apilium.com/en/doc/mayros/channels/discord)
-- Set `SLACK_BOT_TOKEN` + `SLACK_APP_TOKEN` (or `channels.slack.botToken` + `channels.slack.appToken`).
+
-### [Discord](https://apilium.com/en/doc/mayros/channels/discord)
+
+Signal · BlueBubbles · iMessage · Teams · Matrix · Zalo · WebChat
-- Set `DISCORD_BOT_TOKEN` or `channels.discord.token` (env wins).
-- Optional: set `commands.native`, `commands.text`, or `commands.useAccessGroups`, plus `channels.discord.allowFrom`, `channels.discord.guilds`, or `channels.discord.mediaMaxMb` as needed.
+- **Signal** — requires `signal-cli` + config section
+- **BlueBubbles** (recommended iMessage) — `channels.bluebubbles.serverUrl` + `password` + webhook
+- **iMessage** (legacy) — macOS-only via `imsg`
+- **Microsoft Teams** — Bot Framework app + `msteams` config
+- **Matrix** — `matrix-js-sdk` extension
+- **Zalo / Zalo Personal** — extension channels
+- **WebChat** — uses Gateway WebSocket directly
-```json5
-{
- channels: {
- discord: {
- token: "1234abcd",
- },
- },
-}
-```
+[Channel docs →](https://apilium.com/en/doc/mayros/channels)
-### [Signal](https://apilium.com/en/doc/mayros/channels/signal)
+
-- Requires `signal-cli` and a `channels.signal` config section.
+---
-### [BlueBubbles (iMessage)](https://apilium.com/en/doc/mayros/channels/bluebubbles)
+## Documentation
-- **Recommended** iMessage integration.
-- Configure `channels.bluebubbles.serverUrl` + `channels.bluebubbles.password` and a webhook (`channels.bluebubbles.webhookPath`).
-- The BlueBubbles server runs on macOS; the Gateway can run on macOS or elsewhere.
+**Start here:**
-### [iMessage (legacy)](https://apilium.com/en/doc/mayros/channels/imessage)
+- [Getting started](https://apilium.com/en/doc/mayros/start/getting-started) — first-time setup
+- [Architecture](https://apilium.com/en/doc/mayros/concepts/architecture) — gateway + protocol model
+- [Configuration](https://apilium.com/en/doc/mayros/gateway/configuration) — every key + examples
+- [Security](https://apilium.com/en/doc/mayros/gateway/security) — security model and guidance
-- Legacy macOS-only integration via `imsg` (Messages must be signed in).
-- If `channels.imessage.groups` is set, it becomes a group allowlist; include `"*"` to allow all.
+**Platform guides:**
-### [Microsoft Teams](https://apilium.com/en/doc/mayros/channels/msteams)
+[macOS](https://apilium.com/en/doc/mayros/platforms/macos) · [iOS](https://apilium.com/en/doc/mayros/platforms/ios) · [Android](https://apilium.com/en/doc/mayros/platforms/android) · [Linux](https://apilium.com/en/doc/mayros/platforms/linux) · [Windows (WSL2)](https://apilium.com/en/doc/mayros/platforms/windows)
-- Configure a Teams app + Bot Framework, then add a `msteams` config section.
-- Allowlist who can talk via `msteams.allowFrom`; group access via `msteams.groupAllowFrom` or `msteams.groupPolicy: "open"`.
+**Operations:**
-### [WebChat](https://apilium.com/en/doc/mayros/web/webchat)
+[Gateway runbook](https://apilium.com/en/doc/mayros/gateway) · [Docker](https://apilium.com/en/doc/mayros/install/docker) · [Health checks](https://apilium.com/en/doc/mayros/gateway/health) · [Doctor](https://apilium.com/en/doc/mayros/gateway/doctor) · [Logging](https://apilium.com/en/doc/mayros/logging) · [Troubleshooting](https://apilium.com/en/doc/mayros/channels/troubleshooting)
-- Uses the Gateway WebSocket; no separate WebChat port/config.
+**Deep dives:**
-Browser control (optional):
+[Agent loop](https://apilium.com/en/doc/mayros/concepts/agent-loop) · [Sessions](https://apilium.com/en/doc/mayros/concepts/session) · [Models](https://apilium.com/en/doc/mayros/concepts/models) · [Presence](https://apilium.com/en/doc/mayros/concepts/presence) · [Streaming](https://apilium.com/en/doc/mayros/concepts/streaming) · [Skills](https://apilium.com/en/doc/mayros/tools/skills) · [Browser](https://apilium.com/en/doc/mayros/tools/browser) · [Canvas](https://apilium.com/en/doc/mayros/platforms/mac/canvas) · [Nodes](https://apilium.com/en/doc/mayros/nodes) · [Cron](https://apilium.com/en/doc/mayros/automation/cron-jobs) · [Webhooks](https://apilium.com/en/doc/mayros/automation/webhook) · [Gmail Pub/Sub](https://apilium.com/en/doc/mayros/automation/gmail-pubsub)
-```json5
-{
- browser: {
- enabled: true,
- color: "#FF4500",
- },
-}
-```
+**Advanced:**
+
+[Discovery + transports](https://apilium.com/en/doc/mayros/gateway/discovery) · [Bonjour/mDNS](https://apilium.com/en/doc/mayros/gateway/bonjour) · [Gateway pairing](https://apilium.com/en/doc/mayros/gateway/pairing) · [Tailscale](https://apilium.com/en/doc/mayros/gateway/tailscale) · [Remote gateway](https://apilium.com/en/doc/mayros/gateway/remote) · [Control UI](https://apilium.com/en/doc/mayros/web/control-ui) · [RPC adapters](https://apilium.com/en/doc/mayros/reference/rpc) · [TypeBox schemas](https://apilium.com/en/doc/mayros/concepts/typebox)
+
+**Templates:**
+
+[AGENTS](https://apilium.com/en/doc/mayros/reference/templates/AGENTS) · [BOOTSTRAP](https://apilium.com/en/doc/mayros/reference/templates/BOOTSTRAP) · [IDENTITY](https://apilium.com/en/doc/mayros/reference/templates/IDENTITY) · [TOOLS](https://apilium.com/en/doc/mayros/reference/templates/TOOLS) · [USER](https://apilium.com/en/doc/mayros/reference/templates/USER) · [Default AGENTS](https://apilium.com/en/doc/mayros/reference/AGENTS.default) · [Skills config](https://apilium.com/en/doc/mayros/tools/skills-config)
-## Docs
-
-Use these when you’re past the onboarding flow and want the deeper reference.
-
-- [Start with the docs index for navigation and “what’s where.”](https://apilium.com/en/doc/mayros)
-- [Read the architecture overview for the gateway + protocol model.](https://apilium.com/en/doc/mayros/concepts/architecture)
-- [Use the full configuration reference when you need every key and example.](https://apilium.com/en/doc/mayros/gateway/configuration)
-- [Run the Gateway by the book with the operational runbook.](https://apilium.com/en/doc/mayros/gateway)
-- [Learn how the Control UI/Web surfaces work and how to expose them safely.](https://apilium.com/en/doc/mayros/web)
-- [Understand remote access over SSH tunnels or tailnets.](https://apilium.com/en/doc/mayros/gateway/remote)
-- [Follow the onboarding wizard flow for a guided setup.](https://apilium.com/en/doc/mayros/start/wizard)
-- [Wire external triggers via the webhook surface.](https://apilium.com/en/doc/mayros/automation/webhook)
-- [Set up Gmail Pub/Sub triggers.](https://apilium.com/en/doc/mayros/automation/gmail-pubsub)
-- [Learn the macOS menu bar companion details.](https://apilium.com/en/doc/mayros/platforms/mac/menu-bar)
-- [Platform guides: Windows (WSL2)](https://apilium.com/en/doc/mayros/platforms/windows), [Linux](https://apilium.com/en/doc/mayros/platforms/linux), [macOS](https://apilium.com/en/doc/mayros/platforms/macos), [iOS](https://apilium.com/en/doc/mayros/platforms/ios), [Android](https://apilium.com/en/doc/mayros/platforms/android)
-- [Debug common failures with the troubleshooting guide.](https://apilium.com/en/doc/mayros/channels/troubleshooting)
-- [Review security guidance before exposing anything.](https://apilium.com/en/doc/mayros/gateway/security)
-
-## Advanced docs (discovery + control)
-
-- [Discovery + transports](https://apilium.com/en/doc/mayros/gateway/discovery)
-- [Bonjour/mDNS](https://apilium.com/en/doc/mayros/gateway/bonjour)
-- [Gateway pairing](https://apilium.com/en/doc/mayros/gateway/pairing)
-- [Remote gateway README](https://apilium.com/en/doc/mayros/gateway/remote-gateway-readme)
-- [Control UI](https://apilium.com/en/doc/mayros/web/control-ui)
-- [Dashboard](https://apilium.com/en/doc/mayros/web/dashboard)
-
-## Operations & troubleshooting
-
-- [Health checks](https://apilium.com/en/doc/mayros/gateway/health)
-- [Gateway lock](https://apilium.com/en/doc/mayros/gateway/gateway-lock)
-- [Background process](https://apilium.com/en/doc/mayros/gateway/background-process)
-- [Browser troubleshooting (Linux)](https://apilium.com/en/doc/mayros/tools/browser-linux-troubleshooting)
-- [Logging](https://apilium.com/en/doc/mayros/logging)
-
-## Deep dives
-
-- [Agent loop](https://apilium.com/en/doc/mayros/concepts/agent-loop)
-- [Presence](https://apilium.com/en/doc/mayros/concepts/presence)
-- [TypeBox schemas](https://apilium.com/en/doc/mayros/concepts/typebox)
-- [RPC adapters](https://apilium.com/en/doc/mayros/reference/rpc)
-- [Queue](https://apilium.com/en/doc/mayros/concepts/queue)
-
-## Workspace & skills
-
-- [Skills config](https://apilium.com/en/doc/mayros/tools/skills-config)
-- [Default AGENTS](https://apilium.com/en/doc/mayros/reference/AGENTS.default)
-- [Templates: AGENTS](https://apilium.com/en/doc/mayros/reference/templates/AGENTS)
-- [Templates: BOOTSTRAP](https://apilium.com/en/doc/mayros/reference/templates/BOOTSTRAP)
-- [Templates: IDENTITY](https://apilium.com/en/doc/mayros/reference/templates/IDENTITY)
-- [Templates: TOOLS](https://apilium.com/en/doc/mayros/reference/templates/TOOLS)
-- [Templates: USER](https://apilium.com/en/doc/mayros/reference/templates/USER)
-
-## Platform internals
-
-- [macOS dev setup](https://apilium.com/en/doc/mayros/platforms/mac/dev-setup)
-- [macOS menu bar](https://apilium.com/en/doc/mayros/platforms/mac/menu-bar)
-- [macOS voice wake](https://apilium.com/en/doc/mayros/platforms/mac/voicewake)
-- [iOS node](https://apilium.com/en/doc/mayros/platforms/ios)
-- [Android node](https://apilium.com/en/doc/mayros/platforms/android)
-- [Windows (WSL2)](https://apilium.com/en/doc/mayros/platforms/windows)
-- [Linux app](https://apilium.com/en/doc/mayros/platforms/linux)
-
-## Email hooks (Gmail)
-
-- [apilium.com/en/doc/mayros/gmail-pubsub](https://apilium.com/en/doc/mayros/automation/gmail-pubsub)
+---
## Community
diff --git a/apps/android/app/src/main/java/ai/mayros/android/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/mayros/android/voice/TalkModeManager.kt
index 317bd51e..e92dca67 100644
--- a/apps/android/app/src/main/java/ai/mayros/android/voice/TalkModeManager.kt
+++ b/apps/android/app/src/main/java/ai/mayros/android/voice/TalkModeManager.kt
@@ -523,7 +523,15 @@ class TalkModeManager(
language = TalkModeRuntime.validatedLanguage(directive?.language),
latencyTier = TalkModeRuntime.validatedLatencyTier(directive?.latencyTier),
)
- streamAndPlay(voiceId = voiceId!!, apiKey = apiKey!!, request = request)
+ val safeVoiceId = voiceId ?: run {
+ Log.w(tag, "voiceId became null after check, cannot stream")
+ return
+ }
+ val safeApiKey = apiKey ?: run {
+ Log.w(tag, "apiKey became null after check, cannot stream")
+ return
+ }
+ streamAndPlay(voiceId = safeVoiceId, apiKey = safeApiKey, request = request)
Log.d(tag, "elevenlabs stream ok durMs=${SystemClock.elapsedRealtime() - ttsStarted}")
}
} catch (err: Throwable) {
diff --git a/apps/macos/Sources/Mayros/LaunchdManager.swift b/apps/macos/Sources/Mayros/LaunchdManager.swift
index b9126a07..419a0449 100644
--- a/apps/macos/Sources/Mayros/LaunchdManager.swift
+++ b/apps/macos/Sources/Mayros/LaunchdManager.swift
@@ -5,7 +5,11 @@ enum LaunchdManager {
let process = Process()
process.launchPath = "/bin/launchctl"
process.arguments = args
- try? process.run()
+ do {
+ try process.run()
+ } catch {
+ NSLog("LaunchdManager: launchctl %@ failed: %@", args.joined(separator: " "), error.localizedDescription)
+ }
}
static func startMayros() {
diff --git a/apps/macos/Sources/MayrosDiscovery/TailscaleNetwork.swift b/apps/macos/Sources/MayrosDiscovery/TailscaleNetwork.swift
index ef78e6f4..bff35fd9 100644
--- a/apps/macos/Sources/MayrosDiscovery/TailscaleNetwork.swift
+++ b/apps/macos/Sources/MayrosDiscovery/TailscaleNetwork.swift
@@ -21,14 +21,15 @@ public enum TailscaleNetwork {
let flags = Int32(ptr.pointee.ifa_flags)
let isUp = (flags & IFF_UP) != 0
let isLoopback = (flags & IFF_LOOPBACK) != 0
- let family = ptr.pointee.ifa_addr.pointee.sa_family
+ guard let addrPtr = ptr.pointee.ifa_addr else { continue }
+ let family = addrPtr.pointee.sa_family
if !isUp || isLoopback || family != UInt8(AF_INET) { continue }
- var addr = ptr.pointee.ifa_addr.pointee
+ var addr = addrPtr.pointee
var buffer = [CChar](repeating: 0, count: Int(NI_MAXHOST))
let result = getnameinfo(
&addr,
- socklen_t(ptr.pointee.ifa_addr.pointee.sa_len),
+ socklen_t(addrPtr.pointee.sa_len),
&buffer,
socklen_t(buffer.count),
nil,
diff --git a/apps/macos/Sources/MayrosDiscovery/WideAreaGatewayDiscovery.swift b/apps/macos/Sources/MayrosDiscovery/WideAreaGatewayDiscovery.swift
index 8ac6ecf1..fbf49906 100644
--- a/apps/macos/Sources/MayrosDiscovery/WideAreaGatewayDiscovery.swift
+++ b/apps/macos/Sources/MayrosDiscovery/WideAreaGatewayDiscovery.swift
@@ -240,7 +240,7 @@ enum WideAreaGatewayDiscovery {
}
process.waitUntilExit()
- let data = (try? outPipe.fileHandleForReading.readToEnd()) ?? Data()
+ let data = outPipe.fileHandleForReading.readToEndSafely()
let output = String(data: data, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines)
return output?.isEmpty == false ? output : nil
}
diff --git a/apps/macos/Tests/MayrosIPCTests/Placeholder.swift b/apps/macos/Tests/MayrosIPCTests/Placeholder.swift
deleted file mode 100644
index 14e5c056..00000000
--- a/apps/macos/Tests/MayrosIPCTests/Placeholder.swift
+++ /dev/null
@@ -1,7 +0,0 @@
-import Testing
-
-@Suite struct PlaceholderTests {
- @Test func placeholder() {
- #expect(true)
- }
-}
diff --git a/docs/assets/mayros-coding-cli-terminal-interface.png b/docs/assets/mayros-coding-cli-terminal-interface.png
new file mode 100644
index 00000000..0d8969ea
Binary files /dev/null and b/docs/assets/mayros-coding-cli-terminal-interface.png differ
diff --git a/docs/experiments/onboarding-config-protocol.md b/docs/experiments/onboarding-config-protocol.md
deleted file mode 100644
index 648d24b5..00000000
--- a/docs/experiments/onboarding-config-protocol.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-summary: "RPC protocol notes for onboarding wizard and config schema"
-read_when: "Changing onboarding wizard steps or config schema endpoints"
-title: "Onboarding and Config Protocol"
----
-
-# Onboarding + Config Protocol
-
-Purpose: shared onboarding + config surfaces across CLI, macOS app, and Web UI.
-
-## Components
-
-- Wizard engine (shared session + prompts + onboarding state).
-- CLI onboarding uses the same wizard flow as the UI clients.
-- Gateway RPC exposes wizard + config schema endpoints.
-- macOS onboarding uses the wizard step model.
-- Web UI renders config forms from JSON Schema + UI hints.
-
-## Gateway RPC
-
-- `wizard.start` params: `{ mode?: "local"|"remote", workspace?: string }`
-- `wizard.next` params: `{ sessionId, answer?: { stepId, value? } }`
-- `wizard.cancel` params: `{ sessionId }`
-- `wizard.status` params: `{ sessionId }`
-- `config.schema` params: `{}`
-
-Responses (shape)
-
-- Wizard: `{ sessionId, done, step?, status?, error? }`
-- Config schema: `{ schema, uiHints, version, generatedAt }`
-
-## UI Hints
-
-- `uiHints` keyed by path; optional metadata (label/help/group/order/advanced/sensitive/placeholder).
-- Sensitive fields render as password inputs; no redaction layer.
-- Unsupported schema nodes fall back to the raw JSON editor.
-
-## Notes
-
-- This doc is the single place to track protocol refactors for onboarding/config.
diff --git a/docs/experiments/plans/browser-evaluate-cdp-refactor.md b/docs/experiments/plans/browser-evaluate-cdp-refactor.md
deleted file mode 100644
index 21b4aa1b..00000000
--- a/docs/experiments/plans/browser-evaluate-cdp-refactor.md
+++ /dev/null
@@ -1,229 +0,0 @@
----
-summary: "Plan: isolate browser act:evaluate from Playwright queue using CDP, with end-to-end deadlines and safer ref resolution"
-owner: "mayros"
-status: "draft"
-last_updated: "2026-02-10"
-title: "Browser Evaluate CDP Refactor"
----
-
-# Browser Evaluate CDP Refactor Plan
-
-## Context
-
-`act:evaluate` executes user provided JavaScript in the page. Today it runs via Playwright
-(`page.evaluate` or `locator.evaluate`). Playwright serializes CDP commands per page, so a
-stuck or long running evaluate can block the page command queue and make every later action
-on that tab look "stuck".
-
-PR #13498 adds a pragmatic safety net (bounded evaluate, abort propagation, and best-effort
-recovery). This document describes a larger refactor that makes `act:evaluate` inherently
-isolated from Playwright so a stuck evaluate cannot wedge normal Playwright operations.
-
-## Goals
-
-- `act:evaluate` cannot permanently block later browser actions on the same tab.
-- Timeouts are single source of truth end to end so a caller can rely on a budget.
-- Abort and timeout are treated the same way across HTTP and in-process dispatch.
-- Element targeting for evaluate is supported without switching everything off Playwright.
-- Maintain backward compatibility for existing callers and payloads.
-
-## Non-goals
-
-- Replace all browser actions (click, type, wait, etc.) with CDP implementations.
-- Remove the existing safety net introduced in PR #13498 (it remains a useful fallback).
-- Introduce new unsafe capabilities beyond the existing `browser.evaluateEnabled` gate.
-- Add process isolation (worker process/thread) for evaluate. If we still see hard to recover
- stuck states after this refactor, that is a follow-up idea.
-
-## Current Architecture (Why It Gets Stuck)
-
-At a high level:
-
-- Callers send `act:evaluate` to the browser control service.
-- The route handler calls into Playwright to execute the JavaScript.
-- Playwright serializes page commands, so an evaluate that never finishes blocks the queue.
-- A stuck queue means later click/type/wait operations on the tab can appear to hang.
-
-## Proposed Architecture
-
-### 1. Deadline Propagation
-
-Introduce a single budget concept and derive everything from it:
-
-- Caller sets `timeoutMs` (or a deadline in the future).
-- The outer request timeout, route handler logic, and the execution budget inside the page
- all use the same budget, with small headroom where needed for serialization overhead.
-- Abort is propagated as an `AbortSignal` everywhere so cancellation is consistent.
-
-Implementation direction:
-
-- Add a small helper (for example `createBudget({ timeoutMs, signal })`) that returns:
- - `signal`: the linked AbortSignal
- - `deadlineAtMs`: absolute deadline
- - `remainingMs()`: remaining budget for child operations
-- Use this helper in:
- - `src/browser/client-fetch.ts` (HTTP and in-process dispatch)
- - `src/node-host/runner.ts` (proxy path)
- - browser action implementations (Playwright and CDP)
-
-### 2. Separate Evaluate Engine (CDP Path)
-
-Add a CDP based evaluate implementation that does not share Playwright's per page command
-queue. The key property is that the evaluate transport is a separate WebSocket connection
-and a separate CDP session attached to the target.
-
-Implementation direction:
-
-- New module, for example `src/browser/cdp-evaluate.ts`, that:
- - Connects to the configured CDP endpoint (browser level socket).
- - Uses `Target.attachToTarget({ targetId, flatten: true })` to get a `sessionId`.
- - Runs either:
- - `Runtime.evaluate` for page level evaluate, or
- - `DOM.resolveNode` plus `Runtime.callFunctionOn` for element evaluate.
- - On timeout or abort:
- - Sends `Runtime.terminateExecution` best-effort for the session.
- - Closes the WebSocket and returns a clear error.
-
-Notes:
-
-- This still executes JavaScript in the page, so termination can have side effects. The win
- is that it does not wedge the Playwright queue, and it is cancelable at the transport
- layer by killing the CDP session.
-
-### 3. Ref Story (Element Targeting Without A Full Rewrite)
-
-The hard part is element targeting. CDP needs a DOM handle or `backendDOMNodeId`, while
-today most browser actions use Playwright locators based on refs from snapshots.
-
-Recommended approach: keep existing refs, but attach an optional CDP resolvable id.
-
-#### 3.1 Extend Stored Ref Info
-
-Extend the stored role ref metadata to optionally include a CDP id:
-
-- Today: `{ role, name, nth }`
-- Proposed: `{ role, name, nth, backendDOMNodeId?: number }`
-
-This keeps all existing Playwright based actions working and allows CDP evaluate to accept
-the same `ref` value when the `backendDOMNodeId` is available.
-
-#### 3.2 Populate backendDOMNodeId At Snapshot Time
-
-When producing a role snapshot:
-
-1. Generate the existing role ref map as today (role, name, nth).
-2. Fetch the AX tree via CDP (`Accessibility.getFullAXTree`) and compute a parallel map of
- `(role, name, nth) -> backendDOMNodeId` using the same duplicate handling rules.
-3. Merge the id back into the stored ref info for the current tab.
-
-If mapping fails for a ref, leave `backendDOMNodeId` undefined. This makes the feature
-best-effort and safe to roll out.
-
-#### 3.3 Evaluate Behavior With Ref
-
-In `act:evaluate`:
-
-- If `ref` is present and has `backendDOMNodeId`, run element evaluate via CDP.
-- If `ref` is present but has no `backendDOMNodeId`, fall back to the Playwright path (with
- the safety net).
-
-Optional escape hatch:
-
-- Extend the request shape to accept `backendDOMNodeId` directly for advanced callers (and
- for debugging), while keeping `ref` as the primary interface.
-
-### 4. Keep A Last Resort Recovery Path
-
-Even with CDP evaluate, there are other ways to wedge a tab or a connection. Keep the
-existing recovery mechanisms (terminate execution + disconnect Playwright) as a last resort
-for:
-
-- legacy callers
-- environments where CDP attach is blocked
-- unexpected Playwright edge cases
-
-## Implementation Plan (Single Iteration)
-
-### Deliverables
-
-- A CDP based evaluate engine that runs outside the Playwright per-page command queue.
-- A single end-to-end timeout/abort budget used consistently by callers and handlers.
-- Ref metadata that can optionally carry `backendDOMNodeId` for element evaluate.
-- `act:evaluate` prefers the CDP engine when possible and falls back to Playwright when not.
-- Tests that prove a stuck evaluate does not wedge later actions.
-- Logs/metrics that make failures and fallbacks visible.
-
-### Implementation Checklist
-
-1. Add a shared "budget" helper to link `timeoutMs` + upstream `AbortSignal` into:
- - a single `AbortSignal`
- - an absolute deadline
- - a `remainingMs()` helper for downstream operations
-2. Update all caller paths to use that helper so `timeoutMs` means the same thing everywhere:
- - `src/browser/client-fetch.ts` (HTTP and in-process dispatch)
- - `src/node-host/runner.ts` (node proxy path)
- - CLI wrappers that call `/act` (add `--timeout-ms` to `browser evaluate`)
-3. Implement `src/browser/cdp-evaluate.ts`:
- - connect to the browser-level CDP socket
- - `Target.attachToTarget` to get a `sessionId`
- - run `Runtime.evaluate` for page evaluate
- - run `DOM.resolveNode` + `Runtime.callFunctionOn` for element evaluate
- - on timeout/abort: best-effort `Runtime.terminateExecution` then close the socket
-4. Extend stored role ref metadata to optionally include `backendDOMNodeId`:
- - keep existing `{ role, name, nth }` behavior for Playwright actions
- - add `backendDOMNodeId?: number` for CDP element targeting
-5. Populate `backendDOMNodeId` during snapshot creation (best-effort):
- - fetch AX tree via CDP (`Accessibility.getFullAXTree`)
- - compute `(role, name, nth) -> backendDOMNodeId` and merge into the stored ref map
- - if mapping is ambiguous or missing, leave the id undefined
-6. Update `act:evaluate` routing:
- - if no `ref`: always use CDP evaluate
- - if `ref` resolves to a `backendDOMNodeId`: use CDP element evaluate
- - otherwise: fall back to Playwright evaluate (still bounded and abortable)
-7. Keep the existing "last resort" recovery path as a fallback, not the default path.
-8. Add tests:
- - stuck evaluate times out within budget and the next click/type succeeds
- - abort cancels evaluate (client disconnect or timeout) and unblocks subsequent actions
- - mapping failures cleanly fall back to Playwright
-9. Add observability:
- - evaluate duration and timeout counters
- - terminateExecution usage
- - fallback rate (CDP -> Playwright) and reasons
-
-### Acceptance Criteria
-
-- A deliberately hung `act:evaluate` returns within the caller budget and does not wedge the
- tab for later actions.
-- `timeoutMs` behaves consistently across CLI, agent tool, node proxy, and in-process calls.
-- If `ref` can be mapped to `backendDOMNodeId`, element evaluate uses CDP; otherwise the
- fallback path is still bounded and recoverable.
-
-## Testing Plan
-
-- Unit tests:
- - `(role, name, nth)` matching logic between role refs and AX tree nodes.
- - Budget helper behavior (headroom, remaining time math).
-- Integration tests:
- - CDP evaluate timeout returns within budget and does not block the next action.
- - Abort cancels evaluate and triggers termination best-effort.
-- Contract tests:
- - Ensure `BrowserActRequest` and `BrowserActResponse` remain compatible.
-
-## Risks And Mitigations
-
-- Mapping is imperfect:
- - Mitigation: best-effort mapping, fallback to Playwright evaluate, and add debug tooling.
-- `Runtime.terminateExecution` has side effects:
- - Mitigation: only use on timeout/abort and document the behavior in errors.
-- Extra overhead:
- - Mitigation: only fetch AX tree when snapshots are requested, cache per target, and keep
- CDP session short lived.
-- Extension relay limitations:
- - Mitigation: use browser level attach APIs when per page sockets are not available, and
- keep the current Playwright path as fallback.
-
-## Open Questions
-
-- Should the new engine be configurable as `playwright`, `cdp`, or `auto`?
-- Do we want to expose a new "nodeRef" format for advanced users, or keep `ref` only?
-- How should frame snapshots and selector scoped snapshots participate in AX mapping?
diff --git a/docs/experiments/plans/cron-add-hardening.md b/docs/experiments/plans/cron-add-hardening.md
deleted file mode 100644
index dcbea1c2..00000000
--- a/docs/experiments/plans/cron-add-hardening.md
+++ /dev/null
@@ -1,63 +0,0 @@
----
-summary: "Harden cron.add input handling, align schemas, and improve cron UI/agent tooling"
-owner: "mayros"
-status: "complete"
-last_updated: "2026-01-05"
-title: "Cron Add Hardening"
----
-
-# Cron Add Hardening & Schema Alignment
-
-## Context
-
-Recent gateway logs show repeated `cron.add` failures with invalid parameters (missing `sessionTarget`, `wakeMode`, `payload`, and malformed `schedule`). This indicates that at least one client (likely the agent tool call path) is sending wrapped or partially specified job payloads. Separately, there is drift between cron provider enums in TypeScript, gateway schema, CLI flags, and UI form types, plus a UI mismatch for `cron.status` (expects `jobCount` while gateway returns `jobs`).
-
-## Goals
-
-- Stop `cron.add` INVALID_REQUEST spam by normalizing common wrapper payloads and inferring missing `kind` fields.
-- Align cron provider lists across gateway schema, cron types, CLI docs, and UI forms.
-- Make agent cron tool schema explicit so the LLM produces correct job payloads.
-- Fix the Control UI cron status job count display.
-- Add tests to cover normalization and tool behavior.
-
-## Non-goals
-
-- Change cron scheduling semantics or job execution behavior.
-- Add new schedule kinds or cron expression parsing.
-- Overhaul the UI/UX for cron beyond the necessary field fixes.
-
-## Findings (current gaps)
-
-- `CronPayloadSchema` in gateway excludes `signal` + `imessage`, while TS types include them.
-- Control UI CronStatus expects `jobCount`, but gateway returns `jobs`.
-- Agent cron tool schema allows arbitrary `job` objects, enabling malformed inputs.
-- Gateway strictly validates `cron.add` with no normalization, so wrapped payloads fail.
-
-## What changed
-
-- `cron.add` and `cron.update` now normalize common wrapper shapes and infer missing `kind` fields.
-- Agent cron tool schema matches the gateway schema, which reduces invalid payloads.
-- Provider enums are aligned across gateway, CLI, UI, and macOS picker.
-- Control UI uses the gateway’s `jobs` count field for status.
-
-## Current behavior
-
-- **Normalization:** wrapped `data`/`job` payloads are unwrapped; `schedule.kind` and `payload.kind` are inferred when safe.
-- **Defaults:** safe defaults are applied for `wakeMode` and `sessionTarget` when missing.
-- **Providers:** Discord/Slack/Signal/iMessage are now consistently surfaced across CLI/UI.
-
-See [Cron jobs](/automation/cron-jobs) for the normalized shape and examples.
-
-## Verification
-
-- Watch gateway logs for reduced `cron.add` INVALID_REQUEST errors.
-- Confirm Control UI cron status shows job count after refresh.
-
-## Optional Follow-ups
-
-- Manual Control UI smoke: add a cron job per provider + verify status job count.
-
-## Open Questions
-
-- Should `cron.add` accept explicit `state` from clients (currently disallowed by schema)?
-- Should we allow `webchat` as an explicit delivery provider (currently filtered in delivery resolution)?
diff --git a/docs/experiments/plans/group-policy-hardening.md b/docs/experiments/plans/group-policy-hardening.md
deleted file mode 100644
index 2a51b7c1..00000000
--- a/docs/experiments/plans/group-policy-hardening.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-summary: "Telegram allowlist hardening: prefix + whitespace normalization"
-read_when:
- - Reviewing historical Telegram allowlist changes
-title: "Telegram Allowlist Hardening"
----
-
-# Telegram Allowlist Hardening
-
-**Date**: 2026-01-05
-**Status**: Complete
-**PR**: #216
-
-## Summary
-
-Telegram allowlists now accept `telegram:` and `tg:` prefixes case-insensitively, and tolerate
-accidental whitespace. This aligns inbound allowlist checks with outbound send normalization.
-
-## What changed
-
-- Prefixes `telegram:` and `tg:` are treated the same (case-insensitive).
-- Allowlist entries are trimmed; empty entries are ignored.
-
-## Examples
-
-All of these are accepted for the same ID:
-
-- `telegram:123456`
-- `TG:123456`
-- `tg:123456`
-
-## Why it matters
-
-Copy/paste from logs or chat IDs often includes prefixes and whitespace. Normalizing avoids
-false negatives when deciding whether to respond in DMs or groups.
-
-## Related docs
-
-- [Group Chats](/channels/groups)
-- [Telegram Provider](/channels/telegram)
diff --git a/docs/experiments/plans/openresponses-gateway.md b/docs/experiments/plans/openresponses-gateway.md
deleted file mode 100644
index d2bf3b09..00000000
--- a/docs/experiments/plans/openresponses-gateway.md
+++ /dev/null
@@ -1,123 +0,0 @@
----
-summary: "Plan: Add OpenResponses /v1/responses endpoint and deprecate chat completions cleanly"
-owner: "mayros"
-status: "draft"
-last_updated: "2026-01-19"
-title: "OpenResponses Gateway Plan"
----
-
-# OpenResponses Gateway Integration Plan
-
-## Context
-
-Mayros Gateway currently exposes a minimal OpenAI-compatible Chat Completions endpoint at
-`/v1/chat/completions` (see [OpenAI Chat Completions](/gateway/openai-http-api)).
-
-Open Responses is an open inference standard based on the OpenAI Responses API. It is designed
-for agentic workflows and uses item-based inputs plus semantic streaming events. The OpenResponses
-spec defines `/v1/responses`, not `/v1/chat/completions`.
-
-## Goals
-
-- Add a `/v1/responses` endpoint that adheres to OpenResponses semantics.
-- Keep Chat Completions as a compatibility layer that is easy to disable and eventually remove.
-- Standardize validation and parsing with isolated, reusable schemas.
-
-## Non-goals
-
-- Full OpenResponses feature parity in the first pass (images, files, hosted tools).
-- Replacing internal agent execution logic or tool orchestration.
-- Changing the existing `/v1/chat/completions` behavior during the first phase.
-
-## Research Summary
-
-Sources: OpenResponses OpenAPI, OpenResponses specification site, and the Hugging Face blog post.
-
-Key points extracted:
-
-- `POST /v1/responses` accepts `CreateResponseBody` fields like `model`, `input` (string or
- `ItemParam[]`), `instructions`, `tools`, `tool_choice`, `stream`, `max_output_tokens`, and
- `max_tool_calls`.
-- `ItemParam` is a discriminated union of:
- - `message` items with roles `system`, `developer`, `user`, `assistant`
- - `function_call` and `function_call_output`
- - `reasoning`
- - `item_reference`
-- Successful responses return a `ResponseResource` with `object: "response"`, `status`, and
- `output` items.
-- Streaming uses semantic events such as:
- - `response.created`, `response.in_progress`, `response.completed`, `response.failed`
- - `response.output_item.added`, `response.output_item.done`
- - `response.content_part.added`, `response.content_part.done`
- - `response.output_text.delta`, `response.output_text.done`
-- The spec requires:
- - `Content-Type: text/event-stream`
- - `event:` must match the JSON `type` field
- - terminal event must be literal `[DONE]`
-- Reasoning items may expose `content`, `encrypted_content`, and `summary`.
-- HF examples include `OpenResponses-Version: latest` in requests (optional header).
-
-## Proposed Architecture
-
-- Add `src/gateway/open-responses.schema.ts` containing Zod schemas only (no gateway imports).
-- Add `src/gateway/openresponses-http.ts` (or `open-responses-http.ts`) for `/v1/responses`.
-- Keep `src/gateway/openai-http.ts` intact as a legacy compatibility adapter.
-- Add config `gateway.http.endpoints.responses.enabled` (default `false`).
-- Keep `gateway.http.endpoints.chatCompletions.enabled` independent; allow both endpoints to be
- toggled separately.
-- Emit a startup warning when Chat Completions is enabled to signal legacy status.
-
-## Deprecation Path for Chat Completions
-
-- Maintain strict module boundaries: no shared schema types between responses and chat completions.
-- Make Chat Completions opt-in by config so it can be disabled without code changes.
-- Update docs to label Chat Completions as legacy once `/v1/responses` is stable.
-- Optional future step: map Chat Completions requests to the Responses handler for a simpler
- removal path.
-
-## Phase 1 Support Subset
-
-- Accept `input` as string or `ItemParam[]` with message roles and `function_call_output`.
-- Extract system and developer messages into `extraSystemPrompt`.
-- Use the most recent `user` or `function_call_output` as the current message for agent runs.
-- Reject unsupported content parts (image/file) with `invalid_request_error`.
-- Return a single assistant message with `output_text` content.
-- Return `usage` with zeroed values until token accounting is wired.
-
-## Validation Strategy (No SDK)
-
-- Implement Zod schemas for the supported subset of:
- - `CreateResponseBody`
- - `ItemParam` + message content part unions
- - `ResponseResource`
- - Streaming event shapes used by the gateway
-- Keep schemas in a single, isolated module to avoid drift and allow future codegen.
-
-## Streaming Implementation (Phase 1)
-
-- SSE lines with both `event:` and `data:`.
-- Required sequence (minimum viable):
- - `response.created`
- - `response.output_item.added`
- - `response.content_part.added`
- - `response.output_text.delta` (repeat as needed)
- - `response.output_text.done`
- - `response.content_part.done`
- - `response.completed`
- - `[DONE]`
-
-## Tests and Verification Plan
-
-- Add e2e coverage for `/v1/responses`:
- - Auth required
- - Non-stream response shape
- - Stream event ordering and `[DONE]`
- - Session routing with headers and `user`
-- Keep `src/gateway/openai-http.e2e.test.ts` unchanged.
-- Manual: curl to `/v1/responses` with `stream: true` and verify event ordering and terminal
- `[DONE]`.
-
-## Doc Updates (Follow-up)
-
-- Add a new docs page for `/v1/responses` usage and examples.
-- Update `/gateway/openai-http-api` with a legacy note and pointer to `/v1/responses`.
diff --git a/docs/experiments/plans/pty-process-supervision.md b/docs/experiments/plans/pty-process-supervision.md
deleted file mode 100644
index 88ac774d..00000000
--- a/docs/experiments/plans/pty-process-supervision.md
+++ /dev/null
@@ -1,192 +0,0 @@
----
-summary: "Production plan for reliable interactive process supervision (PTY + non-PTY) with explicit ownership, unified lifecycle, and deterministic cleanup"
-owner: "mayros"
-status: "in-progress"
-last_updated: "2026-02-15"
-title: "PTY and Process Supervision Plan"
----
-
-# PTY and Process Supervision Plan
-
-## 1. Problem and goal
-
-We need one reliable lifecycle for long-running command execution across:
-
-- `exec` foreground runs
-- `exec` background runs
-- `process` follow up actions (`poll`, `log`, `send-keys`, `paste`, `submit`, `kill`, `remove`)
-- CLI agent runner subprocesses
-
-The goal is not just to support PTY. The goal is predictable ownership, cancellation, timeout, and cleanup with no unsafe process matching heuristics.
-
-## 2. Scope and boundaries
-
-- Keep implementation internal in `src/process/supervisor`.
-- Do not create a new package for this.
-- Keep current behavior compatibility where practical.
-- Do not broaden scope to terminal replay or tmux style session persistence.
-
-## 3. Implemented in this branch
-
-### Supervisor baseline already present
-
-- Supervisor module is in place under `src/process/supervisor/*`.
-- Exec runtime and CLI runner are already routed through supervisor spawn and wait.
-- Registry finalization is idempotent.
-
-### This pass completed
-
-1. Explicit PTY command contract
-
-- `SpawnInput` is now a discriminated union in `src/process/supervisor/types.ts`.
-- PTY runs require `ptyCommand` instead of reusing generic `argv`.
-- Supervisor no longer rebuilds PTY command strings from argv joins in `src/process/supervisor/supervisor.ts`.
-- Exec runtime now passes `ptyCommand` directly in `src/agents/bash-tools.exec-runtime.ts`.
-
-2. Process layer type decoupling
-
-- Supervisor types no longer import `SessionStdin` from agents.
-- Process local stdin contract lives in `src/process/supervisor/types.ts` (`ManagedRunStdin`).
-- Adapters now depend only on process level types:
- - `src/process/supervisor/adapters/child.ts`
- - `src/process/supervisor/adapters/pty.ts`
-
-3. Process tool lifecycle ownership improvement
-
-- `src/agents/bash-tools.process.ts` now requests cancellation through supervisor first.
-- `process kill/remove` now use process-tree fallback termination when supervisor lookup misses.
-- `remove` keeps deterministic remove behavior by dropping running session entries immediately after termination is requested.
-
-4. Single source watchdog defaults
-
-- Added shared defaults in `src/agents/cli-watchdog-defaults.ts`.
-- `src/agents/cli-backends.ts` consumes the shared defaults.
-- `src/agents/cli-runner/reliability.ts` consumes the same shared defaults.
-
-5. Dead helper cleanup
-
-- Removed unused `killSession` helper path from `src/agents/bash-tools.shared.ts`.
-
-6. Direct supervisor path tests added
-
-- Added `src/agents/bash-tools.process.supervisor.test.ts` to cover kill and remove routing through supervisor cancellation.
-
-7. Reliability gap fixes completed
-
-- `src/agents/bash-tools.process.ts` now falls back to real OS-level process termination when supervisor lookup misses.
-- `src/process/supervisor/adapters/child.ts` now uses process-tree termination semantics for default cancel/timeout kill paths.
-- Added shared process-tree utility in `src/process/kill-tree.ts`.
-
-8. PTY contract edge-case coverage added
-
-- Added `src/process/supervisor/supervisor.pty-command.test.ts` for verbatim PTY command forwarding and empty-command rejection.
-- Added `src/process/supervisor/adapters/child.test.ts` for process-tree kill behavior in child adapter cancellation.
-
-## 4. Remaining gaps and decisions
-
-### Reliability status
-
-The two required reliability gaps for this pass are now closed:
-
-- `process kill/remove` now has a real OS termination fallback when supervisor lookup misses.
-- child cancel/timeout now uses process-tree kill semantics for default kill path.
-- Regression tests were added for both behaviors.
-
-### Durability and startup reconciliation
-
-Restart behavior is now explicitly defined as in-memory lifecycle only.
-
-- `reconcileOrphans()` remains a no-op in `src/process/supervisor/supervisor.ts` by design.
-- Active runs are not recovered after process restart.
-- This boundary is intentional for this implementation pass to avoid partial persistence risks.
-
-### Maintainability follow-ups
-
-1. `runExecProcess` in `src/agents/bash-tools.exec-runtime.ts` still handles multiple responsibilities and can be split into focused helpers in a follow-up.
-
-## 5. Implementation plan
-
-The implementation pass for required reliability and contract items is complete.
-
-Completed:
-
-- `process kill/remove` fallback real termination
-- process-tree cancellation for child adapter default kill path
-- regression tests for fallback kill and child adapter kill path
-- PTY command edge-case tests under explicit `ptyCommand`
-- explicit in-memory restart boundary with `reconcileOrphans()` no-op by design
-
-Optional follow-up:
-
-- split `runExecProcess` into focused helpers with no behavior drift
-
-## 6. File map
-
-### Process supervisor
-
-- `src/process/supervisor/types.ts` updated with discriminated spawn input and process local stdin contract.
-- `src/process/supervisor/supervisor.ts` updated to use explicit `ptyCommand`.
-- `src/process/supervisor/adapters/child.ts` and `src/process/supervisor/adapters/pty.ts` decoupled from agent types.
-- `src/process/supervisor/registry.ts` idempotent finalize unchanged and retained.
-
-### Exec and process integration
-
-- `src/agents/bash-tools.exec-runtime.ts` updated to pass PTY command explicitly and keep fallback path.
-- `src/agents/bash-tools.process.ts` updated to cancel via supervisor with real process-tree fallback termination.
-- `src/agents/bash-tools.shared.ts` removed direct kill helper path.
-
-### CLI reliability
-
-- `src/agents/cli-watchdog-defaults.ts` added as shared baseline.
-- `src/agents/cli-backends.ts` and `src/agents/cli-runner/reliability.ts` now consume same defaults.
-
-## 7. Validation run in this pass
-
-Unit tests:
-
-- `pnpm vitest src/process/supervisor/registry.test.ts`
-- `pnpm vitest src/process/supervisor/supervisor.test.ts`
-- `pnpm vitest src/process/supervisor/supervisor.pty-command.test.ts`
-- `pnpm vitest src/process/supervisor/adapters/child.test.ts`
-- `pnpm vitest src/agents/cli-backends.test.ts`
-- `pnpm vitest src/agents/bash-tools.exec.pty-cleanup.test.ts`
-- `pnpm vitest src/agents/bash-tools.process.poll-timeout.test.ts`
-- `pnpm vitest src/agents/bash-tools.process.supervisor.test.ts`
-- `pnpm vitest src/process/exec.test.ts`
-
-E2E targets:
-
-- `pnpm test:e2e src/agents/cli-runner.e2e.test.ts`
-- `pnpm test:e2e src/agents/bash-tools.exec.pty-fallback.e2e.test.ts src/agents/bash-tools.exec.background-abort.e2e.test.ts src/agents/bash-tools.process.send-keys.e2e.test.ts`
-
-Typecheck note:
-
-- `pnpm tsgo` currently fails in this repo due to a pre-existing UI typing dependency issue (`@vitest/browser-playwright` resolution), unrelated to this process supervision work.
-
-## 8. Operational guarantees preserved
-
-- Exec env hardening behavior is unchanged.
-- Approval and allowlist flow is unchanged.
-- Output sanitization and output caps are unchanged.
-- PTY adapter still guarantees wait settlement on forced kill and listener disposal.
-
-## 9. Definition of done
-
-1. Supervisor is lifecycle owner for managed runs.
-2. PTY spawn uses explicit command contract with no argv reconstruction.
-3. Process layer has no type dependency on agent layer for supervisor stdin contracts.
-4. Watchdog defaults are single source.
-5. Targeted unit and e2e tests remain green.
-6. Restart durability boundary is explicitly documented or fully implemented.
-
-## 10. Summary
-
-The branch now has a coherent and safer supervision shape:
-
-- explicit PTY contract
-- cleaner process layering
-- supervisor driven cancellation path for process operations
-- real fallback termination when supervisor lookup misses
-- process-tree cancellation for child-run default kill paths
-- unified watchdog defaults
-- explicit in-memory restart boundary (no orphan reconciliation across restart in this pass)
diff --git a/docs/experiments/plans/session-binding-channel-agnostic.md b/docs/experiments/plans/session-binding-channel-agnostic.md
deleted file mode 100644
index c66b6e81..00000000
--- a/docs/experiments/plans/session-binding-channel-agnostic.md
+++ /dev/null
@@ -1,223 +0,0 @@
----
-summary: "Channel agnostic session binding architecture and iteration 1 delivery scope"
-owner: "onutc"
-status: "in-progress"
-last_updated: "2026-02-21"
-title: "Session Binding Channel Agnostic Plan"
----
-
-# Session Binding Channel Agnostic Plan
-
-## Overview
-
-This document defines the long term channel agnostic session binding model and the concrete scope for the next implementation iteration.
-
-Goal:
-
-- make subagent bound session routing a core capability
-- keep channel specific behavior in adapters
-- avoid regressions in normal Discord behavior
-
-## Why this exists
-
-Current behavior mixes:
-
-- completion content policy
-- destination routing policy
-- Discord specific details
-
-This caused edge cases such as:
-
-- duplicate main and thread delivery under concurrent runs
-- stale token usage on reused binding managers
-- missing activity accounting for webhook sends
-
-## Iteration 1 scope
-
-This iteration is intentionally limited.
-
-### 1. Add channel agnostic core interfaces
-
-Add core types and service interfaces for bindings and routing.
-
-Proposed core types:
-
-```ts
-export type BindingTargetKind = "subagent" | "session";
-export type BindingStatus = "active" | "ending" | "ended";
-
-export type ConversationRef = {
- channel: string;
- accountId: string;
- conversationId: string;
- parentConversationId?: string;
-};
-
-export type SessionBindingRecord = {
- bindingId: string;
- targetSessionKey: string;
- targetKind: BindingTargetKind;
- conversation: ConversationRef;
- status: BindingStatus;
- boundAt: number;
- expiresAt?: number;
- metadata?: Record;
-};
-```
-
-Core service contract:
-
-```ts
-export interface SessionBindingService {
- bind(input: {
- targetSessionKey: string;
- targetKind: BindingTargetKind;
- conversation: ConversationRef;
- metadata?: Record;
- ttlMs?: number;
- }): Promise;
-
- listBySession(targetSessionKey: string): SessionBindingRecord[];
- resolveByConversation(ref: ConversationRef): SessionBindingRecord | null;
- touch(bindingId: string, at?: number): void;
- unbind(input: {
- bindingId?: string;
- targetSessionKey?: string;
- reason: string;
- }): Promise;
-}
-```
-
-### 2. Add one core delivery router for subagent completions
-
-Add a single destination resolution path for completion events.
-
-Router contract:
-
-```ts
-export interface BoundDeliveryRouter {
- resolveDestination(input: {
- eventKind: "task_completion";
- targetSessionKey: string;
- requester?: ConversationRef;
- failClosed: boolean;
- }): {
- binding: SessionBindingRecord | null;
- mode: "bound" | "fallback";
- reason: string;
- };
-}
-```
-
-For this iteration:
-
-- only `task_completion` is routed through this new path
-- existing paths for other event kinds remain as-is
-
-### 3. Keep Discord as adapter
-
-Discord remains the first adapter implementation.
-
-Adapter responsibilities:
-
-- create/reuse thread conversations
-- send bound messages via webhook or channel send
-- validate thread state (archived/deleted)
-- map adapter metadata (webhook identity, thread ids)
-
-### 4. Fix currently known correctness issues
-
-Required in this iteration:
-
-- refresh token usage when reusing existing thread binding manager
-- record outbound activity for webhook based Discord sends
-- stop implicit main channel fallback when a bound thread destination is selected for session mode completion
-
-### 5. Preserve current runtime safety defaults
-
-No behavior change for users with thread bound spawn disabled.
-
-Defaults stay:
-
-- `channels.discord.threadBindings.spawnSubagentSessions = false`
-
-Result:
-
-- normal Discord users stay on current behavior
-- new core path affects only bound session completion routing where enabled
-
-## Not in iteration 1
-
-Explicitly deferred:
-
-- ACP binding targets (`targetKind: "acp"`)
-- new channel adapters beyond Discord
-- global replacement of all delivery paths (`spawn_ack`, future `subagent_message`)
-- protocol level changes
-- store migration/versioning redesign for all binding persistence
-
-Notes on ACP:
-
-- interface design keeps room for ACP
-- ACP implementation is not started in this iteration
-
-## Routing invariants
-
-These invariants are mandatory for iteration 1.
-
-- destination selection and content generation are separate steps
-- if session mode completion resolves to an active bound destination, delivery must target that destination
-- no hidden reroute from bound destination to main channel
-- fallback behavior must be explicit and observable
-
-## Compatibility and rollout
-
-Compatibility target:
-
-- no regression for users with thread bound spawning off
-- no change to non-Discord channels in this iteration
-
-Rollout:
-
-1. Land interfaces and router behind current feature gates.
-2. Route Discord completion mode bound deliveries through router.
-3. Keep legacy path for non-bound flows.
-4. Verify with targeted tests and canary runtime logs.
-
-## Tests required in iteration 1
-
-Unit and integration coverage required:
-
-- manager token rotation uses latest token after manager reuse
-- webhook sends update channel activity timestamps
-- two active bound sessions in same requester channel do not duplicate to main channel
-- completion for bound session mode run resolves to thread destination only
-- disabled spawn flag keeps legacy behavior unchanged
-
-## Proposed implementation files
-
-Core:
-
-- `src/infra/outbound/session-binding-service.ts` (new)
-- `src/infra/outbound/bound-delivery-router.ts` (new)
-- `src/agents/subagent-announce.ts` (completion destination resolution integration)
-
-Discord adapter and runtime:
-
-- `src/discord/monitor/thread-bindings.manager.ts`
-- `src/discord/monitor/reply-delivery.ts`
-- `src/discord/send.outbound.ts`
-
-Tests:
-
-- `src/discord/monitor/provider*.test.ts`
-- `src/discord/monitor/reply-delivery.test.ts`
-- `src/agents/subagent-announce.format.e2e.test.ts`
-
-## Done criteria for iteration 1
-
-- core interfaces exist and are wired for completion routing
-- correctness fixes above are merged with tests
-- no main and thread duplicate completion delivery in session mode bound runs
-- no behavior change for disabled bound spawn deployments
-- ACP remains explicitly deferred
diff --git a/docs/experiments/plans/thread-bound-subagents.md b/docs/experiments/plans/thread-bound-subagents.md
deleted file mode 100644
index 8663ab55..00000000
--- a/docs/experiments/plans/thread-bound-subagents.md
+++ /dev/null
@@ -1,338 +0,0 @@
----
-summary: "Discord thread bound subagent sessions with plugin lifecycle hooks, routing, and config kill switches"
-owner: "onutc"
-status: "implemented"
-last_updated: "2026-02-21"
-title: "Thread Bound Subagents"
----
-
-# Thread Bound Subagents
-
-## Overview
-
-This feature lets users interact with spawned subagents directly inside Discord threads.
-
-Instead of only waiting for a completion summary in the parent session, users can move into a dedicated thread that routes messages to the spawned subagent session. Replies are sent in-thread with a thread bound persona.
-
-The implementation is split between channel agnostic core lifecycle hooks and Discord specific extension behavior.
-
-## Goals
-
-- Allow direct thread conversation with a spawned subagent session.
-- Keep default subagent orchestration channel agnostic.
-- Support both automatic thread creation on spawn and manual focus controls.
-- Provide predictable cleanup on completion, kill, timeout, and thread lifecycle changes.
-- Keep behavior configurable with global defaults plus channel and account overrides.
-
-## Out of scope
-
-- New ACP protocol features.
-- Non Discord thread binding implementations in this document.
-- New bot accounts or app level Discord identity changes.
-
-## What shipped
-
-- `sessions_spawn` supports `thread: true` and `mode: "run" | "session"`.
-- Spawn flow supports persistent thread bound sessions.
-- Discord thread binding manager supports bind, unbind, TTL sweep, and persistence.
-- Plugin hook lifecycle for subagents:
- - `subagent_spawning`
- - `subagent_spawned`
- - `subagent_delivery_target`
- - `subagent_ended`
-- Discord extension implements thread auto bind, delivery target override, and unbind on end.
-- Text commands for manual control:
- - `/focus`
- - `/unfocus`
- - `/agents`
- - `/session ttl`
-- Global and Discord scoped enablement and TTL controls, including a global kill switch.
-
-## Core concepts
-
-### Spawn modes
-
-- `mode: "run"`
- - one task lifecycle
- - completion announcement flow
-- `mode: "session"`
- - persistent thread bound session
- - supports follow up user messages in thread
-
-Default mode behavior:
-
-- if `thread: true` and mode omitted, mode defaults to `"session"`
-- otherwise mode defaults to `"run"`
-
-Constraint:
-
-- `mode: "session"` requires `thread: true`
-
-### Thread binding target model
-
-Bindings are generic targets, not only subagents.
-
-- `targetKind: "subagent" | "acp"`
-- `targetSessionKey: string`
-
-This allows the same routing primitive to support ACP/session bindings as well.
-
-### Thread binding manager
-
-The manager is responsible for:
-
-- binding or creating threads for a session target
-- unbinding by thread or by target session
-- managing webhook reuse and recent unbound webhook echo suppression
-- TTL based unbind and stale thread cleanup
-- persistence load and save
-
-## Architecture
-
-### Core and extension boundary
-
-Core (`src/agents/*`) does not directly depend on Discord routing internals.
-
-Core emits lifecycle intent through plugin hooks.
-
-Discord extension (`extensions/discord/src/subagent-hooks.ts`) implements Discord specific behavior:
-
-- pre spawn thread bind preparation
-- completion delivery target override to bound thread
-- unbind on subagent end
-
-### Plugin hook flow
-
-1. `subagent_spawning`
- - before run starts
- - can block spawn with `status: "error"`
- - used to prepare thread binding when `thread: true`
-2. `subagent_spawned`
- - post run registration event
-3. `subagent_delivery_target`
- - completion routing override hook
- - can redirect completion delivery to bound Discord thread origin
-4. `subagent_ended`
- - cleanup and unbind signal
-
-### Account ID normalization contract
-
-Thread binding and routing state must use one canonical account id abstraction.
-
-Specification:
-
-- Introduce a shared account id module (proposed: `src/routing/account-id.ts`) and stop defining local normalizers.
-- Expose two explicit helpers:
- - `normalizeAccountId(value): string`
- - returns canonical, defaulted id (current default is `default`)
- - use for map keys, manager registration and lookup, persistence keys, routing keys
- - `normalizeOptionalAccountId(value): string | undefined`
- - returns canonical id when present, `undefined` when absent
- - use for inbound optional context fields and merge logic
-- Do not implement ad hoc account normalization in feature modules.
- - This includes `trim`, `toLowerCase`, or defaulting logic in local helper functions.
-- Any map keyed by account id must only accept canonical ids from shared helpers.
-- Hook payloads and delivery context should carry raw optional account ids, and normalize at module boundaries only.
-
-Migration guardrails:
-
-- Replace duplicate normalizers in routing, reply payload, command context, and provider helpers with shared helpers.
-- Add contract tests that assert identical normalization behavior across:
- - route resolution
- - thread binding manager lookup
- - reply delivery target filtering
- - command run context merge
-
-### Persistence and state
-
-Binding state path:
-
-- `${stateDir}/discord/thread-bindings.json`
-
-Record shape contains:
-
-- account, channel, thread
-- target kind and target session key
-- agent label metadata
-- webhook id/token
-- boundBy, boundAt, expiresAt
-
-State is stored on `globalThis` to keep one shared registry across ESM and Jiti loader paths.
-
-## Configuration
-
-### Effective precedence
-
-For Discord thread binding options, account override wins, then channel, then global session default, then built in fallback.
-
-- account: `channels.discord.accounts..threadBindings.`
-- channel: `channels.discord.threadBindings.`
-- global: `session.threadBindings.`
-
-### Keys
-
-| Key | Scope | Default | Notes |
-| ------------------------------------------------------- | --------------- | --------------- | ----------------------------------------- |
-| `session.threadBindings.enabled` | global | `true` | master default kill switch |
-| `session.threadBindings.ttlHours` | global | `24` | default auto unfocus TTL |
-| `channels.discord.threadBindings.enabled` | channel/account | inherits global | Discord override kill switch |
-| `channels.discord.threadBindings.ttlHours` | channel/account | inherits global | Discord TTL override |
-| `channels.discord.threadBindings.spawnSubagentSessions` | channel/account | `false` | opt in for `thread: true` spawn auto bind |
-
-### Runtime effect of enable switch
-
-When effective `enabled` is false for a Discord account:
-
-- provider creates a noop thread binding manager for runtime wiring
-- no real manager is registered for lookup by account id
-- inbound bound thread routing is effectively disabled
-- completion routing overrides do not resolve bound thread origins
-- `/focus`, `/unfocus`, and thread binding specific operations report unavailable
-- `thread: true` spawn path returns actionable error from Discord hook layer
-
-## Flow and behavior
-
-### Spawn with `thread: true`
-
-1. Spawn validates mode and permissions.
-2. `subagent_spawning` hook runs.
-3. Discord extension checks effective flags:
- - thread bindings enabled
- - `spawnSubagentSessions` enabled
-4. Extension attempts auto bind and thread creation.
-5. If bind fails:
- - spawn returns error
- - provisional child session is deleted
-6. If bind succeeds:
- - child run starts
- - run is registered with spawn mode
-
-### Manual focus and unfocus
-
-- `/focus `
- - Discord only
- - resolves subagent or session target
- - binds current or created thread to target session
-- `/unfocus`
- - Discord thread only
- - unbinds current thread
-
-### Inbound routing
-
-- Discord preflight checks current thread id against thread binding manager.
-- If bound, effective session routing uses bound target session key.
-- If not bound, normal routing path is used.
-
-### Outbound routing
-
-- Reply delivery checks whether current session has thread bindings.
-- Bound sessions deliver to thread via webhook aware path.
-- Unbound sessions use normal bot delivery.
-
-### Completion routing
-
-- Core completion flow calls `subagent_delivery_target`.
-- Discord extension returns bound thread origin when it can resolve one.
-- Core merges hook origin with requester origin and delivers completion.
-
-### Cleanup
-
-Cleanup occurs on:
-
-- completion
-- error or timeout completion path
-- kill and terminate paths
-- TTL expiration
-- archived or deleted thread probes
-- manual `/unfocus`
-
-Cleanup behavior includes unbind and optional farewell messaging.
-
-## Commands and user UX
-
-| Command | Purpose |
-| ---------------------------------------------------------- | -------------------------------------------------------------------- | ------------------------------------- | --------------- | ------------------------------------------- |
-| `/subagents spawn [--model] [--thinking]` | spawn subagent; may be thread bound when `thread: true` path is used |
-| `/focus ` | manually bind thread to subagent or session |
-| `/unfocus` | remove binding from current thread |
-| `/agents` | list active agents and binding state |
-| `/session ttl ` | update TTL for focused thread binding |
-
-Notes:
-
-- `/session ttl` is currently Discord thread focused behavior.
-- Thread intro and farewell text are generated by thread binding message helpers.
-
-## Failure handling and safety
-
-- Spawn returns explicit errors when thread binding cannot be prepared.
-- Spawn failure after provisional bind attempts best effort unbind and session delete.
-- Completion logic prevents duplicate ended hook emission.
-- Retry and expiry guards prevent infinite completion announce retry loops.
-- Webhook echo suppression avoids unbound webhook messages being reprocessed as inbound turns.
-
-## Module map
-
-### Core orchestration
-
-- `src/agents/subagent-spawn.ts`
-- `src/agents/subagent-announce.ts`
-- `src/agents/subagent-registry.ts`
-- `src/agents/subagent-registry-cleanup.ts`
-- `src/agents/subagent-registry-completion.ts`
-
-### Discord runtime
-
-- `src/discord/monitor/provider.ts`
-- `src/discord/monitor/thread-bindings.manager.ts`
-- `src/discord/monitor/thread-bindings.state.ts`
-- `src/discord/monitor/thread-bindings.lifecycle.ts`
-- `src/discord/monitor/thread-bindings.messages.ts`
-- `src/discord/monitor/message-handler.preflight.ts`
-- `src/discord/monitor/message-handler.process.ts`
-- `src/discord/monitor/reply-delivery.ts`
-
-### Plugin hooks and extension
-
-- `src/plugins/types.ts`
-- `src/plugins/hooks.ts`
-- `extensions/discord/src/subagent-hooks.ts`
-
-### Config and schema
-
-- `src/config/types.base.ts`
-- `src/config/types.discord.ts`
-- `src/config/zod-schema.session.ts`
-- `src/config/zod-schema.providers-core.ts`
-- `src/config/schema.help.ts`
-- `src/config/schema.labels.ts`
-
-## Test coverage highlights
-
-- `extensions/discord/src/subagent-hooks.test.ts`
-- `src/discord/monitor/thread-bindings.ttl.test.ts`
-- `src/discord/monitor/thread-bindings.shared-state.test.ts`
-- `src/discord/monitor/reply-delivery.test.ts`
-- `src/discord/monitor/message-handler.preflight.test.ts`
-- `src/discord/monitor/message-handler.process.test.ts`
-- `src/auto-reply/reply/commands-subagents-focus.test.ts`
-- `src/auto-reply/reply/commands-session-ttl.test.ts`
-- `src/agents/subagent-registry.steer-restart.test.ts`
-- `src/agents/subagent-registry-completion.test.ts`
-
-## Operational summary
-
-- Use `session.threadBindings.enabled` as the global kill switch default.
-- Use `channels.discord.threadBindings.enabled` and account overrides for selective enablement.
-- Keep `spawnSubagentSessions` opt in for thread auto spawn behavior.
-- Use TTL settings for automatic unfocus policy control.
-
-This model keeps subagent lifecycle orchestration generic while giving Discord a full thread bound interaction path.
-
-## Related plan
-
-For channel agnostic SessionBinding architecture and scoped iteration planning, see:
-
-- `docs/experiments/plans/session-binding-channel-agnostic.md`
-
-ACP remains a next step in that plan and is intentionally not implemented in this shipped Discord thread-bound flow.
diff --git a/docs/experiments/proposals/model-config.md b/docs/experiments/proposals/model-config.md
deleted file mode 100644
index 6a0ef652..00000000
--- a/docs/experiments/proposals/model-config.md
+++ /dev/null
@@ -1,36 +0,0 @@
----
-summary: "Exploration: model config, auth profiles, and fallback behavior"
-read_when:
- - Exploring future model selection + auth profile ideas
-title: "Model Config Exploration"
----
-
-# Model Config (Exploration)
-
-This document captures **ideas** for future model configuration. It is not a
-shipping spec. For current behavior, see:
-
-- [Models](/concepts/models)
-- [Model failover](/concepts/model-failover)
-- [OAuth + profiles](/concepts/oauth)
-
-## Motivation
-
-Operators want:
-
-- Multiple auth profiles per provider (personal vs work).
-- Simple `/model` selection with predictable fallbacks.
-- Clear separation between text models and image-capable models.
-
-## Possible direction (high level)
-
-- Keep model selection simple: `provider/model` with optional aliases.
-- Let providers have multiple auth profiles, with an explicit order.
-- Use a global fallback list so all sessions fail over consistently.
-- Only override image routing when explicitly configured.
-
-## Open questions
-
-- Should profile rotation be per-provider or per-model?
-- How should the UI surface profile selection for a session?
-- What is the safest migration path from legacy config keys?
diff --git a/docs/experiments/research/memory.md b/docs/experiments/research/memory.md
deleted file mode 100644
index a4176610..00000000
--- a/docs/experiments/research/memory.md
+++ /dev/null
@@ -1,228 +0,0 @@
----
-summary: "Research notes: offline memory system for Atlas workspaces (Markdown source-of-truth + derived index)"
-read_when:
- - Designing workspace memory (~/.mayros/workspace) beyond daily Markdown logs
- - Deciding: standalone CLI vs deep Mayros integration
- - Adding offline recall + reflection (retain/recall/reflect)
-title: "Workspace Memory Research"
----
-
-# Workspace Memory v2 (offline): research notes
-
-Target: Atlas-style workspace (`agents.defaults.workspace`, default `~/.mayros/workspace`) where “memory” is stored as one Markdown file per day (`memory/YYYY-MM-DD.md`) plus a small set of stable files (e.g. `memory.md`, `MAYROS.md`).
-
-This doc proposes an **offline-first** memory architecture that keeps Markdown as the canonical, reviewable source of truth, but adds **structured recall** (search, entity summaries, confidence updates) via a derived index.
-
-## Why change?
-
-The current setup (one file per day) is excellent for:
-
-- “append-only” journaling
-- human editing
-- git-backed durability + auditability
-- low-friction capture (“just write it down”)
-
-It’s weak for:
-
-- high-recall retrieval (“what did we decide about X?”, “last time we tried Y?”)
-- entity-centric answers (“tell me about Alice / The Castle / warelay”) without rereading many files
-- opinion/preference stability (and evidence when it changes)
-- time constraints (“what was true during Nov 2025?”) and conflict resolution
-
-## Design goals
-
-- **Offline**: works without network; can run on laptop/Castle; no cloud dependency.
-- **Explainable**: retrieved items should be attributable (file + location) and separable from inference.
-- **Low ceremony**: daily logging stays Markdown, no heavy schema work.
-- **Incremental**: v1 is useful with FTS only; semantic/vector and graphs are optional upgrades.
-- **Agent-friendly**: makes “recall within token budgets” easy (return small bundles of facts).
-
-## North star model (Hindsight × Letta)
-
-Two pieces to blend:
-
-1. **Letta/MemGPT-style control loop**
-
-- keep a small “core” always in context (persona + key user facts)
-- everything else is out-of-context and retrieved via tools
-- memory writes are explicit tool calls (append/replace/insert), persisted, then re-injected next turn
-
-2. **Hindsight-style memory substrate**
-
-- separate what’s observed vs what’s believed vs what’s summarized
-- support retain/recall/reflect
-- confidence-bearing opinions that can evolve with evidence
-- entity-aware retrieval + temporal queries (even without full knowledge graphs)
-
-## Proposed architecture (Markdown source-of-truth + derived index)
-
-### Canonical store (git-friendly)
-
-Keep `~/.mayros/workspace` as canonical human-readable memory.
-
-Suggested workspace layout:
-
-```
-~/.mayros/workspace/
- memory.md # small: durable facts + preferences (core-ish)
- memory/
- YYYY-MM-DD.md # daily log (append; narrative)
- bank/ # “typed” memory pages (stable, reviewable)
- world.md # objective facts about the world
- experience.md # what the agent did (first-person)
- opinions.md # subjective prefs/judgments + confidence + evidence pointers
- entities/
- Peter.md
- The-Castle.md
- warelay.md
- ...
-```
-
-Notes:
-
-- **Daily log stays daily log**. No need to turn it into JSON.
-- The `bank/` files are **curated**, produced by reflection jobs, and can still be edited by hand.
-- `memory.md` remains “small + core-ish”: the things you want Atlas to see every session.
-
-### Derived store (machine recall)
-
-Add a derived index under the workspace (not necessarily git tracked):
-
-```
-~/.mayros/workspace/.memory/index.sqlite
-```
-
-Back it with:
-
-- SQLite schema for facts + entity links + opinion metadata
-- SQLite **FTS5** for lexical recall (fast, tiny, offline)
-- optional embeddings table for semantic recall (still offline)
-
-The index is always **rebuildable from Markdown**.
-
-## Retain / Recall / Reflect (operational loop)
-
-### Retain: normalize daily logs into “facts”
-
-Hindsight’s key insight that matters here: store **narrative, self-contained facts**, not tiny snippets.
-
-Practical rule for `memory/YYYY-MM-DD.md`:
-
-- at end of day (or during), add a `## Retain` section with 2–5 bullets that are:
- - narrative (cross-turn context preserved)
- - self-contained (standalone makes sense later)
- - tagged with type + entity mentions
-
-Example:
-
-```
-## Retain
-- W @Peter: Currently in Marrakech (Nov 27–Dec 1, 2025) for Andy’s birthday.
-- B @warelay: I fixed the Baileys WS crash by wrapping connection.update handlers in try/catch (see memory/2025-11-27.md).
-- O(c=0.95) @Peter: Prefers concise replies (<1500 chars) on WhatsApp; long content goes into files.
-```
-
-Minimal parsing:
-
-- Type prefix: `W` (world), `B` (experience/biographical), `O` (opinion), `S` (observation/summary; usually generated)
-- Entities: `@Peter`, `@warelay`, etc (slugs map to `bank/entities/*.md`)
-- Opinion confidence: `O(c=0.0..1.0)` optional
-
-If you don’t want authors to think about it: the reflect job can infer these bullets from the rest of the log, but having an explicit `## Retain` section is the easiest “quality lever”.
-
-### Recall: queries over the derived index
-
-Recall should support:
-
-- **lexical**: “find exact terms / names / commands” (FTS5)
-- **entity**: “tell me about X” (entity pages + entity-linked facts)
-- **temporal**: “what happened around Nov 27” / “since last week”
-- **opinion**: “what does Peter prefer?” (with confidence + evidence)
-
-Return format should be agent-friendly and cite sources:
-
-- `kind` (`world|experience|opinion|observation`)
-- `timestamp` (source day, or extracted time range if present)
-- `entities` (`["Peter","warelay"]`)
-- `content` (the narrative fact)
-- `source` (`memory/2025-11-27.md#L12` etc)
-
-### Reflect: produce stable pages + update beliefs
-
-Reflection is a scheduled job (daily or heartbeat `ultrathink`) that:
-
-- updates `bank/entities/*.md` from recent facts (entity summaries)
-- updates `bank/opinions.md` confidence based on reinforcement/contradiction
-- optionally proposes edits to `memory.md` (“core-ish” durable facts)
-
-Opinion evolution (simple, explainable):
-
-- each opinion has:
- - statement
- - confidence `c ∈ [0,1]`
- - last_updated
- - evidence links (supporting + contradicting fact IDs)
-- when new facts arrive:
- - find candidate opinions by entity overlap + similarity (FTS first, embeddings later)
- - update confidence by small deltas; big jumps require strong contradiction + repeated evidence
-
-## CLI integration: standalone vs deep integration
-
-Recommendation: **deep integration in Mayros**, but keep a separable core library.
-
-### Why integrate into Mayros?
-
-- Mayros already knows:
- - the workspace path (`agents.defaults.workspace`)
- - the session model + heartbeats
- - logging + troubleshooting patterns
-- You want the agent itself to call the tools:
- - `mayros memory recall "…" --k 25 --since 30d`
- - `mayros memory reflect --since 7d`
-
-### Why still split a library?
-
-- keep memory logic testable without gateway/runtime
-- reuse from other contexts (local scripts, future desktop app, etc.)
-
-Shape:
-The memory tooling is intended to be a small CLI + library layer, but this is exploratory only.
-
-## “S-Collide” / SuCo: when to use it (research)
-
-If “S-Collide” refers to **SuCo (Subspace Collision)**: it’s an ANN retrieval approach that targets strong recall/latency tradeoffs by using learned/structured collisions in subspaces (paper: arXiv 2411.14754, 2024).
-
-Pragmatic take for `~/.mayros/workspace`:
-
-- **don’t start** with SuCo.
-- start with SQLite FTS + (optional) simple embeddings; you’ll get most UX wins immediately.
-- consider SuCo/HNSW/ScaNN-class solutions only once:
- - corpus is big (tens/hundreds of thousands of chunks)
- - brute-force embedding search becomes too slow
- - recall quality is meaningfully bottlenecked by lexical search
-
-Offline-friendly alternatives (in increasing complexity):
-
-- SQLite FTS5 + metadata filters (zero ML)
-- Embeddings + brute force (works surprisingly far if chunk count is low)
-- HNSW index (common, robust; needs a library binding)
-- SuCo (research-grade; attractive if there’s a solid implementation you can embed)
-
-Open question:
-
-- what’s the **best** offline embedding model for “personal assistant memory” on your machines (laptop + desktop)?
- - if you already have Ollama: embed with a local model; otherwise ship a small embedding model in the toolchain.
-
-## Smallest useful pilot
-
-If you want a minimal, still-useful version:
-
-- Add `bank/` entity pages and a `## Retain` section in daily logs.
-- Use SQLite FTS for recall with citations (path + line numbers).
-- Add embeddings only if recall quality or scale demands it.
-
-## References
-
-- Letta / MemGPT concepts: “core memory blocks” + “archival memory” + tool-driven self-editing memory.
-- Hindsight Technical Report: “retain / recall / reflect”, four-network memory, narrative fact extraction, opinion confidence evolution.
-- SuCo: arXiv 2411.14754 (2024): “Subspace Collision” approximate nearest neighbor retrieval.
diff --git a/docs/refactor/exec-host.md b/docs/refactor/exec-host.md
deleted file mode 100644
index 7112d1a6..00000000
--- a/docs/refactor/exec-host.md
+++ /dev/null
@@ -1,316 +0,0 @@
----
-summary: "Refactor plan: exec host routing, node approvals, and headless runner"
-read_when:
- - Designing exec host routing or exec approvals
- - Implementing node runner + UI IPC
- - Adding exec host security modes and slash commands
-title: "Exec Host Refactor"
----
-
-# Exec host refactor plan
-
-## Goals
-
-- Add `exec.host` + `exec.security` to route execution across **sandbox**, **gateway**, and **node**.
-- Keep defaults **safe**: no cross-host execution unless explicitly enabled.
-- Split execution into a **headless runner service** with optional UI (macOS app) via local IPC.
-- Provide **per-agent** policy, allowlist, ask mode, and node binding.
-- Support **ask modes** that work _with_ or _without_ allowlists.
-- Cross-platform: Unix socket + token auth (macOS/Linux/Windows parity).
-
-## Non-goals
-
-- No legacy allowlist migration or legacy schema support.
-- No PTY/streaming for node exec (aggregated output only).
-- No new network layer beyond the existing Bridge + Gateway.
-
-## Decisions (locked)
-
-- **Config keys:** `exec.host` + `exec.security` (per-agent override allowed).
-- **Elevation:** keep `/elevated` as an alias for gateway full access.
-- **Ask default:** `on-miss`.
-- **Approvals store:** `~/.mayros/exec-approvals.json` (JSON, no legacy migration).
-- **Runner:** headless system service; UI app hosts a Unix socket for approvals.
-- **Node identity:** use existing `nodeId`.
-- **Socket auth:** Unix socket + token (cross-platform); split later if needed.
-- **Node host state:** `~/.mayros/node.json` (node id + pairing token).
-- **macOS exec host:** run `system.run` inside the macOS app; node host service forwards requests over local IPC.
-- **No XPC helper:** stick to Unix socket + token + peer checks.
-
-## Key concepts
-
-### Host
-
-- `sandbox`: Docker exec (current behavior).
-- `gateway`: exec on gateway host.
-- `node`: exec on node runner via Bridge (`system.run`).
-
-### Security mode
-
-- `deny`: always block.
-- `allowlist`: allow only matches.
-- `full`: allow everything (equivalent to elevated).
-
-### Ask mode
-
-- `off`: never ask.
-- `on-miss`: ask only when allowlist does not match.
-- `always`: ask every time.
-
-Ask is **independent** of allowlist; allowlist can be used with `always` or `on-miss`.
-
-### Policy resolution (per exec)
-
-1. Resolve `exec.host` (tool param → agent override → global default).
-2. Resolve `exec.security` and `exec.ask` (same precedence).
-3. If host is `sandbox`, proceed with local sandbox exec.
-4. If host is `gateway` or `node`, apply security + ask policy on that host.
-
-## Default safety
-
-- Default `exec.host = sandbox`.
-- Default `exec.security = deny` for `gateway` and `node`.
-- Default `exec.ask = on-miss` (only relevant if security allows).
-- If no node binding is set, **agent may target any node**, but only if policy allows it.
-
-## Config surface
-
-### Tool parameters
-
-- `exec.host` (optional): `sandbox | gateway | node`.
-- `exec.security` (optional): `deny | allowlist | full`.
-- `exec.ask` (optional): `off | on-miss | always`.
-- `exec.node` (optional): node id/name to use when `host=node`.
-
-### Config keys (global)
-
-- `tools.exec.host`
-- `tools.exec.security`
-- `tools.exec.ask`
-- `tools.exec.node` (default node binding)
-
-### Config keys (per agent)
-
-- `agents.list[].tools.exec.host`
-- `agents.list[].tools.exec.security`
-- `agents.list[].tools.exec.ask`
-- `agents.list[].tools.exec.node`
-
-### Alias
-
-- `/elevated on` = set `tools.exec.host=gateway`, `tools.exec.security=full` for the agent session.
-- `/elevated off` = restore previous exec settings for the agent session.
-
-## Approvals store (JSON)
-
-Path: `~/.mayros/exec-approvals.json`
-
-Purpose:
-
-- Local policy + allowlists for the **execution host** (gateway or node runner).
-- Ask fallback when no UI is available.
-- IPC credentials for UI clients.
-
-Proposed schema (v1):
-
-```json
-{
- "version": 1,
- "socket": {
- "path": "~/.mayros/exec-approvals.sock",
- "token": "base64-opaque-token"
- },
- "defaults": {
- "security": "deny",
- "ask": "on-miss",
- "askFallback": "deny"
- },
- "agents": {
- "agent-id-1": {
- "security": "allowlist",
- "ask": "on-miss",
- "allowlist": [
- {
- "pattern": "~/Projects/**/bin/rg",
- "lastUsedAt": 0,
- "lastUsedCommand": "rg -n TODO",
- "lastResolvedPath": "/Users/user/Projects/.../bin/rg"
- }
- ]
- }
- }
-}
-```
-
-Notes:
-
-- No legacy allowlist formats.
-- `askFallback` applies only when `ask` is required and no UI is reachable.
-- File permissions: `0600`.
-
-## Runner service (headless)
-
-### Role
-
-- Enforce `exec.security` + `exec.ask` locally.
-- Execute system commands and return output.
-- Emit Bridge events for exec lifecycle (optional but recommended).
-
-### Service lifecycle
-
-- Launchd/daemon on macOS; system service on Linux/Windows.
-- Approvals JSON is local to the execution host.
-- UI hosts a local Unix socket; runners connect on demand.
-
-## UI integration (macOS app)
-
-### IPC
-
-- Unix socket at `~/.mayros/exec-approvals.sock` (0600).
-- Token stored in `exec-approvals.json` (0600).
-- Peer checks: same-UID only.
-- Challenge/response: nonce + HMAC(token, request-hash) to prevent replay.
-- Short TTL (e.g., 10s) + max payload + rate limit.
-
-### Ask flow (macOS app exec host)
-
-1. Node service receives `system.run` from gateway.
-2. Node service connects to the local socket and sends the prompt/exec request.
-3. App validates peer + token + HMAC + TTL, then shows dialog if needed.
-4. App executes the command in UI context and returns output.
-5. Node service returns output to gateway.
-
-If UI missing:
-
-- Apply `askFallback` (`deny|allowlist|full`).
-
-### Diagram (SCI)
-
-```
-Agent -> Gateway -> Bridge -> Node Service (TS)
- | IPC (UDS + token + HMAC + TTL)
- v
- Mac App (UI + TCC + system.run)
-```
-
-## Node identity + binding
-
-- Use existing `nodeId` from Bridge pairing.
-- Binding model:
- - `tools.exec.node` restricts the agent to a specific node.
- - If unset, agent can pick any node (policy still enforces defaults).
-- Node selection resolution:
- - `nodeId` exact match
- - `displayName` (normalized)
- - `remoteIp`
- - `nodeId` prefix (>= 6 chars)
-
-## Eventing
-
-### Who sees events
-
-- System events are **per session** and shown to the agent on the next prompt.
-- Stored in the gateway in-memory queue (`enqueueSystemEvent`).
-
-### Event text
-
-- `Exec started (node=, id=)`
-- `Exec finished (node=, id=, code=)` + optional output tail
-- `Exec denied (node=, id=, )`
-
-### Transport
-
-Option A (recommended):
-
-- Runner sends Bridge `event` frames `exec.started` / `exec.finished`.
-- Gateway `handleBridgeEvent` maps these into `enqueueSystemEvent`.
-
-Option B:
-
-- Gateway `exec` tool handles lifecycle directly (synchronous only).
-
-## Exec flows
-
-### Sandbox host
-
-- Existing `exec` behavior (Docker or host when unsandboxed).
-- PTY supported in non-sandbox mode only.
-
-### Gateway host
-
-- Gateway process executes on its own machine.
-- Enforces local `exec-approvals.json` (security/ask/allowlist).
-
-### Node host
-
-- Gateway calls `node.invoke` with `system.run`.
-- Runner enforces local approvals.
-- Runner returns aggregated stdout/stderr.
-- Optional Bridge events for start/finish/deny.
-
-## Output caps
-
-- Cap combined stdout+stderr at **200k**; keep **tail 20k** for events.
-- Truncate with a clear suffix (e.g., `"… (truncated)"`).
-
-## Slash commands
-
-- `/exec host= security= ask= node=`
-- Per-agent, per-session overrides; non-persistent unless saved via config.
-- `/elevated on|off|ask|full` remains a shortcut for `host=gateway security=full` (with `full` skipping approvals).
-
-## Cross-platform story
-
-- The runner service is the portable execution target.
-- UI is optional; if missing, `askFallback` applies.
-- Windows/Linux support the same approvals JSON + socket protocol.
-
-## Implementation phases
-
-### Phase 1: config + exec routing
-
-- Add config schema for `exec.host`, `exec.security`, `exec.ask`, `exec.node`.
-- Update tool plumbing to respect `exec.host`.
-- Add `/exec` slash command and keep `/elevated` alias.
-
-### Phase 2: approvals store + gateway enforcement
-
-- Implement `exec-approvals.json` reader/writer.
-- Enforce allowlist + ask modes for `gateway` host.
-- Add output caps.
-
-### Phase 3: node runner enforcement
-
-- Update node runner to enforce allowlist + ask.
-- Add Unix socket prompt bridge to macOS app UI.
-- Wire `askFallback`.
-
-### Phase 4: events
-
-- Add node → gateway Bridge events for exec lifecycle.
-- Map to `enqueueSystemEvent` for agent prompts.
-
-### Phase 5: UI polish
-
-- Mac app: allowlist editor, per-agent switcher, ask policy UI.
-- Node binding controls (optional).
-
-## Testing plan
-
-- Unit tests: allowlist matching (glob + case-insensitive).
-- Unit tests: policy resolution precedence (tool param → agent override → global).
-- Integration tests: node runner deny/allow/ask flows.
-- Bridge event tests: node event → system event routing.
-
-## Open risks
-
-- UI unavailability: ensure `askFallback` is respected.
-- Long-running commands: rely on timeout + output caps.
-- Multi-node ambiguity: error unless node binding or explicit node param.
-
-## Related docs
-
-- [Exec tool](/tools/exec)
-- [Exec approvals](/tools/exec-approvals)
-- [Nodes](/nodes)
-- [Elevated mode](/tools/elevated)
diff --git a/docs/refactor/meshnet.md b/docs/refactor/meshnet.md
deleted file mode 100644
index a578efb5..00000000
--- a/docs/refactor/meshnet.md
+++ /dev/null
@@ -1,417 +0,0 @@
----
-summary: "MeshNet refactor: unify network protocol, roles, auth, approvals, identity"
-read_when:
- - Planning a unified network protocol for nodes + operator clients
- - Reworking approvals, pairing, TLS, and presence across devices
-title: "MeshNet Refactor"
----
-
-# MeshNet refactor (protocol + auth unification)
-
-## Hi
-
-Hi team — great direction; this unlocks simpler UX + stronger security.
-
-## Purpose
-
-Single, rigorous document for:
-
-- Current state: protocols, flows, trust boundaries.
-- Pain points: approvals, multi‑hop routing, UI duplication.
-- Proposed new state: one protocol, scoped roles, unified auth/pairing, TLS pinning.
-- Identity model: stable IDs + cute slugs.
-- Migration plan, risks, open questions.
-
-## Goals (from discussion)
-
-- One protocol for all clients (mac app, CLI, iOS, Android, headless node).
-- Every network participant authenticated + paired.
-- Role clarity: nodes vs operators.
-- Central approvals routed to where the user is.
-- TLS encryption + optional pinning for all remote traffic.
-- Minimal code duplication.
-- Single machine should appear once (no UI/node duplicate entry).
-
-## Non‑goals (explicit)
-
-- Remove capability separation (still need least‑privilege).
-- Expose full gateway control plane without scope checks.
-- Make auth depend on human labels (slugs remain non‑security).
-
----
-
-# Current state (as‑is)
-
-## Two protocols
-
-### 1) Gateway WebSocket (control plane)
-
-- Full API surface: config, channels, models, sessions, agent runs, logs, nodes, etc.
-- Default bind: loopback. Remote access via SSH/Tailscale.
-- Auth: token/password via `connect`.
-- No TLS pinning (relies on loopback/tunnel).
-- Code:
- - `src/gateway/server/ws-connection/message-handler.ts`
- - `src/gateway/client.ts`
- - `docs/gateway/protocol.md`
-
-### 2) Bridge (node transport)
-
-- Narrow allowlist surface, node identity + pairing.
-- JSONL over TCP; optional TLS + cert fingerprint pinning.
-- TLS advertises fingerprint in discovery TXT.
-- Code:
- - `src/infra/bridge/server/connection.ts`
- - `src/gateway/server-bridge.ts`
- - `src/node-host/bridge-client.ts`
- - `docs/gateway/bridge-protocol.md`
-
-## Control plane clients today
-
-- CLI → Gateway WS via `callGateway` (`src/gateway/call.ts`).
-- macOS app UI → Gateway WS (`GatewayConnection`).
-- Web Control UI → Gateway WS.
-- ACP → Gateway WS.
-- Browser control uses its own HTTP control server.
-
-## Nodes today
-
-- macOS app in node mode connects to Gateway bridge (`MacNodeBridgeSession`).
-- iOS/Android apps connect to Gateway bridge.
-- Pairing + per‑node token stored on gateway.
-
-## Current approval flow (exec)
-
-- Agent uses `system.run` via Gateway.
-- Gateway invokes node over bridge.
-- Node runtime decides approval.
-- UI prompt shown by mac app (when node == mac app).
-- Node returns `invoke-res` to Gateway.
-- Multi‑hop, UI tied to node host.
-
-## Presence + identity today
-
-- Gateway presence entries from WS clients.
-- Node presence entries from bridge.
-- mac app can show two entries for same machine (UI + node).
-- Node identity stored in pairing store; UI identity separate.
-
----
-
-# Problems / pain points
-
-- Two protocol stacks to maintain (WS + Bridge).
-- Approvals on remote nodes: prompt appears on node host, not where user is.
-- TLS pinning only exists for bridge; WS depends on SSH/Tailscale.
-- Identity duplication: same machine shows as multiple instances.
-- Ambiguous roles: UI + node + CLI capabilities not clearly separated.
-
----
-
-# Proposed new state (MeshNet)
-
-## One protocol, two roles
-
-Single WS protocol with role + scope.
-
-- **Role: node** (capability host)
-- **Role: operator** (control plane)
-- Optional **scope** for operator:
- - `operator.read` (status + viewing)
- - `operator.write` (agent run, sends)
- - `operator.admin` (config, channels, models)
-
-### Role behaviors
-
-**Node**
-
-- Can register capabilities (`caps`, `commands`, permissions).
-- Can receive `invoke` commands (`system.run`, `camera.*`, `canvas.*`, `screen.record`, etc).
-- Can send events: `voice.transcript`, `agent.request`, `chat.subscribe`.
-- Cannot call config/models/channels/sessions/agent control plane APIs.
-
-**Operator**
-
-- Full control plane API, gated by scope.
-- Receives all approvals.
-- Does not directly execute OS actions; routes to nodes.
-
-### Key rule
-
-Role is per‑connection, not per device. A device may open both roles, separately.
-
----
-
-# Unified authentication + pairing
-
-## Client identity
-
-Every client provides:
-
-- `deviceId` (stable, derived from device key).
-- `displayName` (human name).
-- `role` + `scope` + `caps` + `commands`.
-
-## Pairing flow (unified)
-
-- Client connects unauthenticated.
-- Gateway creates a **pairing request** for that `deviceId`.
-- Operator receives prompt; approves/denies.
-- Gateway issues credentials bound to:
- - device public key
- - role(s)
- - scope(s)
- - capabilities/commands
-- Client persists token, reconnects authenticated.
-
-## Device‑bound auth (avoid bearer token replay)
-
-Preferred: device keypairs.
-
-- Device generates keypair once.
-- `deviceId = fingerprint(publicKey)`.
-- Gateway sends nonce; device signs; gateway verifies.
-- Tokens are issued to a public key (proof‑of‑possession), not a string.
-
-Alternatives:
-
-- mTLS (client certs): strongest, more ops complexity.
-- Short‑lived bearer tokens only as a temporary phase (rotate + revoke early).
-
-## Silent approval (SSH heuristic)
-
-Define it precisely to avoid a weak link. Prefer one:
-
-- **Local‑only**: auto‑pair when client connects via loopback/Unix socket.
-- **Challenge via SSH**: gateway issues nonce; client proves SSH by fetching it.
-- **Physical presence window**: after a local approval on gateway host UI, allow auto‑pair for a short window (e.g. 10 minutes).
-
-Always log + record auto‑approvals.
-
----
-
-# TLS everywhere (dev + prod)
-
-## Reuse existing bridge TLS
-
-Use current TLS runtime + fingerprint pinning:
-
-- `src/infra/bridge/server/tls.ts`
-- fingerprint verification logic in `src/node-host/bridge-client.ts`
-
-## Apply to WS
-
-- WS server supports TLS with same cert/key + fingerprint.
-- WS clients can pin fingerprint (optional).
-- Discovery advertises TLS + fingerprint for all endpoints.
- - Discovery is locator hints only; never a trust anchor.
-
-## Why
-
-- Reduce reliance on SSH/Tailscale for confidentiality.
-- Make remote mobile connections safe by default.
-
----
-
-# Approvals redesign (centralized)
-
-## Current
-
-Approval happens on node host (mac app node runtime). Prompt appears where node runs.
-
-## Proposed
-
-Approval is **gateway‑hosted**, UI delivered to operator clients.
-
-### New flow
-
-1. Gateway receives `system.run` intent (agent).
-2. Gateway creates approval record: `approval.requested`.
-3. Operator UI(s) show prompt.
-4. Approval decision sent to gateway: `approval.resolve`.
-5. Gateway invokes node command if approved.
-6. Node executes, returns `invoke-res`.
-
-### Approval semantics (hardening)
-
-- Broadcast to all operators; only the active UI shows a modal (others get a toast).
-- First resolution wins; gateway rejects subsequent resolves as already settled.
-- Default timeout: deny after N seconds (e.g. 60s), log reason.
-- Resolution requires `operator.approvals` scope.
-
-## Benefits
-
-- Prompt appears where user is (mac/phone).
-- Consistent approvals for remote nodes.
-- Node runtime stays headless; no UI dependency.
-
----
-
-# Role clarity examples
-
-## iPhone app
-
-- **Node role** for: mic, camera, voice chat, location, push‑to‑talk.
-- Optional **operator.read** for status and chat view.
-- Optional **operator.write/admin** only when explicitly enabled.
-
-## macOS app
-
-- Operator role by default (control UI).
-- Node role when “Mac node” enabled (system.run, screen, camera).
-- Same deviceId for both connections → merged UI entry.
-
-## CLI
-
-- Operator role always.
-- Scope derived by subcommand:
- - `status`, `logs` → read
- - `agent`, `message` → write
- - `config`, `channels` → admin
- - approvals + pairing → `operator.approvals` / `operator.pairing`
-
----
-
-# Identity + slugs
-
-## Stable ID
-
-Required for auth; never changes.
-Preferred:
-
-- Keypair fingerprint (public key hash).
-
-## Cute slug (lobster‑themed)
-
-Human label only.
-
-- Example: `scarlet-claw`, `saltwave`, `mantis-pinch`.
-- Stored in gateway registry, editable.
-- Collision handling: `-2`, `-3`.
-
-## UI grouping
-
-Same `deviceId` across roles → single “Instance” row:
-
-- Badge: `operator`, `node`.
-- Shows capabilities + last seen.
-
----
-
-# Migration strategy
-
-## Phase 0: Document + align
-
-- Publish this doc.
-- Inventory all protocol calls + approval flows.
-
-## Phase 1: Add roles/scopes to WS
-
-- Extend `connect` params with `role`, `scope`, `deviceId`.
-- Add allowlist gating for node role.
-
-## Phase 2: Bridge compatibility
-
-- Keep bridge running.
-- Add WS node support in parallel.
-- Gate features behind config flag.
-
-## Phase 3: Central approvals
-
-- Add approval request + resolve events in WS.
-- Update mac app UI to prompt + respond.
-- Node runtime stops prompting UI.
-
-## Phase 4: TLS unification
-
-- Add TLS config for WS using bridge TLS runtime.
-- Add pinning to clients.
-
-## Phase 5: Deprecate bridge
-
-- Migrate iOS/Android/mac node to WS.
-- Keep bridge as fallback; remove once stable.
-
-## Phase 6: Device‑bound auth
-
-- Require key‑based identity for all non‑local connections.
-- Add revocation + rotation UI.
-
----
-
-# Security notes
-
-- Role/allowlist enforced at gateway boundary.
-- No client gets “full” API without operator scope.
-- Pairing required for _all_ connections.
-- TLS + pinning reduces MITM risk for mobile.
-- SSH silent approval is a convenience; still recorded + revocable.
-- Discovery is never a trust anchor.
-- Capability claims are verified against server allowlists by platform/type.
-
-# Streaming + large payloads (node media)
-
-WS control plane is fine for small messages, but nodes also do:
-
-- camera clips
-- screen recordings
-- audio streams
-
-Options:
-
-1. WS binary frames + chunking + backpressure rules.
-2. Separate streaming endpoint (still TLS + auth).
-3. Keep bridge longer for media‑heavy commands, migrate last.
-
-Pick one before implementation to avoid drift.
-
-# Capability + command policy
-
-- Node‑reported caps/commands are treated as **claims**.
-- Gateway enforces per‑platform allowlists.
-- Any new command requires operator approval or explicit allowlist change.
-- Audit changes with timestamps.
-
-# Audit + rate limiting
-
-- Log: pairing requests, approvals/denials, token issuance/rotation/revocation.
-- Rate‑limit pairing spam and approval prompts.
-
-# Protocol hygiene
-
-- Explicit protocol version + error codes.
-- Reconnect rules + heartbeat policy.
-- Presence TTL and last‑seen semantics.
-
----
-
-# Open questions
-
-1. Single device running both roles: token model
- - Recommend separate tokens per role (node vs operator).
- - Same deviceId; different scopes; clearer revocation.
-
-2. Operator scope granularity
- - read/write/admin + approvals + pairing (minimum viable).
- - Consider per‑feature scopes later.
-
-3. Token rotation + revocation UX
- - Auto‑rotate on role change.
- - UI to revoke by deviceId + role.
-
-4. Discovery
- - Extend current Bonjour TXT to include WS TLS fingerprint + role hints.
- - Treat as locator hints only.
-
-5. Cross‑network approval
- - Broadcast to all operator clients; active UI shows modal.
- - First response wins; gateway enforces atomicity.
-
----
-
-# Summary (TL;DR)
-
-- Today: WS control plane + Bridge node transport.
-- Pain: approvals + duplication + two stacks.
-- Proposal: one WS protocol with explicit roles + scopes, unified pairing + TLS pinning, gateway‑hosted approvals, stable device IDs + cute slugs.
-- Outcome: simpler UX, stronger security, less duplication, better mobile routing.
diff --git a/docs/refactor/outbound-session-mirroring.md b/docs/refactor/outbound-session-mirroring.md
deleted file mode 100644
index d30e9683..00000000
--- a/docs/refactor/outbound-session-mirroring.md
+++ /dev/null
@@ -1,85 +0,0 @@
----
-title: Outbound Session Mirroring Refactor (Issue #1520)
-description: Track outbound session mirroring refactor notes, decisions, tests, and open items.
----
-
-# Outbound Session Mirroring Refactor (Issue #1520)
-
-## Status
-
-- In progress.
-- Core + plugin channel routing updated for outbound mirroring.
-- Gateway send now derives target session when sessionKey is omitted.
-
-## Context
-
-Outbound sends were mirrored into the _current_ agent session (tool session key) rather than the target channel session. Inbound routing uses channel/peer session keys, so outbound responses landed in the wrong session and first-contact targets often lacked session entries.
-
-## Goals
-
-- Mirror outbound messages into the target channel session key.
-- Create session entries on outbound when missing.
-- Keep thread/topic scoping aligned with inbound session keys.
-- Cover core channels plus bundled extensions.
-
-## Implementation Summary
-
-- New outbound session routing helper:
- - `src/infra/outbound/outbound-session.ts`
- - `resolveOutboundSessionRoute` builds target sessionKey using `buildAgentSessionKey` (dmScope + identityLinks).
- - `ensureOutboundSessionEntry` writes minimal `MsgContext` via `recordSessionMetaFromInbound`.
-- `runMessageAction` (send) derives target sessionKey and passes it to `executeSendAction` for mirroring.
-- `message-tool` no longer mirrors directly; it only resolves agentId from the current session key.
-- Plugin send path mirrors via `appendAssistantMessageToSessionTranscript` using the derived sessionKey.
-- Gateway send derives a target session key when none is provided (default agent), and ensures a session entry.
-
-## Thread/Topic Handling
-
-- Slack: replyTo/threadId -> `resolveThreadSessionKeys` (suffix).
-- Discord: threadId/replyTo -> `resolveThreadSessionKeys` with `useSuffix=false` to match inbound (thread channel id already scopes session).
-- Telegram: topic IDs map to `chatId:topic:` via `buildTelegramGroupPeerId`.
-
-## Extensions Covered
-
-- Matrix, MS Teams, Mattermost, BlueBubbles, Nextcloud Talk, Zalo, Zalo Personal, Nostr, Tlon.
-- Notes:
- - Mattermost targets now strip `@` for DM session key routing.
- - Zalo Personal uses DM peer kind for 1:1 targets (group only when `group:` is present).
- - BlueBubbles group targets strip `chat_*` prefixes to match inbound session keys.
- - Slack auto-thread mirroring matches channel ids case-insensitively.
- - Gateway send lowercases provided session keys before mirroring.
-
-## Decisions
-
-- **Gateway send session derivation**: if `sessionKey` is provided, use it. If omitted, derive a sessionKey from target + default agent and mirror there.
-- **Session entry creation**: always use `recordSessionMetaFromInbound` with `Provider/From/To/ChatType/AccountId/Originating*` aligned to inbound formats.
-- **Target normalization**: outbound routing uses resolved targets (post `resolveChannelTarget`) when available.
-- **Session key casing**: canonicalize session keys to lowercase on write and during migrations.
-
-## Tests Added/Updated
-
-- `src/infra/outbound/outbound-session.test.ts`
- - Slack thread session key.
- - Telegram topic session key.
- - dmScope identityLinks with Discord.
-- `src/agents/tools/message-tool.test.ts`
- - Derives agentId from session key (no sessionKey passed through).
-- `src/gateway/server-methods/send.test.ts`
- - Derives session key when omitted and creates session entry.
-
-## Open Items / Follow-ups
-
-- Voice-call plugin uses custom `voice:` session keys. Outbound mapping is not standardized here; if message-tool should support voice-call sends, add explicit mapping.
-- Confirm if any external plugin uses non-standard `From/To` formats beyond the bundled set.
-
-## Files Touched
-
-- `src/infra/outbound/outbound-session.ts`
-- `src/infra/outbound/outbound-send-service.ts`
-- `src/infra/outbound/message-action-runner.ts`
-- `src/agents/tools/message-tool.ts`
-- `src/gateway/server-methods/send.ts`
-- Tests in:
- - `src/infra/outbound/outbound-session.test.ts`
- - `src/agents/tools/message-tool.test.ts`
- - `src/gateway/server-methods/send.test.ts`
diff --git a/docs/refactor/plugin-sdk.md b/docs/refactor/plugin-sdk.md
deleted file mode 100644
index 1439bb83..00000000
--- a/docs/refactor/plugin-sdk.md
+++ /dev/null
@@ -1,214 +0,0 @@
----
-summary: "Plan: one clean plugin SDK + runtime for all messaging connectors"
-read_when:
- - Defining or refactoring the plugin architecture
- - Migrating channel connectors to the plugin SDK/runtime
-title: "Plugin SDK Refactor"
----
-
-# Plugin SDK + Runtime Refactor Plan
-
-Goal: every messaging connector is a plugin (bundled or external) using one stable API.
-No plugin imports from `src/**` directly. All dependencies go through the SDK or runtime.
-
-## Why now
-
-- Current connectors mix patterns: direct core imports, dist-only bridges, and custom helpers.
-- This makes upgrades brittle and blocks a clean external plugin surface.
-
-## Target architecture (two layers)
-
-### 1) Plugin SDK (compile-time, stable, publishable)
-
-Scope: types, helpers, and config utilities. No runtime state, no side effects.
-
-Contents (examples):
-
-- Types: `ChannelPlugin`, adapters, `ChannelMeta`, `ChannelCapabilities`, `ChannelDirectoryEntry`.
-- Config helpers: `buildChannelConfigSchema`, `setAccountEnabledInConfigSection`, `deleteAccountFromConfigSection`,
- `applyAccountNameToChannelSection`.
-- Pairing helpers: `PAIRING_APPROVED_MESSAGE`, `formatPairingApproveHint`.
-- Onboarding helpers: `promptChannelAccessConfig`, `addWildcardAllowFrom`, onboarding types.
-- Tool param helpers: `createActionGate`, `readStringParam`, `readNumberParam`, `readReactionParams`, `jsonResult`.
-- Docs link helper: `formatDocsLink`.
-
-Delivery:
-
-- Publish as `mayros/plugin-sdk` (or export from core under `mayros/plugin-sdk`).
-- Semver with explicit stability guarantees.
-
-### 2) Plugin Runtime (execution surface, injected)
-
-Scope: everything that touches core runtime behavior.
-Accessed via `MayrosPluginApi.runtime` so plugins never import `src/**`.
-
-Proposed surface (minimal but complete):
-
-```ts
-export type PluginRuntime = {
- channel: {
- text: {
- chunkMarkdownText(text: string, limit: number): string[];
- resolveTextChunkLimit(cfg: MayrosConfig, channel: string, accountId?: string): number;
- hasControlCommand(text: string, cfg: MayrosConfig): boolean;
- };
- reply: {
- dispatchReplyWithBufferedBlockDispatcher(params: {
- ctx: unknown;
- cfg: unknown;
- dispatcherOptions: {
- deliver: (payload: {
- text?: string;
- mediaUrls?: string[];
- mediaUrl?: string;
- }) => void | Promise;
- onError?: (err: unknown, info: { kind: string }) => void;
- };
- }): Promise;
- createReplyDispatcherWithTyping?: unknown; // adapter for Teams-style flows
- };
- routing: {
- resolveAgentRoute(params: {
- cfg: unknown;
- channel: string;
- accountId: string;
- peer: { kind: RoutePeerKind; id: string };
- }): { sessionKey: string; accountId: string };
- };
- pairing: {
- buildPairingReply(params: { channel: string; idLine: string; code: string }): string;
- readAllowFromStore(channel: string): Promise;
- upsertPairingRequest(params: {
- channel: string;
- id: string;
- meta?: { name?: string };
- }): Promise<{ code: string; created: boolean }>;
- };
- media: {
- fetchRemoteMedia(params: { url: string }): Promise<{ buffer: Buffer; contentType?: string }>;
- saveMediaBuffer(
- buffer: Uint8Array,
- contentType: string | undefined,
- direction: "inbound" | "outbound",
- maxBytes: number,
- ): Promise<{ path: string; contentType?: string }>;
- };
- mentions: {
- buildMentionRegexes(cfg: MayrosConfig, agentId?: string): RegExp[];
- matchesMentionPatterns(text: string, regexes: RegExp[]): boolean;
- };
- groups: {
- resolveGroupPolicy(
- cfg: MayrosConfig,
- channel: string,
- accountId: string,
- groupId: string,
- ): {
- allowlistEnabled: boolean;
- allowed: boolean;
- groupConfig?: unknown;
- defaultConfig?: unknown;
- };
- resolveRequireMention(
- cfg: MayrosConfig,
- channel: string,
- accountId: string,
- groupId: string,
- override?: boolean,
- ): boolean;
- };
- debounce: {
- createInboundDebouncer(opts: {
- debounceMs: number;
- buildKey: (v: T) => string | null;
- shouldDebounce: (v: T) => boolean;
- onFlush: (entries: T[]) => Promise;
- onError?: (err: unknown) => void;
- }): { push: (v: T) => void; flush: () => Promise };
- resolveInboundDebounceMs(cfg: MayrosConfig, channel: string): number;
- };
- commands: {
- resolveCommandAuthorizedFromAuthorizers(params: {
- useAccessGroups: boolean;
- authorizers: Array<{ configured: boolean; allowed: boolean }>;
- }): boolean;
- };
- };
- logging: {
- shouldLogVerbose(): boolean;
- getChildLogger(name: string): PluginLogger;
- };
- state: {
- resolveStateDir(cfg: MayrosConfig): string;
- };
-};
-```
-
-Notes:
-
-- Runtime is the only way to access core behavior.
-- SDK is intentionally small and stable.
-- Each runtime method maps to an existing core implementation (no duplication).
-
-## Migration plan (phased, safe)
-
-### Phase 0: scaffolding
-
-- Introduce `mayros/plugin-sdk`.
-- Add `api.runtime` to `MayrosPluginApi` with the surface above.
-- Maintain existing imports during a transition window (deprecation warnings).
-
-### Phase 1: bridge cleanup (low risk)
-
-- Replace per-extension `core-bridge.ts` with `api.runtime`.
-- Migrate BlueBubbles, Zalo, Zalo Personal first (already close).
-- Remove duplicated bridge code.
-
-### Phase 2: light direct-import plugins
-
-- Migrate Matrix to SDK + runtime.
-- Validate onboarding, directory, group mention logic.
-
-### Phase 3: heavy direct-import plugins
-
-- Migrate MS Teams (largest set of runtime helpers).
-- Ensure reply/typing semantics match current behavior.
-
-### Phase 4: iMessage pluginization
-
-- Move iMessage into `extensions/imessage`.
-- Replace direct core calls with `api.runtime`.
-- Keep config keys, CLI behavior, and docs intact.
-
-### Phase 5: enforcement
-
-- Add lint rule / CI check: no `extensions/**` imports from `src/**`.
-- Add plugin SDK/version compatibility checks (runtime + SDK semver).
-
-## Compatibility and versioning
-
-- SDK: semver, published, documented changes.
-- Runtime: versioned per core release. Add `api.runtime.version`.
-- Plugins declare a required runtime range (e.g., `mayrosRuntime: ">=2026.2.0"`).
-
-## Testing strategy
-
-- Adapter-level unit tests (runtime functions exercised with real core implementation).
-- Golden tests per plugin: ensure no behavior drift (routing, pairing, allowlist, mention gating).
-- A single end-to-end plugin sample used in CI (install + run + smoke).
-
-## Open questions
-
-- Where to host SDK types: separate package or core export?
-- Runtime type distribution: in SDK (types only) or in core?
-- How to expose docs links for bundled vs external plugins?
-- Do we allow limited direct core imports for in-repo plugins during transition?
-
-## Success criteria
-
-- All channel connectors are plugins using SDK + runtime.
-- No `extensions/**` imports from `src/**`.
-- New connector templates depend only on SDK + runtime.
-- External plugins can be developed and updated without core source access.
-
-Related docs: [Plugins](/tools/plugin), [Channels](/channels/index), [Configuration](/gateway/configuration).
diff --git a/docs/refactor/strict-config.md b/docs/refactor/strict-config.md
deleted file mode 100644
index fedffd80..00000000
--- a/docs/refactor/strict-config.md
+++ /dev/null
@@ -1,93 +0,0 @@
----
-summary: "Strict config validation + doctor-only migrations"
-read_when:
- - Designing or implementing config validation behavior
- - Working on config migrations or doctor workflows
- - Handling plugin config schemas or plugin load gating
-title: "Strict Config Validation"
----
-
-# Strict config validation (doctor-only migrations)
-
-## Goals
-
-- **Reject unknown config keys everywhere** (root + nested), except root `$schema` metadata.
-- **Reject plugin config without a schema**; don’t load that plugin.
-- **Remove legacy auto-migration on load**; migrations run via doctor only.
-- **Auto-run doctor (dry-run) on startup**; if invalid, block non-diagnostic commands.
-
-## Non-goals
-
-- Backward compatibility on load (legacy keys do not auto-migrate).
-- Silent drops of unrecognized keys.
-
-## Strict validation rules
-
-- Config must match the schema exactly at every level.
-- Unknown keys are validation errors (no passthrough at root or nested), except root `$schema` when it is a string.
-- `plugins.entries..config` must be validated by the plugin’s schema.
- - If a plugin lacks a schema, **reject plugin load** and surface a clear error.
-- Unknown `channels.` keys are errors unless a plugin manifest declares the channel id.
-- Plugin manifests (`mayros.plugin.json`) are required for all plugins.
-
-## Plugin schema enforcement
-
-- Each plugin provides a strict JSON Schema for its config (inline in the manifest).
-- Plugin load flow:
- 1. Resolve plugin manifest + schema (`mayros.plugin.json`).
- 2. Validate config against the schema.
- 3. If missing schema or invalid config: block plugin load, record error.
-- Error message includes:
- - Plugin id
- - Reason (missing schema / invalid config)
- - Path(s) that failed validation
-- Disabled plugins keep their config, but Doctor + logs surface a warning.
-
-## Doctor flow
-
-- Doctor runs **every time** config is loaded (dry-run by default).
-- If config invalid:
- - Print a summary + actionable errors.
- - Instruct: `mayros doctor --fix`.
-- `mayros doctor --fix`:
- - Applies migrations.
- - Removes unknown keys.
- - Writes updated config.
-
-## Command gating (when config is invalid)
-
-Allowed (diagnostic-only):
-
-- `mayros doctor`
-- `mayros logs`
-- `mayros health`
-- `mayros help`
-- `mayros status`
-- `mayros gateway status`
-
-Everything else must hard-fail with: “Config invalid. Run `mayros doctor --fix`.”
-
-## Error UX format
-
-- Single summary header.
-- Grouped sections:
- - Unknown keys (full paths)
- - Legacy keys / migrations needed
- - Plugin load failures (plugin id + reason + path)
-
-## Implementation touchpoints
-
-- `src/config/zod-schema.ts`: remove root passthrough; strict objects everywhere.
-- `src/config/zod-schema.providers.ts`: ensure strict channel schemas.
-- `src/config/validation.ts`: fail on unknown keys; do not apply legacy migrations.
-- `src/config/io.ts`: remove legacy auto-migrations; always run doctor dry-run.
-- `src/config/legacy*.ts`: move usage to doctor only.
-- `src/plugins/*`: add schema registry + gating.
-- CLI command gating in `src/cli`.
-
-## Tests
-
-- Unknown key rejection (root + nested).
-- Plugin missing schema → plugin load blocked with clear error.
-- Invalid config → gateway startup blocked except diagnostic commands.
-- Doctor dry-run auto; `doctor --fix` writes corrected config.
diff --git a/docs/zh-CN/experiments/onboarding-config-protocol.md b/docs/zh-CN/experiments/onboarding-config-protocol.md
deleted file mode 100644
index 99180187..00000000
--- a/docs/zh-CN/experiments/onboarding-config-protocol.md
+++ /dev/null
@@ -1,47 +0,0 @@
----
-read_when: Changing onboarding wizard steps or config schema endpoints
-summary: 新手引导向导和配置模式的 RPC 协议说明
-title: 新手引导和配置协议
-x-i18n:
- generated_at: "2026-02-03T07:47:10Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 55163b3ee029c02476800cb616a054e5adfe97dae5bb72f2763dce0079851e06
- source_path: experiments/onboarding-config-protocol.md
- workflow: 15
----
-
-# 新手引导 + 配置协议
-
-目的:CLI、macOS 应用和 Web UI 之间共享的新手引导 + 配置界面。
-
-## 组件
-
-- 向导引擎(共享会话 + 提示 + 新手引导状态)。
-- CLI 新手引导使用与 UI 客户端相同的向导流程。
-- Gateway 网关 RPC 公开向导 + 配置模式端点。
-- macOS 新手引导使用向导步骤模型。
-- Web UI 从 JSON Schema + UI 提示渲染配置表单。
-
-## Gateway 网关 RPC
-
-- `wizard.start` 参数:`{ mode?: "local"|"remote", workspace?: string }`
-- `wizard.next` 参数:`{ sessionId, answer?: { stepId, value? } }`
-- `wizard.cancel` 参数:`{ sessionId }`
-- `wizard.status` 参数:`{ sessionId }`
-- `config.schema` 参数:`{}`
-
-响应(结构)
-
-- 向导:`{ sessionId, done, step?, status?, error? }`
-- 配置模式:`{ schema, uiHints, version, generatedAt }`
-
-## UI 提示
-
-- `uiHints` 按路径键入;可选元数据(label/help/group/order/advanced/sensitive/placeholder)。
-- 敏感字段渲染为密码输入;无脱敏层。
-- 不支持的模式节点回退到原始 JSON 编辑器。
-
-## 注意
-
-- 本文档是跟踪新手引导/配置协议重构的唯一位置。
diff --git a/docs/zh-CN/experiments/plans/cron-add-hardening.md b/docs/zh-CN/experiments/plans/cron-add-hardening.md
deleted file mode 100644
index f24f2e89..00000000
--- a/docs/zh-CN/experiments/plans/cron-add-hardening.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-last_updated: "2026-01-05"
-owner: mayros
-status: complete
-summary: 加固 cron.add 输入处理,对齐 schema,改进 cron UI/智能体工具
-title: Cron Add 加固
-x-i18n:
- generated_at: "2026-02-03T07:47:26Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: d7e469674bd9435b846757ea0d5dc8f174eaa8533917fc013b1ef4f82859496d
- source_path: experiments/plans/cron-add-hardening.md
- workflow: 15
----
-
-# Cron Add 加固 & Schema 对齐
-
-## 背景
-
-最近的 Gateway 网关日志显示重复的 `cron.add` 失败,参数无效(缺少 `sessionTarget`、`wakeMode`、`payload`,以及格式错误的 `schedule`)。这表明至少有一个客户端(可能是智能体工具调用路径)正在发送包装的或部分指定的任务负载。另外,TypeScript 中的 cron 提供商枚举、Gateway 网关 schema、CLI 标志和 UI 表单类型之间存在漂移,加上 `cron.status` 的 UI 不匹配(期望 `jobCount` 而 Gateway 网关返回 `jobs`)。
-
-## 目标
-
-- 通过规范化常见的包装负载并推断缺失的 `kind` 字段来停止 `cron.add` INVALID_REQUEST 垃圾。
-- 在 Gateway 网关 schema、cron 类型、CLI 文档和 UI 表单之间对齐 cron 提供商列表。
-- 使智能体 cron 工具 schema 明确,以便 LLM 生成正确的任务负载。
-- 修复 Control UI cron 状态任务计数显示。
-- 添加测试以覆盖规范化和工具行为。
-
-## 非目标
-
-- 更改 cron 调度语义或任务执行行为。
-- 添加新的调度类型或 cron 表达式解析。
-- 除了必要的字段修复外,不大改 cron 的 UI/UX。
-
-## 发现(当前差距)
-
-- Gateway 网关中的 `CronPayloadSchema` 排除了 `signal` + `imessage`,而 TS 类型包含它们。
-- Control UI CronStatus 期望 `jobCount`,但 Gateway 网关返回 `jobs`。
-- 智能体 cron 工具 schema 允许任意 `job` 对象,导致格式错误的输入。
-- Gateway 网关严格验证 `cron.add` 而不进行规范化,因此包装的负载会失败。
-
-## 变更内容
-
-- `cron.add` 和 `cron.update` 现在规范化常见的包装形式并推断缺失的 `kind` 字段。
-- 智能体 cron 工具 schema 与 Gateway 网关 schema 匹配,减少无效负载。
-- 提供商枚举在 Gateway 网关、CLI、UI 和 macOS 选择器之间对齐。
-- Control UI 使用 Gateway 网关的 `jobs` 计数字段显示状态。
-
-## 当前行为
-
-- **规范化:**包装的 `data`/`job` 负载被解包;`schedule.kind` 和 `payload.kind` 在安全时被推断。
-- **默认值:**当缺失时,为 `wakeMode` 和 `sessionTarget` 应用安全默认值。
-- **提供商:**Discord/Slack/Signal/iMessage 现在在 CLI/UI 中一致显示。
-
-参见 [Cron 任务](/automation/cron-jobs) 了解规范化的形式和示例。
-
-## 验证
-
-- 观察 Gateway 网关日志中 `cron.add` INVALID_REQUEST 错误是否减少。
-- 确认 Control UI cron 状态在刷新后显示任务计数。
-
-## 可选后续工作
-
-- 手动 Control UI 冒烟测试:为每个提供商添加一个 cron 任务 + 验证状态任务计数。
-
-## 开放问题
-
-- `cron.add` 是否应该接受来自客户端的显式 `state`(当前被 schema 禁止)?
-- 我们是否应该允许 `webchat` 作为显式投递提供商(当前在投递解析中被过滤)?
diff --git a/docs/zh-CN/experiments/plans/group-policy-hardening.md b/docs/zh-CN/experiments/plans/group-policy-hardening.md
deleted file mode 100644
index afbb8b39..00000000
--- a/docs/zh-CN/experiments/plans/group-policy-hardening.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-read_when:
- - 查看历史 Telegram 允许列表更改
-summary: Telegram 允许列表加固:前缀 + 空白规范化
-title: Telegram 允许列表加固
-x-i18n:
- generated_at: "2026-02-03T07:47:16Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: a2eca5fcc85376948cfe1b6044f1a8bc69c7f0eb94d1ceafedc1e507ba544162
- source_path: experiments/plans/group-policy-hardening.md
- workflow: 15
----
-
-# Telegram 允许列表加固
-
-**日期**:2026-01-05
-**状态**:已完成
-**PR**:#216
-
-## 摘要
-
-Telegram 允许列表现在不区分大小写地接受 `telegram:` 和 `tg:` 前缀,并容忍意外的空白。这使入站允许列表检查与出站发送规范化保持一致。
-
-## 更改内容
-
-- 前缀 `telegram:` 和 `tg:` 被同等对待(不区分大小写)。
-- 允许列表条目会被修剪;空条目会被忽略。
-
-## 示例
-
-以下所有形式都被接受为同一 ID:
-
-- `telegram:123456`
-- `TG:123456`
-- `tg:123456`
-
-## 为什么重要
-
-从日志或聊天 ID 复制/粘贴通常会包含前缀和空白。规范化可避免在决定是否在私信或群组中响应时出现误判。
-
-## 相关文档
-
-- [群聊](/channels/groups)
-- [Telegram 提供商](/channels/telegram)
diff --git a/docs/zh-CN/experiments/plans/openresponses-gateway.md b/docs/zh-CN/experiments/plans/openresponses-gateway.md
deleted file mode 100644
index 1a92d747..00000000
--- a/docs/zh-CN/experiments/plans/openresponses-gateway.md
+++ /dev/null
@@ -1,121 +0,0 @@
----
-last_updated: "2026-01-19"
-owner: mayros
-status: draft
-summary: 计划:添加 OpenResponses /v1/responses 端点并干净地弃用 chat completions
-title: OpenResponses Gateway 网关计划
-x-i18n:
- generated_at: "2026-02-03T07:47:33Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 71a22c48397507d1648b40766a3153e420c54f2a2d5186d07e51eb3d12e4636a
- source_path: experiments/plans/openresponses-gateway.md
- workflow: 15
----
-
-# OpenResponses Gateway 网关集成计划
-
-## 背景
-
-Mayros Gateway 网关目前在 `/v1/chat/completions` 暴露了一个最小的 OpenAI 兼容 Chat Completions 端点(参见 [OpenAI Chat Completions](/gateway/openai-http-api))。
-
-Open Responses 是基于 OpenAI Responses API 的开放推理标准。它专为智能体工作流设计,使用基于项目的输入加语义流式事件。OpenResponses 规范定义的是 `/v1/responses`,而不是 `/v1/chat/completions`。
-
-## 目标
-
-- 添加一个遵循 OpenResponses 语义的 `/v1/responses` 端点。
-- 保留 Chat Completions 作为兼容层,易于禁用并最终移除。
-- 使用隔离的、可复用的 schema 标准化验证和解析。
-
-## 非目标
-
-- 第一阶段完全实现 OpenResponses 功能(图片、文件、托管工具)。
-- 替换内部智能体执行逻辑或工具编排。
-- 在第一阶段更改现有的 `/v1/chat/completions` 行为。
-
-## 研究摘要
-
-来源:OpenResponses OpenAPI、OpenResponses 规范网站和 Hugging Face 博客文章。
-
-提取的关键点:
-
-- `POST /v1/responses` 接受 `CreateResponseBody` 字段,如 `model`、`input`(字符串或 `ItemParam[]`)、`instructions`、`tools`、`tool_choice`、`stream`、`max_output_tokens` 和 `max_tool_calls`。
-- `ItemParam` 是以下类型的可区分联合:
- - 具有角色 `system`、`developer`、`user`、`assistant` 的 `message` 项
- - `function_call` 和 `function_call_output`
- - `reasoning`
- - `item_reference`
-- 成功响应返回带有 `object: "response"`、`status` 和 `output` 项的 `ResponseResource`。
-- 流式传输使用语义事件,如:
- - `response.created`、`response.in_progress`、`response.completed`、`response.failed`
- - `response.output_item.added`、`response.output_item.done`
- - `response.content_part.added`、`response.content_part.done`
- - `response.output_text.delta`、`response.output_text.done`
-- 规范要求:
- - `Content-Type: text/event-stream`
- - `event:` 必须匹配 JSON `type` 字段
- - 终止事件必须是字面量 `[DONE]`
-- Reasoning 项可能暴露 `content`、`encrypted_content` 和 `summary`。
-- HF 示例在请求中包含 `OpenResponses-Version: latest`(可选头部)。
-
-## 提议的架构
-
-- 添加 `src/gateway/open-responses.schema.ts`,仅包含 Zod schema(无 gateway 导入)。
-- 添加 `src/gateway/openresponses-http.ts`(或 `open-responses-http.ts`)用于 `/v1/responses`。
-- 保持 `src/gateway/openai-http.ts` 不变,作为遗留兼容适配器。
-- 添加配置 `gateway.http.endpoints.responses.enabled`(默认 `false`)。
-- 保持 `gateway.http.endpoints.chatCompletions.enabled` 独立;允许两个端点分别切换。
-- 当 Chat Completions 启用时发出启动警告,以表明其遗留状态。
-
-## Chat Completions 弃用路径
-
-- 保持严格的模块边界:responses 和 chat completions 之间不共享 schema 类型。
-- 通过配置使 Chat Completions 成为可选,这样无需代码更改即可禁用。
-- 一旦 `/v1/responses` 稳定,更新文档将 Chat Completions 标记为遗留。
-- 可选的未来步骤:将 Chat Completions 请求映射到 Responses 处理器,以便更简单地移除。
-
-## 第一阶段支持子集
-
-- 接受 `input` 为字符串或带有消息角色和 `function_call_output` 的 `ItemParam[]`。
-- 将 system 和 developer 消息提取到 `extraSystemPrompt` 中。
-- 使用最近的 `user` 或 `function_call_output` 作为智能体运行的当前消息。
-- 对不支持的内容部分(图片/文件)返回 `invalid_request_error` 拒绝。
-- 返回带有 `output_text` 内容的单个助手消息。
-- 返回带有零值的 `usage`,直到 token 计数接入。
-
-## 验证策略(无 SDK)
-
-- 为以下支持子集实现 Zod schema:
- - `CreateResponseBody`
- - `ItemParam` + 消息内容部分联合
- - `ResponseResource`
- - Gateway 网关使用的流式事件形状
-- 将 schema 保存在单个隔离模块中,以避免漂移并允许未来代码生成。
-
-## 流式实现(第一阶段)
-
-- 带有 `event:` 和 `data:` 的 SSE 行。
-- 所需序列(最小可行):
- - `response.created`
- - `response.output_item.added`
- - `response.content_part.added`
- - `response.output_text.delta`(根据需要重复)
- - `response.output_text.done`
- - `response.content_part.done`
- - `response.completed`
- - `[DONE]`
-
-## 测试和验证计划
-
-- 为 `/v1/responses` 添加端到端覆盖:
- - 需要认证
- - 非流式响应形状
- - 流式事件顺序和 `[DONE]`
- - 使用头部和 `user` 的会话路由
-- 保持 `src/gateway/openai-http.e2e.test.ts` 不变。
-- 手动:用 `stream: true` curl `/v1/responses` 并验证事件顺序和终止 `[DONE]`。
-
-## 文档更新(后续)
-
-- 为 `/v1/responses` 使用和示例添加新文档页面。
-- 更新 `/gateway/openai-http-api`,添加遗留说明和指向 `/v1/responses` 的指针。
diff --git a/docs/zh-CN/experiments/proposals/model-config.md b/docs/zh-CN/experiments/proposals/model-config.md
deleted file mode 100644
index 291e5a19..00000000
--- a/docs/zh-CN/experiments/proposals/model-config.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-read_when:
- - 探索未来模型选择和认证配置文件的方案
-summary: 探索:模型配置、认证配置文件和回退行为
-title: 模型配置探索
-x-i18n:
- generated_at: "2026-02-01T20:25:05Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 48623233d80f874c0ae853b51f888599cf8b50ae6fbfe47f6d7b0216bae9500b
- source_path: experiments/proposals/model-config.md
- workflow: 14
----
-
-# 模型配置(探索)
-
-本文档记录了未来模型配置的**构想**。这不是正式的发布规范。如需了解当前行为,请参阅:
-
-- [模型](/concepts/models)
-- [模型故障转移](/concepts/model-failover)
-- [OAuth + 配置文件](/concepts/oauth)
-
-## 动机
-
-运营者希望:
-
-- 每个提供商支持多个认证配置文件(个人 vs 工作)。
-- 简单的 `/model` 选择,并具有可预测的回退行为。
-- 文本模型与图像模型之间有清晰的分离。
-
-## 可能的方向(高层级)
-
-- 保持模型选择简洁:`provider/model` 加可选别名。
-- 允许提供商拥有多个认证配置文件,并指定明确的顺序。
-- 使用全局回退列表,使所有会话以一致的方式进行故障转移。
-- 仅在明确配置时才覆盖图像路由。
-
-## 待解决的问题
-
-- 配置文件轮换应该按提供商还是按模型进行?
-- UI 应如何为会话展示配置文件选择?
-- 从旧版配置键迁移的最安全路径是什么?
diff --git a/docs/zh-CN/experiments/research/memory.md b/docs/zh-CN/experiments/research/memory.md
deleted file mode 100644
index a0c33a95..00000000
--- a/docs/zh-CN/experiments/research/memory.md
+++ /dev/null
@@ -1,235 +0,0 @@
----
-read_when:
- - 设计超越每日 Markdown 日志的工作区记忆(~/.mayros/workspace)
- - Deciding: standalone CLI vs deep Mayros integration
- - 添加离线回忆 + 反思(retain/recall/reflect)
-summary: 研究笔记:Atlas 工作区的离线记忆系统(Markdown 作为数据源 + 派生索引)
-title: 工作区记忆研究
-x-i18n:
- generated_at: "2026-02-03T10:06:14Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 1753c8ee6284999fab4a94ff5fae7421c85233699c9d3088453d0c2133ac0feb
- source_path: experiments/research/memory.md
- workflow: 15
----
-
-# 工作区记忆 v2(离线):研究笔记
-
-目标:Atlas 风格的工作区(`agents.defaults.workspace`,默认 `~/.mayros/workspace`),其中"记忆"以每天一个 Markdown 文件(`memory/YYYY-MM-DD.md`)加上一小组稳定文件(例如 `memory.md`、`MAYROS.md`)的形式存储。
-
-本文档提出一种**离线优先**的记忆架构,保持 Markdown 作为规范的、可审查的数据源,但通过派生索引添加**结构化回忆**(搜索、实体摘要、置信度更新)。
-
-## 为什么要改变?
-
-当前设置(每天一个文件)非常适合:
-
-- "仅追加"式日志记录
-- 人工编辑
-- git 支持的持久性 + 可审计性
-- 低摩擦捕获("直接写下来")
-
-但它在以下方面较弱:
-
-- 高召回率检索("我们对 X 做了什么决定?"、"上次我们尝试 Y 时?")
-- 以实体为中心的答案("告诉我关于 Alice / The Castle / warelay 的信息")而无需重读多个文件
-- 观点/偏好稳定性(以及变化时的证据)
-- 时间约束("2025 年 11 月期间什么是真实的?")和冲突解决
-
-## 设计目标
-
-- **离线**:无需网络即可工作;可在笔记本电脑/Castle 上运行;无云依赖。
-- **可解释**:检索的项目应该可归因(文件 + 位置)并与推理分离。
-- **低仪式感**:每日日志保持 Markdown,无需繁重的 schema 工作。
-- **增量式**:v1 仅使用 FTS 就很有用;语义/向量和图是可选升级。
-- **对智能体友好**:使"在 token 预算内回忆"变得简单(返回小型事实包)。
-
-## 北极星模型(Hindsight × Letta)
-
-需要融合两个部分:
-
-1. **Letta/MemGPT 风格的控制循环**
-
-- 保持一个小的"核心"始终在上下文中(角色 + 关键用户事实)
-- 其他所有内容都在上下文之外,通过工具检索
-- 记忆写入是显式的工具调用(append/replace/insert),持久化后在下一轮重新注入
-
-2. **Hindsight 风格的记忆基底**
-
-- 分离观察到的、相信的和总结的内容
-- 支持 retain/recall/reflect
-- 带有置信度的观点可以随证据演变
-- 实体感知检索 + 时间查询(即使没有完整的知识图谱)
-
-## 提议的架构(Markdown 数据源 + 派生索引)
-
-### 规范存储(git 友好)
-
-保持 `~/.mayros/workspace` 作为规范的人类可读记忆。
-
-建议的工作区布局:
-
-```
-~/.mayros/workspace/
- memory.md # 小型:持久事实 + 偏好(类似核心)
- memory/
- YYYY-MM-DD.md # 每日日志(追加;叙事)
- bank/ # "类型化"记忆页面(稳定、可审查)
- world.md # 关于世界的客观事实
- experience.md # 智能体做了什么(第一人称)
- opinions.md # 主观偏好/判断 + 置信度 + 证据指针
- entities/
- Peter.md
- The-Castle.md
- warelay.md
- ...
-```
-
-注意:
-
-- **每日日志保持为每日日志**。无需将其转换为 JSON。
-- `bank/` 文件是**经过整理的**,由反思任务生成,仍可手动编辑。
-- `memory.md` 保持"小型 + 类似核心":你希望 Atlas 每次会话都能看到的内容。
-
-### 派生存储(机器回忆)
-
-在工作区下添加派生索引(不一定需要 git 跟踪):
-
-```
-~/.mayros/workspace/.memory/index.sqlite
-```
-
-后端支持:
-
-- 用于事实 + 实体链接 + 观点元数据的 SQLite schema
-- SQLite **FTS5** 用于词法回忆(快速、小巧、离线)
-- 可选的嵌入表用于语义回忆(仍然离线)
-
-索引始终**可从 Markdown 重建**。
-
-## Retain / Recall / Reflect(操作循环)
-
-### Retain:将每日日志规范化为"事实"
-
-Hindsight 在这里重要的关键洞察:存储**叙事性、自包含的事实**,而不是微小的片段。
-
-`memory/YYYY-MM-DD.md` 的实用规则:
-
-- 在一天结束时(或期间),添加一个 `## Retain` 部分,包含 2-5 个要点:
- - 叙事性(保留跨轮上下文)
- - 自包含(独立时也有意义)
- - 标记类型 + 实体提及
-
-示例:
-
-```
-## Retain
-- W @Peter: Currently in Marrakech (Nov 27–Dec 1, 2025) for Andy's birthday.
-- B @warelay: I fixed the Baileys WS crash by wrapping connection.update handlers in try/catch (see memory/2025-11-27.md).
-- O(c=0.95) @Peter: Prefers concise replies (<1500 chars) on WhatsApp; long content goes into files.
-```
-
-最小化解析:
-
-- 类型前缀:`W`(世界)、`B`(经历/传记)、`O`(观点)、`S`(观察/摘要;通常是生成的)
-- 实体:`@Peter`、`@warelay` 等(slug 映射到 `bank/entities/*.md`)
-- 观点置信度:`O(c=0.0..1.0)` 可选
-
-如果你不想让作者考虑这些:反思任务可以从日志的其余部分推断这些要点,但有一个显式的 `## Retain` 部分是最简单的"质量杠杆"。
-
-### Recall:对派生索引的查询
-
-Recall 应支持:
-
-- **词法**:"查找精确的术语/名称/命令"(FTS5)
-- **实体**:"告诉我关于 X 的信息"(实体页面 + 实体链接的事实)
-- **时间**:"11 月 27 日前后发生了什么"/"自上周以来"
-- **观点**:"Peter 偏好什么?"(带置信度 + 证据)
-
-返回格式应对智能体友好并引用来源:
-
-- `kind`(`world|experience|opinion|observation`)
-- `timestamp`(来源日期,或如果存在则提取的时间范围)
-- `entities`(`["Peter","warelay"]`)
-- `content`(叙事性事实)
-- `source`(`memory/2025-11-27.md#L12` 等)
-
-### Reflect:生成稳定页面 + 更新信念
-
-反思是一个定时任务(每日或心跳 `ultrathink`),它:
-
-- 根据最近的事实更新 `bank/entities/*.md`(实体摘要)
-- 根据强化/矛盾更新 `bank/opinions.md` 置信度
-- 可选地提议对 `memory.md`("类似核心"的持久事实)的编辑
-
-观点演变(简单、可解释):
-
-- 每个观点有:
- - 陈述
- - 置信度 `c ∈ [0,1]`
- - last_updated
- - 证据链接(支持 + 矛盾的事实 ID)
-- 当新事实到达时:
- - 通过实体重叠 + 相似性找到候选观点(先 FTS,后嵌入)
- - 通过小幅增量更新置信度;大幅跳跃需要强矛盾 + 重复证据
-
-## CLI 集成:独立 vs 深度集成
-
-建议:**深度集成到 Mayros**,但保持可分离的核心库。
-
-### 为什么要集成到 Mayros?
-
-- Mayros 已经知道:
- - 工作区路径(`agents.defaults.workspace`)
- - 会话模型 + 心跳
- - 日志记录 + 故障排除模式
-- 你希望智能体自己调用工具:
- - `mayros memory recall "…" --k 25 --since 30d`
- - `mayros memory reflect --since 7d`
-
-### 为什么仍要分离库?
-
-- 保持记忆逻辑可测试,无需 Gateway 网关/运行时
-- 可从其他上下文重用(本地脚本、未来的桌面应用等)
-
-形态:
-记忆工具预计是一个小型 CLI + 库层,但这仅是探索性的。
-
-## "S-Collide" / SuCo:何时使用(研究)
-
-如果"S-Collide"指的是 **SuCo(Subspace Collision)**:这是一种 ANN 检索方法,通过在子空间中使用学习/结构化碰撞来实现强召回/延迟权衡(论文:arXiv 2411.14754,2024)。
-
-对于 `~/.mayros/workspace` 的务实观点:
-
-- **不要从** SuCo 开始。
-- 从 SQLite FTS +(可选的)简单嵌入开始;你会立即获得大部分 UX 收益。
-- 仅在以下情况下考虑 SuCo/HNSW/ScaNN 级别的解决方案:
- - 语料库很大(数万/数十万个块)
- - 暴力嵌入搜索变得太慢
- - 召回质量明显受到词法搜索的瓶颈限制
-
-离线友好的替代方案(按复杂性递增):
-
-- SQLite FTS5 + 元数据过滤(零 ML)
-- 嵌入 + 暴力搜索(如果块数量低,效果出奇地好)
-- HNSW 索引(常见、稳健;需要库绑定)
-- SuCo(研究级;如果有可嵌入的可靠实现则很有吸引力)
-
-开放问题:
-
-- 对于你的机器(笔记本 + 台式机)上的"个人助理记忆",**最佳**的离线嵌入模型是什么?
- - 如果你已经有 Ollama:使用本地模型嵌入;否则在工具链中附带一个小型嵌入模型。
-
-## 最小可用试点
-
-如果你想要一个最小但仍有用的版本:
-
-- 添加 `bank/` 实体页面和每日日志中的 `## Retain` 部分。
-- 使用 SQLite FTS 进行带引用的回忆(路径 + 行号)。
-- 仅在召回质量或规模需要时添加嵌入。
-
-## 参考资料
-
-- Letta / MemGPT 概念:"核心记忆块" + "档案记忆" + 工具驱动的自编辑记忆。
-- Hindsight 技术报告:"retain / recall / reflect",四网络记忆,叙事性事实提取,观点置信度演变。
-- SuCo:arXiv 2411.14754(2024):"Subspace Collision"近似最近邻检索。
diff --git a/docs/zh-CN/refactor/exec-host.md b/docs/zh-CN/refactor/exec-host.md
deleted file mode 100644
index f933bb0d..00000000
--- a/docs/zh-CN/refactor/exec-host.md
+++ /dev/null
@@ -1,323 +0,0 @@
----
-read_when:
- - 设计 exec 主机路由或 exec 批准
- - 实现节点运行器 + UI IPC
- - 添加 exec 主机安全模式和斜杠命令
-summary: 重构计划:exec 主机路由、节点批准和无头运行器
-title: Exec 主机重构
-x-i18n:
- generated_at: "2026-02-03T07:54:43Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 53a9059cbeb1f3f1dbb48c2b5345f88ca92372654fef26f8481e651609e45e3a
- source_path: refactor/exec-host.md
- workflow: 15
----
-
-# Exec 主机重构计划
-
-## 目标
-
-- 添加 `exec.host` + `exec.security` 以在**沙箱**、**Gateway 网关**和**节点**之间路由执行。
-- 保持默认**安全**:除非明确启用,否则不进行跨主机执行。
-- 将执行拆分为**无头运行器服务**,通过本地 IPC 连接可选的 UI(macOS 应用)。
-- 提供**每智能体**策略、允许列表、询问模式和节点绑定。
-- 支持*与*或*不与*允许列表一起使用的**询问模式**。
-- 跨平台:Unix socket + token 认证(macOS/Linux/Windows 一致性)。
-
-## 非目标
-
-- 无遗留允许列表迁移或遗留 schema 支持。
-- 节点 exec 无 PTY/流式传输(仅聚合输出)。
-- 除现有 Bridge + Gateway 网关外无新网络层。
-
-## 决定(已锁定)
-
-- **配置键:** `exec.host` + `exec.security`(允许每智能体覆盖)。
-- **提升:** 保留 `/elevated` 作为 Gateway 网关完全访问的别名。
-- **询问默认:** `on-miss`。
-- **批准存储:** `~/.mayros/exec-approvals.json`(JSON,无遗留迁移)。
-- **运行器:** 无头系统服务;UI 应用托管 Unix socket 用于批准。
-- **节点身份:** 使用现有 `nodeId`。
-- **Socket 认证:** Unix socket + token(跨平台);如需要稍后拆分。
-- **节点主机状态:** `~/.mayros/node.json`(节点 id + 配对 token)。
-- **macOS exec 主机:** 在 macOS 应用内运行 `system.run`;节点主机服务通过本地 IPC 转发请求。
-- **无 XPC helper:** 坚持使用 Unix socket + token + 对等检查。
-
-## 关键概念
-
-### 主机
-
-- `sandbox`:Docker exec(当前行为)。
-- `gateway`:在 Gateway 网关主机上执行。
-- `node`:通过 Bridge 在节点运行器上执行(`system.run`)。
-
-### 安全模式
-
-- `deny`:始终阻止。
-- `allowlist`:仅允许匹配项。
-- `full`:允许一切(等同于提升模式)。
-
-### 询问模式
-
-- `off`:从不询问。
-- `on-miss`:仅在允许列表不匹配时询问。
-- `always`:每次都询问。
-
-询问**独立于**允许列表;允许列表可与 `always` 或 `on-miss` 一起使用。
-
-### 策略解析(每次执行)
-
-1. 解析 `exec.host`(工具参数 → 智能体覆盖 → 全局默认)。
-2. 解析 `exec.security` 和 `exec.ask`(相同优先级)。
-3. 如果主机是 `sandbox`,继续本地沙箱执行。
-4. 如果主机是 `gateway` 或 `node`,在该主机上应用安全 + 询问策略。
-
-## 默认安全
-
-- 默认 `exec.host = sandbox`。
-- `gateway` 和 `node` 默认 `exec.security = deny`。
-- 默认 `exec.ask = on-miss`(仅在安全允许时相关)。
-- 如果未设置节点绑定,**智能体可以定向任何节点**,但仅在策略允许时。
-
-## 配置表面
-
-### 工具参数
-
-- `exec.host`(可选):`sandbox | gateway | node`。
-- `exec.security`(可选):`deny | allowlist | full`。
-- `exec.ask`(可选):`off | on-miss | always`。
-- `exec.node`(可选):当 `host=node` 时使用的节点 id/名称。
-
-### 配置键(全局)
-
-- `tools.exec.host`
-- `tools.exec.security`
-- `tools.exec.ask`
-- `tools.exec.node`(默认节点绑定)
-
-### 配置键(每智能体)
-
-- `agents.list[].tools.exec.host`
-- `agents.list[].tools.exec.security`
-- `agents.list[].tools.exec.ask`
-- `agents.list[].tools.exec.node`
-
-### 别名
-
-- `/elevated on` = 为智能体会话设置 `tools.exec.host=gateway`、`tools.exec.security=full`。
-- `/elevated off` = 为智能体会话恢复之前的 exec 设置。
-
-## 批准存储(JSON)
-
-路径:`~/.mayros/exec-approvals.json`
-
-用途:
-
-- **执行主机**(Gateway 网关或节点运行器)的本地策略 + 允许列表。
-- 无 UI 可用时的询问回退。
-- UI 客户端的 IPC 凭证。
-
-建议的 schema(v1):
-
-```json
-{
- "version": 1,
- "socket": {
- "path": "~/.mayros/exec-approvals.sock",
- "token": "base64-opaque-token"
- },
- "defaults": {
- "security": "deny",
- "ask": "on-miss",
- "askFallback": "deny"
- },
- "agents": {
- "agent-id-1": {
- "security": "allowlist",
- "ask": "on-miss",
- "allowlist": [
- {
- "pattern": "~/Projects/**/bin/rg",
- "lastUsedAt": 0,
- "lastUsedCommand": "rg -n TODO",
- "lastResolvedPath": "/Users/user/Projects/.../bin/rg"
- }
- ]
- }
- }
-}
-```
-
-注意事项:
-
-- 无遗留允许列表格式。
-- `askFallback` 仅在需要 `ask` 且无法访问 UI 时应用。
-- 文件权限:`0600`。
-
-## 运行器服务(无头)
-
-### 角色
-
-- 在本地强制执行 `exec.security` + `exec.ask`。
-- 执行系统命令并返回输出。
-- 为 exec 生命周期发出 Bridge 事件(可选但推荐)。
-
-### 服务生命周期
-
-- macOS 上的 Launchd/daemon;Linux/Windows 上的系统服务。
-- 批准 JSON 是执行主机本地的。
-- UI 托管本地 Unix socket;运行器按需连接。
-
-## UI 集成(macOS 应用)
-
-### IPC
-
-- Unix socket 位于 `~/.mayros/exec-approvals.sock`(0600)。
-- Token 存储在 `exec-approvals.json`(0600)中。
-- 对等检查:仅同 UID。
-- 挑战/响应:nonce + HMAC(token, request-hash) 防止重放。
-- 短 TTL(例如 10s)+ 最大负载 + 速率限制。
-
-### 询问流程(macOS 应用 exec 主机)
-
-1. 节点服务从 Gateway 网关接收 `system.run`。
-2. 节点服务连接到本地 socket 并发送提示/exec 请求。
-3. 应用验证对等 + token + HMAC + TTL,然后在需要时显示对话框。
-4. 应用在 UI 上下文中执行命令并返回输出。
-5. 节点服务将输出返回给 Gateway 网关。
-
-如果 UI 缺失:
-
-- 应用 `askFallback`(`deny|allowlist|full`)。
-
-### 图示(SCI)
-
-```
-Agent -> Gateway -> Bridge -> Node Service (TS)
- | IPC (UDS + token + HMAC + TTL)
- v
- Mac App (UI + TCC + system.run)
-```
-
-## 节点身份 + 绑定
-
-- 使用 Bridge 配对中的现有 `nodeId`。
-- 绑定模型:
- - `tools.exec.node` 将智能体限制为特定节点。
- - 如果未设置,智能体可以选择任何节点(策略仍强制执行默认值)。
-- 节点选择解析:
- - `nodeId` 精确匹配
- - `displayName`(规范化)
- - `remoteIp`
- - `nodeId` 前缀(>= 6 字符)
-
-## 事件
-
-### 谁看到事件
-
-- 系统事件是**每会话**的,在下一个提示时显示给智能体。
-- 存储在 Gateway 网关内存队列中(`enqueueSystemEvent`)。
-
-### 事件文本
-
-- `Exec started (node=, id=)`
-- `Exec finished (node=, id=, code=)` + 可选输出尾部
-- `Exec denied (node=, id=, )`
-
-### 传输
-
-选项 A(推荐):
-
-- 运行器发送 Bridge `event` 帧 `exec.started` / `exec.finished`。
-- Gateway 网关 `handleBridgeEvent` 将这些映射到 `enqueueSystemEvent`。
-
-选项 B:
-
-- Gateway 网关 `exec` 工具直接处理生命周期(仅同步)。
-
-## Exec 流程
-
-### 沙箱主机
-
-- 现有 `exec` 行为(Docker 或无沙箱时的主机)。
-- 仅在非沙箱模式下支持 PTY。
-
-### Gateway 网关主机
-
-- Gateway 网关进程在其自己的机器上执行。
-- 强制执行本地 `exec-approvals.json`(安全/询问/允许列表)。
-
-### 节点主机
-
-- Gateway 网关调用 `node.invoke` 配合 `system.run`。
-- 运行器强制执行本地批准。
-- 运行器返回聚合的 stdout/stderr。
-- 可选的 Bridge 事件用于开始/完成/拒绝。
-
-## 输出上限
-
-- 组合 stdout+stderr 上限为 **200k**;为事件保留**尾部 20k**。
-- 使用清晰的后缀截断(例如 `"… (truncated)"`)。
-
-## 斜杠命令
-
-- `/exec host= security= ask= node=`
-- 每智能体、每会话覆盖;除非通过配置保存,否则非持久。
-- `/elevated on|off|ask|full` 仍然是 `host=gateway security=full` 的快捷方式(`full` 跳过批准)。
-
-## 跨平台方案
-
-- 运行器服务是可移植的执行目标。
-- UI 是可选的;如果缺失,应用 `askFallback`。
-- Windows/Linux 支持相同的批准 JSON + socket 协议。
-
-## 实现阶段
-
-### 阶段 1:配置 + exec 路由
-
-- 为 `exec.host`、`exec.security`、`exec.ask`、`exec.node` 添加配置 schema。
-- 更新工具管道以遵守 `exec.host`。
-- 添加 `/exec` 斜杠命令并保留 `/elevated` 别名。
-
-### 阶段 2:批准存储 + Gateway 网关强制执行
-
-- 实现 `exec-approvals.json` 读取器/写入器。
-- 为 `gateway` 主机强制执行允许列表 + 询问模式。
-- 添加输出上限。
-
-### 阶段 3:节点运行器强制执行
-
-- 更新节点运行器以强制执行允许列表 + 询问。
-- 添加 Unix socket 提示桥接到 macOS 应用 UI。
-- 连接 `askFallback`。
-
-### 阶段 4:事件
-
-- 为 exec 生命周期添加节点 → Gateway 网关 Bridge 事件。
-- 映射到 `enqueueSystemEvent` 用于智能体提示。
-
-### 阶段 5:UI 完善
-
-- Mac 应用:允许列表编辑器、每智能体切换器、询问策略 UI。
-- 节点绑定控制(可选)。
-
-## 测试计划
-
-- 单元测试:允许列表匹配(glob + 不区分大小写)。
-- 单元测试:策略解析优先级(工具参数 → 智能体覆盖 → 全局)。
-- 集成测试:节点运行器拒绝/允许/询问流程。
-- Bridge 事件测试:节点事件 → 系统事件路由。
-
-## 开放风险
-
-- UI 不可用:确保遵守 `askFallback`。
-- 长时间运行的命令:依赖超时 + 输出上限。
-- 多节点歧义:除非有节点绑定或显式节点参数,否则报错。
-
-## 相关文档
-
-- [Exec 工具](/tools/exec)
-- [执行批准](/tools/exec-approvals)
-- [节点](/nodes)
-- [提升模式](/tools/elevated)
diff --git a/docs/zh-CN/refactor/meshnet.md b/docs/zh-CN/refactor/meshnet.md
deleted file mode 100644
index 3987f669..00000000
--- a/docs/zh-CN/refactor/meshnet.md
+++ /dev/null
@@ -1,424 +0,0 @@
----
-read_when:
- - 规划节点 + 操作者客户端的统一网络协议
- - 重新设计跨设备的审批、配对、TLS 和在线状态
-summary: MeshNet 重构:统一网络协议、角色、认证、审批、身份
-title: MeshNet 重构
-x-i18n:
- generated_at: "2026-02-03T07:55:03Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 719b219c3b326479658fe6101c80d5273fc56eb3baf50be8535e0d1d2bb7987f
- source_path: refactor/meshnet.md
- workflow: 15
----
-
-# MeshNet 重构(协议 + 认证统一)
-
-## 嗨
-
-嗨 Peter — 方向很好;这将解锁更简单的用户体验 + 更强的安全性。
-
-## 目的
-
-单一、严谨的文档用于:
-
-- 当前状态:协议、流程、信任边界。
-- 痛点:审批、多跳路由、UI 重复。
-- 提议的新状态:一个协议、作用域角色、统一的认证/配对、TLS 固定。
-- 身份模型:稳定 ID + 可爱的别名。
-- 迁移计划、风险、开放问题。
-
-## 目标(来自讨论)
-
-- 所有客户端使用一个协议(mac 应用、CLI、iOS、Android、无头节点)。
-- 每个网络参与者都经过认证 + 配对。
-- 角色清晰:节点 vs 操作者。
-- 中央审批路由到用户所在位置。
-- 所有远程流量使用 TLS 加密 + 可选固定。
-- 最小化代码重复。
-- 单台机器应该只显示一次(无 UI/节点重复条目)。
-
-## 非目标(明确)
-
-- 移除能力分离(仍需要最小权限)。
-- 不经作用域检查就暴露完整的 Gateway 网关控制平面。
-- 使认证依赖于人类标签(别名仍然是非安全性的)。
-
----
-
-# 当前状态(现状)
-
-## 两个协议
-
-### 1) Gateway 网关 WebSocket(控制平面)
-
-- 完整 API 表面:配置、渠道、模型、会话、智能体运行、日志、节点等。
-- 默认绑定:loopback。通过 SSH/Tailscale 远程访问。
-- 认证:通过 `connect` 的令牌/密码。
-- 无 TLS 固定(依赖 loopback/隧道)。
-- 代码:
- - `src/gateway/server/ws-connection/message-handler.ts`
- - `src/gateway/client.ts`
- - `docs/gateway/protocol.md`
-
-### 2) Bridge(节点传输)
-
-- 窄允许列表表面,节点身份 + 配对。
-- TCP 上的 JSONL;可选 TLS + 证书指纹固定。
-- TLS 在设备发现 TXT 中公布指纹。
-- 代码:
- - `src/infra/bridge/server/connection.ts`
- - `src/gateway/server-bridge.ts`
- - `src/node-host/bridge-client.ts`
- - `docs/gateway/bridge-protocol.md`
-
-## 当前的控制平面客户端
-
-- CLI → 通过 `callGateway`(`src/gateway/call.ts`)连接 Gateway 网关 WS。
-- macOS 应用 UI → Gateway 网关 WS(`GatewayConnection`)。
-- Web 控制 UI → Gateway 网关 WS。
-- ACP → Gateway 网关 WS。
-- 浏览器控制使用自己的 HTTP 控制服务器。
-
-## 当前的节点
-
-- macOS 应用在节点模式下连接到 Gateway 网关 bridge(`MacNodeBridgeSession`)。
-- iOS/Android 应用连接到 Gateway 网关 bridge。
-- 配对 + 每节点令牌存储在 Gateway 网关上。
-
-## 当前审批流程(exec)
-
-- 智能体通过 Gateway 网关使用 `system.run`。
-- Gateway 网关通过 bridge 调用节点。
-- 节点运行时决定审批。
-- UI 提示由 mac 应用显示(当节点 == mac 应用时)。
-- 节点向 Gateway 网关返回 `invoke-res`。
-- 多跳,UI 绑定到节点主机。
-
-## 当前的在线状态 + 身份
-
-- 来自 WS 客户端的 Gateway 网关在线状态条目。
-- 来自 bridge 的节点在线状态条目。
-- mac 应用可能为同一台机器显示两个条目(UI + 节点)。
-- 节点身份存储在配对存储中;UI 身份是分开的。
-
----
-
-# 问题/痛点
-
-- 需要维护两个协议栈(WS + Bridge)。
-- 远程节点上的审批:提示出现在节点主机上,而不是用户所在位置。
-- TLS 固定仅存在于 bridge;WS 依赖 SSH/Tailscale。
-- 身份重复:同一台机器显示为多个实例。
-- 角色模糊:UI + 节点 + CLI 能力没有明确分离。
-
----
-
-# 提议的新状态(MeshNet)
-
-## 一个协议,两个角色
-
-带有角色 + 作用域的单一 WS 协议。
-
-- **角色:node**(能力宿主)
-- **角色:operator**(控制平面)
-- 操作者的可选**作用域**:
- - `operator.read`(状态 + 查看)
- - `operator.write`(智能体运行、发送)
- - `operator.admin`(配置、渠道、模型)
-
-### 角色行为
-
-**Node**
-
-- 可以注册能力(`caps`、`commands`、permissions)。
-- 可以接收 `invoke` 命令(`system.run`、`camera.*`、`canvas.*`、`screen.record` 等)。
-- 可以发送事件:`voice.transcript`、`agent.request`、`chat.subscribe`。
-- 不能调用配置/模型/渠道/会话/智能体控制平面 API。
-
-**Operator**
-
-- 完整控制平面 API,受作用域限制。
-- 接收所有审批。
-- 不直接执行 OS 操作;路由到节点。
-
-### 关键规则
-
-角色是按连接的,不是按设备。一个设备可以分别打开两个角色。
-
----
-
-# 统一认证 + 配对
-
-## 客户端身份
-
-每个客户端提供:
-
-- `deviceId`(稳定的,从设备密钥派生)。
-- `displayName`(人类名称)。
-- `role` + `scope` + `caps` + `commands`。
-
-## 配对流程(统一)
-
-- 客户端未认证连接。
-- Gateway 网关为该 `deviceId` 创建**配对请求**。
-- 操作者收到提示;批准/拒绝。
-- Gateway 网关颁发绑定到以下内容的凭证:
- - 设备公钥
- - 角色
- - 作用域
- - 能力/命令
-- 客户端持久化令牌,重新认证连接。
-
-## 设备绑定认证(避免 bearer 令牌重放)
-
-首选:设备密钥对。
-
-- 设备一次性生成密钥对。
-- `deviceId = fingerprint(publicKey)`。
-- Gateway 网关发送 nonce;设备签名;Gateway 网关验证。
-- 令牌颁发给公钥(所有权证明),而不是字符串。
-
-替代方案:
-
-- mTLS(客户端证书):最强,运维复杂度更高。
-- 短期 bearer 令牌仅作为临时阶段(早期轮换 + 撤销)。
-
-## 静默批准(SSH 启发式)
-
-精确定义以避免薄弱环节。优选其一:
-
-- **仅限本地**:当客户端通过 loopback/Unix socket 连接时自动配对。
-- **通过 SSH 质询**:Gateway 网关颁发 nonce;客户端通过获取它来证明 SSH。
-- **物理存在窗口**:在 Gateway 网关主机 UI 上本地批准后,允许在短窗口内(例如 10 分钟)自动配对。
-
-始终记录 + 记录自动批准。
-
----
-
-# TLS 无处不在(开发 + 生产)
-
-## 复用现有 bridge TLS
-
-使用当前 TLS 运行时 + 指纹固定:
-
-- `src/infra/bridge/server/tls.ts`
-- `src/node-host/bridge-client.ts` 中的指纹验证逻辑
-
-## 应用于 WS
-
-- WS 服务器使用相同的证书/密钥 + 指纹支持 TLS。
-- WS 客户端可以固定指纹(可选)。
-- 设备发现为所有端点公布 TLS + 指纹。
- - 设备发现仅是定位器提示;永远不是信任锚。
-
-## 为什么
-
-- 减少对 SSH/Tailscale 的机密性依赖。
-- 默认情况下使远程移动连接安全。
-
----
-
-# 审批重新设计(集中化)
-
-## 当前
-
-审批发生在节点主机上(mac 应用节点运行时)。提示出现在节点运行的地方。
-
-## 提议
-
-审批是 **Gateway 网关托管的**,UI 传递给操作者客户端。
-
-### 新流程
-
-1. Gateway 网关接收 `system.run` 意图(智能体)。
-2. Gateway 网关创建审批记录:`approval.requested`。
-3. 操作者 UI 显示提示。
-4. 审批决定发送到 Gateway 网关:`approval.resolve`。
-5. 如果批准,Gateway 网关调用节点命令。
-6. 节点执行,返回 `invoke-res`。
-
-### 审批语义(加固)
-
-- 广播到所有操作者;只有活跃的 UI 显示模态框(其他显示 toast)。
-- 先解决者获胜;Gateway 网关拒绝后续解决为已结算。
-- 默认超时:N 秒后拒绝(例如 60 秒),记录原因。
-- 解决需要 `operator.approvals` 作用域。
-
-## 好处
-
-- 提示出现在用户所在位置(mac/手机)。
-- 远程节点的一致审批。
-- 节点运行时保持无头;无 UI 依赖。
-
----
-
-# 角色清晰示例
-
-## iPhone 应用
-
-- **Node 角色**用于:麦克风、相机、语音聊天、位置、一键通话。
-- 可选的 **operator.read** 用于状态和聊天视图。
-- 可选的 **operator.write/admin** 仅在明确启用时。
-
-## macOS 应用
-
-- 默认是 Operator 角色(控制 UI)。
-- 启用"Mac 节点"时是 Node 角色(system.run、屏幕、相机)。
-- 两个连接使用相同的 deviceId → 合并的 UI 条目。
-
-## CLI
-
-- 始终是 Operator 角色。
-- 作用域按子命令派生:
- - `status`、`logs` → read
- - `agent`、`message` → write
- - `config`、`channels` → admin
- - 审批 + 配对 → `operator.approvals` / `operator.pairing`
-
----
-
-# 身份 + 别名
-
-## 稳定 ID
-
-认证必需;永不改变。
-首选:
-
-- 密钥对指纹(公钥哈希)。
-
-## 可爱别名(龙虾主题)
-
-仅人类标签。
-
-- 示例:`scarlet-claw`、`saltwave`、`mantis-pinch`。
-- 存储在 Gateway 网关注册表中,可编辑。
-- 冲突处理:`-2`、`-3`。
-
-## UI 分组
-
-跨角色的相同 `deviceId` → 单个"实例"行:
-
-- 徽章:`operator`、`node`。
-- 显示能力 + 最后在线。
-
----
-
-# 迁移策略
-
-## 阶段 0:记录 + 对齐
-
-- 发布此文档。
-- 盘点所有协议调用 + 审批流程。
-
-## 阶段 1:向 WS 添加角色/作用域
-
-- 用 `role`、`scope`、`deviceId` 扩展 `connect` 参数。
-- 为 node 角色添加允许列表限制。
-
-## 阶段 2:Bridge 兼容性
-
-- 保持 bridge 运行。
-- 并行添加 WS node 支持。
-- 通过配置标志限制功能。
-
-## 阶段 3:中央审批
-
-- 在 WS 中添加审批请求 + 解决事件。
-- 更新 mac 应用 UI 以提示 + 响应。
-- 节点运行时停止提示 UI。
-
-## 阶段 4:TLS 统一
-
-- 使用 bridge TLS 运行时为 WS 添加 TLS 配置。
-- 向客户端添加固定。
-
-## 阶段 5:弃用 bridge
-
-- 将 iOS/Android/mac 节点迁移到 WS。
-- 保持 bridge 作为后备;稳定后移除。
-
-## 阶段 6:设备绑定认证
-
-- 所有非本地连接都需要基于密钥的身份。
-- 添加撤销 + 轮换 UI。
-
----
-
-# 安全说明
-
-- 角色/允许列表在 Gateway 网关边界强制执行。
-- 没有客户端可以在没有 operator 作用域的情况下获得"完整"API。
-- *所有*连接都需要配对。
-- TLS + 固定减少移动设备的 MITM 风险。
-- SSH 静默批准是便利措施;仍然记录 + 可撤销。
-- 设备发现永远不是信任锚。
-- 能力声明通过按平台/类型的服务器允许列表验证。
-
-# 流式传输 + 大型负载(节点媒体)
-
-WS 控制平面对于小消息没问题,但节点还做:
-
-- 相机剪辑
-- 屏幕录制
-- 音频流
-
-选项:
-
-1. WS 二进制帧 + 分块 + 背压规则。
-2. 单独的流式端点(仍然是 TLS + 认证)。
-3. 对于媒体密集型命令保持 bridge 更长时间,最后迁移。
-
-在实现前选择一个以避免漂移。
-
-# 能力 + 命令策略
-
-- 节点报告的 caps/commands 被视为**声明**。
-- Gateway 网关强制执行每平台允许列表。
-- 任何新命令都需要操作者批准或显式允许列表更改。
-- 用时间戳审计更改。
-
-# 审计 + 速率限制
-
-- 记录:配对请求、批准/拒绝、令牌颁发/轮换/撤销。
-- 速率限制配对垃圾和审批提示。
-
-# 协议卫生
-
-- 显式协议版本 + 错误代码。
-- 重连规则 + 心跳策略。
-- 在线状态 TTL 和最后在线语义。
-
----
-
-# 开放问题
-
-1. 同时运行两个角色的单个设备:令牌模型
- - 建议每个角色单独的令牌(node vs operator)。
- - 相同的 deviceId;不同的作用域;更清晰的撤销。
-
-2. 操作者作用域粒度
- - read/write/admin + approvals + pairing(最小可行)。
- - 以后考虑每功能作用域。
-
-3. 令牌轮换 + 撤销 UX
- - 角色更改时自动轮换。
- - 按 deviceId + 角色撤销的 UI。
-
-4. 设备发现
- - 扩展当前 Bonjour TXT 以包含 WS TLS 指纹 + 角色提示。
- - 仅作为定位器提示处理。
-
-5. 跨网络审批
- - 广播到所有操作者客户端;活跃的 UI 显示模态框。
- - 先响应者获胜;Gateway 网关强制原子性。
-
----
-
-# 总结(TL;DR)
-
-- 当前:WS 控制平面 + Bridge 节点传输。
-- 痛点:审批 + 重复 + 两个栈。
-- 提议:一个带有显式角色 + 作用域的 WS 协议,统一配对 + TLS 固定,Gateway 网关托管的审批,稳定设备 ID + 可爱别名。
-- 结果:更简单的 UX,更强的安全性,更少的重复,更好的移动路由。
diff --git a/docs/zh-CN/refactor/outbound-session-mirroring.md b/docs/zh-CN/refactor/outbound-session-mirroring.md
deleted file mode 100644
index 3d733a00..00000000
--- a/docs/zh-CN/refactor/outbound-session-mirroring.md
+++ /dev/null
@@ -1,92 +0,0 @@
----
-description: Track outbound session mirroring refactor notes, decisions, tests, and open items.
-title: 出站会话镜像重构(Issue
-x-i18n:
- generated_at: "2026-02-03T07:53:51Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: b88a72f36f7b6d8a71fde9d014c0a87e9a8b8b0d449b67119cf3b6f414fa2b81
- source_path: refactor/outbound-session-mirroring.md
- workflow: 15
----
-
-# 出站会话镜像重构(Issue #1520)
-
-## 状态
-
-- 进行中。
-- 核心 + 插件渠道路由已更新以支持出站镜像。
-- Gateway 网关发送现在在省略 sessionKey 时派生目标会话。
-
-## 背景
-
-出站发送被镜像到*当前*智能体会话(工具会话键)而不是目标渠道会话。入站路由使用渠道/对等方会话键,因此出站响应落在错误的会话中,首次联系的目标通常缺少会话条目。
-
-## 目标
-
-- 将出站消息镜像到目标渠道会话键。
-- 在缺失时为出站创建会话条目。
-- 保持线程/话题作用域与入站会话键对齐。
-- 涵盖核心渠道加内置扩展。
-
-## 实现摘要
-
-- 新的出站会话路由辅助器:
- - `src/infra/outbound/outbound-session.ts`
- - `resolveOutboundSessionRoute` 使用 `buildAgentSessionKey`(dmScope + identityLinks)构建目标 sessionKey。
- - `ensureOutboundSessionEntry` 通过 `recordSessionMetaFromInbound` 写入最小的 `MsgContext`。
-- `runMessageAction`(发送)派生目标 sessionKey 并将其传递给 `executeSendAction` 进行镜像。
-- `message-tool` 不再直接镜像;它只从当前会话键解析 agentId。
-- 插件发送路径使用派生的 sessionKey 通过 `appendAssistantMessageToSessionTranscript` 进行镜像。
-- Gateway 网关发送在未提供时派生目标会话键(默认智能体),并确保会话条目。
-
-## 线程/话题处理
-
-- Slack:replyTo/threadId -> `resolveThreadSessionKeys`(后缀)。
-- Discord:threadId/replyTo -> `resolveThreadSessionKeys`,`useSuffix=false` 以匹配入站(线程频道 id 已经作用域会话)。
-- Telegram:话题 ID 通过 `buildTelegramGroupPeerId` 映射到 `chatId:topic:`。
-
-## 涵盖的扩展
-
-- Matrix、MS Teams、Mattermost、BlueBubbles、Nextcloud Talk、Zalo、Zalo Personal、Nostr、Tlon。
-- 注意:
- - Mattermost 目标现在为私信会话键路由去除 `@`。
- - Zalo Personal 对 1:1 目标使用私信对等方类型(仅当存在 `group:` 时才使用群组)。
- - BlueBubbles 群组目标去除 `chat_*` 前缀以匹配入站会话键。
- - Slack 自动线程镜像不区分大小写地匹配频道 id。
- - Gateway 网关发送在镜像前将提供的会话键转换为小写。
-
-## 决策
-
-- **Gateway 网关发送会话派生**:如果提供了 `sessionKey`,则使用它。如果省略,从目标 + 默认智能体派生 sessionKey 并镜像到那里。
-- **会话条目创建**:始终使用 `recordSessionMetaFromInbound`,`Provider/From/To/ChatType/AccountId/Originating*` 与入站格式对齐。
-- **目标规范化**:出站路由在可用时使用解析后的目标(`resolveChannelTarget` 之后)。
-- **会话键大小写**:在写入和迁移期间将会话键规范化为小写。
-
-## 添加/更新的测试
-
-- `src/infra/outbound/outbound-session.test.ts`
- - Slack 线程会话键。
- - Telegram 话题会话键。
- - dmScope identityLinks 与 Discord。
-- `src/agents/tools/message-tool.test.ts`
- - 从会话键派生 agentId(不传递 sessionKey)。
-- `src/gateway/server-methods/send.test.ts`
- - 在省略时派生会话键并创建会话条目。
-
-## 待处理项目 / 后续跟进
-
-- 语音通话插件使用自定义的 `voice:` 会话键。出站映射在这里没有标准化;如果 message-tool 应该支持语音通话发送,请添加显式映射。
-- 确认是否有任何外部插件使用内置集之外的非标准 `From/To` 格式。
-
-## 涉及的文件
-
-- `src/infra/outbound/outbound-session.ts`
-- `src/infra/outbound/outbound-send-service.ts`
-- `src/infra/outbound/message-action-runner.ts`
-- `src/agents/tools/message-tool.ts`
-- `src/gateway/server-methods/send.ts`
-- 测试:
- - `src/infra/outbound/outbound-session.test.ts`
- - `src/agents/tools/message-tool.test.ts`
- - `src/gateway/server-methods/send.test.ts`
diff --git a/docs/zh-CN/refactor/plugin-sdk.md b/docs/zh-CN/refactor/plugin-sdk.md
deleted file mode 100644
index 800d038e..00000000
--- a/docs/zh-CN/refactor/plugin-sdk.md
+++ /dev/null
@@ -1,221 +0,0 @@
----
-read_when:
- - 定义或重构插件架构
- - 将渠道连接器迁移到插件 SDK/运行时
-summary: 计划:为所有消息连接器提供一套统一的插件 SDK + 运行时
-title: 插件 SDK 重构
-x-i18n:
- generated_at: "2026-02-01T21:36:45Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: d1964e2e47a19ee1d42ddaaa9cf1293c80bb0be463b049dc8468962f35bb6cb0
- source_path: refactor/plugin-sdk.md
- workflow: 15
----
-
-# 插件 SDK + 运行时重构计划
-
-目标:每个消息连接器都是一个插件(内置或外部),使用统一稳定的 API。
-插件不直接从 `src/**` 导入任何内容。所有依赖项均通过 SDK 或运行时获取。
-
-## 为什么现在做
-
-- 当前连接器混用多种模式:直接导入核心模块、仅 dist 的桥接方式以及自定义辅助函数。
-- 这使得升级变得脆弱,并阻碍了干净的外部插件接口。
-
-## 目标架构(两层)
-
-### 1)插件 SDK(编译时,稳定,可发布)
-
-范围:类型、辅助函数和配置工具。无运行时状态,无副作用。
-
-内容(示例):
-
-- 类型:`ChannelPlugin`、适配器、`ChannelMeta`、`ChannelCapabilities`、`ChannelDirectoryEntry`。
-- 配置辅助函数:`buildChannelConfigSchema`、`setAccountEnabledInConfigSection`、`deleteAccountFromConfigSection`、
- `applyAccountNameToChannelSection`。
-- 配对辅助函数:`PAIRING_APPROVED_MESSAGE`、`formatPairingApproveHint`。
-- 新手引导辅助函数:`promptChannelAccessConfig`、`addWildcardAllowFrom`、新手引导类型。
-- 工具参数辅助函数:`createActionGate`、`readStringParam`、`readNumberParam`、`readReactionParams`、`jsonResult`。
-- 文档链接辅助函数:`formatDocsLink`。
-
-交付方式:
-
-- 以 `mayros/plugin-sdk` 发布(或从核心以 `mayros/plugin-sdk` 导出)。
-- 使用语义化版本控制,提供明确的稳定性保证。
-
-### 2)插件运行时(执行层,注入式)
-
-范围:所有涉及核心运行时行为的内容。
-通过 `MayrosPluginApi.runtime` 访问,确保插件永远不会导入 `src/**`。
-
-建议的接口(最小但完整):
-
-```ts
-export type PluginRuntime = {
- channel: {
- text: {
- chunkMarkdownText(text: string, limit: number): string[];
- resolveTextChunkLimit(cfg: MayrosConfig, channel: string, accountId?: string): number;
- hasControlCommand(text: string, cfg: MayrosConfig): boolean;
- };
- reply: {
- dispatchReplyWithBufferedBlockDispatcher(params: {
- ctx: unknown;
- cfg: unknown;
- dispatcherOptions: {
- deliver: (payload: {
- text?: string;
- mediaUrls?: string[];
- mediaUrl?: string;
- }) => void | Promise;
- onError?: (err: unknown, info: { kind: string }) => void;
- };
- }): Promise;
- createReplyDispatcherWithTyping?: unknown; // adapter for Teams-style flows
- };
- routing: {
- resolveAgentRoute(params: {
- cfg: unknown;
- channel: string;
- accountId: string;
- peer: { kind: RoutePeerKind; id: string };
- }): { sessionKey: string; accountId: string };
- };
- pairing: {
- buildPairingReply(params: { channel: string; idLine: string; code: string }): string;
- readAllowFromStore(channel: string): Promise;
- upsertPairingRequest(params: {
- channel: string;
- id: string;
- meta?: { name?: string };
- }): Promise<{ code: string; created: boolean }>;
- };
- media: {
- fetchRemoteMedia(params: { url: string }): Promise<{ buffer: Buffer; contentType?: string }>;
- saveMediaBuffer(
- buffer: Uint8Array,
- contentType: string | undefined,
- direction: "inbound" | "outbound",
- maxBytes: number,
- ): Promise<{ path: string; contentType?: string }>;
- };
- mentions: {
- buildMentionRegexes(cfg: MayrosConfig, agentId?: string): RegExp[];
- matchesMentionPatterns(text: string, regexes: RegExp[]): boolean;
- };
- groups: {
- resolveGroupPolicy(
- cfg: MayrosConfig,
- channel: string,
- accountId: string,
- groupId: string,
- ): {
- allowlistEnabled: boolean;
- allowed: boolean;
- groupConfig?: unknown;
- defaultConfig?: unknown;
- };
- resolveRequireMention(
- cfg: MayrosConfig,
- channel: string,
- accountId: string,
- groupId: string,
- override?: boolean,
- ): boolean;
- };
- debounce: {
- createInboundDebouncer(opts: {
- debounceMs: number;
- buildKey: (v: T) => string | null;
- shouldDebounce: (v: T) => boolean;
- onFlush: (entries: T[]) => Promise;
- onError?: (err: unknown) => void;
- }): { push: (v: T) => void; flush: () => Promise };
- resolveInboundDebounceMs(cfg: MayrosConfig, channel: string): number;
- };
- commands: {
- resolveCommandAuthorizedFromAuthorizers(params: {
- useAccessGroups: boolean;
- authorizers: Array<{ configured: boolean; allowed: boolean }>;
- }): boolean;
- };
- };
- logging: {
- shouldLogVerbose(): boolean;
- getChildLogger(name: string): PluginLogger;
- };
- state: {
- resolveStateDir(cfg: MayrosConfig): string;
- };
-};
-```
-
-备注:
-
-- 运行时是访问核心行为的唯一方式。
-- SDK 故意保持小巧和稳定。
-- 每个运行时方法都映射到现有的核心实现(无重复代码)。
-
-## 迁移计划(分阶段,安全)
-
-### 阶段 0:基础搭建
-
-- 引入 `mayros/plugin-sdk`。
-- 在 `MayrosPluginApi` 中添加带有上述接口的 `api.runtime`。
-- 在过渡期内保留现有导入方式(添加弃用警告)。
-
-### 阶段 1:桥接清理(低风险)
-
-- 用 `api.runtime` 替换每个扩展中的 `core-bridge.ts`。
-- 优先迁移 BlueBubbles、Zalo、Zalo Personal(已经接近完成)。
-- 移除重复的桥接代码。
-
-### 阶段 2:轻度直接导入的插件
-
-- 将 Matrix 迁移到 SDK + 运行时。
-- 验证新手引导、目录、群组提及逻辑。
-
-### 阶段 3:重度直接导入的插件
-
-- 迁移 Microsoft Teams(使用运行时辅助函数最多的插件)。
-- 确保回复/正在输入的语义与当前行为一致。
-
-### 阶段 4:iMessage 插件化
-
-- 将 iMessage 移入 `extensions/imessage`。
-- 用 `api.runtime` 替换直接的核心调用。
-- 保持配置键、CLI 行为和文档不变。
-
-### 阶段 5:强制执行
-
-- 添加 lint 规则 / CI 检查:禁止 `extensions/**` 从 `src/**` 导入。
-- 添加插件 SDK/版本兼容性检查(运行时 + SDK 语义化版本)。
-
-## 兼容性与版本控制
-
-- SDK:语义化版本控制,已发布,变更有文档记录。
-- 运行时:按核心版本进行版本控制。添加 `api.runtime.version`。
-- 插件声明所需的运行时版本范围(例如 `mayrosRuntime: ">=2026.2.0"`)。
-
-## 测试策略
-
-- 适配器级单元测试(使用真实核心实现验证运行时函数)。
-- 每个插件的黄金测试:确保行为无偏差(路由、配对、允许列表、提及过滤)。
-- CI 中使用单个端到端插件示例(安装 + 运行 + 冒烟测试)。
-
-## 待解决问题
-
-- SDK 类型托管在哪里:独立包还是核心导出?
-- 运行时类型分发:在 SDK 中(仅类型)还是在核心中?
-- 如何为内置插件与外部插件暴露文档链接?
-- 过渡期间是否允许仓库内插件有限地直接导入核心模块?
-
-## 成功标准
-
-- 所有渠道连接器都是使用 SDK + 运行时的插件。
-- `extensions/**` 不再从 `src/**` 导入。
-- 新连接器模板仅依赖 SDK + 运行时。
-- 外部插件可以在无需访问核心源码的情况下进行开发和更新。
-
-相关文档:[插件](/tools/plugin)、[渠道](/channels/index)、[配置](/gateway/configuration)。
diff --git a/docs/zh-CN/refactor/strict-config.md b/docs/zh-CN/refactor/strict-config.md
deleted file mode 100644
index f6ee56a9..00000000
--- a/docs/zh-CN/refactor/strict-config.md
+++ /dev/null
@@ -1,100 +0,0 @@
----
-read_when:
- - 设计或实现配置验证行为
- - 处理配置迁移或 doctor 工作流
- - 处理插件配置 schema 或插件加载门控
-summary: 严格配置验证 + 仅通过 doctor 进行迁移
-title: 严格配置验证
-x-i18n:
- generated_at: "2026-02-03T10:08:51Z"
- model: claude-opus-4-5
- provider: pi
- source_hash: 5bc7174a67d2234e763f21330d8fe3afebc23b2e5c728a04abcc648b453a91cc
- source_path: refactor/strict-config.md
- workflow: 15
----
-
-# 严格配置验证(仅通过 doctor 进行迁移)
-
-## 目标
-
-- **在所有地方拒绝未知配置键**(根级 + 嵌套)。
-- **拒绝没有 schema 的插件配置**;不加载该插件。
-- **移除加载时的旧版自动迁移**;迁移仅通过 doctor 运行。
-- **启动时自动运行 doctor(dry-run)**;如果无效,阻止非诊断命令。
-
-## 非目标
-
-- 加载时的向后兼容性(旧版键不会自动迁移)。
-- 静默丢弃无法识别的键。
-
-## 严格验证规则
-
-- 配置必须在每个层级精确匹配 schema。
-- 未知键是验证错误(根级或嵌套都不允许透传)。
-- `plugins.entries..config` 必须由插件的 schema 验证。
- - 如果插件缺少 schema,**拒绝插件加载**并显示清晰的错误。
-- 未知的 `channels.` 键是错误,除非插件清单声明了该渠道 id。
-- 所有插件都需要插件清单(`mayros.plugin.json`)。
-
-## 插件 schema 强制执行
-
-- 每个插件为其配置提供严格的 JSON Schema(内联在清单中)。
-- 插件加载流程:
- 1. 解析插件清单 + schema(`mayros.plugin.json`)。
- 2. 根据 schema 验证配置。
- 3. 如果缺少 schema 或配置无效:阻止插件加载,记录错误。
-- 错误消息包括:
- - 插件 id
- - 原因(缺少 schema / 配置无效)
- - 验证失败的路径
-- 禁用的插件保留其配置,但 Doctor + 日志会显示警告。
-
-## Doctor 流程
-
-- 每次加载配置时都会运行 Doctor(默认 dry-run)。
-- 如果配置无效:
- - 打印摘要 + 可操作的错误。
- - 指示:`mayros doctor --fix`。
-- `mayros doctor --fix`:
- - 应用迁移。
- - 移除未知键。
- - 写入更新后的配置。
-
-## 命令门控(当配置无效时)
-
-允许的命令(仅诊断):
-
-- `mayros doctor`
-- `mayros logs`
-- `mayros health`
-- `mayros help`
-- `mayros status`
-- `mayros gateway status`
-
-其他所有命令必须硬失败并显示:"Config invalid. Run `mayros doctor --fix`."
-
-## 错误用户体验格式
-
-- 单个摘要标题。
-- 分组部分:
- - 未知键(完整路径)
- - 旧版键/需要迁移
- - 插件加载失败(插件 id + 原因 + 路径)
-
-## 实现接触点
-
-- `src/config/zod-schema.ts`:移除根级透传;所有地方使用严格对象。
-- `src/config/zod-schema.providers.ts`:确保严格的渠道 schema。
-- `src/config/validation.ts`:未知键时失败;不应用旧版迁移。
-- `src/config/io.ts`:移除旧版自动迁移;始终运行 doctor dry-run。
-- `src/config/legacy*.ts`:将用法移至仅 doctor。
-- `src/plugins/*`:添加 schema 注册表 + 门控。
-- `src/cli` 中的 CLI 命令门控。
-
-## 测试
-
-- 未知键拒绝(根级 + 嵌套)。
-- 插件缺少 schema → 插件加载被阻止并显示清晰错误。
-- 无效配置 → Gateway 网关启动被阻止,诊断命令除外。
-- Doctor dry-run 自动运行;`doctor --fix` 写入修正后的配置。
diff --git a/extensions/agent-mesh/agent-mailbox.ts b/extensions/agent-mesh/agent-mailbox.ts
index 8e78821c..92150ebc 100644
--- a/extensions/agent-mesh/agent-mailbox.ts
+++ b/extensions/agent-mesh/agent-mailbox.ts
@@ -257,27 +257,71 @@ export class AgentMailbox {
/**
* Get mailbox statistics for an agent.
+ *
+ * Uses three parallel patternQuery calls — one per status — to count messages
+ * without fetching full message content. O(1) Cortex RPCs instead of O(N).
*/
async stats(agentId: string): Promise {
- const messages = await this.inbox({ agent: agentId, limit: 1000 });
-
- const stats: MailboxStats = {
- total: messages.length,
- unread: 0,
- read: 0,
- archived: 0,
- byType: {},
- };
+ const statusPredicate = mailPredicate(this.ns, "status");
+
+ // Find all subjects that belong to this agent's mailbox first
+ const agentMessages = await this.client.patternQuery({
+ predicate: mailPredicate(this.ns, "to"),
+ object: { node: agentId },
+ limit: 1000,
+ });
- for (const msg of messages) {
- if (msg.status === "unread") stats.unread++;
- else if (msg.status === "read") stats.read++;
- else if (msg.status === "archived") stats.archived++;
+ if (agentMessages.matches.length === 0) {
+ return { total: 0, unread: 0, read: 0, archived: 0, byType: {} };
+ }
+
+ const agentSubjects = new Set(agentMessages.matches.map((m) => String(m.subject)));
+
+ // Count by status using three parallel pattern queries — no message reconstruction
+ const [unreadResult, readResult, archivedResult] = await Promise.all([
+ this.client.patternQuery({
+ predicate: statusPredicate,
+ object: "unread",
+ limit: 1000,
+ }),
+ this.client.patternQuery({
+ predicate: statusPredicate,
+ object: "read",
+ limit: 1000,
+ }),
+ this.client.patternQuery({
+ predicate: statusPredicate,
+ object: "archived",
+ limit: 1000,
+ }),
+ ]);
+
+ const unread = unreadResult.matches.filter((m) => agentSubjects.has(String(m.subject))).length;
+ const read = readResult.matches.filter((m) => agentSubjects.has(String(m.subject))).length;
+ const archived = archivedResult.matches.filter((m) =>
+ agentSubjects.has(String(m.subject)),
+ ).length;
+
+ // byType requires fetching type triples — query once for all agent messages
+ const typeResult = await this.client.patternQuery({
+ predicate: mailPredicate(this.ns, "type"),
+ limit: 1000,
+ });
- stats.byType[msg.type] = (stats.byType[msg.type] ?? 0) + 1;
+ const byType: Record = {};
+ for (const match of typeResult.matches) {
+ if (!agentSubjects.has(String(match.subject))) continue;
+ const type = String(match.object ?? "task");
+ byType[type] = (byType[type] ?? 0) + 1;
}
- return stats;
+ return {
+ total: unread + read + archived,
+ unread,
+ read,
+ archived,
+ byType,
+ };
}
// ---------- internal ----------
diff --git a/extensions/agent-mesh/background-tracker.ts b/extensions/agent-mesh/background-tracker.ts
index f7141a4a..0720f138 100644
--- a/extensions/agent-mesh/background-tracker.ts
+++ b/extensions/agent-mesh/background-tracker.ts
@@ -218,19 +218,26 @@ export class BackgroundTracker {
const result = await this.client.patternQuery(queryOpts);
const prefix = `${this.ns}:bgtask:`;
- const tasks: BackgroundTask[] = [];
+ const taskIds: string[] = [];
for (const match of result.matches) {
if (!match.subject.startsWith(prefix)) continue;
+ taskIds.push(match.subject.slice(prefix.length));
+ }
- const taskId = match.subject.slice(prefix.length);
- const task = await this.getTask(taskId);
- if (!task) continue;
-
- // Apply agent filter
- if (opts?.agentId && task.agentId !== opts.agentId) continue;
+ // Fetch all tasks in parallel, in batches of 10 to avoid overwhelming Cortex
+ const BATCH_SIZE = 10;
+ const tasks: BackgroundTask[] = [];
- tasks.push(task);
+ for (let i = 0; i < taskIds.length; i += BATCH_SIZE) {
+ const batch = taskIds.slice(i, i + BATCH_SIZE);
+ const settled = await Promise.all(batch.map((id) => this.getTask(id)));
+ for (const task of settled) {
+ if (!task) continue;
+ // Apply agent filter
+ if (opts?.agentId && task.agentId !== opts.agentId) continue;
+ tasks.push(task);
+ }
}
// Sort by startedAt descending (newest first)
diff --git a/extensions/agent-mesh/index.ts b/extensions/agent-mesh/index.ts
index 998174e0..148fe301 100644
--- a/extensions/agent-mesh/index.ts
+++ b/extensions/agent-mesh/index.ts
@@ -67,10 +67,18 @@ const agentMeshPlugin = {
defaultStrategy: cfg.teams.defaultStrategy,
workflowTimeout: cfg.teams.workflowTimeout,
});
- const orchestrator = new WorkflowOrchestrator(client, ns, teamMgr, fusion, nsMgr);
const mailbox = new AgentMailbox(client, ns);
- const dashboard = new TeamDashboardService(teamMgr, mailbox, null, ns);
const bgTracker = new BackgroundTracker(client, ns);
+ const orchestrator = new WorkflowOrchestrator(
+ client,
+ ns,
+ teamMgr,
+ fusion,
+ nsMgr,
+ mailbox,
+ bgTracker,
+ );
+ const dashboard = new TeamDashboardService(teamMgr, mailbox, null, ns);
let cortexAvailable = false;
const healthMonitor = new HealthMonitor(client, {
onHealthy: () => {
diff --git a/extensions/agent-mesh/team-manager.ts b/extensions/agent-mesh/team-manager.ts
index ee07f6d8..f56a6851 100644
--- a/extensions/agent-mesh/team-manager.ts
+++ b/extensions/agent-mesh/team-manager.ts
@@ -40,9 +40,10 @@ export type TeamStatus = "pending" | "running" | "completed" | "failed";
export type TeamResult = {
summary: string;
- memberResults: Array<{ agentId: string; role: string; findings: number }>;
+ memberResults: Array<{ agentId: string; role: string; findings: number; error?: string }>;
conflicts: number;
fusionReport?: FusionReport;
+ mergeErrors?: Array<{ agentId: string; role: string; error: string }>;
};
export type TeamEntry = {
@@ -87,8 +88,8 @@ export class TeamManager {
constructor(
private readonly client: CortexClient,
private readonly ns: string,
- private readonly nsMgr: NamespaceManager,
- private readonly fusion: KnowledgeFusion,
+ private readonly nsMgr: NamespaceManager | null,
+ private readonly fusion: KnowledgeFusion | null,
private readonly config: TeamManagerConfig,
) {}
@@ -109,6 +110,7 @@ export class TeamManager {
const subject = teamSubject(this.ns, teamId);
// Create shared namespace for the team
+ if (!this.nsMgr) throw new Error("NamespaceManager required to create teams");
const agentIds = cfg.members.map((m) => m.agentId);
const sharedNs = await this.nsMgr.createSharedNamespace(`team-${teamId}`, agentIds);
@@ -348,18 +350,28 @@ export class TeamManager {
// Merge each member's private namespace into the shared namespace
let totalConflicts = 0;
let lastReport: FusionReport | undefined;
- const memberResults: Array<{ agentId: string; role: string; findings: number }> = [];
+ const memberResults: Array<{
+ agentId: string;
+ role: string;
+ findings: number;
+ error?: string;
+ }> = [];
+ const mergeErrors: Array<{ agentId: string; role: string; error: string }> = [];
+
+ if (!this.nsMgr || !this.fusion) {
+ throw new Error("NamespaceManager and KnowledgeFusion required to finalize teams");
+ }
const additionalNs =
completedMembers.length >= 3
- ? completedMembers.map((m) => this.nsMgr.getPrivateNs(m.agentId))
+ ? completedMembers.map((m) => this.nsMgr!.getPrivateNs(m.agentId))
: undefined;
for (const member of completedMembers) {
- const memberNs = this.nsMgr.getPrivateNs(member.agentId);
+ const memberNs = this.nsMgr!.getPrivateNs(member.agentId);
try {
- const report = await this.fusion.merge(
+ const report = await this.fusion!.merge(
memberNs,
team.sharedNs,
team.strategy,
@@ -372,20 +384,27 @@ export class TeamManager {
role: member.role,
findings: report.added,
});
- } catch {
+ } catch (err) {
+ const errMsg = err instanceof Error ? err.message : String(err);
+ console.error(
+ `[TeamManager] merge failed for agent "${member.agentId}" (role: ${member.role}): ${errMsg}`,
+ );
+ mergeErrors.push({ agentId: member.agentId, role: member.role, error: errMsg });
memberResults.push({
agentId: member.agentId,
role: member.role,
- findings: 0,
+ findings: -1,
+ error: errMsg,
});
}
}
const teamResult: TeamResult = {
- summary: `Merged ${completedMembers.length} member(s) with ${team.strategy} strategy`,
+ summary: `Merged ${completedMembers.length} member(s) with ${team.strategy} strategy${mergeErrors.length > 0 ? ` (${mergeErrors.length} merge failure(s))` : ""}`,
memberResults,
conflicts: totalConflicts,
fusionReport: lastReport,
+ ...(mergeErrors.length > 0 && { mergeErrors }),
};
// Persist result
diff --git a/extensions/agent-mesh/workflow-orchestrator.ts b/extensions/agent-mesh/workflow-orchestrator.ts
index 0728822e..b33009ad 100644
--- a/extensions/agent-mesh/workflow-orchestrator.ts
+++ b/extensions/agent-mesh/workflow-orchestrator.ts
@@ -8,6 +8,8 @@
import { randomUUID } from "node:crypto";
import type { CortexClient } from "../shared/cortex-client.js";
+import type { AgentMailbox } from "./agent-mailbox.js";
+import type { BackgroundTracker } from "./background-tracker.js";
import type { KnowledgeFusion } from "./knowledge-fusion.js";
import type { MergeStrategy } from "./mesh-protocol.js";
import type { NamespaceManager } from "./namespace-manager.js";
@@ -21,6 +23,14 @@ import type {
WorkflowState,
} from "./workflows/types.js";
+// ============================================================================
+// Constants
+// ============================================================================
+
+const DEFAULT_PHASE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
+const POLL_INITIAL_INTERVAL_MS = 1_000; // 1 second
+const POLL_MAX_INTERVAL_MS = 10_000; // 10 seconds
+
// ============================================================================
// Triple helpers
// ============================================================================
@@ -46,6 +56,9 @@ export class WorkflowOrchestrator {
teamMgr: TeamManager,
private readonly fusion: KnowledgeFusion,
private readonly nsMgr: NamespaceManager,
+ private readonly mailbox?: AgentMailbox,
+ private readonly bgTracker?: BackgroundTracker,
+ private readonly phaseTimeoutMs: number = DEFAULT_PHASE_TIMEOUT_MS,
) {
this.teamMgr = teamMgr;
}
@@ -292,14 +305,97 @@ export class WorkflowOrchestrator {
await this.teamMgr.updateMemberStatus(workflow.teamId, agent.agentId, "running");
}
- // Simulate agent completion (in real deployment, agents complete asynchronously)
- for (const agent of phase.agents) {
- await this.teamMgr.updateMemberStatus(
- workflow.teamId,
- agent.agentId,
- "completed",
- `Completed ${agent.role} analysis`,
+ // Dispatch tasks via AgentMailbox and track with BackgroundTracker when available.
+ // Falls back to marking agents completed immediately when neither is present (e.g. tests).
+ if (this.mailbox && this.bgTracker) {
+ // Map agentId → background task id so we can poll for completion
+ const taskIds: Map = new Map();
+
+ for (const agent of phase.agents) {
+ // Send the task to the agent's inbox as a "task" message
+ await this.mailbox.send({
+ from: `workflow:${workflowId}`,
+ to: agent.agentId,
+ content: agent.task,
+ type: "task",
+ });
+
+ // Register in BackgroundTracker so progress is observable
+ const bgTask = await this.bgTracker.track({
+ agentId: agent.agentId,
+ description: `[workflow:${workflowId}] phase:${phase.name} role:${agent.role}`,
+ status: "running",
+ });
+
+ taskIds.set(agent.agentId, bgTask.id);
+ }
+
+ // Poll for all agent tasks to reach a terminal status
+ const deadline = Date.now() + this.phaseTimeoutMs;
+ let pollIntervalMs = POLL_INITIAL_INTERVAL_MS;
+ const pendingAgents = new Set(phase.agents.map((a) => a.agentId));
+
+ while (pendingAgents.size > 0 && Date.now() < deadline) {
+ await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
+ // Exponential backoff capped at max interval
+ pollIntervalMs = Math.min(pollIntervalMs * 2, POLL_MAX_INTERVAL_MS);
+
+ for (const agentId of [...pendingAgents]) {
+ const taskId = taskIds.get(agentId);
+ if (!taskId) {
+ pendingAgents.delete(agentId);
+ continue;
+ }
+ const task = await this.bgTracker.getTask(taskId);
+ if (!task) {
+ pendingAgents.delete(agentId);
+ continue;
+ }
+ if (
+ task.status === "completed" ||
+ task.status === "failed" ||
+ task.status === "cancelled"
+ ) {
+ pendingAgents.delete(agentId);
+ const memberStatus = task.status === "completed" ? "completed" : "failed";
+ await this.teamMgr.updateMemberStatus(
+ workflow.teamId,
+ agentId,
+ memberStatus,
+ task.result ?? `Task ${task.status}`,
+ );
+ }
+ }
+ }
+
+ // Any agents still pending after the deadline are timed out
+ for (const agentId of pendingAgents) {
+ const taskId = taskIds.get(agentId);
+ if (taskId) {
+ await this.bgTracker.updateStatus(taskId, "failed", "timed_out");
+ }
+ await this.teamMgr.updateMemberStatus(workflow.teamId, agentId, "failed", "timed_out");
+ }
+ } else {
+ // Fallback: no mailbox/tracker available — mark all agents completed with a warning
+ const hasMailbox = Boolean(this.mailbox);
+ const hasTracker = Boolean(this.bgTracker);
+ const missing = [!hasMailbox && "AgentMailbox", !hasTracker && "BackgroundTracker"]
+ .filter(Boolean)
+ .join(", ");
+ // Use a simple console.warn since we may not have a logger here
+ console.warn(
+ `[WorkflowOrchestrator] ${missing} not available — agent tasks for phase "${phase.name}" ` +
+ `of workflow "${workflowId}" will be marked completed without real dispatch.`,
);
+ for (const agent of phase.agents) {
+ await this.teamMgr.updateMemberStatus(
+ workflow.teamId,
+ agent.agentId,
+ "completed",
+ `Completed ${agent.role} analysis`,
+ );
+ }
}
// Merge results
diff --git a/extensions/analytics/config.ts b/extensions/analytics/config.ts
new file mode 100644
index 00000000..ea05fd12
--- /dev/null
+++ b/extensions/analytics/config.ts
@@ -0,0 +1,89 @@
+/**
+ * Analytics Configuration — privacy controls and parsing.
+ */
+
+export type AnalyticsConfig = {
+ /** Enable analytics collection (default: false — opt-in). */
+ enabled: boolean;
+ /** Privacy mode: "anonymous" hashes IDs, "off" disables collection (default: "anonymous"). */
+ privacyMode: "anonymous" | "identified" | "off";
+ /** Max events in buffer (default: 500). */
+ maxBufferSize: number;
+ /** Flush interval in ms (default: 30_000). */
+ flushIntervalMs: number;
+ /** Event TTL in ms (default: 3_600_000). */
+ eventTtlMs: number;
+ /**
+ * HTTP endpoint for batch event delivery (default: "" — local-only logging).
+ * When non-empty, events are POSTed as JSON to this URL.
+ * Example: "https://analytics.apilium.com/batch"
+ */
+ endpoint: string;
+};
+
+const ALLOWED_KEYS = [
+ "enabled",
+ "privacyMode",
+ "maxBufferSize",
+ "flushIntervalMs",
+ "eventTtlMs",
+ "endpoint",
+];
+
+function assertAllowedKeys(value: Record, allowed: string[], label: string) {
+ const unknown = Object.keys(value).filter((key) => !allowed.includes(key));
+ if (unknown.length > 0) {
+ throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
+ }
+}
+
+export function parseAnalyticsConfig(value: unknown): AnalyticsConfig {
+ const cfg = (value && typeof value === "object" && !Array.isArray(value) ? value : {}) as Record<
+ string,
+ unknown
+ >;
+
+ assertAllowedKeys(cfg, ALLOWED_KEYS, "analytics config");
+
+ // Respect environment variable override
+ if (process.env.MAYROS_ANALYTICS_DISABLED === "1") {
+ return {
+ enabled: false,
+ privacyMode: "off",
+ maxBufferSize: 500,
+ flushIntervalMs: 30_000,
+ eventTtlMs: 3_600_000,
+ endpoint: "",
+ };
+ }
+
+ const privacyMode = ((): "anonymous" | "identified" | "off" => {
+ if (cfg.privacyMode === "identified") return "identified";
+ if (cfg.privacyMode === "off") return "off";
+ return "anonymous";
+ })();
+
+ return {
+ enabled: cfg.enabled === true,
+ privacyMode,
+ maxBufferSize:
+ typeof cfg.maxBufferSize === "number" && cfg.maxBufferSize > 0
+ ? Math.min(Math.floor(cfg.maxBufferSize), 10_000)
+ : 500,
+ flushIntervalMs:
+ typeof cfg.flushIntervalMs === "number" && cfg.flushIntervalMs >= 1000
+ ? Math.floor(cfg.flushIntervalMs)
+ : 30_000,
+ eventTtlMs:
+ typeof cfg.eventTtlMs === "number" && cfg.eventTtlMs >= 60_000
+ ? Math.floor(cfg.eventTtlMs)
+ : 3_600_000,
+ endpoint:
+ typeof cfg.endpoint === "string" && cfg.endpoint.trim().length > 0 ? cfg.endpoint.trim() : "",
+ };
+}
+
+/** Check if analytics is enabled via config or environment. */
+export function isAnalyticsEnabled(config: AnalyticsConfig): boolean {
+ return config.enabled && config.privacyMode !== "off";
+}
diff --git a/extensions/analytics/event-queue.ts b/extensions/analytics/event-queue.ts
new file mode 100644
index 00000000..44aa63df
--- /dev/null
+++ b/extensions/analytics/event-queue.ts
@@ -0,0 +1,145 @@
+/**
+ * Analytics Event Queue — in-memory buffer with periodic batch flush.
+ *
+ * Follows the same pattern as TraceEmitter:
+ * - Memory buffer with max size
+ * - Timer-based flush with exponential backoff
+ * - TTL for stale events
+ * - Graceful shutdown
+ */
+
+import { createHash } from "node:crypto";
+import type { AnalyticsEvent, AnalyticsBatch } from "./event-schema.js";
+import { createBatch } from "./event-schema.js";
+
+export type EventQueueConfig = {
+ /** Max events in buffer before force-flush (default: 500). */
+ maxBufferSize: number;
+ /** Flush interval in ms (default: 30_000). */
+ flushIntervalMs: number;
+ /** Max backoff on failure in ms (default: 300_000). */
+ maxBackoffMs: number;
+ /** Event TTL in ms — discard events older than this (default: 3_600_000 = 1h). */
+ eventTtlMs: number;
+ /** Client version string. */
+ clientVersion: string;
+ /** Flush callback — called with batch to deliver. */
+ onFlush?: (batch: AnalyticsBatch) => Promise;
+};
+
+const DEFAULT_CONFIG: EventQueueConfig = {
+ maxBufferSize: 500,
+ flushIntervalMs: 30_000,
+ maxBackoffMs: 300_000,
+ eventTtlMs: 3_600_000,
+ clientVersion: "unknown",
+};
+
+export class EventQueue {
+ private buffer: AnalyticsEvent[] = [];
+ private timer: ReturnType | null = null;
+ private consecutiveFailures = 0;
+ private config: EventQueueConfig;
+ private stopped = false;
+
+ constructor(config: Partial = {}) {
+ this.config = { ...DEFAULT_CONFIG, ...config };
+ }
+
+ /** Start the periodic flush timer. */
+ start(): void {
+ if (this.timer) return;
+ this.stopped = false;
+ this.timer = setInterval(() => {
+ void this.flush();
+ }, this.currentIntervalMs());
+ }
+
+ /** Stop the timer and flush remaining events. */
+ async stop(): Promise {
+ this.stopped = true;
+ if (this.timer) {
+ clearInterval(this.timer);
+ this.timer = null;
+ }
+ // Final flush
+ await this.flush();
+ }
+
+ /** Enqueue an event. Force-flushes if buffer is full. */
+ enqueue(event: AnalyticsEvent): void {
+ if (this.stopped) return;
+ this.buffer.push(event);
+ if (this.buffer.length >= this.config.maxBufferSize) {
+ void this.flush();
+ }
+ }
+
+ /** Flush all buffered events. */
+ async flush(): Promise {
+ if (this.buffer.length === 0) return;
+ if (!this.config.onFlush) return;
+
+ // Drain buffer, enforce TTL
+ const now = Date.now();
+ const events = this.buffer.filter((e) => {
+ const age = now - new Date(e.timestamp).getTime();
+ return age < this.config.eventTtlMs;
+ });
+ this.buffer = [];
+
+ if (events.length === 0) return;
+
+ const batch = createBatch(events, this.config.clientVersion);
+
+ try {
+ await this.config.onFlush(batch);
+ this.consecutiveFailures = 0;
+ } catch {
+ // Re-buffer events on failure (up to max)
+ this.buffer.unshift(...events.slice(0, this.config.maxBufferSize - this.buffer.length));
+ this.consecutiveFailures++;
+ // Restart timer with backoff
+ this.restartTimer();
+ }
+ }
+
+ /** Get current buffer size. */
+ getBufferSize(): number {
+ return this.buffer.length;
+ }
+
+ /** Get consecutive failure count. */
+ getFailureCount(): number {
+ return this.consecutiveFailures;
+ }
+
+ /** Get buffered events (for testing). */
+ getBufferedEvents(): readonly AnalyticsEvent[] {
+ return this.buffer;
+ }
+
+ private currentIntervalMs(): number {
+ if (this.consecutiveFailures === 0) return this.config.flushIntervalMs;
+ const backoff = this.config.flushIntervalMs * Math.pow(2, this.consecutiveFailures);
+ return Math.min(backoff, this.config.maxBackoffMs);
+ }
+
+ private restartTimer(): void {
+ if (this.timer) {
+ clearInterval(this.timer);
+ }
+ if (!this.stopped) {
+ this.timer = setInterval(() => {
+ void this.flush();
+ }, this.currentIntervalMs());
+ }
+ }
+}
+
+/**
+ * Hash a string for anonymization (SHA-256, first 16 hex chars).
+ */
+export function anonymize(value: string): string {
+ return createHash("sha256").update(value).digest("hex").slice(0, 16);
+}
diff --git a/extensions/analytics/event-schema.ts b/extensions/analytics/event-schema.ts
new file mode 100644
index 00000000..3ac977c3
--- /dev/null
+++ b/extensions/analytics/event-schema.ts
@@ -0,0 +1,78 @@
+/**
+ * Analytics Event Schema — structured analytics events.
+ */
+
+export type AnalyticsCategory =
+ | "command" // slash command execution
+ | "tool" // tool call
+ | "model" // model selection/switch
+ | "session" // session lifecycle
+ | "feature" // feature usage (vim, theme, etc)
+ | "error" // errors and failures
+ | "performance"; // timing and resource metrics
+
+export type AnalyticsEvent = {
+ /** Unique event ID (uuid v4). */
+ id: string;
+ /** Event category. */
+ category: AnalyticsCategory;
+ /** Action within category (e.g., "execute", "switch", "start"). */
+ action: string;
+ /** Optional label for further classification. */
+ label?: string;
+ /** Numeric value (e.g., duration in ms, token count). */
+ value?: number;
+ /** ISO 8601 timestamp. */
+ timestamp: string;
+ /** Session identifier (hashed). */
+ sessionId?: string;
+ /** Additional attributes. */
+ attributes?: Record;
+};
+
+export type AnalyticsBatch = {
+ /** Client version (from package.json). */
+ clientVersion: string;
+ /** Platform (darwin, linux, win32). */
+ platform: string;
+ /** Node.js version. */
+ nodeVersion: string;
+ /** Batch of events. */
+ events: AnalyticsEvent[];
+ /** When this batch was assembled. */
+ batchedAt: string;
+};
+
+/** Create a new analytics event with defaults filled in. */
+export function createEvent(
+ category: AnalyticsCategory,
+ action: string,
+ opts?: {
+ label?: string;
+ value?: number;
+ sessionId?: string;
+ attributes?: Record;
+ },
+): AnalyticsEvent {
+ return {
+ id: crypto.randomUUID(),
+ category,
+ action,
+ label: opts?.label,
+ value: opts?.value,
+ timestamp: new Date().toISOString(),
+ sessionId: opts?.sessionId,
+ attributes: opts?.attributes,
+ };
+}
+
+/** Create an AnalyticsBatch from events. */
+export function createBatch(events: AnalyticsEvent[], clientVersion: string): AnalyticsBatch {
+ return {
+ clientVersion,
+ platform: process.platform,
+ nodeVersion: process.version,
+ events,
+ batchedAt: new Date().toISOString(),
+ };
+}
diff --git a/extensions/analytics/index.test.ts b/extensions/analytics/index.test.ts
new file mode 100644
index 00000000..77102f8f
--- /dev/null
+++ b/extensions/analytics/index.test.ts
@@ -0,0 +1,228 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { createEvent, createBatch, type AnalyticsEvent } from "./event-schema.js";
+import { EventQueue, anonymize } from "./event-queue.js";
+import { parseAnalyticsConfig, isAnalyticsEnabled } from "./config.js";
+
+// ============================================================================
+// Event Schema
+// ============================================================================
+
+describe("createEvent", () => {
+ // 1
+ it("creates event with required fields", () => {
+ const event = createEvent("command", "execute");
+ expect(event.id).toMatch(/^[0-9a-f-]+$/);
+ expect(event.category).toBe("command");
+ expect(event.action).toBe("execute");
+ expect(event.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
+ });
+
+ // 2
+ it("includes optional fields when provided", () => {
+ const event = createEvent("tool", "call", {
+ label: "code_read",
+ value: 150,
+ sessionId: "sess-123",
+ attributes: { success: true },
+ });
+ expect(event.label).toBe("code_read");
+ expect(event.value).toBe(150);
+ expect(event.sessionId).toBe("sess-123");
+ expect(event.attributes?.success).toBe(true);
+ });
+
+ // 3
+ it("generates unique IDs", () => {
+ const e1 = createEvent("session", "start");
+ const e2 = createEvent("session", "start");
+ expect(e1.id).not.toBe(e2.id);
+ });
+});
+
+describe("createBatch", () => {
+ // 4
+ it("creates batch with metadata", () => {
+ const events = [createEvent("session", "start")];
+ const batch = createBatch(events, "0.1.0");
+ expect(batch.clientVersion).toBe("0.1.0");
+ expect(batch.platform).toBe(process.platform);
+ expect(batch.nodeVersion).toBe(process.version);
+ expect(batch.events).toHaveLength(1);
+ expect(batch.batchedAt).toMatch(/^\d{4}-\d{2}-\d{2}T/);
+ });
+});
+
+// ============================================================================
+// EventQueue
+// ============================================================================
+
+describe("EventQueue", () => {
+ // 5
+ it("enqueues events", () => {
+ const queue = new EventQueue();
+ queue.enqueue(createEvent("command", "execute"));
+ expect(queue.getBufferSize()).toBe(1);
+ });
+
+ // 6
+ it("flushes events via callback", async () => {
+ const onFlush = vi.fn().mockResolvedValue(undefined);
+ const queue = new EventQueue({ onFlush });
+ queue.enqueue(createEvent("command", "execute"));
+ queue.enqueue(createEvent("tool", "call"));
+ await queue.flush();
+ expect(onFlush).toHaveBeenCalledOnce();
+ expect(onFlush.mock.calls[0][0].events).toHaveLength(2);
+ expect(queue.getBufferSize()).toBe(0);
+ });
+
+ // 7
+ it("does not flush empty buffer", async () => {
+ const onFlush = vi.fn().mockResolvedValue(undefined);
+ const queue = new EventQueue({ onFlush });
+ await queue.flush();
+ expect(onFlush).not.toHaveBeenCalled();
+ });
+
+ // 8
+ it("re-buffers events on flush failure", async () => {
+ const onFlush = vi.fn().mockRejectedValue(new Error("network error"));
+ const queue = new EventQueue({ onFlush });
+ queue.enqueue(createEvent("command", "execute"));
+ await queue.flush();
+ expect(queue.getBufferSize()).toBe(1);
+ expect(queue.getFailureCount()).toBe(1);
+ });
+
+ // 9
+ it("drops stale events on flush", async () => {
+ const onFlush = vi.fn().mockResolvedValue(undefined);
+ const queue = new EventQueue({ onFlush, eventTtlMs: 100 });
+ const old: AnalyticsEvent = {
+ ...createEvent("command", "old"),
+ timestamp: new Date(Date.now() - 200).toISOString(),
+ };
+ queue.enqueue(old);
+ await queue.flush();
+ expect(onFlush).not.toHaveBeenCalled(); // All events were stale
+ });
+
+ // 10
+ it("force-flushes at max buffer size", async () => {
+ const onFlush = vi.fn().mockResolvedValue(undefined);
+ const queue = new EventQueue({ onFlush, maxBufferSize: 3 });
+ queue.enqueue(createEvent("command", "1"));
+ queue.enqueue(createEvent("tool", "2"));
+ // Wait for potential async flush trigger
+ await new Promise((r) => setTimeout(r, 10));
+ queue.enqueue(createEvent("session", "3")); // This triggers force-flush
+ await new Promise((r) => setTimeout(r, 10));
+ expect(onFlush).toHaveBeenCalled();
+ });
+
+ // 11
+ it("stop flushes remaining events", async () => {
+ const onFlush = vi.fn().mockResolvedValue(undefined);
+ const queue = new EventQueue({ onFlush });
+ queue.start();
+ queue.enqueue(createEvent("session", "end"));
+ await queue.stop();
+ expect(onFlush).toHaveBeenCalled();
+ expect(queue.getBufferSize()).toBe(0);
+ });
+
+ // 12
+ it("does not enqueue after stop", async () => {
+ const queue = new EventQueue();
+ await queue.stop();
+ queue.enqueue(createEvent("command", "late"));
+ expect(queue.getBufferSize()).toBe(0);
+ });
+});
+
+// ============================================================================
+// anonymize
+// ============================================================================
+
+describe("anonymize", () => {
+ // 13
+ it("returns hex string of length 16", () => {
+ const result = anonymize("test-session-id");
+ expect(result).toMatch(/^[0-9a-f]{16}$/);
+ });
+
+ // 14
+ it("is deterministic", () => {
+ expect(anonymize("same")).toBe(anonymize("same"));
+ });
+
+ // 15
+ it("differs for different inputs", () => {
+ expect(anonymize("a")).not.toBe(anonymize("b"));
+ });
+});
+
+// ============================================================================
+// Config
+// ============================================================================
+
+describe("parseAnalyticsConfig", () => {
+ // 16
+ it("defaults to disabled", () => {
+ const cfg = parseAnalyticsConfig({});
+ expect(cfg.enabled).toBe(false);
+ expect(cfg.privacyMode).toBe("anonymous");
+ });
+
+ // 17
+ it("parses full config", () => {
+ const cfg = parseAnalyticsConfig({
+ enabled: true,
+ privacyMode: "identified",
+ maxBufferSize: 1000,
+ flushIntervalMs: 60_000,
+ eventTtlMs: 7_200_000,
+ });
+ expect(cfg.enabled).toBe(true);
+ expect(cfg.privacyMode).toBe("identified");
+ expect(cfg.maxBufferSize).toBe(1000);
+ expect(cfg.flushIntervalMs).toBe(60_000);
+ });
+
+ // 18
+ it("rejects unknown keys", () => {
+ expect(() => parseAnalyticsConfig({ badKey: true })).toThrow("unknown keys");
+ });
+
+ // 19
+ it("clamps maxBufferSize to 10000", () => {
+ const cfg = parseAnalyticsConfig({ maxBufferSize: 99999 });
+ expect(cfg.maxBufferSize).toBe(10_000);
+ });
+
+ // 20
+ it("handles null/undefined gracefully", () => {
+ const cfg = parseAnalyticsConfig(null);
+ expect(cfg.enabled).toBe(false);
+ expect(cfg.privacyMode).toBe("anonymous");
+ });
+});
+
+describe("isAnalyticsEnabled", () => {
+ // 21
+ it("returns false when disabled", () => {
+ expect(isAnalyticsEnabled(parseAnalyticsConfig({}))).toBe(false);
+ });
+
+ // 22
+ it("returns true when enabled with anonymous mode", () => {
+ expect(isAnalyticsEnabled(parseAnalyticsConfig({ enabled: true }))).toBe(true);
+ });
+
+ // 23
+ it("returns false when privacyMode is off", () => {
+ expect(isAnalyticsEnabled(parseAnalyticsConfig({ enabled: true, privacyMode: "off" }))).toBe(
+ false,
+ );
+ });
+});
diff --git a/extensions/analytics/index.ts b/extensions/analytics/index.ts
new file mode 100644
index 00000000..790bf034
--- /dev/null
+++ b/extensions/analytics/index.ts
@@ -0,0 +1,179 @@
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { createEvent } from "./event-schema.js";
+import { EventQueue, anonymize } from "./event-queue.js";
+import { parseAnalyticsConfig, isAnalyticsEnabled } from "./config.js";
+
+const analyticsPlugin = {
+ id: "analytics",
+ name: "Analytics",
+ description: "Opt-in usage analytics with privacy controls and batch event delivery",
+ kind: "observability" as const,
+
+ async register(api: MayrosPluginApi) {
+ const cfg = parseAnalyticsConfig(api.pluginConfig);
+
+ if (!isAnalyticsEnabled(cfg)) {
+ api.logger.info("analytics: disabled (opt-in required or MAYROS_ANALYTICS_DISABLED=1)");
+ return;
+ }
+
+ let currentSessionId: string | undefined;
+ const version = api.version ?? "0.0.0";
+
+ const queue = new EventQueue({
+ maxBufferSize: cfg.maxBufferSize,
+ flushIntervalMs: cfg.flushIntervalMs,
+ eventTtlMs: cfg.eventTtlMs,
+ clientVersion: version,
+ onFlush: async (batch) => {
+ // Always log locally
+ api.logger.info(`analytics: flushed ${batch.events.length} events`);
+
+ // Deliver to remote endpoint if configured
+ if (cfg.endpoint) {
+ try {
+ const response = await fetch(cfg.endpoint, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "User-Agent": `mayros/${version}`,
+ },
+ body: JSON.stringify(batch),
+ });
+ if (!response.ok) {
+ api.logger.warn(
+ `analytics: delivery failed (HTTP ${response.status}) — events will be retried`,
+ );
+ throw new Error(`HTTP ${response.status}`);
+ }
+ } catch (err) {
+ api.logger.warn(
+ `analytics: delivery error: ${err instanceof Error ? err.message : String(err)} — events will be retried`,
+ );
+ // Re-throw so EventQueue increments failure count and applies backoff
+ throw err;
+ }
+ }
+ },
+ });
+
+ const sessionHash = (id: string) => (cfg.privacyMode === "anonymous" ? anonymize(id) : id);
+
+ // ========================================================================
+ // Hooks
+ // ========================================================================
+
+ api.on("session_start", async (_event, ctx) => {
+ currentSessionId = sessionHash(ctx?.sessionId ?? "unknown");
+ queue.start();
+ queue.enqueue(createEvent("session", "start", { sessionId: currentSessionId }));
+ });
+
+ api.on("session_end", async () => {
+ queue.enqueue(createEvent("session", "end", { sessionId: currentSessionId }));
+ await queue.stop();
+ });
+
+ api.on("after_tool_call", async (event) => {
+ queue.enqueue(
+ createEvent("tool", "execute", {
+ label: event.toolName,
+ value: event.durationMs,
+ sessionId: currentSessionId,
+ attributes: {
+ success: !event.error,
+ },
+ }),
+ );
+ });
+
+ api.on("llm_output", async (event) => {
+ const usage = event.usage as Record | undefined;
+ queue.enqueue(
+ createEvent("model", "response", {
+ label: event.model,
+ value: usage?.total,
+ sessionId: currentSessionId,
+ attributes: {
+ provider: event.provider ?? "unknown",
+ inputTokens: usage?.input ?? 0,
+ outputTokens: usage?.output ?? 0,
+ },
+ }),
+ );
+ });
+
+ // ========================================================================
+ // Tools
+ // ========================================================================
+
+ const { Type } = await import("@sinclair/typebox");
+
+ api.registerTool(
+ {
+ name: "analytics_status",
+ label: "Analytics Status",
+ description: "Show analytics queue status: buffer size, flush stats, privacy mode.",
+ parameters: Type.Object({}),
+ async execute() {
+ const lines = [
+ `Analytics: ${cfg.enabled ? "enabled" : "disabled"}`,
+ `Privacy: ${cfg.privacyMode}`,
+ `Endpoint: ${cfg.endpoint || "(local only)"}`,
+ `Buffer: ${queue.getBufferSize()} events`,
+ `Failures: ${queue.getFailureCount()} consecutive`,
+ `Flush: every ${cfg.flushIntervalMs / 1000}s`,
+ ];
+ return {
+ content: [{ type: "text", text: lines.join("\n") }],
+ details: {
+ enabled: cfg.enabled,
+ privacyMode: cfg.privacyMode,
+ endpoint: cfg.endpoint || null,
+ bufferSize: queue.getBufferSize(),
+ failures: queue.getFailureCount(),
+ },
+ };
+ },
+ },
+ { name: "analytics_status" },
+ );
+
+ // ========================================================================
+ // CLI
+ // ========================================================================
+
+ api.registerCli(
+ ({ program }) => {
+ const analytics = program.command("analytics").description("Analytics management");
+
+ analytics
+ .command("status")
+ .description("Show analytics status")
+ .action(() => {
+ console.log(`Analytics: ${cfg.enabled ? "enabled" : "disabled"}`);
+ console.log(`Privacy: ${cfg.privacyMode}`);
+ console.log(`Endpoint: ${cfg.endpoint || "(local only)"}`);
+ console.log(`Buffer: ${queue.getBufferSize()} events`);
+ console.log(`Failures: ${queue.getFailureCount()}`);
+ });
+
+ analytics
+ .command("flush")
+ .description("Force-flush buffered events")
+ .action(async () => {
+ const before = queue.getBufferSize();
+ await queue.flush();
+ console.log(`Flushed ${before} events.`);
+ });
+ },
+ { commands: ["analytics"] },
+ );
+
+ api.logger.info(
+ `analytics: registered (privacy=${cfg.privacyMode}, buffer=${cfg.maxBufferSize}, flush=${cfg.flushIntervalMs}ms)`,
+ );
+ },
+};
+
+export default analyticsPlugin;
diff --git a/extensions/analytics/package.json b/extensions/analytics/package.json
new file mode 100644
index 00000000..282466e4
--- /dev/null
+++ b/extensions/analytics/package.json
@@ -0,0 +1,13 @@
+{
+ "name": "@apilium/mayros-plugin-analytics",
+ "version": "0.1.0",
+ "private": true,
+ "type": "module",
+ "main": "index.ts",
+ "dependencies": {},
+ "mayros": {
+ "extensions": [
+ "./index.ts"
+ ]
+ }
+}
diff --git a/extensions/bash-sandbox/config.ts b/extensions/bash-sandbox/config.ts
index 9f3b3933..6d15eeff 100644
--- a/extensions/bash-sandbox/config.ts
+++ b/extensions/bash-sandbox/config.ts
@@ -6,6 +6,11 @@
*/
import { assertAllowedKeys } from "../shared/cortex-config.js";
+import {
+ type NetworkSandboxConfig,
+ DEFAULT_NETWORK_SANDBOX_CONFIG,
+ parseNetworkSandboxConfig,
+} from "./network-sandbox.js";
// ============================================================================
// Types
@@ -20,6 +25,37 @@ export type DangerousPattern = {
message: string;
};
+export type ContainerRuntime = "auto" | "docker" | "podman" | "gvisor";
+export type ContainerMountPolicy = "workdir-only" | "home" | "custom";
+export type ContainerNetworkMode = "none" | "host" | "bridge";
+
+export type ContainerSecurityFlags = {
+ blockPrivileged: boolean;
+ blockHostNetwork: boolean;
+ blockRootVolume: boolean;
+ readOnlyRootfs: boolean;
+ noNewPrivileges: boolean;
+ dropCapabilities: string[];
+};
+
+export type ContainerResourceLimits = {
+ cpus: number;
+ memoryMb: number;
+ pidsLimit: number;
+};
+
+export type ContainerConfig = {
+ enabled: boolean;
+ runtime: ContainerRuntime;
+ image: string;
+ allowedRegistries: string[];
+ mountPolicy: ContainerMountPolicy;
+ customMounts: string[];
+ resourceLimits: ContainerResourceLimits;
+ networkMode: ContainerNetworkMode;
+ securityFlags: ContainerSecurityFlags;
+};
+
export type BashSandboxConfig = {
mode: BashSandboxMode;
domainAllowlist: string[];
@@ -31,6 +67,8 @@ export type BashSandboxConfig = {
allowSudo: boolean;
allowCurlToArbitraryDomains: boolean;
bypassEnvVar: string;
+ network: NetworkSandboxConfig;
+ container: ContainerConfig;
};
// ============================================================================
@@ -81,6 +119,33 @@ const DEFAULT_ALLOW_SUDO = false;
const DEFAULT_ALLOW_CURL_TO_ARBITRARY_DOMAINS = false;
const DEFAULT_BYPASS_ENV_VAR = "MAYROS_BASH_SANDBOX_BYPASS";
+const DEFAULT_CONTAINER_SECURITY_FLAGS: ContainerSecurityFlags = {
+ blockPrivileged: true,
+ blockHostNetwork: true,
+ blockRootVolume: true,
+ readOnlyRootfs: false,
+ noNewPrivileges: true,
+ dropCapabilities: ["ALL"],
+};
+
+const DEFAULT_CONTAINER_RESOURCE_LIMITS: ContainerResourceLimits = {
+ cpus: 2,
+ memoryMb: 512,
+ pidsLimit: 256,
+};
+
+export const DEFAULT_CONTAINER_CONFIG: ContainerConfig = {
+ enabled: false,
+ runtime: "auto",
+ image: "ubuntu:22.04",
+ allowedRegistries: ["docker.io", "ghcr.io", "gcr.io", "quay.io"],
+ mountPolicy: "workdir-only",
+ customMounts: [],
+ resourceLimits: { ...DEFAULT_CONTAINER_RESOURCE_LIMITS },
+ networkMode: "none",
+ securityFlags: { ...DEFAULT_CONTAINER_SECURITY_FLAGS },
+};
+
const DEFAULT_DANGEROUS_PATTERNS: DangerousPattern[] = [
{
id: "recursive-delete-root",
@@ -160,6 +225,93 @@ function clampInt(raw: unknown, min: number, max: number, defaultVal: number): n
return Math.max(min, Math.min(max, Math.floor(raw)));
}
+const VALID_CONTAINER_RUNTIMES: ContainerRuntime[] = ["auto", "docker", "podman", "gvisor"];
+const VALID_MOUNT_POLICIES: ContainerMountPolicy[] = ["workdir-only", "home", "custom"];
+const VALID_CONTAINER_NETWORK_MODES: ContainerNetworkMode[] = ["none", "host", "bridge"];
+
+function parseContainerSecurityFlags(raw: unknown): ContainerSecurityFlags {
+ if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
+ return { ...DEFAULT_CONTAINER_SECURITY_FLAGS };
+ }
+ const obj = raw as Record;
+ return {
+ blockPrivileged:
+ typeof obj.blockPrivileged === "boolean"
+ ? obj.blockPrivileged
+ : DEFAULT_CONTAINER_SECURITY_FLAGS.blockPrivileged,
+ blockHostNetwork:
+ typeof obj.blockHostNetwork === "boolean"
+ ? obj.blockHostNetwork
+ : DEFAULT_CONTAINER_SECURITY_FLAGS.blockHostNetwork,
+ blockRootVolume:
+ typeof obj.blockRootVolume === "boolean"
+ ? obj.blockRootVolume
+ : DEFAULT_CONTAINER_SECURITY_FLAGS.blockRootVolume,
+ readOnlyRootfs:
+ typeof obj.readOnlyRootfs === "boolean"
+ ? obj.readOnlyRootfs
+ : DEFAULT_CONTAINER_SECURITY_FLAGS.readOnlyRootfs,
+ noNewPrivileges:
+ typeof obj.noNewPrivileges === "boolean"
+ ? obj.noNewPrivileges
+ : DEFAULT_CONTAINER_SECURITY_FLAGS.noNewPrivileges,
+ dropCapabilities: Array.isArray(obj.dropCapabilities)
+ ? obj.dropCapabilities.filter((c): c is string => typeof c === "string")
+ : [...DEFAULT_CONTAINER_SECURITY_FLAGS.dropCapabilities],
+ };
+}
+
+function parseContainerResourceLimits(raw: unknown): ContainerResourceLimits {
+ if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
+ return { ...DEFAULT_CONTAINER_RESOURCE_LIMITS };
+ }
+ const obj = raw as Record;
+ return {
+ cpus: clampInt(obj.cpus, 0, 32, DEFAULT_CONTAINER_RESOURCE_LIMITS.cpus),
+ memoryMb: clampInt(obj.memoryMb, 0, 32768, DEFAULT_CONTAINER_RESOURCE_LIMITS.memoryMb),
+ pidsLimit: clampInt(obj.pidsLimit, 0, 65536, DEFAULT_CONTAINER_RESOURCE_LIMITS.pidsLimit),
+ };
+}
+
+export function parseContainerConfig(raw: unknown): ContainerConfig {
+ if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
+ return { ...DEFAULT_CONTAINER_CONFIG };
+ }
+ const obj = raw as Record;
+
+ const runtime =
+ typeof obj.runtime === "string" &&
+ VALID_CONTAINER_RUNTIMES.includes(obj.runtime as ContainerRuntime)
+ ? (obj.runtime as ContainerRuntime)
+ : DEFAULT_CONTAINER_CONFIG.runtime;
+
+ const mountPolicy =
+ typeof obj.mountPolicy === "string" &&
+ VALID_MOUNT_POLICIES.includes(obj.mountPolicy as ContainerMountPolicy)
+ ? (obj.mountPolicy as ContainerMountPolicy)
+ : DEFAULT_CONTAINER_CONFIG.mountPolicy;
+
+ const networkMode =
+ typeof obj.networkMode === "string" &&
+ VALID_CONTAINER_NETWORK_MODES.includes(obj.networkMode as ContainerNetworkMode)
+ ? (obj.networkMode as ContainerNetworkMode)
+ : DEFAULT_CONTAINER_CONFIG.networkMode;
+
+ return {
+ enabled: typeof obj.enabled === "boolean" ? obj.enabled : DEFAULT_CONTAINER_CONFIG.enabled,
+ runtime,
+ image: typeof obj.image === "string" ? obj.image : DEFAULT_CONTAINER_CONFIG.image,
+ allowedRegistries: parseStringArray(obj.allowedRegistries, [
+ ...DEFAULT_CONTAINER_CONFIG.allowedRegistries,
+ ]),
+ mountPolicy,
+ customMounts: parseStringArray(obj.customMounts, []),
+ resourceLimits: parseContainerResourceLimits(obj.resourceLimits),
+ networkMode,
+ securityFlags: parseContainerSecurityFlags(obj.securityFlags),
+ };
+}
+
// ============================================================================
// Schema
// ============================================================================
@@ -175,6 +327,8 @@ const ALLOWED_KEYS = [
"allowSudo",
"allowCurlToArbitraryDomains",
"bypassEnvVar",
+ "network",
+ "container",
];
export const bashSandboxConfigSchema = {
@@ -207,6 +361,13 @@ export const bashSandboxConfigSchema = {
const bypassEnvVar =
typeof cfg.bypassEnvVar === "string" ? cfg.bypassEnvVar : DEFAULT_BYPASS_ENV_VAR;
+ const network =
+ cfg.network && typeof cfg.network === "object" && !Array.isArray(cfg.network)
+ ? parseNetworkSandboxConfig(cfg.network as Record)
+ : { ...DEFAULT_NETWORK_SANDBOX_CONFIG };
+
+ const container = parseContainerConfig(cfg.container);
+
return {
mode,
domainAllowlist,
@@ -218,6 +379,8 @@ export const bashSandboxConfigSchema = {
allowSudo,
allowCurlToArbitraryDomains,
bypassEnvVar,
+ network,
+ container,
};
},
uiHints: {
@@ -254,5 +417,78 @@ export const bashSandboxConfigSchema = {
advanced: true,
help: "Environment variable that, when set to '1', bypasses the sandbox",
},
+ network: {
+ label: "Network Sandbox",
+ help: "OS-level network isolation for sandboxed commands",
+ children: {
+ enabled: {
+ label: "Enabled",
+ help: "Enable network isolation (sandbox-exec on macOS, unshare on Linux, env-proxy fallback)",
+ },
+ mode: {
+ label: "Network Mode",
+ placeholder: "allowlist",
+ help: "none: no restrictions, allowlist: only listed domains, full: all except denied",
+ },
+ allowedDomains: {
+ label: "Allowed Domains",
+ help: "Domains permitted for network access. Supports wildcards like *.github.com",
+ },
+ denyDomains: {
+ label: "Deny Domains",
+ help: "Domains always blocked (takes priority over allowlist)",
+ },
+ maxConnections: {
+ label: "Max Connections",
+ placeholder: "10",
+ advanced: true,
+ help: "Maximum concurrent network connections per sandbox (1-100)",
+ },
+ },
+ },
+ container: {
+ label: "Container Sandbox",
+ help: "Run commands inside Docker/Podman containers for kernel-level isolation",
+ children: {
+ enabled: {
+ label: "Enabled",
+ help: "Enable container-based command execution (requires Docker or Podman)",
+ },
+ runtime: {
+ label: "Runtime",
+ placeholder: "auto",
+ help: "auto: detect best available, docker, podman, gvisor (Docker+runsc)",
+ },
+ image: {
+ label: "Container Image",
+ placeholder: "ubuntu:22.04",
+ help: "Default container image for sandboxed commands",
+ },
+ allowedRegistries: {
+ label: "Allowed Registries",
+ help: "Trusted container image registries (e.g. docker.io, ghcr.io)",
+ },
+ mountPolicy: {
+ label: "Mount Policy",
+ placeholder: "workdir-only",
+ help: "workdir-only: only project dir, home: add home (ro), custom: add custom mounts",
+ },
+ networkMode: {
+ label: "Network Mode",
+ placeholder: "none",
+ help: "none: no network, bridge: isolated bridge network",
+ },
+ resourceLimits: {
+ label: "Resource Limits",
+ advanced: true,
+ help: "CPU, memory, and PID limits for containers",
+ },
+ securityFlags: {
+ label: "Security Flags",
+ advanced: true,
+ help: "Container security restrictions (privilege blocking, capabilities, etc.)",
+ },
+ },
+ },
},
};
diff --git a/extensions/bash-sandbox/container-runtime.test.ts b/extensions/bash-sandbox/container-runtime.test.ts
new file mode 100644
index 00000000..f3b8eea2
--- /dev/null
+++ b/extensions/bash-sandbox/container-runtime.test.ts
@@ -0,0 +1,324 @@
+import { describe, it, expect, beforeEach } from "vitest";
+import {
+ ContainerRuntime,
+ buildVolumeMounts,
+ formatRuntimeStatus,
+ type ContainerRunOptions,
+} from "./container-runtime.js";
+import { DEFAULT_CONTAINER_CONFIG, type ContainerConfig } from "./config.js";
+
+// ── Helpers ────────────────────────────────────────────────────────────
+
+function makeConfig(overrides?: Partial): ContainerConfig {
+ return { ...DEFAULT_CONTAINER_CONFIG, enabled: true, ...overrides };
+}
+
+function makeRunOptions(overrides?: Partial): ContainerRunOptions {
+ return {
+ command: "echo hello",
+ workdir: "/project",
+ config: makeConfig(),
+ ...overrides,
+ };
+}
+
+describe("ContainerRuntime", () => {
+ let runtime: ContainerRuntime;
+
+ beforeEach(() => {
+ runtime = new ContainerRuntime();
+ });
+
+ // 1
+ it("detectAll returns an array of runtimes", () => {
+ const results = runtime.detectAll();
+ expect(results).toBeInstanceOf(Array);
+ expect(results.length).toBeGreaterThanOrEqual(3);
+ for (const r of results) {
+ expect(r).toHaveProperty("id");
+ expect(r).toHaveProperty("binary");
+ expect(r).toHaveProperty("available");
+ expect(typeof r.available).toBe("boolean");
+ }
+ });
+
+ // 2
+ it("detectAll caches results on second call", () => {
+ const first = runtime.detectAll();
+ const second = runtime.detectAll();
+ // Same objects from cache
+ expect(first[0]).toBe(second[0]);
+ expect(first[1]).toBe(second[1]);
+ });
+
+ // 3
+ it("clearCache resets detection cache", () => {
+ const first = runtime.detectAll();
+ runtime.clearCache();
+ const second = runtime.detectAll();
+ // Different object references after cache clear
+ expect(first[0]).not.toBe(second[0]);
+ });
+
+ // 4
+ it("selectRuntime returns null when specific runtime is unavailable", () => {
+ // gVisor almost certainly not available in test env
+ const result = runtime.selectRuntime("gvisor");
+ // It either works or returns null — both valid
+ if (result) {
+ expect(result.id).toBe("gvisor");
+ expect(result.available).toBe(true);
+ } else {
+ expect(result).toBeNull();
+ }
+ });
+
+ // 5
+ it("selectRuntime with auto tries all in priority order", () => {
+ const result = runtime.selectRuntime("auto");
+ // In CI/test env, Docker may or may not be available
+ if (result) {
+ expect(["gvisor", "docker", "podman"]).toContain(result.id);
+ expect(result.available).toBe(true);
+ }
+ });
+});
+
+describe("buildRunCommand output", () => {
+ // For these tests, we test the command building logic directly
+ // by creating a runtime and checking the output format.
+ // The actual docker/podman availability doesn't matter for format tests.
+
+ let runtime: ContainerRuntime;
+
+ beforeEach(() => {
+ runtime = new ContainerRuntime();
+ });
+
+ // 6
+ it("buildRunCommand returns null when no runtime is available", () => {
+ // Force a non-existent runtime
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ config: makeConfig({ runtime: "gvisor" }),
+ }),
+ );
+ // Depends on env — gVisor usually not available
+ // If Docker is available but no runsc, returns null for gvisor
+ // This is a legitimate test of the fallback behavior
+ if (!result) {
+ expect(result).toBeNull();
+ }
+ });
+
+ // 7
+ it("buildRunCommand includes --rm flag", () => {
+ const result = runtime.buildRunCommand(makeRunOptions());
+ if (result) {
+ expect(result.args).toContain("--rm");
+ }
+ });
+
+ // 8
+ it("buildRunCommand includes security flags", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ config: makeConfig({
+ securityFlags: {
+ ...DEFAULT_CONTAINER_CONFIG.securityFlags,
+ noNewPrivileges: true,
+ readOnlyRootfs: true,
+ },
+ }),
+ }),
+ );
+ if (result) {
+ expect(result.args).toContain("--security-opt=no-new-privileges");
+ expect(result.args).toContain("--read-only");
+ }
+ });
+
+ // 9
+ it("buildRunCommand includes resource limits", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ config: makeConfig({
+ resourceLimits: { cpus: 4, memoryMb: 1024, pidsLimit: 512 },
+ }),
+ }),
+ );
+ if (result) {
+ expect(result.args).toContain("--cpus=4");
+ expect(result.args).toContain("--memory=1024m");
+ expect(result.args).toContain("--pids-limit=512");
+ }
+ });
+
+ // 10
+ it("buildRunCommand includes --network=none for none mode", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ config: makeConfig({ networkMode: "none" }),
+ }),
+ );
+ if (result) {
+ expect(result.args).toContain("--network=none");
+ }
+ });
+
+ // 11
+ it("buildRunCommand includes --network=bridge for bridge mode", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ config: makeConfig({ networkMode: "bridge" }),
+ }),
+ );
+ if (result) {
+ expect(result.args).toContain("--network=bridge");
+ }
+ });
+
+ // 12
+ it("buildRunCommand includes image and command at end", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ command: "ls -la",
+ config: makeConfig({ image: "alpine:latest" }),
+ }),
+ );
+ if (result) {
+ const lastArgs = result.args.slice(-4);
+ expect(lastArgs).toEqual(["alpine:latest", "bash", "-c", "ls -la"]);
+ }
+ });
+
+ // 13
+ it("buildRunCommand includes -w /workspace", () => {
+ const result = runtime.buildRunCommand(makeRunOptions());
+ if (result) {
+ expect(result.args).toContain("-w");
+ const wIdx = result.args.indexOf("-w");
+ expect(result.args[wIdx + 1]).toBe("/workspace");
+ }
+ });
+
+ // 14
+ it("buildRunCommand passes env vars", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ env: { MY_VAR: "hello" },
+ }),
+ );
+ if (result) {
+ expect(result.args).toContain("-e");
+ expect(result.args).toContain("MY_VAR=hello");
+ }
+ });
+
+ // 15
+ it("buildRunCommand drops all capabilities when configured", () => {
+ const result = runtime.buildRunCommand(
+ makeRunOptions({
+ config: makeConfig({
+ securityFlags: {
+ ...DEFAULT_CONTAINER_CONFIG.securityFlags,
+ dropCapabilities: ["ALL"],
+ },
+ }),
+ }),
+ );
+ if (result) {
+ expect(result.args).toContain("--cap-drop=ALL");
+ }
+ });
+});
+
+describe("buildVolumeMounts", () => {
+ // 16
+ it("workdir-only mounts only workdir", () => {
+ const mounts = buildVolumeMounts(
+ makeRunOptions({
+ config: makeConfig({ mountPolicy: "workdir-only" }),
+ }),
+ );
+ expect(mounts).toHaveLength(1);
+ expect(mounts[0]).toBe("/project:/workspace");
+ });
+
+ // 17
+ it("home policy includes home dir read-only", () => {
+ const mounts = buildVolumeMounts(
+ makeRunOptions({
+ config: makeConfig({ mountPolicy: "home" }),
+ }),
+ );
+ // Should have workdir + home (if HOME set) + tmp
+ expect(mounts.length).toBeGreaterThanOrEqual(1);
+ expect(mounts[0]).toBe("/project:/workspace");
+ // Home mount should be :ro
+ const homeMount = mounts.find((m) => m.includes(":ro"));
+ if (process.env.HOME) {
+ expect(homeMount).toBeDefined();
+ }
+ });
+
+ // 18
+ it("custom policy includes custom mounts", () => {
+ const mounts = buildVolumeMounts(
+ makeRunOptions({
+ config: makeConfig({
+ mountPolicy: "custom",
+ customMounts: ["/data:/data:ro", "/logs:/logs"],
+ }),
+ }),
+ );
+ expect(mounts).toContain("/data:/data:ro");
+ expect(mounts).toContain("/logs:/logs");
+ });
+
+ // 19
+ it("extra mounts from caller are appended", () => {
+ const mounts = buildVolumeMounts(
+ makeRunOptions({
+ extraMounts: ["/extra:/extra"],
+ }),
+ );
+ expect(mounts).toContain("/extra:/extra");
+ });
+});
+
+describe("formatRuntimeStatus", () => {
+ // 20
+ it("formats available runtime with version", () => {
+ const output = formatRuntimeStatus([
+ { id: "docker", binary: "docker", version: "24.0.7", available: true, rootless: false },
+ ]);
+ expect(output).toContain("docker: v24.0.7");
+ });
+
+ // 21
+ it("formats unavailable runtime", () => {
+ const output = formatRuntimeStatus([
+ { id: "gvisor", binary: "docker", version: "", available: false, rootless: false },
+ ]);
+ expect(output).toContain("gvisor: not found");
+ });
+
+ // 22
+ it("shows rootless flag", () => {
+ const output = formatRuntimeStatus([
+ { id: "podman", binary: "podman", version: "4.9.0", available: true, rootless: true },
+ ]);
+ expect(output).toContain("rootless");
+ });
+
+ // 23
+ it("formats multiple runtimes", () => {
+ const output = formatRuntimeStatus([
+ { id: "docker", binary: "docker", version: "24.0.7", available: true, rootless: false },
+ { id: "podman", binary: "podman", version: "", available: false, rootless: false },
+ ]);
+ expect(output).toContain("docker:");
+ expect(output).toContain("podman:");
+ });
+});
diff --git a/extensions/bash-sandbox/container-runtime.ts b/extensions/bash-sandbox/container-runtime.ts
new file mode 100644
index 00000000..cf8a393c
--- /dev/null
+++ b/extensions/bash-sandbox/container-runtime.ts
@@ -0,0 +1,339 @@
+/**
+ * Container Runtime — Docker/Podman/gVisor detection and command wrapping.
+ *
+ * Detects available container runtimes, builds `docker run` / `podman run`
+ * commands with proper security flags, volume mounts, and resource limits.
+ *
+ * Strategies:
+ * - Docker: `docker run --rm --security-opt=no-new-privileges ...`
+ * - Podman: `podman run --rm --security-opt=no-new-privileges ...` (rootless)
+ * - gVisor: `docker run --rm --runtime=runsc ...`
+ */
+
+import { execFileSync } from "node:child_process";
+import { resolve, isAbsolute } from "node:path";
+import { existsSync } from "node:fs";
+import type { ContainerConfig } from "./config.js";
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export type RuntimeId = "docker" | "podman" | "gvisor";
+
+export type DetectedRuntime = {
+ id: RuntimeId;
+ binary: string;
+ version: string;
+ available: boolean;
+ rootless: boolean;
+};
+
+export type ContainerRunOptions = {
+ command: string;
+ workdir: string;
+ config: ContainerConfig;
+ env?: Record;
+ extraMounts?: string[];
+};
+
+export type ContainerRunResult = {
+ args: string[];
+ binary: string;
+ runtime: RuntimeId;
+};
+
+// ============================================================================
+// Detection
+// ============================================================================
+
+function execSilent(binary: string, args: string[]): string | null {
+ try {
+ return execFileSync(binary, args, {
+ encoding: "utf-8",
+ timeout: 5000,
+ stdio: ["pipe", "pipe", "pipe"],
+ }).trim();
+ } catch {
+ return null;
+ }
+}
+
+function parseVersion(output: string | null): string {
+ if (!output) return "";
+ // Docker: "Docker version 24.0.7, build ..."
+ // Podman: "podman version 4.9.0"
+ const match = output.match(/(\d+\.\d+(?:\.\d+)?)/);
+ return match?.[1] ?? "";
+}
+
+function detectDocker(): DetectedRuntime {
+ const version = parseVersion(execSilent("docker", ["--version"]));
+ if (!version) {
+ return { id: "docker", binary: "docker", version: "", available: false, rootless: false };
+ }
+ // Check if running rootless
+ const info = execSilent("docker", ["info", "--format", "{{.SecurityOptions}}"]);
+ const rootless = info?.includes("rootless") ?? false;
+ return { id: "docker", binary: "docker", version, available: true, rootless };
+}
+
+function detectPodman(): DetectedRuntime {
+ const version = parseVersion(execSilent("podman", ["--version"]));
+ if (!version) {
+ return { id: "podman", binary: "podman", version: "", available: false, rootless: false };
+ }
+ // Podman is rootless by default
+ return { id: "podman", binary: "podman", version, available: true, rootless: true };
+}
+
+function detectGvisor(): DetectedRuntime {
+ // gVisor uses Docker with --runtime=runsc
+ const docker = detectDocker();
+ if (!docker.available) {
+ return { id: "gvisor", binary: "docker", version: "", available: false, rootless: false };
+ }
+ // Check if runsc runtime is available
+ const info = execSilent("docker", ["info", "--format", "{{.Runtimes}}"]);
+ const hasRunsc = info?.includes("runsc") ?? false;
+ if (!hasRunsc) {
+ return { id: "gvisor", binary: "docker", version: "", available: false, rootless: false };
+ }
+ return {
+ id: "gvisor",
+ binary: "docker",
+ version: docker.version,
+ available: true,
+ rootless: docker.rootless,
+ };
+}
+
+// ============================================================================
+// ContainerRuntime
+// ============================================================================
+
+const RUNTIME_DETECTORS: Record DetectedRuntime> = {
+ docker: detectDocker,
+ podman: detectPodman,
+ gvisor: detectGvisor,
+};
+
+const RUNTIME_PRIORITY: RuntimeId[] = ["gvisor", "docker", "podman"];
+
+export class ContainerRuntime {
+ private cache: Map = new Map();
+
+ /**
+ * Detect all available container runtimes.
+ */
+ detectAll(): DetectedRuntime[] {
+ const results: DetectedRuntime[] = [];
+ for (const id of RUNTIME_PRIORITY) {
+ const cached = this.cache.get(id);
+ if (cached) {
+ results.push(cached);
+ continue;
+ }
+ const detected = RUNTIME_DETECTORS[id]();
+ this.cache.set(id, detected);
+ results.push(detected);
+ }
+ return results;
+ }
+
+ /**
+ * Select the best available runtime based on config preference.
+ */
+ selectRuntime(preference: ContainerConfig["runtime"]): DetectedRuntime | null {
+ if (preference !== "auto") {
+ const cached = this.cache.get(preference);
+ if (cached) return cached.available ? cached : null;
+ const detected = RUNTIME_DETECTORS[preference]();
+ this.cache.set(preference, detected);
+ return detected.available ? detected : null;
+ }
+
+ // Auto-detect: try in priority order
+ for (const id of RUNTIME_PRIORITY) {
+ const cached = this.cache.get(id);
+ if (cached?.available) return cached;
+ const detected = RUNTIME_DETECTORS[id]();
+ this.cache.set(id, detected);
+ if (detected.available) return detected;
+ }
+ return null;
+ }
+
+ /**
+ * Build the full `docker run` / `podman run` command arguments.
+ */
+ buildRunCommand(opts: ContainerRunOptions): ContainerRunResult | null {
+ const runtime = this.selectRuntime(opts.config.runtime);
+ if (!runtime) return null;
+
+ const args: string[] = ["run", "--rm"];
+
+ // gVisor runtime flag
+ if (runtime.id === "gvisor") {
+ args.push("--runtime=runsc");
+ }
+
+ // Security flags
+ const sec = opts.config.securityFlags;
+ if (sec.noNewPrivileges) {
+ args.push("--security-opt=no-new-privileges");
+ }
+ if (sec.readOnlyRootfs) {
+ args.push("--read-only");
+ }
+ if (sec.dropCapabilities.length > 0) {
+ args.push("--cap-drop=ALL");
+ // Re-add only if explicit list does NOT include "ALL"
+ for (const cap of sec.dropCapabilities) {
+ if (cap !== "ALL") {
+ args.push(`--cap-add=${cap}`);
+ }
+ }
+ }
+
+ // Resource limits
+ const limits = opts.config.resourceLimits;
+ if (limits.cpus > 0) {
+ args.push(`--cpus=${limits.cpus}`);
+ }
+ if (limits.memoryMb > 0) {
+ args.push(`--memory=${limits.memoryMb}m`);
+ }
+ if (limits.pidsLimit > 0) {
+ args.push(`--pids-limit=${limits.pidsLimit}`);
+ }
+
+ // Network mode
+ if (opts.config.networkMode === "none") {
+ args.push("--network=none");
+ } else if (opts.config.networkMode === "bridge") {
+ args.push("--network=bridge");
+ }
+ // "host" is intentionally NOT wired — blocked by security policy
+
+ // Volume mounts
+ const mounts = buildVolumeMounts(opts);
+ for (const mount of mounts) {
+ args.push("-v", mount);
+ }
+
+ // Working directory inside container
+ args.push("-w", "/workspace");
+
+ // Environment variables
+ if (opts.env) {
+ for (const [key, value] of Object.entries(opts.env)) {
+ args.push("-e", `${key}=${value}`);
+ }
+ }
+
+ // Pass through common env vars
+ for (const envVar of ["HOME", "USER", "SHELL", "TERM", "LANG", "PATH"]) {
+ if (process.env[envVar]) {
+ args.push("-e", `${envVar}=${process.env[envVar]}`);
+ }
+ }
+
+ // Image
+ args.push(opts.config.image);
+
+ // Command: bash -c ''
+ args.push("bash", "-c", opts.command);
+
+ return {
+ args,
+ binary: runtime.binary,
+ runtime: runtime.id,
+ };
+ }
+
+ /**
+ * Check if an image is available locally.
+ */
+ isImageAvailable(image: string, runtime?: DetectedRuntime): boolean {
+ const binary = runtime?.binary ?? "docker";
+ const result = execSilent(binary, ["image", "inspect", image]);
+ return result !== null;
+ }
+
+ /**
+ * Pull a container image.
+ */
+ pullImage(image: string, runtime?: DetectedRuntime): boolean {
+ const binary = runtime?.binary ?? "docker";
+ const result = execSilent(binary, ["pull", image]);
+ return result !== null;
+ }
+
+ /**
+ * Clear the detection cache.
+ */
+ clearCache(): void {
+ this.cache.clear();
+ }
+}
+
+// ============================================================================
+// Volume Mount Builder
+// ============================================================================
+
+/**
+ * Build volume mount strings based on mount policy.
+ */
+export function buildVolumeMounts(opts: ContainerRunOptions): string[] {
+ const mounts: string[] = [];
+ const policy = opts.config.mountPolicy;
+
+ // Always mount workdir
+ const workdir = isAbsolute(opts.workdir) ? opts.workdir : resolve(opts.workdir);
+ mounts.push(`${workdir}:/workspace`);
+
+ if (policy === "home" || policy === "custom") {
+ // Mount home directory read-only
+ const home = process.env.HOME;
+ if (home && existsSync(home) && home !== workdir) {
+ mounts.push(`${home}:/home/user:ro`);
+ }
+
+ // Temp directory
+ const tmpDir = process.env.TMPDIR || "/tmp";
+ if (existsSync(tmpDir)) {
+ mounts.push(`${tmpDir}:/tmp`);
+ }
+ }
+
+ if (policy === "custom" && opts.config.customMounts.length > 0) {
+ for (const mount of opts.config.customMounts) {
+ mounts.push(mount);
+ }
+ }
+
+ // Extra mounts from caller
+ if (opts.extraMounts) {
+ for (const mount of opts.extraMounts) {
+ mounts.push(mount);
+ }
+ }
+
+ return mounts;
+}
+
+/**
+ * Format a runtime detection result for display.
+ */
+export function formatRuntimeStatus(runtimes: DetectedRuntime[]): string {
+ const lines: string[] = ["Container Runtimes:"];
+ for (const rt of runtimes) {
+ const status = rt.available ? `v${rt.version}` : "not found";
+ const flags: string[] = [];
+ if (rt.rootless) flags.push("rootless");
+ const flagStr = flags.length > 0 ? ` (${flags.join(", ")})` : "";
+ lines.push(` ${rt.id}: ${status}${flagStr}`);
+ }
+ return lines.join("\n");
+}
diff --git a/extensions/bash-sandbox/container-security.test.ts b/extensions/bash-sandbox/container-security.test.ts
new file mode 100644
index 00000000..b8ec6236
--- /dev/null
+++ b/extensions/bash-sandbox/container-security.test.ts
@@ -0,0 +1,367 @@
+import { describe, it, expect } from "vitest";
+import {
+ validateDockerFlags,
+ validateVolumeMounts,
+ validateImageRegistry,
+ extractImageRegistry,
+ parseVolumeMount,
+ validateContainerSecurity,
+ hasBlockingViolation,
+ formatViolations,
+} from "./container-security.js";
+import { DEFAULT_CONTAINER_CONFIG } from "./config.js";
+
+// ============================================================================
+// validateDockerFlags
+// ============================================================================
+
+describe("validateDockerFlags", () => {
+ // 1
+ it("detects --privileged flag", () => {
+ const violations = validateDockerFlags("docker run --privileged ubuntu bash");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("privileged-mode");
+ expect(violations[0].severity).toBe("block");
+ });
+
+ // 2
+ it("detects --net=host flag", () => {
+ const violations = validateDockerFlags("docker run --net=host ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("host-network");
+ });
+
+ // 3
+ it("detects --network=host flag", () => {
+ const violations = validateDockerFlags("docker run --network=host ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("host-network");
+ });
+
+ // 4
+ it("detects --pid=host flag", () => {
+ const violations = validateDockerFlags("docker run --pid=host ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("host-pid");
+ });
+
+ // 5
+ it("detects --cap-add=SYS_ADMIN", () => {
+ const violations = validateDockerFlags("docker run --cap-add=SYS_ADMIN ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("cap-sys-admin");
+ expect(violations[0].severity).toBe("block");
+ });
+
+ // 6
+ it("detects --cap-add=ALL", () => {
+ const violations = validateDockerFlags("docker run --cap-add=ALL ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("cap-all");
+ expect(violations[0].severity).toBe("block");
+ });
+
+ // 7
+ it("detects --cap-add=SYS_PTRACE as warning", () => {
+ const violations = validateDockerFlags("docker run --cap-add=SYS_PTRACE ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].severity).toBe("warn");
+ });
+
+ // 8
+ it("detects seccomp=unconfined", () => {
+ const violations = validateDockerFlags("docker run --security-opt seccomp=unconfined ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("unconfined-security");
+ });
+
+ // 9
+ it("detects device access", () => {
+ const violations = validateDockerFlags("docker run --device /dev/sda ubuntu");
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("device-access");
+ expect(violations[0].severity).toBe("warn");
+ });
+
+ // 10
+ it("returns empty for safe command", () => {
+ const violations = validateDockerFlags("docker run --rm ubuntu echo hello");
+ expect(violations).toHaveLength(0);
+ });
+
+ // 11
+ it("detects multiple violations", () => {
+ const violations = validateDockerFlags(
+ "docker run --privileged --net=host --cap-add=SYS_ADMIN ubuntu",
+ );
+ expect(violations.length).toBeGreaterThanOrEqual(3);
+ });
+});
+
+// ============================================================================
+// parseVolumeMount
+// ============================================================================
+
+describe("parseVolumeMount", () => {
+ // 12
+ it("parses simple mount", () => {
+ const result = parseVolumeMount("/host:/container");
+ expect(result).toEqual({ source: "/host", target: "/container", readOnly: false });
+ });
+
+ // 13
+ it("parses mount with :ro", () => {
+ const result = parseVolumeMount("/host:/container:ro");
+ expect(result).toEqual({ source: "/host", target: "/container", readOnly: true });
+ });
+
+ // 14
+ it("parses mount with :rw", () => {
+ const result = parseVolumeMount("/host:/container:rw");
+ expect(result).toEqual({ source: "/host", target: "/container", readOnly: false });
+ });
+
+ // 15
+ it("returns null for invalid mount", () => {
+ expect(parseVolumeMount("nocolon")).toBeNull();
+ });
+});
+
+// ============================================================================
+// validateVolumeMounts
+// ============================================================================
+
+describe("validateVolumeMounts", () => {
+ // 16
+ it("blocks root filesystem mount", () => {
+ const violations = validateVolumeMounts(["/:/host"]);
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("root-mount");
+ expect(violations[0].severity).toBe("block");
+ });
+
+ // 17
+ it("warns on root mount read-only", () => {
+ const violations = validateVolumeMounts(["/:/host:ro"]);
+ expect(violations).toHaveLength(1);
+ expect(violations[0].severity).toBe("warn");
+ });
+
+ // 18
+ it("blocks /etc mount read-write", () => {
+ const violations = validateVolumeMounts(["/etc:/etc"]);
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("etc-mount");
+ });
+
+ // 19
+ it("blocks docker socket mount", () => {
+ const violations = validateVolumeMounts(["/var/run/docker.sock:/var/run/docker.sock"]);
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("docker-socket");
+ });
+
+ // 20
+ it("blocks /proc mount", () => {
+ const violations = validateVolumeMounts(["/proc:/proc"]);
+ expect(violations).toHaveLength(1);
+ expect(violations[0].rule).toBe("proc-mount");
+ });
+
+ // 21
+ it("allows safe mounts", () => {
+ const violations = validateVolumeMounts(["/home/user/project:/workspace", "/tmp:/tmp"]);
+ expect(violations).toHaveLength(0);
+ });
+
+ // 22
+ it("detects multiple violations", () => {
+ const violations = validateVolumeMounts([
+ "/:/host",
+ "/var/run/docker.sock:/var/run/docker.sock",
+ ]);
+ expect(violations.length).toBeGreaterThanOrEqual(2);
+ });
+});
+
+// ============================================================================
+// extractImageRegistry
+// ============================================================================
+
+describe("extractImageRegistry", () => {
+ // 23
+ it("returns docker.io for bare image name", () => {
+ expect(extractImageRegistry("ubuntu")).toBe("docker.io");
+ });
+
+ // 24
+ it("returns docker.io for library image", () => {
+ expect(extractImageRegistry("library/ubuntu")).toBe("docker.io");
+ });
+
+ // 25
+ it("extracts ghcr.io registry", () => {
+ expect(extractImageRegistry("ghcr.io/owner/image:latest")).toBe("ghcr.io");
+ });
+
+ // 26
+ it("extracts registry with port", () => {
+ expect(extractImageRegistry("registry.example.com:5000/image")).toBe("registry.example.com");
+ });
+
+ // 27
+ it("extracts gcr.io registry", () => {
+ expect(extractImageRegistry("gcr.io/project/image")).toBe("gcr.io");
+ });
+
+ // 28
+ it("handles image with digest", () => {
+ expect(extractImageRegistry("ghcr.io/owner/image@sha256:abc123")).toBe("ghcr.io");
+ });
+
+ // 29
+ it("returns docker.io for user/image pattern", () => {
+ expect(extractImageRegistry("username/myimage")).toBe("docker.io");
+ });
+});
+
+// ============================================================================
+// validateImageRegistry
+// ============================================================================
+
+describe("validateImageRegistry", () => {
+ // 30
+ it("allows any image when registries list is empty", () => {
+ expect(validateImageRegistry("evil.com/image", [])).toBeNull();
+ });
+
+ // 31
+ it("allows image from allowed registry", () => {
+ expect(validateImageRegistry("ghcr.io/owner/image", ["ghcr.io", "docker.io"])).toBeNull();
+ });
+
+ // 32
+ it("blocks image from non-allowed registry", () => {
+ const violation = validateImageRegistry("evil.com/image", ["docker.io"]);
+ expect(violation).not.toBeNull();
+ expect(violation!.rule).toBe("untrusted-registry");
+ expect(violation!.severity).toBe("block");
+ });
+
+ // 33
+ it("supports wildcard registry matching", () => {
+ expect(validateImageRegistry("sub.example.com/image", ["*.example.com"])).toBeNull();
+ });
+
+ // 34
+ it("allows bare image names against docker.io", () => {
+ expect(validateImageRegistry("ubuntu:22.04", ["docker.io"])).toBeNull();
+ });
+});
+
+// ============================================================================
+// validateContainerSecurity (integration)
+// ============================================================================
+
+describe("validateContainerSecurity", () => {
+ const baseConfig = { ...DEFAULT_CONTAINER_CONFIG, enabled: true };
+
+ // 35
+ it("returns no violations for safe config", () => {
+ const violations = validateContainerSecurity(
+ "echo hello",
+ ["/project:/workspace"],
+ "ubuntu:22.04",
+ baseConfig,
+ );
+ expect(violations).toHaveLength(0);
+ });
+
+ // 36
+ it("catches privileged flag in command", () => {
+ const violations = validateContainerSecurity(
+ "docker run --privileged ubuntu",
+ [],
+ "ubuntu:22.04",
+ baseConfig,
+ );
+ expect(violations.some((v) => v.rule === "privileged-mode")).toBe(true);
+ });
+
+ // 37
+ it("catches untrusted registry", () => {
+ const violations = validateContainerSecurity("echo hello", [], "evil.com/backdoor", baseConfig);
+ expect(violations.some((v) => v.rule === "untrusted-registry")).toBe(true);
+ });
+
+ // 38
+ it("catches dangerous volume mount", () => {
+ const violations = validateContainerSecurity(
+ "echo hello",
+ ["/:/rootfs"],
+ "ubuntu:22.04",
+ baseConfig,
+ );
+ expect(violations.some((v) => v.rule === "root-mount")).toBe(true);
+ });
+});
+
+// ============================================================================
+// hasBlockingViolation
+// ============================================================================
+
+describe("hasBlockingViolation", () => {
+ // 39
+ it("returns true for block violations", () => {
+ expect(hasBlockingViolation([{ rule: "test", severity: "block", message: "bad" }])).toBe(true);
+ });
+
+ // 40
+ it("returns false for warn-only violations", () => {
+ expect(hasBlockingViolation([{ rule: "test", severity: "warn", message: "maybe" }])).toBe(
+ false,
+ );
+ });
+
+ // 41
+ it("returns false for empty violations", () => {
+ expect(hasBlockingViolation([])).toBe(false);
+ });
+});
+
+// ============================================================================
+// formatViolations
+// ============================================================================
+
+describe("formatViolations", () => {
+ // 42
+ it("formats empty violations", () => {
+ expect(formatViolations([])).toContain("No security violations");
+ });
+
+ // 43
+ it("formats block violations with BLOCK prefix", () => {
+ const output = formatViolations([
+ { rule: "privileged-mode", severity: "block", message: "test msg" },
+ ]);
+ expect(output).toContain("[BLOCK]");
+ expect(output).toContain("privileged-mode");
+ expect(output).toContain("test msg");
+ });
+
+ // 44
+ it("formats warn violations with WARN prefix", () => {
+ const output = formatViolations([
+ { rule: "device-access", severity: "warn", message: "device" },
+ ]);
+ expect(output).toContain("[WARN]");
+ });
+
+ // 45
+ it("includes detail when present", () => {
+ const output = formatViolations([
+ { rule: "test", severity: "block", message: "msg", detail: "some detail" },
+ ]);
+ expect(output).toContain("some detail");
+ });
+});
diff --git a/extensions/bash-sandbox/container-security.ts b/extensions/bash-sandbox/container-security.ts
new file mode 100644
index 00000000..72a0df0d
--- /dev/null
+++ b/extensions/bash-sandbox/container-security.ts
@@ -0,0 +1,369 @@
+/**
+ * Container Security — Validates container flags, mounts, and images.
+ *
+ * Prevents dangerous container configurations:
+ * - --privileged escalation
+ * - --net=host network bypass
+ * - Root filesystem mount (/ → /host)
+ * - Untrusted image registries
+ * - Dangerous capabilities (SYS_ADMIN, SYS_PTRACE, etc.)
+ */
+
+import type { ContainerConfig } from "./config.js";
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export type SecurityViolation = {
+ rule: string;
+ severity: "block" | "warn";
+ message: string;
+ detail?: string;
+};
+
+// ============================================================================
+// Flag Validation
+// ============================================================================
+
+/** Docker run flags that escalate container privileges. */
+const DANGEROUS_FLAGS: Array<{
+ pattern: RegExp;
+ rule: string;
+ severity: "block" | "warn";
+ message: string;
+}> = [
+ {
+ pattern: /--privileged/,
+ rule: "privileged-mode",
+ severity: "block",
+ message: "Privileged mode gives full host access",
+ },
+ {
+ pattern: /--net(?:work)?=host/,
+ rule: "host-network",
+ severity: "block",
+ message: "Host network bypasses network isolation",
+ },
+ {
+ pattern: /--pid=host/,
+ rule: "host-pid",
+ severity: "block",
+ message: "Host PID namespace allows process manipulation",
+ },
+ {
+ pattern: /--ipc=host/,
+ rule: "host-ipc",
+ severity: "block",
+ message: "Host IPC namespace allows shared memory access",
+ },
+ {
+ pattern: /--userns=host/,
+ rule: "host-userns",
+ severity: "block",
+ message: "Host user namespace bypasses UID isolation",
+ },
+ {
+ pattern: /--cap-add=SYS_ADMIN/,
+ rule: "cap-sys-admin",
+ severity: "block",
+ message: "SYS_ADMIN capability allows mounting and namespace manipulation",
+ },
+ {
+ pattern: /--cap-add=SYS_PTRACE/,
+ rule: "cap-sys-ptrace",
+ severity: "warn",
+ message: "SYS_PTRACE allows process debugging",
+ },
+ {
+ pattern: /--cap-add=NET_ADMIN/,
+ rule: "cap-net-admin",
+ severity: "warn",
+ message: "NET_ADMIN allows network configuration changes",
+ },
+ {
+ pattern: /--cap-add=ALL/,
+ rule: "cap-all",
+ severity: "block",
+ message: "Adding all capabilities is equivalent to privileged mode",
+ },
+ {
+ pattern: /--security-opt\s*(?:=\s*)?(?:seccomp|apparmor)(?:=|:)unconfined/,
+ rule: "unconfined-security",
+ severity: "block",
+ message: "Disabling security profiles removes a defense layer",
+ },
+ {
+ pattern: /--device\s*(?:=\s*)?\/dev\//,
+ rule: "device-access",
+ severity: "warn",
+ message: "Direct device access from container",
+ },
+];
+
+/**
+ * Validate Docker/Podman flags in a raw command string for dangerous options.
+ */
+export function validateDockerFlags(command: string): SecurityViolation[] {
+ const violations: SecurityViolation[] = [];
+ for (const flag of DANGEROUS_FLAGS) {
+ if (flag.pattern.test(command)) {
+ violations.push({
+ rule: flag.rule,
+ severity: flag.severity,
+ message: flag.message,
+ detail: command.match(flag.pattern)?.[0],
+ });
+ }
+ }
+ return violations;
+}
+
+// ============================================================================
+// Volume Mount Validation
+// ============================================================================
+
+/** Paths that must never be mounted read-write into containers. */
+const DANGEROUS_MOUNT_SOURCES = [
+ { path: "/", exact: true, rule: "root-mount", message: "Root filesystem mount" },
+ { path: "/etc", exact: false, rule: "etc-mount", message: "System config directory mount" },
+ { path: "/proc", exact: false, rule: "proc-mount", message: "Proc filesystem mount" },
+ { path: "/sys", exact: false, rule: "sys-mount", message: "Sys filesystem mount" },
+ { path: "/dev", exact: false, rule: "dev-mount", message: "Device filesystem mount" },
+ { path: "/boot", exact: false, rule: "boot-mount", message: "Boot partition mount" },
+ {
+ path: "/var/run/docker.sock",
+ exact: true,
+ rule: "docker-socket",
+ message: "Docker socket mount (container escape)",
+ },
+ {
+ path: "/run/docker.sock",
+ exact: true,
+ rule: "docker-socket",
+ message: "Docker socket mount (container escape)",
+ },
+ {
+ path: "/var/run/podman",
+ exact: false,
+ rule: "podman-socket",
+ message: "Podman socket mount (container escape)",
+ },
+];
+
+/**
+ * Parse a volume mount string (e.g. "/host/path:/container/path:ro").
+ */
+export function parseVolumeMount(mount: string): {
+ source: string;
+ target: string;
+ readOnly: boolean;
+} | null {
+ const parts = mount.split(":");
+ if (parts.length < 2) return null;
+
+ // Handle Windows paths (C:\path → C:\path)
+ let source: string;
+ let target: string;
+ let options = "";
+
+ if (parts.length === 2) {
+ source = parts[0];
+ target = parts[1];
+ } else if (parts.length === 3) {
+ // Could be /src:/dst:ro or C:\path:/dst
+ if (parts[2] === "ro" || parts[2] === "rw" || parts[2].includes(",")) {
+ source = parts[0];
+ target = parts[1];
+ options = parts[2];
+ } else {
+ // Likely Windows path in first segment
+ source = `${parts[0]}:${parts[1]}`;
+ target = parts[2];
+ }
+ } else if (parts.length === 4) {
+ // Windows path with options: C:\path:/dst:ro
+ source = `${parts[0]}:${parts[1]}`;
+ target = parts[2];
+ options = parts[3];
+ } else {
+ return null;
+ }
+
+ return {
+ source: source.trim(),
+ target: target.trim(),
+ readOnly: options.includes("ro"),
+ };
+}
+
+/**
+ * Validate volume mounts against security policy.
+ */
+export function validateVolumeMounts(mounts: string[]): SecurityViolation[] {
+ const violations: SecurityViolation[] = [];
+
+ for (const mount of mounts) {
+ const parsed = parseVolumeMount(mount);
+ if (!parsed) continue;
+
+ // Normalize source path
+ const normalizedSource = parsed.source.replace(/\/+$/, "") || "/";
+
+ for (const dangerous of DANGEROUS_MOUNT_SOURCES) {
+ const matches = dangerous.exact
+ ? normalizedSource === dangerous.path
+ : normalizedSource === dangerous.path || normalizedSource.startsWith(dangerous.path + "/");
+
+ if (matches && !parsed.readOnly) {
+ violations.push({
+ rule: dangerous.rule,
+ severity: "block",
+ message: `${dangerous.message}: ${parsed.source} → ${parsed.target} (read-write)`,
+ detail: mount,
+ });
+ } else if (matches && parsed.readOnly) {
+ // Read-only mounts of dangerous paths get a warning
+ violations.push({
+ rule: dangerous.rule,
+ severity: "warn",
+ message: `${dangerous.message}: ${parsed.source} → ${parsed.target} (read-only)`,
+ detail: mount,
+ });
+ }
+ }
+ }
+
+ return violations;
+}
+
+// ============================================================================
+// Image Registry Validation
+// ============================================================================
+
+/**
+ * Extract the registry from an image reference.
+ *
+ * Examples:
+ * - "ubuntu" → "docker.io" (implicit default)
+ * - "docker.io/library/ubuntu" → "docker.io"
+ * - "ghcr.io/owner/image:tag" → "ghcr.io"
+ * - "registry.example.com:5000/image" → "registry.example.com"
+ */
+export function extractImageRegistry(image: string): string {
+ // Remove digest (@sha256:...)
+ const ref = image.split("@")[0];
+ const parts = ref.split("/");
+
+ if (parts.length === 1) {
+ // Just image name: "ubuntu", "alpine", "ubuntu:22.04"
+ return "docker.io";
+ }
+
+ // Check if first part looks like a registry (has dot or port)
+ const first = parts[0];
+ // Strip port from registry: "registry.example.com:5000" → "registry.example.com"
+ const registryHost = first.split(":")[0];
+ if (registryHost.includes(".")) {
+ return registryHost;
+ }
+
+ // "library/ubuntu" or "user/image" → default registry
+ return "docker.io";
+}
+
+/**
+ * Validate an image against allowed registries.
+ *
+ * Empty allowedRegistries means all registries are allowed.
+ */
+export function validateImageRegistry(
+ image: string,
+ allowedRegistries: string[],
+): SecurityViolation | null {
+ if (allowedRegistries.length === 0) return null;
+
+ const registry = extractImageRegistry(image);
+
+ for (const allowed of allowedRegistries) {
+ if (registry === allowed) return null;
+ // Wildcard matching: *.example.com matches sub.example.com
+ if (allowed.startsWith("*.")) {
+ const suffix = allowed.slice(1); // ".example.com"
+ if (registry.endsWith(suffix)) return null;
+ }
+ }
+
+ return {
+ rule: "untrusted-registry",
+ severity: "block",
+ message: `Image registry "${registry}" is not in the allowed list`,
+ detail: `Image: ${image}, Allowed: ${allowedRegistries.join(", ")}`,
+ };
+}
+
+// ============================================================================
+// Full Validation
+// ============================================================================
+
+/**
+ * Run all security validations for a container execution request.
+ */
+export function validateContainerSecurity(
+ command: string,
+ mounts: string[],
+ image: string,
+ config: ContainerConfig,
+): SecurityViolation[] {
+ const violations: SecurityViolation[] = [];
+
+ // 1. Validate docker flags in the raw command
+ violations.push(...validateDockerFlags(command));
+
+ // 2. Validate volume mounts
+ violations.push(...validateVolumeMounts(mounts));
+
+ // 3. Validate image registry
+ const registryViolation = validateImageRegistry(image, config.allowedRegistries);
+ if (registryViolation) {
+ violations.push(registryViolation);
+ }
+
+ // 4. Config-level security checks
+ if (config.securityFlags.blockPrivileged && command.includes("--privileged")) {
+ // Already caught by flag validation, but ensure it's a block
+ const existing = violations.find((v) => v.rule === "privileged-mode");
+ if (existing) existing.severity = "block";
+ }
+
+ if (config.securityFlags.blockHostNetwork && /--net(?:work)?=host/.test(command)) {
+ const existing = violations.find((v) => v.rule === "host-network");
+ if (existing) existing.severity = "block";
+ }
+
+ return violations;
+}
+
+/**
+ * Check if violations contain any blocking rules.
+ */
+export function hasBlockingViolation(violations: SecurityViolation[]): boolean {
+ return violations.some((v) => v.severity === "block");
+}
+
+/**
+ * Format violations for display.
+ */
+export function formatViolations(violations: SecurityViolation[]): string {
+ if (violations.length === 0) return "No security violations found.";
+
+ const lines: string[] = [`Container security violations (${violations.length}):`];
+ for (const v of violations) {
+ const icon = v.severity === "block" ? "BLOCK" : "WARN";
+ lines.push(` [${icon}] ${v.rule}: ${v.message}`);
+ if (v.detail) {
+ lines.push(` ${v.detail}`);
+ }
+ }
+ return lines.join("\n");
+}
diff --git a/extensions/bash-sandbox/index.ts b/extensions/bash-sandbox/index.ts
index bf87e563..ba295e10 100644
--- a/extensions/bash-sandbox/index.ts
+++ b/extensions/bash-sandbox/index.ts
@@ -22,6 +22,13 @@ import { checkBlocklist, checkDangerousPatterns } from "./command-blocklist.js";
import { parseCommandChain } from "./command-parser.js";
import { bashSandboxConfigSchema, type BashSandboxConfig } from "./config.js";
import { checkDomains } from "./domain-checker.js";
+import { ContainerRuntime, formatRuntimeStatus } from "./container-runtime.js";
+import {
+ validateContainerSecurity,
+ hasBlockingViolation,
+ formatViolations,
+} from "./container-security.js";
+import { NetworkSandbox } from "./network-sandbox.js";
// ============================================================================
// Helpers
@@ -151,13 +158,18 @@ const bashSandboxPlugin = {
async register(api: MayrosPluginApi) {
const cfg = bashSandboxConfigSchema.parse(api.pluginConfig);
const auditLog = new AuditLog(1000);
+ const networkSandbox = new NetworkSandbox(cfg.network);
+ const containerRuntime = new ContainerRuntime();
// Session-scoped overrides (not persisted)
const sessionAllowedDomains: string[] = [];
const sessionBlockedCommands: string[] = [];
+ const containerStatus = cfg.container.enabled
+ ? `container: ${cfg.container.runtime}`
+ : "container: off";
api.logger.info(
- `bash-sandbox: registered (mode: ${cfg.mode}, blocklist: ${cfg.commandBlocklist.length} commands, allowlist: ${cfg.domainAllowlist.length} domains)`,
+ `bash-sandbox: registered (mode: ${cfg.mode}, blocklist: ${cfg.commandBlocklist.length} commands, allowlist: ${cfg.domainAllowlist.length} domains, network: ${cfg.network.enabled ? cfg.network.mode : "off"}, ${containerStatus})`,
);
/**
@@ -238,6 +250,105 @@ const bashSandboxPlugin = {
return;
}
+ // 7. Container sandbox execution
+ if (cfg.container.enabled && cfg.mode !== "off") {
+ const containerCfg = cfg.container;
+ const workdir = typeof params.cwd === "string" ? params.cwd : process.cwd();
+
+ // Security validation before container execution
+ const violations = validateContainerSecurity(
+ command,
+ containerCfg.customMounts,
+ containerCfg.image,
+ containerCfg,
+ );
+
+ if (hasBlockingViolation(violations)) {
+ const msg = formatViolations(violations);
+ auditLog.add({
+ command,
+ action: "blocked",
+ reason: `container-security: ${msg}`,
+ matchedPattern: "container-security",
+ });
+ if (cfg.mode === "enforce") {
+ api.logger.warn(`bash-sandbox: BLOCKED by container security: ${msg}`);
+ return {
+ block: true,
+ blockReason: `Container security violations: ${msg}`,
+ };
+ }
+ api.logger.warn(`bash-sandbox: WARNING (container security): ${msg}`);
+ }
+
+ // Build container run command
+ const result = containerRuntime.buildRunCommand({
+ command,
+ workdir,
+ config: containerCfg,
+ });
+
+ if (result) {
+ const fullCommand = [result.binary, ...result.args].join(" ");
+ auditLog.add({
+ command,
+ action: "allowed",
+ reason: `containerized (${result.runtime})`,
+ });
+ api.logger.info(`bash-sandbox: containerized via ${result.runtime}`);
+ // Replace the command with the containerized version
+ return {
+ replaceParams: {
+ command: fullCommand,
+ },
+ };
+ }
+ // If container build failed (no runtime), fall through to normal execution
+ api.logger.warn("bash-sandbox: container enabled but no runtime found, falling back");
+ }
+
+ // 8. Network sandbox evaluation
+ if (cfg.network.enabled && cfg.mode !== "off") {
+ const netResult = await networkSandbox.evaluate(command);
+ if (!netResult.allowed) {
+ auditLog.add({
+ command,
+ action: "blocked",
+ reason: `network-sandbox: ${netResult.reason}`,
+ matchedPattern: "network-sandbox",
+ });
+ if (cfg.mode === "enforce") {
+ api.logger.warn(`bash-sandbox: BLOCKED by network sandbox: ${netResult.reason}`);
+ return {
+ block: true,
+ blockReason: `Network sandbox blocked this command: ${netResult.reason}`,
+ };
+ }
+ api.logger.warn(
+ `bash-sandbox: WARNING (network sandbox would block): ${netResult.reason}`,
+ );
+ } else if (netResult.strategy !== "passthrough") {
+ api.logger.info(`bash-sandbox: network strategy: ${netResult.strategy}`);
+
+ // Apply the wrapped command and/or environment produced by the
+ // network sandbox strategy (e.g. macos-seatbelt, linux-namespace,
+ // env-proxy). Without this the original unwrapped command executes.
+ if (netResult.wrappedCommand !== undefined || netResult.env !== undefined) {
+ auditLog.add({
+ command,
+ action: "allowed",
+ reason: `network-wrapped (${netResult.strategy})`,
+ });
+ return {
+ replaceParams: {
+ command: netResult.wrappedCommand ?? command,
+ ...(netResult.env !== undefined ? { env: netResult.env } : {}),
+ },
+ };
+ }
+ }
+ }
+
auditLog.add({ command, action: "allowed" });
},
{ priority: 250 },
@@ -303,6 +414,59 @@ const bashSandboxPlugin = {
{ name: "bash_sandbox_test" },
);
+ // ========================================================================
+ // Tool: bash_container_status — container runtime info
+ // ========================================================================
+
+ api.registerTool(
+ {
+ name: "bash_container_status",
+ label: "Container Sandbox Status",
+ description:
+ "Show container sandbox configuration and detected runtimes (Docker, Podman, gVisor).",
+ parameters: Type.Object({}),
+ async execute() {
+ const lines: string[] = [];
+ lines.push(`Container sandbox: ${cfg.container.enabled ? "ENABLED" : "DISABLED"}`);
+ lines.push(` runtime: ${cfg.container.runtime}`);
+ lines.push(` image: ${cfg.container.image}`);
+ lines.push(` mountPolicy: ${cfg.container.mountPolicy}`);
+ lines.push(` networkMode: ${cfg.container.networkMode}`);
+ lines.push(
+ ` resourceLimits: cpus=${cfg.container.resourceLimits.cpus}, memory=${cfg.container.resourceLimits.memoryMb}MB, pids=${cfg.container.resourceLimits.pidsLimit}`,
+ );
+ lines.push(` allowedRegistries: ${cfg.container.allowedRegistries.join(", ")}`);
+ lines.push("");
+
+ const runtimes = containerRuntime.detectAll();
+ lines.push(formatRuntimeStatus(runtimes));
+
+ const selected = containerRuntime.selectRuntime(cfg.container.runtime);
+ if (selected) {
+ lines.push(
+ `\nSelected runtime: ${selected.id} (${selected.binary} v${selected.version})`,
+ );
+ } else {
+ lines.push("\nNo compatible runtime found.");
+ }
+
+ return {
+ content: [{ type: "text", text: lines.join("\n") }],
+ details: {
+ enabled: cfg.container.enabled,
+ runtime: cfg.container.runtime,
+ runtimes: runtimes.map((r) => ({
+ id: r.id,
+ available: r.available,
+ version: r.version,
+ })),
+ },
+ };
+ },
+ },
+ { name: "bash_container_status" },
+ );
+
// ========================================================================
// CLI Commands
// ========================================================================
@@ -382,6 +546,72 @@ const bashSandboxPlugin = {
console.log(`Added "${cmd}" to session blocklist.`);
console.log(`Session blocklist now has ${sessionBlockedCommands.length} entries.`);
});
+
+ // Container subcommands
+ const container = sandbox.command("container").description("Container sandbox management");
+
+ container
+ .command("detect")
+ .description("Detect available container runtimes")
+ .action(async () => {
+ const runtimes = containerRuntime.detectAll();
+ console.log(formatRuntimeStatus(runtimes));
+ const selected = containerRuntime.selectRuntime(cfg.container.runtime);
+ if (selected) {
+ console.log(`\nSelected: ${selected.id} (${selected.binary} v${selected.version})`);
+ } else {
+ console.log("\nNo compatible runtime found.");
+ console.log("Install Docker or Podman to enable container sandbox.");
+ }
+ });
+
+ container
+ .command("status")
+ .description("Show container sandbox configuration")
+ .action(async () => {
+ console.log(`Container sandbox: ${cfg.container.enabled ? "ENABLED" : "DISABLED"}`);
+ console.log(` runtime: ${cfg.container.runtime}`);
+ console.log(` image: ${cfg.container.image}`);
+ console.log(` mountPolicy: ${cfg.container.mountPolicy}`);
+ console.log(` networkMode: ${cfg.container.networkMode}`);
+ console.log(` cpus: ${cfg.container.resourceLimits.cpus}`);
+ console.log(` memory: ${cfg.container.resourceLimits.memoryMb}MB`);
+ console.log(` pidsLimit: ${cfg.container.resourceLimits.pidsLimit}`);
+ console.log(
+ ` allowedRegistries: ${cfg.container.allowedRegistries.join(", ") || "(all)"}`,
+ );
+ console.log(` securityFlags:`);
+ console.log(` blockPrivileged: ${cfg.container.securityFlags.blockPrivileged}`);
+ console.log(` blockHostNetwork: ${cfg.container.securityFlags.blockHostNetwork}`);
+ console.log(` blockRootVolume: ${cfg.container.securityFlags.blockRootVolume}`);
+ console.log(` readOnlyRootfs: ${cfg.container.securityFlags.readOnlyRootfs}`);
+ console.log(` noNewPrivileges: ${cfg.container.securityFlags.noNewPrivileges}`);
+ console.log(
+ ` dropCapabilities: ${cfg.container.securityFlags.dropCapabilities.join(", ")}`,
+ );
+ });
+
+ container
+ .command("pull")
+ .description("Pull the configured container image")
+ .argument("[image]", "Image to pull (defaults to configured image)")
+ .action(async (image?: string) => {
+ const targetImage = image ?? cfg.container.image;
+ const runtime = containerRuntime.selectRuntime(cfg.container.runtime);
+ if (!runtime) {
+ console.error("No container runtime found. Install Docker or Podman.");
+ process.exitCode = 1;
+ return;
+ }
+ console.log(`Pulling ${targetImage} via ${runtime.binary}...`);
+ const success = containerRuntime.pullImage(targetImage, runtime);
+ if (success) {
+ console.log(`Successfully pulled ${targetImage}`);
+ } else {
+ console.error(`Failed to pull ${targetImage}`);
+ process.exitCode = 1;
+ }
+ });
},
{ commands: ["sandbox"] },
);
diff --git a/extensions/bash-sandbox/network-sandbox.test.ts b/extensions/bash-sandbox/network-sandbox.test.ts
new file mode 100644
index 00000000..46929977
--- /dev/null
+++ b/extensions/bash-sandbox/network-sandbox.test.ts
@@ -0,0 +1,156 @@
+import { describe, it, expect, beforeEach } from "vitest";
+import {
+ NetworkSandbox,
+ parseNetworkSandboxConfig,
+ DEFAULT_NETWORK_SANDBOX_CONFIG,
+} from "./network-sandbox.js";
+
+describe("NetworkSandbox", () => {
+ let sandbox: NetworkSandbox;
+
+ beforeEach(() => {
+ sandbox = new NetworkSandbox();
+ });
+
+ it("uses default config when no options provided", () => {
+ const cfg = sandbox.getConfig();
+ expect(cfg.enabled).toBe(true);
+ expect(cfg.mode).toBe("allowlist");
+ expect(cfg.allowedDomains).toContain("github.com");
+ expect(cfg.maxConnections).toBe(10);
+ });
+
+ it("allows passthrough when disabled", async () => {
+ sandbox = new NetworkSandbox({ enabled: false });
+ const result = await sandbox.evaluate("curl https://evil.com");
+ expect(result.allowed).toBe(true);
+ expect(result.strategy).toBe("passthrough");
+ });
+
+ it("allows passthrough when mode is none", async () => {
+ sandbox = new NetworkSandbox({ mode: "none" });
+ const result = await sandbox.evaluate("curl https://evil.com");
+ expect(result.allowed).toBe(true);
+ expect(result.strategy).toBe("passthrough");
+ });
+
+ it("blocks non-allowlisted domains in allowlist mode", async () => {
+ sandbox = new NetworkSandbox({
+ mode: "allowlist",
+ allowedDomains: ["github.com"],
+ });
+ const result = await sandbox.evaluate("curl https://evil.example.com/steal");
+ expect(result.allowed).toBe(false);
+ expect(result.strategy).toBe("blocked");
+ expect(result.reason).toContain("not allowed");
+ });
+
+ it("allows allowlisted domains", async () => {
+ sandbox = new NetworkSandbox({
+ mode: "allowlist",
+ allowedDomains: ["example.com"],
+ });
+ const result = await sandbox.evaluate("curl https://example.com/api");
+ expect(result.allowed).toBe(true);
+ });
+
+ it("deny list takes priority over allow list", async () => {
+ sandbox = new NetworkSandbox({
+ mode: "allowlist",
+ allowedDomains: ["*.example.com"],
+ denyDomains: ["evil.example.com"],
+ });
+ const result = await sandbox.evaluate("curl https://evil.example.com");
+ expect(result.allowed).toBe(false);
+ expect(result.reason).toContain("not allowed");
+ });
+
+ it("blocks when connection limit reached", async () => {
+ sandbox = new NetworkSandbox({ maxConnections: 2, mode: "full" });
+ sandbox.trackConnectionStart();
+ sandbox.trackConnectionStart();
+ const result = await sandbox.evaluate("curl https://example.com");
+ expect(result.allowed).toBe(false);
+ expect(result.reason).toContain("Connection limit");
+ });
+
+ it("tracks connections correctly", () => {
+ sandbox.trackConnectionStart();
+ expect(sandbox.getActiveConnections()).toBe(1);
+ sandbox.trackConnectionStart();
+ expect(sandbox.getActiveConnections()).toBe(2);
+ sandbox.trackConnectionEnd();
+ expect(sandbox.getActiveConnections()).toBe(1);
+ sandbox.trackConnectionEnd();
+ expect(sandbox.getActiveConnections()).toBe(0);
+ // Should not go negative
+ sandbox.trackConnectionEnd();
+ expect(sandbox.getActiveConnections()).toBe(0);
+ });
+
+ it("isDomainAllowed checks against config", () => {
+ sandbox = new NetworkSandbox({
+ mode: "allowlist",
+ allowedDomains: ["github.com", "*.github.com"],
+ denyDomains: ["evil.github.com"],
+ });
+ expect(sandbox.isDomainAllowed("github.com")).toBe(true);
+ expect(sandbox.isDomainAllowed("api.github.com")).toBe(true);
+ expect(sandbox.isDomainAllowed("evil.github.com")).toBe(false);
+ expect(sandbox.isDomainAllowed("random.com")).toBe(false);
+ });
+
+ it("full mode allows all non-denied domains", () => {
+ sandbox = new NetworkSandbox({
+ mode: "full",
+ denyDomains: ["blocked.com"],
+ });
+ expect(sandbox.isDomainAllowed("anything.com")).toBe(true);
+ expect(sandbox.isDomainAllowed("blocked.com")).toBe(false);
+ });
+
+ it("commands without URLs are allowed in allowlist mode", async () => {
+ sandbox = new NetworkSandbox({ mode: "allowlist" });
+ const result = await sandbox.evaluate("echo hello");
+ expect(result.allowed).toBe(true);
+ });
+});
+
+describe("parseNetworkSandboxConfig", () => {
+ it("returns defaults for empty input", () => {
+ const cfg = parseNetworkSandboxConfig({});
+ expect(cfg).toEqual(DEFAULT_NETWORK_SANDBOX_CONFIG);
+ });
+
+ it("parses valid config", () => {
+ const cfg = parseNetworkSandboxConfig({
+ enabled: false,
+ mode: "full",
+ allowedDomains: ["custom.com"],
+ denyDomains: ["bad.com"],
+ maxConnections: 5,
+ });
+ expect(cfg.enabled).toBe(false);
+ expect(cfg.mode).toBe("full");
+ expect(cfg.allowedDomains).toEqual(["custom.com"]);
+ expect(cfg.denyDomains).toEqual(["bad.com"]);
+ expect(cfg.maxConnections).toBe(5);
+ });
+
+ it("clamps maxConnections to valid range", () => {
+ expect(parseNetworkSandboxConfig({ maxConnections: 0 }).maxConnections).toBe(1);
+ expect(parseNetworkSandboxConfig({ maxConnections: 200 }).maxConnections).toBe(100);
+ });
+
+ it("ignores invalid mode values", () => {
+ const cfg = parseNetworkSandboxConfig({ mode: "invalid" });
+ expect(cfg.mode).toBe("allowlist"); // default
+ });
+
+ it("filters non-string values from domain arrays", () => {
+ const cfg = parseNetworkSandboxConfig({
+ allowedDomains: ["good.com", 42, null, "also-good.com"],
+ });
+ expect(cfg.allowedDomains).toEqual(["good.com", "also-good.com"]);
+ });
+});
diff --git a/extensions/bash-sandbox/network-sandbox.ts b/extensions/bash-sandbox/network-sandbox.ts
new file mode 100644
index 00000000..1bfda087
--- /dev/null
+++ b/extensions/bash-sandbox/network-sandbox.ts
@@ -0,0 +1,313 @@
+/**
+ * NetworkSandbox — OS-level network isolation for sandboxed commands.
+ *
+ * Strategies by platform:
+ * - macOS: sandbox-exec with Seatbelt profiles restricting network
+ * - Linux: unshare --net with namespace isolation
+ * - Fallback: DNS-level proxy via env vars
+ */
+
+import { execFileSync } from "node:child_process";
+import { matchesDomainPattern, extractDomain, extractUrls } from "./domain-checker.js";
+
+export type NetworkSandboxConfig = {
+ enabled: boolean;
+ mode: "none" | "allowlist" | "full";
+ allowedDomains: string[];
+ denyDomains: string[];
+ maxConnections: number;
+};
+
+export const DEFAULT_NETWORK_SANDBOX_CONFIG: NetworkSandboxConfig = {
+ enabled: true,
+ mode: "allowlist",
+ allowedDomains: [
+ "github.com",
+ "*.github.com",
+ "npmjs.org",
+ "*.npmjs.org",
+ "registry.npmjs.org",
+ "*.googleapis.com",
+ ],
+ denyDomains: [],
+ maxConnections: 10,
+};
+
+export type NetworkSandboxResult = {
+ allowed: boolean;
+ strategy: "macos-seatbelt" | "linux-namespace" | "env-proxy" | "passthrough" | "blocked";
+ wrappedCommand?: string;
+ env?: Record;
+ reason?: string;
+};
+
+/**
+ * Resolve a domain to IP addresses for Seatbelt profile injection.
+ */
+function resolveDomainToIps(domain: string): string[] {
+ try {
+ const output = execFileSync("dig", ["+short", domain, "A"], {
+ timeout: 5000,
+ encoding: "utf-8",
+ });
+ return output
+ .split("\n")
+ .map((l) => l.trim())
+ .filter((l) => /^\d+\.\d+\.\d+\.\d+$/.test(l));
+ } catch {
+ return [];
+ }
+}
+
+/**
+ * Check if a domain is allowed by the config.
+ */
+function isDomainAllowed(domain: string, config: NetworkSandboxConfig): boolean {
+ // Deny list takes priority
+ for (const pattern of config.denyDomains) {
+ if (matchesDomainPattern(domain, pattern)) {
+ return false;
+ }
+ }
+ // In allowlist mode, domain must match allowlist
+ if (config.mode === "allowlist") {
+ for (const pattern of config.allowedDomains) {
+ if (matchesDomainPattern(domain, pattern)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ // In full mode, everything not denied is allowed
+ return config.mode === "full";
+}
+
+/**
+ * Build a macOS Seatbelt profile for network restriction.
+ */
+function buildSeatbeltProfile(allowedIps: string[]): string {
+ const lines = [
+ "(version 1)",
+ "(allow default)",
+ "(deny network*)",
+ '(allow network-outbound (remote ip "localhost:*"))',
+ ];
+ for (const ip of allowedIps) {
+ lines.push(`(allow network-outbound (remote ip "${ip}:*"))`);
+ }
+ // Allow DNS resolution
+ lines.push(
+ '(allow network-outbound (remote unix-socket (path-literal "/var/run/mDNSResponder")))',
+ );
+ lines.push('(allow network-outbound (remote ip "*:53"))');
+ return lines.join("\n");
+}
+
+/**
+ * Check if sandbox-exec is available (macOS).
+ */
+function hasSandboxExec(): boolean {
+ try {
+ execFileSync("which", ["sandbox-exec"], { encoding: "utf-8", timeout: 2000 });
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+/**
+ * Check if unshare is available (Linux).
+ */
+function hasUnshare(): boolean {
+ try {
+ execFileSync("which", ["unshare"], { encoding: "utf-8", timeout: 2000 });
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+export class NetworkSandbox {
+ private config: NetworkSandboxConfig;
+ private activeConnections = 0;
+
+ constructor(config: Partial = {}) {
+ this.config = { ...DEFAULT_NETWORK_SANDBOX_CONFIG, ...config };
+ }
+
+ getConfig(): NetworkSandboxConfig {
+ return { ...this.config };
+ }
+
+ /**
+ * Check if a specific domain is allowed by current config.
+ */
+ isDomainAllowed(domain: string): boolean {
+ return isDomainAllowed(domain, this.config);
+ }
+
+ /**
+ * Evaluate a command and return the sandboxed execution strategy.
+ */
+ async evaluate(command: string): Promise {
+ if (!this.config.enabled || this.config.mode === "none") {
+ return { allowed: true, strategy: "passthrough" };
+ }
+
+ // Check connection limit
+ if (this.activeConnections >= this.config.maxConnections) {
+ return {
+ allowed: false,
+ strategy: "blocked",
+ reason: `Connection limit reached (${this.config.maxConnections})`,
+ };
+ }
+
+ // Extract domains from the command to check allowlist
+ const urls = extractUrls(command);
+ const domains = urls
+ .map((u) => extractDomain(u))
+ .filter((d): d is string => d !== null && d.length > 0);
+
+ // Check each domain against policy
+ for (const domain of domains) {
+ if (!isDomainAllowed(domain, this.config)) {
+ return {
+ allowed: false,
+ strategy: "blocked",
+ reason: `Domain not allowed: ${domain}`,
+ };
+ }
+ }
+
+ // Determine platform strategy
+ const platform = process.platform;
+
+ if (platform === "darwin" && hasSandboxExec()) {
+ return this.buildMacOsStrategy(command, domains);
+ }
+
+ if (platform === "linux" && hasUnshare()) {
+ return this.buildLinuxStrategy(command);
+ }
+
+ // Fallback: env-proxy strategy
+ return this.buildEnvProxyStrategy();
+ }
+
+ /**
+ * Track connection start (for connection limiting).
+ */
+ trackConnectionStart(): void {
+ this.activeConnections++;
+ }
+
+ /**
+ * Track connection end.
+ */
+ trackConnectionEnd(): void {
+ this.activeConnections = Math.max(0, this.activeConnections - 1);
+ }
+
+ /**
+ * Get current active connection count.
+ */
+ getActiveConnections(): number {
+ return this.activeConnections;
+ }
+
+ private buildMacOsStrategy(command: string, domains: string[]): NetworkSandboxResult {
+ // Resolve allowed domains to IPs
+ const allowedIps: string[] = [];
+ const allAllowedDomains = [
+ ...domains,
+ ...this.config.allowedDomains.filter((d) => !d.startsWith("*.")),
+ ];
+ for (const domain of allAllowedDomains) {
+ const ips = resolveDomainToIps(domain);
+ allowedIps.push(...ips);
+ }
+
+ const profile = buildSeatbeltProfile([...new Set(allowedIps)]);
+ // sandbox-exec -p '' bash -c ''
+ const escapedProfile = profile.replace(/'/g, "'\\''");
+ const escapedCommand = command.replace(/'/g, "'\\''");
+ const wrappedCommand = `sandbox-exec -p '${escapedProfile}' bash -c '${escapedCommand}'`;
+
+ return {
+ allowed: true,
+ strategy: "macos-seatbelt",
+ wrappedCommand,
+ };
+ }
+
+ private buildLinuxStrategy(command: string): NetworkSandboxResult {
+ // When allowedDomains is non-empty, `unshare --net` would block everything
+ // including the allowed domains because network namespaces have no iptables
+ // rules that can be injected portably at this layer. Fall back to the
+ // env-proxy strategy so that the allowlist is honoured (weak but correct).
+ if (this.config.mode === "allowlist" && this.config.allowedDomains.length > 0) {
+ return this.buildEnvProxyStrategy();
+ }
+
+ // Full isolation: no allowedDomains, so blocking all outbound is correct.
+ const escapedCommand = command.replace(/'/g, "'\\''");
+ const wrappedCommand = `unshare --net bash -c '${escapedCommand}'`;
+
+ return {
+ allowed: true,
+ strategy: "linux-namespace",
+ wrappedCommand,
+ };
+ }
+
+ private buildEnvProxyStrategy(): NetworkSandboxResult {
+ // Set proxy env vars that most tools respect
+ // This is the weakest strategy — commands can ignore these
+ const env: Record = {};
+
+ if (this.config.mode !== "none") {
+ // Set a non-existent proxy to block most network access
+ // Tools that respect http_proxy will fail to connect
+ const noProxyDomains = this.config.allowedDomains
+ .map((d) => (d.startsWith("*.") ? d.slice(2) : d))
+ .join(",");
+
+ if (this.config.mode === "allowlist" && this.config.allowedDomains.length > 0) {
+ env.no_proxy = noProxyDomains;
+ env.NO_PROXY = noProxyDomains;
+ }
+ }
+
+ return {
+ allowed: true,
+ strategy: "env-proxy",
+ env,
+ };
+ }
+}
+
+/**
+ * Parse and validate a NetworkSandboxConfig from raw input.
+ */
+export function parseNetworkSandboxConfig(raw: Record): NetworkSandboxConfig {
+ const cfg = { ...DEFAULT_NETWORK_SANDBOX_CONFIG };
+
+ if (typeof raw.enabled === "boolean") {
+ cfg.enabled = raw.enabled;
+ }
+ if (typeof raw.mode === "string" && ["none", "allowlist", "full"].includes(raw.mode)) {
+ cfg.mode = raw.mode as NetworkSandboxConfig["mode"];
+ }
+ if (Array.isArray(raw.allowedDomains)) {
+ cfg.allowedDomains = raw.allowedDomains.filter((d): d is string => typeof d === "string");
+ }
+ if (Array.isArray(raw.denyDomains)) {
+ cfg.denyDomains = raw.denyDomains.filter((d): d is string => typeof d === "string");
+ }
+ if (typeof raw.maxConnections === "number") {
+ cfg.maxConnections = Math.max(1, Math.min(Math.trunc(raw.maxConnections), 100));
+ }
+
+ return cfg;
+}
diff --git a/extensions/browser-automation/browser-client.test.ts b/extensions/browser-automation/browser-client.test.ts
new file mode 100644
index 00000000..ae1a0624
--- /dev/null
+++ b/extensions/browser-automation/browser-client.test.ts
@@ -0,0 +1,302 @@
+/**
+ * BrowserClient Tests
+ *
+ * Tests cover:
+ * - connect() fetches CDP endpoint and opens WebSocket
+ * - listPages() parses JSON response
+ * - navigate() sends correct CDP command
+ * - screenshot() returns base64 data
+ * - click() evaluates querySelector
+ * - type() dispatches key events
+ * - evaluate() sends Runtime.evaluate
+ * - getContent() returns HTML
+ * - disconnect() closes WebSocket
+ * - Throws clear error when ws not available
+ */
+
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { BrowserClient } from "./browser-client.js";
+
+// ============================================================================
+// Mock WebSocket
+// ============================================================================
+
+class MockWebSocket {
+ private listeners = new Map void>>();
+ public readyState = 1; // OPEN
+ public closed = false;
+ public sentMessages: string[] = [];
+
+ on(event: string, handler: (...args: unknown[]) => void) {
+ if (!this.listeners.has(event)) {
+ this.listeners.set(event, []);
+ }
+ this.listeners.get(event)!.push(handler);
+ // Auto-fire "open" event
+ if (event === "open") {
+ setTimeout(() => handler(), 0);
+ }
+ }
+
+ send(data: string) {
+ this.sentMessages.push(data);
+ // Auto-respond to CDP commands
+ const msg = JSON.parse(data) as { id: number; method: string };
+ setTimeout(() => {
+ this.emit("message", JSON.stringify(buildCdpResponse(msg.id, msg.method)));
+ }, 0);
+ }
+
+ close() {
+ this.closed = true;
+ }
+
+ private emit(event: string, ...args: unknown[]) {
+ const handlers = this.listeners.get(event) ?? [];
+ for (const handler of handlers) {
+ handler(...args);
+ }
+ }
+}
+
+/** Build a mock CDP response for a given method. */
+function buildCdpResponse(
+ id: number,
+ method: string,
+): { id: number; result: Record } {
+ switch (method) {
+ case "Page.navigate":
+ return { id, result: { frameId: "frame-1", loaderId: "loader-1" } };
+ case "Page.captureScreenshot":
+ return { id, result: { data: "iVBORw0KGgoAAAANS==" } };
+ case "Page.getLayoutMetrics":
+ return {
+ id,
+ result: {
+ cssVisualViewport: { clientWidth: 1920, clientHeight: 1080 },
+ },
+ };
+ case "Runtime.evaluate":
+ return {
+ id,
+ result: {
+ result: {
+ value: JSON.stringify({ title: "Example", url: "https://example.com" }),
+ },
+ },
+ };
+ case "Input.dispatchKeyEvent":
+ return { id, result: {} };
+ default:
+ return { id, result: {} };
+ }
+}
+
+// ============================================================================
+// Mock fetch and ws module
+// ============================================================================
+
+let mockWsInstance: MockWebSocket;
+
+beforeEach(() => {
+ mockWsInstance = new MockWebSocket();
+
+ // Mock global fetch
+ vi.stubGlobal(
+ "fetch",
+ vi.fn(async (url: string) => {
+ if (url.includes("/json/version")) {
+ return {
+ ok: true,
+ json: async () => ({
+ webSocketDebuggerUrl: "ws://localhost:9222/devtools/browser/abc",
+ }),
+ };
+ }
+ if (url.includes("/json/list")) {
+ return {
+ ok: true,
+ json: async () => [
+ { id: "page-1", url: "https://example.com", title: "Example", type: "page" },
+ { id: "page-2", url: "about:blank", title: "New Tab", type: "page" },
+ {
+ id: "ext-1",
+ url: "chrome-extension://abc",
+ title: "Extension",
+ type: "background_page",
+ },
+ ],
+ };
+ }
+ return { ok: false, status: 404 };
+ }),
+ );
+
+ // Mock ws module via vi.mock
+ vi.mock("ws", () => {
+ return {
+ default: class {
+ constructor() {
+ // Return mock instance
+ return mockWsInstance as unknown;
+ }
+ },
+ WebSocket: class {
+ constructor() {
+ return mockWsInstance as unknown;
+ }
+ },
+ };
+ });
+});
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+describe("BrowserClient", () => {
+ it("connect() fetches CDP endpoint and opens WebSocket", async () => {
+ const client = new BrowserClient({ cdpUrl: "http://localhost:9222" });
+ await client.connect();
+
+ expect(fetch).toHaveBeenCalledWith("http://localhost:9222/json/version");
+ await client.disconnect();
+ });
+
+ it("listPages() parses JSON response and filters pages", async () => {
+ const client = new BrowserClient();
+ const pages = await client.listPages();
+
+ expect(fetch).toHaveBeenCalledWith("http://localhost:9222/json/list");
+ expect(pages).toHaveLength(2);
+ expect(pages[0]).toEqual({
+ id: "page-1",
+ url: "https://example.com",
+ title: "Example",
+ });
+ expect(pages[1]).toEqual({
+ id: "page-2",
+ url: "about:blank",
+ title: "New Tab",
+ });
+ });
+
+ it("navigate() sends Page.navigate CDP command", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+
+ const result = await client.navigate("https://example.com");
+
+ const sent = mockWsInstance.sentMessages.map((m) => JSON.parse(m) as { method: string });
+ const navigateCmd = sent.find((m) => m.method === "Page.navigate");
+ expect(navigateCmd).toBeDefined();
+ expect(result.url).toBeDefined();
+
+ await client.disconnect();
+ });
+
+ it("screenshot() returns base64 data with dimensions", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+
+ const result = await client.screenshot();
+
+ expect(result.data).toBe("iVBORw0KGgoAAAANS==");
+ expect(result.format).toBe("png");
+ expect(result.width).toBe(1920);
+ expect(result.height).toBe(1080);
+
+ await client.disconnect();
+ });
+
+ it("click() evaluates querySelector on the page", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+
+ await client.click("#submit-btn");
+
+ const sent = mockWsInstance.sentMessages.map(
+ (m) => JSON.parse(m) as { method: string; params: Record },
+ );
+ const evalCmd = sent.find(
+ (m) =>
+ m.method === "Runtime.evaluate" && String(m.params.expression).includes("querySelector"),
+ );
+ expect(evalCmd).toBeDefined();
+ expect(String(evalCmd!.params.expression)).toContain("#submit-btn");
+
+ await client.disconnect();
+ });
+
+ it("type() dispatches key events for each character", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+
+ await client.type("ab");
+
+ const sent = mockWsInstance.sentMessages.map(
+ (m) => JSON.parse(m) as { method: string; params: Record },
+ );
+ const keyEvents = sent.filter((m) => m.method === "Input.dispatchKeyEvent");
+ // 2 chars x 2 events (keyDown + keyUp) = 4
+ expect(keyEvents).toHaveLength(4);
+ expect(keyEvents[0].params.text).toBe("a");
+ expect(keyEvents[0].params.type).toBe("keyDown");
+ expect(keyEvents[1].params.type).toBe("keyUp");
+ expect(keyEvents[2].params.text).toBe("b");
+
+ await client.disconnect();
+ });
+
+ it("evaluate() sends Runtime.evaluate and returns value", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+
+ const result = await client.evaluate("document.title");
+
+ const sent = mockWsInstance.sentMessages.map(
+ (m) => JSON.parse(m) as { method: string; params: Record },
+ );
+ const evalCmd = sent.find(
+ (m) => m.method === "Runtime.evaluate" && m.params.expression === "document.title",
+ );
+ expect(evalCmd).toBeDefined();
+ expect(result).toBeDefined();
+
+ await client.disconnect();
+ });
+
+ it("getContent() returns HTML content", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+
+ const content = await client.getContent();
+
+ const sent = mockWsInstance.sentMessages.map(
+ (m) => JSON.parse(m) as { method: string; params: Record },
+ );
+ const evalCmd = sent.find(
+ (m) => m.method === "Runtime.evaluate" && String(m.params.expression).includes("outerHTML"),
+ );
+ expect(evalCmd).toBeDefined();
+ expect(typeof content).toBe("string");
+
+ await client.disconnect();
+ });
+
+ it("disconnect() closes WebSocket connection", async () => {
+ const client = new BrowserClient();
+ await client.connect();
+ expect(mockWsInstance.closed).toBe(false);
+
+ await client.disconnect();
+ expect(mockWsInstance.closed).toBe(true);
+ });
+
+ it("sendCommand throws when not connected", async () => {
+ const client = new BrowserClient();
+
+ // navigate() calls sendCommand internally, which should throw
+ await expect(client.navigate("https://example.com")).rejects.toThrow("Not connected");
+ });
+});
diff --git a/extensions/browser-automation/browser-client.ts b/extensions/browser-automation/browser-client.ts
new file mode 100644
index 00000000..e79d3a52
--- /dev/null
+++ b/extensions/browser-automation/browser-client.ts
@@ -0,0 +1,388 @@
+/**
+ * Browser Automation Client
+ *
+ * Lightweight browser automation via Chrome DevTools Protocol (CDP).
+ * Connects to a running Chrome instance with --remote-debugging-port.
+ * Does NOT bundle Playwright or Puppeteer — uses CDP directly over WebSocket.
+ *
+ * Usage:
+ * Start Chrome with: google-chrome --remote-debugging-port=9222
+ * Then connect:
+ * const client = new BrowserClient();
+ * await client.connect();
+ * await client.navigate("https://example.com");
+ * const shot = await client.screenshot();
+ * await client.disconnect();
+ */
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export type BrowserConfig = {
+ cdpUrl: string;
+ screenshotFormat: "png" | "jpeg";
+ defaultTimeout: number;
+};
+
+export type BrowserPage = {
+ id: string;
+ url: string;
+ title: string;
+};
+
+export type ScreenshotResult = {
+ data: string;
+ format: "png" | "jpeg";
+ width: number;
+ height: number;
+};
+
+export type NavigateResult = {
+ url: string;
+ title: string;
+ status: number;
+};
+
+// ============================================================================
+// Default Configuration
+// ============================================================================
+
+const DEFAULT_CONFIG: BrowserConfig = {
+ cdpUrl: "http://localhost:9222",
+ screenshotFormat: "png",
+ defaultTimeout: 30_000,
+};
+
+// ============================================================================
+// CDP Response Types
+// ============================================================================
+
+type CdpVersionResponse = {
+ webSocketDebuggerUrl: string;
+};
+
+type CdpPageEntry = {
+ id: string;
+ url: string;
+ title: string;
+ type: string;
+};
+
+type CdpMessage = {
+ id: number;
+ result?: Record;
+ error?: { code: number; message: string };
+};
+
+// ============================================================================
+// BrowserClient
+// ============================================================================
+
+export class BrowserClient {
+ private config: BrowserConfig;
+ private ws: import("ws").WebSocket | null = null;
+ private messageId = 0;
+ private pending = new Map<
+ number,
+ {
+ resolve: (value: unknown) => void;
+ reject: (reason: Error) => void;
+ }
+ >();
+
+ constructor(config?: Partial) {
+ this.config = { ...DEFAULT_CONFIG, ...config };
+ }
+
+ /**
+ * Connect to Chrome via CDP.
+ * Fetches the WebSocket debugger URL from the CDP endpoint, then opens
+ * a persistent WebSocket connection.
+ */
+ async connect(): Promise {
+ const versionUrl = `${this.config.cdpUrl}/json/version`;
+
+ const response = await fetch(versionUrl);
+ if (!response.ok) {
+ throw new Error(
+ `Failed to connect to Chrome DevTools at ${versionUrl} (HTTP ${response.status}). ` +
+ "Ensure Chrome is running with --remote-debugging-port.",
+ );
+ }
+
+ const version = (await response.json()) as CdpVersionResponse;
+ const wsUrl = version.webSocketDebuggerUrl;
+
+ if (!wsUrl) {
+ throw new Error("Chrome DevTools did not return a webSocketDebuggerUrl.");
+ }
+
+ const WebSocketModule = await loadWsModule();
+ this.ws = new WebSocketModule(wsUrl);
+
+ await new Promise((resolve, reject) => {
+ const ws = this.ws!;
+ const timeout = setTimeout(() => {
+ reject(new Error(`WebSocket connection timed out after ${this.config.defaultTimeout}ms`));
+ }, this.config.defaultTimeout);
+
+ ws.on("open", () => {
+ clearTimeout(timeout);
+ resolve();
+ });
+
+ ws.on("error", (err: Error) => {
+ clearTimeout(timeout);
+ reject(new Error(`WebSocket connection failed: ${err.message}`));
+ });
+
+ ws.on("message", (raw: Buffer | string) => {
+ try {
+ const msg = JSON.parse(String(raw)) as CdpMessage;
+ if (msg.id !== undefined && this.pending.has(msg.id)) {
+ const handler = this.pending.get(msg.id)!;
+ this.pending.delete(msg.id);
+ if (msg.error) {
+ handler.reject(new Error(`CDP error: ${msg.error.message}`));
+ } else {
+ handler.resolve(msg.result ?? {});
+ }
+ }
+ } catch {
+ // Ignore malformed messages
+ }
+ });
+ });
+ }
+
+ /**
+ * Disconnect from Chrome.
+ * Rejects all in-flight CDP commands before clearing the pending map so
+ * callers are not left with promises that never settle.
+ */
+ async disconnect(): Promise {
+ // Reject all in-flight CDP commands before clearing
+ for (const [id, { reject }] of this.pending) {
+ reject(new Error(`CDP command ${id} aborted: client disconnected`));
+ }
+ this.pending.clear();
+
+ if (this.ws) {
+ this.ws.close();
+ this.ws = null;
+ }
+ }
+
+ /**
+ * List open pages/tabs.
+ */
+ async listPages(): Promise {
+ const listUrl = `${this.config.cdpUrl}/json/list`;
+ const response = await fetch(listUrl);
+ if (!response.ok) {
+ throw new Error(`Failed to list pages (HTTP ${response.status})`);
+ }
+ const entries = (await response.json()) as CdpPageEntry[];
+ return entries
+ .filter((entry) => entry.type === "page")
+ .map((entry) => ({
+ id: entry.id,
+ url: entry.url,
+ title: entry.title,
+ }));
+ }
+
+ /**
+ * Navigate to URL.
+ *
+ * Security: Uses CDP `Runtime.evaluate` to read page title/URL from the
+ * browser page context (same trust boundary as Puppeteer's page.evaluate).
+ * The evaluated expression is a static string with no user input.
+ */
+ async navigate(url: string): Promise {
+ const result = (await this.sendCommand("Page.navigate", { url })) as Record;
+ // Get page info after navigation
+ const evalResult = (await this.sendCommand("Runtime.evaluate", {
+ expression: "JSON.stringify({ title: document.title, url: location.href })",
+ returnByValue: true,
+ })) as { result: { value: string } };
+
+ let title = "";
+ let finalUrl = url;
+ try {
+ const info = JSON.parse(evalResult.result.value) as { title: string; url: string };
+ title = info.title;
+ finalUrl = info.url;
+ } catch {
+ // Use defaults
+ }
+
+ return {
+ url: finalUrl,
+ title,
+ status: typeof result.errorText === "string" ? 0 : 200,
+ };
+ }
+
+ /**
+ * Take screenshot of current page.
+ */
+ async screenshot(): Promise {
+ const result = (await this.sendCommand("Page.captureScreenshot", {
+ format: this.config.screenshotFormat,
+ quality: this.config.screenshotFormat === "jpeg" ? 80 : undefined,
+ })) as { data: string };
+
+ // Get viewport dimensions
+ const layoutResult = (await this.sendCommand("Page.getLayoutMetrics")) as {
+ cssVisualViewport?: { clientWidth: number; clientHeight: number };
+ };
+
+ const width = layoutResult.cssVisualViewport?.clientWidth ?? 1280;
+ const height = layoutResult.cssVisualViewport?.clientHeight ?? 720;
+
+ return {
+ data: result.data,
+ format: this.config.screenshotFormat,
+ width,
+ height,
+ };
+ }
+
+ /**
+ * Click element by CSS selector.
+ *
+ * Security: The expression string is evaluated inside the inspected browser
+ * page via the Chrome DevTools Protocol (CDP) `Runtime.evaluate`. This is
+ * the same trust boundary as Puppeteer's `page.evaluate` -- the code runs
+ * in the *target page's* JS context, not in the Node host process. The
+ * selector is escaped to prevent injection into the evaluated expression.
+ */
+ async click(selector: string): Promise {
+ const escapedSelector = selector.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
+ const result = (await this.sendCommand("Runtime.evaluate", {
+ expression: `(() => {
+ const el = document.querySelector('${escapedSelector}');
+ if (!el) throw new Error('Element not found: ${escapedSelector}');
+ el.click();
+ return true;
+ })()`,
+ returnByValue: true,
+ awaitPromise: false,
+ })) as { result: { value: unknown }; exceptionDetails?: { text: string } };
+
+ if (result.exceptionDetails) {
+ throw new Error(`Click failed: ${result.exceptionDetails.text}`);
+ }
+ }
+
+ /**
+ * Type text into focused element.
+ * Dispatches individual key events for each character.
+ */
+ async type(text: string): Promise {
+ for (const char of text) {
+ await this.sendCommand("Input.dispatchKeyEvent", {
+ type: "keyDown",
+ text: char,
+ key: char,
+ unmodifiedText: char,
+ });
+ await this.sendCommand("Input.dispatchKeyEvent", {
+ type: "keyUp",
+ key: char,
+ });
+ }
+ }
+
+ /**
+ * Evaluate JavaScript in page context.
+ *
+ * Security: The expression is evaluated via CDP `Runtime.evaluate` inside
+ * the *inspected browser page*, not the Node host. This is functionally
+ * identical to Puppeteer's `page.evaluate` pattern -- the host process
+ * sends a string over the CDP WebSocket and the browser's V8 instance
+ * executes it. The Node process itself never calls `eval()` or
+ * `new Function()`. Callers are responsible for sanitizing user-supplied
+ * input before embedding it in the expression string.
+ */
+ async evaluate(expression: string): Promise {
+ const result = (await this.sendCommand("Runtime.evaluate", {
+ expression,
+ returnByValue: true,
+ awaitPromise: true,
+ })) as { result: { value: unknown }; exceptionDetails?: { text: string } };
+
+ if (result.exceptionDetails) {
+ throw new Error(`Evaluate failed: ${result.exceptionDetails.text}`);
+ }
+
+ return result.result.value;
+ }
+
+ /**
+ * Get page HTML content.
+ *
+ * Security: Reads the page DOM via CDP `Runtime.evaluate` with a static
+ * expression. Executes in the browser page context, not in Node.
+ */
+ async getContent(): Promise {
+ const result = (await this.sendCommand("Runtime.evaluate", {
+ expression: "document.documentElement.outerHTML",
+ returnByValue: true,
+ })) as { result: { value: string } };
+
+ return result.result.value;
+ }
+
+ /**
+ * Send a CDP command over WebSocket.
+ * Returns a promise that resolves with the command result.
+ */
+ private async sendCommand(method: string, params?: Record): Promise {
+ if (!this.ws) {
+ throw new Error("Not connected. Call connect() first.");
+ }
+
+ const id = ++this.messageId;
+ const message = JSON.stringify({ id, method, params: params ?? {} });
+
+ return new Promise((resolve, reject) => {
+ const timeout = setTimeout(() => {
+ this.pending.delete(id);
+ reject(new Error(`CDP command ${method} timed out after ${this.config.defaultTimeout}ms`));
+ }, this.config.defaultTimeout);
+
+ this.pending.set(id, {
+ resolve: (value) => {
+ clearTimeout(timeout);
+ resolve(value);
+ },
+ reject: (reason) => {
+ clearTimeout(timeout);
+ reject(reason);
+ },
+ });
+
+ this.ws!.send(message);
+ });
+ }
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Dynamically load the `ws` WebSocket module.
+ * Throws a clear error if the package is not installed.
+ */
+async function loadWsModule(): Promise {
+ try {
+ const mod = await import("ws");
+ return mod.default || mod.WebSocket;
+ } catch {
+ throw new Error("Browser automation requires the 'ws' package. Run: npm install ws");
+ }
+}
diff --git a/extensions/browser-automation/index.ts b/extensions/browser-automation/index.ts
new file mode 100644
index 00000000..fc7dd958
--- /dev/null
+++ b/extensions/browser-automation/index.ts
@@ -0,0 +1,150 @@
+/**
+ * Mayros Browser Automation Plugin
+ *
+ * Registers browser control tools that use Chrome DevTools Protocol (CDP)
+ * to automate a running Chrome instance. No Playwright or Puppeteer required.
+ *
+ * Tools:
+ * browser_navigate — Navigate browser to a URL and return page info
+ * browser_screenshot — Take a screenshot of the current browser page
+ * browser_click — Click an element by CSS selector
+ * browser_evaluate — Run JavaScript in the browser page and return result
+ *
+ * Prerequisites:
+ * - Chrome running with --remote-debugging-port=9222
+ * - The `ws` npm package installed
+ */
+
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+
+// ============================================================================
+// Plugin Definition
+// ============================================================================
+
+const browserAutomationPlugin = {
+ id: "browser-automation",
+ name: "Browser Automation",
+ description: "Automate a running Chrome instance via Chrome DevTools Protocol (CDP)",
+ kind: "tool" as const,
+ version: "0.1.5",
+
+ async register(api: MayrosPluginApi) {
+ api.logger.info("browser-automation: registered");
+
+ // ========================================================================
+ // Tool: browser_navigate
+ // ========================================================================
+
+ api.registerTool({
+ name: "browser_navigate",
+ description: "Navigate browser to a URL and return page info",
+ parameters: {
+ type: "object" as const,
+ properties: {
+ url: { type: "string" as const, description: "URL to navigate to" },
+ },
+ required: ["url"],
+ },
+ execute: async (args: Record) => {
+ const { BrowserClient } = await import("./browser-client.js");
+ const client = new BrowserClient();
+ await client.connect();
+ try {
+ const result = await client.navigate(args.url as string);
+ return { content: [{ type: "text" as const, text: JSON.stringify(result) }] };
+ } finally {
+ await client.disconnect();
+ }
+ },
+ });
+
+ // ========================================================================
+ // Tool: browser_screenshot
+ // ========================================================================
+
+ api.registerTool({
+ name: "browser_screenshot",
+ description: "Take a screenshot of the current browser page",
+ parameters: {
+ type: "object" as const,
+ properties: {},
+ },
+ execute: async () => {
+ const { BrowserClient } = await import("./browser-client.js");
+ const client = new BrowserClient();
+ await client.connect();
+ try {
+ const result = await client.screenshot();
+ return {
+ content: [
+ {
+ type: "image" as const,
+ mimeType: `image/${result.format}`,
+ bytes: result.data.length,
+ },
+ { type: "text" as const, text: `Screenshot: ${result.width}x${result.height}` },
+ ],
+ };
+ } finally {
+ await client.disconnect();
+ }
+ },
+ });
+
+ // ========================================================================
+ // Tool: browser_click
+ // ========================================================================
+
+ api.registerTool({
+ name: "browser_click",
+ description: "Click an element by CSS selector",
+ parameters: {
+ type: "object" as const,
+ properties: {
+ selector: { type: "string" as const, description: "CSS selector" },
+ },
+ required: ["selector"],
+ },
+ execute: async (args: Record) => {
+ const { BrowserClient } = await import("./browser-client.js");
+ const client = new BrowserClient();
+ await client.connect();
+ try {
+ await client.click(args.selector as string);
+ return { content: [{ type: "text" as const, text: `Clicked: ${args.selector}` }] };
+ } finally {
+ await client.disconnect();
+ }
+ },
+ });
+
+ // ========================================================================
+ // Tool: browser_evaluate
+ // ========================================================================
+
+ api.registerTool({
+ name: "browser_evaluate",
+ description: "Run JavaScript in the browser page and return result",
+ parameters: {
+ type: "object" as const,
+ properties: {
+ expression: { type: "string" as const, description: "JavaScript expression" },
+ },
+ required: ["expression"],
+ },
+ execute: async (args: Record) => {
+ const { BrowserClient } = await import("./browser-client.js");
+ const client = new BrowserClient();
+ await client.connect();
+ try {
+ const result = await client.evaluate(args.expression as string);
+ return { content: [{ type: "text" as const, text: JSON.stringify(result) }] };
+ } finally {
+ await client.disconnect();
+ }
+ },
+ });
+ },
+};
+
+export default browserAutomationPlugin;
diff --git a/extensions/ci-plugin/providers/github.ts b/extensions/ci-plugin/providers/github.ts
index 66461ea2..e3783fde 100644
--- a/extensions/ci-plugin/providers/github.ts
+++ b/extensions/ci-plugin/providers/github.ts
@@ -126,6 +126,7 @@ export class GitHubProvider implements CiProvider {
const resolved = this.resolveRepo(repo);
const workflow = opts.workflow ?? "ci.yml";
const url = `${this.baseUrl}/repos/${resolved}/actions/workflows/${encodeURIComponent(workflow)}/dispatches`;
+ const dispatchedAt = new Date().toISOString();
const res = await fetch(url, {
method: "POST",
@@ -137,7 +138,37 @@ export class GitHubProvider implements CiProvider {
throw new Error(`GitHub API error: ${res.status} ${res.statusText}`);
}
- // workflow_dispatch returns 204 — return a placeholder run
+ // workflow_dispatch returns 204 with no body. Poll for the triggered run
+ // by listing recent runs for this branch created after the dispatch time.
+ const maxAttempts = 5;
+ const pollIntervalMs = 2000;
+
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
+ await new Promise((r) => setTimeout(r, pollIntervalMs));
+
+ const params = new URLSearchParams();
+ params.set("branch", opts.branch);
+ params.set("event", "workflow_dispatch");
+ params.set("per_page", "5");
+ params.set("created", `>=${dispatchedAt.slice(0, 10)}`);
+
+ const listUrl = `${this.baseUrl}/repos/${resolved}/actions/runs?${params.toString()}`;
+ const listRes = await fetch(listUrl, { headers: this.headers });
+ if (!listRes.ok) continue;
+
+ const data = (await listRes.json()) as GitHubWorkflowRunsResponse;
+ const match = data.workflow_runs.find(
+ (run) =>
+ run.head_branch === opts.branch && new Date(run.created_at).toISOString() >= dispatchedAt,
+ );
+
+ if (match) {
+ return this.toRun(match, resolved);
+ }
+ }
+
+ // Fallback: return a queued placeholder with the actions URL when
+ // the run could not be resolved within the polling window
return {
id: "pending",
provider: "github",
@@ -145,7 +176,7 @@ export class GitHubProvider implements CiProvider {
branch: opts.branch,
status: "queued",
url: `https://github.com/${resolved}/actions`,
- startedAt: new Date().toISOString(),
+ startedAt: dispatchedAt,
};
}
diff --git a/extensions/code-tools/config.ts b/extensions/code-tools/config.ts
new file mode 100644
index 00000000..4ba4534a
--- /dev/null
+++ b/extensions/code-tools/config.ts
@@ -0,0 +1,128 @@
+/**
+ * Code Tools Configuration
+ *
+ * Manual validation following the project's cortex-config pattern.
+ * Uses assertAllowedKeys for unknown key rejection, no Zod.
+ */
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export type CodeToolsConfig = {
+ workspaceRoot: string;
+ maxFileSizeBytes: number;
+ shellTimeout: number;
+ maxGlobResults: number;
+ maxGrepResults: number;
+ shellEnabled: boolean;
+};
+
+// ============================================================================
+// Defaults
+// ============================================================================
+
+const DEFAULT_WORKSPACE_ROOT = process.cwd();
+const DEFAULT_MAX_FILE_SIZE_BYTES = 2_097_152; // 2 MB
+const DEFAULT_SHELL_TIMEOUT = 120_000; // 2 minutes
+const DEFAULT_MAX_GLOB_RESULTS = 200;
+const DEFAULT_MAX_GREP_RESULTS = 50;
+const DEFAULT_SHELL_ENABLED = true;
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+function assertAllowedKeys(value: Record, allowed: string[], label: string): void {
+ const unknown = Object.keys(value).filter((key) => !allowed.includes(key));
+ if (unknown.length === 0) return;
+ throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
+}
+
+function clampInt(raw: unknown, min: number, max: number, defaultVal: number): number {
+ if (typeof raw !== "number") return defaultVal;
+ return Math.max(min, Math.min(max, Math.floor(raw)));
+}
+
+// ============================================================================
+// Schema
+// ============================================================================
+
+const ALLOWED_KEYS = [
+ "workspaceRoot",
+ "maxFileSizeBytes",
+ "shellTimeout",
+ "maxGlobResults",
+ "maxGrepResults",
+ "shellEnabled",
+];
+
+export const codeToolsConfigSchema = {
+ parse(value: unknown): CodeToolsConfig {
+ const cfg = (value ?? {}) as Record;
+ if (typeof value === "object" && value !== null && !Array.isArray(value)) {
+ assertAllowedKeys(cfg, ALLOWED_KEYS, "code tools config");
+ }
+
+ const workspaceRoot =
+ typeof cfg.workspaceRoot === "string" && cfg.workspaceRoot.trim()
+ ? cfg.workspaceRoot.trim()
+ : DEFAULT_WORKSPACE_ROOT;
+
+ const maxFileSizeBytes = clampInt(
+ cfg.maxFileSizeBytes,
+ 1024,
+ 50_000_000,
+ DEFAULT_MAX_FILE_SIZE_BYTES,
+ );
+
+ const shellTimeout = clampInt(cfg.shellTimeout, 1000, 600_000, DEFAULT_SHELL_TIMEOUT);
+
+ const maxGlobResults = clampInt(cfg.maxGlobResults, 10, 5000, DEFAULT_MAX_GLOB_RESULTS);
+
+ const maxGrepResults = clampInt(cfg.maxGrepResults, 1, 500, DEFAULT_MAX_GREP_RESULTS);
+
+ const shellEnabled =
+ typeof cfg.shellEnabled === "boolean" ? cfg.shellEnabled : DEFAULT_SHELL_ENABLED;
+
+ return {
+ workspaceRoot,
+ maxFileSizeBytes,
+ shellTimeout,
+ maxGlobResults,
+ maxGrepResults,
+ shellEnabled,
+ };
+ },
+ uiHints: {
+ workspaceRoot: {
+ label: "Workspace Root",
+ placeholder: DEFAULT_WORKSPACE_ROOT,
+ help: "Root directory for file operations. All paths are resolved relative to this.",
+ },
+ maxFileSizeBytes: {
+ label: "Max File Size",
+ placeholder: String(DEFAULT_MAX_FILE_SIZE_BYTES),
+ help: "Maximum file size in bytes for read operations (1024-50000000)",
+ },
+ shellTimeout: {
+ label: "Shell Timeout",
+ placeholder: String(DEFAULT_SHELL_TIMEOUT),
+ help: "Maximum execution time in milliseconds for shell commands (1000-600000)",
+ },
+ maxGlobResults: {
+ label: "Max Glob Results",
+ placeholder: String(DEFAULT_MAX_GLOB_RESULTS),
+ help: "Maximum number of glob results returned (10-5000)",
+ },
+ maxGrepResults: {
+ label: "Max Grep Results",
+ placeholder: String(DEFAULT_MAX_GREP_RESULTS),
+ help: "Maximum number of grep results returned (1-500)",
+ },
+ shellEnabled: {
+ label: "Shell Enabled",
+ help: "Whether shell command execution is allowed",
+ },
+ },
+};
diff --git a/extensions/code-tools/index.ts b/extensions/code-tools/index.ts
new file mode 100644
index 00000000..a12bd645
--- /dev/null
+++ b/extensions/code-tools/index.ts
@@ -0,0 +1,66 @@
+/**
+ * Mayros Code Tools Plugin
+ *
+ * File read/write/edit, glob, grep, ls, shell, notebook, web search, and web fetch
+ * tools for local code interaction. Provides the core filesystem, shell, and web
+ * primitives used by coding agents.
+ *
+ * Tools: code_read, code_read_many, code_write, code_edit, code_glob, code_grep, code_ls,
+ * code_shell, code_notebook, code_multi_edit, code_shell_interactive, code_web_search,
+ * code_web_fetch, git_commit, git_push, git_create_pr
+ */
+
+import { codeToolsConfigSchema } from "./config.js";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { registerCodeRead } from "./tools/code-read.js";
+import { registerCodeReadMany } from "./tools/code-read-many.js";
+import { registerCodeWrite } from "./tools/code-write.js";
+import { registerCodeEdit } from "./tools/code-edit.js";
+import { registerCodeGlob } from "./tools/code-glob.js";
+import { registerCodeGrep } from "./tools/code-grep.js";
+import { registerCodeLs } from "./tools/code-ls.js";
+import { registerCodeShell } from "./tools/code-shell.js";
+import { registerCodeNotebook } from "./tools/code-notebook.js";
+import { registerCodeMultiEdit } from "./tools/code-multi-edit.js";
+import { registerCodeShellInteractive } from "./tools/code-shell-interactive.js";
+import { registerWebSearch } from "./tools/web-search.js";
+import { registerWebFetch } from "./tools/web-fetch.js";
+import { registerGitCommit, registerGitPush, registerGitCreatePr } from "./tools/git-commit.js";
+
+// ============================================================================
+// Plugin Definition
+// ============================================================================
+
+const codeToolsPlugin = {
+ id: "code-tools",
+ name: "Code Tools",
+ description:
+ "File read/write/edit, glob, grep, ls, shell, git, and web tools for local code interaction",
+ kind: "coding" as const,
+ configSchema: codeToolsConfigSchema,
+
+ async register(api: MayrosPluginApi) {
+ const cfg = codeToolsConfigSchema.parse(api.pluginConfig);
+
+ registerCodeRead(api, cfg);
+ registerCodeReadMany(api, cfg);
+ registerCodeWrite(api, cfg);
+ registerCodeEdit(api, cfg);
+ registerCodeGlob(api, cfg);
+ registerCodeGrep(api, cfg);
+ registerCodeLs(api, cfg);
+ registerCodeShell(api, cfg);
+ registerCodeNotebook(api, cfg);
+ registerCodeMultiEdit(api, cfg);
+ registerCodeShellInteractive(api, cfg);
+ registerWebSearch(api, cfg);
+ registerWebFetch(api, cfg);
+ registerGitCommit(api, cfg);
+ registerGitPush(api, cfg);
+ registerGitCreatePr(api, cfg);
+
+ api.logger.info(`code-tools: registered 17 tools (workspace: ${cfg.workspaceRoot})`);
+ },
+};
+
+export default codeToolsPlugin;
diff --git a/extensions/code-tools/mayros.plugin.json b/extensions/code-tools/mayros.plugin.json
new file mode 100644
index 00000000..caeb3798
--- /dev/null
+++ b/extensions/code-tools/mayros.plugin.json
@@ -0,0 +1,7 @@
+{
+ "id": "code-tools",
+ "name": "Code Tools",
+ "description": "File read/write/edit, glob, grep, ls, and shell tools for local code interaction",
+ "version": "0.1.4",
+ "kind": "coding"
+}
diff --git a/extensions/code-tools/package.json b/extensions/code-tools/package.json
new file mode 100644
index 00000000..3a531333
--- /dev/null
+++ b/extensions/code-tools/package.json
@@ -0,0 +1,18 @@
+{
+ "name": "@apilium/mayros-code-tools",
+ "version": "0.1.4",
+ "private": true,
+ "type": "module",
+ "dependencies": {
+ "@sinclair/typebox": "0.34.48",
+ "fast-glob": "^3.3.3"
+ },
+ "devDependencies": {
+ "@apilium/mayros": "workspace:*"
+ },
+ "mayros": {
+ "extensions": [
+ "./index.ts"
+ ]
+ }
+}
diff --git a/extensions/code-tools/path-utils.ts b/extensions/code-tools/path-utils.ts
new file mode 100644
index 00000000..1437a568
--- /dev/null
+++ b/extensions/code-tools/path-utils.ts
@@ -0,0 +1,60 @@
+/**
+ * Shared path utilities for code-tools.
+ *
+ * Provides workspace-relative path resolution, traversal protection,
+ * image file detection, and binary buffer detection.
+ */
+
+import path from "node:path";
+
+/**
+ * Returns true if `childPath` is inside `parentPath`.
+ */
+export function isPathInside(childPath: string, parentPath: string): boolean {
+ const rel = path.relative(parentPath, childPath);
+ return !rel.startsWith("..") && !path.isAbsolute(rel);
+}
+
+/**
+ * Resolves a user-provided path to an absolute path within the workspace.
+ * Throws if the resolved path escapes the workspace root.
+ */
+export function resolveSafePath(inputPath: string, workspaceRoot: string): string {
+ const resolved = path.isAbsolute(inputPath) ? inputPath : path.resolve(workspaceRoot, inputPath);
+
+ if (!isPathInside(resolved, workspaceRoot)) {
+ throw new Error(`Path "${inputPath}" is outside workspace root`);
+ }
+ return resolved;
+}
+
+const IMAGE_EXTENSIONS = new Set([
+ ".png",
+ ".jpg",
+ ".jpeg",
+ ".gif",
+ ".webp",
+ ".svg",
+ ".ico",
+ ".bmp",
+ ".tiff",
+ ".tif",
+]);
+
+/**
+ * Returns true if the file has a recognized image extension.
+ */
+export function isImageFile(filePath: string): boolean {
+ return IMAGE_EXTENSIONS.has(path.extname(filePath).toLowerCase());
+}
+
+/**
+ * Returns true if the buffer likely contains binary content (has null bytes).
+ */
+export function isBinaryBuffer(buffer: Buffer, checkBytes = 8192): boolean {
+ const len = Math.min(buffer.length, checkBytes);
+ for (let i = 0; i < len; i++) {
+ if (buffer[i] === 0) return true;
+ }
+ return false;
+}
diff --git a/extensions/code-tools/tools/code-edit.test.ts b/extensions/code-tools/tools/code-edit.test.ts
new file mode 100644
index 00000000..c155d71b
--- /dev/null
+++ b/extensions/code-tools/tools/code-edit.test.ts
@@ -0,0 +1,73 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+import { generateDiff } from "./code-edit.js";
+
+describe("generateDiff", () => {
+ it("generates a unified diff", () => {
+ const old = "line1\nline2\nline3";
+ const updated = "line1\nmodified\nline3";
+ const diff = generateDiff("test.ts", old, updated);
+ expect(diff).toContain("--- a/test.ts");
+ expect(diff).toContain("+++ b/test.ts");
+ expect(diff).toContain("-line2");
+ expect(diff).toContain("+modified");
+ });
+
+ it("handles additions", () => {
+ const old = "line1\nline2";
+ const updated = "line1\nline2\nline3";
+ const diff = generateDiff("test.ts", old, updated);
+ expect(diff).toContain("+line3");
+ });
+
+ it("handles deletions", () => {
+ const old = "line1\nline2\nline3";
+ const updated = "line1\nline3";
+ const diff = generateDiff("test.ts", old, updated);
+ expect(diff).toContain("-line2");
+ });
+});
+
+describe("code_edit behavior", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-edit-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("replaces exact string", async () => {
+ const filePath = path.join(tmpDir, "test.ts");
+ await fs.writeFile(filePath, "const x = 1;\nconst y = 2;");
+ const content = await fs.readFile(filePath, "utf-8");
+ const newContent = content.replace("const x = 1;", "const x = 42;");
+ await fs.writeFile(filePath, newContent);
+ const result = await fs.readFile(filePath, "utf-8");
+ expect(result).toContain("const x = 42;");
+ expect(result).toContain("const y = 2;");
+ });
+
+ it("detects non-unique old_string", async () => {
+ const filePath = path.join(tmpDir, "dup.ts");
+ await fs.writeFile(filePath, "hello\nhello\nworld");
+ const content = await fs.readFile(filePath, "utf-8");
+ const firstIdx = content.indexOf("hello");
+ const secondIdx = content.indexOf("hello", firstIdx + 1);
+ expect(secondIdx).toBeGreaterThan(firstIdx); // confirms duplicate
+ });
+
+ it("replace_all replaces every occurrence", async () => {
+ const filePath = path.join(tmpDir, "multi.ts");
+ await fs.writeFile(filePath, "foo bar foo baz foo");
+ const content = await fs.readFile(filePath, "utf-8");
+ const newContent = content.split("foo").join("qux");
+ await fs.writeFile(filePath, newContent);
+ const result = await fs.readFile(filePath, "utf-8");
+ expect(result).toBe("qux bar qux baz qux");
+ });
+});
diff --git a/extensions/code-tools/tools/code-edit.ts b/extensions/code-tools/tools/code-edit.ts
new file mode 100644
index 00000000..55b88f86
--- /dev/null
+++ b/extensions/code-tools/tools/code-edit.ts
@@ -0,0 +1,160 @@
+import fs from "node:fs/promises";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath } from "../path-utils.js";
+import { parseDiffStats } from "../../../src/tui/diff-renderer.js";
+
+/**
+ * Generate a minimal unified diff snippet showing the change context.
+ */
+function generateDiff(filePath: string, oldContent: string, newContent: string): string {
+ const oldLines = oldContent.split("\n");
+ const newLines = newContent.split("\n");
+ const lines: string[] = [`--- a/${filePath}`, `+++ b/${filePath}`];
+
+ // Find first difference
+ let start = 0;
+ while (
+ start < oldLines.length &&
+ start < newLines.length &&
+ oldLines[start] === newLines[start]
+ ) {
+ start++;
+ }
+
+ // Find last difference
+ let oldEnd = oldLines.length - 1;
+ let newEnd = newLines.length - 1;
+ while (oldEnd > start && newEnd > start && oldLines[oldEnd] === newLines[newEnd]) {
+ oldEnd--;
+ newEnd--;
+ }
+
+ const ctxStart = Math.max(0, start - 3);
+ const ctxOldEnd = Math.min(oldLines.length - 1, oldEnd + 3);
+ const ctxNewEnd = Math.min(newLines.length - 1, newEnd + 3);
+
+ lines.push(
+ `@@ -${ctxStart + 1},${ctxOldEnd - ctxStart + 1} +${ctxStart + 1},${ctxNewEnd - ctxStart + 1} @@`,
+ );
+
+ // Context before
+ for (let i = ctxStart; i < start; i++) {
+ lines.push(` ${oldLines[i]}`);
+ }
+
+ // Removed lines
+ for (let i = start; i <= oldEnd; i++) {
+ lines.push(`-${oldLines[i]}`);
+ }
+
+ // Added lines
+ for (let i = start; i <= newEnd; i++) {
+ lines.push(`+${newLines[i]}`);
+ }
+
+ // Context after
+ for (let i = oldEnd + 1; i <= ctxOldEnd; i++) {
+ lines.push(` ${oldLines[i]}`);
+ }
+
+ return lines.join("\n");
+}
+
+export function registerCodeEdit(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_edit",
+ label: "Edit File",
+ description:
+ "Perform exact string replacement in a file. The old_string must exist in the file. By default it must be unique; use replace_all to replace every occurrence.",
+ parameters: Type.Object({
+ path: Type.String({ description: "File path (absolute or relative to workspace)" }),
+ old_string: Type.String({ description: "The exact text to find and replace" }),
+ new_string: Type.String({ description: "The replacement text" }),
+ replace_all: Type.Optional(
+ Type.Boolean({ description: "Replace all occurrences (default: false)" }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as {
+ path?: string;
+ old_string?: string;
+ new_string?: string;
+ replace_all?: boolean;
+ };
+ if (typeof p.path !== "string" || !p.path.trim()) {
+ throw new ToolInputError("path required");
+ }
+ if (typeof p.old_string !== "string") {
+ throw new ToolInputError("old_string required");
+ }
+ if (typeof p.new_string !== "string") {
+ throw new ToolInputError("new_string required");
+ }
+ if (p.old_string === p.new_string) {
+ throw new ToolInputError("old_string and new_string must be different");
+ }
+
+ const filePath = resolveSafePath(p.path.trim(), cfg.workspaceRoot);
+ const replaceAll = p.replace_all === true;
+
+ let content: string;
+ try {
+ content = await fs.readFile(filePath, "utf-8");
+ } catch {
+ throw new ToolInputError(`File not found: ${p.path}`);
+ }
+
+ // Check old_string exists
+ const firstIdx = content.indexOf(p.old_string);
+ if (firstIdx === -1) {
+ throw new ToolInputError(
+ `old_string not found in ${p.path}. Make sure the string matches exactly (including whitespace).`,
+ );
+ }
+
+ // If not replace_all, check uniqueness
+ if (!replaceAll) {
+ const secondIdx = content.indexOf(p.old_string, firstIdx + 1);
+ if (secondIdx !== -1) {
+ throw new ToolInputError(
+ `old_string is not unique in ${p.path} (found at multiple positions). Provide more context to make it unique, or use replace_all.`,
+ );
+ }
+ }
+
+ // Perform replacement
+ let newContent: string;
+ let replacements: number;
+ if (replaceAll) {
+ const parts = content.split(p.old_string);
+ replacements = parts.length - 1;
+ newContent = parts.join(p.new_string);
+ } else {
+ newContent = content.replace(p.old_string, p.new_string);
+ replacements = 1;
+ }
+
+ await fs.writeFile(filePath, newContent, "utf-8");
+
+ const diff = generateDiff(p.path.trim(), content, newContent);
+ const stats = parseDiffStats(diff);
+
+ return {
+ content: [{ type: "text" as const, text: diff }],
+ details: {
+ path: p.path.trim(),
+ replacements,
+ diffStats: stats,
+ },
+ };
+ },
+ },
+ { name: "code_edit" },
+ );
+}
+
+export { generateDiff };
diff --git a/extensions/code-tools/tools/code-glob.test.ts b/extensions/code-tools/tools/code-glob.test.ts
new file mode 100644
index 00000000..944f4be5
--- /dev/null
+++ b/extensions/code-tools/tools/code-glob.test.ts
@@ -0,0 +1,105 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+import { globFiles } from "./code-glob.js";
+
+describe("globFiles", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-glob-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("finds .ts files in the base directory", async () => {
+ await fs.writeFile(path.join(tmpDir, "a.ts"), "export const a = 1;");
+ await fs.writeFile(path.join(tmpDir, "b.ts"), "export const b = 2;");
+ await fs.writeFile(path.join(tmpDir, "c.js"), "export const c = 3;");
+
+ const result = await globFiles("*.ts", tmpDir, 100);
+ expect(result.files).toHaveLength(2);
+ expect(result.files).toContain("a.ts");
+ expect(result.files).toContain("b.ts");
+ expect(result.totalFound).toBe(2);
+ expect(result.truncated).toBe(false);
+ });
+
+ it("ignores node_modules", async () => {
+ await fs.mkdir(path.join(tmpDir, "node_modules/pkg"), { recursive: true });
+ await fs.writeFile(path.join(tmpDir, "node_modules/pkg/index.ts"), "");
+ await fs.writeFile(path.join(tmpDir, "src.ts"), "export default 1;");
+
+ const result = await globFiles("**/*.ts", tmpDir, 100);
+ expect(result.files).toHaveLength(1);
+ expect(result.files[0]).toBe("src.ts");
+ });
+
+ it("ignores .git directory", async () => {
+ await fs.mkdir(path.join(tmpDir, ".git/objects"), { recursive: true });
+ await fs.writeFile(path.join(tmpDir, ".git/objects/data.ts"), "");
+ await fs.writeFile(path.join(tmpDir, "app.ts"), "");
+
+ const result = await globFiles("**/*.ts", tmpDir, 100);
+ expect(result.files).toHaveLength(1);
+ expect(result.files[0]).toBe("app.ts");
+ });
+
+ it("sorts by mtime with newest first", async () => {
+ // Create files with different mtimes using utimes
+ const fileA = path.join(tmpDir, "old.ts");
+ const fileB = path.join(tmpDir, "new.ts");
+
+ await fs.writeFile(fileA, "old");
+ await fs.writeFile(fileB, "new");
+
+ // Set old.ts to a past mtime
+ const pastTime = new Date("2020-01-01");
+ await fs.utimes(fileA, pastTime, pastTime);
+
+ const result = await globFiles("*.ts", tmpDir, 100);
+ expect(result.files).toHaveLength(2);
+ // newest first
+ expect(result.files[0]).toBe("new.ts");
+ expect(result.files[1]).toBe("old.ts");
+ });
+
+ it("respects maxResults limit", async () => {
+ for (let i = 0; i < 10; i++) {
+ await fs.writeFile(path.join(tmpDir, `file${i}.ts`), `content ${i}`);
+ }
+
+ const result = await globFiles("*.ts", tmpDir, 3);
+ expect(result.files).toHaveLength(3);
+ expect(result.totalFound).toBe(10);
+ expect(result.truncated).toBe(true);
+ });
+
+ it("returns empty for non-existent directory pattern", async () => {
+ const result = await globFiles("**/*.rs", tmpDir, 100);
+ expect(result.files).toHaveLength(0);
+ expect(result.totalFound).toBe(0);
+ expect(result.truncated).toBe(false);
+ });
+
+ it("finds nested files with recursive pattern", async () => {
+ await fs.mkdir(path.join(tmpDir, "src/components"), { recursive: true });
+ await fs.writeFile(path.join(tmpDir, "src/index.ts"), "");
+ await fs.writeFile(path.join(tmpDir, "src/components/App.tsx"), "");
+
+ const result = await globFiles("**/*.{ts,tsx}", tmpDir, 100);
+ expect(result.files).toHaveLength(2);
+ const names = result.files.map((f) => path.basename(f));
+ expect(names).toContain("index.ts");
+ expect(names).toContain("App.tsx");
+ });
+
+ it("handles empty directory", async () => {
+ const result = await globFiles("**/*", tmpDir, 100);
+ expect(result.files).toHaveLength(0);
+ expect(result.truncated).toBe(false);
+ });
+});
diff --git a/extensions/code-tools/tools/code-glob.ts b/extensions/code-tools/tools/code-glob.ts
new file mode 100644
index 00000000..0cd5638b
--- /dev/null
+++ b/extensions/code-tools/tools/code-glob.ts
@@ -0,0 +1,117 @@
+import fg from "fast-glob";
+import fs from "node:fs/promises";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath, isPathInside } from "../path-utils.js";
+
+/**
+ * Core glob logic extracted for testability.
+ * Finds files matching a pattern, sorted by mtime (newest first),
+ * respecting standard ignore rules.
+ */
+export async function globFiles(
+ pattern: string,
+ basePath: string,
+ maxResults: number,
+): Promise<{ files: string[]; totalFound: number; truncated: boolean }> {
+ const files = await fg(pattern, {
+ cwd: basePath,
+ dot: false,
+ ignore: ["**/node_modules/**", "**/.git/**"],
+ onlyFiles: true,
+ followSymbolicLinks: false,
+ suppressErrors: true,
+ });
+
+ const withStats = await Promise.all(
+ files.slice(0, maxResults * 2).map(async (file) => {
+ try {
+ const stat = await fs.stat(`${basePath}/${file}`);
+ return { file, mtime: stat.mtimeMs };
+ } catch {
+ return { file, mtime: 0 };
+ }
+ }),
+ );
+
+ withStats.sort((a, b) => b.mtime - a.mtime);
+ const limited = withStats.slice(0, maxResults);
+
+ return {
+ files: limited.map((e) => e.file),
+ totalFound: files.length,
+ truncated: files.length > maxResults,
+ };
+}
+
+export function registerCodeGlob(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_glob",
+ label: "Glob Files",
+ description:
+ "Find files matching a glob pattern. Respects .gitignore. Returns paths sorted by modification time (newest first).",
+ parameters: Type.Object({
+ pattern: Type.String({ description: 'Glob pattern (e.g. "**/*.ts", "src/**/*.tsx")' }),
+ path: Type.Optional(
+ Type.String({ description: "Base directory for search (defaults to workspace root)" }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as { pattern?: string; path?: string };
+ if (typeof p.pattern !== "string" || !p.pattern.trim()) {
+ throw new ToolInputError("pattern required");
+ }
+
+ const basePath = p.path?.trim()
+ ? resolveSafePath(p.path.trim(), cfg.workspaceRoot)
+ : cfg.workspaceRoot;
+
+ // Ensure basePath is inside workspace
+ if (!isPathInside(basePath, cfg.workspaceRoot) && basePath !== cfg.workspaceRoot) {
+ throw new ToolInputError("path is outside workspace root");
+ }
+
+ const files = await fg(p.pattern.trim(), {
+ cwd: basePath,
+ dot: false,
+ ignore: ["**/node_modules/**", "**/.git/**"],
+ onlyFiles: true,
+ followSymbolicLinks: false,
+ suppressErrors: true,
+ });
+
+ // Sort by modification time (newest first)
+ const withStats = await Promise.all(
+ files.slice(0, cfg.maxGlobResults * 2).map(async (file) => {
+ try {
+ const stat = await fs.stat(`${basePath}/${file}`);
+ return { file, mtime: stat.mtimeMs };
+ } catch {
+ return { file, mtime: 0 };
+ }
+ }),
+ );
+
+ withStats.sort((a, b) => b.mtime - a.mtime);
+ const limited = withStats.slice(0, cfg.maxGlobResults);
+
+ const text = limited.map((e) => e.file).join("\n") || "(no matches)";
+ const truncated = files.length > cfg.maxGlobResults;
+
+ return {
+ content: [{ type: "text" as const, text }],
+ details: {
+ pattern: p.pattern.trim(),
+ matches: limited.length,
+ totalFound: files.length,
+ truncated,
+ },
+ };
+ },
+ },
+ { name: "code_glob" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-grep.test.ts b/extensions/code-tools/tools/code-grep.test.ts
new file mode 100644
index 00000000..e688291b
--- /dev/null
+++ b/extensions/code-tools/tools/code-grep.test.ts
@@ -0,0 +1,130 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+import { grepBuiltin } from "./code-grep.js";
+
+describe("grepBuiltin", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-grep-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("finds pattern matches in a single file", async () => {
+ await fs.writeFile(
+ path.join(tmpDir, "test.ts"),
+ "const foo = 1;\nconst bar = 2;\nconst fooBar = 3;",
+ );
+
+ const matches = await grepBuiltin("foo", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(2);
+ expect(matches[0].file).toBe("test.ts");
+ expect(matches[0].line).toBe(1);
+ expect(matches[0].content).toContain("foo");
+ expect(matches[1].line).toBe(3);
+ expect(matches[1].content).toContain("fooBar");
+ });
+
+ it("searches recursively through nested directories", async () => {
+ await fs.mkdir(path.join(tmpDir, "deep/nested"), { recursive: true });
+ await fs.writeFile(path.join(tmpDir, "deep/nested/file.ts"), "const target = true;");
+ await fs.writeFile(path.join(tmpDir, "root.ts"), "const target = false;");
+
+ const matches = await grepBuiltin("target", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(2);
+ const files = matches.map((m) => m.file);
+ expect(files).toContain(path.join("deep", "nested", "file.ts"));
+ expect(files).toContain("root.ts");
+ });
+
+ it("enforces max results", async () => {
+ const lines = Array.from({ length: 100 }, (_, i) => `match_${i}`);
+ await fs.writeFile(path.join(tmpDir, "many.txt"), lines.join("\n"));
+
+ const matches = await grepBuiltin("match_", tmpDir, undefined, 10);
+ expect(matches).toHaveLength(10);
+ });
+
+ it("filters files with glob pattern", async () => {
+ await fs.writeFile(path.join(tmpDir, "code.ts"), "const value = 42;");
+ await fs.writeFile(path.join(tmpDir, "code.js"), "const value = 42;");
+ await fs.writeFile(path.join(tmpDir, "readme.md"), "value is important");
+
+ const matches = await grepBuiltin("value", tmpDir, "*.ts", 50);
+ expect(matches.length).toBeGreaterThanOrEqual(1);
+ for (const m of matches) {
+ expect(m.file).toMatch(/\.ts$/);
+ }
+ });
+
+ it("performs case-insensitive matching", async () => {
+ await fs.writeFile(
+ path.join(tmpDir, "case.ts"),
+ "const Hello = 1;\nconst HELLO = 2;\nconst hello = 3;",
+ );
+
+ const matches = await grepBuiltin("hello", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(3);
+ });
+
+ it("skips node_modules directory", async () => {
+ await fs.mkdir(path.join(tmpDir, "node_modules/pkg"), { recursive: true });
+ await fs.writeFile(path.join(tmpDir, "node_modules/pkg/index.ts"), "const secret = 1;");
+ await fs.writeFile(path.join(tmpDir, "app.ts"), "const secret = 2;");
+
+ const matches = await grepBuiltin("secret", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(1);
+ expect(matches[0].file).toBe("app.ts");
+ });
+
+ it("skips .git directory", async () => {
+ await fs.mkdir(path.join(tmpDir, ".git/refs"), { recursive: true });
+ await fs.writeFile(path.join(tmpDir, ".git/refs/data.txt"), "found me");
+ await fs.writeFile(path.join(tmpDir, "source.ts"), "found me too");
+
+ const matches = await grepBuiltin("found", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(1);
+ expect(matches[0].file).toBe("source.ts");
+ });
+
+ it("handles unreadable files gracefully", async () => {
+ await fs.writeFile(path.join(tmpDir, "good.ts"), "findme here");
+ // Create a directory that looks like a file won't cause issues —
+ // the function uses readdir + isFile checks, so create a symlink to nothing
+ await fs.symlink("/nonexistent/path/file.ts", path.join(tmpDir, "broken-link.ts"));
+
+ // Should not throw, and should find the match in the readable file
+ const matches = await grepBuiltin("findme", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(1);
+ expect(matches[0].file).toBe("good.ts");
+ });
+
+ it("returns empty array for empty directory", async () => {
+ const matches = await grepBuiltin("anything", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(0);
+ });
+
+ it("handles regex special characters in pattern", async () => {
+ await fs.writeFile(path.join(tmpDir, "regex.ts"), "function hello() {}\nfunction world() {}");
+
+ const matches = await grepBuiltin("function\\s+\\w+", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(2);
+ expect(matches[0].content).toContain("function hello");
+ expect(matches[1].content).toContain("function world");
+ });
+
+ it("reports correct line numbers", async () => {
+ const content = "line1\nline2\ntarget_line\nline4\nanother_target\n";
+ await fs.writeFile(path.join(tmpDir, "lines.ts"), content);
+
+ const matches = await grepBuiltin("target", tmpDir, undefined, 50);
+ expect(matches).toHaveLength(2);
+ expect(matches[0].line).toBe(3);
+ expect(matches[1].line).toBe(5);
+ });
+});
diff --git a/extensions/code-tools/tools/code-grep.ts b/extensions/code-tools/tools/code-grep.ts
new file mode 100644
index 00000000..0976dceb
--- /dev/null
+++ b/extensions/code-tools/tools/code-grep.ts
@@ -0,0 +1,223 @@
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath, isPathInside } from "../path-utils.js";
+
+const execFileAsync = promisify(execFile);
+
+type GrepMatch = {
+ file: string;
+ line: number;
+ content: string;
+};
+
+/**
+ * Try ripgrep first, fall back to built-in recursive grep.
+ */
+async function grepWithRg(
+ pattern: string,
+ searchPath: string,
+ glob: string | undefined,
+ contextLines: number,
+ maxResults: number,
+): Promise<{ matches: GrepMatch[]; usedRg: boolean }> {
+ try {
+ const args = [
+ "--no-heading",
+ "--line-number",
+ "--color=never",
+ "--max-count",
+ String(maxResults),
+ ];
+ if (contextLines > 0) {
+ args.push("-C", String(contextLines));
+ }
+ if (glob) {
+ args.push("--glob", glob);
+ }
+ args.push("--", pattern, searchPath);
+
+ const { stdout } = await execFileAsync("rg", args, {
+ timeout: 30_000,
+ maxBuffer: 10 * 1024 * 1024,
+ });
+
+ const matches: GrepMatch[] = [];
+ for (const line of stdout.split("\n")) {
+ if (!line.trim()) continue;
+ // Format: file:line:content or file-line-content (context)
+ const match = line.match(/^(.+?)[:-](\d+)[:-](.*)$/);
+ if (match) {
+ matches.push({
+ file: path.relative(searchPath, match[1]),
+ line: parseInt(match[2], 10),
+ content: match[3],
+ });
+ }
+ }
+
+ return { matches: matches.slice(0, maxResults), usedRg: true };
+ } catch (err) {
+ // rg not found or failed — return empty to trigger fallback
+ const error = err as { code?: string };
+ if (error.code === "ENOENT") {
+ return { matches: [], usedRg: false };
+ }
+ // rg found but no matches (exit code 1) or other error
+ if ((err as { status?: number }).status === 1) {
+ return { matches: [], usedRg: true };
+ }
+ return { matches: [], usedRg: false };
+ }
+}
+
+/**
+ * Built-in fallback grep using fs.readdir recursion.
+ */
+async function grepBuiltin(
+ pattern: string,
+ searchPath: string,
+ glob: string | undefined,
+ maxResults: number,
+): Promise {
+ const regex = new RegExp(pattern, "i");
+ const matches: GrepMatch[] = [];
+ const globRegex = glob ? new RegExp(glob.replace(/\*/g, ".*").replace(/\?/g, ".")) : undefined;
+
+ async function walk(dir: string): Promise {
+ if (matches.length >= maxResults) return;
+
+ let entries;
+ try {
+ entries = await fs.readdir(dir, { withFileTypes: true });
+ } catch {
+ return;
+ }
+
+ for (const entry of entries) {
+ if (matches.length >= maxResults) return;
+ const fullPath = path.join(dir, entry.name);
+
+ if (entry.isDirectory()) {
+ if (entry.name === "node_modules" || entry.name === ".git") continue;
+ await walk(fullPath);
+ } else if (entry.isFile()) {
+ const relPath = path.relative(searchPath, fullPath);
+ if (globRegex && !globRegex.test(relPath)) continue;
+
+ try {
+ const content = await fs.readFile(fullPath, "utf-8");
+ const lines = content.split("\n");
+ for (let i = 0; i < lines.length && matches.length < maxResults; i++) {
+ if (regex.test(lines[i])) {
+ matches.push({
+ file: relPath,
+ line: i + 1,
+ content: lines[i],
+ });
+ }
+ }
+ } catch {
+ // Skip unreadable files
+ }
+ }
+ }
+ }
+
+ await walk(searchPath);
+ return matches;
+}
+
+export { grepBuiltin };
+
+export function registerCodeGrep(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_grep",
+ label: "Search Code",
+ description:
+ "Search file contents using regex patterns. Uses ripgrep if available, otherwise falls back to built-in search. Respects .gitignore.",
+ parameters: Type.Object({
+ pattern: Type.String({ description: "Regex pattern to search for" }),
+ path: Type.Optional(
+ Type.String({ description: "Directory to search in (defaults to workspace root)" }),
+ ),
+ glob: Type.Optional(
+ Type.String({ description: 'File glob filter (e.g. "*.ts", "*.{ts,tsx}")' }),
+ ),
+ context: Type.Optional(
+ Type.Number({ description: "Lines of context around matches (default: 0)" }),
+ ),
+ max_results: Type.Optional(Type.Number({ description: "Maximum results (default: 50)" })),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as {
+ pattern?: string;
+ path?: string;
+ glob?: string;
+ context?: number;
+ max_results?: number;
+ };
+ if (typeof p.pattern !== "string" || !p.pattern.trim()) {
+ throw new ToolInputError("pattern required");
+ }
+
+ const searchPath = p.path?.trim()
+ ? resolveSafePath(p.path.trim(), cfg.workspaceRoot)
+ : cfg.workspaceRoot;
+
+ if (!isPathInside(searchPath, cfg.workspaceRoot) && searchPath !== cfg.workspaceRoot) {
+ throw new ToolInputError("path is outside workspace root");
+ }
+
+ const contextLines = typeof p.context === "number" ? Math.max(0, Math.trunc(p.context)) : 0;
+ const maxResults =
+ typeof p.max_results === "number"
+ ? Math.max(1, Math.min(Math.trunc(p.max_results), cfg.maxGrepResults))
+ : cfg.maxGrepResults;
+
+ // Try ripgrep first
+ let { matches, usedRg } = await grepWithRg(
+ p.pattern.trim(),
+ searchPath,
+ p.glob,
+ contextLines,
+ maxResults,
+ );
+
+ // Fallback to built-in if rg not available
+ if (!usedRg && matches.length === 0) {
+ matches = await grepBuiltin(p.pattern.trim(), searchPath, p.glob, maxResults);
+ }
+
+ if (matches.length === 0) {
+ return {
+ content: [{ type: "text" as const, text: "No matches found." }],
+ details: {
+ pattern: p.pattern.trim(),
+ matches: 0,
+ engine: usedRg ? "ripgrep" : "builtin",
+ },
+ };
+ }
+
+ const lines = matches.map((m) => `${m.file}:${m.line}: ${m.content}`);
+
+ return {
+ content: [{ type: "text" as const, text: lines.join("\n") }],
+ details: {
+ pattern: p.pattern.trim(),
+ matches: matches.length,
+ engine: usedRg ? "ripgrep" : "builtin",
+ },
+ };
+ },
+ },
+ { name: "code_grep" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-ls.test.ts b/extensions/code-tools/tools/code-ls.test.ts
new file mode 100644
index 00000000..dffd3276
--- /dev/null
+++ b/extensions/code-tools/tools/code-ls.test.ts
@@ -0,0 +1,115 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+import { listDirectory } from "./code-ls.js";
+import type { LsEntry } from "./code-ls.js";
+
+describe("listDirectory", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-ls-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("sorts directories before files", async () => {
+ await fs.writeFile(path.join(tmpDir, "aaa-file.txt"), "hello");
+ await fs.mkdir(path.join(tmpDir, "zzz-dir"));
+
+ const entries = await listDirectory(tmpDir);
+ expect(entries).toHaveLength(2);
+ expect(entries[0].name).toBe("zzz-dir");
+ expect(entries[0].type).toBe("directory");
+ expect(entries[1].name).toBe("aaa-file.txt");
+ expect(entries[1].type).toBe("file");
+ });
+
+ it("detects symlinks", async () => {
+ await fs.writeFile(path.join(tmpDir, "target.txt"), "content");
+ await fs.symlink(path.join(tmpDir, "target.txt"), path.join(tmpDir, "link.txt"));
+
+ const entries = await listDirectory(tmpDir);
+ const link = entries.find((e) => e.name === "link.txt");
+ expect(link).toBeDefined();
+ expect(link!.type).toBe("symlink");
+ });
+
+ it("includes file sizes for regular files", async () => {
+ const content = "hello world";
+ await fs.writeFile(path.join(tmpDir, "sized.txt"), content);
+
+ const entries = await listDirectory(tmpDir);
+ expect(entries).toHaveLength(1);
+ expect(entries[0].size).toBe(Buffer.byteLength(content));
+ });
+
+ it("does not include sizes for directories", async () => {
+ await fs.mkdir(path.join(tmpDir, "subdir"));
+
+ const entries = await listDirectory(tmpDir);
+ expect(entries).toHaveLength(1);
+ expect(entries[0].type).toBe("directory");
+ expect(entries[0].size).toBeUndefined();
+ });
+
+ it("sorts alphabetically within groups", async () => {
+ await fs.mkdir(path.join(tmpDir, "beta"));
+ await fs.mkdir(path.join(tmpDir, "alpha"));
+ await fs.writeFile(path.join(tmpDir, "zebra.ts"), "");
+ await fs.writeFile(path.join(tmpDir, "aardvark.ts"), "");
+
+ const entries = await listDirectory(tmpDir);
+ // Directories first, alphabetical
+ expect(entries[0].name).toBe("alpha");
+ expect(entries[1].name).toBe("beta");
+ // Files next, alphabetical
+ expect(entries[2].name).toBe("aardvark.ts");
+ expect(entries[3].name).toBe("zebra.ts");
+ });
+
+ it("returns empty array for empty directory", async () => {
+ const entries = await listDirectory(tmpDir);
+ expect(entries).toHaveLength(0);
+ });
+
+ it("throws on non-existent directory", async () => {
+ const badPath = path.join(tmpDir, "does-not-exist");
+ await expect(listDirectory(badPath)).rejects.toThrow();
+ });
+
+ it("includes hidden files", async () => {
+ await fs.writeFile(path.join(tmpDir, ".hidden"), "secret");
+ await fs.writeFile(path.join(tmpDir, "visible.txt"), "public");
+
+ const entries = await listDirectory(tmpDir);
+ const names = entries.map((e: LsEntry) => e.name);
+ expect(names).toContain(".hidden");
+ expect(names).toContain("visible.txt");
+ });
+
+ it("handles mixed entry types correctly", async () => {
+ await fs.mkdir(path.join(tmpDir, "dir1"));
+ await fs.writeFile(path.join(tmpDir, "file1.txt"), "data");
+ await fs.writeFile(path.join(tmpDir, "target"), "target-data");
+ await fs.symlink(path.join(tmpDir, "target"), path.join(tmpDir, "link1"));
+
+ const entries = await listDirectory(tmpDir);
+ const types = entries.map((e: LsEntry) => e.type);
+ expect(types).toContain("directory");
+ expect(types).toContain("file");
+ expect(types).toContain("symlink");
+ });
+
+ it("reports correct size for files with unicode content", async () => {
+ const unicodeContent = "Hello \u{1F30D}"; // emoji takes multiple bytes
+ await fs.writeFile(path.join(tmpDir, "unicode.txt"), unicodeContent, "utf-8");
+
+ const entries = await listDirectory(tmpDir);
+ expect(entries).toHaveLength(1);
+ expect(entries[0].size).toBe(Buffer.byteLength(unicodeContent, "utf-8"));
+ });
+});
diff --git a/extensions/code-tools/tools/code-ls.ts b/extensions/code-tools/tools/code-ls.ts
new file mode 100644
index 00000000..61bd8704
--- /dev/null
+++ b/extensions/code-tools/tools/code-ls.ts
@@ -0,0 +1,132 @@
+/**
+ * code_ls tool — List files and directories.
+ *
+ * Returns entries sorted: directories first, then files, alphabetical within groups.
+ */
+
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath } from "../path-utils.js";
+
+export type LsEntry = {
+ name: string;
+ type: "file" | "directory" | "symlink";
+ size?: number;
+};
+
+/**
+ * Core listing logic extracted for testability.
+ * Lists entries in a directory, sorted: directories first, then files, alphabetical.
+ */
+export async function listDirectory(dirPath: string): Promise {
+ const dirents = await fs.readdir(dirPath, { withFileTypes: true });
+
+ const entries: LsEntry[] = [];
+ for (const d of dirents) {
+ const entryType = d.isSymbolicLink()
+ ? ("symlink" as const)
+ : d.isDirectory()
+ ? ("directory" as const)
+ : ("file" as const);
+
+ const entry: LsEntry = { name: d.name, type: entryType };
+
+ if (entryType === "file") {
+ try {
+ const stat = await fs.stat(path.join(dirPath, d.name));
+ entry.size = stat.size;
+ } catch {
+ // size unavailable
+ }
+ }
+
+ entries.push(entry);
+ }
+
+ // Sort: directories first, then files, alphabetical within each group
+ entries.sort((a, b) => {
+ if (a.type === "directory" && b.type !== "directory") return -1;
+ if (a.type !== "directory" && b.type === "directory") return 1;
+ return a.name.localeCompare(b.name);
+ });
+
+ return entries;
+}
+
+export function registerCodeLs(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_ls",
+ label: "List Directory",
+ description:
+ "List files and directories. Returns entries sorted: directories first, then files, alphabetical.",
+ parameters: Type.Object({
+ path: Type.Optional(
+ Type.String({ description: "Directory path (defaults to workspace root)" }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const rawPath = (params as Record).path;
+ const dirPath =
+ typeof rawPath === "string" && rawPath.trim()
+ ? resolveSafePath(rawPath.trim(), cfg.workspaceRoot)
+ : cfg.workspaceRoot;
+
+ let dirents;
+ try {
+ dirents = await fs.readdir(dirPath, { withFileTypes: true });
+ } catch {
+ throw new ToolInputError(`Cannot read directory: ${rawPath ?? "."}`);
+ }
+
+ const entries: LsEntry[] = [];
+ for (const d of dirents) {
+ const entryType = d.isSymbolicLink()
+ ? ("symlink" as const)
+ : d.isDirectory()
+ ? ("directory" as const)
+ : ("file" as const);
+
+ const entry: LsEntry = { name: d.name, type: entryType };
+
+ if (entryType === "file") {
+ try {
+ const stat = await fs.stat(path.join(dirPath, d.name));
+ entry.size = stat.size;
+ } catch {
+ // size unavailable
+ }
+ }
+
+ entries.push(entry);
+ }
+
+ // Sort: directories first, then files, alphabetical within each group
+ entries.sort((a, b) => {
+ if (a.type === "directory" && b.type !== "directory") return -1;
+ if (a.type !== "directory" && b.type === "directory") return 1;
+ return a.name.localeCompare(b.name);
+ });
+
+ const lines = entries.map((e) => {
+ const suffix = e.type === "directory" ? "/" : e.type === "symlink" ? " @" : "";
+ const sizeStr = e.size !== undefined ? ` (${e.size} bytes)` : "";
+ return `${e.name}${suffix}${sizeStr}`;
+ });
+
+ return {
+ content: [{ type: "text" as const, text: lines.join("\n") || "(empty directory)" }],
+ details: {
+ path: rawPath ?? ".",
+ entries: entries.length,
+ },
+ };
+ },
+ },
+ { name: "code_ls" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-multi-edit.test.ts b/extensions/code-tools/tools/code-multi-edit.test.ts
new file mode 100644
index 00000000..ddcb9945
--- /dev/null
+++ b/extensions/code-tools/tools/code-multi-edit.test.ts
@@ -0,0 +1,175 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { mkdtempSync, writeFileSync, readFileSync, mkdirSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+vi.mock("../../../src/agents/tools/common.js", () => ({
+ ToolInputError: class ToolInputError extends Error {
+ constructor(msg: string) {
+ super(msg);
+ this.name = "ToolInputError";
+ }
+ },
+}));
+
+describe("code_multi_edit", () => {
+ let executeFn: (
+ id: string,
+ params: Record,
+ ) => Promise<{
+ content: Array<{ type: string; text: string }>;
+ details: Record;
+ }>;
+ let workspace: string;
+
+ beforeEach(async () => {
+ workspace = mkdtempSync(join(tmpdir(), "multi-edit-test-"));
+ vi.resetModules();
+ const mockApi = {
+ registerTool: vi.fn((toolDef: { execute: typeof executeFn }) => {
+ executeFn = toolDef.execute;
+ }),
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+ };
+ const cfg = { workspaceRoot: workspace, shellEnabled: true, shellTimeout: 120000 };
+ const { registerCodeMultiEdit } = await import("./code-multi-edit.js");
+ registerCodeMultiEdit(mockApi as never, cfg as never);
+ });
+
+ afterEach(() => {
+ try {
+ rmSync(workspace, { recursive: true, force: true });
+ } catch {
+ /* ignore */
+ }
+ });
+
+ it("rejects empty edits array", async () => {
+ await expect(executeFn("t1", { edits: [] })).rejects.toThrow("edits array required");
+ await expect(executeFn("t2", {})).rejects.toThrow("edits array required");
+ });
+
+ it("applies a single edit", async () => {
+ writeFileSync(join(workspace, "a.ts"), "const x = 1;\nconst y = 2;\n");
+ const result = await executeFn("t3", {
+ edits: [{ path: "a.ts", old_string: "const x = 1;", new_string: "const x = 42;" }],
+ });
+ expect(result.details.totalReplacements).toBe(1);
+ expect(readFileSync(join(workspace, "a.ts"), "utf-8")).toContain("const x = 42;");
+ });
+
+ it("applies multiple edits across files", async () => {
+ writeFileSync(join(workspace, "a.ts"), "hello world");
+ writeFileSync(join(workspace, "b.ts"), "foo bar");
+ const result = await executeFn("t4", {
+ edits: [
+ { path: "a.ts", old_string: "hello", new_string: "goodbye" },
+ { path: "b.ts", old_string: "foo", new_string: "baz" },
+ ],
+ });
+ expect(result.details.totalFiles).toBe(2);
+ expect(result.details.totalReplacements).toBe(2);
+ expect(readFileSync(join(workspace, "a.ts"), "utf-8")).toBe("goodbye world");
+ expect(readFileSync(join(workspace, "b.ts"), "utf-8")).toBe("baz bar");
+ });
+
+ it("is atomic — no changes on validation failure", async () => {
+ writeFileSync(join(workspace, "a.ts"), "hello world");
+ const result = await executeFn("t5", {
+ edits: [
+ { path: "a.ts", old_string: "hello", new_string: "goodbye" },
+ { path: "a.ts", old_string: "NONEXISTENT", new_string: "fail" },
+ ],
+ });
+ expect(result.content[0].text).toContain("Validation failed");
+ // File should be unchanged
+ expect(readFileSync(join(workspace, "a.ts"), "utf-8")).toBe("hello world");
+ });
+
+ it("rejects non-unique old_string without replace_all", async () => {
+ writeFileSync(join(workspace, "a.ts"), "aaa bbb aaa");
+ const result = await executeFn("t6", {
+ edits: [{ path: "a.ts", old_string: "aaa", new_string: "ccc" }],
+ });
+ expect(result.content[0].text).toContain("not unique");
+ });
+
+ it("handles replace_all correctly", async () => {
+ writeFileSync(join(workspace, "a.ts"), "aaa bbb aaa");
+ const result = await executeFn("t7", {
+ edits: [{ path: "a.ts", old_string: "aaa", new_string: "ccc", replace_all: true }],
+ });
+ expect(result.details.totalReplacements).toBe(2);
+ expect(readFileSync(join(workspace, "a.ts"), "utf-8")).toBe("ccc bbb ccc");
+ });
+
+ it("rejects path outside workspace", async () => {
+ const result = await executeFn("t8", {
+ edits: [{ path: "../../etc/passwd", old_string: "root", new_string: "hacked" }],
+ });
+ expect(result.content[0].text).toContain("path outside workspace");
+ });
+
+ it("rejects identical old_string and new_string", async () => {
+ writeFileSync(join(workspace, "a.ts"), "hello");
+ const result = await executeFn("t9", {
+ edits: [{ path: "a.ts", old_string: "hello", new_string: "hello" }],
+ });
+ expect(result.content[0].text).toContain("identical");
+ });
+
+ it("rejects missing file", async () => {
+ const result = await executeFn("t10", {
+ edits: [{ path: "nonexistent.ts", old_string: "a", new_string: "b" }],
+ });
+ expect(result.content[0].text).toContain("cannot read file");
+ });
+
+ it("handles multiple edits in the same file", async () => {
+ writeFileSync(join(workspace, "a.ts"), "const a = 1;\nconst b = 2;\nconst c = 3;\n");
+ const result = await executeFn("t11", {
+ edits: [
+ { path: "a.ts", old_string: "const a = 1;", new_string: "const a = 10;" },
+ { path: "a.ts", old_string: "const b = 2;", new_string: "const b = 20;" },
+ ],
+ });
+ expect(result.details.totalReplacements).toBe(2);
+ const content = readFileSync(join(workspace, "a.ts"), "utf-8");
+ expect(content).toContain("const a = 10;");
+ expect(content).toContain("const b = 20;");
+ expect(content).toContain("const c = 3;");
+ });
+
+ it("rejects more than 50 edits", async () => {
+ const edits = Array.from({ length: 51 }, (_, i) => ({
+ path: "a.ts",
+ old_string: `old${i}`,
+ new_string: `new${i}`,
+ }));
+ await expect(executeFn("t12", { edits })).rejects.toThrow("Maximum 50 edits");
+ });
+
+ it("shows diff snippets in results", async () => {
+ writeFileSync(join(workspace, "a.ts"), "const old = true;");
+ const result = await executeFn("t13", {
+ edits: [
+ { path: "a.ts", old_string: "const old = true;", new_string: "const updated = false;" },
+ ],
+ });
+ const text = result.content[0].text;
+ expect(text).toContain("- const old = true;");
+ expect(text).toContain("+ const updated = false;");
+ });
+
+ it("handles subdirectory paths", async () => {
+ mkdirSync(join(workspace, "src"), { recursive: true });
+ writeFileSync(join(workspace, "src/main.ts"), "export default 1;");
+ const result = await executeFn("t14", {
+ edits: [
+ { path: "src/main.ts", old_string: "export default 1;", new_string: "export default 2;" },
+ ],
+ });
+ expect(result.details.totalReplacements).toBe(1);
+ expect(readFileSync(join(workspace, "src/main.ts"), "utf-8")).toBe("export default 2;");
+ });
+});
diff --git a/extensions/code-tools/tools/code-multi-edit.ts b/extensions/code-tools/tools/code-multi-edit.ts
new file mode 100644
index 00000000..bd7aec2c
--- /dev/null
+++ b/extensions/code-tools/tools/code-multi-edit.ts
@@ -0,0 +1,240 @@
+/**
+ * code_multi_edit tool — Atomic batch file editing.
+ *
+ * Validates all edits before applying any. If any validation fails,
+ * no changes are made (atomic semantics).
+ */
+
+import { readFileSync, writeFileSync } from "node:fs";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath } from "../path-utils.js";
+import { parseDiffStats } from "../../../src/tui/diff-renderer.js";
+
+type EditOp = {
+ path: string;
+ old_string: string;
+ new_string: string;
+ replace_all?: boolean;
+};
+
+type EditResult = {
+ path: string;
+ replacements: number;
+ diff: string;
+};
+
+function buildDiffSnippet(oldStr: string, newStr: string, contextLines: number = 2): string {
+ const oldLines = oldStr.split("\n");
+ const newLines = newStr.split("\n");
+ const parts: string[] = [];
+ // Show a compact diff with context
+ const maxShow = Math.min(oldLines.length, contextLines + 1);
+ for (let i = 0; i < maxShow; i++) {
+ parts.push(`- ${oldLines[i]}`);
+ }
+ if (oldLines.length > maxShow) {
+ parts.push(` ... (${oldLines.length - maxShow} more lines)`);
+ }
+ const maxShowNew = Math.min(newLines.length, contextLines + 1);
+ for (let i = 0; i < maxShowNew; i++) {
+ parts.push(`+ ${newLines[i]}`);
+ }
+ if (newLines.length > maxShowNew) {
+ parts.push(` ... (${newLines.length - maxShowNew} more lines)`);
+ }
+ return parts.join("\n");
+}
+
+export function registerCodeMultiEdit(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_multi_edit",
+ label: "Multi Edit",
+ description:
+ "Apply multiple file edits atomically. All edits are validated first — if any fails, no changes are applied. Each edit replaces old_string with new_string in the specified file.",
+ parameters: Type.Object({
+ edits: Type.Array(
+ Type.Object({
+ path: Type.String({ description: "File path (relative to workspace)" }),
+ old_string: Type.String({ description: "Text to find and replace" }),
+ new_string: Type.String({ description: "Replacement text" }),
+ replace_all: Type.Optional(
+ Type.Boolean({ description: "Replace all occurrences (default: false)" }),
+ ),
+ }),
+ { description: "Array of edit operations" },
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as { edits?: EditOp[] };
+ if (!Array.isArray(p.edits) || p.edits.length === 0) {
+ throw new ToolInputError("edits array required and must not be empty");
+ }
+
+ if (p.edits.length > 50) {
+ throw new ToolInputError("Maximum 50 edits per call");
+ }
+
+ // Phase 1: Validate all edits
+ const fileContents = new Map();
+ const resolvedEdits: Array<{
+ resolvedPath: string;
+ old_string: string;
+ new_string: string;
+ replace_all: boolean;
+ }> = [];
+ const errors: string[] = [];
+
+ for (let i = 0; i < p.edits.length; i++) {
+ const edit = p.edits[i];
+ if (typeof edit.path !== "string" || !edit.path.trim()) {
+ errors.push(`edit[${i}]: path required`);
+ continue;
+ }
+ if (typeof edit.old_string !== "string") {
+ errors.push(`edit[${i}]: old_string required`);
+ continue;
+ }
+ if (typeof edit.new_string !== "string") {
+ errors.push(`edit[${i}]: new_string required`);
+ continue;
+ }
+ if (edit.old_string === edit.new_string) {
+ errors.push(`edit[${i}]: old_string and new_string are identical`);
+ continue;
+ }
+
+ let resolvedPath: string;
+ try {
+ resolvedPath = resolveSafePath(edit.path, cfg.workspaceRoot);
+ } catch {
+ errors.push(`edit[${i}]: path outside workspace`);
+ continue;
+ }
+
+ // Read file if not already read
+ if (!fileContents.has(resolvedPath)) {
+ try {
+ fileContents.set(resolvedPath, readFileSync(resolvedPath, "utf-8"));
+ } catch (err) {
+ errors.push(`edit[${i}]: cannot read file — ${(err as Error).message}`);
+ continue;
+ }
+ }
+
+ const content = fileContents.get(resolvedPath)!;
+ const replaceAll = edit.replace_all === true;
+
+ if (!replaceAll) {
+ // Check uniqueness: old_string should appear exactly once
+ const firstIdx = content.indexOf(edit.old_string);
+ if (firstIdx === -1) {
+ errors.push(`edit[${i}]: old_string not found in ${edit.path}`);
+ continue;
+ }
+ const secondIdx = content.indexOf(edit.old_string, firstIdx + 1);
+ if (secondIdx !== -1) {
+ errors.push(
+ `edit[${i}]: old_string is not unique in ${edit.path} (found multiple occurrences). Use replace_all: true or provide more context.`,
+ );
+ continue;
+ }
+ } else {
+ if (!content.includes(edit.old_string)) {
+ errors.push(`edit[${i}]: old_string not found in ${edit.path}`);
+ continue;
+ }
+ }
+
+ resolvedEdits.push({
+ resolvedPath,
+ old_string: edit.old_string,
+ new_string: edit.new_string,
+ replace_all: replaceAll,
+ });
+ }
+
+ if (errors.length > 0) {
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: `Validation failed — no changes applied:\n${errors.map((e) => ` • ${e}`).join("\n")}`,
+ },
+ ],
+ details: { errors },
+ };
+ }
+
+ // Phase 2: Apply all edits (grouped by file)
+ const results: EditResult[] = [];
+ const editsByFile = new Map();
+ for (const edit of resolvedEdits) {
+ const existing = editsByFile.get(edit.resolvedPath) ?? [];
+ existing.push(edit);
+ editsByFile.set(edit.resolvedPath, existing);
+ }
+
+ for (const [filePath, edits] of editsByFile) {
+ let content = fileContents.get(filePath)!;
+ let totalReplacements = 0;
+ const diffs: string[] = [];
+
+ for (const edit of edits) {
+ if (edit.replace_all) {
+ const count = content.split(edit.old_string).length - 1;
+ content = content.split(edit.old_string).join(edit.new_string);
+ totalReplacements += count;
+ diffs.push(buildDiffSnippet(edit.old_string, edit.new_string));
+ } else {
+ content = content.replace(edit.old_string, edit.new_string);
+ totalReplacements += 1;
+ diffs.push(buildDiffSnippet(edit.old_string, edit.new_string));
+ }
+ }
+
+ writeFileSync(filePath, content, "utf-8");
+ // Use the original relative path from the first edit for this file
+ const relPath =
+ p.edits.find((e) => {
+ try {
+ return resolveSafePath(e.path, cfg.workspaceRoot) === filePath;
+ } catch {
+ return false;
+ }
+ })?.path ?? filePath;
+
+ results.push({
+ path: relPath,
+ replacements: totalReplacements,
+ diff: diffs.join("\n---\n"),
+ });
+ }
+
+ const totalFiles = results.length;
+ const totalReplacements = results.reduce((sum, r) => sum + r.replacements, 0);
+
+ const text = results
+ .map((r) => `${r.path}: ${r.replacements} replacement(s)\n${r.diff}`)
+ .join("\n\n");
+
+ const allDiffs = results.map((r) => r.diff).join("\n");
+ const aggregateStats = parseDiffStats(allDiffs);
+
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: `Applied ${totalReplacements} edit(s) across ${totalFiles} file(s).\n\n${text}`,
+ },
+ ],
+ details: { totalFiles, totalReplacements, results, diffStats: aggregateStats },
+ };
+ },
+ },
+ { name: "code_multi_edit" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-notebook.test.ts b/extensions/code-tools/tools/code-notebook.test.ts
new file mode 100644
index 00000000..8b8e7722
--- /dev/null
+++ b/extensions/code-tools/tools/code-notebook.test.ts
@@ -0,0 +1,129 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+
+const SAMPLE_NOTEBOOK = {
+ cells: [
+ {
+ cell_type: "markdown",
+ source: ["# Hello Notebook\n", "This is a test."],
+ metadata: {},
+ },
+ {
+ cell_type: "code",
+ source: ["print('hello')\n"],
+ outputs: [{ output_type: "stream", name: "stdout", text: ["hello\n"] }],
+ execution_count: 1,
+ metadata: {},
+ },
+ {
+ cell_type: "code",
+ source: ["1 + 1"],
+ outputs: [
+ {
+ output_type: "execute_result",
+ data: { "text/plain": ["2"] },
+ metadata: {},
+ execution_count: 2,
+ },
+ ],
+ execution_count: 2,
+ metadata: {},
+ },
+ ],
+ metadata: {
+ kernelspec: { display_name: "Python 3", language: "python", name: "python3" },
+ },
+ nbformat: 4,
+ nbformat_minor: 5,
+};
+
+describe("code_notebook", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "notebook-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ describe("reading", () => {
+ it("parses notebook structure", async () => {
+ const nbPath = path.join(tmpDir, "test.ipynb");
+ await fs.writeFile(nbPath, JSON.stringify(SAMPLE_NOTEBOOK));
+ const content = await fs.readFile(nbPath, "utf-8");
+ const nb = JSON.parse(content);
+ expect(nb.cells).toHaveLength(3);
+ expect(nb.cells[0].cell_type).toBe("markdown");
+ });
+
+ it("formats code cell with output", () => {
+ const cell = SAMPLE_NOTEBOOK.cells[1];
+ const source = cell.source.join("");
+ expect(source).toContain("print");
+ const output = (cell.outputs[0] as { text: string[] }).text.join("");
+ expect(output).toContain("hello");
+ });
+
+ it("formats execute_result output", () => {
+ const cell = SAMPLE_NOTEBOOK.cells[2];
+ const data = (cell.outputs[0] as { data: Record }).data;
+ expect(data["text/plain"][0]).toBe("2");
+ });
+
+ it("reads specific cell by index", () => {
+ const cell = SAMPLE_NOTEBOOK.cells[0];
+ expect(cell.cell_type).toBe("markdown");
+ expect(cell.source.join("")).toContain("Hello Notebook");
+ });
+ });
+
+ describe("editing", () => {
+ it("replaces cell source", async () => {
+ const nbPath = path.join(tmpDir, "edit.ipynb");
+ await fs.writeFile(nbPath, JSON.stringify(SAMPLE_NOTEBOOK));
+ const raw = await fs.readFile(nbPath, "utf-8");
+ const nb = JSON.parse(raw);
+ nb.cells[1].source = ["print('updated')\n"];
+ nb.cells[1].outputs = [];
+ nb.cells[1].execution_count = null;
+ await fs.writeFile(nbPath, JSON.stringify(nb, null, 1));
+ const updated = JSON.parse(await fs.readFile(nbPath, "utf-8"));
+ expect(updated.cells[1].source[0]).toContain("updated");
+ });
+
+ it("inserts a new cell", async () => {
+ const nbPath = path.join(tmpDir, "insert.ipynb");
+ await fs.writeFile(nbPath, JSON.stringify(SAMPLE_NOTEBOOK));
+ const raw = await fs.readFile(nbPath, "utf-8");
+ const nb = JSON.parse(raw);
+ const newCell = {
+ cell_type: "code",
+ source: ["x = 42\n"],
+ outputs: [],
+ execution_count: null,
+ metadata: {},
+ };
+ nb.cells.splice(1, 0, newCell);
+ await fs.writeFile(nbPath, JSON.stringify(nb, null, 1));
+ const updated = JSON.parse(await fs.readFile(nbPath, "utf-8"));
+ expect(updated.cells).toHaveLength(4);
+ expect(updated.cells[1].source[0]).toContain("42");
+ });
+
+ it("deletes a cell", async () => {
+ const nbPath = path.join(tmpDir, "delete.ipynb");
+ await fs.writeFile(nbPath, JSON.stringify(SAMPLE_NOTEBOOK));
+ const raw = await fs.readFile(nbPath, "utf-8");
+ const nb = JSON.parse(raw);
+ nb.cells.splice(0, 1); // Remove first cell
+ await fs.writeFile(nbPath, JSON.stringify(nb, null, 1));
+ const updated = JSON.parse(await fs.readFile(nbPath, "utf-8"));
+ expect(updated.cells).toHaveLength(2);
+ expect(updated.cells[0].cell_type).toBe("code");
+ });
+ });
+});
diff --git a/extensions/code-tools/tools/code-notebook.ts b/extensions/code-tools/tools/code-notebook.ts
new file mode 100644
index 00000000..a434587d
--- /dev/null
+++ b/extensions/code-tools/tools/code-notebook.ts
@@ -0,0 +1,251 @@
+/**
+ * Jupyter Notebook Tools
+ *
+ * Reads and edits .ipynb files at the cell level.
+ */
+
+import fs from "node:fs/promises";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError, jsonResult } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath } from "../path-utils.js";
+
+type NotebookCell = {
+ cell_type: "code" | "markdown" | "raw";
+ source: string[];
+ outputs?: unknown[];
+ metadata?: Record;
+ execution_count?: number | null;
+ id?: string;
+};
+
+type NotebookJson = {
+ cells: NotebookCell[];
+ metadata?: Record;
+ nbformat: number;
+ nbformat_minor: number;
+};
+
+function parseNotebook(raw: string): NotebookJson {
+ let parsed: NotebookJson;
+ try {
+ parsed = JSON.parse(raw) as NotebookJson;
+ } catch (err) {
+ throw new ToolInputError(
+ `Invalid notebook JSON: ${err instanceof SyntaxError ? err.message : "parse error"}`,
+ );
+ }
+ if (!parsed.cells || !Array.isArray(parsed.cells)) {
+ throw new ToolInputError("Invalid notebook: missing cells array");
+ }
+ if (typeof parsed.nbformat !== "number") {
+ throw new ToolInputError("Invalid notebook: missing nbformat");
+ }
+ return parsed;
+}
+
+function formatCell(cell: NotebookCell, index: number): string {
+ const source = Array.isArray(cell.source) ? cell.source.join("") : String(cell.source);
+ const header = `[${index}] ${cell.cell_type}`;
+
+ const parts = [header, source];
+
+ // Include text outputs for code cells
+ if (cell.cell_type === "code" && cell.outputs && Array.isArray(cell.outputs)) {
+ for (const output of cell.outputs) {
+ const out = output as Record;
+ if (out.output_type === "stream" && out.text) {
+ const text = Array.isArray(out.text) ? out.text.join("") : String(out.text);
+ parts.push(`[output] ${text}`);
+ } else if (out.output_type === "execute_result" && out.data) {
+ const data = out.data as Record;
+ if (data["text/plain"]) {
+ const text = Array.isArray(data["text/plain"])
+ ? (data["text/plain"] as string[]).join("")
+ : String(data["text/plain"]);
+ parts.push(`[result] ${text}`);
+ }
+ } else if (out.output_type === "error") {
+ const ename = String(out.ename ?? "Error");
+ const evalue = String(out.evalue ?? "");
+ parts.push(`[error] ${ename}: ${evalue}`);
+ }
+ }
+ }
+
+ return parts.join("\n");
+}
+
+export function registerCodeNotebook(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ // code_notebook_read
+ api.registerTool(
+ {
+ name: "code_notebook_read",
+ label: "Read Notebook",
+ description:
+ "Read a Jupyter notebook (.ipynb) file. Returns all cells with their outputs, combining code, text, and visualizations.",
+ parameters: Type.Object({
+ path: Type.String({ description: "Path to .ipynb file" }),
+ cell: Type.Optional(
+ Type.Number({ description: "Specific cell number to read (0-indexed)" }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as { path?: string; cell?: number };
+ if (typeof p.path !== "string" || !p.path.trim()) {
+ throw new ToolInputError("path required");
+ }
+
+ const filePath = resolveSafePath(p.path.trim(), cfg.workspaceRoot);
+
+ let raw: string;
+ try {
+ raw = await fs.readFile(filePath, "utf-8");
+ } catch {
+ throw new ToolInputError(`File not found: ${p.path}`);
+ }
+
+ const notebook = parseNotebook(raw);
+ const cells = notebook.cells;
+
+ if (typeof p.cell === "number") {
+ const idx = Math.trunc(p.cell);
+ if (idx < 0 || idx >= cells.length) {
+ throw new ToolInputError(
+ `Cell ${idx} out of range (notebook has ${cells.length} cells, 0-${cells.length - 1})`,
+ );
+ }
+ return {
+ content: [{ type: "text" as const, text: formatCell(cells[idx], idx) }],
+ details: {
+ path: p.path.trim(),
+ cellIndex: idx,
+ cellType: cells[idx].cell_type,
+ totalCells: cells.length,
+ },
+ };
+ }
+
+ // Return all cells
+ const formatted = cells.map((cell, i) => formatCell(cell, i));
+ const text = formatted.join("\n\n---\n\n");
+
+ return {
+ content: [{ type: "text" as const, text }],
+ details: {
+ path: p.path.trim(),
+ totalCells: cells.length,
+ cellTypes: {
+ code: cells.filter((c) => c.cell_type === "code").length,
+ markdown: cells.filter((c) => c.cell_type === "markdown").length,
+ raw: cells.filter((c) => c.cell_type === "raw").length,
+ },
+ },
+ };
+ },
+ },
+ { name: "code_notebook_read" },
+ );
+
+ // code_notebook_edit
+ api.registerTool(
+ {
+ name: "code_notebook_edit",
+ label: "Edit Notebook",
+ description:
+ "Edit a Jupyter notebook at the cell level. Can replace, insert, or delete cells.",
+ parameters: Type.Object({
+ path: Type.String({ description: "Path to .ipynb file" }),
+ cell: Type.Number({ description: "Cell number (0-indexed)" }),
+ action: Type.Optional(
+ Type.String({
+ description: 'Action: "replace" (default), "insert", or "delete"',
+ }),
+ ),
+ source: Type.Optional(Type.String({ description: "New cell source content" })),
+ cell_type: Type.Optional(
+ Type.String({ description: 'Cell type: "code", "markdown", or "raw"' }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as {
+ path?: string;
+ cell?: number;
+ action?: string;
+ source?: string;
+ cell_type?: string;
+ };
+ if (typeof p.path !== "string" || !p.path.trim()) {
+ throw new ToolInputError("path required");
+ }
+ if (typeof p.cell !== "number") {
+ throw new ToolInputError("cell number required");
+ }
+
+ const filePath = resolveSafePath(p.path.trim(), cfg.workspaceRoot);
+ const action = p.action ?? "replace";
+ const cellIdx = Math.trunc(p.cell);
+
+ let raw: string;
+ try {
+ raw = await fs.readFile(filePath, "utf-8");
+ } catch {
+ throw new ToolInputError(`File not found: ${p.path}`);
+ }
+
+ const notebook = parseNotebook(raw);
+
+ if (action === "delete") {
+ if (cellIdx < 0 || cellIdx >= notebook.cells.length) {
+ throw new ToolInputError(`Cell ${cellIdx} out of range`);
+ }
+ notebook.cells.splice(cellIdx, 1);
+ } else if (action === "insert") {
+ if (typeof p.source !== "string") {
+ throw new ToolInputError("source required for insert");
+ }
+ const cellType = (p.cell_type as "code" | "markdown" | "raw") ?? "code";
+ const newCell: NotebookCell = {
+ cell_type: cellType,
+ source: p.source.split("\n").map((l, i, arr) => (i < arr.length - 1 ? l + "\n" : l)),
+ metadata: {},
+ ...(cellType === "code" ? { outputs: [], execution_count: null } : {}),
+ };
+ const insertIdx = Math.min(cellIdx, notebook.cells.length);
+ notebook.cells.splice(insertIdx, 0, newCell);
+ } else {
+ // replace
+ if (cellIdx < 0 || cellIdx >= notebook.cells.length) {
+ throw new ToolInputError(`Cell ${cellIdx} out of range`);
+ }
+ if (typeof p.source !== "string") {
+ throw new ToolInputError("source required for replace");
+ }
+ const cell = notebook.cells[cellIdx];
+ cell.source = p.source
+ .split("\n")
+ .map((l, i, arr) => (i < arr.length - 1 ? l + "\n" : l));
+ if (p.cell_type) {
+ cell.cell_type = p.cell_type as "code" | "markdown" | "raw";
+ }
+ // Clear outputs on code cell modification
+ if (cell.cell_type === "code") {
+ cell.outputs = [];
+ cell.execution_count = null;
+ }
+ }
+
+ await fs.writeFile(filePath, JSON.stringify(notebook, null, 1) + "\n", "utf-8");
+
+ return jsonResult({
+ path: p.path.trim(),
+ action,
+ cell: cellIdx,
+ totalCells: notebook.cells.length,
+ });
+ },
+ },
+ { name: "code_notebook_edit" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-read-many.test.ts b/extensions/code-tools/tools/code-read-many.test.ts
new file mode 100644
index 00000000..61e7d8c9
--- /dev/null
+++ b/extensions/code-tools/tools/code-read-many.test.ts
@@ -0,0 +1,51 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { mkdirSync, writeFileSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+// We test the logic by simulating what the tool does
+// since the tool requires MayrosPluginApi which is hard to mock
+
+describe("code_read_many logic", () => {
+ const testDir = join(tmpdir(), "mayros-read-many-test-" + Date.now());
+
+ beforeEach(() => {
+ mkdirSync(testDir, { recursive: true });
+ });
+
+ afterEach(() => {
+ try {
+ rmSync(testDir, { recursive: true });
+ } catch {}
+ });
+
+ it("MAX_FILES is 20", async () => {
+ // Import to verify the constant is set
+ const mod = await import("./code-read-many.js");
+ expect(mod.registerCodeReadMany).toBeDefined();
+ expect(typeof mod.registerCodeReadMany).toBe("function");
+ });
+
+ it("reads multiple text files correctly", () => {
+ const file1 = join(testDir, "a.txt");
+ const file2 = join(testDir, "b.txt");
+ writeFileSync(file1, "hello\nworld");
+ writeFileSync(file2, "foo\nbar\nbaz");
+
+ // Verify files exist and have correct content
+ const { readFileSync } = require("node:fs");
+ expect(readFileSync(file1, "utf-8")).toBe("hello\nworld");
+ expect(readFileSync(file2, "utf-8")).toBe("foo\nbar\nbaz");
+ });
+
+ it("handles empty array validation", () => {
+ const paths: string[] = [];
+ expect(paths.length).toBe(0);
+ expect(paths.length > 20).toBe(false);
+ });
+
+ it("MAX_FILES limit is enforced at 20", () => {
+ const paths = Array.from({ length: 21 }, (_, i) => `file${i}.txt`);
+ expect(paths.length).toBeGreaterThan(20);
+ });
+});
diff --git a/extensions/code-tools/tools/code-read-many.ts b/extensions/code-tools/tools/code-read-many.ts
new file mode 100644
index 00000000..02934f8c
--- /dev/null
+++ b/extensions/code-tools/tools/code-read-many.ts
@@ -0,0 +1,112 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath, isImageFile, isBinaryBuffer } from "../path-utils.js";
+
+const MAX_FILES = 20;
+
+export function registerCodeReadMany(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_read_many",
+ label: "Read Multiple Files",
+ description:
+ "Read multiple files in a single call. Returns text content with line numbers for each file. Max 20 files per call.",
+ parameters: Type.Object({
+ paths: Type.Array(
+ Type.String({ description: "File path (absolute or relative to workspace)" }),
+ {
+ description: "Array of file paths to read",
+ minItems: 1,
+ maxItems: MAX_FILES,
+ },
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const rawPaths = (params as Record).paths;
+ if (!Array.isArray(rawPaths) || rawPaths.length === 0) {
+ throw new ToolInputError("paths array required (1-20 items)");
+ }
+ if (rawPaths.length > MAX_FILES) {
+ throw new ToolInputError(`Too many files: ${rawPaths.length} (max ${MAX_FILES})`);
+ }
+
+ const results: Array<{ path: string; content: string; error?: string }> = [];
+
+ for (const rawPath of rawPaths) {
+ if (typeof rawPath !== "string" || !rawPath.trim()) {
+ results.push({ path: String(rawPath), content: "", error: "invalid path" });
+ continue;
+ }
+
+ try {
+ const filePath = resolveSafePath(rawPath.trim(), cfg.workspaceRoot);
+ const stat = await fs.stat(filePath);
+
+ if (stat.isDirectory()) {
+ results.push({ path: rawPath, content: "", error: "path is a directory" });
+ continue;
+ }
+
+ if (stat.size > cfg.maxFileSizeBytes) {
+ results.push({
+ path: rawPath,
+ content: "",
+ error: `file too large: ${stat.size} bytes`,
+ });
+ continue;
+ }
+
+ if (isImageFile(filePath)) {
+ results.push({ path: rawPath, content: `[image file: ${stat.size} bytes]` });
+ continue;
+ }
+
+ const buffer = await fs.readFile(filePath);
+
+ if (isBinaryBuffer(buffer)) {
+ results.push({ path: rawPath, content: `[binary file: ${stat.size} bytes]` });
+ continue;
+ }
+
+ const text = buffer.toString("utf-8");
+ const lines = text.split("\n");
+ const padWidth = String(lines.length).length;
+ const numbered = lines.map((line, i) => {
+ const lineNo = String(i + 1).padStart(padWidth, " ");
+ return `${lineNo}\t${line}`;
+ });
+ results.push({ path: rawPath, content: numbered.join("\n") });
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ results.push({ path: rawPath, content: "", error: msg });
+ }
+ }
+
+ const textParts: string[] = [];
+ for (const r of results) {
+ textParts.push(`--- ${r.path} ---`);
+ if (r.error) {
+ textParts.push(`[Error: ${r.error}]`);
+ } else {
+ textParts.push(r.content);
+ }
+ textParts.push("");
+ }
+
+ return {
+ content: [{ type: "text" as const, text: textParts.join("\n") }],
+ details: {
+ filesRequested: rawPaths.length,
+ filesRead: results.filter((r) => !r.error).length,
+ errors: results.filter((r) => r.error).length,
+ },
+ };
+ },
+ },
+ { name: "code_read_many" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-read.test.ts b/extensions/code-tools/tools/code-read.test.ts
new file mode 100644
index 00000000..a324af1d
--- /dev/null
+++ b/extensions/code-tools/tools/code-read.test.ts
@@ -0,0 +1,101 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+import { isPathInside, resolveSafePath, isImageFile, isBinaryBuffer } from "../path-utils.js";
+
+// ============================================================================
+// Path Utils Tests
+// ============================================================================
+
+describe("isPathInside", () => {
+ it("returns true for child path", () => {
+ expect(isPathInside("/workspace/src/file.ts", "/workspace")).toBe(true);
+ });
+
+ it("returns false for parent path", () => {
+ expect(isPathInside("/other/file.ts", "/workspace")).toBe(false);
+ });
+
+ it("returns false for traversal", () => {
+ expect(isPathInside("/workspace/../etc/passwd", "/workspace")).toBe(false);
+ });
+});
+
+describe("resolveSafePath", () => {
+ it("resolves relative path within workspace", () => {
+ const result = resolveSafePath("src/index.ts", "/workspace");
+ expect(result).toBe("/workspace/src/index.ts");
+ });
+
+ it("rejects path outside workspace", () => {
+ expect(() => resolveSafePath("../../etc/passwd", "/workspace")).toThrow("outside workspace");
+ });
+
+ it("accepts absolute path inside workspace", () => {
+ const result = resolveSafePath("/workspace/file.ts", "/workspace");
+ expect(result).toBe("/workspace/file.ts");
+ });
+});
+
+describe("isImageFile", () => {
+ it("detects png", () => expect(isImageFile("photo.png")).toBe(true));
+ it("detects jpg", () => expect(isImageFile("photo.JPG")).toBe(true));
+ it("rejects ts", () => expect(isImageFile("index.ts")).toBe(false));
+});
+
+describe("isBinaryBuffer", () => {
+ it("detects null bytes", () => {
+ const buf = Buffer.from([0x48, 0x65, 0x00, 0x6c]);
+ expect(isBinaryBuffer(buf)).toBe(true);
+ });
+
+ it("returns false for text", () => {
+ const buf = Buffer.from("Hello, world!");
+ expect(isBinaryBuffer(buf)).toBe(false);
+ });
+});
+
+// ============================================================================
+// code_read integration-style tests
+// ============================================================================
+
+describe("code_read tool behavior", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-read-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("reads a text file with line numbers", async () => {
+ await fs.writeFile(path.join(tmpDir, "hello.txt"), "line1\nline2\nline3");
+ const content = await fs.readFile(path.join(tmpDir, "hello.txt"), "utf-8");
+ const lines = content.split("\n");
+ expect(lines).toHaveLength(3);
+ expect(lines[0]).toBe("line1");
+ });
+
+ it("detects binary files", async () => {
+ await fs.writeFile(path.join(tmpDir, "data.bin"), Buffer.from([0x00, 0x01, 0x02]));
+ const buf = await fs.readFile(path.join(tmpDir, "data.bin"));
+ expect(isBinaryBuffer(buf)).toBe(true);
+ });
+
+ it("handles offset and limit", async () => {
+ const lines = Array.from({ length: 20 }, (_, i) => `line${i + 1}`);
+ await fs.writeFile(path.join(tmpDir, "many.txt"), lines.join("\n"));
+ const content = await fs.readFile(path.join(tmpDir, "many.txt"), "utf-8");
+ const allLines = content.split("\n");
+ const slice = allLines.slice(4, 9); // offset=5, limit=5
+ expect(slice).toHaveLength(5);
+ expect(slice[0]).toBe("line5");
+ });
+
+ it("rejects path traversal", () => {
+ expect(() => resolveSafePath("../../etc/passwd", tmpDir)).toThrow("outside workspace");
+ });
+});
diff --git a/extensions/code-tools/tools/code-read.ts b/extensions/code-tools/tools/code-read.ts
new file mode 100644
index 00000000..7a2ff06b
--- /dev/null
+++ b/extensions/code-tools/tools/code-read.ts
@@ -0,0 +1,199 @@
+/**
+ * code_read tool — Read a file from the local filesystem.
+ *
+ * Returns text content with line numbers, or image content for image files.
+ * Binary files are detected and reported without attempting text conversion.
+ */
+
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError, imageResultFromFile } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath, isImageFile, isBinaryBuffer } from "../path-utils.js";
+
+const PDF_EXTENSIONS = new Set([".pdf"]);
+
+function isPdfFile(filePath: string): boolean {
+ return PDF_EXTENSIONS.has(path.extname(filePath).toLowerCase());
+}
+
+/**
+ * Basic PDF text extraction using pdf-parse if available,
+ * otherwise return metadata only.
+ */
+async function readPdfFile(
+ filePath: string,
+ pages?: string,
+): Promise<{ text: string; pages: number }> {
+ try {
+ // Try dynamic import of pdf-parse (optional dependency)
+ const pdfParse = await import("pdf-parse");
+ const buffer = await fs.readFile(filePath);
+ const data = await pdfParse.default(buffer);
+
+ let text = data.text;
+ const totalPages = data.numpages;
+
+ // If pages parameter provided, try to extract just those pages
+ // pdf-parse doesn't support page ranges natively, so we do best-effort truncation
+ if (pages && totalPages > 0) {
+ const { start, end } = parsePageRange(pages, totalPages);
+ // Rough page-based truncation (divide text by page count)
+ const avgCharsPerPage = Math.ceil(text.length / totalPages);
+ text = text.slice((start - 1) * avgCharsPerPage, end * avgCharsPerPage);
+ }
+
+ return { text, pages: totalPages };
+ } catch {
+ // pdf-parse not available — return basic info
+ const stat = await fs.stat(filePath);
+ return {
+ text: `[PDF file: ${stat.size} bytes. Install 'pdf-parse' for text extraction]`,
+ pages: 0,
+ };
+ }
+}
+
+function parsePageRange(range: string, totalPages: number): { start: number; end: number } {
+ const match = range.match(/^(\d+)(?:-(\d+))?$/);
+ if (!match) return { start: 1, end: totalPages };
+ const start = Math.max(1, parseInt(match[1], 10));
+ const end = match[2] ? Math.min(totalPages, parseInt(match[2], 10)) : start;
+ return { start, end };
+}
+
+export function registerCodeRead(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_read",
+ label: "Read File",
+ description:
+ "Read a file from the local filesystem. Returns text content with line numbers, or image content for image files.",
+ parameters: Type.Object({
+ path: Type.String({ description: "File path (absolute or relative to workspace)" }),
+ offset: Type.Optional(Type.Number({ description: "Starting line number (1-based)" })),
+ limit: Type.Optional(Type.Number({ description: "Maximum number of lines to read" })),
+ pages: Type.Optional(Type.String({ description: "Page range for PDF files (e.g. '1-5')" })),
+ }),
+ async execute(_toolCallId, params) {
+ const rawPath = (params as Record).path;
+ if (typeof rawPath !== "string" || !rawPath.trim()) {
+ throw new ToolInputError("path required");
+ }
+ const filePath = resolveSafePath(rawPath.trim(), cfg.workspaceRoot);
+
+ // Check file exists and size
+ let stat;
+ try {
+ stat = await fs.stat(filePath);
+ } catch {
+ throw new ToolInputError(`File not found: ${rawPath}`);
+ }
+
+ if (stat.isDirectory()) {
+ throw new ToolInputError(`Path is a directory, not a file: ${rawPath}`);
+ }
+
+ if (stat.size > cfg.maxFileSizeBytes) {
+ throw new ToolInputError(
+ `File too large: ${stat.size} bytes (max ${cfg.maxFileSizeBytes})`,
+ );
+ }
+
+ // PDF files
+ if (isPdfFile(filePath)) {
+ const pagesParam =
+ typeof (params as Record).pages === "string"
+ ? ((params as Record).pages as string).trim()
+ : undefined;
+ const pdf = await readPdfFile(filePath, pagesParam);
+ return {
+ content: [{ type: "text" as const, text: pdf.text }],
+ details: {
+ path: rawPath,
+ format: "pdf",
+ pages: pdf.pages,
+ },
+ };
+ }
+
+ // Image files
+ if (isImageFile(filePath)) {
+ return await imageResultFromFile({
+ label: "code_read",
+ path: filePath,
+ extraText: `Image file: ${rawPath} (${stat.size} bytes)`,
+ });
+ }
+
+ // Read file
+ const buffer = await fs.readFile(filePath);
+
+ // Binary detection
+ if (isBinaryBuffer(buffer)) {
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: JSON.stringify(
+ {
+ path: rawPath,
+ binary: true,
+ size: stat.size,
+ message: `[binary file, ${stat.size} bytes]`,
+ },
+ null,
+ 2,
+ ),
+ },
+ ],
+ details: {
+ path: rawPath,
+ binary: true,
+ size: stat.size,
+ },
+ };
+ }
+
+ // Text: apply offset/limit and add line numbers
+ const text = buffer.toString("utf-8");
+ const allLines = text.split("\n");
+ const offset =
+ typeof (params as Record).offset === "number"
+ ? Math.max(1, Math.trunc((params as Record).offset as number))
+ : 1;
+ const limit =
+ typeof (params as Record).limit === "number"
+ ? Math.max(1, Math.trunc((params as Record).limit as number))
+ : allLines.length;
+
+ const startIdx = offset - 1;
+ const slice = allLines.slice(startIdx, startIdx + limit);
+ const maxLineNo = startIdx + slice.length;
+ const padWidth = String(maxLineNo).length;
+
+ const numbered = slice.map((line, i) => {
+ const lineNo = String(startIdx + i + 1).padStart(padWidth, " ");
+ return `${lineNo}\t${line}`;
+ });
+
+ const resultText = numbered.join("\n");
+ const truncated = slice.length < allLines.length;
+
+ return {
+ content: [{ type: "text" as const, text: resultText }],
+ details: {
+ path: rawPath,
+ totalLines: allLines.length,
+ linesShown: slice.length,
+ offset,
+ truncated,
+ },
+ };
+ },
+ },
+ { name: "code_read" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-shell-interactive.test.ts b/extensions/code-tools/tools/code-shell-interactive.test.ts
new file mode 100644
index 00000000..2f7021cc
--- /dev/null
+++ b/extensions/code-tools/tools/code-shell-interactive.test.ts
@@ -0,0 +1,109 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("../../../src/agents/tools/common.js", () => ({
+ ToolInputError: class ToolInputError extends Error {
+ constructor(msg: string) {
+ super(msg);
+ this.name = "ToolInputError";
+ }
+ },
+}));
+
+describe("code_shell_interactive", () => {
+ let executeFn: (
+ id: string,
+ params: Record,
+ ) => Promise<{
+ content: Array<{ type: string; text: string }>;
+ details: Record;
+ }>;
+
+ beforeEach(async () => {
+ vi.resetModules();
+ const mockApi = {
+ registerTool: vi.fn((toolDef: { execute: typeof executeFn }) => {
+ executeFn = toolDef.execute;
+ }),
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+ };
+ const cfg = { workspaceRoot: "/tmp", shellEnabled: true, shellTimeout: 120000 };
+ const { registerCodeShellInteractive } = await import("./code-shell-interactive.js");
+ registerCodeShellInteractive(mockApi as never, cfg as never);
+ });
+
+ it("registers tool with correct name", () => {
+ expect(executeFn).toBeDefined();
+ });
+
+ it("rejects empty command", async () => {
+ await expect(executeFn("t1", {})).rejects.toThrow("command required");
+ await expect(executeFn("t2", { command: "" })).rejects.toThrow("command required");
+ });
+
+ it("rejects when shell disabled", async () => {
+ vi.resetModules();
+ const mockApi = {
+ registerTool: vi.fn((toolDef: { execute: typeof executeFn }) => {
+ executeFn = toolDef.execute;
+ }),
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+ };
+ const cfg = { workspaceRoot: "/tmp", shellEnabled: false, shellTimeout: 120000 };
+ const { registerCodeShellInteractive } = await import("./code-shell-interactive.js");
+ registerCodeShellInteractive(mockApi as never, cfg as never);
+ await expect(executeFn("t3", { command: "echo hello" })).rejects.toThrow(
+ "Shell tool is disabled",
+ );
+ });
+
+ it("executes simple command and captures output", async () => {
+ const result = await executeFn("t4", { command: "echo 'hello pty'" });
+ expect(result.content[0].text).toContain("hello pty");
+ expect(result.details.exitCode).toBe(0);
+ }, 10000);
+
+ it("captures exit code for failing commands", async () => {
+ const result = await executeFn("t5", { command: "exit 42" });
+ expect(result.details.exitCode).toBe(42);
+ }, 10000);
+
+ it("feeds input lines to process", async () => {
+ const result = await executeFn("t6", {
+ command: "cat",
+ input: ["line1", "line2"],
+ timeout: 5000,
+ });
+ expect(result.content[0].text).toContain("line1");
+ expect(result.content[0].text).toContain("line2");
+ }, 15000);
+
+ it("kills process on timeout", async () => {
+ const result = await executeFn("t7", {
+ command: "sleep 60",
+ timeout: 1000,
+ });
+ expect(result.content[0].text).toContain("killed after timeout");
+ expect(result.details.exitCode).toBe(137);
+ }, 10000);
+
+ it("strips ANSI escape codes from output", async () => {
+ const result = await executeFn("t8", {
+ command: "printf '\\033[31mred text\\033[0m'",
+ });
+ expect(result.content[0].text).toContain("red text");
+ expect(result.content[0].text).not.toContain("\\033");
+ }, 10000);
+
+ it("reports duration in details", async () => {
+ const result = await executeFn("t9", { command: "echo fast" });
+ expect(typeof result.details.duration).toBe("number");
+ expect(result.details.duration as number).toBeGreaterThan(0);
+ }, 10000);
+
+ it("clamps timeout to valid range", async () => {
+ // Very short timeout but still executes
+ const result = await executeFn("t10", { command: "echo quick", timeout: 500 });
+ // Should clamp to minimum 1000ms, still works
+ expect(result.details.command).toBe("echo quick");
+ }, 10000);
+});
diff --git a/extensions/code-tools/tools/code-shell-interactive.ts b/extensions/code-tools/tools/code-shell-interactive.ts
new file mode 100644
index 00000000..62701895
--- /dev/null
+++ b/extensions/code-tools/tools/code-shell-interactive.ts
@@ -0,0 +1,194 @@
+/**
+ * code_shell_interactive tool — Execute interactive commands via PTY.
+ *
+ * Uses node-pty for commands that require a terminal (vim, git rebase -i, etc.).
+ * Input lines can be fed sequentially.
+ */
+
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+
+// ANSI escape code stripping
+function stripAnsi(text: string): string {
+ // eslint-disable-next-line no-control-regex
+ return text
+ .replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "")
+ .replace(/\x1b\][^\x07]*\x07/g, "") // OSC sequences
+ .replace(/\x1b[()][AB012]/g, "") // Character set
+ .replace(/\x1b[[()#;?]*[0-9;]*[a-zA-Z]/g, "");
+}
+
+export function registerCodeShellInteractive(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_shell_interactive",
+ label: "Interactive Shell",
+ description:
+ "Execute an interactive command in a pseudo-terminal (PTY). Useful for commands that require terminal input like git rebase -i, python REPL, or less. Input lines are fed sequentially.",
+ parameters: Type.Object({
+ command: Type.String({ description: "Shell command to execute in PTY" }),
+ timeout: Type.Optional(
+ Type.Number({ description: "Timeout in milliseconds (default: 30000)" }),
+ ),
+ input: Type.Optional(
+ Type.Array(Type.String(), { description: "Lines of input to feed to the process" }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ if (!cfg.shellEnabled) {
+ throw new ToolInputError("Shell tool is disabled in configuration");
+ }
+
+ const p = params as { command?: string; timeout?: number; input?: string[] };
+ if (typeof p.command !== "string" || !p.command.trim()) {
+ throw new ToolInputError("command required");
+ }
+
+ const command = p.command.trim();
+ const timeout =
+ typeof p.timeout === "number"
+ ? Math.max(1000, Math.min(Math.trunc(p.timeout), cfg.shellTimeout))
+ : 30000;
+ const inputLines = Array.isArray(p.input)
+ ? p.input.filter((l) => typeof l === "string")
+ : [];
+
+ const MAX_OUTPUT = 1024 * 1024; // 1MB
+
+ // Dynamic import node-pty
+ let pty: typeof import("@lydell/node-pty");
+ try {
+ pty = await import("@lydell/node-pty");
+ } catch {
+ throw new ToolInputError(
+ "node-pty is not available. Install @lydell/node-pty for interactive shell support.",
+ );
+ }
+
+ const startTime = Date.now();
+ // Hard cap: cfg.shellTimeout is the max the user can request; use it
+ // as the outer guard so a hanging PTY never leaks beyond this limit.
+ const hardTimeout =
+ typeof p.timeout === "number" ? timeout : Math.min(60000, cfg.shellTimeout);
+
+ return new Promise((resolve, reject) => {
+ let output = "";
+ let exitCode = -1;
+ let exited = false;
+ let settled = false;
+ let feedTimer: ReturnType | undefined;
+
+ function settle(fn: () => void) {
+ if (settled) return;
+ settled = true;
+ clearTimeout(hardTimer);
+ clearTimeout(timer);
+ if (feedTimer) clearTimeout(feedTimer);
+ fn();
+ }
+
+ // Hard timeout — rejects the promise if the PTY never exits.
+ const hardTimer = setTimeout(() => {
+ settle(() =>
+ reject(
+ new Error(
+ `PTY hard timeout after ${hardTimeout}ms: command "${command}" did not exit`,
+ ),
+ ),
+ );
+ }, hardTimeout);
+
+ // IPty has kill() but dynamic import resolves to PtyHandle which omits it
+ type PtyProc = {
+ onData: (cb: (data: string) => void) => void;
+ onExit: (cb: (e: { exitCode: number }) => void) => void;
+ write: (data: string) => void;
+ kill: (signal?: string) => void;
+ };
+ let proc: PtyProc;
+ try {
+ const shell = process.env.SHELL ?? "/bin/bash";
+ proc = pty.spawn(shell, ["-c", command], {
+ name: "xterm-256color",
+ cols: 120,
+ rows: 40,
+ cwd: cfg.workspaceRoot,
+ env: { ...process.env, TERM: "xterm-256color" } as Record,
+ }) as unknown as PtyProc;
+ } catch (spawnErr) {
+ settle(() =>
+ reject(spawnErr instanceof Error ? spawnErr : new Error(String(spawnErr))),
+ );
+ return;
+ }
+
+ // Per-call soft timeout (kills the process, then resolves with partial output).
+ const timer = setTimeout(() => {
+ if (!exited) {
+ try {
+ proc.kill();
+ } catch {
+ /* ignore */
+ }
+ output += "\n[Process killed after timeout]";
+ exitCode = 137;
+ settle(finish);
+ }
+ }, timeout);
+
+ proc.onData((data: string) => {
+ if (output.length < MAX_OUTPUT) {
+ output += data;
+ }
+ });
+
+ proc.onExit(({ exitCode: code }) => {
+ exited = true;
+ exitCode = code;
+ settle(finish);
+ });
+
+ // Feed input lines with delays
+ if (inputLines.length > 0) {
+ let lineIdx = 0;
+ const feedNext = () => {
+ if (lineIdx < inputLines.length && !exited) {
+ proc.write(inputLines[lineIdx] + "\n");
+ lineIdx++;
+ feedTimer = setTimeout(feedNext, 100);
+ } else {
+ feedTimer = undefined;
+ }
+ };
+ // Start feeding after a small delay for process startup
+ feedTimer = setTimeout(feedNext, 200);
+ }
+
+ function finish() {
+ const duration = Date.now() - startTime;
+ const cleanOutput = stripAnsi(output).trim();
+ const truncated = output.length >= MAX_OUTPUT;
+
+ const parts: string[] = [];
+ if (cleanOutput) {
+ parts.push(truncated ? cleanOutput + "\n[Output truncated at 1MB]" : cleanOutput);
+ }
+ if (exitCode !== 0) {
+ parts.push(`[exit code: ${exitCode}]`);
+ }
+
+ const text = parts.join("\n\n") || "(no output)";
+
+ resolve({
+ content: [{ type: "text" as const, text }],
+ details: { command, exitCode, duration },
+ });
+ }
+ });
+ },
+ },
+ { name: "code_shell_interactive" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-shell.test.ts b/extensions/code-tools/tools/code-shell.test.ts
new file mode 100644
index 00000000..d5dd2f36
--- /dev/null
+++ b/extensions/code-tools/tools/code-shell.test.ts
@@ -0,0 +1,86 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+
+const execFileAsync = promisify(execFile);
+
+describe("code_shell behavior", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-shell-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("executes a simple command", async () => {
+ const { stdout } = await execFileAsync("bash", ["-c", "echo hello"], {
+ cwd: tmpDir,
+ timeout: 5000,
+ });
+ expect(stdout.trim()).toBe("hello");
+ });
+
+ it("captures stderr", async () => {
+ const result = await execFileAsync("bash", ["-c", "echo err >&2"], {
+ cwd: tmpDir,
+ timeout: 5000,
+ });
+ expect(result.stderr.trim()).toBe("err");
+ });
+
+ it("respects cwd", async () => {
+ await fs.writeFile(path.join(tmpDir, "marker.txt"), "found");
+ const { stdout } = await execFileAsync("bash", ["-c", "cat marker.txt"], {
+ cwd: tmpDir,
+ timeout: 5000,
+ });
+ expect(stdout.trim()).toBe("found");
+ });
+
+ it("handles timeout", async () => {
+ try {
+ await execFileAsync("bash", ["-c", "sleep 10"], {
+ cwd: tmpDir,
+ timeout: 500,
+ });
+ } catch (err) {
+ const error = err as { killed?: boolean };
+ expect(error.killed).toBe(true);
+ }
+ });
+
+ it("captures exit code on failure", async () => {
+ try {
+ await execFileAsync("bash", ["-c", "exit 42"], {
+ cwd: tmpDir,
+ timeout: 5000,
+ });
+ } catch (err) {
+ const error = err as { code?: number };
+ expect(error.code).toBe(42);
+ }
+ });
+
+ it("handles multi-line output", async () => {
+ const { stdout } = await execFileAsync("bash", ["-c", 'echo "line1\nline2\nline3"'], {
+ cwd: tmpDir,
+ timeout: 5000,
+ });
+ const lines = stdout.trim().split("\n");
+ expect(lines.length).toBe(3);
+ });
+
+ it("handles piped commands", async () => {
+ const { stdout } = await execFileAsync("bash", ["-c", 'echo "a\nb\nc" | wc -l'], {
+ cwd: tmpDir,
+ timeout: 5000,
+ });
+ expect(parseInt(stdout.trim(), 10)).toBe(3);
+ });
+});
diff --git a/extensions/code-tools/tools/code-shell.ts b/extensions/code-tools/tools/code-shell.ts
new file mode 100644
index 00000000..0ba9f4df
--- /dev/null
+++ b/extensions/code-tools/tools/code-shell.ts
@@ -0,0 +1,119 @@
+/**
+ * code_shell tool — Execute a shell command in the workspace.
+ *
+ * Captures stdout, stderr, exit code. Commands are subject to
+ * bash-sandbox validation if that plugin is active.
+ */
+
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath, isPathInside } from "../path-utils.js";
+
+const execFileAsync = promisify(execFile);
+
+export function registerCodeShell(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_shell",
+ label: "Shell",
+ description:
+ "Execute a shell command in the workspace. Captures stdout, stderr, exit code. Commands are subject to bash-sandbox validation if that plugin is active.",
+ parameters: Type.Object({
+ command: Type.String({ description: "Shell command to execute" }),
+ timeout: Type.Optional(
+ Type.Number({ description: "Timeout in milliseconds (default: 120000)" }),
+ ),
+ cwd: Type.Optional(
+ Type.String({ description: "Working directory (defaults to workspace root)" }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ if (!cfg.shellEnabled) {
+ throw new ToolInputError("Shell tool is disabled in configuration");
+ }
+
+ const p = params as { command?: string; timeout?: number; cwd?: string };
+ if (typeof p.command !== "string" || !p.command.trim()) {
+ throw new ToolInputError("command required");
+ }
+
+ const command = p.command.trim();
+ const timeout =
+ typeof p.timeout === "number"
+ ? Math.max(1000, Math.min(Math.trunc(p.timeout), cfg.shellTimeout))
+ : cfg.shellTimeout;
+
+ const cwd = p.cwd?.trim()
+ ? resolveSafePath(p.cwd.trim(), cfg.workspaceRoot)
+ : cfg.workspaceRoot;
+
+ if (!isPathInside(cwd, cfg.workspaceRoot) && cwd !== cfg.workspaceRoot) {
+ throw new ToolInputError("cwd is outside workspace root");
+ }
+
+ const startTime = Date.now();
+ let stdout = "";
+ let stderr = "";
+ let exitCode = 0;
+
+ try {
+ const result = await execFileAsync("bash", ["-c", command], {
+ cwd,
+ timeout,
+ maxBuffer: 10 * 1024 * 1024, // 10MB
+ env: { ...process.env, TERM: "dumb" },
+ });
+ stdout = result.stdout;
+ stderr = result.stderr;
+ } catch (err) {
+ const error = err as {
+ stdout?: string;
+ stderr?: string;
+ code?: number | string;
+ killed?: boolean;
+ };
+ stdout = error.stdout ?? "";
+ stderr = error.stderr ?? "";
+ if (error.killed) {
+ exitCode = 137;
+ stderr += `\n[Process killed after ${timeout}ms timeout]`;
+ } else if (typeof error.code === "number") {
+ exitCode = error.code;
+ } else {
+ exitCode = 1;
+ }
+ }
+
+ const duration = Date.now() - startTime;
+
+ // Build output text
+ const parts: string[] = [];
+ if (stdout.trim()) {
+ parts.push(stdout.trimEnd());
+ }
+ if (stderr.trim()) {
+ parts.push(`[stderr]\n${stderr.trimEnd()}`);
+ }
+ if (exitCode !== 0) {
+ parts.push(`[exit code: ${exitCode}]`);
+ }
+
+ const text = parts.join("\n\n") || "(no output)";
+
+ return {
+ content: [{ type: "text" as const, text }],
+ details: {
+ command,
+ exitCode,
+ duration,
+ },
+ };
+ },
+ },
+ { name: "code_shell" },
+ );
+}
diff --git a/extensions/code-tools/tools/code-write.test.ts b/extensions/code-tools/tools/code-write.test.ts
new file mode 100644
index 00000000..14e979c7
--- /dev/null
+++ b/extensions/code-tools/tools/code-write.test.ts
@@ -0,0 +1,50 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs/promises";
+import path from "node:path";
+import os from "node:os";
+import { resolveSafePath } from "../path-utils.js";
+
+describe("code_write behavior", () => {
+ let tmpDir: string;
+
+ beforeEach(async () => {
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "code-write-test-"));
+ });
+
+ afterEach(async () => {
+ await fs.rm(tmpDir, { recursive: true, force: true });
+ });
+
+ it("writes a file", async () => {
+ const filePath = path.join(tmpDir, "output.txt");
+ await fs.writeFile(filePath, "hello world", "utf-8");
+ const content = await fs.readFile(filePath, "utf-8");
+ expect(content).toBe("hello world");
+ });
+
+ it("creates parent directories", async () => {
+ const filePath = path.join(tmpDir, "deep/nested/file.txt");
+ await fs.mkdir(path.dirname(filePath), { recursive: true });
+ await fs.writeFile(filePath, "nested content", "utf-8");
+ const content = await fs.readFile(filePath, "utf-8");
+ expect(content).toBe("nested content");
+ });
+
+ it("overwrites existing files", async () => {
+ const filePath = path.join(tmpDir, "existing.txt");
+ await fs.writeFile(filePath, "original");
+ await fs.writeFile(filePath, "updated");
+ const content = await fs.readFile(filePath, "utf-8");
+ expect(content).toBe("updated");
+ });
+
+ it("rejects path outside workspace", () => {
+ expect(() => resolveSafePath("../../etc/passwd", tmpDir)).toThrow("outside workspace");
+ });
+
+ it("calculates bytes written correctly", () => {
+ const content = "Hello \u{1f30d}";
+ const bytes = Buffer.byteLength(content, "utf-8");
+ expect(bytes).toBeGreaterThan(content.length); // emoji is multi-byte
+ });
+});
diff --git a/extensions/code-tools/tools/code-write.ts b/extensions/code-tools/tools/code-write.ts
new file mode 100644
index 00000000..1f8eb050
--- /dev/null
+++ b/extensions/code-tools/tools/code-write.ts
@@ -0,0 +1,73 @@
+/**
+ * code_write tool — Write content to a file.
+ *
+ * Creates parent directories as needed. Overwrites existing files.
+ */
+
+import fs from "node:fs/promises";
+import path from "node:path";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError, jsonResult } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+import { resolveSafePath } from "../path-utils.js";
+import { generateDiff } from "./code-edit.js";
+
+export function registerCodeWrite(api: MayrosPluginApi, cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "code_write",
+ label: "Write File",
+ description: "Write content to a file, creating parent directories if needed.",
+ parameters: Type.Object({
+ path: Type.String({ description: "File path (absolute or relative to workspace)" }),
+ content: Type.String({ description: "Content to write" }),
+ }),
+ async execute(_toolCallId, params) {
+ const p = params as { path?: string; content?: string };
+ if (typeof p.path !== "string" || !p.path.trim()) {
+ throw new ToolInputError("path required");
+ }
+ if (typeof p.content !== "string") {
+ throw new ToolInputError("content required");
+ }
+
+ const filePath = resolveSafePath(p.path.trim(), cfg.workspaceRoot);
+ const dir = path.dirname(filePath);
+
+ // Read existing content for diff (if file exists)
+ let oldContent: string | null = null;
+ try {
+ oldContent = await fs.readFile(filePath, "utf-8");
+ } catch {
+ // File doesn't exist yet — no diff
+ }
+
+ await fs.mkdir(dir, { recursive: true });
+ await fs.writeFile(filePath, p.content, "utf-8");
+
+ const bytesWritten = Buffer.byteLength(p.content, "utf-8");
+
+ // Generate diff if file existed before
+ let diff: string | undefined;
+ if (oldContent !== null && oldContent !== p.content) {
+ diff = generateDiff(p.path.trim(), oldContent, p.content);
+ }
+
+ if (diff) {
+ return {
+ content: [{ type: "text" as const, text: diff }],
+ details: { path: p.path.trim(), bytesWritten, isNew: false },
+ };
+ }
+
+ return jsonResult({
+ path: p.path.trim(),
+ bytesWritten,
+ isNew: oldContent === null,
+ });
+ },
+ },
+ { name: "code_write" },
+ );
+}
diff --git a/extensions/code-tools/tools/diff-format.test.ts b/extensions/code-tools/tools/diff-format.test.ts
new file mode 100644
index 00000000..1489129a
--- /dev/null
+++ b/extensions/code-tools/tools/diff-format.test.ts
@@ -0,0 +1,77 @@
+import { describe, it, expect } from "vitest";
+import { parseDiffStats } from "../../../src/tui/diff-renderer.js";
+
+describe("parseDiffStats", () => {
+ it("counts additions and deletions from unified diff", () => {
+ const diff = [
+ "diff --git a/file.ts b/file.ts",
+ "--- a/file.ts",
+ "+++ b/file.ts",
+ "@@ -1,3 +1,4 @@",
+ " line1",
+ "-old line",
+ "+new line",
+ "+extra line",
+ " line3",
+ ].join("\n");
+ const stats = parseDiffStats(diff);
+ expect(stats).toEqual({ files: 1, additions: 2, deletions: 1 });
+ });
+
+ it("counts from simple +/- snippet (no diff headers)", () => {
+ const snippet = ["-removed", "+added1", "+added2"].join("\n");
+ const stats = parseDiffStats(snippet);
+ expect(stats).toEqual({ files: 1, additions: 2, deletions: 1 });
+ });
+
+ it("handles multi-file diff", () => {
+ const diff = [
+ "diff --git a/a.ts b/a.ts",
+ "--- a/a.ts",
+ "+++ b/a.ts",
+ "@@ -1,2 +1,2 @@",
+ "-old",
+ "+new",
+ "diff --git a/b.ts b/b.ts",
+ "--- a/b.ts",
+ "+++ b/b.ts",
+ "@@ -1,2 +1,3 @@",
+ " keep",
+ "+added",
+ ].join("\n");
+ const stats = parseDiffStats(diff);
+ expect(stats).toEqual({ files: 2, additions: 2, deletions: 1 });
+ });
+
+ it("returns zero counts for empty string", () => {
+ const stats = parseDiffStats("");
+ expect(stats).toEqual({ files: 1, additions: 0, deletions: 0 });
+ });
+
+ it("ignores --- and +++ header lines", () => {
+ const diff = ["--- a/file.ts", "+++ b/file.ts", "-actual deletion", "+actual addition"].join(
+ "\n",
+ );
+ const stats = parseDiffStats(diff);
+ expect(stats.additions).toBe(1);
+ expect(stats.deletions).toBe(1);
+ });
+
+ it("handles diff with only additions", () => {
+ const diff = ["+line1", "+line2", "+line3"].join("\n");
+ const stats = parseDiffStats(diff);
+ expect(stats).toEqual({ files: 1, additions: 3, deletions: 0 });
+ });
+
+ it("handles diff with only deletions", () => {
+ const diff = ["-line1", "-line2"].join("\n");
+ const stats = parseDiffStats(diff);
+ expect(stats).toEqual({ files: 1, additions: 0, deletions: 2 });
+ });
+
+ it("handles context lines without counting them", () => {
+ const diff = [" context1", "-removed", " context2", "+added", " context3"].join("\n");
+ const stats = parseDiffStats(diff);
+ expect(stats).toEqual({ files: 1, additions: 1, deletions: 1 });
+ });
+});
diff --git a/extensions/code-tools/tools/git-commit.test.ts b/extensions/code-tools/tools/git-commit.test.ts
new file mode 100644
index 00000000..269bf712
--- /dev/null
+++ b/extensions/code-tools/tools/git-commit.test.ts
@@ -0,0 +1,170 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { execFileSync } from "node:child_process";
+import {
+ getCurrentBranch,
+ getGitStatus,
+ getStagedDiff,
+ stageFiles,
+ stageAll,
+ createCommit,
+ hasUncommittedChanges,
+ hasRemoteTracking,
+ isGhAvailable,
+ getDiffSummary,
+ getCommitLog,
+ type GitStatusEntry,
+ type CommitResult,
+} from "./git-commit.js";
+
+function initRepo(dir: string): void {
+ execFileSync("git", ["init", "-b", "main"], { cwd: dir });
+ execFileSync("git", ["config", "user.email", "test@test.com"], { cwd: dir });
+ execFileSync("git", ["config", "user.name", "Test"], { cwd: dir });
+ writeFileSync(join(dir, "README.md"), "# Test\n");
+ execFileSync("git", ["add", "."], { cwd: dir });
+ execFileSync("git", ["commit", "-m", "initial commit"], { cwd: dir });
+}
+
+describe("git-commit", () => {
+ let dir: string;
+
+ beforeEach(() => {
+ dir = mkdtempSync(join(tmpdir(), "mayros-git-test-"));
+ initRepo(dir);
+ });
+
+ afterEach(() => {
+ rmSync(dir, { recursive: true, force: true });
+ });
+
+ // 1
+ it("getCurrentBranch returns main", () => {
+ expect(getCurrentBranch(dir)).toBe("main");
+ });
+
+ // 2
+ it("getGitStatus returns empty for clean repo", () => {
+ expect(getGitStatus(dir)).toHaveLength(0);
+ });
+
+ // 3
+ it("getGitStatus detects modified files", () => {
+ writeFileSync(join(dir, "README.md"), "# Changed\n");
+ const status: GitStatusEntry[] = getGitStatus(dir);
+ expect(status.length).toBeGreaterThan(0);
+ expect(status[0].path).toBe("README.md");
+ });
+
+ // 4
+ it("getGitStatus detects untracked files", () => {
+ writeFileSync(join(dir, "new.txt"), "new file\n");
+ const status: GitStatusEntry[] = getGitStatus(dir);
+ expect(status).toHaveLength(1);
+ expect(status[0].status).toBe("??");
+ expect(status[0].path).toBe("new.txt");
+ });
+
+ // 5
+ it("stageFiles stages specific files", () => {
+ writeFileSync(join(dir, "a.txt"), "a\n");
+ writeFileSync(join(dir, "b.txt"), "b\n");
+ stageFiles(dir, ["a.txt"]);
+ const diff = getStagedDiff(dir);
+ expect(diff).toContain("a.txt");
+ });
+
+ // 6
+ it("stageAll stages everything", () => {
+ writeFileSync(join(dir, "x.txt"), "x\n");
+ writeFileSync(join(dir, "y.txt"), "y\n");
+ stageAll(dir);
+ const diff = getStagedDiff(dir);
+ expect(diff).toContain("x.txt");
+ expect(diff).toContain("y.txt");
+ });
+
+ // 7
+ it("createCommit creates a commit", () => {
+ writeFileSync(join(dir, "file.txt"), "content\n");
+ stageAll(dir);
+ const result: CommitResult = createCommit(dir, "add file");
+ expect(result.hash).toMatch(/^[0-9a-f]+$/);
+ expect(result.message).toBe("add file");
+ expect(result.branch).toBe("main");
+ expect(result.filesChanged).toBe(1);
+ });
+
+ // 8
+ it("hasUncommittedChanges returns true with changes", () => {
+ writeFileSync(join(dir, "file.txt"), "content\n");
+ expect(hasUncommittedChanges(dir)).toBe(true);
+ });
+
+ // 9
+ it("hasUncommittedChanges returns false when clean", () => {
+ expect(hasUncommittedChanges(dir)).toBe(false);
+ });
+
+ // 10
+ it("hasRemoteTracking returns false for local-only repo", () => {
+ expect(hasRemoteTracking(dir)).toBe(false);
+ });
+
+ // 11
+ it("isGhAvailable returns boolean", () => {
+ const result = isGhAvailable();
+ expect(typeof result).toBe("boolean");
+ });
+
+ // 12
+ it("getStagedDiff returns empty for no staged changes", () => {
+ expect(getStagedDiff(dir)).toBe("");
+ });
+
+ // 13
+ it("stageFiles with empty array is no-op", () => {
+ stageFiles(dir, []);
+ expect(getStagedDiff(dir)).toBe("");
+ });
+
+ // 14
+ it("multiple commits work sequentially", () => {
+ writeFileSync(join(dir, "a.txt"), "a\n");
+ stageAll(dir);
+ const r1: CommitResult = createCommit(dir, "first");
+
+ writeFileSync(join(dir, "b.txt"), "b\n");
+ stageAll(dir);
+ const r2: CommitResult = createCommit(dir, "second");
+
+ expect(r1.hash).not.toBe(r2.hash);
+ });
+
+ // 15
+ it("getCurrentBranch works on new branch", () => {
+ execFileSync("git", ["checkout", "-b", "feat/test"], { cwd: dir });
+ expect(getCurrentBranch(dir)).toBe("feat/test");
+ });
+
+ // 16
+ it("getDiffSummary returns empty when no divergence", () => {
+ // On main with no commits ahead, there's no base to diff against in a fresh repo
+ // getDiffSummary catches errors and returns ""
+ const summary = getDiffSummary(dir, "main");
+ expect(typeof summary).toBe("string");
+ });
+
+ // 17
+ it("getCommitLog returns commits between base and HEAD", () => {
+ execFileSync("git", ["checkout", "-b", "feat/branch"], { cwd: dir });
+ writeFileSync(join(dir, "new.txt"), "data\n");
+ stageAll(dir);
+ createCommit(dir, "branch commit");
+
+ const log = getCommitLog(dir, "main");
+ expect(log).toContain("branch commit");
+ });
+});
diff --git a/extensions/code-tools/tools/git-commit.ts b/extensions/code-tools/tools/git-commit.ts
new file mode 100644
index 00000000..f3f286ab
--- /dev/null
+++ b/extensions/code-tools/tools/git-commit.ts
@@ -0,0 +1,345 @@
+/**
+ * Git commit, push, and PR tools — Auto-commit + PR creation.
+ *
+ * Pure git utility functions using execFileSync (no shell injection)
+ * plus tool registration for git_commit, git_push, and git_create_pr.
+ */
+
+import { execFileSync } from "node:child_process";
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import type { CodeToolsConfig } from "../config.js";
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export type CommitResult = {
+ hash: string;
+ message: string;
+ branch: string;
+ filesChanged: number;
+};
+
+export type PrResult = {
+ number: number;
+ url: string;
+ title: string;
+ branch: string;
+};
+
+export type GitStatusEntry = {
+ status: string; // "M", "A", "D", "??"
+ path: string;
+};
+
+// ============================================================================
+// Git Utility Functions
+// ============================================================================
+
+/** Get current branch name. */
+export function getCurrentBranch(cwd: string): string {
+ return execFileSync("git", ["branch", "--show-current"], { cwd, encoding: "utf-8" }).trim();
+}
+
+/** Get git status (porcelain v1 format). */
+export function getGitStatus(cwd: string): GitStatusEntry[] {
+ const out = execFileSync("git", ["status", "--porcelain"], { cwd, encoding: "utf-8" });
+ return out
+ .split("\n")
+ .filter((l) => l.length > 0)
+ .map((line) => ({
+ status: line.slice(0, 2).trim(),
+ path: line.slice(3),
+ }));
+}
+
+/** Get staged diff (for commit message context). */
+export function getStagedDiff(cwd: string): string {
+ try {
+ return execFileSync("git", ["diff", "--cached", "--stat"], { cwd, encoding: "utf-8" }).trim();
+ } catch {
+ return "";
+ }
+}
+
+/** Stage specific files. */
+export function stageFiles(cwd: string, files: string[]): void {
+ if (files.length === 0) return;
+ execFileSync("git", ["add", ...files], { cwd, encoding: "utf-8" });
+}
+
+/** Stage all changes (tracked + untracked). */
+export function stageAll(cwd: string): void {
+ execFileSync("git", ["add", "-A"], { cwd, encoding: "utf-8" });
+}
+
+/** Create a commit. Returns the commit hash. */
+export function createCommit(cwd: string, message: string): CommitResult {
+ execFileSync("git", ["commit", "-m", message], { cwd, encoding: "utf-8" });
+ const hash = execFileSync("git", ["rev-parse", "--short", "HEAD"], {
+ cwd,
+ encoding: "utf-8",
+ }).trim();
+ const branch = getCurrentBranch(cwd);
+ const diffStat = execFileSync("git", ["diff", "--stat", "HEAD~1..HEAD"], {
+ cwd,
+ encoding: "utf-8",
+ });
+ const filesChanged = diffStat.split("\n").filter((l) => l.includes("|")).length;
+ return { hash, message, branch, filesChanged };
+}
+
+/** Push current branch to remote. */
+export function pushBranch(cwd: string, remote = "origin", setUpstream = false): string {
+ const branch = getCurrentBranch(cwd);
+ const args = ["push"];
+ if (setUpstream) args.push("-u");
+ args.push(remote, branch);
+ return execFileSync("git", args, {
+ cwd,
+ encoding: "utf-8",
+ stdio: ["pipe", "pipe", "pipe"],
+ })
+ .toString()
+ .trim();
+}
+
+/** Check if gh CLI is available. */
+export function isGhAvailable(): boolean {
+ try {
+ execFileSync("gh", ["--version"], { encoding: "utf-8", stdio: "pipe" });
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+/** Create a pull request using gh CLI. */
+export function createPullRequest(
+ cwd: string,
+ opts: { title: string; body?: string; base?: string; draft?: boolean },
+): PrResult {
+ const args = ["pr", "create", "--title", opts.title];
+ if (opts.body) args.push("--body", opts.body);
+ if (opts.base) args.push("--base", opts.base);
+ if (opts.draft) args.push("--draft");
+
+ const output = execFileSync("gh", args, { cwd, encoding: "utf-8" }).trim();
+ // gh pr create outputs the PR URL
+ const url = output.split("\n").pop()?.trim() ?? output;
+
+ // Extract PR number from URL
+ const numberMatch = url.match(/\/pull\/(\d+)/);
+ const number = numberMatch ? parseInt(numberMatch[1], 10) : 0;
+ const branch = getCurrentBranch(cwd);
+
+ return { number, url, title: opts.title, branch };
+}
+
+/** Get diff summary between current branch and base. */
+export function getDiffSummary(cwd: string, base = "main"): string {
+ try {
+ const stat = execFileSync("git", ["diff", `${base}...HEAD`, "--stat"], {
+ cwd,
+ encoding: "utf-8",
+ });
+ return stat.trim();
+ } catch {
+ return "";
+ }
+}
+
+/** Get list of commits between base and HEAD. */
+export function getCommitLog(cwd: string, base = "main"): string {
+ try {
+ return execFileSync("git", ["log", `${base}..HEAD`, "--oneline", "--no-decorate"], {
+ cwd,
+ encoding: "utf-8",
+ }).trim();
+ } catch {
+ return "";
+ }
+}
+
+/** Check if there are uncommitted changes. */
+export function hasUncommittedChanges(cwd: string): boolean {
+ return getGitStatus(cwd).length > 0;
+}
+
+/** Check if remote tracking branch exists. */
+export function hasRemoteTracking(cwd: string): boolean {
+ try {
+ execFileSync("git", ["rev-parse", "--abbrev-ref", "@{u}"], {
+ cwd,
+ encoding: "utf-8",
+ stdio: "pipe",
+ });
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+// ============================================================================
+// Tool Registration
+// ============================================================================
+
+export function registerGitCommit(api: MayrosPluginApi, _cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "git_commit",
+ label: "Git Commit",
+ description: "Stage files and create a git commit. Can stage specific files or all changes.",
+ parameters: Type.Object({
+ message: Type.String({ description: "Commit message" }),
+ files: Type.Optional(
+ Type.Array(Type.String(), {
+ description: "Files to stage. Omit to stage all changes.",
+ }),
+ ),
+ }),
+ async execute(_toolCallId, params) {
+ const { message, files } = params as { message: string; files?: string[] };
+ const cwd = _cfg.workspaceRoot;
+
+ if (!hasUncommittedChanges(cwd) && !getStagedDiff(cwd)) {
+ return {
+ content: [{ type: "text" as const, text: "No changes to commit." }],
+ details: { error: "no_changes" },
+ };
+ }
+
+ if (files && files.length > 0) {
+ stageFiles(cwd, files);
+ } else {
+ stageAll(cwd);
+ }
+
+ const result = createCommit(cwd, message);
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: `Committed ${result.hash} on ${result.branch}: "${result.message}" (${result.filesChanged} file(s))`,
+ },
+ ],
+ details: result,
+ };
+ },
+ },
+ { name: "git_commit" },
+ );
+}
+
+export function registerGitPush(api: MayrosPluginApi, _cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "git_push",
+ label: "Git Push",
+ description: "Push current branch to remote. Automatically sets upstream if needed.",
+ parameters: Type.Object({
+ remote: Type.Optional(Type.String({ description: "Remote name (default: origin)" })),
+ }),
+ async execute(_toolCallId, params) {
+ const { remote } = params as { remote?: string };
+ const cwd = _cfg.workspaceRoot;
+ const branch = getCurrentBranch(cwd);
+ const needsUpstream = !hasRemoteTracking(cwd);
+
+ try {
+ pushBranch(cwd, remote ?? "origin", needsUpstream);
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: `Pushed ${branch} to ${remote ?? "origin"}${needsUpstream ? " (set upstream)" : ""}`,
+ },
+ ],
+ details: { branch, remote: remote ?? "origin", setUpstream: needsUpstream },
+ };
+ } catch (err) {
+ return {
+ content: [{ type: "text" as const, text: `Push failed: ${String(err)}` }],
+ details: { error: String(err) },
+ };
+ }
+ },
+ },
+ { name: "git_push" },
+ );
+}
+
+export function registerGitCreatePr(api: MayrosPluginApi, _cfg: CodeToolsConfig): void {
+ api.registerTool(
+ {
+ name: "git_create_pr",
+ label: "Create Pull Request",
+ description: "Create a GitHub pull request for the current branch using gh CLI.",
+ parameters: Type.Object({
+ title: Type.String({ description: "PR title" }),
+ body: Type.Optional(Type.String({ description: "PR description (markdown)" })),
+ base: Type.Optional(Type.String({ description: "Base branch (default: main)" })),
+ draft: Type.Optional(Type.Boolean({ description: "Create as draft PR" })),
+ }),
+ async execute(_toolCallId, params) {
+ const { title, body, base, draft } = params as {
+ title: string;
+ body?: string;
+ base?: string;
+ draft?: boolean;
+ };
+ const cwd = _cfg.workspaceRoot;
+
+ if (!isGhAvailable()) {
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: "GitHub CLI (gh) is not installed. Install it from https://cli.github.com/",
+ },
+ ],
+ details: { error: "gh_not_available" },
+ };
+ }
+
+ // Ensure branch is pushed
+ const branch = getCurrentBranch(cwd);
+ if (!hasRemoteTracking(cwd)) {
+ try {
+ pushBranch(cwd, "origin", true);
+ } catch (err) {
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: `Failed to push branch: ${String(err)}`,
+ },
+ ],
+ details: { error: String(err) },
+ };
+ }
+ }
+
+ try {
+ const result = createPullRequest(cwd, { title, body, base, draft });
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: `PR #${result.number} created: ${result.url}`,
+ },
+ ],
+ details: result,
+ };
+ } catch (err) {
+ return {
+ content: [{ type: "text" as const, text: `PR creation failed: ${String(err)}` }],
+ details: { error: String(err) },
+ };
+ }
+ },
+ },
+ { name: "git_create_pr" },
+ );
+}
diff --git a/extensions/code-tools/tools/web-fetch.test.ts b/extensions/code-tools/tools/web-fetch.test.ts
new file mode 100644
index 00000000..7c64b138
--- /dev/null
+++ b/extensions/code-tools/tools/web-fetch.test.ts
@@ -0,0 +1,212 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("../../../src/agents/tools/common.js", () => ({
+ ToolInputError: class ToolInputError extends Error {
+ constructor(msg: string) {
+ super(msg);
+ this.name = "ToolInputError";
+ }
+ },
+}));
+
+describe("code_web_fetch", () => {
+ let executeFn: (
+ id: string,
+ params: Record,
+ ) => Promise<{
+ content: Array<{ type: string; text: string }>;
+ details: Record;
+ }>;
+
+ beforeEach(async () => {
+ vi.resetModules();
+ const mockApi = {
+ registerTool: vi.fn((toolDef: { execute: typeof executeFn }) => {
+ executeFn = toolDef.execute;
+ }),
+ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+ };
+ const cfg = {
+ workspaceRoot: "/tmp/test",
+ shellEnabled: true,
+ shellTimeout: 120000,
+ };
+ const { registerWebFetch } = await import("./web-fetch.js");
+ registerWebFetch(mockApi as never, cfg as never);
+ });
+
+ it("registers tool with correct name", () => {
+ expect(executeFn).toBeDefined();
+ });
+
+ it("rejects empty url", async () => {
+ await expect(executeFn("t1", {})).rejects.toThrow("url required");
+ await expect(executeFn("t2", { url: "" })).rejects.toThrow("url required");
+ await expect(executeFn("t3", { url: " " })).rejects.toThrow("url required");
+ });
+
+ it("rejects invalid url", async () => {
+ await expect(executeFn("t4", { url: "not a url here ::::" })).rejects.toThrow("Invalid URL");
+ });
+
+ it("blocks localhost URLs", async () => {
+ await expect(executeFn("t5", { url: "https://localhost/secret" })).rejects.toThrow(
+ "Blocked URL",
+ );
+ await expect(executeFn("t6", { url: "https://127.0.0.1/admin" })).rejects.toThrow(
+ "Blocked URL",
+ );
+ });
+
+ it("blocks metadata URLs", async () => {
+ await expect(executeFn("t7", { url: "https://169.254.169.254/latest" })).rejects.toThrow(
+ "Blocked URL",
+ );
+ await expect(executeFn("t8", { url: "https://metadata.google.internal/v1" })).rejects.toThrow(
+ "Blocked URL",
+ );
+ });
+
+ it("auto-upgrades http to https", async () => {
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: true,
+ url: "https://example.com",
+ text: () => Promise.resolve("Test Hello"),
+ });
+ try {
+ const result = await executeFn("t9", { url: "http://example.com" });
+ expect((globalThis.fetch as ReturnType).mock.calls[0][0]).toBe(
+ "https://example.com",
+ );
+ expect(result.details.url).toBe("https://example.com");
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("converts HTML to readable text", async () => {
+ const html = `Test Page
+ Header
+ Paragraph with bold and italic.
+ - Item 1
- Item 2
+ `;
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: true,
+ url: "https://example.com",
+ text: () => Promise.resolve(html),
+ });
+ try {
+ const result = await executeFn("t10", { url: "https://example.com" });
+ const text = result.content[0].text;
+ expect(text).toContain("Title: Test Page");
+ expect(text).toContain("Header");
+ expect(text).toContain("**bold**");
+ expect(text).toContain("_italic_");
+ expect(text).toContain("- Item 1");
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("includes prompt when provided", async () => {
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: true,
+ url: "https://example.com",
+ text: () => Promise.resolve("plain text content"),
+ });
+ try {
+ const result = await executeFn("t11", {
+ url: "https://example.com",
+ prompt: "Extract the API docs",
+ });
+ expect(result.content[0].text).toContain("Prompt: Extract the API docs");
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("truncates content at max_length", async () => {
+ const longContent = "A".repeat(100000);
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: true,
+ url: "https://example.com",
+ text: () => Promise.resolve(longContent),
+ });
+ try {
+ const result = await executeFn("t12", { url: "https://example.com", max_length: 5000 });
+ expect(result.content[0].text).toContain("[Content truncated]");
+ expect(result.details.truncated).toBe(true);
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("handles HTTP errors gracefully", async () => {
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: false,
+ status: 404,
+ statusText: "Not Found",
+ });
+ try {
+ const result = await executeFn("t13", { url: "https://example.com/missing" });
+ expect(result.content[0].text).toContain("404");
+ expect(result.details.status).toBe(404);
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("handles fetch failures gracefully", async () => {
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockRejectedValue(new Error("ECONNREFUSED"));
+ try {
+ const result = await executeFn("t14", { url: "https://unreachable.example.com" });
+ expect(result.content[0].text).toContain("Fetch failed");
+ expect(result.content[0].text).toContain("ECONNREFUSED");
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("adds https:// prefix when missing", async () => {
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: true,
+ url: "https://example.com",
+ text: () => Promise.resolve("OK"),
+ });
+ try {
+ await executeFn("t15", { url: "example.com" });
+ expect((globalThis.fetch as ReturnType).mock.calls[0][0]).toBe(
+ "https://example.com",
+ );
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+
+ it("strips script and style tags from HTML", async () => {
+ const html = `
+ Clean content
`;
+ const globalFetch = globalThis.fetch;
+ globalThis.fetch = vi.fn().mockResolvedValue({
+ ok: true,
+ url: "https://example.com",
+ text: () => Promise.resolve(html),
+ });
+ try {
+ const result = await executeFn("t16", { url: "https://example.com" });
+ const text = result.content[0].text;
+ expect(text).not.toContain("alert");
+ expect(text).not.toContain("color:red");
+ expect(text).toContain("Clean content");
+ } finally {
+ globalThis.fetch = globalFetch;
+ }
+ });
+});
diff --git a/extensions/code-tools/tools/web-fetch.ts b/extensions/code-tools/tools/web-fetch.ts
new file mode 100644
index 00000000..5cd4e7a4
--- /dev/null
+++ b/extensions/code-tools/tools/web-fetch.ts
@@ -0,0 +1,215 @@
+/**
+ * code_web_fetch tool — Fetch a URL and return its content as text.
+ *
+ * HTML pages are converted to readable text using a lightweight built-in converter.
+ * Includes SSRF protection (blocks private/internal addresses) and auto-upgrades
+ * HTTP to HTTPS.
+ */
+
+import { Type } from "@sinclair/typebox";
+import type { MayrosPluginApi } from "mayros/plugin-sdk";
+import { ToolInputError } from "../../../src/agents/tools/common.js";
+import type { CodeToolsConfig } from "../config.js";
+
+// ============================================================================
+// Lightweight HTML-to-text conversion (no external dependency)
+// ============================================================================
+
+function htmlToText(html: string): string {
+ let text = html;
+ // Remove script and style blocks
+ text = text.replace(/