diff --git a/packages/software-factory/.agents/skills/boxel-development/SKILL.md b/.agents/skills/boxel-development/SKILL.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/SKILL.md rename to .agents/skills/boxel-development/SKILL.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-command-development.md b/.agents/skills/boxel-development/references/dev-command-development.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-command-development.md rename to .agents/skills/boxel-development/references/dev-command-development.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-core-concept.md b/.agents/skills/boxel-development/references/dev-core-concept.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-core-concept.md rename to .agents/skills/boxel-development/references/dev-core-concept.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-core-patterns.md b/.agents/skills/boxel-development/references/dev-core-patterns.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-core-patterns.md rename to .agents/skills/boxel-development/references/dev-core-patterns.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-data-management.md b/.agents/skills/boxel-development/references/dev-data-management.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-data-management.md rename to .agents/skills/boxel-development/references/dev-data-management.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-defensive-programming.md b/.agents/skills/boxel-development/references/dev-defensive-programming.md similarity index 100% 
rename from packages/software-factory/.agents/skills/boxel-development/references/dev-defensive-programming.md rename to .agents/skills/boxel-development/references/dev-defensive-programming.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-delegated-rendering.md b/.agents/skills/boxel-development/references/dev-delegated-rendering.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-delegated-rendering.md rename to .agents/skills/boxel-development/references/dev-delegated-rendering.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-enumerations.md b/.agents/skills/boxel-development/references/dev-enumerations.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-enumerations.md rename to .agents/skills/boxel-development/references/dev-enumerations.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-external-libraries.md b/.agents/skills/boxel-development/references/dev-external-libraries.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-external-libraries.md rename to .agents/skills/boxel-development/references/dev-external-libraries.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-file-def.md b/.agents/skills/boxel-development/references/dev-file-def.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-file-def.md rename to .agents/skills/boxel-development/references/dev-file-def.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-file-editing.md b/.agents/skills/boxel-development/references/dev-file-editing.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-file-editing.md rename 
to .agents/skills/boxel-development/references/dev-file-editing.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-fitted-formats.md b/.agents/skills/boxel-development/references/dev-fitted-formats.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-fitted-formats.md rename to .agents/skills/boxel-development/references/dev-fitted-formats.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-query-systems.md b/.agents/skills/boxel-development/references/dev-query-systems.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-query-systems.md rename to .agents/skills/boxel-development/references/dev-query-systems.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-quick-reference.md b/.agents/skills/boxel-development/references/dev-quick-reference.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-quick-reference.md rename to .agents/skills/boxel-development/references/dev-quick-reference.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-qunit-testing.md b/.agents/skills/boxel-development/references/dev-qunit-testing.md similarity index 94% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-qunit-testing.md rename to .agents/skills/boxel-development/references/dev-qunit-testing.md index 72671ed3fd9..8ca7abca0da 100644 --- a/packages/software-factory/.agents/skills/boxel-development/references/dev-qunit-testing.md +++ b/.agents/skills/boxel-development/references/dev-qunit-testing.md @@ -134,7 +134,7 @@ Add `data-test-*` attributes to card templates for stable test selectors: When tests fail, the orchestrator feeds test failure details back to the agent. 
For more detail: -- **TestRun cards** live in the target realm's `Validations/` folder with a `test_` prefix (e.g., `Validations/test_issue-slug-1.json`). To find all test runs, search by the TestRun card type in the target realm. Each TestRun has a `sequenceNumber` that increases with each iteration. Use `read_file` on a specific TestRun for full details. +- **TestRun cards** live in the target realm's `Validations/` folder with a `test_` prefix (e.g., `Validations/test_issue-slug-1.json`). To find all test runs, run `Glob` over `Validations/test_*.json` or shell out via `Bash` to `boxel search --realm <realm-url>` filtered on the TestRun card type. Each TestRun has a `sequenceNumber` that increases with each iteration. Use native `Read` on a specific TestRun for full details — paths are workspace-relative. ## Rules diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-replicate-ai.md b/.agents/skills/boxel-development/references/dev-replicate-ai.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-replicate-ai.md rename to .agents/skills/boxel-development/references/dev-replicate-ai.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-spec-usage.md b/.agents/skills/boxel-development/references/dev-spec-usage.md similarity index 73% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-spec-usage.md rename to .agents/skills/boxel-development/references/dev-spec-usage.md index 4f19b8ac335..2921bd60cc7 100644 --- a/packages/software-factory/.agents/skills/boxel-development/references/dev-spec-usage.md +++ b/.agents/skills/boxel-development/references/dev-spec-usage.md @@ -1,24 +1,52 @@ # Catalog Spec Card Instances -For each top-level card definition, create a Catalog Spec card instance in the target realm's `Spec/` folder using the `create_catalog_spec` tool. This makes the card discoverable in the Boxel catalog. 
+For each top-level card definition, write a Catalog Spec card instance in the target realm's `Spec/` folder. This makes the card discoverable in the Boxel catalog. -The `create_catalog_spec` tool has the authoritative JSON schema for Spec card fields — use its parameter definitions to know which attributes and relationships are available. The tool auto-constructs the document with the correct `adoptsFrom` (`https://cardstack.com/base/spec#Spec`). +Specs adopt from the `Spec` class exported by `https://cardstack.com/base/spec` — that module lives in the base realm, not your target realm. Fetch the authoritative schema by calling the `get_card_schema` factory tool: -## Usage +``` +get_card_schema({ module: 'https://cardstack.com/base/spec', name: 'Spec' }) +``` -Use the `create_catalog_spec` tool to create a Spec card. The tool's parameters define the available fields dynamically from the card definition — consult the tool schema for the exact field names and types. +The result gives you the exact `attributes` and `relationships` shape. Write the JSON file with native `Write` (paths are workspace-relative, e.g. `Spec/sticky-note.json`); `boxel sync` pushes it to the realm between iterations. + +## Required Shape + +```json +{ + "data": { + "type": "card", + "attributes": { + "specType": "card", + "ref": { "module": "../sticky-note", "name": "StickyNote" }, + "readMe": "...", + "cardInfo": { "name": "Sticky Note", "summary": "..." } + }, + "relationships": { + "linkedExamples.0": { "links": { "self": "../StickyNote/welcome-note" } } + }, + "meta": { + "adoptsFrom": { + "module": "https://cardstack.com/base/spec", + "name": "Spec" + } + } + } +} +``` Key concepts: - `ref` — a CodeRef pointing to the card definition (module path + exported class name). The module path is relative from the Spec card to the `.gts` file (e.g., `../sticky-note` from `Spec/sticky-note.json`). 
- `specType` — `"card"` for CardDef, `"field"` for FieldDef, `"component"` for standalone components. -- `linkedExamples` — a relationship pointing to sample card instances. Create at least one sample instance and link it here. +- `linkedExamples` — a `linksToMany` relationship pointing to sample card instances. Use dotted keys (`linkedExamples.0`, `linkedExamples.1`, …) — the array form is rejected by the indexer. Create at least one sample instance and link it here. +- **Do NOT call `run_instantiate` on the Spec file itself.** Spec's module lives in the base realm; the prerender enforces same-origin module loads and the call always fails. To validate Specs, call `run_instantiate` WITHOUT a `path`; it discovers Specs in the target realm and exercises their `linkedExamples` against the card classes you wrote. ## Sample Card Instances Create at least one sample instance with realistic data for each top-level card. Sample instances serve as both catalog examples and test fixtures. -Place sample instances in a folder named after the card type (e.g., `StickyNote/welcome-note.json`). Use `write_file` to create them. The `linkedExamples` relationship in the Spec card points to these using a relative path (e.g., `../StickyNote/welcome-note`). +Place sample instances in a folder named after the card type (e.g., `StickyNote/welcome-note.json`) and write them with native `Write`. The `linkedExamples` relationship in the Spec card points to these using a relative path without the `.json` suffix (e.g., `../StickyNote/welcome-note`). 
--- diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-styling-design.md b/.agents/skills/boxel-development/references/dev-styling-design.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-styling-design.md rename to .agents/skills/boxel-development/references/dev-styling-design.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-technical-rules.md b/.agents/skills/boxel-development/references/dev-technical-rules.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-technical-rules.md rename to .agents/skills/boxel-development/references/dev-technical-rules.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-template-patterns.md b/.agents/skills/boxel-development/references/dev-template-patterns.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-template-patterns.md rename to .agents/skills/boxel-development/references/dev-template-patterns.md diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-theme-design-system.md b/.agents/skills/boxel-development/references/dev-theme-design-system.md similarity index 100% rename from packages/software-factory/.agents/skills/boxel-development/references/dev-theme-design-system.md rename to .agents/skills/boxel-development/references/dev-theme-design-system.md diff --git a/packages/boxel-cli/plugin/.claude-plugin/plugin.json b/packages/boxel-cli/plugin/.claude-plugin/plugin.json index c49a93966a9..16d696e03d7 100644 --- a/packages/boxel-cli/plugin/.claude-plugin/plugin.json +++ b/packages/boxel-cli/plugin/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "boxel-cli", "description": "Claude Code skills for working with Boxel realms via @cardstack/boxel-cli. 
Requires @cardstack/boxel-cli >= 0.0.1 installed on PATH (npm install -g @cardstack/boxel-cli).", - "version": "0.1.1", + "version": "0.1.3", "author": { "name": "Cardstack", "url": "https://boxel.ai" diff --git a/packages/boxel-cli/plugin/skills/boxel-api/SKILL.md b/packages/boxel-cli/plugin/skills/boxel-api/SKILL.md new file mode 100644 index 00000000000..ecdd16b8815 --- /dev/null +++ b/packages/boxel-cli/plugin/skills/boxel-api/SKILL.md @@ -0,0 +1,182 @@ +--- +name: boxel-api +description: Use when calling Boxel realm-server APIs from code — primarily federated search across realms. Documents the boxel-cli programmatic surface (`BoxelCLIClient`) and the matching CLI commands. Read this whenever you need to query a realm's index. +--- + +# Boxel API + +Canonical home for Boxel platform API knowledge. + +**Architectural principle:** boxel-cli owns the entire Boxel API surface. Any code that talks to the realm server or Matrix lives in boxel-cli; consumers (the software-factory, custom scripts, hand-written tools) import `BoxelCLIClient` from `@cardstack/boxel-cli/api` and call its methods. Auth (tokens, refresh, retries) is fully internal — if you're holding a JWT or calling `fetch` against a realm URL directly, you're in the wrong layer. + +```ts +import { BoxelCLIClient } from '@cardstack/boxel-cli/api'; + +let client = new BoxelCLIClient(); // reads the active Boxel profile +``` + +All examples below assume `client` is a `BoxelCLIClient` instance. + +## Federated search + +Search across one or more realms via `/_federated-search`. Query syntax matches the Boxel realm search format. + +### CLI + +``` +boxel search --realm <realm-url> [--realm <realm-url> ...] --query '<json>' +``` + +`--realm` is repeatable. `--query` takes a JSON string. Append `--json` for raw output. + +### Programmatic + +```ts +let result = await client.search(realmUrl, query); // single realm +let result = await client.search([realmA, realmB], query); // federated +``` + +Returns `{ ok, status, data?, error? }`. 
`data` is an array of card resources. + +### Query syntax + +```json +{ + "filter": { ... }, + "sort": [ ... ], + "page": { "size": 10 } +} +``` + +All top-level fields are optional. An empty query `{}` returns all cards in the targeted realms. + +#### Filter by card type + +```json +{ + "filter": { + "type": { + "module": "http://localhost:4201/software-factory/darkfactory", + "name": "Project" + } + } +} +``` + +Returns all cards that adopt from (or extend) the specified type. Wildcards (`*`) in `module` or `name` are **not** supported — always use a specific CodeRef. + +#### Filter by field value (`eq`) + +`eq` requires an `on` to scope the field to a card type: + +```json +{ + "filter": { + "on": { "module": "...", "name": "Issue" }, + "eq": { "status": "in_progress" } + } +} +``` + +Multiple fields in `eq` are ANDed. Use dot paths for nested fields (e.g. `"author.firstName": "Carl"`). Use `null` to match empty/missing fields. + +#### Substring search (`contains`) + +Case-insensitive substring match: + +```json +{ "filter": { "contains": { "cardTitle": "sticky" } } } +``` + +Scoped form same as `eq` (`on` + `contains`). + +#### Range filters + +```json +{ + "filter": { + "on": { "module": "...", "name": "Post" }, + "range": { "views": { "lte": 10, "gt": 5 } } + } +} +``` + +Operators: `gt`, `gte`, `lt`, `lte`. Works on numeric, date, and string fields. + +#### Boolean combinators + +```json +// AND +{ "filter": { "on": {...}, "every": [ {...}, {...} ] } } + +// OR +{ "filter": { "any": [ {...}, {...} ] } } + +// NOT +{ "filter": { "on": {...}, "not": { "eq": { ... } } } } +``` + +#### Sort + +```json +{ + "sort": [ + { "by": "author.lastName", "on": { "module": "...", "name": "Article" } } + ], + "filter": { "type": { "module": "...", "name": "Article" } } +} +``` + +Add `"direction": "desc"` for descending. + +#### Pagination + +```json +{ "filter": {...}, "page": { "size": 10 } } +``` + +#### CodeRef field matching + +CodeRef fields (e.g. 
`ref` on a Spec card) are matched against the full `{ module, name }`: + +```json +{ + "filter": { + "on": { "module": "https://cardstack.com/base/spec", "name": "Spec" }, + "eq": { + "ref": { + "module": "http://localhost:4201/my-realm/sticky-note", + "name": "StickyNote" + } + } + } +} +``` + +### Common mistakes + +- **Field names without `on`.** Fields like `title`, `status`, etc. are type-specific. The exceptions are `cardTitle` and `cardDescription` — those exist on the base `CardDef`. +- **Relative or bare module URLs.** Always use full absolute module URLs in CodeRefs. +- **Slash separators in dotted paths.** Use `author.firstName`, not `author/firstName`. +- **Searching relationships that aren't rendered in an embedded/fitted template.** The query engine indexes a linked field only if it appears in an embedded format. Otherwise the filter silently misses. + +## When to use what + +| Goal | Use | +| -------------------------------------------------------- | ------------------------------------------------------------------ | +| Find cards in your local synced workspace | Native `grep` / `find` — files are already on disk | +| Find cards by type / field across one or more realms | `boxel search` / `client.search` | +| Read a single card's source from a realm | `client.read(realmUrl, path)` / `boxel file read` | +| Read the transpiled (browser) version of a `.gts` module | `client.readTranspiled(...)` / `boxel read-transpiled` | +| List files in a realm | `client.listFiles(realmUrl)` / `boxel file list` | +| Push local changes to a realm | `client.sync(realmUrl, dir, { preferLocal: true })` / `boxel sync` | +| Pull a realm's state to a local dir | `client.pull(realmUrl, dir)` / `boxel pull` | +| Run a host command (prerendered) | See the `boxel-command` skill | + +## What this skill is **not** for + +- **Card development patterns** (`.gts` field declarations, templates, `linksTo` vs `contains`) — that's `boxel-development`. 
+- **JSON:API document structure** for card instances — that's `boxel-file-structure`. +- **Sync / pull / track / watch CLI ergonomics** — those have their own per-command skills (`boxel-sync`, `boxel-track`, `boxel-watch`). +- **Host commands via the prerenderer** (`/_run-command`) — that's the `boxel-command` skill. +- **Realm provisioning** (`createRealm` / `boxel realm create`) and **readiness polling** (`client.waitForReady` / `/_readiness-check`) — orchestration concerns. The software-factory creates target realms in `factory-target-realm.ts` before the agent loop starts; consumers needing those APIs should read `boxel-cli/src/api.ts` directly or run `boxel realm create --help`. diff --git a/packages/boxel-cli/plugin/skills/boxel-command/SKILL.md b/packages/boxel-cli/plugin/skills/boxel-command/SKILL.md new file mode 100644 index 00000000000..171af2b3d73 --- /dev/null +++ b/packages/boxel-cli/plugin/skills/boxel-command/SKILL.md @@ -0,0 +1,71 @@ +--- +name: boxel-command +description: Use when running a Boxel host command via the realm server's prerenderer — invoking commands like `get-card-type-schema`, `evaluate-module`, `instantiate-card`, or any other module exposed at `@cardstack/boxel-host/commands/.../default`. Documents `boxel run-command` and the matching `client.runCommand()` method. +--- + +# Boxel Host Commands + +Some Boxel operations only exist inside the host app's prerendered runtime — there's no realm-server HTTP endpoint for them, and they can't be reimplemented in plain Node. The realm server's `/_run-command` endpoint forwards a job to the prerenderer (a headless Chrome instance that has the full host runtime loaded), executes the named command there, and returns the serialized result. Schema introspection, module evaluation, card instantiation, transpiled-module fetches — all of these go through `run-command`. + +This skill documents how to invoke that flow. 
+ +## When to use it + +- **Card type schema lookup.** Get the live `{ attributes, relationships }` JSON Schema for a `CardDef` by introspecting its real class at runtime — not by reading the `.gts` source. +- **Module evaluation.** Load a `.gts` / `.ts` module in the prerender sandbox to surface broken imports, circular references, or top-level runtime errors before they hit a real consumer. +- **Card instantiation.** Construct a card instance from a JSON document inside the prerender — exercises the `CardDef` class against the document shape. +- **Anything else exposed at `@cardstack/boxel-host/commands/<name>/default`.** Each module is its own host command. + +## CLI + +``` +boxel run-command <command-module-path> --realm <realm-url> [--input '<json>'] [--json] +``` + +- `<command-module-path>` — the module path of the command (e.g. `@cardstack/boxel-host/commands/get-card-type-schema/default`). +- `--realm` — the realm URL the command runs against. Required. +- `--input` — JSON string passed as the command's input. Optional; some commands take no input. +- `--json` — emit the raw response instead of the formatted summary. + +### Example + +``` +boxel run-command @cardstack/boxel-host/commands/get-card-type-schema/default \ + --realm http://localhost:4201/my-realm/ \ + --input '{"codeRef":{"module":"http://localhost:4201/my-realm/sticky-note","name":"StickyNote"}}' +``` + +## Programmatic + +```ts +import { BoxelCLIClient } from '@cardstack/boxel-cli/api'; + +let client = new BoxelCLIClient(); + +let result = await client.runCommand( + realmServerUrl, + realmUrl, + '@cardstack/boxel-host/commands/get-card-type-schema/default', + { codeRef: { module: '<card-module-url>', name: 'StickyNote' } }, +); +``` + +Returns `{ status: 'ready' | 'error' | 'unusable', result?: string | null, error?: string | null }`. `result` is the command's serialized output (a JSON string — parse it yourself). `error` is set when `status !== 'ready'`. + +## How it works under the hood + +`/_run-command` enqueues a job for the realm worker. 
The worker hands it to the prerenderer (which has the host app, the realm's Loader, the CardAPI, and all field serializers loaded). The command module is imported, called with the input, and its result is serialized back through the queue to the HTTP response. + +Three failure modes you'll see: + +- `status: 'unusable'` — the prerender pool is broken (e.g. "No standby page available for prerender"). Not retryable from the caller's side; usually a sign the realm-server worker / prerender pool itself is unhealthy. +- `status: 'error'` with `error: "module URL not found"` — the realm's in-memory module map hasn't indexed the file yet. Common right after a `/_atomic` write; caller can retry briefly or use `client.sync(..., { waitForIndex: true })` upstream. +- `status: 'error'` with any other message — the command threw inside the prerender. The `error` is the thrown error's message; the original stack is usually in the worker logs. + +The realm server itself enforces auth (server JWT via `BoxelCLIClient`); the prerender executes inside the realm's sandbox with the realm's permissions. + +## What this skill is **not** for + +- **Realm-side HTTP endpoints** (search, file read/write, atomic batches) — those are direct `BoxelCLIClient` methods. See the `boxel-api` skill. +- **Programmatic in-memory validators** (`runLintInMemory`, `runEvaluateInMemory`, `runParseInMemory`, `runInstantiateInMemory`) — those wrap `runCommand` internally but expose a flatter result shape; consumers usually want those, not raw `runCommand`. They live in the software-factory package. +- **Defining new host commands.** That's host-app development (`packages/host/app/commands/`). 
diff --git a/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md b/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md index fc28c2eef7b..9b75bef04a8 100644 --- a/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md +++ b/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md @@ -66,20 +66,9 @@ Bidirectional sync between a local directory and a Boxel realm - `--dry-run` — Preview without making changes - `--realm-secret-seed` — Administrative auth: prompt for a realm secret seed and mint a JWT locally instead of using a Matrix profile (env: BOXEL_REALM_SECRET_SEED) -### `boxel realm watch ` +### `boxel realm watch` -Watch a Boxel realm for server-side changes and pull them into a local directory - -**Arguments:** - -- `` — The URL of the realm to watch (e.g., https://app.boxel.ai/demo/) -- `` — The local directory to write changes into - -**Options:** - -- `-i, --interval ` — Polling interval in seconds -- `-d, --debounce ` — Seconds to wait after a burst of changes before applying them -- `--realm-secret-seed` — Administrative auth: prompt for a realm secret seed and mint a JWT locally instead of using a Matrix profile (env: BOXEL_REALM_SECRET_SEED) +Watch a Boxel realm; subcommands manage watch processes ### `boxel realm push ` diff --git a/packages/boxel-cli/src/commands/file/index.ts b/packages/boxel-cli/src/commands/file/index.ts index 3ba7f11e76b..eecb960c831 100644 --- a/packages/boxel-cli/src/commands/file/index.ts +++ b/packages/boxel-cli/src/commands/file/index.ts @@ -9,7 +9,7 @@ import { registerWriteCommand } from './write'; export function registerFileCommand(program: Command): void { let file = program .command('file') - .description('Read, write, search, and manage files in a realm'); + .description('Read, write, and manage files in a realm'); registerDeleteCommand(file); registerListCommand(file); diff --git a/packages/boxel-cli/src/lib/boxel-cli-client.ts b/packages/boxel-cli/src/lib/boxel-cli-client.ts index 2ede7e9376a..34a4ad7df06 100644 --- 
/**
 * Return the realm-server JWT for the active profile.
 *
 * Delegates to the profile manager's `getOrRefreshServerToken()`, which
 * returns the cached token when it is still usable and otherwise logs in
 * via Matrix to obtain a new one.
 *
 * Use only when the bare token has to be handed to a downstream client
 * that cannot go through `authedServerFetch` (e.g. a provider config that
 * takes a static `Authorization` header). Prefer `authedServerFetch` for
 * server endpoints called from JS: it handles per-request 401 retries,
 * which a one-shot getter cannot.
 *
 * @returns the realm-server token string.
 */
async getServerToken(): Promise<string> {
  return this.pm.getOrRefreshServerToken();
}
/**
 * How close to expiry (in seconds) a cached realm-server token may get
 * before it is re-minted. Keeping a full day of headroom means a
 * long-running operation, or a downstream consumer that bakes the token
 * into a static header, does not hit a 401 mid-flight.
 */
const SERVER_TOKEN_EXPIRY_SAFETY_MARGIN_SEC = 86400; // 1 day

/**
 * Report whether a JWT is expired or will expire within `safetyMarginSec`.
 *
 * The token is only decoded, never verified: signature checking is the
 * realm server's job on every request, and all we need here is `exp`.
 *
 * @param token - the cached token, optionally prefixed with `Bearer `.
 * @param safetyMarginSec - minimum remaining lifetime, in seconds, for the
 *   token to count as still usable.
 * @returns `true` when the token should be refreshed. This includes any
 *   token that cannot be decoded or whose `exp` claim is missing or not a
 *   finite number.
 */
function isJwtNearExpiry(
  token: string,
  safetyMarginSec = SERVER_TOKEN_EXPIRY_SAFETY_MARGIN_SEC,
): boolean {
  // Tokens are cached verbatim from the realm server's `Authorization`
  // response header, so they carry a `Bearer ` prefix. Strip it before
  // decoding, otherwise decode returns null and we'd refresh on every call.
  let raw = token.replace(/^Bearer\s+/i, '');
  let decoded: unknown = jwt.decode(raw);
  if (typeof decoded !== 'object' || decoded === null) {
    return true; // unparseable → treat as expired
  }
  let exp = (decoded as { exp?: unknown }).exp;
  // A non-numeric `exp` would make the subtraction below NaN, and
  // `NaN < margin` is false, which would keep a malformed token cached
  // forever. Treat anything that is not a usable number as expired.
  if (typeof exp !== 'number' || !Number.isFinite(exp) || exp === 0) {
    return true;
  }
  let nowSec = Math.floor(Date.now() / 1000);
  return exp - nowSec < safetyMarginSec;
}
logger('openrouter-passthrough'); + +const OPENROUTER_CHAT_URL = 'https://openrouter.ai/api/v1/chat/completions'; + +/** + * OpenAI-compatible passthrough to OpenRouter chat completions. + * + * Unlike `/_request-forward` (which expects a `{ url, method, requestBody }` + * envelope and exists to proxy arbitrary whitelisted destinations), this + * endpoint accepts a verbatim OpenAI chat-completions body and pins the + * upstream destination to `OPENROUTER_CHAT_URL` server-side, so an + * OpenAI-compatible client (e.g. software-factory's opencode backend) + * can point its `baseURL` straight at the realm server. + * + * Auth: the realm-server JWT (via `jwtMiddleware`). The static + * `Authorization` header AI-SDK clients stamp onto every request goes + * here; we never expose the OpenRouter API key to the caller. + * + * Streaming: driven by `stream: true` inside the OpenAI body — the caller + * does not pass it as a query string. + * + * Credit accounting / streaming framing is shared with `_request-forward` + * via `lib/proxy-forward`, so per-user cost-deduction ordering is preserved + * across both endpoints. 
/**
 * Build the Koa handler for the OpenAI-compatible OpenRouter passthrough.
 *
 * Per request, the handler:
 *  1. requires `ctxt.state.token` and answers with a forbidden response
 *     when it is absent;
 *  2. parses the request body as JSON and rejects anything that is not a
 *     plain JSON object with a bad-request response;
 *  3. looks up the destination config for `OPENROUTER_CHAT_URL` and answers
 *     with a system error when none is configured;
 *  4. waits for the user's pending cost bookkeeping, then validates
 *     credits through the destination's credit strategy;
 *  5. forwards the body to OpenRouter with the server-held API key. When
 *     the body has `stream: true` this is delegated to
 *     `handleStreamingRequest`; otherwise it is a single `fetch` whose JSON
 *     response is passed to `trackCostDeduction` and returned with the
 *     upstream status.
 *
 * Any unexpected exception is logged, reported to Sentry, and turned into
 * a generic system-error response.
 *
 * @param dbAdapter - database adapter used for the destination lookup,
 *   credit validation, and cost tracking.
 * @returns the async Koa handler.
 */
export default function handleOpenRouterPassthrough({
  dbAdapter,
}: {
  dbAdapter: DBAdapter;
}) {
  return async function (ctxt: Koa.Context, _next: Koa.Next) {
    try {
      const token = ctxt.state.token;
      if (!token) {
        await sendResponseForForbiddenRequest(
          ctxt,
          'Token is required to forward requests',
        );
        return;
      }
      const { user: matrixUserId } = token;

      const request = await fetchRequestFromContext(ctxt);
      const rawBody = await request.text();

      // Parse into `unknown` first and only narrow after the runtime shape
      // check below, so the static type never claims more than was verified.
      let parsedBody: unknown;
      try {
        parsedBody = JSON.parse(rawBody);
      } catch {
        await sendResponseForBadRequest(ctxt, 'Request body is not valid JSON');
        return;
      }
      if (
        typeof parsedBody !== 'object' ||
        parsedBody === null ||
        Array.isArray(parsedBody)
      ) {
        await sendResponseForBadRequest(
          ctxt,
          'Request body must be a JSON object',
        );
        return;
      }
      const openAIBody = parsedBody as Record<string, unknown>;
      // Streaming is selected by the OpenAI body itself, not a query string.
      const isStreaming = openAIBody.stream === true;

      const destinationsConfig =
        AllowedProxyDestinations.getInstance(dbAdapter);
      const destinationConfig =
        await destinationsConfig.getDestinationConfig(OPENROUTER_CHAT_URL);
      if (!destinationConfig) {
        // Misconfiguration on the server side — OpenRouter must be in the
        // proxy_endpoints whitelist for this endpoint to function.
        await sendResponseForSystemError(
          ctxt,
          'OpenRouter passthrough is not configured on this realm server',
        );
        return;
      }

      // Make sure the previous request's cost has been recorded before
      // validating credits for this one.
      try {
        await awaitPendingCost(matrixUserId);
      } catch (e) {
        log.error('Error waiting for pending cost:', e);
        await sendResponseForSystemError(
          ctxt,
          'There was an error saving your Boxel credits usage. Try again or contact support if the problem persists.',
        );
        return;
      }

      const creditValidation =
        await destinationConfig.creditStrategy.validateCredits(
          dbAdapter,
          matrixUserId,
        );
      if (!creditValidation.hasEnoughCredits) {
        await sendResponseForForbiddenRequest(
          ctxt,
          creditValidation.errorMessage || 'Insufficient credits',
        );
        return;
      }

      // The caller's own Authorization header is never forwarded; the
      // upstream request is authenticated with the server-held API key.
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${destinationConfig.apiKey}`,
      };
      const finalBody = JSON.stringify(openAIBody);

      if (isStreaming) {
        if (!destinationConfig.supportsStreaming) {
          await sendResponseForBadRequest(
            ctxt,
            'Streaming is not supported for the OpenRouter passthrough',
          );
          return;
        }
        await handleStreamingRequest(
          ctxt,
          OPENROUTER_CHAT_URL,
          'POST',
          headers,
          finalBody,
          destinationConfig,
          dbAdapter,
          matrixUserId,
        );
        return;
      }

      const externalResponse = await globalThis.fetch(OPENROUTER_CHAT_URL, {
        method: 'POST',
        headers,
        body: finalBody,
      });
      // A non-JSON upstream body makes this throw and lands in the outer
      // catch, which answers with the generic system error.
      const responseData = await externalResponse.json();

      trackCostDeduction(
        destinationConfig,
        dbAdapter,
        matrixUserId,
        responseData,
      );

      // Mirror the upstream status so the client sees OpenRouter's outcome.
      const response = new Response(JSON.stringify(responseData), {
        status: externalResponse.status,
        statusText: externalResponse.statusText,
        headers: { 'content-type': SupportedMimeType.JSON },
      });
      await setContextResponse(ctxt, response);
    } catch (error) {
      log.error('Error in openrouter-passthrough handler:', error);
      Sentry.captureException(error);
      await sendResponseForSystemError(
        ctxt,
        'An error occurred while processing the request',
      );
    }
  };
}
fetchRequestFromContext, } from '../middleware'; import { AllowedProxyDestinations } from '../lib/allowed-proxy-destinations'; +import { + awaitPendingCost, + handleStreamingRequest, + trackCostDeduction, +} from '../lib/proxy-forward'; import * as Sentry from '@sentry/node'; const log = logger('request-forward'); -// Track pending cost-saving promises per user so we can ensure the previous -// request's cost has been recorded before allowing a new one -const pendingCostPromises = new Map>(); - -async function handleStreamingRequest( - ctxt: Koa.Context, - url: string, - method: string, - headers: Record, - requestBody: BodyInit | undefined, - endpointConfig: any, - dbAdapter: DBAdapter, - matrixUserId: string, -) { - try { - setupSSEHeaders(ctxt); - - const fetchInit: RequestInit = { - method, - headers, - }; - if (requestBody !== undefined) { - fetchInit.body = requestBody; - } - - const externalResponse = await fetch(url, fetchInit); - - ctxt.res.write(': connected\n\n'); - - if (!externalResponse.ok) { - const errorData = await externalResponse.text(); - log.error( - `Streaming request failed: ${externalResponse.status} - ${errorData}`, - ); - ctxt.status = externalResponse.status; - ctxt.res.write(`data: ${JSON.stringify({ error: errorData })}\n\n`); - ctxt.res.write('data: [DONE]\n\n'); - return; - } - - const reader = externalResponse.body?.getReader(); - if (!reader) throw new Error('No readable stream available'); - - let generationId: string | undefined; - let costInUsd: number | undefined; - let lastPing = Date.now(); - - await proxySSE( - reader, - async (data) => { - // Handle end of stream - if (data === '[DONE]') { - // Only deduct credits when we observed billable metadata during - // the stream (an inline cost or a generation ID for the fallback). - if ( - generationId != null || - (typeof costInUsd === 'number' && - Number.isFinite(costInUsd) && - costInUsd > 0) - ) { - const previousPromise = - pendingCostPromises.get(matrixUserId) ?? 
Promise.resolve(); - const costPromise = previousPromise - .then(() => - endpointConfig.creditStrategy.saveUsageCost( - dbAdapter, - matrixUserId, - { id: generationId, usage: { cost: costInUsd } }, - ), - ) - .finally(() => { - if (pendingCostPromises.get(matrixUserId) === costPromise) { - pendingCostPromises.delete(matrixUserId); - } - }); - pendingCostPromises.set(matrixUserId, costPromise); - } else { - log.warn( - `Streaming response for user ${matrixUserId} contained no generation ID or usage cost, skipping credit deduction`, - ); - } - - ctxt.res.write(`data: [DONE]\n\n`); - return 'stop'; - } - - // Try parsing JSON data - try { - const dataObj = JSON.parse(data); - - if (!generationId && dataObj.id) { - generationId = dataObj.id; - } - - if (dataObj.usage?.cost != null) { - costInUsd = dataObj.usage.cost; - } - } catch { - log.warn('Invalid JSON in streaming response:', data); - } - - ctxt.res.write(`data: ${data}\n\n`); - return; - }, - () => { - // Keep-alive ping - const now = Date.now(); - if (now - lastPing > KEEP_ALIVE_INTERVAL_MS) { - ctxt.res.write(': ping\n\n'); - lastPing = now; - } - }, - ); - } catch (error) { - log.error('Error in streaming request:', error); - Sentry.captureException(error); - ctxt.res.write( - `data: ${JSON.stringify({ error: 'Streaming error occurred' })}\n\n`, - ); - ctxt.res.write('data: [DONE]\n\n'); - } -} - -/** --------------------------- - * Helper functions - * --------------------------- */ -const KEEP_ALIVE_INTERVAL_MS = 15000; - -function setupSSEHeaders(ctx: Koa.Context) { - ctx.set('Content-Type', 'text/event-stream'); - ctx.set('Cache-Control', 'no-cache, no-store, must-revalidate'); - ctx.set('Connection', 'keep-alive'); - ctx.set('Access-Control-Allow-Origin', '*'); - ctx.set('Access-Control-Allow-Headers', 'Cache-Control'); - ctx.set('X-Accel-Buffering', 'no'); // Disable nginx buffering - ctx.set('Transfer-Encoding', 'chunked'); - ctx.body = null; - ctx.status = 200; - ctx.res.flushHeaders(); -} - -async 
function proxySSE( - reader: ReadableStreamDefaultReader, - onData: (data: string) => Promise, - onTick?: () => void, -) { - let buffer = ''; - try { - // eslint-disable-next-line no-constant-condition - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - buffer += new TextDecoder().decode(value); - if (onTick) onTick(); - - for (const line of extractSSELines(buffer)) { - if (!line || line.startsWith(':')) continue; - if (line.startsWith('data: ')) { - const data = line.slice(6); - const result = await onData(data); - if (result === 'stop') return; - } - } - } - } finally { - reader.releaseLock(); - } -} - -function extractSSELines(buffer: string): string[] { - const lines: string[] = []; - let lineEnd: number; - while ((lineEnd = buffer.indexOf('\n')) !== -1) { - lines.push(buffer.slice(0, lineEnd).trim()); - buffer = buffer.slice(lineEnd + 1); - } - return lines; -} - interface MultipartFileField { filename: string; content: string; @@ -359,18 +184,15 @@ export default function handleRequestForward({ } // 4. Wait for any pending cost from a previous request to be recorded - const pendingCost = pendingCostPromises.get(matrixUserId); - if (pendingCost) { - try { - await pendingCost; - } catch (e) { - log.error('Error waiting for pending cost:', e); - await sendResponseForSystemError( - ctxt, - 'There was an error saving your Boxel credits usage. Try again or contact support if the problem persists.', - ); - return; - } + try { + await awaitPendingCost(matrixUserId); + } catch (e) { + log.error('Error waiting for pending cost:', e); + await sendResponseForSystemError( + ctxt, + 'There was an error saving your Boxel credits usage. Try again or contact support if the problem persists.', + ); + return; } // 5. Check user has sufficient credits using credit strategy @@ -515,22 +337,12 @@ export default function handleRequestForward({ const responseData = await externalResponse.json(); // 6. 
Deduct credits in the background using the cost from the response. - const previousPromise = - pendingCostPromises.get(matrixUserId) ?? Promise.resolve(); - const costPromise = previousPromise - .then(() => - destinationConfig.creditStrategy.saveUsageCost( - dbAdapter, - matrixUserId, - responseData, - ), - ) - .finally(() => { - if (pendingCostPromises.get(matrixUserId) === costPromise) { - pendingCostPromises.delete(matrixUserId); - } - }); - pendingCostPromises.set(matrixUserId, costPromise); + trackCostDeduction( + destinationConfig, + dbAdapter, + matrixUserId, + responseData, + ); // 7. Return response const response = new Response(JSON.stringify(responseData), { diff --git a/packages/realm-server/lib/proxy-forward.ts b/packages/realm-server/lib/proxy-forward.ts new file mode 100644 index 00000000000..854a26b145e --- /dev/null +++ b/packages/realm-server/lib/proxy-forward.ts @@ -0,0 +1,212 @@ +import type Koa from 'koa'; +import type { DBAdapter } from '@cardstack/runtime-common'; +import { logger } from '@cardstack/runtime-common'; +import * as Sentry from '@sentry/node'; + +import type { AllowedProxyDestination } from './allowed-proxy-destinations'; + +const log = logger('proxy-forward'); + +/** + * Per-user barrier ensuring the previous request's billable cost has been + * recorded before a new request starts. Shared across every handler that + * forwards through a credit-bearing destination so the same user can't race + * concurrent requests through different endpoints (e.g. `_request-forward` + * and `/_openrouter/chat/completions`). + */ +const pendingCostPromises = new Map<string, Promise<unknown>>(); + +const KEEP_ALIVE_INTERVAL_MS = 15000; +
+/**
+ * Wait until the cost deduction currently registered for `matrixUserId` has
+ * settled. Resolves immediately when nothing is pending; rejects with the
+ * deduction's error when it failed, so the caller can refuse the request.
+ */
+export async function awaitPendingCost(matrixUserId: string): Promise<void> {
+  const pending = pendingCostPromises.get(matrixUserId);
+  if (pending) {
+    await pending;
+  }
+}
+
+/** Schedule cost deduction in the background, chained after any prior pending. 
+ *
+ * A failed earlier deduction does not cancel this one, and a failure of this
+ * deduction is logged here because nothing else is guaranteed to await it;
+ * `awaitPendingCost` still observes the rejection while it is pending.
+ */
+export function trackCostDeduction(
+  destinationConfig: AllowedProxyDestination,
+  dbAdapter: DBAdapter,
+  matrixUserId: string,
+  responseData: unknown,
+): void {
+  const previous = pendingCostPromises.get(matrixUserId) ?? Promise.resolve();
+  const cost = previous
+    // Keep ordering, but don't let an earlier failure skip this save:
+    // without this, `.then` below would never run after a rejection and
+    // this response's cost would be silently dropped.
+    .catch(() => undefined)
+    .then(() =>
+      destinationConfig.creditStrategy.saveUsageCost(
+        dbAdapter,
+        matrixUserId,
+        responseData,
+      ),
+    )
+    .finally(() => {
+      // Only clear the barrier if no later deduction has replaced it.
+      if (pendingCostPromises.get(matrixUserId) === cost) {
+        pendingCostPromises.delete(matrixUserId);
+      }
+    });
+  // The entry removes itself once settled, so usually no request awaits a
+  // failure. Observe it on a side branch to avoid an unhandled rejection;
+  // `cost` itself stays rejected for any concurrent `awaitPendingCost`.
+  cost.catch((error) => {
+    log.error(`Error saving usage cost for user ${matrixUserId}:`, error);
+    Sentry.captureException(error);
+  });
+  pendingCostPromises.set(matrixUserId, cost);
+}
+ +/** + * Stream the upstream `text/event-stream` response back to the client, parsing + * each `data:` line so we can capture the OpenRouter generation id / inline + * cost and schedule a credit deduction at `[DONE]`. + */ +export async function handleStreamingRequest( + ctxt: Koa.Context, + url: string, + method: string, + headers: Record<string, string>, + requestBody: BodyInit | undefined, + endpointConfig: AllowedProxyDestination, + dbAdapter: DBAdapter, + matrixUserId: string, +): Promise<void> { + try { + setupSSEHeaders(ctxt); + + const fetchInit: RequestInit = { method, headers }; + if (requestBody !== undefined) { + fetchInit.body = requestBody; + } + + const externalResponse = await fetch(url, fetchInit); + + if (!externalResponse.ok) { + const errorData = await externalResponse.text(); + log.error( + `Streaming request failed: ${externalResponse.status} - ${errorData}`, + ); + ctxt.status = externalResponse.status; + ctxt.res.write(`data: ${JSON.stringify({ error: errorData })}\n\n`); + ctxt.res.write('data: [DONE]\n\n'); + return; + } + + // First write commits headers + status to the wire, so do this + // only after the upstream-OK check above has had a chance to + // override the status. 
+ ctxt.res.write(': connected\n\n'); + + const reader = externalResponse.body?.getReader(); + if (!reader) throw new Error('No readable stream available'); + + let generationId: string | undefined; + let costInUsd: number | undefined; + let lastPing = Date.now(); + + await proxySSE( + reader, + async (data) => { + if (data === '[DONE]') { + if ( + generationId != null || + (typeof costInUsd === 'number' && + Number.isFinite(costInUsd) && + costInUsd > 0) + ) { + trackCostDeduction(endpointConfig, dbAdapter, matrixUserId, { + id: generationId, + usage: { cost: costInUsd }, + }); + } else { + log.warn( + `Streaming response for user ${matrixUserId} contained no generation ID or usage cost, skipping credit deduction`, + ); + } + + ctxt.res.write(`data: [DONE]\n\n`); + return 'stop'; + } + + try { + const dataObj = JSON.parse(data); + if (!generationId && dataObj.id) { + generationId = dataObj.id; + } + if (dataObj.usage?.cost != null) { + costInUsd = dataObj.usage.cost; + } + } catch { + log.warn('Invalid JSON in streaming response:', data); + } + + ctxt.res.write(`data: ${data}\n\n`); + return; + }, + () => { + const now = Date.now(); + if (now - lastPing > KEEP_ALIVE_INTERVAL_MS) { + ctxt.res.write(': ping\n\n'); + lastPing = now; + } + }, + ); + } catch (error) { + log.error('Error in streaming request:', error); + Sentry.captureException(error); + ctxt.res.write( + `data: ${JSON.stringify({ error: 'Streaming error occurred' })}\n\n`, + ); + ctxt.res.write('data: [DONE]\n\n'); + } +} + +function setupSSEHeaders(ctx: Koa.Context) { + // Headers and status are set here but NOT flushed — `flushHeaders` + // commits the wire status, which would mask any later + // `ctx.status = upstream.status` on upstream failure. Caller flushes + // (implicitly, via the first `ctx.res.write`) only after confirming + // the upstream response was OK. 
+ ctx.set('Content-Type', 'text/event-stream'); + ctx.set('Cache-Control', 'no-cache, no-store, must-revalidate'); + ctx.set('Connection', 'keep-alive'); + ctx.set('Access-Control-Allow-Origin', '*'); + ctx.set('Access-Control-Allow-Headers', 'Cache-Control'); + ctx.set('X-Accel-Buffering', 'no'); // Disable nginx buffering + ctx.set('Transfer-Encoding', 'chunked'); + ctx.body = null; + ctx.status = 200; +} +
+/**
+ * Read an upstream SSE body and hand the payload of every `data: ` line to
+ * `onData`, in order and exactly once.
+ *
+ * @param reader Reader over the upstream response body (raw bytes).
+ * @param onData Called with the text after `data: `; resolve to `'stop'` to
+ *   end processing early.
+ * @param onTick Optional hook invoked once per chunk read (keep-alive pings).
+ */
+async function proxySSE(
+  reader: ReadableStreamDefaultReader<Uint8Array>,
+  onData: (data: string) => Promise<'stop' | void>,
+  onTick?: () => void,
+) {
+  // One decoder for the whole stream. With `stream: true` it holds back an
+  // incomplete multi-byte UTF-8 sequence until the next chunk arrives; a
+  // fresh decoder per chunk would turn a character split across two reads
+  // into replacement characters.
+  const decoder = new TextDecoder();
+  let buffer = '';
+
+  // Dispatches each `data: ` line; resolves to true once `onData` asks to
+  // stop. Blank lines, `:` comments and other SSE fields are skipped.
+  const dispatch = async (lines: string[]): Promise<boolean> => {
+    for (const raw of lines) {
+      const line = raw.trim();
+      if (!line || line.startsWith(':')) continue;
+      if (line.startsWith('data: ')) {
+        const result = await onData(line.slice(6));
+        if (result === 'stop') return true;
+      }
+    }
+    return false;
+  };
+
+  try {
+    // eslint-disable-next-line no-constant-condition
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      if (onTick) onTick();
+
+      // Split on `\n`, keep the trailing incomplete fragment in `buffer`
+      // for the next read, and dispatch every complete line. Carrying only
+      // the remainder forward is what prevents already-delivered lines
+      // from being re-emitted on later reads.
+      const parts = buffer.split('\n');
+      buffer = parts.pop() ?? '';
+      if (await dispatch(parts)) return;
+    }
+
+    // Upstream closed: flush the decoder and deliver a final line that was
+    // not newline-terminated instead of silently dropping it.
+    buffer += decoder.decode();
+    if (buffer) await dispatch(buffer.split('\n'));
+  } finally {
+    reader.releaseLock();
+  }
+}
diff --git a/packages/realm-server/routes.ts b/packages/realm-server/routes.ts index bbd18953560..c80936071b0 100644 --- a/packages/realm-server/routes.ts +++ b/packages/realm-server/routes.ts @@ -29,6 +29,7 @@ import handleAddCredit from './handlers/handle-add-credit'; import handleUpsertRealmUserPermission from './handlers/handle-upsert-realm-user-permission'; import handleCreateStripeSessionRequest from './handlers/handle-create-stripe-session'; import handleRequestForward from './handlers/handle-request-forward'; +import handleOpenRouterPassthrough from './handlers/handle-openrouter-passthrough'; import handlePostDeployment from './handlers/handle-post-deployment'; import { handleCheckBoxelDomainAvailabilityRequest } from './handlers/handle-check-boxel-domain-availability'; import handleRealmAuth from './handlers/handle-realm-auth'; @@ -167,6 +168,13 @@ export function createRoutes(args: CreateRoutesArgs) { dbAdapter: args.dbAdapter, }), ); + router.post( + '/_openrouter/chat/completions', + jwtMiddleware(args.realmSecretSeed), + handleOpenRouterPassthrough({ + dbAdapter: args.dbAdapter, + }), + ); router.all( '/_federated-search', multiRealmAuthorization(args), diff --git a/packages/realm-server/tests/index.ts b/packages/realm-server/tests/index.ts index 44381af0fdf..dc6d1737a41 100644 --- a/packages/realm-server/tests/index.ts +++ b/packages/realm-server/tests/index.ts @@ -240,6 +240,7 @@ const ALL_TEST_FILES: string[] = [ './types-endpoint-test', './virtual-network-test', './request-forward-test', + './openrouter-passthrough-test', './publish-unpublish-realm-test', './boxel-domain-availability-test', './get-boxel-claimed-domain-test', diff --git 
a/packages/realm-server/tests/openrouter-passthrough-test.ts b/packages/realm-server/tests/openrouter-passthrough-test.ts new file mode 100644 index 00000000000..eacabfd8953 --- /dev/null +++ b/packages/realm-server/tests/openrouter-passthrough-test.ts @@ -0,0 +1,366 @@ +import { module, test } from 'qunit'; +import sinon from 'sinon'; +import type { Test, SuperTest } from 'supertest'; +import supertest from 'supertest'; +import { basename, join } from 'path'; +import type { Server } from 'http'; +import { dirSync, type DirResult } from 'tmp'; +import { copySync, ensureDirSync } from 'fs-extra'; +import { + setupDB, + runTestRealmServer, + closeServer, + insertUser, + insertPlan, + realmSecretSeed, + createVirtualNetwork, + waitUntil, +} from './helpers'; +import { createJWT as createRealmServerJWT } from '../utils/jwt'; +import { + addToCreditsLedger, + getUserByMatrixUserId, + sumUpCreditsLedger, +} from '@cardstack/billing/billing-queries'; +import { AllowedProxyDestinations } from '../lib/allowed-proxy-destinations'; + +// Integration tests for POST /_openrouter/chat/completions. The upstream +// fetch is stubbed with sinon; credit deductions are asserted against the +// credits ledger. +module(basename(__filename), function () { + module( + 'Realm-specific Endpoints | _openrouter/chat/completions', + function (hooks) { + let testRealmHttpServer: Server; + let testRealm: any; + let dbAdapter: any; + let publisher: any; + let runner: any; + let request: SuperTest<Test>; + let testRealmDir: string; + let dir: DirResult; + + let virtualNetwork = createVirtualNetwork(); + + hooks.beforeEach(async function () { + dir = dirSync(); + copySync(join(__dirname, 'cards'), dir.name); + }); + + async function startRealmServer( + dbAdapter: any, + publisher: any, + runner: any, + ) { + if (testRealm) { + virtualNetwork.unmount(testRealm.handle); + } + + ({ testRealm, testRealmHttpServer } = await runTestRealmServer({ + virtualNetwork, + testRealmDir, + realmsRootPath: join(dir.name, 'realm_server_2'), + realmURL: new URL('http://127.0.0.1:4445/test/'), + dbAdapter, + publisher, + runner, + matrixURL: new URL('http://localhost:8008'), + })); + 
request = supertest(testRealmHttpServer); + } + + setupDB(hooks, { + beforeEach: async (_dbAdapter, _publisher, _runner) => { + dbAdapter = _dbAdapter; + publisher = _publisher; + runner = _runner; + testRealmDir = join(dir.name, 'realm_server_2', 'test'); + ensureDirSync(testRealmDir); + copySync(join(__dirname, 'cards'), testRealmDir); + + // Whitelist OpenRouter chat completions so the passthrough handler + // can resolve a destination config + credit strategy. + await dbAdapter.execute( + `INSERT INTO proxy_endpoints (id, url, api_key, credit_strategy, supports_streaming, auth_method, auth_parameter_name, created_at, updated_at) + VALUES + (gen_random_uuid(), 'https://openrouter.ai/api/v1/chat/completions', 'openrouter-api-key', 'openrouter', true, NULL, NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT (url) + DO UPDATE SET + api_key = EXCLUDED.api_key, + credit_strategy = EXCLUDED.credit_strategy, + supports_streaming = EXCLUDED.supports_streaming, + updated_at = CURRENT_TIMESTAMP`, + ); + + await startRealmServer(dbAdapter, publisher, runner); + + await insertUser( + dbAdapter, + '@testuser:localhost', + 'cus_test123', + 'test@example.com', + ); + + await insertPlan(dbAdapter, 'Test Plan', 1000, 100, 'price_test123'); + + const user = await getUserByMatrixUserId( + dbAdapter, + '@testuser:localhost', + ); + if (user) { + await addToCreditsLedger(dbAdapter, { + userId: user.id, + creditAmount: 50, + creditType: 'extra_credit', + subscriptionCycleId: null, + }); + } + }, + afterEach: async () => { + AllowedProxyDestinations.reset(); + await closeServer(testRealmHttpServer); + }, + }); + + test('forwards a verbatim OpenAI body to OpenRouter and deducts credits', async function (assert) { + const originalFetch = global.fetch; + const mockFetch = sinon.stub(global, 'fetch'); + + const mockOpenRouterResponse = { + id: 'gen-test-passthrough-1', + choices: [{ message: { role: 'assistant', content: 'hi' } }], + usage: { total_tokens: 42, cost: 0.005 }, + }; + 
+ mockFetch.callsFake( + async (input: string | URL | Request, _init?: RequestInit) => { + const url = typeof input === 'string' ? input : input.toString(); + if (url === 'https://openrouter.ai/api/v1/chat/completions') { + return new Response(JSON.stringify(mockOpenRouterResponse), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + } + return new Response(JSON.stringify({ error: 'Not found' }), { + status: 404, + headers: { 'content-type': 'application/json' }, + }); + }, + ); + + try { + const jwt = createRealmServerJWT( + { user: '@testuser:localhost', sessionRoom: 'test-session-room' }, + realmSecretSeed, + ); + + const openAIBody = { + model: 'anthropic/claude-opus-4-7', + messages: [{ role: 'user', content: 'Hello' }], + }; + + const response = await request + .post('/_openrouter/chat/completions') + .set('Accept', 'application/json') + .set('Content-Type', 'application/json') + .set('Authorization', `Bearer ${jwt}`) + .send(openAIBody); + + assert.strictEqual(response.status, 200); + assert.deepEqual(response.body, mockOpenRouterResponse); + + const calls = mockFetch.getCalls(); + const upstream = calls.find((call) => { + const url = call.args[0]; + const href = typeof url === 'string' ? 
url : url?.toString(); + return href === 'https://openrouter.ai/api/v1/chat/completions'; + }); + assert.ok(upstream, 'fetched upstream chat completions URL'); + + const upstreamInit = upstream!.args[1] as RequestInit; + const upstreamHeaders = upstreamInit.headers as Record< + string, + string + >; + assert.strictEqual( + upstreamHeaders.Authorization, + 'Bearer openrouter-api-key', + 'server-side OpenRouter key is stamped onto upstream Authorization', + ); + assert.deepEqual( + JSON.parse(upstreamInit.body as string), + openAIBody, + 'OpenAI body forwarded verbatim', + ); + + const user = await getUserByMatrixUserId( + dbAdapter, + '@testuser:localhost', + ); + await waitUntil( + async () => { + const credits = await sumUpCreditsLedger(dbAdapter, { + creditType: ['extra_credit', 'extra_credit_used'], + userId: user!.id, + }); + return credits === 45; // 50 - (0.005 * 1000) = 45 + }, + { timeoutMessage: 'Credits should be deducted (50 - 5 = 45)' }, + ); + } finally { + mockFetch.restore(); + global.fetch = originalFetch; + } + }); + + test('streams the upstream SSE response when stream: true is in the body', async function (assert) { + const originalFetch = global.fetch; + const mockFetch = sinon.stub(global, 'fetch'); + + const mockStreamResponse = new Response( + new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"id":"gen-stream-pt","choices":[{"delta":{"content":"Hi"}}]}\n\n', + ), + ); + controller.enqueue( + new TextEncoder().encode( + 'data: {"choices":[{"delta":{"content":" there"}}],"usage":{"prompt_tokens":3,"completion_tokens":2,"cost":0.001}}\n\n', + ), + ); + controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n')); + controller.close(); + }, + }), + { + status: 200, + headers: { 'content-type': 'text/event-stream' }, + }, + ); + + mockFetch.callsFake( + async (input: string | URL | Request, _init?: RequestInit) => { + const url = typeof input === 'string' ? 
input : input.toString(); + if (url === 'https://openrouter.ai/api/v1/chat/completions') { + return mockStreamResponse; + } + return new Response(JSON.stringify({ error: 'Not found' }), { + status: 404, + headers: { 'content-type': 'application/json' }, + }); + }, + ); + + try { + const jwt = createRealmServerJWT( + { user: '@testuser:localhost', sessionRoom: 'test-session-room' }, + realmSecretSeed, + ); + + const response = await request + .post('/_openrouter/chat/completions') + .set('Accept', 'text/event-stream') + .set('Content-Type', 'application/json') + .set('Authorization', `Bearer ${jwt}`) + .send({ + model: 'anthropic/claude-opus-4-7', + messages: [{ role: 'user', content: 'Hello' }], + stream: true, + }); + + assert.strictEqual(response.status, 200); + assert.strictEqual( + response.headers['cache-control'], + 'no-cache, no-store, must-revalidate', + ); + assert.true( + response.text.includes('data: {"id":"gen-stream-pt"'), + 'first stream chunk relayed', + ); + assert.true( + response.text.includes('data: [DONE]'), + 'stream terminator relayed', + ); + + const user = await getUserByMatrixUserId( + dbAdapter, + '@testuser:localhost', + ); + await waitUntil( + async () => { + const credits = await sumUpCreditsLedger(dbAdapter, { + creditType: ['extra_credit', 'extra_credit_used'], + userId: user!.id, + }); + return credits === 49; // 50 - (0.001 * 1000) = 49 + }, + { timeoutMessage: 'Credits should be deducted (50 - 1 = 49)' }, + ); + } finally { + mockFetch.restore(); + global.fetch = originalFetch; + } + }); + + test('rejects requests without a JWT', async function (assert) { + const response = await request + .post('/_openrouter/chat/completions') + .set('Accept', 'application/json') + .set('Content-Type', 'application/json') + .send({ + model: 'anthropic/claude-opus-4-7', + messages: [{ role: 'user', content: 'Hello' }], + }); + assert.strictEqual(response.status, 401); + }); + + test('rejects when the user has no credits', async function (assert) { 
+ // Drain credits below the threshold. + const user = await getUserByMatrixUserId( + dbAdapter, + '@testuser:localhost', + ); + if (user) { + await addToCreditsLedger(dbAdapter, { + userId: user.id, + creditAmount: -50, + creditType: 'extra_credit_used', + subscriptionCycleId: null, + }); + } + + const jwt = createRealmServerJWT( + { user: '@testuser:localhost', sessionRoom: 'test-session-room' }, + realmSecretSeed, + ); + + const response = await request + .post('/_openrouter/chat/completions') + .set('Accept', 'application/json') + .set('Content-Type', 'application/json') + .set('Authorization', `Bearer ${jwt}`) + .send({ + model: 'anthropic/claude-opus-4-7', + messages: [{ role: 'user', content: 'Hello' }], + }); + + assert.strictEqual(response.status, 403); + }); + + test('rejects a non-JSON body with 400', async function (assert) { + const jwt = createRealmServerJWT( + { user: '@testuser:localhost', sessionRoom: 'test-session-room' }, + realmSecretSeed, + ); + + const response = await request + .post('/_openrouter/chat/completions') + .set('Accept', 'application/json') + .set('Content-Type', 'application/json') + .set('Authorization', `Bearer ${jwt}`) + .send('not json'); + + assert.strictEqual(response.status, 400); + }); + }, + ); +}); diff --git a/packages/software-factory/.agents/skills/boxel-development/references/dev-realm-search.md b/packages/software-factory/.agents/skills/boxel-development/references/dev-realm-search.md deleted file mode 100644 index e4b1ec7563a..00000000000 --- a/packages/software-factory/.agents/skills/boxel-development/references/dev-realm-search.md +++ /dev/null @@ -1,288 +0,0 @@ -# Realm Search Query Reference - -How to use the `search_realm` tool to query cards in a realm. The query object follows the Boxel realm search API format. - -## Basic Structure - -```json -{ - "filter": { ... }, - "sort": [ ... ], - "page": { "size": 10 } -} -``` - -All top-level fields are optional. An empty query `{}` returns all cards. 
- -## Filter by Card Type - -Use `type` with a `{ module, name }` CodeRef to filter by card type. The `module` must be the full absolute URL of the module that defines the card. - -```json -{ - "filter": { - "type": { - "module": "http://localhost:4201/software-factory/darkfactory", - "name": "Project" - } - } -} -``` - -This returns all cards that adopt from (or extend) the specified type. Do NOT use wildcards (`*`) in module or name — they are not supported. - -## Filter by Field Value (eq) - -Use `eq` to match exact field values. You must specify `on` to scope the field to a card type: - -```json -{ - "filter": { - "on": { - "module": "http://localhost:4201/software-factory/darkfactory", - "name": "Issue" - }, - "eq": { "status": "in_progress" } - } -} -``` - -Multiple fields in `eq` are ANDed: - -```json -{ - "filter": { - "on": { "module": "...", "name": "Post" }, - "eq": { "cardTitle": "Card 1", "cardDescription": "Sample post" } - } -} -``` - -### Nested Fields - -Use dot paths for nested fields (e.g., fields inside a `contains` relationship): - -```json -{ - "filter": { - "on": { "module": "...", "name": "Post" }, - "eq": { "author.firstName": "Carl" } - } -} -``` - -### Null / Missing Values - -Use `null` to find cards where a field is empty or missing: - -```json -{ - "filter": { - "on": { "module": "...", "name": "TypeExamples" }, - "eq": { "stringField": null } - } -} -``` - -## Substring Search (contains) - -Use `contains` for case-insensitive substring matching: - -```json -{ - "filter": { - "contains": { "cardTitle": "sticky" } - } -} -``` - -Scoped to a type: - -```json -{ - "filter": { - "on": { "module": "...", "name": "Person" }, - "contains": { "cardTitle": "note" } - } -} -``` - -## Range Filters - -Use `range` with `gt`, `gte`, `lt`, `lte` for numeric, date, or string comparisons: - -```json -{ - "filter": { - "on": { "module": "...", "name": "Post" }, - "range": { - "views": { "lte": 10, "gt": 5 }, - "author.posts": { "gte": 1 } - } - } -} 
-``` - -## Combining Filters - -### AND (every) - -All conditions must match: - -```json -{ - "filter": { - "on": { "module": "...", "name": "Post" }, - "every": [ - { "eq": { "cardTitle": "Card 1" } }, - { "not": { "eq": { "author.firstName": "Cardy" } } } - ] - } -} -``` - -### OR (any) - -At least one condition must match. Can combine different types: - -```json -{ - "filter": { - "any": [ - { - "on": { "module": "...", "name": "Article" }, - "eq": { "author.firstName": "Cardy" } - }, - { - "on": { "module": "...", "name": "Book" }, - "eq": { "author.firstName": "Cardy" } - } - ] - } -} -``` - -### NOT (negation) - -```json -{ - "filter": { - "on": { "module": "...", "name": "Article" }, - "not": { "eq": { "author.firstName": "Carl" } } - } -} -``` - -## Sorting - -Sort results using the `sort` array. Each entry needs `by` (field path) and `on` (card type): - -```json -{ - "sort": [ - { - "by": "author.lastName", - "on": { "module": "...", "name": "Article" } - } - ], - "filter": { - "type": { "module": "...", "name": "Article" } - } -} -``` - -Descending order: - -```json -{ - "sort": [ - { - "by": "author.firstName", - "on": { "module": "...", "name": "Article" }, - "direction": "desc" - } - ] -} -``` - -## Pagination - -```json -{ - "filter": { "type": { "module": "...", "name": "Project" } }, - "page": { "size": 10 } -} -``` - -## Discovering Available Fields - -You can only filter/sort on fields that exist on the card type. To find which fields a card type has: - -1. Use `run_command` to fetch the JSON schema for a card type: - -```json -{ - "command": "@cardstack/boxel-host/commands/get-card-type-schema/default", - "commandInput": { - "codeRef": { - "module": "http://localhost:4201/software-factory/darkfactory", - "name": "Issue" - } - } -} -``` - -2. The result contains `attributes.properties` listing all searchable fields (e.g., `status`, `summary`, `priority`). - -3. 
Use those field names in your `eq`, `contains`, `range`, or `sort` with the matching `on` type. - -The card tools (`update_project`, `update_issue`, `create_knowledge`, `create_catalog_spec`) also have dynamic JSON schemas in their parameters that list available fields. - -### Inheritance - -Filtering on a base card type's fields matches all cards that inherit from it. For example, filtering on `CardDef` fields like `cardTitle` or `cardDescription` finds cards of any type. Filtering on an `Issue` field like `status` finds only Issue cards (and any subtypes of Issue). - -### Searching Through Relationship Fields - -You can filter on fields inside `linksTo` and `linksToMany` relationships, as long as those relationship fields are rendered in an embedded or fitted template. Rendering makes them indexable by the query engine. - -For example, if a `Friend` card has `@field friend = linksTo(Dog)` and Dog's `firstName` field is rendered in an embedded template: - -```json -{ - "filter": { - "on": { "module": "...", "name": "Friend" }, - "eq": { "friend.firstName": "Mango" } - } -} -``` - -### Searching by CodeRef Fields - -Some cards have CodeRef fields (e.g., `ref` on the Spec card). You can search by matching the full CodeRef: - -```json -{ - "filter": { - "on": { - "module": "https://cardstack.com/base/spec", - "name": "Spec" - }, - "eq": { - "ref": { - "module": "http://localhost:4201/my-realm/sticky-note", - "name": "StickyNote" - } - } - } -} -``` - -If a relationship field is NOT rendered in any embedded/fitted template, the query engine cannot index it and searches against it will fail. - -## Common Mistakes - -- **Do NOT use wildcards** (`*`) in `module` or `name` — the query engine does not support them. Use `type` with a specific CodeRef. -- **Do NOT use field names without `on`** — fields like `title`, `status`, etc. are specific to a card type. Without `on`, the query engine doesn't know which type's fields to search. 
The exception is `cardTitle` and `cardDescription` which exist on the base `CardDef`. -- **Use full absolute module URLs** — not relative paths, not bare package names. -- **Nested field paths use dots** — `author.firstName`, not `author/firstName` or `author[firstName]`. diff --git a/packages/software-factory/.agents/skills/boxel-file-structure/SKILL.md b/packages/software-factory/.agents/skills/boxel-file-structure/SKILL.md deleted file mode 100644 index 6420101d95d..00000000000 --- a/packages/software-factory/.agents/skills/boxel-file-structure/SKILL.md +++ /dev/null @@ -1,314 +0,0 @@ ---- -name: boxel-file-structure -description: Use when organizing files in a Boxel workspace, choosing filenames or directories for card definitions and instances, or validating JSON `adoptsFrom.module` paths and relationship links. ---- - -# Boxel File Structure Rules - -Rules for organizing files in a Boxel workspace when working locally with boxel-cli. - -## URL Structure - -``` -https://[realm-domain]/[username]/[workspace]/[path].[extension] -Example: https://app.boxel.ai/sarah/pet-rescue/animals/dog.gts -``` - -## File Naming Conventions - -| Type | Convention | Example | -| -------------------- | ----------------- | ----------------------------------- | -| Card definitions | `kebab-case.gts` | `blog-post.gts`, `grammy-award.gts` | -| Instance directories | `PascalCase/` | `BlogPost/`, `GrammyAward/` | -| Instance files | `kebab-case.json` | `my-first-post.json` | - -## Directory Structure - -``` -workspace/ -├── .realm.json # Workspace config -├── index.json # Workspace index -├── cards-grid.json # Default cards grid -├── blog-post.gts # Card definition (kebab-case) -├── BlogPost/ # Instance directory (PascalCase) -│ ├── my-first-post.json -│ └── another-post.json -├── author.gts -└── Author/ - └── jane-doe.json -``` - -## Module Paths in JSON (CRITICAL) - -**The `adoptsFrom.module` path is relative to the JSON file location.** - -### ✅ Correct: Instance in subdirectory - 
-``` -grammy-award.gts # Definition at root -GrammyAward/ # Instances in PascalCase directory -└── record-of-the-year.json -``` - -**In `GrammyAward/record-of-the-year.json`:** - -```json -{ - "meta": { - "adoptsFrom": { - "module": "../grammy-award", // ← Go UP to parent, then to file - "name": "GrammyAward" - } - } -} -``` - -### ❌ Wrong: Forgetting the relative path - -```json -{ - "meta": { - "adoptsFrom": { - "module": "./grammy-award", // ← WRONG! This looks in GrammyAward/ - "name": "GrammyAward" - } - } -} -``` - -## Path Rules Summary - -| JSON Location | Definition Location | Module Path | -| ----------------------------- | --------------------- | ----------------- | -| `root/Instance.json` | `root/card.gts` | `"./card"` | -| `root/Card/instance.json` | `root/card.gts` | `"../card"` | -| `root/Card/Sub/instance.json` | `root/card.gts` | `"../../card"` | -| `root/Card/instance.json` | `root/other/card.gts` | `"../other/card"` | - -## Instance JSON Structure (Full) - -```json -{ - "data": { - "type": "card", - "attributes": { - "fieldName": "value", - "numberField": 123, - "boolField": true - }, - "relationships": { - "author": { - "links": { - "self": "../Author/jane-doe" - } - } - }, - "meta": { - "adoptsFrom": { - "module": "../card-definition", - "name": "CardClassName" - } - } - } -} -``` - -## linksToMany Relationships (CRITICAL) - -**🔴 For `linksToMany` fields, use numbered keys like `fieldName.0`, `fieldName.1`, etc.** - -```json -{ - "data": { - "relationships": { - "tags.0": { - "links": { - "self": "../Tag/tech" - } - }, - "tags.1": { - "links": { - "self": "../Tag/news" - } - }, - "tags.2": { - "links": { - "self": "../Tag/tutorial" - } - } - } - } -} -``` - -### ❌ Wrong: Array syntax (does NOT work) - -```json -{ - "relationships": { - "tags": { - "links": { - "self": ["../Tag/tech", "../Tag/news"] - } - } - } -} -``` - -```` - -### JSON Structure Rules - -| Section | Purpose | Required | -|---------|---------|----------| -| `data.type` | 
Always `"card"` | Yes | -| `data.attributes` | Scalar field values (string, number, bool) | Yes | -| `data.relationships` | Links to other cards (`linksTo`/`linksToMany`) | Only if has links | -| `data.meta.adoptsFrom` | References the card definition | Yes | - -### Attributes vs Relationships - -**Use `attributes` for:** -- StringField, NumberField, BooleanField values -- FieldDef instances (embedded via `contains`) -- Any non-card data - -**Use `relationships` for:** -- CardDef references (`linksTo` → single link) -- CardDef arrays (`linksToMany` → array of links) - -## The Cardinal Rule (linksTo vs contains) - -**🔴 CRITICAL - memorize this:** - -| Field Type | Definition uses | Instance uses | -|------------|-----------------|---------------| -| Extends `CardDef` | `linksTo` / `linksToMany` | `relationships` | -| Extends `FieldDef` | `contains` / `containsMany` | `attributes` | - -```gts -// In .gts definition: -@field author = linksTo(Author); // Author extends CardDef → relationships -@field address = contains(AddressField); // AddressField extends FieldDef → attributes -```` - -```json -// In .json instance: -{ - "attributes": { - "address": { "street": "123 Main", "city": "NYC" } - }, - "relationships": { - "author": { "links": { "self": "../Author/jane" } } - } -} -``` - -## Links Between Cards - -When linking to other cards, use the card's URL without `.json`: - -```json -{ - "data": { - "relationships": { - "author": { - "links": { - "self": "../Author/jane-doe" - } - } - } - } -} -``` - -## Base Realms (Read-Only) - -These realms contain shared definitions you can import from: - -**Production:** - -- `https://cardstack.com/base/` - Core types (CardDef, FieldDef, etc.) 
-- `https://app.boxel.ai/catalog/` - Catalog cards -- `https://app.boxel.ai/skills/` - Skill cards - -**Staging:** - -- `https://cardstack.com/base/` - Same core types -- `https://realms-staging.stack.cards/catalog/` -- `https://realms-staging.stack.cards/skills/` - -## Common Import Patterns - -```gts -// Core imports (always from cardstack.com/base) -import { - CardDef, - FieldDef, - field, - contains, - linksTo, - containsMany, - linksToMany, - StringField, - NumberField, - BooleanField, - Component, -} from 'https://cardstack.com/base/card-api'; - -// Import from same workspace -import { Author } from './author'; - -// Import from base realm -import { Skill } from 'https://cardstack.com/base/skill'; -``` - -## Query Structure (for API searches) - -When using the `/_search` API endpoint: - -```json -{ - "filter": { - "type": { - "module": "https://realm-url/card-name", - "name": "CardClassName" - } - } -} -``` - -**With field filters:** - -```json -{ - "filter": { - "on": { "module": "https://realm-url/product", "name": "Product" }, - "contains": { "name": "laptop" } - } -} -``` - -**Operations:** `eq`, `contains`, `range`, `not`, `type`, `every` (AND), `any` (OR) - -## Common Mistakes - -| Mistake | Fix | -| -------------------------------------- | --------------------------------------------- | -| `"module": "./card"` from subdirectory | Use `"../card"` | -| `contains(CardDef)` | Use `linksTo(CardDef)` | -| `linksTo(FieldDef)` | Use `contains(FieldDef)` | -| Link in `attributes` | Move to `relationships` | -| FieldDef in `relationships` | Move to `attributes` | -| Missing `data` wrapper in JSON | Wrap everything in `{"data": {...}}` | -| PascalCase for `.gts` files | Use `kebab-case.gts` | -| kebab-case for instance dirs | Use `PascalCase/` | -| `linksToMany` as array | Use numbered keys: `field.0`, `field.1`, etc. 
| - -## Essential Formats - -Every CardDef should implement these templates: - -- `isolated` - Full detail view (scrollable) -- `embedded` - Compact summary for lists -- `fitted` - Fixed dimensions for grids/dashboards (CRITICAL for good UX) diff --git a/packages/software-factory/.agents/skills/boxel-repair/SKILL.md b/packages/software-factory/.agents/skills/boxel-repair/SKILL.md deleted file mode 100644 index a47dff30f2a..00000000000 --- a/packages/software-factory/.agents/skills/boxel-repair/SKILL.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: boxel-repair -description: Use when a Boxel workspace has broken realm metadata, missing icons or backgrounds, bad `index.json` or `cards-grid.json` links, or stale Matrix realm metadata that needs `boxel repair-realm` or `boxel repair-realms`. ---- - -> **Factory agent note:** This skill is for human Claude Code sessions only. The factory agent's tool registry does not include boxel-cli tools — all realm I/O uses `write_file`, `read_file`, and `search_realm` tools via the realm HTTP API. 
- -# Boxel Repair - -Use this workflow when a workspace has any of these symptoms: - -- Missing icon/background in workspace tiles -- Display name is `Unknown Workspace` or mismatched -- Opening a workspace fails due to missing `cards-grid` relationship -- Matrix workspace list (`app.boxel.realms`) is stale/inconsistent - -## Commands - -```bash -# Inspect one realm without mutating -boxel repair-realm --dry-run - -# Repair one realm -boxel repair-realm - -# Repair all realms owned by active profile user -boxel repair-realms -``` - -## Behavior - -`repair-realm` and `repair-realms` perform these repairs: - -- `.realm.json`: normalize `name`, `iconURL`, `backgroundURL` -- `index.json`: ensure `relationships.cardsGrid.links.self` = `./cards-grid` -- `cards-grid.json`: restore default cards-grid card if missing/corrupt -- Before replacing `index.json`/`cards-grid.json`, preserve existing content as timestamped backup cards in the same realm -- `index.json`: write `data.meta._touched` timestamp to break cache -- Matrix `app.boxel.realms`: reconcile list to match repaired, accessible realms - -## Important Defaults - -- `personal` realm is excluded unless `--include-personal` is provided. -- Batch repair defaults to active profile owner. -- Use `--no-reconcile-matrix` when you want file/card repair only. -- Use `--no-fix-index`/`--no-touch-index` when debugging minimal metadata-only fixes. diff --git a/packages/software-factory/.agents/skills/boxel-restore/SKILL.md b/packages/software-factory/.agents/skills/boxel-restore/SKILL.md deleted file mode 100644 index 0e427fce6e7..00000000000 --- a/packages/software-factory/.agents/skills/boxel-restore/SKILL.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -name: boxel-restore -description: Use when restoring a Boxel workspace to a previous checkpoint and syncing deletions back to the server safely, including stopping watch first and running `boxel sync . --prefer-local` after restore. 
---- - -> **Factory agent note:** This skill is for human Claude Code sessions only. The factory agent's tool registry does not include boxel-cli tools — all realm I/O uses `write_file`, `read_file`, and `search_realm` tools via the realm HTTP API. - -# Boxel Restore - -Restore workspace to a previous checkpoint and sync deletions to server. - -## Workflow - -1. **Stop watch if running** - Prevents re-pulling deleted files -2. **Show history** - Display recent checkpoints with numbers -3. **Confirm target** - Ask user which checkpoint (or accept from command) -4. **Restore locally** - Run `boxel history . -r ` -5. **Sync to server** - Run `boxel sync . --prefer-local` to push deletions -6. **Restart watch** - Optionally restart watch if it was running - -## Usage - -``` -Use the `boxel-restore` skill interactively -Restore checkpoint `3` -Restore checkpoint `abc123` -``` - -## Commands Used - -```bash -# Stop any running watch first -# (check /tasks and stop if needed) - -# View history -boxel history . - -# Restore to checkpoint (auto-confirm) -echo "y" | boxel history . -r - -# ESSENTIAL: Push deletions to server -boxel sync . --prefer-local - -# Optionally restart watch -boxel watch . -i -d -``` - -## Response Format - -1. Show the checkpoint being restored to (hash, message, date, source) -2. List files that will be deleted (if any new files since checkpoint) -3. Execute restore -4. Execute sync with --prefer-local -5. 
Confirm completion - -## Critical Notes - -- **Always stop watch before restoring** - Otherwise it re-pulls deleted files -- **Always use --prefer-local after restore** - This syncs deletions to server -- After restore, workspace matches checkpoint exactly (files added later are gone) - -## Example Output - -``` -Restoring to checkpoint #3: abc1234 - Message: Pull: Update knicks-vip-ticket.gts - Source: SERVER (external change) - Date: 5 minutes ago - -Files that will be deleted: - - KnicksVipTicket/knicks-vs-magic.json - - KnicksVipTicket/knicks-vs-thunder.json - -Restoring... ✓ -Syncing deletions to server... ✓ - -Restore complete. Server now matches checkpoint #3. -``` diff --git a/packages/software-factory/.agents/skills/boxel-setup/SKILL.md b/packages/software-factory/.agents/skills/boxel-setup/SKILL.md deleted file mode 100644 index aeb72d9211d..00000000000 --- a/packages/software-factory/.agents/skills/boxel-setup/SKILL.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -name: boxel-setup -description: Use for Boxel CLI onboarding, profile setup, verifying login, listing workspaces, switching profiles, or helping a new user perform their first sync. ---- - -> **Factory agent note:** This skill is for human Claude Code sessions only. The factory agent's tool registry does not include boxel-cli tools — all realm I/O uses `write_file`, `read_file`, and `search_realm` tools via the realm HTTP API. - -# Boxel Setup - -Guide new users through Boxel CLI setup. - -## Trigger - -Run this automatically when: - -- User first opens the repo -- No profile is configured (`npx boxel profile` shows nothing) -- User asks about setup or getting started - -## Flow - -### 1. Check Current State - -```bash -npx boxel profile -``` - -If no profile exists, proceed with setup. - -### 2. Add a Profile - -**Option A: Interactive (recommended)** - -```bash -npx boxel profile add -``` - -This wizard will: - -1. Ask for environment (Production or Staging) -2. Ask for username and password -3. 
Create the profile automatically - -**Option B: Non-interactive (CI/automation)** - -Ask the user for: - -- **Environment**: Production (app.boxel.ai) or Staging (realms-staging.stack.cards) -- **Username**: Their Boxel handle (e.g., `aallen90`, `ctse`). Found in Account panel as `@username:stack.cards` or in workspace URLs like `app.boxel.ai/username/workspace-name` -- **Password**: Same as Boxel web login - -Then run (using environment variable for security): - -**Production:** - -```bash -BOXEL_PASSWORD="password" npx boxel profile add -u @username:boxel.ai -n "Production" -``` - -**Staging:** - -```bash -BOXEL_PASSWORD="password" npx boxel profile add -u @username:stack.cards -n "Staging" -``` - -> **Security Note:** Avoid passing passwords via `-p` flag as they appear in shell history. - -### 3. Verify - -```bash -npx boxel list -``` - -### 4. First Sync - -Help them sync a workspace: - -```bash -npx boxel sync @username/workspace ./workspace-name -``` - -## Profile Management - -**List profiles:** - -```bash -npx boxel profile list -``` - -**Switch profile:** - -```bash -npx boxel profile switch -``` - -**Migrate from old .env:** - -```bash -npx boxel profile migrate -``` - -## Success Message - -``` -Setup complete! 
You can now: -- `npx boxel list` - See your workspaces -- `npx boxel sync @username/workspace` - Sync a workspace -- `npx boxel watch .` - Monitor for changes -- `npx boxel history .` - View/restore checkpoints - -Profile management: -- `npx boxel profile` - Show active profile -- `npx boxel profile list` - List all profiles -- `npx boxel profile switch ` - Switch profiles - -For AI-assisted development, try: -- `boxel-watch` - Smart watch with auto intervals -- `boxel-sync` - Context-aware sync -- `boxel-restore` - Undo changes -``` diff --git a/packages/software-factory/.agents/skills/boxel-sync/SKILL.md b/packages/software-factory/.agents/skills/boxel-sync/SKILL.md deleted file mode 100644 index c54845695e9..00000000000 --- a/packages/software-factory/.agents/skills/boxel-sync/SKILL.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -name: boxel-sync -description: Use when deciding how to sync a Boxel workspace after local edits, server changes, or a restore, including choosing between interactive sync, `--prefer-local`, `--prefer-remote`, or `--prefer-newest`. ---- - -> **Factory agent note:** This skill is for human Claude Code sessions only. The factory agent's tool registry does not include boxel-cli tools — all realm I/O uses `write_file`, `read_file`, and `search_realm` tools via the realm HTTP API. - -# Boxel Sync - -Smart bidirectional sync with context-aware conflict resolution. 
- -## Context Detection - -Analyze the situation to choose the right sync strategy: - -### After Local Edits - -When Claude has been editing files locally: - -- Use `--prefer-local` to push changes -- Creates checkpoint for the push - -### After Server Activity - -When watch detected server changes or user mentions UI edits: - -- Use `--prefer-remote` or default (interactive) -- Pull changes first - -### After Restore - -When a restore was just performed: - -- Use `--prefer-local` to sync deletions to server -- Essential for completing the restore workflow - -### Conflict Detected - -When both sides have changes: - -- Show status first -- Ask user preference or use `--prefer-newest` - -## Commands - -```bash -# Check status first -boxel status . - -# Standard sync (interactive conflicts) -boxel sync . - -# Push local changes -boxel sync . --prefer-local - -# Pull remote changes -boxel sync . --prefer-remote - -# Auto-resolve by timestamp -boxel sync . --prefer-newest - -# Include deletions -boxel sync . --delete - -# Preview only -boxel sync . --dry-run -``` - -## Response Format - -1. Brief status check (what changed where) -2. Chosen strategy and why -3. Execute sync -4. Report results (files pushed/pulled/deleted) - -## Example Output - -``` -Checking status... - Local: 2 files modified - Remote: No changes - -Using --prefer-local since you have local edits. - -Syncing... - Pushed: card-definition.gts, instance.json - Checkpoint: abc1234 [MAJOR] Push: 2 files - -Sync complete! 
-``` diff --git a/packages/software-factory/.agents/skills/boxel-track/SKILL.md b/packages/software-factory/.agents/skills/boxel-track/SKILL.md deleted file mode 100644 index a47d8df6af7..00000000000 --- a/packages/software-factory/.agents/skills/boxel-track/SKILL.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -name: boxel-track -description: Use when starting or explaining `boxel track` for local file watching, automatic checkpoints, or optional real-time push with `--push` during Boxel development. ---- - -> **Factory agent note:** This skill is for human Claude Code sessions only. The factory agent's tool registry does not include boxel-cli tools — all realm I/O uses `write_file`, `read_file`, and `search_realm` tools via the realm HTTP API. - -# Boxel Track - -Start `boxel track` to monitor local file changes and create checkpoints automatically. - -## When to Use Track - -Use **track** when you're editing files locally (in IDE, with AI agent, etc.) and want automatic backups: - -- Working in VS Code, Cursor, or other IDE -- AI agent is editing files -- You want checkpoint history of your work - -**Track vs Watch:** -| Command | Symbol | Direction | Purpose | -|---------|--------|-----------|---------| -| `track` | ⇆ | Local edits → Checkpoints | Backup your work as you edit | -| `watch` | ⇅ | Server → Local | Pull external changes from Boxel UI | - -## Commands - -```bash -# Start tracking (default: 3s debounce, 10s min interval) -boxel track . - -# Track AND auto-push to server (real-time sync) -boxel track . --push - -# Custom timing (5s debounce, 30s between checkpoints) -boxel track . -d 5 -i 30 - -# Quiet mode (only show checkpoints) -boxel track . -q - -# Verbose mode (debug output) -boxel track . -v - -# Stop all track/watch processes -boxel stop -``` - -## The Track → Sync Workflow - -### Option 1: Manual Sync (Default) - -Track creates local checkpoints only. Push to server when ready: - -```bash -# 1. Track creates checkpoints as you edit -boxel track . 
- -# 2. When ready to push to server, sync with --prefer-local -boxel sync . --prefer-local -``` - -This lets you: - -- Work offline with local backups -- Batch multiple edits before pushing -- Review changes before they go live - -### Option 2: Real-Time Sync (--push) - -Auto-push changes to server as you edit: - -```bash -# Track AND push changes automatically -boxel track . --push -``` - -Uses batch upload via `/_atomic` endpoint for efficient multi-file uploads. Definitions (.gts) are uploaded before instances (.json) to ensure proper indexing. - -## Context Detection - -When invoked, consider: - -### Standard Development (3s debounce, 10s interval) - -- Normal editing workflow -- Balanced between checkpoint frequency and overhead - -### Fast Iteration (2s debounce, 5s interval) - -- Rapid prototyping -- User says "track closely" or "capture everything" - -### Background Tracking (5s debounce, 30s interval) - -- Long editing sessions -- User says "just backup" or "light tracking" - -## Response Format - -When invoked: - -1. Confirm workspace directory -2. Start track with appropriate settings -3. **Remind user about sync options** - -Example (without --push): - -``` -Starting track in the current workspace (3s debounce, 10s interval). -Checkpoints will be created automatically as you save files. - -Remember: Track creates LOCAL checkpoints only. -When ready to push changes to Boxel server: - boxel sync . --prefer-local - -Or restart with --push for real-time sync: - boxel track . --push - -Use Ctrl+C to stop tracking, or `boxel stop` from another terminal. -``` - -Example (with --push): - -``` -Starting track with auto-push (3s debounce, 10s interval). -Changes will be checkpointed AND pushed to server automatically. - -Use Ctrl+C to stop, or `boxel stop` from another terminal. 
-``` diff --git a/packages/software-factory/.agents/skills/boxel-watch/SKILL.md b/packages/software-factory/.agents/skills/boxel-watch/SKILL.md deleted file mode 100644 index 5886bb8552a..00000000000 --- a/packages/software-factory/.agents/skills/boxel-watch/SKILL.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -name: boxel-watch -description: Use when starting or choosing settings for `boxel watch` to monitor remote Boxel changes, including active-development, quick-feedback, and background-monitoring intervals. ---- - -> **Factory agent note:** This skill is for human Claude Code sessions only. The factory agent's tool registry does not include boxel-cli tools — all realm I/O uses `write_file`, `read_file`, and `search_realm` tools via the realm HTTP API. - -# Boxel Watch - -Start `boxel watch` with intelligent interval settings based on context. - -## Context Detection - -Analyze the conversation and recent activity to determine the appropriate watch settings: - -### Active Development Mode (5s interval, 3s debounce) - -Use when: - -- User is actively editing .gts or .json files -- User mentions "editing", "working on", "changing", "updating" -- Recent file writes or edits in the workspace -- User asks to "watch while I work" - -### Monitoring Mode (30s interval, 10s debounce) - -Use when: - -- User wants to "keep an eye on" changes -- User is doing research, reading, or planning -- No recent edits to workspace files -- User says "background", "monitor", or "check occasionally" - -### Quick Feedback Mode (10s interval, 5s debounce) - -Use when: - -- User is testing changes in Boxel UI -- User mentions "testing", "trying", "see if it works" -- Balanced between responsiveness and efficiency - -## Execution - -1. Determine the workspace directory (default: current synced workspace) -2. Determine the mode based on context -3. Explain the chosen settings briefly -4. Start watch in background with appropriate flags -5. 
Inform user how to stop (Ctrl+C or task stop) - -## Commands - -```bash -# Active development -boxel watch . -i 5 -d 3 - -# Monitoring -boxel watch . -i 30 -d 10 - -# Quick feedback -boxel watch . -i 10 -d 5 - -# Quiet mode (any interval) -boxel watch . -i -d -q -``` - -## Response Format - -When invoked, respond with: - -1. Detected mode and reasoning (1 sentence) -2. The watch command being run -3. How to stop or adjust - -Example: -"Starting watch in **active development mode** (5s interval) since you're editing card files. Run in background - use `/tasks` to check status or Ctrl+C to stop." diff --git a/packages/software-factory/.agents/skills/software-factory-bootstrap/SKILL.md b/packages/software-factory/.agents/skills/software-factory-bootstrap/SKILL.md index 9903374bb05..8d8319e22d6 100644 --- a/packages/software-factory/.agents/skills/software-factory-bootstrap/SKILL.md +++ b/packages/software-factory/.agents/skills/software-factory-bootstrap/SKILL.md @@ -12,9 +12,8 @@ the implementation phase. ## How to write tracker-schema cards Project, KnowledgeArticle, and Issue cards are plain `.json` files in -the workspace. Use the workspace fs surface to write them — `Write` -(Claude backend) or `write_file` (OpenRouter backend) — with the -JSON:API document envelope shown below. +the workspace. Use the native `Write` tool with the exact JSON:API +document shape documented below for each card type. **The system prompt names the live tracker module URL** (the value you should put in `data.meta.adoptsFrom.module` for Project / Board / Issue / diff --git a/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md b/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md index b09bfb59f9c..bb9ed5d37a9 100644 --- a/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md +++ b/packages/software-factory/.agents/skills/software-factory-operations/SKILL.md @@ -19,44 +19,44 @@ directly. 
- **Target realm** (user-specified, passed to `factory:go`) Receives all generated artifacts: Project, Issue, KnowledgeArticle, card definitions, card instances, Catalog Spec cards, and QUnit test files. -## Tool Surfaces +## Workspace files (local mirror of target realm) -Two surfaces are available, depending on which agent backend is running. -The system prompt makes the concrete mapping explicit; this skill describes -the operations. +The agent's working directory is the workspace — the local mirror of the +target realm that the orchestrator syncs back between iterations. Use the +**native** `Read`, `Write`, `Edit`, `Glob`, `Grep`, and `Bash` tools on +these files; the workspace `cwd` is set for you, so realm-relative paths +resolve directly. -### Workspace files (local mirror of target realm) +These files live in the workspace: -These files live in the workspace directory and are synced to the realm -by the orchestrator. Use the workspace fs surface for them — and only -for them: +- Card definitions: `*.gts` +- Card tests: `*.test.gts` +- Content card instances under `<CardName>/<slug>.json` (the user data the + cards represent — e.g. `StickyNote/note-1.json`) +- Tracker-schema cards: `Projects/<slug>.json`, `Issues/<slug>.json`, + `Knowledge Articles/<slug>.json`, `Spec/<slug>.json` -- Card definitions: `*.gts` files -- Card tests: `*.test.gts` files -- Content card instances under `<CardName>/<slug>.json` (the user data - the cards represent — e.g. `StickyNote/note-1.json`) +`Bash` is also available for `boxel` CLI commands: -Tooling per backend: +- Read-only inspection: `boxel status`, `boxel history`, `boxel search`, + `boxel read-transpiled`. +- `boxel run-command` — dispatches to whatever host command you specify. + Most specifiers are read-only inspection commands (`get-card-type-schema`, + `evaluate-module`, `instantiate-card`), but the surface itself is generic; + treat it as "as safe as the named command." 
-- **Claude backend:** use the **native** `Read`, `Write`, `Edit`, - `Glob`, `Grep`, and `Bash` tools. The SDK query's `cwd` is the - workspace, so realm-relative paths resolve directly. `Bash` is - available for safe shell helpers (`ls`, `find`, `cat`, read-only - `boxel` CLI commands like `boxel status` / `boxel history`). -- **OpenRouter backend:** use the factory `read_file({ path, realm? })` - / `write_file({ path, content, realm? })` tools — same realm-relative - paths. +See the **Realm-side reads** section below for the full usage. -Inspect before writing. Read or grep the file you plan to change, and +**Inspect before writing.** Read or grep the file you plan to change, and glob for sibling files (e.g. existing card definitions in the same directory) before creating new ones. -### Tracker-schema cards — write JSON directly +## Tracker-schema cards — write JSON directly -Project, IssueTracker, Issue, KnowledgeArticle, and Spec cards are plain `.json` files -in the workspace. Use `Write` (Claude) or `write_file` (OpenRouter) to -create them and `Read` + `Edit` (or `Read` + `Write` of the merged -document) to update them — same workspace fs surface as `.gts` files. +Project, IssueTracker, Issue, KnowledgeArticle, and Spec cards are plain +`.json` files in the workspace. Use `Write` to create them; to update one, +`Read` it, then either `Edit` the relevant attributes or `Write` the +merged document back — same workspace fs surface as `.gts` files. | File path | adoptsFrom | | -------------------------------- | -------------------------------------------------------------- | @@ -69,27 +69,27 @@ document) to update them — same workspace fs surface as `.gts` files. `` is named in the system prompt — use that value verbatim. -**Always fetch the live schema before writing.** Field names, enum -values, and relationship keys for each card type are introspected at -runtime — never hard-coded in this skill. 
Call +**Always fetch the live schema before writing.** Field names, enum values, +and relationship keys for each card type are introspected at runtime — +never hard-coded in this skill. Call `get_card_schema({ module, name })` for the card you're about to write -and use the returned `{ attributes, relationships? }` JSON Schema to -shape the document. The bootstrap skill covers the bootstrap-specific -attribute population guidance; this skill covers the operational -patterns (read-before-write, comments, invariants) that layer on top. +and use the returned `{ attributes, relationships? }` JSON Schema to shape +the document. The bootstrap skill covers the bootstrap-specific attribute +population guidance; this skill covers the operational patterns +(read-before-write, comments, invariants) that layer on top. **Read before write.** When updating any tracker card, `Read` the file first, change only the attributes you intend to update, then write the -merged document back. Don't overwrite the whole file with only your -new fields — you'll silently drop the existing attributes. +merged document back. Don't overwrite the whole file with only your new +fields — you'll silently drop the existing attributes. -**Issue invariants you must enforce yourself** (these used to be -enforced by a wrapper tool; they aren't anymore): +**Issue invariants you must enforce yourself** (these used to be enforced +by a wrapper tool; they aren't anymore): -- **`description` is immutable** after the issue is created. If you - need to add context — blocked reasons, progress notes, validation - failures, clarification requests — append to the `comments` array - instead. See "Adding a comment to an existing issue" below. +- **`description` is immutable** after the issue is created. If you need + to add context — blocked reasons, progress notes, validation failures, + clarification requests — append to the `comments` array instead. See + "Adding a comment to an existing issue" below. 
- **Status transitions are restricted.** You may set `status` to `"blocked"` (cannot proceed) or `"backlog"` (unblock). Never set `status` to `"done"` or `"in_progress"` — those are owned by the @@ -100,11 +100,20 @@ enforced by a wrapper tool; they aren't anymore): Issue cards carry a containsMany comments array on `attributes`. To append a comment: -1. Call `get_card_schema({ module: "", name: "Issue" })` if you don't already have the Issue schema cached. The comments array entry is itself an object with its own field shape — use the field names returned by the schema (the body / author / timestamp fields) verbatim. The timestamp field on a comment is **not** the same as the Issue's own top-level `createdAt` / `updatedAt` attributes; the schema disambiguates them. +1. Call `get_card_schema({ module: "", name: "Issue" })` + if you don't already have the Issue schema cached. The comments array + entry is itself an object with its own field shape — use the field + names returned by the schema (the body / author / timestamp fields) + verbatim. The timestamp field on a comment is **not** the same as the + Issue's own top-level `createdAt` / `updatedAt` attributes; the schema + disambiguates them. 2. `Read` the issue's `.json`. -3. Append a new entry to the comments array on `data.attributes`, populating the body (markdown comment text), the author (e.g. `"factory-agent"` or `"orchestrator"`), and the comment-timestamp field (ISO timestamp). -4. `Write` (or `Edit`) the document back. **Do not modify the - description or any other attribute** — comments are append-only. +3. Append a new entry to the comments array on `data.attributes`, + populating the body (markdown comment text), the author (e.g. + `"factory-agent"` or `"orchestrator"`), and the comment-timestamp + field (ISO timestamp). +4. `Write` (or `Edit`) the document back. **Do not modify the description + or any other attribute** — comments are append-only. 
### Catalog Spec card shape @@ -116,9 +125,9 @@ module. Fetch the live schema before writing: get_card_schema({ module: "https://cardstack.com/base/spec", name: "Spec" }) ``` -Use the returned `{ attributes, relationships? }` to shape the -document. What the schema does **not** tell you and you must supply -yourself for entry-point cards: +Use the returned `{ attributes, relationships? }` to shape the document. +What the schema does **not** tell you and you must supply yourself for +entry-point cards: - A display title and short description suitable for the catalog. - The spec-type field set to the enum value the schema returns for @@ -144,62 +153,172 @@ The full document envelope is the same as for tracker cards (`data` / `type: "card"` / `attributes` / `relationships` / `meta.adoptsFrom`), just with the `https://cardstack.com/base/spec` adoptsFrom. -### Realm-side reads (factory tools) +## Realm-side reads (via `boxel` CLI) -These always go through factory tools regardless of backend — they -reach the realm runtime, enforce schema and immutability invariants, -or drive control flow. +For operations that need to reach the realm runtime — searching the +indexed cards, fetching transpiled JS, running host commands — shell out +via `Bash` to the `boxel` CLI. These never go through the workspace fs. -- Fetch the **transpiled** JavaScript for a `.gts` module — used only when an eval/instantiate error reports a line/column number, since those numbers reference the transpiled output, not your `.gts` source. - - **Claude backend:** run `boxel read-transpiled --realm ` via `Bash`. The `.gts` extension is optional. Pipe through `sed -n 'p'` (or wrap with `awk`) when you want to inspect a single line. - - **OpenRouter backend:** call the factory `fetch_transpiled_module({ path, realm? })` tool with the same realm-relative path. -- Search the target realm for cards using a structured query object (filter, sort, page). 
Use this to check for existing cards, find duplicates, or inspect project state. - **Claude backend:** run `boxel search --realm <realm-url> --query '<json>' --json` via `Bash`. **Quoting:** single-quote the entire JSON object so the shell does not expand or split it; keep all keys and string values double-quoted inside. Example: `boxel search --realm https://realms.example/h/p/ --query '{"filter":{"type":{"module":"https://cardstack.com/base/spec","name":"Spec"}}}' --json`. Pipe through `jq` if you want a focused projection. - **OpenRouter backend:** call the factory `search_realm({ query, realm? })` tool with the same structured query object — no shell quoting concerns. +- **Search the target realm** for cards using a structured query + (filter, sort, page). Use this to check for existing cards, find + duplicates, or inspect project state. + ``` + boxel search --realm <realm-url> --query '<json>' --json + ``` + Single-quote the entire JSON object so the shell does not expand or + split it; keep keys and string values double-quoted inside. Pipe + through `jq` to project. **For the full query syntax (filter / eq / + contains / range / every / any / not / sort / page, CodeRef matching, + common mistakes) see the `boxel-api` skill.** +- **Fetch the transpiled JavaScript** for a `.gts` module — used only + when an eval/instantiate error reports a line/column number, since + those numbers reference the transpiled output, not your `.gts` source. + ``` + boxel read-transpiled <path> --realm <realm-url> + ``` + The `.gts` extension is optional. Pipe through `sed -n '<N>p'` (or + wrap with `awk`) to inspect a single line. See the **Debugging + Runtime Evaluation Errors** section below for when to reach for this. 
+- **Run any other host command** in the realm's prerendered runtime + (module evaluation, card instantiation, anything else exposed at + `@cardstack/boxel-host/commands//default`): + ``` + boxel run-command --realm --input '' --json + ``` + Most agent tasks won't need this — the validators below already wrap + the common host commands. See the `boxel-command` skill for the + programmatic surface and failure modes. ### Fetching live card-type schemas `get_card_schema({ module, name })` returns the live JSON Schema (`{ attributes, relationships? }`) for any `CardDef`, introspected from the actual class via the realm server's prerenderer (the same path the -AI Bot uses for its patch-tool schemas). Always call this before -writing a tracker card (Project / Issue / KnowledgeArticle), a Spec -card, or any other card whose shape you need to know. Schemas are -cached per-process, so repeated calls with the same code ref are free. - -### Running other Host Commands - -For host commands beyond the schema fetch, the OpenRouter backend -exposes `run_command({ command, commandInput? })` and the Claude -backend should shell out via Bash to -`boxel run-command --realm --input '' --json`. - -### Self-Validation (optional, no side effects) - -All five tools are safe to call repeatedly mid-turn; none of them write a realm artifact. The orchestrator still runs the full validation pipeline (which persists the durable `TestRun` / `LintResult` / `ParseResult` / `EvalResult` / `InstantiateResult` cards) after `signal_done`, so calling any of these is optional. The realm-touching tools (`run_evaluate`, `run_instantiate`, `run_tests`) push your workspace to the realm before invoking the prerenderer, so they always see the writes you've just made — no manual sync needed. - -- `run_lint({ path? 
})` — Run ESLint + Prettier (with `@cardstack/boxel` rules) and return an in-memory `RunLintResult` with `status`, `filesChecked`, `filesWithErrors`, `errorCount`, `warningCount`, `durationMs`, `lintableFiles`, and per-violation `{ rule, file, line, column, message, severity }`. Without `path`, lints every `.gts` / `.gjs` / `.ts` / `.js` file in the target realm. With `path` (realm-relative file path), lints **only that one file** — prefer this right after writing or editing a single file. -- `run_tests()` — Run the realm's QUnit suite and receive an in-memory result object `{ status, passedCount, failedCount, skippedCount, durationMs, testFiles, failures, errorMessage? }`. Use it when you want feedback before signalling done. -- `run_parse({ path? })` — Parse and type-check files in the target realm and return an in-memory `RunParseResult` with `status`, `filesChecked`, `filesWithErrors`, `errorCount`, `durationMs`, `parseableFiles`, and per-error `{ file, line, column, message }`. Without `path`, runs glint (ember-tsc) over every `.gts` / `.gjs` / `.ts` file in the realm AND validates every `.json` file listed as a Spec `linkedExample` (same discovery as the parse validation step). With `path` (realm-relative file path), parses **only that one file** — `.gts` / `.gjs` / `.ts` runs through glint; `.json` is parsed and checked for card document structure. The extension is required; `parseableFiles` entries are always returned in the `.json` / `.gts` / `.gjs` / `.ts` form, so you can feed any of them straight back into `path`. Prefer the single-file form right after writing or editing one file. -- `run_evaluate({ path? })` — Evaluate ESM modules (`.gts` / `.gjs` / `.ts` / `.js`) in the target realm via the prerenderer sandbox and return a `RunEvaluateResult` (status, module counts, per-failure `{ path, error, stackTrace? }`). Without `path`, evaluates every non-test evaluable module. 
With `path`, evaluates only that single realm-relative file — handy for a quick self-check right after writing one module. Test files (`*.test.*`) are rejected — the test runner validates those. The tool bound-polls past the brief read-after-write window where the realm has the source on disk but indexing hasn't populated the module map yet, so a returned failure is a real failure — don't retry on the agent side. When a failure reports a line/column, those numbers refer to the transpiled module — pair with the transpiled-module fetch above (Bash + `boxel read-transpiled` on the Claude backend, `fetch_transpiled_module` on OpenRouter) to locate the offending source construct, then fix the `.gts` source (never copy transpiled patterns back into source). -- `run_instantiate({ path? })` — Instantiate card example instances in the target realm via the prerenderer sandbox and return a `RunInstantiateResult` (status, instance counts, per-failure `{ path, cardName, error, stackTrace? }`). Without `path`, searches the realm for Spec cards and instantiates every `linkedExample` on every card/app Spec; specs with no `linkedExamples` still get a bare instantiation to exercise the card class. With `path`, instantiates only that single realm-relative `.json` example file — its `meta.adoptsFrom` supplies the module + card name, and spec discovery is skipped entirely so you can self-check one instance in isolation. The `path` argument must end in `.json`. `instanceFiles` only contains real `.json` example paths (bare-instantiation fallbacks are filtered out) so any entry can be fed straight back into `path`. If a bare instantiation fails, its failure entry has `path: ''` and a populated `cardName` — identify the spec by `cardName` and do NOT pass the empty path back into `path`. 
The tool bound-polls past the brief read-after-write window where the realm has the source on disk but indexing hasn't populated the module map yet, so a returned failure is a real failure — don't retry on the agent side. When a failure reports a line/column, those numbers refer to the transpiled module — pair with the transpiled-module fetch above (Bash + `boxel read-transpiled` on the Claude backend, `fetch_transpiled_module` on OpenRouter) to locate the offending source construct, then fix the `.gts` source (never copy transpiled patterns back into source). - -### Control Flow - -- `signal_done()` — Signal that the current issue is complete. Call this only after all implementation and test files have been written. -- `request_clarification({ message })` — Signal that you cannot proceed and need human input. Describe what is blocking. +AI Bot uses for its patch-tool schemas). Always call this before writing +a tracker card (Project / Issue / KnowledgeArticle), a Spec card, or any +other card whose shape you need to know. Schemas are cached per-process, +so repeated calls with the same code ref are free. + +## Self-Validation (optional, in-memory results) + +All five validators are factory tools, safe to call repeatedly mid-turn. +They return in-memory result objects and **do not persist any durable +validation cards** — the orchestrator still runs the full validation +pipeline (which persists `TestRun` / `LintResult` / `ParseResult` / +`EvalResult` / `InstantiateResult` cards) after `signal_done`, so calling +any of these mid-turn is optional. + +**Side effect to know about:** the realm-touching validators +(`run_evaluate`, `run_instantiate`, `run_tests`) sync your workspace to +the realm before invoking the prerenderer, so they push whatever you've +just written. That's the same write the orchestrator's between-iteration +sync would have done — it's not destructive, but it does mean calling +these tools is the moment your local writes hit the realm. 
The lighter +validators (`run_lint`, `run_parse`) run entirely in-process and don't +touch the realm. + +- `run_lint({ path? })` — Run ESLint + Prettier (with `@cardstack/boxel` + rules) and return an in-memory `RunLintResult` with `status`, + `filesChecked`, `filesWithErrors`, `errorCount`, `warningCount`, + `durationMs`, `lintableFiles`, and per-violation `{ rule, file, line, +column, message, severity }`. Without `path`, lints every `.gts` / + `.gjs` / `.ts` / `.js` file in the target realm. With `path` + (realm-relative file path), lints **only that one file** — prefer this + right after writing or editing a single file. +- `run_tests()` — Run the realm's QUnit suite and receive an in-memory + result object `{ status, passedCount, failedCount, skippedCount, +durationMs, testFiles, failures, errorMessage? }`. Use it when you + want feedback before signalling done. +- `run_parse({ path? })` — Parse and type-check files in the target + realm and return an in-memory `RunParseResult` with `status`, + `filesChecked`, `filesWithErrors`, `errorCount`, `durationMs`, + `parseableFiles`, and per-error `{ file, line, column, message }`. + Without `path`, runs glint (ember-tsc) over every `.gts` / `.gjs` / + `.ts` file in the realm AND validates every `.json` file listed as a + Spec `linkedExample` (same discovery as the parse validation step). + With `path` (realm-relative file path), parses **only that one file** + — `.gts` / `.gjs` / `.ts` runs through glint; `.json` is parsed and + checked for card document structure. The extension is required; + `parseableFiles` entries are always returned in the `.json` / `.gts` + / `.gjs` / `.ts` form, so you can feed any of them straight back into + `path`. Prefer the single-file form right after writing or editing one + file. +- `run_evaluate({ path? 
})` — Evaluate ESM modules (`.gts` / `.gjs` / + `.ts` / `.js`) in the target realm via the prerenderer sandbox and + return a `RunEvaluateResult` (status, module counts, per-failure + `{ path, error, stackTrace? }`). Without `path`, evaluates every + non-test evaluable module. With `path`, evaluates only that single + realm-relative file — handy for a quick self-check right after writing + one module. Test files (`*.test.*`) are rejected — the test runner + validates those. The tool bound-polls past the brief read-after-write + window where the realm has the source on disk but indexing hasn't + populated the module map yet, so a returned failure is a real failure + — don't retry on the agent side. When a failure reports a line/column, + those numbers refer to the transpiled module — pair with + `boxel read-transpiled` (see Realm-side reads above) to locate the + offending source construct, then fix the `.gts` source (never copy + transpiled patterns back into source). +- `run_instantiate({ path? })` — Instantiate card example instances in + the target realm via the prerenderer sandbox and return a + `RunInstantiateResult` (status, instance counts, per-failure `{ path, +cardName, error, stackTrace? }`). Without `path`, searches the realm + for Spec cards and instantiates every `linkedExample` on every + card/app Spec; specs with no `linkedExamples` still get a bare + instantiation to exercise the card class. With `path`, instantiates + only that single realm-relative `.json` example file — its + `meta.adoptsFrom` supplies the module + card name, and spec discovery + is skipped entirely so you can self-check one instance in isolation. + The `path` argument must end in `.json`. `instanceFiles` only contains + real `.json` example paths (bare-instantiation fallbacks are filtered + out) so any entry can be fed straight back into `path`. 
If a bare + instantiation fails, its failure entry has `path: ''` and a populated + `cardName` — identify the spec by `cardName` and do NOT pass the empty + path back into `path`. The tool bound-polls past the brief + read-after-write window where the realm has the source on disk but + indexing hasn't populated the module map yet, so a returned failure + is a real failure — don't retry on the agent side. When a failure + reports a line/column, those numbers refer to the transpiled module — + pair with `boxel read-transpiled` (see Realm-side reads above) to + locate the offending source construct, then fix the `.gts` source + (never copy transpiled patterns back into source). + +## Control Flow + +- `signal_done()` — Signal that the current issue is complete. Call this + only after all implementation and test files have been written. +- `request_clarification({ message })` — Signal that you cannot proceed + and need human input. Describe what is blocking. ## Required Flow -1. **Inspect before writing.** Search the target realm for existing cards (Bash + `boxel search` on Claude, `search_realm` on OpenRouter — see the Realm-side reads section above). Read or grep the workspace files you plan to change (or sibling files in the same directory) before creating or modifying anything. +1. **Inspect before writing.** Search the target realm for existing + cards (`boxel search --realm --query ''` via `Bash` — + see Realm-side reads above, with full syntax in the `boxel-api` + skill). Read or grep the workspace files you plan to change (or + sibling files in the same directory) before creating or modifying + anything. 2. **Write card definitions** (`.gts`) into the workspace. -3. **Write `.test.gts` test files** co-located with card definitions. Every issue must have at least one test file. **Write tests immediately after the card definition, before any instances or catalog specs.** +3. **Write `.test.gts` test files** co-located with card definitions. 
+ Every issue must have at least one test file. **Write tests + immediately after the card definition, before any instances or + catalog specs.** 4. **Write card instances** (`.json`) into the workspace. -5. **Write a Catalog Spec card** (`Spec/<name>.json`) — adoptsFrom `https://cardstack.com/base/spec` / `Spec`. Link sample instances via `relationships.linkedExamples`. -6. **(Optional) Call `run_tests()`** to self-validate before signalling done. This returns test results in-memory without writing any realm artifacts. Iterating on your own work with `run_tests` is faster than round-tripping through the orchestrator pipeline. -7. **Call `signal_done()`** when all implementation and test files are written. The orchestrator runs the full validation pipeline (which persists a `TestRun` card, among other artifacts) automatically after this. -8. **If tests fail**, the orchestrator feeds failure details back. Re-read the affected workspace files, fix them, and call `signal_done()` again. -9. **Record progress** by appending to the issue's `comments` array (Read + Edit the issue JSON). Never modify the issue's `description`. +5. **Write a Catalog Spec card** (`Spec/<name>.json`) — adoptsFrom + `https://cardstack.com/base/spec` / `Spec`. Link sample instances via + `relationships.linkedExamples`. +6. **(Optional) Call `run_tests()`** to self-validate before signalling + done. This returns test results in-memory without persisting a + durable `TestRun` card (it does sync your workspace to the realm + first). Iterating on your own work with `run_tests` is faster than + round-tripping through the orchestrator pipeline. +7. **Call `signal_done()`** when all implementation and test files are + written. The orchestrator runs the full validation pipeline (which + persists a `TestRun` card, among other artifacts) automatically after + this. +8. **If tests fail**, the orchestrator feeds failure details back. + Re-read the affected workspace files, fix them, and call + `signal_done()` again. +9. 
**Record progress** by appending to the issue's `comments` array + (Read + Edit the issue JSON). Never modify the issue's `description`. ## Target Realm Artifact Structure @@ -236,30 +355,34 @@ and read the reported line to see what compiled construct raised the error — then reason back to the `.gts` source construct that produced it. -- **Claude backend:** `boxel read-transpiled sticky-note.gts --realm <realm-url>` via `Bash`. Pipe through `sed -n '60,70p'` (or similar) to focus on a window around the reported line. -- **OpenRouter backend:** `fetch_transpiled_module({ path: 'sticky-note.gts' })`. +``` +boxel read-transpiled sticky-note.gts --realm <realm-url> +``` + +Pipe through `sed -n '60,70p'` (or similar) to focus on a window around +the reported line. -For example, `" is not a valid character within attribute names: (error occurred in '/.../sticky-note.gts' @ line 66 : column 32)` -typically points inside a `precompileTemplate(...)` block in the -transpiled output. The actual fault in the source is often in a CSS -comment or a template expression — line 66 in your `.gts` source is -unrelated. Reading the transpiled line is what connects the error back -to the source. +For example, `" is not a valid character within attribute names: (error +occurred in '/.../sticky-note.gts' @ line 66 : column 32)` typically +points inside a `precompileTemplate(...)` block in the transpiled +output. The actual fault in the source is often in a CSS comment or a +template expression — line 66 in your `.gts` source is unrelated. +Reading the transpiled line is what connects the error back to the +source. 
### The transpiled output is for DEBUGGING ONLY — never for implementation -**Scope:** the transpiled fetch (Bash + `boxel read-transpiled` on -Claude, `fetch_transpiled_module` on OpenRouter) is only for +**Scope:** the transpiled fetch (`boxel read-transpiled`) is only for investigating **runtime errors in `.gts` modules you have already -written** — when an eval or instantiate validation failure points to -a line/column in the transpiled output and you need to map that +written** — when an eval or instantiate validation failure points to a +line/column in the transpiled output and you need to map that coordinate back to your source. It is not for learning how to write cards, not for understanding Boxel patterns, and not a general reference. -- **Do not copy patterns, imports, or shapes from the transpiled - output into your `.gts` source.** The transpiler emits artifacts - like `setComponentTemplate(...)`, `precompileTemplate(...)`, wire-format +- **Do not copy patterns, imports, or shapes from the transpiled output + into your `.gts` source.** The transpiler emits artifacts like + `setComponentTemplate(...)`, `precompileTemplate(...)`, wire-format template arrays, base64 CSS imports (`./file.gts.CiAg...`), and other compiler internals. None of those belong in source code. - **Do not write `.gts` that "looks like" the compiled JS.** Always @@ -275,13 +398,15 @@ reference. the right references — not what the compiler happens to emit. Use the transpiled fetch the way a developer uses a source map: to -translate a runtime line number back to a source construct in the -code **you wrote**, then close the transpiled view and fix the source +translate a runtime line number back to a source construct in the code +**you wrote**, then close the transpiled view and fix the source idiomatically. ## Writing QUnit Card Tests -Test files are `.test.gts` files co-located with card definitions in the target realm. 
Each test file exports a `runTests()` function that registers QUnit modules and tests. +Test files are `.test.gts` files co-located with card definitions in the +target realm. Each test file exports a `runTests()` function that +registers QUnit modules and tests. ### Example Test @@ -311,20 +436,43 @@ export function runTests() { ### Key Points -- Tests are `.test.gts` files co-located with the card definition (e.g., `sticky-note.gts` and `sticky-note.test.gts`) +- Tests are `.test.gts` files co-located with the card definition (e.g., + `sticky-note.gts` and `sticky-note.test.gts`) - Each test file must export a `runTests()` function -- Use `import.meta.url` to resolve card definitions relative to the test file — never hardcode realm URLs -- Use `setupCardTest(hooks)` for rendering context, then `renderCard(loader, card, format)` for DOM assertions -- No external realm writes during tests — all test data lives in browser memory -- Use `data-test-*` attributes for DOM selectors when testing rendered output -- Use QUnit assertions: `assert.dom()`, `assert.strictEqual()`, `assert.ok()` -- **Never use `QUnit.skip()` or `QUnit.todo()`.** All tests must actually execute. Skipped/todo tests are flagged as `skipped` in the TestRun card and treated as a failure when no tests actually ran. The orchestrator will reject a TestRun where every test is skipped. +- Use `import.meta.url` to resolve card definitions relative to the test + file — never hardcode realm URLs +- Use `setupCardTest(hooks)` for rendering context, then + `renderCard(loader, card, format)` for DOM assertions +- No external realm writes during tests — all test data lives in browser + memory +- Use `data-test-*` attributes for DOM selectors when testing rendered + output +- Use QUnit assertions: `assert.dom()`, `assert.strictEqual()`, + `assert.ok()` +- **Never use `QUnit.skip()` or `QUnit.todo()`.** All tests must + actually execute. 
Skipped/todo tests are flagged as `skipped` in the + TestRun card and treated as a failure when no tests actually ran. The + orchestrator will reject a TestRun where every test is skipped. ## Important Rules -- **Never write to the source realm.** All generated artifacts go to the target realm via the workspace mirror. -- **Stay inside the workspace.** Workspace fs operations are scoped to the local mirror of the target realm. Use realm-relative paths (`sticky-note.gts`, `StickyNote/note-1.json`) — never absolute paths outside the workspace, never the user's home directory, never the source realm. -- **Don't drive sync yourself.** The orchestrator owns `boxel sync` / `boxel push`. Read-only `boxel` commands (`boxel status`, `boxel history`) are fine for inspection, but never run sync, push, or any command that mutates the realm directly. -- **Write source code, not compiled output.** When writing `.gts` files, write clean idiomatic source — never compiled JSON blocks or base64-encoded content. -- **Use absolute `adoptsFrom.module` URLs** when referencing definitions that live in a different realm (e.g., the source realm's tracker schema). -- **Start small and iterate.** Write the smallest working implementation first, then add the test. If tests fail, read the failure output carefully before making targeted fixes. +- **Never write to the source realm.** All generated artifacts go to the + target realm via the workspace mirror. +- **Stay inside the workspace.** Workspace fs operations are scoped to + the local mirror of the target realm. Use realm-relative paths + (`sticky-note.gts`, `StickyNote/note-1.json`) — never absolute paths + outside the workspace, never the user's home directory, never the + source realm. +- **Don't drive sync yourself.** The orchestrator owns `boxel sync` / + `boxel push`. Read-only `boxel` commands (`boxel status`, + `boxel history`) are fine for inspection, but never run sync, push, + or any command that mutates the realm directly. 
+- **Write source code, not compiled output.** When writing `.gts` files, + write clean idiomatic source — never compiled JSON blocks or base64- + encoded content. +- **Use absolute `adoptsFrom.module` URLs** when referencing definitions + that live in a different realm (e.g., the source realm's tracker + schema). +- **Start small and iterate.** Write the smallest working implementation + first, then add the test. If tests fail, read the failure output + carefully before making targeted fixes. diff --git a/packages/software-factory/.claude/CLAUDE.md b/packages/software-factory/.claude/CLAUDE.md index f5aeded35d7..cf9aeb8957e 100644 --- a/packages/software-factory/.claude/CLAUDE.md +++ b/packages/software-factory/.claude/CLAUDE.md @@ -1,783 +1,56 @@ -# Boxel CLI - Claude Code Integration +# CLAUDE.md — software-factory -## GitHub Repository +This package implements the issue-driven factory loop. See +[README.md](../README.md) for architecture and +[AGENTS.md](../AGENTS.md) for the agent-facing summary. -**Official repo:** https://github.com/cardstack/boxel-cli +## Running the factory ---- - -## How to Run Boxel Commands - -After `npm install && npm run build`, use `npx boxel`: - -```bash -npx boxel sync . -npx boxel history ./workspace -npx boxel profile add -``` - -Or use `boxel` directly after `npm link`. - -**For development** (no rebuild needed after code changes): - -```bash -npm run dev -- -``` - -All documentation below shows `boxel ` for brevity. 
- ---- - -## Auto-Activate Boxel Development Skill - -**IMPORTANT:** When the user is doing ANY of the following, automatically read and follow `.claude/skills/boxel-development/SKILL.md`: - -- Creating or editing `.gts` files (card definitions) -- Creating or editing `.json` card instances -- Asking about Boxel patterns, cards, or components -- "Vibe coding" or prototyping Boxel cards -- Working in a synced Boxel workspace (has `.boxel-sync.json`) -- Asking to create, build, or design anything in Boxel - -**How to activate:** Read the skill file at the start of the task: - -``` -Read .claude/skills/boxel-development/SKILL.md -``` - -The skill contains comprehensive Boxel development guidance including CardDef/FieldDef patterns, templates, styling, and best practices. - ---- - -**When a user opens this repo, check if they need onboarding first!** - -## Onboarding Flow - -When you detect a new user (no profile configured), guide them through setup: - -### Step 1: Check Profile - -```bash -npx boxel profile -``` - -If no profile exists, run the interactive setup: - -### Step 2: Add a Profile - -```bash -npx boxel profile add -``` - -This launches an interactive wizard that: - -1. Asks for environment (Production or Staging) -2. Asks for username and password -3. Creates the profile in `~/.boxel-cli/profiles.json` - -**Non-interactive option (CI/automation only):** - -```bash -# Use environment variable to avoid exposing password in shell history -BOXEL_PASSWORD="password" npx boxel profile add -u @username:boxel.ai -n "My Prod Account" -``` - -> **Security Note:** Avoid passing passwords via `-p` flag as they appear in shell history and process listings. Use the interactive wizard or `BOXEL_PASSWORD` environment variable. 
- -### Step 3: Verify & List Workspaces - -```bash -npx boxel list -``` - -### Step 4: First Sync - -Help them sync their first workspace: - -```bash -npx boxel sync @username/workspace ./workspace-name -``` - -### Switching Between Profiles - -```bash -npx boxel profile list # See all profiles (★ = active) -npx boxel profile switch username # Switch by partial match +```sh +pnpm factory:go --brief-url --target-realm ``` ---- - -## Local Workspace Organization - -When syncing multiple workspaces locally, organize them by **domain/username/realm** to mirror the Matrix ID structure (`@username:domain`): - -``` -boxel-workspaces/ -├── boxel.ai/ # Production domain -│ └── acme-corp/ # Username -│ ├── personal/ # Realm -│ ├── project-atlas/ -│ └── inventory-tracker/ -└── stack.cards/ # Staging domain - └── acme-corp/ - └── sandbox/ -``` - -**Benefits:** - -- Clear separation between production and staging environments -- Matches the `@username:domain` profile ID format -- Easy to identify which profile/environment a workspace belongs to -- Supports multiple users on the same machine - -**First-time sync to this structure:** - -```bash -# Production workspace -boxel pull https://app.boxel.ai/acme-corp/project-atlas/ ./boxel-workspaces/boxel.ai/acme-corp/project-atlas - -# Staging workspace -boxel pull https://realms-staging.stack.cards/acme-corp/sandbox/ ./boxel-workspaces/stack.cards/acme-corp/sandbox -``` - ---- - -## Available Skills - -Shared repo-local skills live in `.agents/skills/`. -`.claude/skills/` should be a symlink to that directory so Claude and Codex read the same files. - -### `boxel-track` - Track Local Edits - -Use this skill when starting `boxel track` for local file watching and checkpoints: - -- Creates checkpoints as you save files in IDE -- Use `--push` flag to automatically push changes to server (batch upload) -- Without `--push`: Run `boxel sync . 
--prefer-local` to push to server - -### `boxel-watch` - Smart Watch - -Use this skill when starting `boxel watch` with context-aware timing: - -- **Active development** (5s interval, 3s debounce): When editing files -- **Monitoring** (30s interval, 10s debounce): Background observation -- **Quick feedback** (10s interval, 5s debounce): Testing changes - -### `boxel-restore` - Restore Checkpoint - -Use this skill for the full restore workflow: - -1. Shows history -2. Restores to checkpoint (properly deletes newer files) -3. Syncs deletions to server with `--prefer-local` -4. Optionally restarts watch - -### `boxel-sync` - Smart Sync - -Use this skill for context-aware bidirectional sync: - -- After local edits or track → `--prefer-local` -- After server changes → `--prefer-remote` -- After restore → `--prefer-local` (essential for syncing deletions) - -### `boxel-repair` - Realm Metadata/Card Repair - -Use when workspaces show missing icon/background, wrong display name, or fail to open due to broken `index.json`/`cards-grid.json` links. - -- Read `.claude/skills/boxel-repair/SKILL.md` for the step-by-step repair flow. -- `boxel repair-realm ` repairs one realm -- `boxel repair-realms` repairs all owned realms (excluding `personal` by default) -- Also reconciles Matrix account data (`app.boxel.realms`) unless disabled - -### `software-factory-operations` - End-to-End Delivery Loop - -Use this skill when the task is to break work into Boxel tickets, implement in an assigned realm, verify with Playwright, and keep knowledge plus progress checkpoints as durable factory memory. - ---- - -## Commands Reference - -### Status & Checking - -```bash -boxel status . # Check sync status -boxel status --all # Check all workspaces -boxel status . 
--pull # Auto-pull remote changes -boxel check ./file.json --sync # Check single file -``` - -### Pull, Push, Sync (Command Relationship) - -| Command | Direction | Purpose | Deletes Local | Deletes Remote | -| ------- | -------------- | -------------- | ---------------------- | --------------------- | -| `pull` | Remote → Local | Fresh download | with `--delete` | never | -| `push` | Local → Remote | Deploy changes | never | with `--delete` | -| `sync` | Both ways | Stay in sync | with `--prefer-remote` | with `--prefer-local` | - -```bash -boxel sync . # Interactive sync -boxel sync . --prefer-local # Keep local + sync deletions -boxel sync . --prefer-remote # Keep remote -boxel sync . --prefer-newest # Keep newest version -boxel sync . --delete # Sync deletions both ways -boxel sync . --dry-run # Preview only - -boxel push ./local # One-way push (local → remote) -boxel push ./local --delete # Push and remove orphaned remote files -boxel pull ./local # One-way pull (remote → local) -``` - -**Failed download cleanup:** When `sync` encounters files that return 500 errors (broken/corrupted on server), it will prompt you to delete them: - -``` -⚠️ 3 file(s) failed to download (server error): - - Staff/broken-card.json - - Student/corrupted.json - -These files may be broken on the server. Delete them from remote? [y/N] -``` - -> **Safety tip:** Before any destructive operation, create a checkpoint with a descriptive message: -> -> ```bash -> boxel history . -m "Before cleanup: removing broken server files" -> ``` - -### Track ⇆ (Local File Watching) - -```bash -boxel track . # Track local edits, auto-checkpoint as you save -boxel track . --push # Track AND push changes to server (batch upload) -boxel track . -d 5 -i 30 # 5s debounce, 30s min between checkpoints -boxel track . -q # Quiet mode -boxel track . -v # Verbose mode (debug output) -``` - -**Use track when:** Editing locally in IDE/VS Code. Creates checkpoints as you save files. 
-**Symbol:** ⇆ (horizontal arrows = local changes) -**With --push:** Real-time sync to server using batch upload via `/_atomic` endpoint. - -### Watch ⇅ (Remote Server Watching) - -```bash -boxel watch # Watch all configured realms (from .boxel-workspaces.json) -boxel watch . # Watch single workspace -boxel watch . ./other-realm # Watch multiple realms simultaneously -boxel watch . -i 5 -d 3 # Active: 5s interval, 3s debounce -boxel watch . -q # Quiet mode -``` - -**Use watch when:** Others are editing in Boxel web UI. Pulls their changes and creates checkpoints. -**Symbol:** ⇅ (vertical arrows = remote server changes) - -### Stop - -```bash -boxel stop # Stop all running watch (⇅) and track (⇆) processes -``` - -**Multi-realm watching:** Useful when code lives in one realm and data in another. Each realm gets its own checkpoint tracking and debouncing. - -### Realms (Multi-Realm Configuration) - -```bash -boxel realms # List configured realms -boxel realms --init # Create .boxel-workspaces.json -boxel realms --add ./path # Add a realm -boxel realms --add ./code --purpose "Card definitions" --patterns "*.gts" --default -boxel realms --add ./data --purpose "Data instances" --card-types "BlogPost,Product" -boxel realms --llm # Output LLM guidance for file placement -boxel realms --remove ./path # Remove a realm -``` - -**File placement guidance:** The `--llm` output tells Claude which realm to use for different file types and card types. - -### History & Restore - -```bash -boxel history . # View checkpoints -boxel history . -r # Interactive restore -boxel history . -r 3 # Quick restore to #3 -boxel history . -r abc123 # Restore by hash -boxel history . 
-m "Message" # Create checkpoint with custom message -``` - -### Skills - -```bash -boxel skills --refresh # Fetch skills from Boxel -boxel skills --list # List all available skills -boxel skills --enable "Name" # Enable a skill -boxel skills --disable "Name" # Disable a skill -boxel skills --export ./project # Export as Claude commands -``` - -### Profile (Authentication) - -```bash -boxel profile # Show current active profile -boxel profile list # List all saved profiles (★ = active) -boxel profile add # Interactive wizard to add profile (recommended) -# Non-interactive: use BOXEL_PASSWORD env var instead of -p flag for security -boxel profile switch # Switch profile (partial match OK) -boxel profile remove # Remove a profile -boxel profile migrate # Migrate from old .env file -``` - -**Profile IDs:** Use Matrix format `@username:domain` - -- Production: `@username:boxel.ai` -- Staging: `@username:stack.cards` - -**Storage:** Profiles stored in `~/.boxel-cli/profiles.json` (permissions: 0600) - -### Other - -```bash -boxel list # List workspaces -boxel create endpoint "Name" # Create workspace -boxel consolidate-workspaces . # Move legacy local dirs into domain/owner/realm -boxel repair-realm # Repair one realm metadata/starter cards -boxel repair-realms # Batch repair all owned realms -boxel pull ./local # One-way pull -boxel push ./local # One-way push -``` - -### Share & Gather (GitHub Workflow) - -```bash -boxel share . -t /path/to/repo -b branch-name --no-pr # Share to GitHub repo -boxel gather . -s /path/to/repo # Pull from GitHub repo -``` - -**Share** copies workspace state to a GitHub repo branch: - -- Preserves repo-level files (package.json, LICENSE, README, etc.) 
-- Skips realm-specific files (.realm.json, index.json, cards-grid.json) -- Creates branch and commits changes - -**Gather** pulls changes from GitHub back to workspace: - -- Symmetric to share -- Preserves workspace's realm-specific files - -**Pushing to GitHub:** Use GitHub Desktop to push branches (no CLI auth configured). -After share creates the branch locally, open GitHub Desktop and push. - -### `/boxel-development` - Default Vibe Coding Skill - -The **Boxel Development** skill is auto-enabled for vibe coding. It provides comprehensive guidance for: - -- Card definitions (.gts files) -- Card instances (.json files) -- Boxel patterns and best practices - -### `/boxel-file-structure` - File Organization Rules - -Reference for local file organization: - -- Directory naming: definitions (`kebab-case.gts`), instances (`PascalCase/`) -- Module paths: relative to JSON location (`../card` from subdirectory) -- JSON structure for card instances - -### `boxel skills` - Manage Additional Skills - -Fetch and manage AI instruction cards from Boxel: - -```bash -boxel skills --refresh # Fetch latest from Boxel -boxel skills --list # See available skills -boxel skills --enable "X" # Enable additional skills -boxel skills --export . # Re-export to .agents/skills/ (shared with .claude/skills/) -``` - ---- - -## Key Workflows - -### Local Development with Track (IDE/Agent Editing) - -```bash -boxel track . # Start tracking local edits (auto-checkpoints) -# ... edit files in IDE or with Claude ... -# Track creates LOCAL checkpoints as you save - -# IMPORTANT: When ready to push changes to Boxel server: -boxel sync . --prefer-local # Push your local changes to server -``` - -**Remember:** Track does NOT sync to server automatically - it only creates local checkpoints. Always run `sync --prefer-local` when you want your changes live on the server. - -### Real-Time Sync with Track --push - -```bash -boxel track . --push # Track AND auto-push to server -# ... 
edit files in IDE or with Claude ... -# Changes are checkpointed AND pushed to server automatically -``` - -**With --push:** Uses batch upload via `/_atomic` endpoint for efficient multi-file uploads. Definitions (.gts) are uploaded before instances (.json) to ensure proper indexing. - -### Active Development Session (Watching Server) - -```bash -boxel watch . -i 5 -d 3 # Active development settings -# ... edit in Boxel UI or locally ... -boxel sync . # Push/pull changes -``` - -### Undo Server Changes (Restore) - -```bash -boxel history . # Find checkpoint -boxel history . -r 3 # Restore to #3 -boxel sync . --prefer-local # ESSENTIAL: sync deletions to server -``` - -### Share Milestone to GitHub - -```bash -boxel share . -t /path/to/boxel-home -b boxel/feature-name --no-pr -# Then push via GitHub Desktop -``` - -**URL Portability:** Share automatically converts absolute realm URLs in `index.json` and `cards-grid.json` to relative URLs, making the content portable across different realms. - -### Gather Updates from GitHub - -```bash -boxel gather . -s /path/to/boxel-home -boxel sync . --prefer-local # Push gathered changes to Boxel server -``` - -**URL Portability:** Gather includes `index.json` and `cards-grid.json`, transforming any absolute URLs to relative paths for portability. - -Or simply: - -``` -consult boxel-restore and restore checkpoint 3 -``` - -### Monitor Server While Working - -```bash -boxel watch . -i 30 -d 10 # Monitoring settings -# Checkpoints created automatically -boxel history . 
# View what changed -``` - -### Multi-Realm Development - -When working with multiple realms (e.g., code + data separation): - -```bash -# Configure realms once -boxel realms --add ./code-realm --purpose "Card definitions" --patterns "*.gts" --default -boxel realms --add ./data-realm --purpose "Content instances" --card-types "BlogPost,Product" - -# Watch all configured realms -boxel watch - -# Check where to put a new file -boxel realms --llm -``` - -**File placement heuristics:** - -- `.gts` files → realm with `*.gts` pattern (usually code realm) -- Card instances → realm configured for that card type -- Ambiguous → use the default realm - ---- - -## Critical Patterns - -### ⚠️ SAFETY FIRST: Checkpoint Before Destructive Operations - -**Always create a checkpoint with a descriptive message before:** - -- Deleting files from server (`--prefer-local`, `push --delete`) -- Restoring to an earlier checkpoint -- Bulk cleanup operations -- Removing card definitions or instances - -```bash -boxel history . -m "Before cleanup: removing sample data and unused definitions" -# Now safe to proceed with destructive operation -boxel sync . --prefer-local -``` - -This ensures you can always recover if something goes wrong. The checkpoint message helps identify what state to restore to. - -### 0. ALWAYS Write Source Code, Never Compiled Output - -When editing `.gts` files, **always write clean idiomatic source code**: - -```gts -// CORRECT - Clean source -export class MyCard extends CardDef { - static fitted = class Fitted extends Component { - - }; -} -``` - -**NEVER** write or edit: - -- Compiled JSON blocks (`"block": "[[[10,0]..."`) -- Base64-encoded CSS imports (`./file.gts.CiAg...`) -- Wire format template arrays - -The server compiles source to these formats. If you see them, the file was pulled from server - rewrite it as clean source. - -### 0.5. 
Edit Lock Before Modifying Files - -When editing files locally while watch is running, use edit lock to prevent watch from overwriting your changes: - -```bash -boxel edit . grammy-gallery.gts # Lock file before editing -# ... make your edits ... -boxel sync . --prefer-local # Push your changes -boxel touch . Instance/file.json # Force re-index -boxel edit . --done grammy-gallery.gts # Release lock -``` - -**Quick commands:** - -```bash -boxel edit . --list # See what's locked -boxel edit . --clear # Clear all locks -boxel edit . --done # Release all locks -``` - -**Why:** Watch mode pulls remote changes which can overwrite local edits. Edit lock tells watch to skip those files. - -### 0.5. Touch Instance After Remote .gts Update - -When you update a `.gts` card definition file remotely (via sync/push), touch an instance file to force re-indexing: - -```bash -boxel touch . CardName/instance.json # Touch specific instance -boxel touch . # Or touch all files -``` - -**Why:** The realm server may not re-index the definition until an instance using it is touched. - -### 1. Stop Watch Before Restore - -Watch will re-pull deleted files if running during restore: - -```bash -# Stop watch first (Ctrl+C or kill process) -boxel history . -r 3 -boxel sync . --prefer-local -``` - -### 2. Always Use --prefer-local After Restore - -This syncs local deletions to the server: - -```bash -boxel history . -r 3 # Deletes files locally -boxel sync . --prefer-local # Deletes files on server -``` - -### 3. Debouncing Groups Rapid Changes - -Watch waits for changes to settle: - -- Change detected → timer starts -- More changes → timer resets -- Timer expires → single checkpoint with all changes - -### 4. 
Checkpoint Classification - -- `[MAJOR]` - New files, deleted files, .gts changes, >3 files -- `[minor]` - Small updates to existing .json files -- `LOCAL` ⇆ - Changes from local edits (track command) -- `SERVER` ⇅ - External changes from web UI (watch command) - ---- - -## File Structure - -``` -workspace/ -├── .boxel-sync.json # Sync manifest (hashes, mtimes) -├── .boxel-history/ # Git-based checkpoint history -├── .realm.json # Workspace config -├── index.json # Workspace index -├── *.gts # Card definitions -└── CardName/ - └── *.json # Card instances -``` - ---- - -## Workspace References - -Commands accept: - -- `.` - Current directory (needs `.boxel-sync.json`) -- `./path` - Local path -- `@user/workspace` - e.g., `@username/personal` -- `https://...` - Full URL - ---- - -## Understanding Boxel URLs (Card IDs) - -When a user shares a URL like: - -``` -https://app.boxel.ai/tribecaprep/employee-handbook/Document/d8341312-f3a0-442b-a2e5-49c5cdd84695 -``` - -**This is a Card ID, not a fetchable URL!** - -### How to Parse Boxel URLs - -| URL Part | Meaning | -| ----------------------- | --------------------------- | -| `app.boxel.ai` | Production server | -| `tribecaprep` | User/organization | -| `employee-handbook` | Realm/workspace name | -| `Document/d8341312-...` | Card type and instance path | - -### NEVER Use WebFetch on Boxel URLs - -- Boxel realms are **usually private** and require Matrix authentication -- WebFetch will fail with 401/403 errors -- The user is referencing content **they expect you to have locally** - -### Finding the Local Copy - -If the user references a Boxel URL, the file is likely already synced to the local workspace: - -1. **Parse the path**: `Document/d8341312-f3a0-442b-a2e5-49c5cdd84695` → local path is `Document/d8341312-f3a0-442b-a2e5-49c5cdd84695.json` - -2. **Search the workspace**: - -```bash -# Find by card ID -find . 
-name "d8341312-f3a0-442b-a2e5-49c5cdd84695*" - -# Or search for the card type folder -ls ./Document/ -``` - -3. **Read the local file** using the Read tool - -### Example Workflow - -User says: "Check the handbook at https://app.boxel.ai/tribecaprep/employee-handbook/Document/abc123" - -**Do this:** - -``` -# Look for local file -Read ./Document/abc123.json -``` - -**NOT this:** - -``` -# This will FAIL - private realm -WebFetch https://app.boxel.ai/tribecaprep/employee-handbook/Document/abc123 -``` - ---- - -## API Reference - -| Endpoint | Method | Purpose | -| ---------- | ------ | ----------------------- | -| `/_mtimes` | GET | File modification times | -| `/` | GET | Download file | -| `/` | POST | Upload file | -| `/` | DELETE | Delete file | -| `/_atomic` | POST | Batch atomic operations | - -Headers: - -- `Authorization`: JWT from Matrix auth -- `Accept`: `application/vnd.card+source` or `application/vnd.api+json` - -### Atomic Batch Operations - -The `/_atomic` endpoint supports batch file operations that succeed or fail atomically: - -```json -{ - "atomic:operations": [ - { "op": "add", "href": "./path/to/new.json", "data": { "data": {...} } }, - { "op": "update", "href": "./path/to/existing.gts", "data": { "data": { "type": "module", "attributes": { "content": "..." 
} } } }, - { "op": "remove", "href": "./path/to/delete.json" } - ] -} -``` - -| Operation | Behavior | -| --------- | ------------------------------------------- | -| `add` | Create new file (fails 409 if exists) | -| `update` | Update existing file (fails 404 if missing) | -| `remove` | Delete file | - -**Content-Type:** `application/vnd.api+json` - ---- - -## Conflict Resolution - -| Local | Remote | Action | -| --------- | --------- | ------------------------------- | -| Changed | Unchanged | Push | -| Unchanged | Changed | Pull | -| Changed | Changed | Conflict → use strategy | -| Deleted | Changed | `--prefer-local` deletes remote | -| Changed | Deleted | `--prefer-remote` deletes local | - ---- - -## Troubleshooting - -### "Authentication failed" - -- Check active profile: `boxel profile` -- Verify credentials: `boxel profile list` -- Verify you can log into Boxel web with same credentials -- For staging: ensure profile uses `@username:stack.cards` +- `--debug` — verbose logs. +- `--agent openrouter` — use the opencode-OpenRouter passthrough agent + (routes via the realm-server's `/_openrouter/chat/completions` proxy). -### "No workspace found" +## Running tests -- Run `boxel list` to see workspaces -- Use full URL for first sync -- Ensure correct profile is active for the environment +- `pnpm test:node` — QUnit node tests. +- `pnpm test:playwright` — Playwright e2e tests. +- `pnpm lint` — eslint + prettier + glint (`ember-tsc`). -### Files keep reverting after restore +## Skill loading -- Stop watch before restoring -- Use `boxel sync . --prefer-local` after +The agent's instructions live in `.agents/skills/`. The factory loader +(`src/factory-skill-loader.ts`) walks three directories: -### Watch not detecting changes +1. `packages/software-factory/.agents/skills/` — factory-specific skills + (`software-factory-bootstrap`, `software-factory-operations`). +2. 
`packages/boxel-cli/plugin/skills/` — boxel-cli Claude Code plugin + skills (`boxel-api`, `boxel-command`); same directory the plugin + distributes to end users. +3. monorepo root `.agents/skills/` — general domain skills + (`boxel-development`, `boxel-file-structure`, `ember-best-practices`). -- Check interval setting -- Verify server URL -- Check active profile: `boxel profile` +`packages/software-factory/.claude/skills` is a symlink to +`.agents/skills/` so Claude Code and the factory loader read the same +files. -### Switching environments (prod/staging) +## Architectural principle -- Add profiles for each environment -- Switch with: `boxel profile switch ` +`boxel-cli` owns the entire Boxel API surface. The factory imports +`BoxelCLIClient` from `@cardstack/boxel-cli/api`; it never calls +`fetch()` against a realm directly. Auth, token refresh, and retries +are internal to boxel-cli. -### "500 Internal Server Error" on specific files +## Key source files -- These files are broken/corrupted on the server -- Sync will prompt you to delete them after completion -- Or use `boxel push . --delete` to remove all orphaned remote files -- Check if card definitions have errors in Boxel web UI +- `src/factory-entrypoint.ts` — CLI entry; bootstraps target realm, + creates seed issue, runs the loop. +- `src/issue-loop.ts` — inner/outer issue scheduling. +- `src/workspace-fs.ts` — local mirror of the target realm. +- `src/factory-agent/opencode.ts` — agent backend. +- `src/factory-tool-builder.ts` — factory tool registry passed to the + agent (validators, `get_card_schema`, `signal_done`, + `request_clarification`). diff --git a/packages/software-factory/AGENTS.md b/packages/software-factory/AGENTS.md index 2474ca6cd28..513a58d128c 100644 --- a/packages/software-factory/AGENTS.md +++ b/packages/software-factory/AGENTS.md @@ -1,224 +1,46 @@ -# AGENTS.md - Boxel CLI Codex Guidance - -## High-Priority Safety Rules - -1. 
Create a checkpoint before destructive operations: - - `boxel history . -m "Before destructive operation"` -2. After restore, always sync with local preference: - - `boxel history . -r ` - - `boxel sync . --prefer-local` -3. Stop watch before restore to avoid re-pulling deleted files. -4. When watch is running and you edit files locally, use edit locks: - - `boxel edit . ` before editing - - `boxel edit . --done ` after sync -5. Write clean source code, never compiled wire-format output for `.gts` files. - -## Mission - -This repo is part of a software factory where humans do not inspect or hand-edit code directly. - -The goal is to: - -- Accept a user request -- Break it down into persistent tasks -- Implement the work incrementally -- Test each step -- Commit or checkpoint progress continuously -- Store the plan, tickets, and knowledge base in Boxel so future runs become more reliable - -Code and project state should live in Boxel realms. Tickets and knowledge cards are the persistent memory for the factory. - -## Current Environment - -- Active Boxel CLI user: `factory` -- Do not put passwords directly into shell commands. If re-auth is needed, use `BOXEL_PASSWORD` or interactive login. 
-- Realm server: `http://localhost:4201/` -- Matrix server: `http://localhost:8008` -- `boxel list` currently shows access to: - - `http://localhost:4201/factory/guidance-tasks/` -- Check out Boxel workspaces into the local `realms/` subfolder under this repo -- Ignore `dark-factory`; it was an earlier iteration and should not be used as the current target -- Task tracker modules live in the `guidance-tasks` workspace under module `darkfactory` -- There are demo instances in that workspace that should be inspected before inventing new task structures -- Tickets may live in a dedicated task realm or in the realm created for a specific solution, but they should reuse the tracker module instead of duplicating it - -## Factory Execution Model - -When given a product request, the default operating loop is: - -1. Discover the relevant realms, modules, and existing task or knowledge cards -2. Choose or create the target implementation realm -3. Break the request into tickets, milestones, or task cards in Boxel -4. Add or update knowledge-base cards that capture decisions, constraints, and reusable procedures -5. Implement work one task at a time -6. Test after each meaningful change -7. Checkpoint with Boxel history and commit in git when a git repo exists -8. Sync changes back to Boxel and continue iterating until the request is demonstrably working - -Persistent Boxel artifacts are part of the deliverable, not just code. - -## Immediate Demo Priority - -There is one hour to produce an end-to-end demo of this workflow. 
- -Bias toward: - -- A small but complete project -- Visible task breakdown into tickets -- Clear knowledge-base entries -- Repeated task -> implement -> test -> checkpoint loops -- Fast feedback over architectural perfection - -## Auth and Testing Notes - -- Prefer Boxel CLI capabilities over rebuilding the same behavior from scratch -- Additional tooling is allowed when the CLI does not cover the task cleanly -- You will likely need auth helpers that can obtain JWTs for accessible realms -- The `_server-session` endpoint can provide JWTs for realms the current user can access -- Authenticated card URLs open directly into interact mode, which is the preferred surface for browser testing and Playwright-based verification -- Explore search and query options in the tracker and workspace data model before creating new structures -- Project tests should live in Boxel realms when they are part of the product's persistent memory -- Preferred convention: - - realm-local Playwright specs live under `tests/**/*.spec.ts` - - files copied into disposable verification realms live under `tests/fixtures/**` - - fixture contents are copied to the scratch realm root preserving paths, so `tests/fixtures/DeliveryBrief/example.json` becomes `DeliveryBrief/example.json` in the scratch realm -- Run realm-hosted tests with: - - `npm run test:realm -- --realm-path ./realms/` -- The default runner flow is: - 1. create a fresh scratch realm - 2. pull it locally under the canonical workspace path `realms////` - 3. copy fixture files from the source realm into the scratch realm - 4. sync the scratch realm - 5. run the source realm's Playwright specs against the scratch realm URL - 6. 
report failures and keep the scratch realm available for inspection -- When fixture instances depend on card definitions from the source realm, prefer absolute `meta.adoptsFrom.module` URLs so the scratch realm only needs the copied instances - -## Boxel Development Trigger - -When tasks involve Boxel card development, automatically consult: - -- `.agents/skills/boxel-development/SKILL.md` -- `.agents/skills/software-factory-operations/SKILL.md` when the task is about ticket-driven application delivery, realm coordination, or the end-to-end factory loop - -The shared repo-local skills live in `.agents/skills/`. -Claude should read them through `.claude/skills/`, which should point at the same directory to avoid duplicate instructions. - -Trigger examples: - -- Editing `.gts` card definitions -- Editing card instance `.json` -- Asking for Boxel card patterns/components -- Working in a synced workspace (`.boxel-sync.json` present) - -## Core Command Semantics - -- `pull`: remote -> local -- `push`: local -> remote -- `sync`: bidirectional conflict resolution -- `track`: local file watching with auto-checkpoints (use `--push` for real-time server sync) -- `watch`: remote change watching (pulls server changes) -- `repair-realm`: repair one realm metadata + starter cards + optional Matrix reconciliation -- `repair-realms`: batch repair all owned realms and reconcile Matrix realm list - -After local edits tracked with `track`, push to server with: - -- `boxel sync . --prefer-local` -- Or use `boxel track . --push` for automatic real-time sync - -## Onboarding Flow (When Needed) - -If user has no profile configured: - -1. `npx boxel profile` -2. `npx boxel profile add` (interactive preferred) -3. `npx boxel list` -4. First sync/pull into local workspace - -If `boxel list` already works, treat onboarding as complete and move on to workspace discovery. - -Security note: - -- Prefer interactive password entry or `BOXEL_PASSWORD` env var. 
-- Avoid plain `-p` password usage in shell history. - -## Multi-Realm Guidance - -- Configure realms with `boxel realms --add ...` -- Use `boxel realms --llm` for file-placement guidance. -- This repo already includes `.boxel-workspaces.json` mapping `guidance-tasks` as the shared tracker realm and `software-factory-demo` as the default implementation realm. -- Heuristic: - - `.gts` -> code realm (`*.gts` pattern) - - instances -> realm mapped for card type - - ambiguous -> default realm - -## Boxel URL Handling - -Boxel app URLs usually reference private, authenticated content. - -- Do not fetch them from the public web. -- Parse card path from URL and locate local synced file instead. - Example: -- URL segment `Document/` maps to local `Document/.json` - -## Useful Workflows - -### Local dev loop (manual sync) - -1. `boxel track .` -2. edit files -3. `boxel sync . --prefer-local` - -### Local dev loop (real-time sync) - -1. `boxel track . --push` -2. edit files (changes auto-pushed via batch upload) - -### Monitor server changes - -1. `boxel watch .` -2. inspect checkpoints with `boxel history .` - -### Restore workflow - -1. stop watch -2. `boxel history . -r ` -3. `boxel sync . --prefer-local` - -### Software Factory Loop - -1. `boxel list` -2. sync the relevant realm locally -3. inspect existing task and knowledge cards -4. create or update tickets for the requested outcome -5. implement in the target realm -6. store or update project tests inside the target realm when they are part of the deliverable -7. test using CLI plus authenticated browser flows where useful -8. prefer a disposable scratch realm for fixture-driven browser verification -9. report issues back into tickets or knowledge cards -10. sync to the realm -11. checkpoint and sync -12. 
update knowledge cards with what was learned - -## Related References - -- `.claude/CLAUDE.md` -- `.agents/skills/boxel-development/SKILL.md` -- `.agents/skills/boxel-file-structure/SKILL.md` -- `.agents/skills/boxel-repair/SKILL.md` -- `.agents/skills/boxel-sync/SKILL.md` -- `.agents/skills/boxel-watch/SKILL.md` -- `.agents/skills/boxel-track/SKILL.md` -- `.agents/skills/boxel-restore/SKILL.md` -- `.agents/skills/boxel-setup/SKILL.md` -- `.agents/skills/software-factory-operations/SKILL.md` - -## Share & Gather (GitHub Workflow) - -Share workspace to GitHub repo, gather changes back: - -```bash -boxel share . -t /path/to/repo -b branch-name --no-pr -boxel gather . -s /path/to/repo -``` - -**URL Portability:** Share/gather automatically convert absolute realm URLs in `index.json` and `cards-grid.json` to relative paths, making content portable across different realms. +# AGENTS.md — software-factory + +This package implements the issue-driven factory loop. The factory takes a +brief, creates a project + issues in a target realm, and the agent works +each issue using native fs tools (`Read` / `Write` / `Edit` / `Glob` / +`Grep` / `Bash`) plus a handful of factory tools (validators + control +flow). + +See [README.md](./README.md) for architecture. The agent's loaded +instructions live in `.agents/skills/` (root + this package + `boxel-cli`). + +## Commands + +- `pnpm factory:go --brief-url <url> --target-realm <url>` — run the factory loop. + - `--debug` for verbose logs. + - `--agent openrouter` to use the opencode-OpenRouter passthrough agent. +- `pnpm test:node` — QUnit node tests. +- `pnpm test:playwright` — Playwright e2e tests. +- `pnpm lint` — eslint + prettier + glint (`ember-tsc`). + +## Key files + +- `src/factory-entrypoint.ts` — CLI entry; bootstraps the target realm, + creates the seed issue, runs the loop. +- `src/issue-loop.ts` — inner/outer issue scheduling loop. 
+- `src/factory-skill-loader.ts` — resolves and loads skills from + `packages/software-factory/.agents/skills/` (primary), + `packages/boxel-cli/plugin/skills/` (fallback), and monorepo root + `.agents/skills/` (fallback). +- `src/workspace-fs.ts` — local-filesystem mirror of the target realm; + the agent reads/writes here, the orchestrator syncs. +- `src/factory-agent/opencode.ts` — agent backend (opencode in passthrough + mode against the realm-server's `/_openrouter/chat/completions` proxy). + +## Architectural boundaries + +- **`boxel-cli` owns the entire Boxel API surface.** The factory imports + `BoxelCLIClient` from `@cardstack/boxel-cli/api`; it never calls + `fetch()` against a realm directly. Auth, token refresh, and retries + are internal to boxel-cli. +- **Target-realm I/O is local.** The agent operates on the workspace + mirror under `os.tmpdir()/boxel-factory-workspaces/<realm>/`. The + orchestrator calls `client.sync()` between iterations. +- **Realm creation, pull, sync are orchestrator concerns.** The agent is + explicitly told not to drive sync — `factory-entrypoint.ts` and + `factory-issue-loop-wiring.ts` own those calls. 
diff --git a/packages/software-factory/package.json b/packages/software-factory/package.json index 565a3405701..f8aa4b4f2a8 100644 --- a/packages/software-factory/package.json +++ b/packages/software-factory/package.json @@ -34,6 +34,8 @@ }, "devDependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.114", + "@modelcontextprotocol/sdk": "^1.29.0", + "@opencode-ai/sdk": "1.14.34", "@cardstack/boxel-cli": "workspace:*", "@cardstack/boxel-icons": "workspace:*", "@cardstack/boxel-ui": "workspace:*", @@ -60,6 +62,7 @@ "ember-concurrency": "catalog:", "ember-modifier": "^4.1.0", "eslint-plugin-qunit": "catalog:", + "opencode-ai": "1.14.34", "fs-extra": "catalog:", "jsonwebtoken": "catalog:", "pg": "catalog:", diff --git a/packages/software-factory/prompts/bootstrap-implement.md b/packages/software-factory/prompts/bootstrap-implement.md index bd4ea3bc312..848697bf189 100644 --- a/packages/software-factory/prompts/bootstrap-implement.md +++ b/packages/software-factory/prompts/bootstrap-implement.md @@ -17,9 +17,13 @@ Description: ## What to Create -Create the following artifacts in the target realm using the available tools -(`write_file`, `update_issue`, `create_knowledge`). Use `search_realm` and -`read_file` to inspect existing state before creating anything. +Create the following artifacts in the target realm by calling the +**`Write`** tool to produce each `.json` file. The cwd is the local +workspace mirror of the target realm; file paths below are +workspace-relative. Use **`Read`** / **`Glob`** to inspect existing +state before creating anything, and **`Bash`** to shell out to +`boxel search` if you need to query the realm. Do not describe what +you would write — call `Write` to actually create each file. ### 1. Project Card @@ -130,14 +134,38 @@ dependency cards are implemented before cards that consume them. ## Instructions -1. Use `read_file` to read the brief at the URL above (if it is in a realm) or use the brief content in the description -2. 
Derive the slug and project code from the brief title -3. Create the Project card -4. Create the IssueTracker card and link it to the Project card -5. Create Knowledge Article cards (at least brief context + agent onboarding) -6. Identify entry-point cards from the brief — these are the top-level cards users interact with -7. Create one implementation Issue per entry-point card, with all relationships wired -8. Call `signal_done()` — the orchestrator manages issue status transitions. Do NOT set the issue status yourself. +**Step 0 (MANDATORY before any `Write`).** Fetch the live schema for +each card type you're about to write. Without this you will guess +field names and array shapes and produce cards that fail to render +with `Expected array for field value ` runtime errors: + +``` +get_card_schema({ module: "", name: "Project" }) +get_card_schema({ module: "", name: "IssueTracker" }) +get_card_schema({ module: "", name: "KnowledgeArticle" }) +get_card_schema({ module: "", name: "Issue" }) +``` + +The returned `{ attributes, relationships? }` JSON Schema names every +field, its type (`string`, `number`, `boolean`, `array` for +`containsMany` / `linksToMany`, etc.), and any enum values +(`status`, `priority`, `articleType`, `projectStatus`, `issueType`). +Use those exact names, types, and enum values. + +Then create the artifacts in order so relationship targets exist +when referenced: + +1. The brief content is included verbatim in the issue description above. Do not fetch the URL — read the description. +2. Derive the slug and project code from the brief title. +3. Call `Write` to create the **Project card** at `Projects/.json`. +4. Call `Write` to create the **IssueTracker card** at `Boards/.json` and link it to the Project card. +5. Call `Write` to create the **Knowledge Article cards** in `Knowledge Articles/` (at least brief context + agent onboarding). +6. Identify entry-point cards from the brief — these are the top-level cards users interact with. +7. 
Call `Write` to create **one implementation Issue per entry-point card** at `Issues/-.json`, with all relationships wired. +8. Call **`signal_done`** (factory MCP tool) — the orchestrator manages issue status transitions. Do NOT set the issue status yourself. + +**You must actually call the `Write` tool for each file. Calling +`signal_done` without writing the artifacts is a failure.** Create artifacts in the order listed — Project first, then IssueTracker, then Knowledge Articles, then Issues — so that relationship targets exist when referenced. diff --git a/packages/software-factory/prompts/issue-implement.md b/packages/software-factory/prompts/issue-implement.md new file mode 100644 index 00000000000..045bbaace37 --- /dev/null +++ b/packages/software-factory/prompts/issue-implement.md @@ -0,0 +1,66 @@ +# Project + +{{project.objective}} + +{{#if project.successCriteria}} +Success criteria: +{{#each project.successCriteria}} +- {{.}} +{{/each}} +{{/if}} + +# Knowledge + +{{#each knowledge}} + +## {{title}} + +{{content}} +{{/each}} + +# Current Issue + +ID: {{issue.id}} +Summary: {{issue.summary}} +Status: {{issue.status}} +Priority: {{issue.priority}} + +Description: +{{issue.description}} + +{{#if issue.checklist}} +Checklist: +{{#each issue.checklist}} +- [ ] {{.}} +{{/each}} +{{/if}} + +{{#if toolResults}} + +# Tool Results + +You previously invoked the following tools. Use these results to inform your implementation. + +{{#each toolResults}} + +## {{tool}} (exit code: {{exitCode}}) + +```{{outputFormat}} +{{output}} +``` + +{{/each}} +{{/if}} + +# Instructions + +Implement this issue in this order: + +1. Use **`Read`** / **`Glob`** to inspect existing workspace state. If (and only if) you need to query the **target realm** for cards already in it, shell out via `Bash` to `boxel search` / `boxel read-transpiled` with `--realm `. Do not list or query any other realm; the skills are authoritative for patterns. +2. 
Call **`Write`** to create or update card definitions (`.gts`) in the workspace. +3. Call **`Write`** to create QUnit test files (`.test.gts`) co-located with card definitions — write tests BEFORE any sample instances or catalog specs. **Wrap every test in a QUnit `module(...)` block named after the card or feature under test** (e.g., `module('StickyNote', function (hooks) { ... test(...) ... })`). The TestRun card groups results by module name, so tests left at the top level all collapse into a single "default" bucket and become hard to read. +4. Call **`Write`** to create at least one sample card instance (`.json`) in the workspace. +5. Call **`Write`** to create a Catalog Spec card in the `Spec/` folder for the top-level card (adoptsFrom `https://cardstack.com/base/spec#Spec`), linking sample instances via `linkedExamples`. +6. Call **`signal_done`** (factory MCP tool) when all implementation and test files have been written. + +The validation pipeline runs tests automatically after `signal_done` — write tests, then signal done, and the orchestrator handles the rest. Do NOT set the issue status to "done" yourself — the orchestrator manages issue status transitions based on validation results. **Calling `signal_done` without having actually written the required files is a failure.** diff --git a/packages/software-factory/prompts/ticket-iterate.md b/packages/software-factory/prompts/issue-iterate.md similarity index 78% rename from packages/software-factory/prompts/ticket-iterate.md rename to packages/software-factory/prompts/issue-iterate.md index 4aa8b500966..a95119c3e0a 100644 --- a/packages/software-factory/prompts/ticket-iterate.md +++ b/packages/software-factory/prompts/issue-iterate.md @@ -53,11 +53,11 @@ All validation steps passed. Fix the validation failures shown above. You have the same tools available. 
You can: -- Use read_file to inspect the current state of your implementation -- Use write_file to update implementation or test files -- Use search_realm to check what cards exist +- Use **`Read`** / **`Glob`** to inspect the current state of your implementation +- Use **`Write`** or **`Edit`** to update implementation or test files +- Use **`Bash`** + `boxel search` to check what cards exist in the realm - If a lint violation is in your code, fix the code to pass lint - If the test expectation is wrong, fix the test - If the implementation is wrong, fix the implementation -When done, call signal_done. +When done, call **`signal_done`** (factory MCP tool). diff --git a/packages/software-factory/prompts/ticket-test.md b/packages/software-factory/prompts/issue-test.md similarity index 89% rename from packages/software-factory/prompts/ticket-test.md rename to packages/software-factory/prompts/issue-test.md index 9c339e0e3ae..66228831816 100644 --- a/packages/software-factory/prompts/ticket-test.md +++ b/packages/software-factory/prompts/issue-test.md @@ -23,4 +23,4 @@ Tests must: - Verify card-specific behavior, field values, and relationships - Keep all test data in browser memory — no external realm writes -Use write_file to create test files, then call signal_done. +Call **`Write`** to create the test files in the workspace, then call **`signal_done`** (factory MCP tool). diff --git a/packages/software-factory/prompts/system.md b/packages/software-factory/prompts/system.md index cae64a33868..ea01d9bd546 100644 --- a/packages/software-factory/prompts/system.md +++ b/packages/software-factory/prompts/system.md @@ -1,15 +1,57 @@ # Role You are a software factory agent. You implement Boxel cards and tests in -target realms based on ticket descriptions and project context. +target realms based on issue descriptions and project context. 
-You have access to tools for reading and writing the workspace mirror of -the target realm, searching realm state, running validators, and signaling -completion. Inspect existing state before making changes — do not guess. +# Tools + +You operate inside an agent session. Your cwd is a local workspace mirror of the target realm; the orchestrator syncs it to the realm between iterations. **Every filesystem path you pass to a tool must be workspace-relative** (e.g. `Projects/sticky-note.json`, `sticky-note.gts`). Absolute paths under `/Users/`, `~`, application-support directories, and `..`-traversal that escapes the workspace are all blocked — do not invent them. If you need to confirm where you are, run `pwd` once; everything afterward is relative. + +Native filesystem tools (use these to actually create / change files): + +- **`Write`** — create or overwrite a file at a given path. Use this to + produce every `.gts` card definition, `.test.gts`, and `.json` card + instance the issue requires. +- **`Read`** — load an existing file's contents. +- **`Edit`** — patch an existing file in place. +- **`Glob`** / **`Grep`** — find files / search content in the workspace. +- **`Bash`** — run shell commands. For realm-runtime reads (transpiled + output, structured search **of the target realm only**) use + `boxel read-transpiled` / `boxel search` from the operations skill. + +Factory tools (call by name): + +- **`get_card_schema({ module, name })`** — fetch the live JSON Schema + of a card definition. Required before writing a tracker + (Project / Issue / KnowledgeArticle) or Spec card. +- **`run_lint`** / **`run_parse`** / **`run_evaluate`** / + **`run_instantiate`** / **`run_tests`** — mid-turn validators. Optional: + the orchestrator runs these automatically after `signal_done`. +- **`signal_done`** — call when every required file has been written. 
+ **Always end the turn with this.** +- **`request_clarification({ message })`** — call when blocked and + unable to make progress. + +# Doing the work + +You are an _agent_, not a planner. **Reason briefly, then call tools to +act.** When the issue says to create a file, call `Write` — do not +describe what the file would contain in plain text. When the issue says +to inspect existing state, call `Read` / `Glob` — do not assume. + +Inspect existing state before making changes; do not guess. # Rules -- Every ticket must include at least one QUnit test file (.test.gts co-located with the card definition). Every `test(...)` in those files must be wrapped inside a QUnit `module('', function (hooks) { ... })` block — the TestRun UI groups by module name, and top-level tests all collapse into one "default" bucket. +- **Stay in your target realm.** The loaded skills + the issue + description contain everything you need to implement the card. Do + NOT run `boxel file ls` / `boxel search` / `boxel read-transpiled` + against any realm other than the target realm shown below — not the + base realm, not the software-factory realm, not experiments, not + catalog. Cross-realm exploration burns tokens and time without + helping. If a pattern isn't covered by your skills, write the card + using your own knowledge and let validation tell you what to fix. +- Every issue must include at least one QUnit test file (.test.gts co-located with the card definition). Every `test(...)` in those files must be wrapped inside a QUnit `module('', function (hooks) { ... })` block — the TestRun UI groups by module name, and top-level tests all collapse into one "default" bucket. - For each top-level card defined in the brief, create a Catalog Spec card in the target realm's Spec/ folder (adoptsFrom https://cardstack.com/base/spec#Spec) and at least one sample card instance linked via linkedExamples. 
diff --git a/packages/software-factory/prompts/ticket-implement.md b/packages/software-factory/prompts/ticket-implement.md deleted file mode 100644 index 80c75568193..00000000000 --- a/packages/software-factory/prompts/ticket-implement.md +++ /dev/null @@ -1,66 +0,0 @@ -# Project - -{{project.objective}} - -{{#if project.successCriteria}} -Success criteria: -{{#each project.successCriteria}} -- {{.}} -{{/each}} -{{/if}} - -# Knowledge - -{{#each knowledge}} - -## {{title}} - -{{content}} -{{/each}} - -# Current Issue - -ID: {{issue.id}} -Summary: {{issue.summary}} -Status: {{issue.status}} -Priority: {{issue.priority}} - -Description: -{{issue.description}} - -{{#if issue.checklist}} -Checklist: -{{#each issue.checklist}} -- [ ] {{.}} -{{/each}} -{{/if}} - -{{#if toolResults}} - -# Tool Results - -You previously invoked the following tools. Use these results to inform your implementation. - -{{#each toolResults}} - -## {{tool}} (exit code: {{exitCode}}) - -```{{outputFormat}} -{{output}} -``` - -{{/each}} -{{/if}} - -# Instructions - -Implement this issue in this order: - -1. Use search_realm and read_file to inspect existing realm state -2. Use write_file to create or update card definitions (.gts) in the target realm -3. Use write_file to create QUnit test files (.test.gts) co-located with card definitions — write tests BEFORE any sample instances or catalog specs. **Wrap every test in a QUnit `module(...)` block named after the card or feature under test** (e.g., `module('StickyNote', function (hooks) { ... test(...) ... })`). The TestRun card groups results by module name, so tests left at the top level all collapse into a single "default" bucket and become hard to read. -4. Create at least one sample card instance (.json) in the target realm -5. Create a Catalog Spec card in the Spec/ folder for the top-level card (adoptsFrom https://cardstack.com/base/spec#Spec), linking sample instances via linkedExamples -6. 
Call signal_done when all implementation and test files have been written - -The validation pipeline runs tests automatically after `signal_done` — write tests, then signal done, and the orchestrator handles the rest. Do NOT set the issue status to "done" yourself — the orchestrator manages issue status transitions based on validation results. diff --git a/packages/software-factory/scripts/smoke-tests/factory-skill-smoke.ts b/packages/software-factory/scripts/smoke-tests/factory-skill-smoke.ts index 1860de5153e..e6eb414956b 100644 --- a/packages/software-factory/scripts/smoke-tests/factory-skill-smoke.ts +++ b/packages/software-factory/scripts/smoke-tests/factory-skill-smoke.ts @@ -155,14 +155,10 @@ async function main(): Promise { let allSkillNames = [ 'boxel-development', 'boxel-file-structure', + 'boxel-api', + 'boxel-command', 'ember-best-practices', 'software-factory-operations', - 'boxel-sync', - 'boxel-track', - 'boxel-watch', - 'boxel-restore', - 'boxel-repair', - 'boxel-setup', ]; let allSkills = await loader.loadAll(allSkillNames); diff --git a/packages/software-factory/src/factory-agent/claude-code.ts b/packages/software-factory/src/factory-agent/claude-code.ts index 976671c48ba..83bfd0ac87c 100644 --- a/packages/software-factory/src/factory-agent/claude-code.ts +++ b/packages/software-factory/src/factory-agent/claude-code.ts @@ -8,14 +8,12 @@ * early and surfaces the corresponding AgentRunResult. Otherwise the query * runs to normal completion (maxTurns or no-more-tool-calls). * - * Relationship to OpenRouterFactoryAgent: - * - Same prompt assembly (FilePromptLoader + assemble*Prompt helpers) - * - Same tool catalog (FactoryTool[]) - * - Same exit signals (DONE_SIGNAL / CLARIFICATION_SIGNAL) - * - Different transport: Agent SDK instead of OpenRouter HTTP - * - * The factory's deterministic ralph loop is oblivious to which agent is - * running — the LoopAgent contract (`run(context, tools)`) is identical. 
+ * The sibling backend (`./opencode.ts`) drives the same factory loop + * via the opencode SDK + an OpenRouter (or boxel-proxy) provider — + * same prompt assembly, same tool catalog, same exit signals, just a + * different transport. The factory's deterministic ralph loop is + * oblivious to which agent is running: the LoopAgent contract + * (`run(context, tools)`) is identical for both. */ import { realpathSync } from 'node:fs'; @@ -68,46 +66,13 @@ const MAX_TOOL_USE_TURNS = 50; /** * Built-in Claude Code tools the factory exposes to the model on the - * Claude backend. These replace the custom `read_file` / `write_file` - * factory tools — they operate on the SDK query's `cwd` (the factory - * workspace), so the model uses native semantics for fs work and we - * keep MCP focused on operations that genuinely need realm runtime - * access (search_realm, validators, structured updates, signals). + * Claude backend. They operate on the SDK query's `cwd` (the factory + * workspace), so the model handles workspace files natively while MCP + * stays focused on what needs realm runtime access (`get_card_schema`, + * validators, control signals). */ const NATIVE_FS_TOOLS = ['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep']; -/** - * Factory tool names that are filtered out of the MCP catalog on the - * Claude backend because the model has a native or boxel CLI - * alternative — keeping them in the catalog would just be a duplicate - * surface for the same operation. OpenRouter still gets these tools; - * it has no native fs and no Bash. - * - * Each entry's replacement: - * - `read_file` → native `Read` - * - `write_file` → native `Write` / `Edit` - * - `run_command` → unused in practice (card-type schemas - * are pre-loaded by the wiring); if ever - * needed, the boxel CLI exposes - * `boxel run-command` over Bash. - * - `fetch_transpiled_module`→ Bash + `boxel read-transpiled - * --realm `. 
Used only when a - * validator reports a transpiled - * line/column, so the marginal cost of - * shelling out is negligible. - * - `search_realm` → Bash + `boxel search --realm - * --query '' --json`. Single-quote - * the JSON in shell to avoid expansion; - * see the operations skill for examples. - */ -const CLAUDE_FILTERED_FACTORY_TOOLS = new Set([ - 'read_file', - 'write_file', - 'run_command', - 'fetch_transpiled_module', - 'search_realm', -]); - let log = logger('factory-agent-claude-code'); type CapturedSignal = @@ -153,24 +118,12 @@ export class ClaudeCodeFactoryAgent implements LoopAgent { context: AgentContext, tools: FactoryTool[], ): Promise { - // Filter out factory tools the Claude backend doesn't need: - // - // 1. Tools in CLAUDE_FILTERED_FACTORY_TOOLS — native Claude Code - // tools (Read / Write / Edit / Bash) or the boxel CLI cover - // these surfaces directly; a duplicate MCP tool would just be - // a second way to do the same thing. - // 2. Tools whose source is `'registered'` — these come from the - // `ToolRegistry`'s `realm-api` manifests. After CS-10883 the - // registry only contains `realm-create`, which the entrypoint - // drives before the agent runs; nothing on the agent's hot - // path needs it. The filter remains so any future re-additions - // to the registry stay off the Claude path by default. - // - // OpenRouter still sees the full list — it has no native fs / Bash. - let mcpFactoryTools = tools.filter( - (t) => - !CLAUDE_FILTERED_FACTORY_TOOLS.has(t.name) && t.source !== 'registered', - ); + // `'registered'` tools come from the ToolRegistry's `realm-api` + // manifests (currently just `realm-create`, which the entrypoint + // drives before the agent runs). Keep them off the agent's hot + // path so future registry additions don't accidentally surface + // here. 
+ let mcpFactoryTools = tools.filter((t) => t.source !== 'registered'); let systemPrompt = this.buildSystemPrompt(context, mcpFactoryTools); let userPrompt = this.buildUserPrompt(context); @@ -316,9 +269,9 @@ export class ClaudeCodeFactoryAgent implements LoopAgent { }; } - // Stream ended without an explicit signal. Mirror OpenRouterFactoryAgent: - // if the agent called at least one tool, treat as done; otherwise - // needs_iteration so the orchestrator feeds validation failures back. + // Stream ended without an explicit signal. If the agent called + // at least one tool, treat as done; otherwise needs_iteration so + // the orchestrator feeds validation failures back. return { status: toolCallLog.length > 0 ? 'done' : 'needs_iteration', toolCalls: toolCallLog, @@ -344,14 +297,12 @@ export class ClaudeCodeFactoryAgent implements LoopAgent { // Two tool surfaces are visible to the model on the Claude backend: // 1. Native Claude Code tools (Read / Write / Edit / Bash / Glob / // Grep) — anchored to the factory workspace via the SDK query's - // `cwd`. These replace the factory's old `read_file` / - // `write_file` shims; the model works on the local mirror of the - // target realm directly. + // `cwd`. The model works on the local mirror of the target realm + // directly; `boxel sync` pushes between iterations. // 2. Factory tools exposed via an in-process MCP server, prefixed - // with `mcp____`. Used for everything that needs realm - // runtime access (search, validators, host commands, structured - // updates) and for control signals (signal_done / - // request_clarification). + // with `mcp____`. Used for realm-runtime operations + // (`get_card_schema`, the five validators) and for control + // signals (`signal_done`, `request_clarification`). // // The shared prompt template / skills reference factory operations by // their plain names (e.g. `signal_done`). 
Append a short rename map diff --git a/packages/software-factory/src/factory-agent/index.ts b/packages/software-factory/src/factory-agent/index.ts index 12d21104f00..41e5f432b09 100644 --- a/packages/software-factory/src/factory-agent/index.ts +++ b/packages/software-factory/src/factory-agent/index.ts @@ -3,14 +3,17 @@ * * One file per backend, each implementing the `LoopAgent` interface in * `./types.ts`: - * - `./openrouter.ts` — OpenRouter (OpenAI-compat tool-use protocol) - * - `./claude-code.ts` — Claude Code Agent SDK (in-process MCP) + * - `./opencode.ts` — opencode SDK driving an OpenRouter (or proxy) + * model. Backs the `--agent openrouter` CLI flag. + * - `./claude-code.ts` — Claude Code Agent SDK (in-process MCP). + * Backs the `--agent claude` CLI flag. * * `createLoopAgent()` in `../factory-issue-loop-wiring.ts` picks which to * instantiate based on the `--agent` flag. */ export * from './types'; -export { OpenRouterFactoryAgent } from './openrouter'; +export { OpencodeFactoryAgent } from './opencode'; +export type { OpencodeAgentConfig } from './opencode'; export { ClaudeCodeFactoryAgent } from './claude-code'; -export { MockFactoryAgent, MockLoopAgent } from './mocks'; +export { MockLoopAgent } from './mocks'; diff --git a/packages/software-factory/src/factory-agent/mocks.ts b/packages/software-factory/src/factory-agent/mocks.ts index 68a8ec3271e..4c0395db524 100644 --- a/packages/software-factory/src/factory-agent/mocks.ts +++ b/packages/software-factory/src/factory-agent/mocks.ts @@ -1,51 +1,14 @@ /** * Mock agent implementations for testing. * - * These are deterministic agents that return pre-scripted responses, - * used by unit tests and smoke tests to verify orchestration logic - * without calling a real LLM. + * Deterministic agents that return pre-scripted responses, used by unit + * tests and smoke tests to verify orchestration logic without calling + * a real LLM. 
*/ -import type { AgentAction, AgentContext, FactoryAgent } from './types'; -import type { LoopAgent, AgentRunResult } from './types'; +import type { AgentContext, LoopAgent, AgentRunResult } from './types'; import type { FactoryTool } from '../factory-tool-builder'; -// --------------------------------------------------------------------------- -// MockFactoryAgent — deterministic FactoryAgent for declarative model tests -// --------------------------------------------------------------------------- - -export class MockFactoryAgent implements FactoryAgent { - private responses: AgentAction[][]; - private callIndex = 0; - - /** All AgentContext inputs received, in order. */ - readonly receivedContexts: AgentContext[] = []; - - constructor(responses: AgentAction[][]) { - this.responses = responses; - } - - async plan(context: AgentContext): Promise { - this.receivedContexts.push(context); - - if (this.callIndex >= this.responses.length) { - throw new Error( - `MockFactoryAgent exhausted: called ${this.callIndex + 1} times ` + - `but only ${this.responses.length} response(s) were configured`, - ); - } - - let response = this.responses[this.callIndex]; - this.callIndex++; - return response; - } - - /** Number of times plan() has been called. */ - get callCount(): number { - return this.callIndex; - } -} - // --------------------------------------------------------------------------- // MockLoopAgent — deterministic LoopAgent for tool-use model tests // --------------------------------------------------------------------------- diff --git a/packages/software-factory/src/factory-agent/opencode.ts b/packages/software-factory/src/factory-agent/opencode.ts new file mode 100644 index 00000000000..9ad4e19b297 --- /dev/null +++ b/packages/software-factory/src/factory-agent/opencode.ts @@ -0,0 +1,1027 @@ +/** + * OpencodeFactoryAgent — LoopAgent backed by the opencode SDK. 
+ * + * Drives a session against an `opencode` subprocess so `--agent + * openrouter` runs get native fs / Bash / Glob / Grep, with the 8 + * factory tools surfaced over MCP for schema lookup, validation, and + * control signals. + * + * Two auth modes: + * - **Direct** — `openRouterApiKey` set: opencode's provider points + * at openrouter.ai with the user's bearer. + * - **Passthrough** — no key: opencode's provider points at the + * realm server's `_openrouter/chat/completions` with a server JWT; + * the realm applies the server-side OpenRouter key and bills + * credits to the operator. + * + * DONE / CLARIFICATION signals carry a Symbol that doesn't survive + * JSON-RPC, so the MCP server re-tags them `factory:done` / + * `factory:clarification` and the agent matches on the tag. + */ + +import { createServer as createHttpServer } from 'node:http'; +import type { AddressInfo } from 'node:net'; +import { randomUUID } from 'node:crypto'; +import { realpathSync } from 'node:fs'; + +import { Server as McpServer } from '@modelcontextprotocol/sdk/server/index.js'; +import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js'; +import { + CallToolRequestSchema, + ListToolsRequestSchema, +} from '@modelcontextprotocol/sdk/types.js'; +import type { Config as OpencodeConfig } from '@opencode-ai/sdk'; + +// `@opencode-ai/sdk` is ESM-only and the test runner uses ts-node in +// CommonJS mode, so a top-level `import` would fail at module-load +// time on every test that touches this file. Lazy-load via dynamic +// import inside `run()` so the type imports stay available at compile +// time and the runtime cost (one dynamic import per `factory:go`) is +// negligible. 
+async function loadOpencodeSdk() { + let mod = await import('@opencode-ai/sdk'); + return { + createOpencodeServer: mod.createOpencodeServer, + createOpencodeClient: mod.createOpencodeClient, + }; +} + +import { + CLARIFICATION_SIGNAL, + DONE_SIGNAL, + type FactoryTool, + type ToolCallEntry, +} from '../factory-tool-builder'; +import { logger } from '../logger'; +import { + assembleBootstrapPrompt, + assembleImplementPrompt, + assembleIteratePrompt, + FilePromptLoader, + requireDarkfactoryModuleUrl, + type PromptLoader, +} from '../factory-prompt-loader'; +import type { + AgentContext, + AgentRunResult, + LoopAgent, + ResolvedSkill, +} from './types'; + +let log = logger('factory-agent-opencode'); + +const FACTORY_PROVIDER_ID = 'factory-openrouter'; +const MCP_SERVER_NAME = 'factory'; +const SIGNAL_DONE_TAG = 'factory:done'; +const SIGNAL_CLARIFICATION_TAG = 'factory:clarification'; + +/** + * Upper bound on the post-race finally drain. If the race above won + * via the `waitForSessionIdle` fallback, the prompt + event-log + * promises are very likely stuck on the opencode 1.14.34 dead-HTTP + * bug; we let them try to settle for a beat, then move on. + */ +const POST_RACE_DRAIN_MS = 2000; + +/** + * The 8 factory tools exposed to the agent over MCP. Filesystem and + * shell are owned by opencode's native `Read` / `Write` / `Edit` / + * `Glob` / `Grep` / `Bash`. + */ +const FACTORY_MCP_TOOL_NAMES = new Set([ + 'get_card_schema', + 'run_tests', + 'run_lint', + 'run_evaluate', + 'run_parse', + 'run_instantiate', + 'signal_done', + 'request_clarification', +]); + +/** + * Per-session tool whitelist for opencode. We need fs (`read` / `write` / + * `edit`) for the workspace, `bash` to shell out to `boxel + * read-transpiled` / `boxel search`, and `glob` / `grep` to inspect + * existing state. 
Everything else opencode bundles by default + * (`webfetch`, `task`, `todowrite`, `skill`, `question`, `invalid`) + * costs tokens on every request without serving the factory's flow, + * so we explicitly disable them. Our own factory MCP tools come + * through the MCP transport and aren't affected by this map. + */ +const ENABLED_OPENCODE_TOOLS = { + read: true, + write: true, + edit: true, + bash: true, + glob: true, + grep: true, + webfetch: false, + task: false, + todowrite: false, + skill: false, + question: false, + invalid: false, +}; + +export interface OpencodeAgentConfig { + /** OpenRouter model ID (e.g., `anthropic/claude-opus-4-7`). */ + model: string; + /** Realm server URL — used in passthrough mode as the base for opencode's provider. */ + realmServerUrl: string; + /** Boxel CLI client — used in passthrough mode to fetch the server JWT we hand opencode. */ + client: import('@cardstack/boxel-cli/api').BoxelCLIClient; + /** + * If set, opencode talks to OpenRouter directly with this key in + * the Authorization header. If unset, the agent falls back to + * passthrough mode (boxel JWT → realm-server + * `/_openrouter/chat/completions`). + */ + openRouterApiKey?: string; + /** + * Local workspace directory mirroring the target realm. Used as + * the opencode subprocess `cwd` so native fs tools resolve realm- + * relative paths inside the workspace. Combined with + * `permission.external_directory: 'deny'` to scope writes. + */ + workspaceDir: string; + /** When true, log opencode events to stderr. */ + debug?: boolean; +} + +interface CapturedSignal { + kind: 'done' | 'clarification'; + message?: string; +} + +interface RunHooks { + onToolCall: (entry: ToolCallEntry) => void; + onSignal: (signal: CapturedSignal) => void; +} + +export class OpencodeFactoryAgent implements LoopAgent { + private config: OpencodeAgentConfig; + private promptLoader: PromptLoader; + + // Long-lived opencode subprocess + MCP server. 
Spawned once on the + // first `run()` and reused for every subsequent iteration. opencode + // 1.14.34 has rapid-restart failure modes (fresh-spawn `session.prompt` + // POSTs return `TypeError: fetch failed` often enough to make a per- + // iteration spawn unworkable), and tearing the subprocess down between + // sessions is exactly the wrong shape for the SDK anyway — opencode is + // a long-lived server with many short-lived sessions. + private opencode?: { url: string; close: () => void }; + private mcp?: { url: string; close: () => Promise }; + private client?: ReturnType< + Awaited>['createOpencodeClient'] + >; + // Active per-run hooks the long-lived MCP server forwards into. + private currentHooks?: RunHooks; + private resolvedWorkspaceDir?: string; + + constructor(config: OpencodeAgentConfig, promptLoader?: PromptLoader) { + this.config = config; + this.promptLoader = promptLoader ?? new FilePromptLoader(); + } + + /** + * Tear down the long-lived opencode subprocess + MCP server. The + * orchestrator calls this in its outer `finally` after all issue + * iterations are done. + */ + async close(): Promise { + let opencode = this.opencode; + let mcp = this.mcp; + this.opencode = undefined; + this.mcp = undefined; + this.client = undefined; + this.currentHooks = undefined; + + if (opencode) { + // Parse the actual port from the SDK-returned URL rather than + // assuming the SDK's default — `port: 0` would give us a random + // port and a hardcoded 4096 would then SIGKILL whatever + // unrelated process happens to be there. Falls back to no + // escalation when parsing fails. + let port = parseOpencodePort(opencode.url); + try { + opencode.close(); + } catch { + // best-effort + } + // `opencode.close()` only sends SIGTERM, which the 1.14.34 + // binary ignores. waitForPortFree escalates to SIGKILL on the + // process listening on this specific port. 
+ if (port !== undefined) { + await waitForPortFree(port, 1000); + } + } + if (mcp) { + await mcp.close().catch(() => undefined); + } + } + + /** + * Spin up the long-lived MCP server + opencode subprocess on first + * use. The MCP server's tool-call / signal callbacks dispatch into + * `currentHooks`, which `run()` swaps in / out around each session. + */ + private async ensureStarted(mcpTools: FactoryTool[]): Promise { + if (this.opencode) return; + + this.mcp = await startFactoryMcpServer(mcpTools, { + onToolCall: (entry) => this.currentHooks?.onToolCall(entry), + onSignal: (signal) => this.currentHooks?.onSignal(signal), + }); + + let providerConfig: OpencodeConfig['provider']; + if (this.config.openRouterApiKey) { + providerConfig = buildProviderConfig( + this.config.model, + 'https://openrouter.ai/api/v1', + `Bearer ${this.config.openRouterApiKey}`, + 'OpenRouter (direct)', + ); + } else { + // Passthrough: realm-server validates the server JWT, applies + // the server-side OpenRouter key, forwards verbatim, and bills + // credits to the operator. The 7-day JWT TTL outlasts any single + // factory:go run. + let serverToken = await this.config.client.getServerToken(); + providerConfig = buildProviderConfig( + this.config.model, + new URL('_openrouter', this.config.realmServerUrl).toString(), + serverToken, + 'OpenRouter (boxel passthrough)', + ); + } + + let { createOpencodeServer, createOpencodeClient } = + await loadOpencodeSdk(); + let resolvedDir = realpathSync(this.config.workspaceDir); + // CRITICAL: opencode's `createOpencodeServer` spawns the subprocess + // without a `cwd` option — it inherits the parent's cwd. The model + // then resolves relative paths from its native fs tools (`Read`, + // `Write`, `Edit`, …) against that inherited cwd. 
Without this + // chdir the model would write files into the directory `factory:go` + // was invoked from instead of the realm workspace, and we'd see + // "Read /Users/.../packages/software-factory/Projects/foo.json" + // (which doesn't exist) rather than the workspace path. + let originalCwd = process.cwd(); + process.chdir(resolvedDir); + try { + this.opencode = await createOpencodeServer({ + config: { + provider: providerConfig, + mcp: { + [MCP_SERVER_NAME]: { + type: 'remote', + url: this.mcp.url, + enabled: true, + }, + }, + permission: { + edit: 'allow', + // Opencode's bash permission accepts either a single mode + // or a per-pattern map. We allow Bash unconditionally; the + // model is told (via the prompt) to use it for read-only + // inspection only. The `external_directory` knob is what + // structurally prevents write escape. + bash: 'allow', + external_directory: 'deny', + }, + }, + }); + } finally { + process.chdir(originalCwd); + } + this.client = createOpencodeClient({ baseUrl: this.opencode.url }); + // Reuse the same canonical path for `session.list` / `session.status` + // queries. opencode normalizes `directory` through its own realpath + // (`/var/folders/...` → `/private/var/folders/...` on macOS), and + // its filter is an exact-string match. + this.resolvedWorkspaceDir = resolvedDir; + + if (this.config.debug) { + log.info( + `Agent backend: opencode (model=${this.config.model}, mode=${this.config.openRouterApiKey ? 'direct' : 'passthrough'})`, + ); + } + // Always print the opencode subprocess URL + log directory: when + // `session.prompt rejected` warnings fire, the next thing the + // operator wants is `tail -f ~/.local/share/opencode/log/` + // to see what the subprocess was doing at the moment of failure. + let opencodeLogDir = `${process.env.HOME ?? 
'~'}/.local/share/opencode/log`; + log.info( + `opencode subprocess at ${this.opencode!.url} | logs: ${opencodeLogDir} (newest = active)`, + ); + } + + async run( + context: AgentContext, + tools: FactoryTool[], + ): Promise { + let mcpTools = tools.filter((t) => FACTORY_MCP_TOOL_NAMES.has(t.name)); + await this.ensureStarted(mcpTools); + let client = this.client!; + let workspaceDir = this.resolvedWorkspaceDir!; + + let toolCallLog: ToolCallEntry[] = []; + let captured: CapturedSignal | undefined; + let sessionErrorMessage: string | undefined; + let resolveSignal: () => void; + let signalCaptured = new Promise((resolve) => { + resolveSignal = resolve; + }); + let resolveSessionError: () => void; + let sessionErrored = new Promise((resolve) => { + resolveSessionError = resolve; + }); + + this.currentHooks = { + onToolCall: (entry) => { + toolCallLog.push(entry); + log.info( + `factory tool: ${entry.tool}(${summarizeArgs(entry.args)}) [${entry.durationMs}ms]`, + ); + }, + onSignal: (signal) => { + captured = signal; + resolveSignal(); + }, + }; + + try { + let session = await client.session.create({ + query: { directory: workspaceDir }, + }); + let sessionId = (session.data as { id: string }).id; + log.info(`session: ${sessionId}`); + + // Subscribe to opencode's per-directory event bus. Drives both + // visibility (tool calls + step transitions logged) and error + // propagation: a `session.error` (e.g. 401 from the model API) + // resolves `sessionErrored`, which short-circuits the run below + // and returns `blocked` instead of letting the loop spin. 
+ let stopEventLog = subscribeForLogging(client, sessionId, (message) => { + sessionErrorMessage = message; + resolveSessionError(); + }).catch(() => undefined); + + let prompt = this.buildPrompt(context); + let systemPrompt = this.buildSystemPrompt(context); + + // The SDK's `session.prompt` HTTP call is *supposed* to block + // until the model + tool loop completes, but in opencode + // 1.14.34 the response often isn't flushed once the loop exits + // — leaving the promise hanging long after the server-side + // session is idle. The other obvious completion signals + // (`client.event.subscribe` SSE stream and + // `client.session.status` map) both turned out empty / racy in + // this version too. Watching `session.list[id].time.updated` + // through a stability window is the only signal that's both + // present and reliable here. The prompt's return value is + // unused — DONE / CLARIFICATION signals come back through the + // MCP server, not the prompt. + let promptPromise = client.session + .prompt({ + path: { id: sessionId }, + body: { + model: { + providerID: FACTORY_PROVIDER_ID, + modelID: this.config.model, + }, + system: systemPrompt, + // Trim opencode's default tool catalog to only what the + // factory actually uses. Every tool definition costs + // tokens on every chat completion — disabling the ones + // we never need (webfetch, task, todowrite, skill, + // question, invalid) cuts thousands of tokens out of + // each request and keeps the model focused. + tools: ENABLED_OPENCODE_TOOLS, + parts: [{ type: 'text', text: prompt }], + }, + }) + .catch(async (err) => { + let liveness = await probeOpencode(this.opencode?.url); + log.warn( + `session.prompt rejected (session=${sessionId} url=${this.opencode?.url ?? '?'}): ${describeFetchError(err)} | opencode probe: ${liveness}`, + ); + }); + + try { + // Happy path: the model calls `signal_done` (or + // `request_clarification`), MCP captures it, and we return + // instantly. 
Error path: a `session.error` event (model API + // 401, etc.) short-circuits and we return `blocked`. Fallback + // for when the model exits the loop silently: poll + // `time.updated` for a stability window so we still return + // rather than hang on the dead `prompt` HTTP promise. + await Promise.race([ + signalCaptured, + sessionErrored, + waitForSessionIdle(client, sessionId, workspaceDir), + ]); + } finally { + // Best-effort drain so sockets close cleanly when they can, + // bounded so we never block on the documented opencode 1.14.34 + // bug where `session.prompt` returns but never flushes the HTTP + // response. If the race above won via `waitForSessionIdle`, + // both `promptPromise` and `stopEventLog` are likely stuck on + // exactly that — letting them go after a short window is the + // whole point of having a fallback signal in the first place. + await Promise.race([promptPromise, delay(POST_RACE_DRAIN_MS)]); + await Promise.race([stopEventLog, delay(POST_RACE_DRAIN_MS)]); + } + } finally { + this.currentHooks = undefined; + } + + if (captured?.kind === 'done') { + return { status: 'done', toolCalls: toolCallLog }; + } + if (captured?.kind === 'clarification') { + return { + status: 'blocked', + toolCalls: toolCallLog, + message: captured.message ?? '', + }; + } + if (sessionErrorMessage) { + return { + status: 'blocked', + toolCalls: toolCallLog, + message: `opencode session error: ${sessionErrorMessage}`, + }; + } + return { + status: toolCallLog.length > 0 ? 'done' : 'needs_iteration', + toolCalls: toolCallLog, + }; + } + + private buildSystemPrompt(context: AgentContext): string { + let skills = context.skills.map((s: ResolvedSkill) => ({ + name: s.name, + content: s.content, + references: s.references ?? 
[], + })); + return this.promptLoader.load('system', { + targetRealm: context.targetRealm, + darkfactoryModuleUrl: requireDarkfactoryModuleUrl(context), + skills, + }); + } + + private buildPrompt(context: AgentContext): string { + let issueType = (context.issue as Record).issueType; + if (issueType === 'bootstrap' && context.briefUrl) { + return assembleBootstrapPrompt({ context, loader: this.promptLoader }); + } + if (context.validationContext) { + return assembleIteratePrompt({ + context, + previousActions: [], + iteration: context.iteration ?? 1, + loader: this.promptLoader, + }); + } + return assembleImplementPrompt({ context, loader: this.promptLoader }); + } +} + +/** + * Build an opencode provider config for the OpenAI-compatible adapter. + * + * Used in two modes: direct (baseURL → openrouter.ai, auth → user's + * bearer) and passthrough (baseURL → realm-server `/_openrouter`, + * auth → server JWT). AI-SDK appends `/chat/completions` to baseURL. + */ +function buildProviderConfig( + model: string, + baseURL: string, + authorization: string, + displayName: string, +): OpencodeConfig['provider'] { + return { + [FACTORY_PROVIDER_ID]: { + npm: '@ai-sdk/openai-compatible', + name: displayName, + options: { baseURL }, + models: { + [model]: { + name: model, + tool_call: true, + headers: { Authorization: authorization }, + }, + }, + }, + }; +} + +/** + * Spin up a localhost HTTP MCP server exposing the 8 factory tools + * (`get_card_schema`, 5 validators, 2 control signals) so opencode + * can call them. + * + * Tool calls are forwarded to the supplied `FactoryTool.execute()`, + * results are JSON-serialized back. DONE / CLARIFICATION signals + * (which carry `Symbol`s that don't survive JSON-RPC) are tagged + * with the static `factory:done` / `factory:clarification` strings; + * the agent's signal-capture hook matches on the tag. 
+ */ +async function startFactoryMcpServer( + tools: FactoryTool[], + hooks: { + onToolCall: (entry: ToolCallEntry) => void; + onSignal: (signal: CapturedSignal) => void; + }, +): Promise<{ url: string; close: () => Promise }> { + let byName = new Map(tools.map((t) => [t.name, t])); + + let server = new McpServer( + { name: 'factory', version: '1.0.0' }, + { capabilities: { tools: {} } }, + ); + + server.setRequestHandler(ListToolsRequestSchema, async () => ({ + tools: tools.map((t) => ({ + name: t.name, + description: t.description, + inputSchema: (t.parameters ?? { + type: 'object', + properties: {}, + }) as { type: 'object'; properties?: Record }, + })), + })); + + server.setRequestHandler(CallToolRequestSchema, async (request) => { + let { name, arguments: args } = request.params; + let tool = byName.get(name); + if (!tool) { + return { + isError: true, + content: [{ type: 'text' as const, text: `Unknown tool: ${name}` }], + }; + } + let typedArgs = (args ?? {}) as Record; + let start = Date.now(); + let result: unknown; + try { + result = await tool.execute(typedArgs); + } catch (error) { + result = { + error: error instanceof Error ? error.message : String(error), + }; + } + let durationMs = Date.now() - start; + hooks.onToolCall({ tool: name, args: typedArgs, result, durationMs }); + + if (result && typeof result === 'object' && 'signal' in result) { + let sig = (result as Record).signal; + if (sig === DONE_SIGNAL) { + hooks.onSignal({ kind: 'done' }); + } else if (sig === CLARIFICATION_SIGNAL) { + let message = String((result as Record).message ?? 
''); + hooks.onSignal({ kind: 'clarification', message }); + } + } + + return { + content: [ + { + type: 'text' as const, + text: JSON.stringify(serializeSignalResult(result)), + }, + ], + }; + }); + + let transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => randomUUID(), + }); + await server.connect(transport); + + let httpServer = createHttpServer(async (req, res) => { + let chunks: Buffer[] = []; + req.on('data', (c) => chunks.push(Buffer.from(c))); + req.on('end', async () => { + let bodyText = Buffer.concat(chunks).toString('utf8'); + let body: unknown = undefined; + if (bodyText.length > 0) { + try { + body = JSON.parse(bodyText); + } catch { + body = undefined; + } + } + try { + await transport.handleRequest( + req as Parameters[0], + res, + body, + ); + } catch (err) { + if (!res.writableEnded) { + res.statusCode = 500; + res.end( + `mcp: handler error: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + }); + }); + + await new Promise((resolve) => { + httpServer.listen(0, '127.0.0.1', resolve); + }); + let port = (httpServer.address() as AddressInfo).port; + let url = `http://127.0.0.1:${port}/`; + + return { + url, + close: () => + new Promise((resolve) => { + httpServer.close(() => resolve()); + }), + }; +} + +/** Stringify the symbol-bearing factory signals so they survive JSON-RPC. */ +function serializeSignalResult(result: unknown): unknown { + if (result && typeof result === 'object' && 'signal' in result) { + let r = result as Record; + let signal = r.signal; + let tag = + signal === DONE_SIGNAL + ? SIGNAL_DONE_TAG + : signal === CLARIFICATION_SIGNAL + ? SIGNAL_CLARIFICATION_TAG + : String(signal); + return { ...r, signal: tag }; + } + return result; +} + +/** + * Watch `session.time.updated` via `client.session.list` until it stops + * advancing for `STABILITY_WINDOW_MS` — opencode bumps it on every step + * transition, so a few seconds of no change reliably means the model + + * tool loop has gone idle. 
The `directory` query must match the + * canonical realpath opencode normalized at create time (`/var → + * /private/var` on macOS), already resolved by the caller. + */ +async function waitForSessionIdle( + client: { session: { list: (opts?: any) => Promise } }, + sessionId: string, + workspaceDir: string, +): Promise { + const POLL_INTERVAL_MS = 750; + // Generous: `time.updated` appears to tick on step boundaries rather + // than per `message.part.delta`, and opus can sit 30+ seconds + // "thinking" between steps. The polling is only a fallback for when + // the model exits without calling `signal_done` / `request_clarification`, + // so the wider window costs nothing on the happy path (signal-captured + // race short-circuits this). + const STABILITY_WINDOW_MS = 60_000; + const MAX_WAIT_MS = 30 * 60 * 1000; // 30 minutes; comfortable upper bound for opus + // After this many consecutive `session.list` failures, give up — the + // opencode subprocess has almost certainly died (TypeError: fetch + // failed). We return cleanly so the outer factory loop can continue + // to the next iteration instead of crashing the whole run. + const MAX_CONSECUTIVE_LIST_FAILURES = 5; + // Periodic heartbeat so users running `factory:go` see proof the + // model is making progress (or stuck) instead of staring at a + // silent terminal for minutes. 
+ const HEARTBEAT_MS = 15_000; + let started = Date.now(); + let lastUpdated: number | undefined; + let stableSince: number | undefined; + let consecutiveFailures = 0; + let lastHeartbeat = Date.now(); + let lastHeartbeatUpdated: number | undefined; + + // eslint-disable-next-line no-constant-condition + while (true) { + if (Date.now() - started > MAX_WAIT_MS) { + throw new Error( + `Timed out after ${MAX_WAIT_MS}ms waiting for opencode session ${sessionId} to settle.`, + ); + } + + let res: + | { data?: Array<{ id: string; time: { updated: number } }> } + | undefined; + try { + res = (await client.session.list({ + query: { directory: workspaceDir }, + })) as { + data?: Array<{ id: string; time: { updated: number } }>; + }; + consecutiveFailures = 0; + } catch (err) { + consecutiveFailures++; + if (consecutiveFailures >= MAX_CONSECUTIVE_LIST_FAILURES) { + log.warn( + `opencode session.list failed ${consecutiveFailures}× in a row (${describeFetchError(err)}); treating session as ended`, + ); + return; + } + } + + if (res) { + let session = res.data?.find((s) => s.id === sessionId); + if (session) { + let updated = session.time.updated; + if (lastUpdated === undefined || updated !== lastUpdated) { + lastUpdated = updated; + stableSince = Date.now(); + } else if ( + stableSince !== undefined && + Date.now() - stableSince >= STABILITY_WINDOW_MS + ) { + return; + } + } + } + + let now = Date.now(); + if (now - lastHeartbeat >= HEARTBEAT_MS) { + let elapsedSec = Math.round((now - started) / 1000); + let activity = + lastUpdated === lastHeartbeatUpdated + ? `idle ${Math.round((now - (stableSince ?? 
now)) / 1000)}s` + : 'active'; + log.info( + `waiting on opencode session [${elapsedSec}s elapsed, ${activity}]`, + ); + lastHeartbeat = now; + lastHeartbeatUpdated = lastUpdated; + } + + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)); + } +} + +/** + * Subscribe to opencode's `/event` SSE stream and log step transitions + * + native tool invocations + session.idle for our session. Best-effort + * — completion detection still uses `time.updated` polling, this is + * purely for visibility into what the model is doing during long opus + * runs. + */ +async function subscribeForLogging( + client: { event: { subscribe: () => Promise } }, + sessionId: string, + onError?: (message: string) => void, +): Promise { + let events: { stream: AsyncIterable }; + try { + events = (await client.event.subscribe()) as { + stream: AsyncIterable; + }; + } catch { + return; + } + try { + for await (let raw of events.stream) { + let event = raw as { + type?: string; + properties?: Record; + }; + let props = event.properties ?? {}; + if (props.sessionID && props.sessionID !== sessionId) continue; + switch (event.type) { + case 'session.idle': + log.info(`opencode session.idle`); + return; + case 'session.error': { + let summary = summarizeSessionError(props.error); + log.warn(`opencode session.error: ${summary}`); + onError?.(summary); + return; + } + case 'message.part.updated': { + let part = props.part as + | { type?: string; tool?: string; state?: { status?: string } } + | undefined; + if ( + part?.type === 'tool' && + part.tool && + part.state?.status === 'completed' + ) { + log.info(`opencode tool: ${part.tool}`); + } + break; + } + default: + // ignore other events + break; + } + } + } catch { + // SSE stream torn down — that's fine, the logging task is done. + } +} + +/** + * opencode's `session.error` event carries an APIError payload with a + * useful message + status code buried under nested `data` fields and a + * lot of CDN noise. 
Pull out just the parts a human needs to diagnose. + */ +function summarizeSessionError(error: unknown): string { + if (!error || typeof error !== 'object') return JSON.stringify(error); + let e = error as Record; + let name = (e.name as string | undefined) ?? 'Error'; + let data = (e.data as Record | undefined) ?? {}; + let message = (data.message as string | undefined) ?? 'unknown'; + let statusCode = data.statusCode as number | undefined; + let url = (data.metadata as { url?: string } | undefined)?.url; + let parts = [`${name}: ${message}`]; + if (statusCode !== undefined) parts.push(`status=${statusCode}`); + if (url) parts.push(`url=${url}`); + // The HTTP statusText (`message`) is usually generic ("Forbidden + // Request"). The actual reason ("Insufficient credits", model-not- + // available, upstream auth failure, ...) lives in the response body. + // Surface it when it's small enough to be useful in a single-line log. + let body = extractBodySummary(data.responseBody); + if (body) parts.push(`body=${body}`); + return parts.join(' '); +} + +function extractBodySummary(rawBody: unknown): string | undefined { + if (typeof rawBody !== 'string' || rawBody.length === 0) return undefined; + try { + let parsed = JSON.parse(rawBody); + // Common shapes we want to surface: + // { errors: [string, ...] } ← realm-server JSON:API error envelope + // { error: { message: string } } ← OpenRouter / OpenAI-style + // { error: string } ← simpler form + if ( + parsed && + typeof parsed === 'object' && + Array.isArray((parsed as { errors?: unknown[] }).errors) && + (parsed as { errors: unknown[] }).errors.length > 0 + ) { + let first = (parsed as { errors: unknown[] }).errors[0]; + return typeof first === 'string' ? 
first : JSON.stringify(first); + } + let err = (parsed as { error?: unknown }).error; + if (typeof err === 'string') return err; + if (err && typeof err === 'object') { + let msg = (err as { message?: unknown }).message; + if (typeof msg === 'string') return msg; + return JSON.stringify(err); + } + } catch { + // Non-JSON body — fall through to raw truncation. + } + // Truncate raw body so a giant HTML error page doesn't drown the log. + return rawBody.length > 200 ? rawBody.slice(0, 197) + '...' : rawBody; +} + +function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function summarizeArgs(args: Record): string { + let entries = Object.entries(args).map(([k, v]) => { + let s = typeof v === 'string' ? v : JSON.stringify(v); + if (typeof s === 'string' && s.length > 60) s = s.slice(0, 57) + '...'; + return `${k}=${s}`; + }); + return entries.join(', '); +} + +/** + * Extract the listening TCP port from the URL the opencode SDK + * returns from `createOpencodeServer({ ... })`. Returns undefined for + * malformed URLs or non-numeric ports so the caller can skip the + * SIGKILL escalation safely (better than killing port 4096 blindly + * when the SDK changed defaults or we passed `port: 0`). + */ +function parseOpencodePort(url: string): number | undefined { + try { + let port = Number.parseInt(new URL(url).port, 10); + return Number.isFinite(port) ? port : undefined; + } catch { + return undefined; + } +} + +/** + * Block until a TCP port is free on localhost. Tries graceful drain + * first, then escalates to SIGKILL on whoever is listening — opencode + * 1.14.34 ignores the SIGTERM the SDK sends from `close()` (it spawns + * a precompiled binary that doesn't honour the signal), so without + * this the next iteration's `createOpencodeServer` always hits + * EADDRINUSE. 
+ */ +async function waitForPortFree(port: number, graceMs: number): Promise { + let graceDeadline = Date.now() + graceMs; + while (Date.now() < graceDeadline) { + if (await isPortFree(port)) return; + await new Promise((resolve) => setTimeout(resolve, 100)); + } + // Still held after the grace window — escalate to SIGKILL on + // whoever is listening. Best-effort: if `lsof` isn't available or + // finds nothing, leave the next iteration to throw a clearer + // EADDRINUSE rather than silently fail here. + let { execSync } = await import('node:child_process'); + try { + let pids = execSync(`lsof -nP -iTCP:${port} -sTCP:LISTEN -t`, { + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'ignore'], + }) + .split('\n') + .map((s) => s.trim()) + .filter((s) => s.length > 0); + for (let pid of pids) { + try { + process.kill(Number(pid), 'SIGKILL'); + log.warn( + `forced SIGKILL on stale opencode pid=${pid} holding port ${port}`, + ); + } catch { + // process already gone + } + } + } catch { + // lsof not on PATH or no listener — fall through to the post-kill + // wait, which will return immediately if the port is free. + } + let killDeadline = Date.now() + 2000; + while (Date.now() < killDeadline) { + if (await isPortFree(port)) return; + await new Promise((resolve) => setTimeout(resolve, 100)); + } +} + +async function isPortFree(port: number): Promise { + return new Promise((resolve) => { + let probe = createHttpServer(); + probe.unref(); + probe.once('error', () => resolve(false)); + probe.listen(port, '127.0.0.1', () => { + probe.close(() => resolve(true)); + }); + }); +} + +/** + * Render an Error in a way that surfaces the underlying cause. + * + * Node's undici `fetch` wraps every network failure in + * `TypeError: fetch failed` and stashes the real reason — `ECONNREFUSED`, + * `ECONNRESET`, `UND_ERR_HEADERS_TIMEOUT`, `AbortError`, etc. — on + * `err.cause`. The default `String(err)` throws all of that away. 
Use + * this helper anywhere we log a fetch rejection so we can actually tell + * what happened (subprocess died vs. socket timeout vs. user abort). + */ +function describeFetchError(err: unknown): string { + if (!(err instanceof Error)) return String(err); + let parts = [err.message]; + let cause: unknown = (err as { cause?: unknown }).cause; + let depth = 0; + while (cause && depth < 4) { + if (cause instanceof Error) { + let causeCode = + (cause as { code?: string }).code ?? + (cause as { name?: string }).name ?? + 'Error'; + parts.push(`cause: ${causeCode}: ${cause.message}`); + cause = (cause as { cause?: unknown }).cause; + } else { + parts.push(`cause: ${String(cause)}`); + break; + } + depth++; + } + return parts.join(' / '); +} + +/** + * Best-effort liveness probe for the opencode subprocess. Hits + * `${url}/app` (a cheap built-in endpoint) with a short timeout and + * reports either "alive (status N)", "dead (cause)", or "unknown" if + * the URL can't even be parsed. + * + * Logged alongside every `session.prompt` / `session.list` rejection so + * we can immediately distinguish "subprocess crashed" from "transient + * socket hiccup". 
+ */ +async function probeOpencode(url: string | undefined): Promise { + if (!url) return 'unknown (no url)'; + let controller = new AbortController(); + let timer = setTimeout(() => controller.abort(), 1500); + try { + let res = await fetch(`${url.replace(/\/+$/, '')}/app`, { + signal: controller.signal, + }); + return `alive (HTTP ${res.status})`; + } catch (err) { + return `dead (${describeFetchError(err)})`; + } finally { + clearTimeout(timer); + } +} diff --git a/packages/software-factory/src/factory-agent/openrouter.ts b/packages/software-factory/src/factory-agent/openrouter.ts deleted file mode 100644 index 7272f49a2a9..00000000000 --- a/packages/software-factory/src/factory-agent/openrouter.ts +++ /dev/null @@ -1,422 +0,0 @@ -/** - * OpenRouter-backed factory agent — implements `LoopAgent` by driving a - * remote LLM through OpenRouter's OpenAI-compatible tool-use protocol. - * - * Flow: this agent sends tool definitions to the LLM via the API's - * `tools` parameter. The LLM emits `tool_calls[]`, we dispatch each - * through `FactoryTool.execute()`, feed the result back as a `role: "tool"` - * message, and iterate until the LLM calls `signal_done` / - * `request_clarification` or stops making tool calls. 
- */ - -import { SupportedMimeType } from '@cardstack/runtime-common/supported-mime-type'; - -const MAX_TOOL_USE_TURNS = 50; - -import type { AgentContext, FactoryAgentConfig, ResolvedSkill } from './types'; -import { OPENROUTER_CHAT_URL } from './types'; -import type { LoopAgent, AgentRunResult } from './types'; -import { - assembleBootstrapPrompt, - assembleImplementPrompt, - assembleIteratePrompt, - FilePromptLoader, - requireDarkfactoryModuleUrl, - type PromptLoader, -} from '../factory-prompt-loader'; -import { - DONE_SIGNAL, - CLARIFICATION_SIGNAL, - type FactoryTool, - type ToolCallEntry, -} from '../factory-tool-builder'; - -// --------------------------------------------------------------------------- -// Tool-use message types (for OpenRouter/OpenAI tool-use protocol) -// --------------------------------------------------------------------------- - -interface ToolUseMessage { - role: 'system' | 'user' | 'assistant' | 'tool'; - content?: string | null; - tool_calls?: OpenRouterToolCall[]; - tool_call_id?: string; -} - -interface OpenRouterToolCall { - id: string; - type: 'function'; - function: { - name: string; - arguments: string; - }; -} - -interface OpenRouterToolDefinition { - type: 'function'; - function: { - name: string; - description: string; - parameters: Record; - }; -} - -interface OpenRouterChatResponse { - choices?: { - message?: { - role?: string; - content?: string | null; - tool_calls?: OpenRouterToolCall[]; - }; - finish_reason?: string; - }[]; - usage?: { - prompt_tokens?: number; - completion_tokens?: number; - total_tokens?: number; - }; -} - -// --------------------------------------------------------------------------- -// OpenRouterFactoryAgent -// --------------------------------------------------------------------------- - -export class OpenRouterFactoryAgent implements LoopAgent { - private config: FactoryAgentConfig; - private directFetchImpl: typeof globalThis.fetch | undefined; - private promptLoader: PromptLoader; - /** True 
when an OpenRouter API key is available; false means proxy path. */ - readonly useDirectApi: boolean; - - constructor(config: FactoryAgentConfig, promptLoader?: PromptLoader) { - this.config = config; - this.promptLoader = promptLoader ?? new FilePromptLoader(); - - let rawApiKey = - process.env.OPENROUTER_API_KEY ?? config.openRouterApiKey ?? undefined; - let apiKey = - typeof rawApiKey === 'string' ? rawApiKey.trim() || undefined : undefined; - this.useDirectApi = apiKey !== undefined; - - if (this.useDirectApi) { - let directApiKey = apiKey!; - this.directFetchImpl = (( - input: RequestInfo | URL, - init?: RequestInit, - ) => { - let headers = new Headers(init?.headers); - if (!headers.has('Authorization')) { - headers.set('Authorization', `Bearer ${directApiKey}`); - } - return globalThis.fetch(input, { ...init, headers }); - }) as typeof globalThis.fetch; - } - } - - async run( - context: AgentContext, - tools: FactoryTool[], - ): Promise { - let messages = this.buildMessages(context); - let toolDefs = this.buildToolDefinitions(tools); - let toolCallLog: ToolCallEntry[] = []; - - if (this.config.debug) { - this.debugLog('=== Initial prompt ==='); - for (let msg of messages) { - this.debugLog( - `[${msg.role}] ${(msg.content ?? '').slice(0, 2000)}${(msg.content ?? '').length > 2000 ? '... (truncated)' : ''}`, - ); - } - this.debugLog( - `=== Tools (${toolDefs.length}): ${toolDefs.map((t) => t.function.name).join(', ')} ===`, - ); - } - - // Multi-turn tool-calling loop - for (let turn = 0; turn < MAX_TOOL_USE_TURNS; turn++) { - let response = await this.callOpenRouterWithTools(messages, toolDefs); - let choice = response.choices?.[0]; - - if (this.config.debug) { - this.debugLog(`=== LLM response (turn ${turn + 1}) ===`); - this.debugLog(JSON.stringify(choice?.message ?? 
{}, null, 2)); - if (choice?.finish_reason) { - this.debugLog(`finish_reason: ${choice.finish_reason}`); - } - if (response.usage) { - this.debugLog( - `tokens: prompt=${response.usage.prompt_tokens} completion=${response.usage.completion_tokens} total=${response.usage.total_tokens}`, - ); - } - } - - if (!choice?.message) { - throw new Error( - `Unexpected OpenRouter response: no choices[0].message in ${JSON.stringify(response).slice(0, 500)}`, - ); - } - - let assistantToolCalls = choice.message.tool_calls; - - // No tool calls — model finished its turn - if (!assistantToolCalls || assistantToolCalls.length === 0) { - // Model stopped without calling signal_done. If it produced tool - // calls in a prior iteration, treat as done. Otherwise needs_iteration. - return { - status: toolCallLog.length > 0 ? 'done' : 'needs_iteration', - toolCalls: toolCallLog, - message: choice.message.content ?? undefined, - }; - } - - // Add assistant message (with tool_calls) to conversation history - messages.push({ - role: 'assistant', - content: choice.message.content ?? null, - tool_calls: assistantToolCalls, - }); - - // Execute each tool call. With `parallel_tool_calls: true` a single - // assistant turn can carry multiple tool_calls[]; if the batch contains - // a terminal signal (signal_done / request_clarification) alongside - // other tools, we still execute the whole batch so the model's other - // side effects land, then return the first terminal signal observed. 
- let terminalResult: AgentRunResult | undefined; - for (let toolCall of assistantToolCalls) { - let toolName = toolCall.function.name; - let tool = tools.find((t) => t.name === toolName); - - if (!tool) { - messages.push({ - role: 'tool', - tool_call_id: toolCall.id, - content: JSON.stringify({ - error: `Unknown tool: ${toolName}`, - }), - }); - continue; - } - - let args: Record; - try { - args = JSON.parse(toolCall.function.arguments); - } catch { - args = {}; - } - - if (this.config.debug) { - this.debugLog(`>>> tool call: ${toolName}(${JSON.stringify(args)})`); - } - - let start = Date.now(); - let result: unknown; - try { - result = await tool.execute(args); - } catch (error) { - result = { - error: error instanceof Error ? error.message : String(error), - }; - } - let durationMs = Date.now() - start; - - if (this.config.debug) { - let resultStr = JSON.stringify(result); - this.debugLog( - `<<< tool result: ${toolName} (${durationMs}ms) ${resultStr.slice(0, 1000)}${resultStr.length > 1000 ? '... 
(truncated)' : ''}`, - ); - } - - toolCallLog.push({ tool: toolName, args, result, durationMs }); - - // Check for control flow signals - if (result && typeof result === 'object' && 'signal' in result) { - let signal = (result as Record).signal; - if (signal === DONE_SIGNAL) { - messages.push({ - role: 'tool', - tool_call_id: toolCall.id, - content: JSON.stringify({ status: 'done' }), - }); - terminalResult ??= { status: 'done', toolCalls: toolCallLog }; - continue; - } - if (signal === CLARIFICATION_SIGNAL) { - let clarificationMessage = (result as Record) - .message as string; - messages.push({ - role: 'tool', - tool_call_id: toolCall.id, - content: JSON.stringify({ - status: 'blocked', - message: clarificationMessage, - }), - }); - terminalResult ??= { - status: 'blocked', - toolCalls: toolCallLog, - message: clarificationMessage, - }; - continue; - } - } - - // Normal tool result — add to conversation - messages.push({ - role: 'tool', - tool_call_id: toolCall.id, - content: JSON.stringify(result), - }); - } - - if (terminalResult) { - return terminalResult; - } - } - - throw new Error( - `Tool-use loop exceeded ${MAX_TOOL_USE_TURNS} turns without completing. ` + - `The model may be stuck in a tool-calling loop.`, - ); - } - - /** - * Build messages for the tool-use agent. - * - * The system prompt is loaded from prompts/system.md via the prompt loader. - * Tools are provided natively via the LLM API's tool definitions parameter, - * not embedded in the prompt text. 
- */ - private buildMessages(context: AgentContext): ToolUseMessage[] { - let systemPrompt = this.buildToolUseSystemPrompt(context); - - let userPrompt: string; - let issueType = (context.issue as Record).issueType; - if (issueType === 'bootstrap' && context.briefUrl) { - userPrompt = assembleBootstrapPrompt({ - context, - loader: this.promptLoader, - }); - } else if (context.validationContext) { - // Validation failures from prior iteration — use iterate prompt - // so the agent receives formatted failure context for self-correction - userPrompt = assembleIteratePrompt({ - context, - previousActions: [], - iteration: context.iteration ?? 1, - loader: this.promptLoader, - }); - } else { - userPrompt = assembleImplementPrompt({ - context, - loader: this.promptLoader, - }); - } - - return [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: userPrompt }, - ]; - } - - /** - * Build a system prompt for the tool-use agent using the - * prompts/system.md template. Tools are provided via the API's - * native tool definitions, not embedded in the prompt text. - */ - private buildToolUseSystemPrompt(context: AgentContext): string { - let skills = context.skills.map((s: ResolvedSkill) => ({ - name: s.name, - content: s.content, - references: s.references ?? [], - })); - - return this.promptLoader.load('system', { - targetRealm: context.targetRealm, - darkfactoryModuleUrl: requireDarkfactoryModuleUrl(context), - skills, - }); - } - - /** - * Convert FactoryTool[] to OpenRouter/OpenAI tool definitions. - */ - private buildToolDefinitions( - tools: FactoryTool[], - ): OpenRouterToolDefinition[] { - return tools.map((tool) => ({ - type: 'function' as const, - function: { - name: tool.name, - description: tool.description, - parameters: tool.parameters, - }, - })); - } - - /** - * Call OpenRouter with tool definitions. 
- */ - private async callOpenRouterWithTools( - messages: ToolUseMessage[], - tools: OpenRouterToolDefinition[], - ): Promise { - let body: Record = { - model: this.config.model, - messages, - stream: false, - }; - - if (tools.length > 0) { - body.tools = tools; - // Opt in to OpenAI-compatible parallel tool calls so a single assistant - // turn can emit multiple tool_calls[]. Without this, OpenRouter routes - // to Anthropic serialize 1 call/turn and re-send the full context each - // round, producing the O(n²) context blow-up observed in CS-10814. - body.parallel_tool_calls = true; - } - - let response: Response; - - if (this.useDirectApi) { - response = await this.directFetchImpl!(OPENROUTER_CHAT_URL, { - method: 'POST', - headers: { - Accept: SupportedMimeType.JSON, - 'Content-Type': SupportedMimeType.JSON, - }, - body: JSON.stringify(body), - }); - } else { - let proxyUrl = new URL( - '_request-forward', - this.config.realmServerUrl, - ).toString(); - - response = await this.config.client.authedServerFetch(proxyUrl, { - method: 'POST', - headers: { - Accept: SupportedMimeType.JSON, - 'Content-Type': SupportedMimeType.JSON, - }, - body: JSON.stringify({ - url: OPENROUTER_CHAT_URL, - method: 'POST', - requestBody: JSON.stringify(body), - }), - }); - } - - if (!response.ok) { - let errorText = await response.text(); - throw new Error( - `OpenRouter tool-use request failed: HTTP ${response.status}: ${errorText.slice(0, 500)}`, - ); - } - - return (await response.json()) as OpenRouterChatResponse; - } - - private debugLog(message: string): void { - process.stderr.write(`[factory:debug] ${message}\n`); - } -} diff --git a/packages/software-factory/src/factory-agent/types.ts b/packages/software-factory/src/factory-agent/types.ts index 64770ed18c3..96dcf1d7647 100644 --- a/packages/software-factory/src/factory-agent/types.ts +++ b/packages/software-factory/src/factory-agent/types.ts @@ -1,18 +1,16 @@ /** * Shared types, interfaces, and constants for the factory agent 
system. * - * This module contains all the data types used across the declarative agent - * (factory-agent.ts), the tool-use agent (factory-agent-tool-use.ts), and - * their consumers (loop, context builder, prompt loader, etc.). + * The runtime agents (`ClaudeCodeFactoryAgent` in `claude-code.ts`, + * `OpencodeFactoryAgent` in `opencode.ts`) implement the `LoopAgent` + * interface declared here; orchestration consumers (issue loop, context + * builder, prompt loader) share the data types declared below. */ // --------------------------------------------------------------------------- // Constants // --------------------------------------------------------------------------- -export const OPENROUTER_CHAT_URL = - 'https://openrouter.ai/api/v1/chat/completions' as const; - /** * Default OpenRouter model when `--agent openrouter` is selected without * a `=` suffix. @@ -20,7 +18,7 @@ export const OPENROUTER_CHAT_URL = * Pinned to `claude-opus-4-7` rather than the unversioned `claude-opus-4` * alias. The alias route exhibited a deterministic mid-stream truncation * on large tool-call arguments (`finish_reason: null`, `completion=1`) - * that broke every `write_file` for full `.gts` card definitions. Opus + * that broke every native `Write` for full `.gts` card definitions. 
Opus * 4.7 on the pinned route returned clean `finish_reason: tool_calls` * responses with completions up to ~4.7K tokens in a single turn, and * ran an end-to-end factory loop to `outcome=all_issues_done` with no @@ -57,14 +55,6 @@ export const VALID_ACTION_TYPES = [ export const VALID_REALMS = ['target', 'test'] as const; -// Action types that require path + content -export const FILE_ACTION_TYPES: ReadonlySet = new Set([ - 'create_file', - 'update_file', - 'create_test', - 'update_test', -]); - // --------------------------------------------------------------------------- // Types // --------------------------------------------------------------------------- @@ -80,7 +70,8 @@ export interface FactoryAgentConfig { client: import('@cardstack/boxel-cli/api').BoxelCLIClient; maxSkillTokens?: number; /** Call OpenRouter directly with this API key instead of going through the - * realm server `_request-forward` proxy. Useful for local dev / CI. */ + * realm-server `/_openrouter/chat/completions` passthrough. Useful for + * local dev / CI. */ openRouterApiKey?: string; /** When true, log prompts sent to the LLM and responses received to stderr. */ debug?: boolean; @@ -95,8 +86,9 @@ export interface ClaudeCodeAgentConfig { * query's `cwd` so the model's native Read / Write / Edit / Bash / Glob / * Grep tools operate against the factory workspace by default — paths like * `sticky-note.gts` resolve inside the workspace, with no surprise hits - * against the user's filesystem. Realm I/O still goes through factory - * MCP tools (search_realm, run_command, validators, …). + * against the user's filesystem. Realm-runtime operations go through the + * factory MCP tools (get_card_schema, run_lint / run_parse / run_evaluate + * / run_instantiate / run_tests, signal_done, request_clarification). 
*/ workspaceDir?: string; } @@ -261,14 +253,6 @@ export interface AgentAction { toolArgs?: Record; } -// --------------------------------------------------------------------------- -// FactoryAgent interface (declarative model) -// --------------------------------------------------------------------------- - -export interface FactoryAgent { - plan(context: AgentContext): Promise; -} - // --------------------------------------------------------------------------- // Message types (for LLM communication) // --------------------------------------------------------------------------- @@ -314,6 +298,13 @@ export interface AgentRunResult { */ export interface LoopAgent { run(context: AgentContext, tools: LoopFactoryTool[]): Promise; + /** + * Optional: tear down any persistent backend state (long-lived + * subprocesses, MCP servers, sockets). The orchestrator calls this + * once after every agent.run() in the factory:go run completes. + * Agents whose state is fully owned by `run()` can omit this. + */ + close?(): Promise; } // --------------------------------------------------------------------------- diff --git a/packages/software-factory/src/factory-context-builder.ts b/packages/software-factory/src/factory-context-builder.ts index 54092a0bae8..588313bfb9f 100644 --- a/packages/software-factory/src/factory-context-builder.ts +++ b/packages/software-factory/src/factory-context-builder.ts @@ -65,7 +65,7 @@ export class ContextBuilder { * Assemble a complete AgentContext for one iteration of the execution loop. * * Steps: - * 1. Resolve skill names from ticket + project context + * 1. Resolve skill names from issue + project context * 2. Load all resolved skills from disk * 3. Apply skill budget if configured * 4. 
Return AgentContext (tools are provided separately as FactoryTool[]) diff --git a/packages/software-factory/src/factory-entrypoint.ts b/packages/software-factory/src/factory-entrypoint.ts index 03720ae75ee..7e9b4528c84 100644 --- a/packages/software-factory/src/factory-entrypoint.ts +++ b/packages/software-factory/src/factory-entrypoint.ts @@ -37,6 +37,14 @@ export interface FactoryEntrypointOptions { agent: FactoryAgentProvider; /** Only set when agent === 'openrouter' and the flag carried a `=` suffix. */ openRouterModel?: string; + /** + * OpenRouter API key for direct billing on the `--agent openrouter` + * path. Read from `--openrouter-api-key ` or env + * `OPENROUTER_API_KEY`. When unset, the OpenRouter path falls + * through to the realm-server `/_openrouter/chat/completions` + * passthrough (boxel tokens). Ignored on every other backend. + */ + openRouterApiKey?: string; debug?: boolean; retryBlocked?: boolean; } @@ -135,9 +143,15 @@ export function getFactoryEntrypointUsage(): string { ' --no-retry-blocked Skip retrying blocked issues (by default, blocked issues are reset to backlog)', ' --agent LLM backend: "claude" (default, uses Claude Code Agent SDK),', ' "codex" (not yet implemented),', - ' "openrouter" (defaults to anthropic/claude-opus-4),', + ' "openrouter" (defaults to anthropic/claude-opus-4-7, runs', + ' via the opencode SDK with native fs / Bash),', ' or "openrouter=" to pick a specific OpenRouter model', ' (e.g., "openrouter=anthropic/claude-sonnet-4").', + ' --openrouter-api-key OpenRouter API key for the openrouter backend.', + ' When set, opencode talks to OpenRouter directly with this key.', + ' When unset (and OPENROUTER_API_KEY env is also unset), the', + ' backend falls back to the realm server passthrough at', + ' `/_openrouter/chat/completions` — burns boxel tokens.', ' --debug Log LLM prompts and responses to stderr', ' --help Show this usage information', '', @@ -181,6 +195,9 @@ export function parseFactoryEntrypointArgs( agent: { 
type: 'string', }, + 'openrouter-api-key': { + type: 'string', + }, debug: { type: 'boolean', }, @@ -218,12 +235,22 @@ export function parseFactoryEntrypointArgs( ); } + let openRouterApiKey: string | undefined; + let rawOpenRouterApiKey = parsed.values['openrouter-api-key']; + if (typeof rawOpenRouterApiKey === 'string') { + let trimmed = rawOpenRouterApiKey.trim(); + if (trimmed !== '') { + openRouterApiKey = trimmed; + } + } + return { briefUrl: normalizeUrl(briefUrl, '--brief-url'), targetRealm: normalizeUrl(targetRealm, '--target-realm'), realmServerUrl, agent: parsedAgent.provider, openRouterModel: parsedAgent.openRouterModel, + openRouterApiKey, debug: parsed.values.debug === true ? true : undefined, retryBlocked: parsed.values['no-retry-blocked'] === true ? false : true, }; @@ -320,6 +347,7 @@ export async function runFactoryEntrypoint( workspaceDir, agent: options.agent, openRouterModel: options.openRouterModel, + openRouterApiKey: options.openRouterApiKey, debug: options.debug, retryBlocked: options.retryBlocked, }); diff --git a/packages/software-factory/src/factory-issue-loop-wiring.ts b/packages/software-factory/src/factory-issue-loop-wiring.ts index 854662aef2d..e7e695ca400 100644 --- a/packages/software-factory/src/factory-issue-loop-wiring.ts +++ b/packages/software-factory/src/factory-issue-loop-wiring.ts @@ -7,7 +7,7 @@ * - RealmIssueRelationshipLoader for context building * - ContextBuilder with issue-aware mode * - ToolRegistry, ToolExecutor, FactoryTool[] via buildFactoryTools - * - OpenRouterFactoryAgent as the LoopAgent + * - ClaudeCodeFactoryAgent or OpencodeFactoryAgent as the LoopAgent * - ValidationPipeline as the Validator * - runIssueLoop() invocation */ @@ -21,9 +21,8 @@ import { logger } from './logger'; import { ClaudeCodeFactoryAgent, - OpenRouterFactoryAgent, + OpencodeFactoryAgent, FACTORY_DEFAULT_OPENROUTER_MODEL, - type FactoryAgentConfig, type FactoryAgentProvider, type LoopAgent, } from './factory-agent'; @@ -72,6 +71,14 @@ 
export interface IssueLoopWiringConfig { agent?: FactoryAgentProvider; /** Explicit OpenRouter model id; only honoured when agent === 'openrouter'. */ openRouterModel?: string; + /** + * OpenRouter API key for direct billing. Only honoured when + * `agent === 'openrouter'`. When unset, the OpenRouter path falls + * back to the realm-server `/_openrouter/chat/completions` + * passthrough (boxel tokens). The CLI plumbs this through from + * `--openrouter-api-key` or env `OPENROUTER_API_KEY`. + */ + openRouterApiKey?: string; debug?: boolean; /** Inject a pre-built LoopAgent instance (tests only). Wins over `agent`. */ agentOverride?: LoopAgent; @@ -183,6 +190,7 @@ export async function runFactoryIssueLoop( let built = createLoopAgentWithLabel({ provider, openRouterModel: config.openRouterModel, + openRouterApiKey: config.openRouterApiKey, realmServerUrl, client, debug: config.debug, @@ -232,7 +240,20 @@ export async function runFactoryIssueLoop( maxOuterCycles: config.maxOuterCycles, }; - return runIssueLoop(issueLoopConfig); + try { + return await runIssueLoop(issueLoopConfig); + } finally { + // Some agents (notably `OpencodeFactoryAgent`) hold persistent + // backend state across iterations — long-lived opencode subprocess + // + MCP server + JWT'd HTTP client. Tear that down here so a + // crash mid-loop doesn't orphan an opencode process holding + // port 4096. + if (typeof agent.close === 'function') { + await agent.close().catch((err) => { + log.warn(`agent.close() failed: ${String(err)}`); + }); + } + } } // --------------------------------------------------------------------------- @@ -243,13 +264,21 @@ export interface CreateLoopAgentConfig { provider: FactoryAgentProvider; /** Only used when provider === 'openrouter'. */ openRouterModel?: string; + /** + * Optional OpenRouter API key. 
When set, the opencode-backed + * `--agent openrouter` path uses it directly; when unset, the agent + * falls back to the realm-server `/_openrouter/chat/completions` + * passthrough (boxel tokens). Read from CLI flag + * `--openrouter-api-key` or env `OPENROUTER_API_KEY`. + */ + openRouterApiKey?: string; realmServerUrl: string; client: BoxelCLIClient; debug?: boolean; /** - * Factory workspace directory. Forwarded to the Claude backend so its - * native fs tools (Read / Write / Edit / Bash) resolve relative paths - * inside the workspace. Other backends ignore it. + * Factory workspace directory. Both the Claude path and the + * opencode-backed OpenRouter path use this as their cwd so native + * fs tools resolve realm-relative paths inside the workspace. */ workspaceDir?: string; } @@ -310,14 +339,27 @@ export function createLoopAgentWithLabel(config: CreateLoopAgentConfig): { config.openRouterModel && config.openRouterModel.trim() !== '' ? config.openRouterModel.trim() : FACTORY_DEFAULT_OPENROUTER_MODEL; + if (!config.workspaceDir) { + throw new Error( + '--agent openrouter requires a workspaceDir — opencode mounts ' + + 'it as cwd for native fs tools.', + ); + } + let apiKey = + config.openRouterApiKey && config.openRouterApiKey.trim() !== '' + ? config.openRouterApiKey.trim() + : (process.env.OPENROUTER_API_KEY?.trim() ?? undefined); + let mode = apiKey ? 
'direct' : 'passthrough'; return { - agent: new OpenRouterFactoryAgent({ + agent: new OpencodeFactoryAgent({ model, realmServerUrl: config.realmServerUrl, client: config.client, + openRouterApiKey: apiKey, + workspaceDir: config.workspaceDir, debug: config.debug, - } satisfies FactoryAgentConfig), - label: `openrouter (model=${model})`, + }), + label: `openrouter (model=${model}, mode=${mode})`, }; } } diff --git a/packages/software-factory/src/factory-prompt-loader.ts b/packages/software-factory/src/factory-prompt-loader.ts index 7cc039438f1..7d2b1fd587f 100644 --- a/packages/software-factory/src/factory-prompt-loader.ts +++ b/packages/software-factory/src/factory-prompt-loader.ts @@ -430,7 +430,7 @@ export function assembleImplementPrompt( let toolResultsData = buildToolResultsData(context); - return loader.load('ticket-implement', { + return loader.load('issue-implement', { project: context.project, issue: context.issue, knowledge: context.knowledge, @@ -460,7 +460,7 @@ export function assembleBootstrapPrompt( export function assembleTestPrompt(options: AssembleTestPromptOptions): string { let { context, implementedFiles, loader } = options; - return loader.load('ticket-test', { + return loader.load('issue-test', { issue: context.issue, implementedFiles, }); @@ -483,7 +483,7 @@ export function assembleIteratePrompt( let toolResultsData = buildToolResultsData(context); - return loader.load('ticket-iterate', { + return loader.load('issue-iterate', { project: context.project, issue: context.issue, iteration, diff --git a/packages/software-factory/src/factory-seed.ts b/packages/software-factory/src/factory-seed.ts index db433a4344c..e31fcac2468 100644 --- a/packages/software-factory/src/factory-seed.ts +++ b/packages/software-factory/src/factory-seed.ts @@ -117,27 +117,30 @@ function buildSeedIssueDocument( ``, `**URL:** ${brief.sourceUrl}`, `**Title:** ${brief.title}`, + `**Summary:** ${brief.contentSummary}`, ``, - brief.contentSummary, + `### Full content`, + ``, 
+ brief.content, ``, `## Instructions`, ``, - `Read the brief and create the following project artifacts:`, + `Read the brief above and create the following project artifacts in the workspace:`, ``, - `1. **Project card** — in \`Projects/\` with fields populated from the brief`, - `2. **Issue Tracker Board** — in \`Boards/\`, linked both ways with the Project card`, - `3. **Knowledge Articles** — in \`Knowledge Articles/\`, at least Brief Context + Agent Onboarding, plus more as the brief warrants`, - `4. **Implementation Issues** — one per entry-point card, each covering:`, + `1. **Project card** — \`Projects/.json\` with fields populated from the brief`, + `2. **Issue Tracker Board** — \`Boards/.json\`, linked both ways with the Project card`, + `3. **Knowledge Articles** — \`Knowledge Articles/-brief-context.json\` and \`Knowledge Articles/-agent-onboarding.json\`, plus more as the brief warrants`, + `4. **Implementation Issues** — one per entry-point card at \`Issues/-.json\`, each covering:`, ` - Card definition (.gts) and any interior/support cards`, ` - QUnit tests (.test.gts) for entry-point and support cards`, - ` - Catalog Spec (Spec/.json) with example instances`, + ` - Catalog Spec (\`Spec/.json\`) with example instances`, ``, `Each implementation issue must have:`, `- \`project\` relationship pointing to the Project card`, `- \`relatedKnowledge\` relationships pointing to the Knowledge Article cards`, `- \`blockedBy\` relationships to any prior issues it depends on`, ``, - `When all artifacts are created, mark this issue as done via \`update_issue\`.`, + `Use the **\`Write\`** tool to create each \`.json\` file. 
When every artifact is on disk, call **\`signal_done\`** — the orchestrator marks this bootstrap issue done.`, ].join('\n'); let acceptanceCriteria = [ diff --git a/packages/software-factory/src/factory-skill-loader.ts b/packages/software-factory/src/factory-skill-loader.ts index aa989dc0883..c789c7d1699 100644 --- a/packages/software-factory/src/factory-skill-loader.ts +++ b/packages/software-factory/src/factory-skill-loader.ts @@ -13,10 +13,19 @@ const DEFAULT_SKILLS_DIR = join(PACKAGE_ROOT, '.agents', 'skills'); /** * Additional skill search directories, checked in order when a skill is not - * found in the primary directory. The monorepo root `.agents/skills/` hosts - * shared skills like `ember-best-practices` that live outside the package. + * found in the primary directory. + * + * - `packages/boxel-cli/plugin/skills/` hosts the boxel-cli Claude Code + * plugin skills (`boxel-api`, `boxel-command`, etc.) — boxel-cli owns the + * entire Boxel API surface, so its skills describe the platform. Same + * directory the plugin distributes to end users. + * - The monorepo root `.agents/skills/` hosts shared domain skills + * (`boxel-development`, `boxel-file-structure`, `ember-best-practices`). */ -const DEFAULT_FALLBACK_DIRS = [join(MONOREPO_ROOT, '.agents', 'skills')]; +const DEFAULT_FALLBACK_DIRS = [ + join(MONOREPO_ROOT, 'packages', 'boxel-cli', 'plugin', 'skills'), + join(MONOREPO_ROOT, '.agents', 'skills'), +]; /** Approximate characters per token for budget estimation. 
*/ const CHARS_PER_TOKEN = 4; @@ -29,14 +38,10 @@ const SKILL_PRIORITY: readonly string[] = [ 'software-factory-bootstrap', 'boxel-development', 'boxel-file-structure', + 'boxel-api', + 'boxel-command', 'ember-best-practices', 'software-factory-operations', - 'boxel-sync', - 'boxel-track', - 'boxel-watch', - 'boxel-restore', - 'boxel-repair', - 'boxel-setup', ]; // --------------------------------------------------------------------------- @@ -63,23 +68,9 @@ const FACTORY_WORKFLOW_KEYWORDS = [ 'orchestrat', ]; -/** - * CLI skills that depend on boxel CLI commands. Excluded from the factory - * agent's tool registry — these skills reference commands the agent - * cannot invoke. They remain valid for human Claude Code sessions. - */ -const CLI_ONLY_SKILLS: readonly string[] = [ - 'boxel-sync', - 'boxel-track', - 'boxel-watch', - 'boxel-restore', - 'boxel-repair', - 'boxel-setup', -]; - /** * Reference files in `boxel-development/references/` and the keywords that - * trigger their inclusion. When a ticket doesn't match any keyword, only the + * trigger their inclusion. When an issue doesn't match any keyword, only the * "always load" references from SKILL.md are included. */ const REFERENCE_KEYWORD_MAP: Record = { @@ -98,7 +89,6 @@ const REFERENCE_KEYWORD_MAP: Record = { 'dev-command-development.md': ['command', 'action', 'invoke'], 'dev-spec-usage.md': ['spec', 'catalog', 'specification'], 'dev-qunit-testing.md': ['test', 'qunit', 'test.gts', 'verify'], - 'dev-realm-search.md': ['search', 'query', 'filter', 'find', 'realm'], 'dev-replicate-ai.md': ['replicate', 'ai', 'model', 'ml'], }; @@ -107,7 +97,6 @@ const ALWAYS_LOAD_REFERENCES: readonly string[] = [ 'dev-core-concept.md', 'dev-technical-rules.md', 'dev-quick-reference.md', - 'dev-realm-search.md', 'dev-qunit-testing.md', 'dev-spec-usage.md', ]; @@ -143,9 +132,9 @@ export class DefaultSkillResolver implements SkillResolver { * 1. boxel-development + boxel-file-structure — always loaded * 2. 
ember-best-practices — when issue involves .gts component code * 3. software-factory-operations — for factory delivery workflow issues - * 4. KnowledgeArticle tags can specify additional skills - * - * CLI skills are excluded (see `CLI_ONLY_SKILLS`). + * 4. boxel-api + boxel-command — always loaded so the agent has the realm + * search query syntax and host-command failure modes inline. + * 5. KnowledgeArticle tags can specify additional skills. */ resolve(issue: IssueData, project: ProjectData): string[] { let issueText = extractIssueText(issue); @@ -156,7 +145,12 @@ export class DefaultSkillResolver implements SkillResolver { return ['software-factory-bootstrap', 'boxel-file-structure']; } - let skills: string[] = ['boxel-development', 'boxel-file-structure']; + let skills: string[] = [ + 'boxel-development', + 'boxel-file-structure', + 'boxel-api', + 'boxel-command', + ]; if (matchesAnyKeyword(issueText, GTS_KEYWORDS)) { skills.push('ember-best-practices'); @@ -175,9 +169,7 @@ export class DefaultSkillResolver implements SkillResolver { } } - // Filter out CLI-only skills that reference boxel CLI commands the - // factory agent cannot invoke (tool registry excludes boxel-cli tools). 
- return skills.filter((s) => !CLI_ONLY_SKILLS.includes(s)); + return skills; } } diff --git a/packages/software-factory/src/factory-tool-builder.ts b/packages/software-factory/src/factory-tool-builder.ts index b30ef991d35..371b2042c77 100644 --- a/packages/software-factory/src/factory-tool-builder.ts +++ b/packages/software-factory/src/factory-tool-builder.ts @@ -35,7 +35,6 @@ import { } from './parse-execution'; import { runTestsInMemory } from './test-run-execution'; import type { RunTestsInMemoryOptions, RunTestsResult } from './test-run-types'; -import { readCard, writeCard } from './workspace-fs'; // --------------------------------------------------------------------------- // Types @@ -48,15 +47,12 @@ export interface FactoryTool { execute: (args: Record) => Promise; /** * Origin marker. `'core'` is for tools defined directly in this - * builder (read_file, search_realm, run_lint, the structured update - * tools, signals, …). `'registered'` is for tools wrapped from the - * `ToolRegistry`'s script + realm-api manifests (realm-read, - * search-realm, boxel-sync, …). - * - * The Claude backend filters out `'registered'` tools because they - * shadow the core ones with kebab-case duplicates the model picks at - * random — and most of them are reachable through native fs / Bash + - * boxel CLI anyway. OpenRouter still gets every tool. + * builder (`get_card_schema`, `run_*`, `signal_done`, + * `request_clarification`). `'registered'` is for tools wrapped + * from the `ToolRegistry`'s realm-api manifests (currently just + * `realm-create`); these are filtered out of the agent's hot path + * since the entrypoint drives the realm-create flow before the + * agent runs. */ source?: 'core' | 'registered'; } @@ -144,13 +140,18 @@ export function buildFactoryTools( toolExecutor: ToolExecutor, toolRegistry: ToolRegistry, ): FactoryTool[] { + // Filesystem and shell are owned by the agent backend (Claude Agent + // SDK or opencode) via native tools. 
The factory contributes + // `get_card_schema` (introspects a live `CardDef` via the + // realm-server prerenderer — no Bash equivalent), the validation + // run_* tools, and the two control signals. Tracker-schema cards + // (Project / Issue / KnowledgeArticle / Spec / issue comments) are + // written as plain JSON via the backend's native `Write` after + // schema introspection; the shapes and invariants live in the + // `software-factory-bootstrap` and `software-factory-operations` + // skills. let tools: FactoryTool[] = [ - buildWriteFileTool(config), - buildReadFileTool(config), - buildFetchTranspiledModuleTool(config), - buildSearchRealmTool(config), buildGetCardSchemaTool(config), - buildRunCommandTool(config), buildRunLintTool(config), buildRunTestsTool(config), buildRunEvaluateTool(config), @@ -160,19 +161,16 @@ export function buildFactoryTools( buildRequestClarificationTool(), ]; - // Tracker-schema cards (Project / IssueTracker / Issue / KnowledgeArticle / Spec / - // issue comments) used to have dedicated wrapper tools here that + // Wrap registered realm-api manifests (currently just `realm-create`). + // Tracker-schema cards (Project / IssueTracker / Issue / KnowledgeArticle / + // Spec / issue comments) used to have dedicated wrapper tools here that // auto-constructed the JSON:API document, enforced Issue-description - // immutability, and so on. CS-10883 retired all five; the agent now - // writes those `.json` files directly via `Write` (Claude) / - // `write_file` (OpenRouter). The shapes and invariants are taught in - // the `software-factory-bootstrap` and `software-factory-operations` - // skills, with the live `darkfactoryModuleUrl` named in the system - // prompt for `adoptsFrom.module`. - - // Add registered realm-api tools as FactoryTool wrappers. After the - // CS-10883 retirements the registry only contains `realm-create`; - // anything else added later goes through the same build path. + // immutability, etc. 
CS-10883 retired all five; the agent now writes + // those `.json` files directly via `Write`. The shapes and invariants are + // taught in the `software-factory-bootstrap` and + // `software-factory-operations` skills, with the live + // `darkfactoryModuleUrl` named in the system prompt for + // `adoptsFrom.module`. for (let manifest of toolRegistry.getManifests()) { if (manifest.category === 'realm-api') { tools.push(buildRegisteredTool(manifest, toolExecutor, config)); @@ -206,12 +204,10 @@ async function syncWorkspaceForToolRun( /** * Enforce that a required string argument is present and non-empty. Returns * the trimmed value or throws a clear error that propagates back to the - * model as a tool-call result. This is the only runtime guardrail against - * an LLM emitting a malformed tool call like `write_file({})` — the JSON - * Schema `required` declaration is advisory for OpenRouter's tool-use and - * the model can still send empty args. Without this check, path strings - * like `"undefined"` would end up at the realm's root (e.g., a file named - * `/undefined`). + * model as a tool-call result. The JSON Schema `required` declaration is + * advisory — the model can still send empty args — so this is the runtime + * guardrail that keeps a malformed tool call (e.g. `get_card_schema({})`) + * from sliding through with a `"undefined"` path or name. */ export function requireStringArg( args: Record, @@ -232,123 +228,6 @@ export function requireStringArg( // Factory-level tools // --------------------------------------------------------------------------- -function buildWriteFileTool(config: ToolBuilderConfig): FactoryTool { - return { - name: 'write_file', - description: - 'Write a file to the target realm workspace. The path must include the file extension. 
Writes go to the local workspace and are synced to the realm between iterations.', - parameters: { - type: 'object', - properties: { - path: { - type: 'string', - description: - 'Realm-relative file path with extension (e.g., "my-card.gts", "Card/1.json")', - }, - content: { type: 'string', description: 'File content' }, - realm: { - type: 'string', - enum: ['target'], - description: 'Which realm to write to (default: target)', - }, - }, - required: ['path', 'content'], - }, - execute: async (args) => { - let path = requireStringArg(args, 'path', 'write_file'); - let content = requireStringArg(args, 'content', 'write_file'); - return writeCard(config.workspaceDir, path, content); - }, - }; -} - -function buildReadFileTool(config: ToolBuilderConfig): FactoryTool { - return { - name: 'read_file', - description: - 'Read a file from the target realm workspace. Returns parsed JSON when possible, otherwise raw text.', - parameters: { - type: 'object', - properties: { - path: { - type: 'string', - description: 'Realm-relative file path', - }, - realm: { - type: 'string', - enum: ['target'], - description: 'Which realm to read from (default: target)', - }, - }, - required: ['path'], - }, - execute: async (args) => { - let path = requireStringArg(args, 'path', 'read_file'); - return readCard(config.workspaceDir, path); - }, - }; -} - -function buildFetchTranspiledModuleTool( - config: ToolBuilderConfig, -): FactoryTool { - return { - name: 'fetch_transpiled_module', - description: - "Debugging tool ONLY for investigating runtime errors in .gts modules you've written. Use when an eval or instantiate validation error reports a line/column number — those line numbers refer to the transpiled output, not your .gts source, so fetching the transpiled output is how you locate the offending source construct. Never use the transpiled output as a reference for how to write code. 
Do NOT copy its patterns (setComponentTemplate, precompileTemplate, wire-format templates, base64 CSS imports) into source — always write idiomatic Ember /