diff --git a/packages/matrix/tests/publish-realm.spec.ts b/packages/matrix/tests/publish-realm.spec.ts index 2d5e1eeed7..725e51c303 100644 --- a/packages/matrix/tests/publish-realm.spec.ts +++ b/packages/matrix/tests/publish-realm.spec.ts @@ -239,6 +239,207 @@ test.describe('Publish realm', () => { ).toBeVisible(); }); + test('republishing reflects updated source content on the published URL (CS-11043)', async ({ + page, + request, + }) => { + // CS-11043 regression net. The bug was: a republish reported success + // server-side but the published URL kept serving the previous publish's + // rendered HTML, sometimes for tens of hours. Every existing + // publish-realm test does exactly one publish — this is the gap the + // bug slipped through. Here we publish, change content, publish + // again, and assert the published URL shows the new content (and not + // the old). + + await clearLocalStorage(page, serverIndexUrl); + user = await createSubscribedUserAndLogin( + page, + 'publish-realm', + serverIndexUrl, + ); + + let serverURL = new URL(serverIndexUrl); + let defaultRealmURL = `${serverURL.protocol}//${serverURL.host}/${user.username}/new-workspace/`; + + await createRealm(page, 'new-workspace', '1New Workspace'); + + // Define a card type whose isolated template renders a single + // sentinel string we can grep for in the published HTML. + await postCardSource( + page, + defaultRealmURL, + 'sentinel-card.gts', + ` + import { CardDef, Component, field, contains } from "https://cardstack.com/base/card-api"; + import StringField from "https://cardstack.com/base/string"; + + export class SentinelCard extends CardDef { + @field value = contains(StringField); + + static isolated = class extends Component<typeof this> { + <template> + <span data-test-sentinel-output>{{@model.value}}</span> + </template> + }; + } + `, + ); + + // Initial index.json: an instance of SentinelCard carrying the + // sentinel that we expect the first publish to render. 
+ let initialSentinel = `sentinel-initial-${Date.now()}`; + await postCardSource( + page, + defaultRealmURL, + 'index.json', + JSON.stringify( + { + data: { + type: 'card', + attributes: { value: initialSentinel }, + meta: { + adoptsFrom: { module: './sentinel-card', name: 'SentinelCard' }, + }, + }, + }, + null, + 2, + ), + ); + + // Open the publish modal and do the first publish. + await page.locator('[data-test-workspace="1New Workspace"]').click(); + await page.locator('[data-test-submode-switcher] button').click(); + await page.locator('[data-test-boxel-menu-item-text="Host"]').click(); + await page.locator('[data-test-publish-realm-button]').click(); + await page.locator('[data-test-default-domain-checkbox]').click(); + await page.locator('[data-test-publish-button]').click(); + await page.waitForSelector('[data-test-unpublish-button]'); + + // Open the published URL and verify the initial sentinel renders. + let firstTabPromise = page.waitForEvent('popup'); + await page + .locator( + '[data-test-publish-realm-modal] [data-test-open-boxel-space-button]', + ) + .click(); + let firstTab = await firstTabPromise; + await firstTab.waitForLoadState(); + await expect(firstTab.locator('[data-test-sentinel-output]')).toHaveText( + initialSentinel, + { timeout: 30_000 }, + ); + await firstTab.close(); + await page.bringToFront(); + + // Close the modal so we can re-open it cleanly for the second publish. + await page.locator('[data-test-close-modal]').click(); + + // Change the index card's sentinel value. This is the "user edits + // their realm between publishes" step. 
+ let updatedSentinel = `sentinel-updated-${Date.now()}`; + await postCardSource( + page, + defaultRealmURL, + 'index.json', + JSON.stringify( + { + data: { + type: 'card', + attributes: { value: updatedSentinel }, + meta: { + adoptsFrom: { module: './sentinel-card', name: 'SentinelCard' }, + }, + }, + }, + null, + 2, + ), + ); + + // Guard against the failure mode where `postCardSource` silently + // returns non-ok and the source still has the initial bytes — + // without this, the test below would fail with a misleading + // "expected updated, received initial" sentinel assertion and the + // root cause (a failed write) would be invisible. Read the source + // back via the realm-server's source-MIME endpoint and assert the + // new sentinel landed before continuing to the publish step. + let sourceAuthToken = await page.evaluate( + (realmURL) => + JSON.parse(window.localStorage['boxel-session'])[realmURL] as string, + defaultRealmURL, + ); + let sourceCheck = await request.get(`${defaultRealmURL}index.json`, { + headers: { + accept: 'application/vnd.card+source', + authorization: sourceAuthToken, + }, + }); + expect( + (await sourceCheck.text()).includes(updatedSentinel), + 'source index.json should contain the updated sentinel after postCardSource', + ).toBeTruthy(); + + // Re-open the publish modal and re-trigger publish. The + // default-domain checkbox can lose its selection on modal close, + // so check its state and click only when needed — otherwise the + // publish button is disabled (`!hasSelectedPublishedRealmURLs`) + // and the click silently no-ops. 
+ await page.locator('[data-test-publish-realm-button]').click(); + let domainCheckbox = page.locator('[data-test-default-domain-checkbox]'); + if (!(await domainCheckbox.isChecked())) { + await domainCheckbox.click(); + } + let publishButton = page.locator('[data-test-publish-button]'); + + // Set up the network wait BEFORE clicking — the handler awaits + // the full reindex before returning 202, so when this resolves we + // know the publish is fully done. Caught so a transient hiccup + // downgrades to null rather than throwing; the published-URL + // assertion below has its own retry budget and is the + // load-bearing check either way. + let publishResponsePromise = page + .waitForResponse( + (r) => + r.url().endsWith('/_publish-realm') && + r.request().method() === 'POST', + { timeout: 180_000 }, + ) + .catch(() => null); + await publishButton.click(); + let publishResponse = await publishResponsePromise; + if (publishResponse) { + expect( + publishResponse.status(), + 'second publish should succeed', + ).toBeLessThan(300); + } + + // Open the published URL again and verify the UPDATED sentinel + // renders — and the initial sentinel does NOT. This is the + // load-bearing assertion CS-11043 would have failed. + let secondTabPromise = page.waitForEvent('popup'); + await page + .locator( + '[data-test-publish-realm-modal] [data-test-open-boxel-space-button]', + ) + .click(); + let secondTab = await secondTabPromise; + await secondTab.waitForLoadState(); + // Generous retry budget: if waitForResponse above was downgraded + // to null, the publish may not yet be done by the time we land on + // the published URL. The assertion retries until the sentinel + // appears or this budget expires, which gives slow republishes + // room to land without flapping the test. 
+ await expect(secondTab.locator('[data-test-sentinel-output]')).toHaveText( + updatedSentinel, + { timeout: 120_000 }, + ); + await expect(secondTab.locator('body')).not.toContainText(initialSentinel); + await secondTab.close(); + await page.bringToFront(); + }); + test('open site popover opens with shift-click', async ({ page }) => { await publishDefaultRealm(page); diff --git a/packages/realm-server/handlers/handle-publish-realm.ts b/packages/realm-server/handlers/handle-publish-realm.ts index 65775d6b1e..f86ad32605 100644 --- a/packages/realm-server/handlers/handle-publish-realm.ts +++ b/packages/realm-server/handlers/handle-publish-realm.ts @@ -233,6 +233,7 @@ export default function handlePublishRealm({ realmsRootPath, getMatrixRegistrationSecret, domainsForPublishedRealms, + prerenderer, }: CreateRoutesArgs): (ctxt: Koa.Context, next: Koa.Next) => Promise<void> { return async function (ctxt: Koa.Context, _next: Koa.Next) { let token = ctxt.state.token as RealmServerTokenClaim; @@ -535,6 +536,41 @@ export default function handlePublishRealm({ param(publishedRealmURL), ]); + // CS-11043. The DB-level cache above is necessary but not + // sufficient: the prerender server's puppeteer pages for + // this realm's affinity hold an in-process host-app + // `Loader` that caches evaluated modules by URL. After a + // republish swaps new bytes onto disk, those Loaders would + // still hand back the OLD module on subsequent renders — + // the realm-server's Cache-Control: no-store on source + // responses prevents Chromium from caching the HTTP layer + // but does not reach into the host's module cache. The + // production failure mode (nyuitp2026.boxel.site rendering + // stale wordmark for ~37h after publishing the new img + // form) was exactly this. Disposing the affinity tears + // down the puppeteer pages so the next render against the + // realm spawns a fresh page that fetches modules from disk. 
+ // + // Optional method on the Prerenderer interface — local / + // remote stubs may not implement it. Best-effort: a failure + // here is logged but doesn't fail the publish, since the + // page-pool will eventually rotate via LRU anyway; we just + // want to avoid the long staleness window. + if (prerenderer?.disposeAffinity) { + try { + await prerenderer.disposeAffinity({ + affinityType: 'realm', + affinityValue: publishedRealmURL, + }); + } catch (e) { + log.warn( + `disposeAffinity failed for ${publishedRealmURL}: ${ + e instanceof Error ? e.message : String(e) + } — continuing with publish; stale Loader cache may persist until LRU rotation`, + ); + } + } + let lastPublishedAt = Date.now().toString(); try { await upsertPublishedRealmInRegistry(dbAdapter, { @@ -552,6 +588,32 @@ export default function handlePublishRealm({ throw dbError; } + // CS-11043. For a republish, the realm is already mounted on + // this realm-server with its #sourceCache holding the + // pre-swap bytes. The reindex enqueued just below fans out + // module fetches through HTTP to this same realm-server, and + // without an explicit invalidation those fetches would hit + // the cached old bytes — producing a fresh reindex against + // STALE source, which then gets written to + // boxel_index.isolated_html and served forever (this was the + // staging-CI failure even after disposeAffinity + the + // Cache-Control: no-store + the DB modules DELETE — none of + // those reach into the realm-server's per-Realm byte cache). + // The Phase-3-PR-2 comment above relies on the NodeAdapter + // file watcher to invalidate via change events, but that's + // an async race against the immediately-enqueued reindex. + // Force the invalidation synchronously here. + // + // For a new publish, lookupOrMount mounts the realm fresh + // (registry row was just upserted above); the cache is + // empty so clearLocalCaches is a no-op. Either way the + // reindex below sees correct source. 
+ let mountedRealmForCacheClear = + await reconciler.lookupOrMount(publishedRealmURL); + if (mountedRealmForCacheClear) { + mountedRealmForCacheClear.clearLocalCaches(); + } + // Refresh the index. For a new publish this is redundant // (lazy-mount's first start() does its own fullIndex on a // fresh DB), but the from-scratch-index coalesce handler diff --git a/packages/realm-server/prerender/manager-app.ts b/packages/realm-server/prerender/manager-app.ts index 489a162824..985cec1a47 100644 --- a/packages/realm-server/prerender/manager-app.ts +++ b/packages/realm-server/prerender/manager-app.ts @@ -1353,6 +1353,102 @@ export function buildPrerenderManagerApp(options?: { } }); + // CS-11043. Broadcast a dispose-affinity to every server currently + // assigned to the requested affinity. The publish-realm handler + // calls this through RemotePrerenderer after its FS swap so that + // every assigned server tears down the puppeteer pages whose + // host-app Loaders may still serve old module bytes. A server + // that's no longer assigned no-ops the request; assigned servers + // dispose their pages and the next render starts fresh. + router.post('/dispose-affinity', async (ctxt) => { + try { + let request = await fetchRequestFromContext(ctxt); + let raw = await request.text(); + let body: any; + try { + body = raw ? JSON.parse(raw) : {}; + } catch (e) { + ctxt.status = 400; + ctxt.body = { + errors: [{ status: 400, message: 'Invalid JSON body' }], + }; + return; + } + let attrs = body?.data?.attributes ?? 
{}; + let affinityType = attrs.affinityType; + let affinityValue = attrs.affinityValue; + if ( + (affinityType !== 'realm' && affinityType !== 'user') || + typeof affinityValue !== 'string' || + affinityValue.trim().length === 0 + ) { + ctxt.status = 400; + ctxt.body = { + errors: [ + { + status: 400, + message: + 'Missing or invalid attributes: affinityType, affinityValue', + }, + ], + }; + return; + } + let affinityKey = toAffinityKey({ + affinityType: affinityType as AffinityType, + affinityValue, + }); + let targets = [...(registry.affinities.get(affinityKey) ?? [])]; + log.info( + `broadcasting dispose-affinity for ${affinityKey} to ${targets.length} assigned server(s)`, + ); + await Promise.all( + targets.map(async (target) => { + let targetURL = `${normalizeURL(target)}/dispose-affinity`; + let ac = new AbortController(); + let timer = setTimeout(() => ac.abort(), proxyTimeoutMs); + (timer as any).unref?.(); + try { + let res = await fetch(targetURL, { + method: 'POST', + headers: { + 'Content-Type': 'application/vnd.api+json', + Accept: 'application/vnd.api+json', + }, + body: raw, + signal: ac.signal, + }); + if (!res.ok) { + log.warn( + `dispose-affinity on ${target} for ${affinityKey} returned ${res.status}`, + ); + } + } catch (err) { + if ((err as { name?: string })?.name === 'AbortError') { + log.warn( + `dispose-affinity on ${target} for ${affinityKey} timed out after ${proxyTimeoutMs}ms`, + ); + } else { + log.warn( + `dispose-affinity on ${target} for ${affinityKey} network error:`, + err, + ); + } + } finally { + clearTimeout(timer); + } + }), + ); + ctxt.status = 204; + } catch (err: any) { + log.error('Unhandled error in /dispose-affinity broadcast:', err); + ctxt.status = 500; + ctxt.body = { + errors: [{ status: 500, message: err?.message ?? 
'Unknown error' }], + }; + } + }); + let verboseManagerLogs = process.env.PRERENDER_MANAGER_VERBOSE_LOGS === 'true'; app diff --git a/packages/realm-server/prerender/prerender-app.ts b/packages/realm-server/prerender/prerender-app.ts index 93b0d42f18..ab0952dd3f 100644 --- a/packages/realm-server/prerender/prerender-app.ts +++ b/packages/realm-server/prerender/prerender-app.ts @@ -938,6 +938,65 @@ export function buildPrerenderApp(options: { } }); + // CS-11043. Tear down all puppeteer pages for an affinity so the + // next render starts on a fresh page whose host-app Loader has no + // cached module bytes from before a publish-time FS swap. Called + // by the publish-realm handler via RemotePrerenderer; the manager + // routes a fan-out to every server currently assigned the affinity. + router.post('/dispose-affinity', async (ctxt: Koa.Context) => { + try { + let request = await fetchRequestFromContext(ctxt); + let raw = await request.text(); + let body: any; + try { + body = raw ? JSON.parse(raw) : {}; + } catch (e) { + ctxt.status = 400; + ctxt.body = { + errors: [{ status: 400, message: 'Invalid JSON body' }], + }; + return; + } + let attrs = body?.data?.attributes ?? 
{}; + let affinityType = attrs.affinityType; + let affinityValue = attrs.affinityValue; + let missing: string[] = []; + if (affinityType !== 'realm' && affinityType !== 'user') { + missing.push('affinityType'); + } + if ( + typeof affinityValue !== 'string' || + affinityValue.trim().length === 0 + ) { + missing.push('affinityValue'); + } + if (missing.length > 0) { + ctxt.status = 400; + ctxt.body = { + errors: [ + { + status: 400, + message: `Missing or invalid attributes: ${missing.join(', ')}`, + }, + ], + }; + return; + } + await prerenderer.disposeAffinity({ + affinityType: affinityType as AffinityType, + affinityValue, + }); + ctxt.status = 204; + } catch (err: any) { + Sentry.captureException(err); + log.error('Unhandled error in /dispose-affinity:', err); + ctxt.status = 500; + ctxt.body = { + errors: [{ status: 500, message: err?.message ?? 'Unknown error' }], + }; + } + }); + app .use((ctxt: Koa.Context, next: Koa.Next) => { if ( diff --git a/packages/realm-server/prerender/remote-prerenderer.ts b/packages/realm-server/prerender/remote-prerenderer.ts index 824e23364c..56aa99532b 100644 --- a/packages/realm-server/prerender/remote-prerenderer.ts +++ b/packages/realm-server/prerender/remote-prerenderer.ts @@ -272,6 +272,52 @@ export function createRemotePrerenderer( ); }, + // CS-11043. Called by the publish-realm handler after the FS swap + // to tear down puppeteer pages whose host-app Loaders may have + // cached old module bytes. Routed through the manager so disposal + // fans out to every server currently assigned this affinity. Best- + // effort: a network failure is logged but not rethrown — the + // publish completes regardless; LRU rotation will eventually + // recycle stale pages, the publish handler just doesn't want to + // wait for that. 
+ async disposeAffinity({ affinityType, affinityValue }) { + let endpoint = new URL('dispose-affinity', prerenderURL); + let ac = new AbortController(); + let timer = setTimeout(() => ac.abort(), requestTimeoutMs); + (timer as any).unref?.(); + try { + let response = await fetch(endpoint, { + method: 'POST', + headers: jsonApiHeaders, + body: JSON.stringify({ + data: { + type: 'dispose-affinity-request', + attributes: { affinityType, affinityValue }, + }, + }), + signal: ac.signal, + }); + if (!response.ok) { + log.warn( + `disposeAffinity for ${affinityType}:${affinityValue} returned ${response.status}`, + ); + } + } catch (e) { + if ((e as { name?: string })?.name === 'AbortError') { + log.warn( + `disposeAffinity for ${affinityType}:${affinityValue} timed out after ${requestTimeoutMs}ms`, + ); + } else { + log.warn( + `disposeAffinity for ${affinityType}:${affinityValue} network error:`, + e, + ); + } + } finally { + clearTimeout(timer); + } + }, + // Release this batch's ownership of an affinity (CS-10758 step 3). // Routed through the manager so the release fans out to every server // currently assigned this affinity (any of which could hold local // ownership from a prior visit). Best-effort: a network-level failure diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts index 2a0e21c7a6..564ad70f19 100644 --- a/packages/runtime-common/index.ts +++ b/packages/runtime-common/index.ts @@ -488,6 +488,18 @@ export interface Prerenderer { // before invoking since not every Prerenderer implementation participates // in ownership tracking (e.g. test stubs, remote variants on older servers). releaseBatch?(args: ReleaseBatchArgs): Promise<void>; + // Optional: dispose all puppeteer pages for an affinity. Used by the + // publish-realm handler (CS-11043) to evict per-page host-Loader + // caches when the underlying files have been swapped on disk — the + // next render against the affinity then spawns fresh pages that + // fetch modules from the realm-server rather than serving stale + // cached module bytes. 
Optional because test stubs and older + // Prerenderer implementations may not implement it; + // callers should probe at runtime. + disposeAffinity?(args: { + affinityType: AffinityType; + affinityValue: string; + }): Promise<void>; // Optional: capture a settled card render to a PNG. Optional so test // stubs and older Prerenderer implementations are not forced to // implement it; the screenshot-card worker task diff --git a/packages/runtime-common/realm.ts b/packages/runtime-common/realm.ts index 9798546ead..1f525fd7a3 100644 --- a/packages/runtime-common/realm.ts +++ b/packages/runtime-common/realm.ts @@ -1338,6 +1338,23 @@ export class Realm { this.#transpileCallCount = 0; } + // CS-11043. Bulk-invalidate this realm's in-process byte caches. + // Called by the publish-realm handler after the FS swap, BEFORE the + // reindex enqueues — so that subsequent source reads (the reindex's + // prerender fans out many of them) bypass any + // pre-swap bytes the realm still has in `#sourceCache` / + // `#moduleCache`. The Phase-3-PR-2 publish flow relies on the + // NodeAdapter file-watcher to pick up the swap, but that's an + // async-event race against the immediately-enqueued reindex; this + // method makes the invalidation synchronous from the publish + // handler's vantage point. Different from `__testOnlyClearCaches` + // in that it does NOT reset the transpile counter (which is + // test-only diagnostic state, unrelated to byte-correctness). + clearLocalCaches(): void { + this.#sourceCache.clear(); + this.#dropAllModuleCacheEntries(); + } + // CS-11029 test seams: tests need to assert "N concurrent same-path // readers triggered exactly one transpile" and "the in-flight slot // released after the shared transpile settled." Exposing the @@ -3373,6 +3390,20 @@ export class Realm { let createdAt = await this.getCreatedTime(handle.path); let defaultHeaders: Record<string, string> = { 'content-type': inferContentType(handle.path), + // CS-11043. 
The publish-republish failure mode was Chromium's + // process-level HTTP cache holding stale module bytes across + // publishes — the realm-server previously sent these + // responses with no `Cache-Control` and no `Last-Modified`, + // so Chromium applied heuristic caching and reused old bytes + // even after the on-disk file changed under a republish. + // `no-store` evicts the heuristic-cache vector entirely: + // every source/module fetch from the puppeteer page (and + // any other HTTP consumer) goes back to the realm-server, + // which then serves whichever bytes are current on EFS. + // Cost: no browser cache reuse for unchanged files, but + // these are typically prerendered into `boxel_index.isolated_html` + // by the indexer and not re-fetched per page view anyway. + 'cache-control': 'no-store', ...(createdAt != null ? { 'x-created': formatRFC7231(createdAt * 1000) } : {}),