apify · l2ysho · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 19, 2026
diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml
@@ -0,0 +1,70 @@
+name: Integration tests
+
+on:
+    pull_request:
+        branches: [ master, v4 ]
+    workflow_dispatch:
+
+concurrency:
+    group: ${{ github.workflow }}-${{ github.ref }}
+    cancel-in-progress: true
+
+jobs:
+    remote-browser:
+        name: Remote browser integration
+        runs-on: ubuntu-22.04
+
+        # Side-services provide the remote browser and a deterministic HTTP target.
+        services:
+            browserless:
+                image: ghcr.io/browserless/chromium:latest
+                ports:
+                    - 3000:3000
+                env:
+                    CONCURRENT: 4
+                options: >-
+                    --health-cmd "wget -qO- http://localhost:3000/json/version || exit 1"
+                    --health-interval 5s
+                    --health-timeout 5s
+                    --health-retries 12
+            httpbin:
+                # kennethreitz/httpbin is python:3.6-slim and ships without wget/curl,
+                # so no Docker HEALTHCHECK — httpbin starts in <1s and the first test
+                # request will surface any real failure.
+                image: kennethreitz/httpbin:latest
+                ports:
+                    - 8080:80
+
+        steps:
+            -   name: Checkout repository
+                uses: actions/checkout@v6
+
+            -   name: Use Node.js 24
+                uses: actions/setup-node@v6
+                with:
+                    node-version: 24
+                    package-manager-cache: false
+
+            -   name: Turbo cache
+                uses: actions/cache@v5
+                with:
+                    path: .turbo
+                    key: turbo-${{ github.job }}-${{ github.ref_name }}-${{ github.sha }}
+                    restore-keys: |
+                        turbo-${{ github.job }}-${{ github.ref_name }}-
+
+            -   uses: apify/workflows/pnpm-install@main
+
+            # No `playwright install` — these tests connect to remote Browserless
+            # over CDP and never launch a local browser binary.
+
+            -   name: Build
+                run: pnpm ci:build
+
+            -   name: Run integration tests
+                run: pnpm test:integration
+                env:
+                    BROWSERLESS_URL: http://localhost:3000 # host port published by the `browserless` service above
+                    HTTPBIN_URL: http://httpbin # service alias, not localhost:8080 — NOTE(review): presumably resolved by the remote browser inside the services' Docker network; confirm
+                    CRAWLEE_DIFFICULT_TESTS: 1
+                    RETRY_TESTS: 1
diff --git a/docs/guides/remote_browser.mdx b/docs/guides/remote_browser.mdx
@@ -0,0 +1,70 @@
+---
+id: remote-browser
+title: "Remote browser services"
+sidebar_label: "Remote browsers"
+description: Connect Crawlee crawlers to remote browser services like Browserbase, Browserless, or Steel.
+---
+
+import ApiLink from '@site/src/components/ApiLink';
+import CodeBlock from '@theme/CodeBlock';
+
+import RemoteBrowserConfigSource from '!!raw-loader!./remote_browser_config.ts';
+import RemoteBrowserProviderSource from '!!raw-loader!./remote_browser_provider.ts';
+import RemoteBrowserPuppeteerSource from '!!raw-loader!./remote_browser_puppeteer.ts';
+
+Instead of launching a local browser, Crawlee can connect to a remote browser service like [Browserbase](https://browserbase.com/), [Browserless](https://browserless.io/), [Steel](https://steel.dev/), or any service that exposes a WebSocket/CDP endpoint. The crawler manages session rotation and the request lifecycle the same way it does locally — only the browser itself runs elsewhere.
+
+Use this when you need IPs in specific regions, want to offload CPU/memory from your runner, or need stealth features the service provides.
+
+## How it works
+
+Set the crawler's `remoteBrowser` option with the connection details. The crawler builds a <ApiLink to="browser-pool/class/RemoteBrowserPool">`RemoteBrowserPool`</ApiLink> around its own browser plugin, so the connection is always for the matching browser — there's no plugin to construct and no way to mismatch the pool with the crawler. The pool (an <ApiLink to="browser-pool/interface/IBrowserPool">`IBrowserPool`</ApiLink> wrapping the regular <ApiLink to="browser-pool/class/BrowserPool">`BrowserPool`</ApiLink>) owns everything remote: resolving the endpoint, releasing sessions when browsers close, and capping how many remote browsers run at once.
+
+## Basic usage
+
+The simplest form is a static connection URL. Use this when the service exposes a single endpoint and doesn't need per-session setup.
+
+<CodeBlock language="ts" title="src/main.ts">{RemoteBrowserConfigSource}</CodeBlock>
+
+`endpoint` can also be a function returning `{ url, context }`, called once per browser launch. Pair it with a `release` callback (it receives the `context`) to clean up sessions on the service side when the browser closes, crashes, or the pool is destroyed.
+
+`maxOpenBrowsers` caps the number of concurrent remote browsers — set it to the service's concurrent-session limit to avoid 429 errors. The pool enforces it inside `newPage()`, which waits for a free slot rather than overshooting.
+
+### Self-hosted
+
+Some services ship a Docker image you can run locally or on your own infrastructure. For example, [Browserless](https://www.browserless.io/) has an open-source Chromium image:
+
+```bash
+docker run -p 3000:3000 -e CONCURRENT=4 ghcr.io/browserless/chromium
+```
+
+Point the crawler at the local endpoint with `remoteBrowser: { endpoint: 'ws://localhost:3000' }`.
+
+## Custom provider
+
+For services with a session-create / session-release lifecycle, extend <ApiLink to="browser-pool/class/RemoteBrowserProvider">`RemoteBrowserProvider`</ApiLink> and pass the instance as `remoteBrowser.endpoint`. `connect()` runs once per browser launch and returns the connection URL plus an optional `context` object passed back to `release()`. `maxOpenBrowsers` set on the provider is adopted by the pool.
+
+<CodeBlock language="ts" title="src/main.ts">{RemoteBrowserProviderSource}</CodeBlock>
+
+## Puppeteer
+
+<ApiLink to="puppeteer-crawler/class/PuppeteerCrawler">`PuppeteerCrawler`</ApiLink> works the same way — pass the same `remoteBrowser` option; there is no plugin to construct. Puppeteer connects over CDP:
+
+<CodeBlock language="ts" title="src/main.ts">{RemoteBrowserPuppeteerSource}</CodeBlock>
+
+For Playwright you can choose the protocol via the `remoteBrowser.connection.protocol` option: `'cdp'` (default, `connectOverCDP()`) or `'playwright'` (`connect()`, Playwright's own WebSocket protocol).
+
+## Sharing a pool across crawlers
+
+`remoteBrowser` builds a pool the crawler owns and tears down. To share one remote pool across multiple crawlers, construct a <ApiLink to="browser-pool/class/RemoteBrowserPool">`RemoteBrowserPool`</ApiLink> yourself and pass it as the `browserPool` option instead — a pool supplied that way is never destroyed by the crawler, so you control its lifecycle. Use `remoteBrowser` *or* `browserPool`, not both.
+
+## Limitations
+
+- **`headless` and `launchOptions` don't apply.** The remote service controls headless mode and browser flags; configure them on the service side.
+- **`useIncognitoPages` is forced to `true`** for Playwright remote connections — `connect()` / `connectOverCDP()` don't accept persistent contexts. For state shared across requests, use the `SessionPool`.
+- **`userDataDir` has no effect** — there's no local profile when the browser runs remotely. Use the service's persistence API (e.g. Browserbase Contexts, Steel Profiles).
+
+## Further reading
+
+- <ApiLink to="browser-pool/class/RemoteBrowserPool">`RemoteBrowserPool` API reference</ApiLink>
+- <ApiLink to="browser-pool/class/RemoteBrowserProvider">`RemoteBrowserProvider` API reference</ApiLink>
diff --git a/docs/guides/remote_browser_config.ts b/docs/guides/remote_browser_config.ts
@@ -0,0 +1,25 @@
+import { PlaywrightCrawler } from 'crawlee';
+
+// Fail fast when the token is missing — otherwise the endpoint below would silently be built
+// as `...?token=undefined` and the mistake would only show up when connecting.
+const token = process.env.BROWSERLESS_TOKEN;
+if (!token) {
+    throw new Error('Set the BROWSERLESS_TOKEN environment variable to your Browserless API token.');
+}
+
+const crawler = new PlaywrightCrawler({
+    // Connect to a remote browser instead of launching locally. The crawler builds the right
+    // pool for its browser — you only supply the connection details.
+    remoteBrowser: {
+        // URL-encode the token so reserved characters cannot corrupt the query string.
+        endpoint: `wss://production-sfo.browserless.io?token=${encodeURIComponent(token)}`,
+        // Optional — respect the service's concurrent session limit.
+        maxOpenBrowsers: 5,
+    },
+    async requestHandler({ page, request, log }) {
+        const title = await page.title();
+        log.info(`${request.loadedUrl} — "${title}"`);
+    },
+});
+
+await crawler.run(['https://crawlee.dev']);
diff --git a/docs/guides/remote_browser_provider.ts b/docs/guides/remote_browser_provider.ts
@@ -0,0 +1,65 @@
+import { RemoteBrowserProvider } from '@crawlee/browser-pool';
+import { PlaywrightCrawler } from 'crawlee';
+
+/** Reads a required environment variable, failing fast with a clear message when it is unset or empty. */
+function requireEnv(name: string): string {
+    const value = process.env[name];
+    if (!value) throw new Error(`Missing required environment variable: ${name}`);
+    return value;
+}
+
+const apiKey = requireEnv('BROWSERBASE_API_KEY');
+const projectId = requireEnv('BROWSERBASE_PROJECT_ID');
+
+/**
+ * Creates one Browserbase session in `connect()` and asks the service to end it in `release()`,
+ * which receives back the `context` (the session id) that `connect()` returned.
+ */
+class BrowserbaseProvider extends RemoteBrowserProvider<{ id: string }> {
+    // Respect the service's concurrent session limit to avoid 429s.
+    override maxOpenBrowsers = 5;
+
+    async connect() {
+        const response = await fetch('https://api.browserbase.com/v1/sessions', {
+            method: 'POST',
+            headers: { 'x-bb-api-key': apiKey, 'Content-Type': 'application/json' },
+            body: JSON.stringify({ projectId }),
+        });
+
+        if (!response.ok) {
+            throw new Error(`Failed to create session: ${response.status} ${response.statusText}`);
+        }
+
+        const session = (await response.json()) as { id: string; connectUrl: string };
+        return { url: session.connectUrl, context: { id: session.id } };
+    }
+
+    override async release({ id }: { id: string }) {
+        const response = await fetch(`https://api.browserbase.com/v1/sessions/${id}`, {
+            method: 'POST',
+            headers: { 'x-bb-api-key': apiKey, 'Content-Type': 'application/json' },
+            // NOTE(review): Browserbase's session-update request lists `projectId` next to `status`;
+            // sending it is harmless if the API treats it as optional — confirm against the API reference.
+            body: JSON.stringify({ projectId, status: 'REQUEST_RELEASE' }),
+        });
+
+        // Releasing is best-effort cleanup (e.g. the session may already have ended), so report a
+        // rejected request instead of throwing — but never let a still-running session go unnoticed.
+        if (!response.ok) {
+            console.warn(`Failed to release session ${id}: ${response.status} ${response.statusText}`);
+        }
+    }
+}
+
+const crawler = new PlaywrightCrawler({
+    // Pass the provider as the `endpoint`; the crawler's pool calls connect()/release() per browser.
+    remoteBrowser: {
+        endpoint: new BrowserbaseProvider(),
+    },
+    async requestHandler({ page, request, log }) {
+        const title = await page.title();
+        log.info(`${request.loadedUrl} — "${title}"`);
+    },
+});
+
+await crawler.run(['https://crawlee.dev']);
diff --git a/docs/guides/remote_browser_puppeteer.ts b/docs/guides/remote_browser_puppeteer.ts
@@ -0,0 +1,22 @@
+import { PuppeteerCrawler } from 'crawlee';
+
+// Fail fast when the token is missing — otherwise the endpoint below would silently be built
+// as `...?token=undefined` and the mistake would only show up when connecting.
+const token = process.env.BROWSERLESS_TOKEN;
+if (!token) {
+    throw new Error('Set the BROWSERLESS_TOKEN environment variable to your Browserless API token.');
+}
+
+const crawler = new PuppeteerCrawler({
+    // PuppeteerCrawler connects over CDP. Same `remoteBrowser` option, matching browser guaranteed.
+    remoteBrowser: {
+        // URL-encode the token so reserved characters cannot corrupt the query string.
+        endpoint: `wss://production-sfo.browserless.io?token=${encodeURIComponent(token)}`,
+    },
+    async requestHandler({ page, request, log }) {
+        const title = await page.title();
+        log.info(`${request.loadedUrl} — "${title}"`);
+    },
+});
+
+await crawler.run(['https://crawlee.dev']);
diff --git a/package.json b/package.json
@@ -37,6 +37,9 @@
         "ci:build": "turbo run build --filter=./packages/* --cache-dir=\".turbo\" && node ./scripts/typescript_fixes.mjs",
         "test": "vitest run --silent",
         "test:e2e": "node test/e2e/run.mjs",
+        "test:integration": "cross-env CRAWLEE_DIFFICULT_TESTS=1 vitest run --silent=true test/integration",
+        "test:integration:services:up": "docker network create crawlee-it 2>/dev/null; docker run -d --rm --name crawlee-it-browserless --network crawlee-it -p 3000:3000 -e CONCURRENT=4 ghcr.io/browserless/chromium && docker run -d --rm --name crawlee-it-httpbin --network crawlee-it --network-alias httpbin -p 8080:80 kennethreitz/httpbin",
+        "test:integration:services:down": "docker stop crawlee-it-browserless crawlee-it-httpbin; docker network rm crawlee-it 2>/dev/null; true",
         "test:full": "cross-env CRAWLEE_DIFFICULT_TESTS=1 vitest run --silent",
         "tsc-check-tests": "tsc --noEmit --project test/tsconfig.json",
         "coverage": "vitest --coverage",

diff --git a/packages/browser-crawler/src/internals/browser-crawler.ts b/packages/browser-crawler/src/internals/browser-crawler.ts
@@ -34,10 +34,11 @@ import type {
     BrowserPoolHooks,
     BrowserPoolOptions,
     CommonPage,
+    CrawlerRemoteBrowserOptions,
     InferBrowserPluginArray,
     LaunchContext,
 } from '@crawlee/browser-pool';
-import { BrowserPool } from '@crawlee/browser-pool';
+import { BrowserPool, RemoteBrowserPool } from '@crawlee/browser-pool';
 import type { BatchAddRequestsResult, Cookie as CookieObject, IBrowserPool, ISession } from '@crawlee/types';
 import type { RobotsTxtFile } from '@crawlee/utils';
 import { CLOUDFLARE_RETRY_CSS_SELECTORS, RETRY_CSS_SELECTORS, sleep } from '@crawlee/utils';
@@ -123,6 +124,19 @@ export interface BrowserCrawlerOptions<
      */
     browserPool?: IBrowserPool<Page>;
 
+    /**
+     * Connect to a remote browser service (Browserbase, Browserless, Steel, …) instead of launching locally.
+     *
+     * The crawler builds a {@apilink RemoteBrowserPool} around its own browser plugin, so the connection is
+     * always for the right browser — there is no plugin to construct and no way to mismatch the pool with the
+     * crawler. Supply the connection details only: a static `endpoint` URL, a function returning one per launch,
+     * or a {@apilink RemoteBrowserProvider}.
+     *
+     * Mutually exclusive with `browserPool`. For sharing a remote pool across crawlers, construct a
+     * {@apilink RemoteBrowserPool} yourself and pass it as `browserPool` instead.
+     */
+    remoteBrowser?: CrawlerRemoteBrowserOptions;
+
     /**
      * Function that is called to process each request.
      *
@@ -322,12 +336,11 @@ export abstract class BrowserCrawler<
     browserPool: IBrowserPool<Page>;
 
     /**
-     * Set when the crawler constructed its own {@apilink BrowserPool} (no `browserPool` option was provided).
-     * Holds the same instance as `browserPool`, but typed as the concrete class so the crawler can call
-     * lifecycle methods (`destroy`) that aren't part of {@apilink IBrowserPool}. A user-supplied pool is
-     * never owned and never torn down by the crawler.
+     * Set when the crawler constructed its own pool (a {@apilink BrowserPool}, or a {@apilink RemoteBrowserPool}
+     * built from the `remoteBrowser` option). Holds the same instance as `browserPool` but is the only reference
+     * the crawler tears down — a user-supplied `browserPool` is never owned and never destroyed by the crawler.
      */
-    private ownedBrowserPool?: BrowserPool<InternalBrowserPoolOptions>;
+    private ownedBrowserPool?: { destroy: () => Promise<void> };
 
     launchContext: BrowserLaunchContext<LaunchOptions, unknown>;
 
@@ -349,6 +362,7 @@ export abstract class BrowserCrawler<
         launchContext: ow.optional.object,
         headless: ow.optional.any(ow.boolean, ow.string),
         browserPool: ow.optional.object.validate(validators.browserPool),
+        remoteBrowser: ow.optional.object,
         browserPoolOptions: ow.optional.object,
         saveResponseCookies: ow.optional.boolean,
         proxyConfiguration: ow.optional.object.validate(validators.proxyConfiguration),
@@ -368,6 +382,7 @@ export abstract class BrowserCrawler<
             saveResponseCookies = true,
             launchContext = {},
             browserPool,
+            remoteBrowser,
             browserPoolOptions,
             preNavigationHooks = [],
             postNavigationHooks = [],
@@ -422,6 +437,13 @@ export abstract class BrowserCrawler<
 
         this.saveResponseCookies = saveResponseCookies;
 
+        if (browserPool && remoteBrowser) {
+            throw new Error(
+                "Set at most one of 'browserPool' and 'remoteBrowser'. To share a remote pool across crawlers, " +
+                    'build a RemoteBrowserPool yourself and pass it as `browserPool`.',
+            );
+        }
+
         if (browserPool) {
             this.browserPool = browserPool;
             return;
@@ -435,10 +457,25 @@ export abstract class BrowserCrawler<
             resolvedBrowserPoolOptions.useFingerprints = false;
         }
 
-        this.ownedBrowserPool = new BrowserPool<InternalBrowserPoolOptions>({
+        if (remoteBrowser) {
+            // The crawler already built the right plugin for its browser — hand it to a RemoteBrowserPool so the
+            // remote connection is always for the matching browser (no plugin to construct, no way to mismatch).
+            const { browserPlugins, ...remoteBrowserPoolOptions } = resolvedBrowserPoolOptions;
+            const remotePool = new RemoteBrowserPool({
+                browserPlugins: browserPlugins as BrowserPlugin[],
+                ...remoteBrowser,
+                browserPoolOptions: remoteBrowserPoolOptions as any,
+            });
+            this.ownedBrowserPool = remotePool;
+            this.browserPool = remotePool as IBrowserPool<Page>;
+            return;
+        }
+
+        const ownedBrowserPool = new BrowserPool<InternalBrowserPoolOptions>({
             ...(resolvedBrowserPoolOptions as any),
         });
-        this.browserPool = this.ownedBrowserPool as IBrowserPool<Page>;
+        this.ownedBrowserPool = ownedBrowserPool;
+        this.browserPool = ownedBrowserPool as IBrowserPool<Page>;
     }
 
     protected override buildContextPipeline(): ContextPipeline<

diff --git a/packages/browser-pool/src/abstract-classes/browser-controller.ts b/packages/browser-pool/src/abstract-classes/browser-controller.ts
@@ -208,6 +208,7 @@ export abstract class BrowserController<
             this.log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
         }
 
+        await this._releaseRemoteBrowser();
         this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);
 
         setTimeout(() => {
@@ -225,9 +226,33 @@ export abstract class BrowserController<
     async kill(): Promise<void> {
         await this.hasBrowserPromise;
         await this._kill();
+        // Release the remote session (returns immediately when there is no remote token) before announcing the close.
+        await this._releaseRemoteBrowser();
         this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);
     }
 
+    /**
+     * Releases the remote browser session (if this controller serves a remote browser) via the plugin's
+     * {@apilink RemoteConnection}. Safe to call multiple times — the token is cleared after the first call
+     * and the pool's registry also dedupes, so `release()` fires at most once across close()/kill().
+     *
+     * Never rejects: a failing release is logged and swallowed, mirroring how close() treats a failing
+     * browser shutdown, so the caller still goes on to emit BROWSER_CLOSED.
+     */
+    private async _releaseRemoteBrowser(): Promise<void> {
+        const token = this.launchContext?._remoteToken;
+        if (token === undefined) return;
+
+        // Clear so release only fires once (close() schedules kill() after a timeout).
+        this.launchContext._remoteToken = undefined;
+
+        try {
+            await this.browserPlugin.remoteConnection?.release(token);
+        } catch (error) {
+            this.log.debug(`Could not release remote browser.\nCause: ${(error as Error).message}`, { id: this.id });
+        }
+    }
+
     /**
      * Opens new browser page.
      * @ignore