diff --git a/.github/actions/cargo-build-macos-binary/action.yml b/.github/actions/cargo-build-macos-binary/action.yml
index 53e9df53dc3..4db9ce46198 100644
--- a/.github/actions/cargo-build-macos-binary/action.yml
+++ b/.github/actions/cargo-build-macos-binary/action.yml
@@ -60,7 +60,7 @@ runs:
         path: ./${{ env.ASSET_FULL_NAME }}.tar.gz
         retention-days: 3
     - name: Deploy archive to GitHub release
-      uses: quickwit-inc/upload-to-github-release@v1
+      uses: quickwit-inc/upload-to-github-release@9b2c40fba23bf8dea05b7d2eece24cbc95d4a190
       env:
         GITHUB_TOKEN: ${{ inputs.token }}
       with:
diff --git a/.github/actions/cross-build-binary/action.yml b/.github/actions/cross-build-binary/action.yml
index e5b855154ea..0f23b17aa22 100644
--- a/.github/actions/cross-build-binary/action.yml
+++ b/.github/actions/cross-build-binary/action.yml
@@ -56,7 +56,7 @@ runs:
         path: ./${{ env.ASSET_FULL_NAME }}.tar.gz
         retention-days: 3
     - name: Upload archive
-      uses: quickwit-inc/upload-to-github-release@v1
+      uses: quickwit-inc/upload-to-github-release@9b2c40fba23bf8dea05b7d2eece24cbc95d4a190
       env:
         GITHUB_TOKEN: ${{ inputs.token }}
       with:
diff --git a/.github/workflows/publish_lambda.yaml b/.github/workflows/publish_lambda.yaml
new file mode 100644
index 00000000000..80b5386f0bf
--- /dev/null
+++ b/.github/workflows/publish_lambda.yaml
@@ -0,0 +1,78 @@
+# This workflow creates a new release for a quickwit search aws lambda.
+# The artifact is a zip file containing a binary for ARM 64,
+# ready to be deployed by the deployer.
+#
+# See quickwit-lambda-client/README.md
+name: Release Lambda binary
+
+on:
+  push:
+    tags:
+      - 'lambda-*'
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version tag (e.g., v0.8.0)'
+        required: false
+        default: 'dev'
+
+permissions:
+  contents: read
+
+jobs:
+  build-lambda:
+    name: Build Lambda ARM64
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      actions: write
+    steps:
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+
+      - name: Set version
+        run: |
+          if [ "${{ github.ref_type }}" = "tag" ]; then
+            # Extract version from tag (e.g., lambda-v0.8.0 -> v0.8.0)
+            echo "ASSET_VERSION=${GITHUB_REF_NAME#lambda-}" >> $GITHUB_ENV
+          elif [ -n "${{ github.event.inputs.version }}" ] && [ "${{ github.event.inputs.version }}" != "dev" ]; then
+            echo "ASSET_VERSION=${{ github.event.inputs.version }}" >> $GITHUB_ENV
+          else
+            echo "ASSET_VERSION=dev-$(git rev-parse --short HEAD)" >> $GITHUB_ENV
+          fi
+
+      - name: Install rustup
+        run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain none -y
+
+      - name: Install cross
+        run: cargo install cross
+
+      - name: Retrieve and export commit date, hash, and tags
+        run: |
+          echo "QW_COMMIT_DATE=$(TZ=UTC0 git log -1 --format=%cd --date=format-local:%Y-%m-%dT%H:%M:%SZ)" >> $GITHUB_ENV
+          echo "QW_COMMIT_HASH=$(git rev-parse HEAD)" >> $GITHUB_ENV
+          echo "QW_COMMIT_TAGS=$(git tag --points-at HEAD | tr '\n' ',')" >> $GITHUB_ENV
+
+      - name: Build Lambda binary
+        run: cross build --release --features lambda-release --target aarch64-unknown-linux-gnu -p quickwit-lambda-server --bin quickwit-aws-lambda-leaf-search
+        env:
+          QW_COMMIT_DATE: ${{ env.QW_COMMIT_DATE }}
+          QW_COMMIT_HASH: ${{ env.QW_COMMIT_HASH }}
+          QW_COMMIT_TAGS: ${{ env.QW_COMMIT_TAGS }}
+        working-directory: ./quickwit
+
+      - name: Create Lambda zip
+        run: |
+          cd quickwit/target/aarch64-unknown-linux-gnu/release
+          cp quickwit-aws-lambda-leaf-search bootstrap
+          zip quickwit-aws-lambda-${{ env.ASSET_VERSION }}-aarch64.zip bootstrap
+          mv quickwit-aws-lambda-${{ env.ASSET_VERSION }}-aarch64.zip ../../../../
+
+      - name: Upload to GitHub release
+        uses: quickwit-inc/upload-to-github-release@9b2c40fba23bf8dea05b7d2eece24cbc95d4a190
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          file: quickwit-aws-lambda-${{ env.ASSET_VERSION }}-aarch64.zip
+          overwrite: true
+          draft: true
+          tag_name: ${{ env.ASSET_VERSION }}
diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv
index 594ac812cf7..b5c3eaf3c34 100644
--- a/LICENSE-3rdparty.csv
+++ b/LICENSE-3rdparty.csv
@@ -21,6 +21,8 @@ arrayvec,https://github.com/bluss/arrayvec,MIT OR Apache-2.0,bluss
 assert-json-diff,https://github.com/davidpdrsn/assert-json-diff,MIT,David Pedersen <david.pdrsn@gmail.com>
 async-compression,https://github.com/Nullus157/async-compression,MIT OR Apache-2.0,"Wim Looman <wim@nemo157.com>, Allen Bui <fairingrey@gmail.com>"
 async-speed-limit,https://github.com/tikv/async-speed-limit,MIT OR Apache-2.0,The TiKV Project Developers
+async-stream,https://github.com/tokio-rs/async-stream,MIT,Carl Lerche <me@carllerche.com>
+async-stream-impl,https://github.com/tokio-rs/async-stream,MIT,Carl Lerche <me@carllerche.com>
 async-trait,https://github.com/dtolnay/async-trait,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
 atomic-waker,https://github.com/smol-rs/atomic-waker,Apache-2.0 OR MIT,"Stjepan Glavina <stjepang@gmail.com>, Contributors to futures-rs"
 aws-config,https://github.com/smithy-lang/smithy-rs,Apache-2.0,"AWS Rust SDK Team <aws-sdk-rust@amazon.com>, Russell Cohen <rcoh@amazon.com>"
@@ -28,6 +30,7 @@ aws-credential-types,https://github.com/smithy-lang/smithy-rs,Apache-2.0,AWS Rus
 aws-lc-rs,https://github.com/aws/aws-lc-rs,ISC AND (Apache-2.0 OR ISC),AWS-LibCrypto
 aws-lc-sys,https://github.com/aws/aws-lc-rs,ISC AND (Apache-2.0 OR ISC) AND OpenSSL,AWS-LC
 aws-runtime,https://github.com/smithy-lang/smithy-rs,Apache-2.0,AWS Rust SDK Team <aws-sdk-rust@amazon.com>
+aws-sdk-lambda,https://github.com/awslabs/aws-sdk-rust,Apache-2.0,"AWS Rust SDK Team <aws-sdk-rust@amazon.com>, Russell Cohen <rcoh@amazon.com>"
 aws-sdk-s3,https://github.com/awslabs/aws-sdk-rust,Apache-2.0,"AWS Rust SDK Team <aws-sdk-rust@amazon.com>, Russell Cohen <rcoh@amazon.com>"
 aws-sdk-sso,https://github.com/awslabs/aws-sdk-rust,Apache-2.0,"AWS Rust SDK Team <aws-sdk-rust@amazon.com>, Russell Cohen <rcoh@amazon.com>"
 aws-sdk-ssooidc,https://github.com/awslabs/aws-sdk-rust,Apache-2.0,"AWS Rust SDK Team <aws-sdk-rust@amazon.com>, Russell Cohen <rcoh@amazon.com>"
@@ -222,6 +225,8 @@ itoa,https://github.com/dtolnay/itoa,MIT OR Apache-2.0,David Tolnay <dtolnay@gma
 jobserver,https://github.com/rust-lang/jobserver-rs,MIT OR Apache-2.0,Alex Crichton <alex@alexcrichton.com>
 js-sys,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/js-sys,MIT OR Apache-2.0,The wasm-bindgen Developers
 json_comments,https://github.com/tmccombs/json-comments-rs,Apache-2.0,Thayne McCombs <astrothayne@gmail.com>
+lambda_runtime,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera <dcalaver@amazon.com>, Harold Sun <sunhua@amazon.com>"
+lambda_runtime_api_client,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera <dcalaver@amazon.com>, Harold Sun <sunhua@amazon.com>"
 lazy_static,https://github.com/rust-lang-nursery/lazy-static.rs,MIT OR Apache-2.0,Marvin Löbel <loebel.marvin@gmail.com>
 levenshtein_automata,https://github.com/tantivy-search/levenshtein-automata,MIT,Paul Masurel <paul.masurel@gmail.com>
 libc,https://github.com/rust-lang/libc,MIT OR Apache-2.0,The Rust Project Developers
@@ -397,6 +402,7 @@ serde_core,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar
 serde_derive,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar <erick.tryzelaar@gmail.com>, David Tolnay <dtolnay@gmail.com>"
 serde_json,https://github.com/serde-rs/json,MIT OR Apache-2.0,"Erick Tryzelaar <erick.tryzelaar@gmail.com>, David Tolnay <dtolnay@gmail.com>"
 serde_json_borrow,https://github.com/PSeitz/serde_json_borrow,MIT,Pascal Seitz <pascal.seitz@gmail.com>
+serde_path_to_error,https://github.com/dtolnay/path-to-error,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
 serde_qs,https://github.com/samscott89/serde_qs,MIT OR Apache-2.0,Sam Scott <sam@osohq.com>
 serde_spanned,https://github.com/toml-rs/toml,MIT OR Apache-2.0,The serde_spanned Authors
 serde_urlencoded,https://github.com/nox/serde_urlencoded,MIT OR Apache-2.0,Anthony Ramine <n.oxyde@gmail.com>
@@ -497,9 +503,11 @@ unicode-width,https://github.com/unicode-rs/unicode-width,MIT OR Apache-2.0,"kwa
 unit-prefix,https://codeberg.org/commons-rs/unit-prefix,MIT,"Fabio Valentini <decathorpe@gmail.com>, Benjamin Sago <ogham@bsago.me>"
 unsafe-libyaml,https://github.com/dtolnay/unsafe-libyaml,MIT,David Tolnay <dtolnay@gmail.com>
 untrusted,https://github.com/briansmith/untrusted,ISC,Brian Smith <brian@briansmith.org>
+ureq-proto,https://github.com/algesten/ureq-proto,MIT OR Apache-2.0,Martin Algesten <martin@algesten.se>
 url,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers
 urlencoding,https://github.com/kornelski/rust_urlencoding,MIT,"Kornel <kornel@geekhood.net>, Bertram Truong <b@bertramtruong.com>"
 username,https://pijul.org/darcs/user,MIT OR Apache-2.0,Pierre-Étienne Meunier <pierre-etienne.meunier@aalto.fi>
+utf-8,https://github.com/SimonSapin/rust-utf8,MIT OR Apache-2.0,Simon Sapin <simon.sapin@exyr.org>
 utf8-ranges,https://github.com/BurntSushi/utf8-ranges,Unlicense OR MIT,Andrew Gallant <jamslam@gmail.com>
 utf8_iter,https://github.com/hsivonen/utf8_iter,Apache-2.0 OR MIT,Henri Sivonen <hsivonen@hsivonen.fi>
 utf8parse,https://github.com/alacritty/vte,Apache-2.0 OR MIT,"Joe Wilm <joe@jwilm.com>, Christian Duerr <contact@christianduerr.com>"
diff --git a/docs/configuration/lambda-config.md b/docs/configuration/lambda-config.md
new file mode 100644
index 00000000000..4f522da4b35
--- /dev/null
+++ b/docs/configuration/lambda-config.md
@@ -0,0 +1,240 @@
+---
+title: Lambda configuration
+sidebar_position: 6
+---
+
+Quickwit supports offloading leaf search operations to AWS Lambda for horizontal scaling. When the local search queue becomes saturated, overflow splits are automatically sent to Lambda functions for processing.
+
+:::note
+Lambda offloading is currently only supported on AWS.
+:::
+
+## How it works
+
+Lambda offloading is **only active when a `lambda` configuration section is present** under `searcher` in your node configuration. When configured:
+
+1. Quickwit monitors the local search queue depth
+2. When pending searches exceed the `offload_threshold`, new splits are sent to Lambda instead of being queued locally
+3. Lambda returns per-split search results that are cached and merged with local results
+
+This allows Quickwit to handle traffic spikes without provisioning additional searcher nodes.
+
+## Startup validation
+
+When a `lambda` configuration is defined, Quickwit performs a **dry run invocation** at startup to verify that:
+- The Lambda function exists
+- The function version matches the embedded binary
+- The invoker has permission to call the function
+
+If this validation fails, **Quickwit will fail to start**. This ensures that Lambda offloading works correctly before the node begins serving traffic.
+
+## Configuration
+
+Add a `lambda` section under `searcher` in your node configuration:
+
+```yaml
+searcher:
+  lambda:
+    offload_threshold: 100
+    auto_deploy:
+      execution_role_arn: arn:aws:iam::123456789012:role/quickwit-lambda-role
+      memory_size: 5 GiB
+      invocation_timeout_secs: 15
+```
+
+### Lambda configuration options
+
+| Property | Description | Default value |
+| --- | --- | --- |
+| `function_name` | Name of the AWS Lambda function to invoke. | `quickwit-lambda-search` |
+| `max_splits_per_invocation` | Maximum number of splits to send in a single Lambda invocation. Must be at least 1. | `10` |
+| `offload_threshold` | Number of pending local searches before offloading to Lambda. A value of `0` offloads everything to Lambda. | `100` |
+| `auto_deploy` | Auto-deployment configuration. If set, Quickwit automatically deploys or updates the Lambda function at startup. | (none) |
+
+### Auto-deploy configuration options
+
+| Property | Description | Default value |
+| --- | --- | --- |
+| `execution_role_arn` | **Required.** IAM role ARN for the Lambda function's execution role. | |
+| `memory_size` | Memory allocated to the Lambda function. More memory provides more CPU. | `5 GiB` |
+| `invocation_timeout_secs` | Timeout for Lambda invocations in seconds. | `15` |
+
+## Deployment options
+
+### Automatic deployment (recommended)
+
+With `auto_deploy` configured, Quickwit automatically:
+1. Creates the Lambda function if it doesn't exist
+2. Updates the function code if the embedded binary has changed
+3. Publishes a new version with a unique identifier
+4. Garbage collects old versions (keeps current + 5 most recent)
+
+This is the recommended approach as it ensures the Lambda function always matches the Quickwit binary version.
+
+### Manual deployment
+
+You can deploy the Lambda function manually without `auto_deploy`:
+1. Download the Lambda zip from [GitHub releases](https://github.com/quickwit-oss/quickwit/releases)
+2. Create or update the Lambda function using AWS CLI, Terraform, or the AWS Console
+3. Publish a version with description format `quickwit_{version}_{sha256}_{timeout}_{deploy_config}"` (e.g., `quickwit_0_8_0_fa940f44_5120_60s_6c3b2`)
+
+The description must match the format Quickwit expects, or it won't find the function version.
+
+## IAM permissions
+
+### Permissions for the Quickwit node
+
+The IAM role or user running Quickwit needs the following permissions to invoke Lambda:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "lambda:InvokeFunction"
+      ],
+      "Resource": "arn:aws:lambda:*:*:function:quickwit-lambda-search:*"
+    }
+  ]
+}
+```
+
+If using `auto_deploy`, additional permissions are required for deployment:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "lambda:CreateFunction",
+        "lambda:GetFunction",
+        "lambda:UpdateFunctionCode",
+        "lambda:PublishVersion",
+        "lambda:ListVersionsByFunction",
+        "lambda:DeleteFunction"
+      ],
+      "Resource": "arn:aws:lambda:*:*:function:quickwit-lambda-search"
+    },
+    {
+      "Effect": "Allow",
+      "Action": "iam:PassRole",
+      "Resource": "arn:aws:iam::*:role/quickwit-lambda-role",
+      "Condition": {
+        "StringEquals": {
+          "iam:PassedToService": "lambda.amazonaws.com"
+        }
+      }
+    }
+  ]
+}
+```
+
+### Lambda execution role
+
+The Lambda function requires an execution role with S3 read access to your index data.
+
+Example policy:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": "s3:GetObject",
+      "Resource": "arn:aws:s3:::your-index-bucket/*"
+    }
+  ]
+}
+```
+
+The execution role must also have a trust policy allowing Lambda to assume it:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Principal": {
+        "Service": "lambda.amazonaws.com"
+      },
+      "Action": "sts:AssumeRole"
+    }
+  ]
+}
+```
+
+## CloudWatch logging
+
+The Lambda function emits structured logs (JSON) to stdout. To have these logs captured by CloudWatch, add the following iam permissions to the Lambda execution role:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "logs:CreateLogGroup",
+        "logs:CreateLogStream",
+        "logs:PutLogEvents"
+      ],
+      "Resource": "arn:aws:logs:*:*:*"
+    }
+  ]
+}
+```
+
+No additional configuration is needed on the Quickwit side.
+
+## Versioning
+
+Quickwit uses content-based versioning for Lambda:
+- A SHA256 hash of the Lambda binary is computed at build time
+- This hash is embedded in the Lambda function description as `quickwit:{version}-{sha256_short}`
+- When Quickwit starts, it searches for a version matching this description
+- Different Quickwit builds with the same Lambda binary share the same Lambda version
+- Updating the Lambda binary automatically triggers a new deployment
+
+## Example configuration
+
+
+Minimal configuration (with auto-deployment):
+
+```yaml
+searcher:
+  lambda:
+    auto_deploy:
+      execution_role_arn: arn:aws:iam::123456789012:role/quickwit-lambda-role
+```
+
+
+Full configuration (auto-deployment):
+
+```yaml
+searcher:
+  lambda:
+    function_name: quickwit-lambda-search
+    max_splits_per_invocation: 10
+    offload_threshold: 10
+    auto_deploy:
+      execution_role_arn: arn:aws:iam::123456789012:role/quickwit-lambda-role
+      memory_size: 5 GiB
+      invocation_timeout_secs: 15
+```
+
+Aggressive offloading (send everything to Lambda):
+
+```yaml
+searcher:
+  lambda:
+    function_name: quickwit-lambda-search
+    offload_threshold: 0
+    auto_deploy:
+      execution_role_arn: arn:aws:iam::123456789012:role/quickwit-lambda-role
+```
diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock
index 21f1a789bec..7836dd5a74b 100644
--- a/quickwit/Cargo.lock
+++ b/quickwit/Cargo.lock
@@ -532,6 +532,29 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "aws-sdk-lambda"
+version = "1.112.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cad866b2e51c3af758e5c8bb941a8c904262663fde53019aa1c5093172c54d3"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-eventstream",
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-json 0.61.9",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand 2.3.0",
+ "http 0.2.12",
+ "regex-lite",
+ "tracing",
+]
+
 [[package]]
 name = "aws-sdk-s3"
 version = "1.62.0"
@@ -685,9 +708,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-async"
-version = "1.2.7"
+version = "1.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c"
+checksum = "3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70"
 dependencies = [
  "futures-util",
  "pin-project-lite",
@@ -771,9 +794,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-http-client"
-version = "1.1.5"
+version = "1.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a"
+checksum = "0709f0083aa19b704132684bc26d3c868e06bd428ccc4373b0b55c3e8748a58b"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-protocol-test",
@@ -823,6 +846,18 @@ dependencies = [
  "aws-smithy-types",
 ]
 
+[[package]]
+name = "aws-smithy-mocks"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "943deb750d91da9f788024482ff1f3393cefb017d0dc30fc5cecb11e1be15770"
+dependencies = [
+ "aws-smithy-http-client",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "http 1.4.0",
+]
+
 [[package]]
 name = "aws-smithy-observability"
 version = "0.1.5"
@@ -834,9 +869,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-protocol-test"
-version = "0.63.7"
+version = "0.63.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01317a9e3c5c06f1af35001ef0c873c1e34e458c20b2ee1eee0fb431e6dbb010"
+checksum = "b59f9305f7863a70f4a0c048fa6d81fb9dd9373a751358791faaad8881c1377f"
 dependencies = [
  "assert-json-diff",
  "aws-smithy-runtime-api",
@@ -888,9 +923,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-runtime-api"
-version = "1.9.3"
+version = "1.11.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2"
+checksum = "8c55e0837e9b8526f49e0b9bfa9ee18ddee70e853f5bc09c5d11ebceddcb0fec"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-types",
@@ -905,9 +940,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-types"
-version = "1.3.5"
+version = "1.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01"
+checksum = "576b0d6991c9c32bc14fc340582ef148311f924d41815f641a308b5d11e8e7cd"
 dependencies = [
  "base64-simd",
  "bytes",
@@ -4328,6 +4363,55 @@ dependencies = [
  "rustversion",
 ]
 
+[[package]]
+name = "lambda_runtime"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed49669d6430292aead991e19bf13153135a884f916e68f32997c951af637ebe"
+dependencies = [
+ "async-stream",
+ "base64 0.22.1",
+ "bytes",
+ "futures",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "http-serde",
+ "hyper 1.8.1",
+ "hyper-util",
+ "lambda_runtime_api_client",
+ "pin-project",
+ "serde",
+ "serde_json",
+ "serde_path_to_error",
+ "tokio",
+ "tokio-stream",
+ "tower 0.4.13",
+ "tower-layer",
+ "tracing",
+]
+
+[[package]]
+name = "lambda_runtime_api_client"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c90a10f094475a34a04da2be11686c4dcfe214d93413162db9ffdff3d3af293a"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper 1.8.1",
+ "hyper-util",
+ "tokio",
+ "tower 0.4.13",
+ "tower-service",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.5.0"
@@ -7009,6 +7093,57 @@ dependencies = [
  "utoipa",
 ]
 
+[[package]]
+name = "quickwit-lambda-client"
+version = "0.8.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "aws-config",
+ "aws-sdk-lambda",
+ "aws-smithy-mocks",
+ "base64 0.22.1",
+ "bytesize",
+ "md5",
+ "once_cell",
+ "prost 0.14.1",
+ "quickwit-common",
+ "quickwit-config",
+ "quickwit-lambda-server",
+ "quickwit-proto",
+ "quickwit-search",
+ "quickwit-storage",
+ "serde_json",
+ "sha2",
+ "tokio",
+ "tracing",
+ "ureq",
+]
+
+[[package]]
+name = "quickwit-lambda-server"
+version = "0.8.0"
+dependencies = [
+ "anyhow",
+ "base64 0.22.1",
+ "bytesize",
+ "lambda_runtime",
+ "openssl",
+ "prost 0.14.1",
+ "quickwit-common",
+ "quickwit-config",
+ "quickwit-doc-mapper",
+ "quickwit-proto",
+ "quickwit-search",
+ "quickwit-storage",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.17",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "quickwit-macros"
 version = "0.8.0"
@@ -7260,6 +7395,7 @@ dependencies = [
  "quickwit-ingest",
  "quickwit-jaeger",
  "quickwit-janitor",
+ "quickwit-lambda-client",
  "quickwit-metastore",
  "quickwit-opentelemetry",
  "quickwit-proto",
@@ -10288,6 +10424,35 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 
+[[package]]
+name = "ureq"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc"
+dependencies = [
+ "base64 0.22.1",
+ "flate2",
+ "log",
+ "percent-encoding",
+ "rustls 0.23.36",
+ "rustls-pki-types",
+ "ureq-proto",
+ "utf-8",
+ "webpki-roots 1.0.5",
+]
+
+[[package]]
+name = "ureq-proto"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f"
+dependencies = [
+ "base64 0.22.1",
+ "http 1.4.0",
+ "httparse",
+ "log",
+]
+
 [[package]]
 name = "url"
 version = "2.5.8"
@@ -10317,6 +10482,12 @@ dependencies = [
  "winapi 0.2.8",
 ]
 
+[[package]]
+name = "utf-8"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
+
 [[package]]
 name = "utf8-ranges"
 version = "1.0.5"
diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml
index c2e8ec62dc3..3ffdcb2f5a6 100644
--- a/quickwit/Cargo.toml
+++ b/quickwit/Cargo.toml
@@ -19,6 +19,8 @@ members = [
   "quickwit-integration-tests",
   "quickwit-jaeger",
   "quickwit-janitor",
+  "quickwit-lambda-client",
+  "quickwit-lambda-server",
   "quickwit-macros",
   "quickwit-metastore",
 
@@ -56,6 +58,8 @@ default-members = [
   "quickwit-integration-tests",
   "quickwit-jaeger",
   "quickwit-janitor",
+  "quickwit-lambda-client",
+  "quickwit-lambda-server",
   "quickwit-macros",
   "quickwit-metastore",
   "quickwit-opentelemetry",
@@ -142,6 +146,7 @@ hyper-util = { version = "0.1", default-features = false, features = [
 indexmap = { version = "2.12", features = ["serde"] }
 indicatif = "0.18"
 itertools = "0.14"
+lambda_runtime = "0.13"
 json_comments = "0.2"
 libz-sys = "1.1"
 lru = "0.16"
@@ -221,6 +226,7 @@ serde_qs = { version = "0.15" }
 serde_with = "3.16"
 serde_yaml = "0.9"
 serial_test = { version = "3.2", features = ["file_locks"] }
+sha2 = "0.10"
 siphasher = "1.0"
 smallvec = "1"
 sqlx = { version = "0.8", features = [
@@ -283,6 +289,7 @@ tracing-subscriber = { version = "0.3", features = [
 ttl_cache = "0.5"
 typetag = "0.2"
 ulid = "1.2"
+ureq = "3"
 username = "0.2"
 # We cannot upgrade to utoipa 5.0+ due to significant breaking changes:
 # 1. The `OpenApi` struct structure changed (fields are private), breaking our manual merging logic in openapi.rs
@@ -304,8 +311,10 @@ aws-credential-types = { version = "1.2", features = ["hardcoded-credentials"] }
 aws-runtime = "1.5"
 aws-sdk-kinesis = "1.97"
 aws-sdk-s3 = "=1.62"
+aws-sdk-lambda = "1"
 aws-sdk-sqs = "1.91"
 aws-smithy-async = "1.2"
+aws-smithy-mocks = "0.2"
 aws-smithy-http-client = { version = "1.1", features = ["default-client"] }
 aws-smithy-runtime = "1.9"
 aws-smithy-types = { version = "1.3", features = [
@@ -344,6 +353,8 @@ quickwit-ingest = { path = "quickwit-ingest" }
 quickwit-integration-tests = { path = "quickwit-integration-tests" }
 quickwit-jaeger = { path = "quickwit-jaeger" }
 quickwit-janitor = { path = "quickwit-janitor" }
+quickwit-lambda-client = { path = "quickwit-lambda-client" }
+quickwit-lambda-server = { path = "quickwit-lambda-server" }
 quickwit-macros = { path = "quickwit-macros" }
 quickwit-metastore = { path = "quickwit-metastore" }
 quickwit-opentelemetry = { path = "quickwit-opentelemetry" }
diff --git a/quickwit/quickwit-aws/src/lib.rs b/quickwit/quickwit-aws/src/lib.rs
index 31b63d121e2..2dec49509bc 100644
--- a/quickwit/quickwit-aws/src/lib.rs
+++ b/quickwit/quickwit-aws/src/lib.rs
@@ -42,5 +42,5 @@ pub async fn get_aws_config() -> &'static aws_config::SdkConfig {
 
 /// Returns the AWS behavior version.
 pub fn aws_behavior_version() -> BehaviorVersion {
-    BehaviorVersion::v2025_08_07()
+    BehaviorVersion::v2026_01_12()
 }
diff --git a/quickwit/quickwit-config/Cargo.toml b/quickwit/quickwit-config/Cargo.toml
index 7cf75818444..a1877661490 100644
--- a/quickwit/quickwit-config/Cargo.toml
+++ b/quickwit/quickwit-config/Cargo.toml
@@ -43,6 +43,7 @@ quickwit-proto = { workspace = true }
 tokio = { workspace = true }
 
 quickwit-proto = { workspace = true, features = ["testsuite"] }
+quickwit-common = { workspace = true, features = ["testsuite"] }
 
 [features]
 testsuite = []
diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json
index 01def63b10e..7269b37ae22 100644
--- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json
+++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json
@@ -70,6 +70,16 @@
             "min_throughtput_bytes_per_secs": 100000,
             "timeout_millis": 2000,
             "max_num_retries": 2
+        },
+        "lambda": {
+            "function_name": "quickwit-lambda-leaf-search",
+            "max_splits_per_invocation": 10,
+            "offload_threshold": 30,
+            "auto_deploy": {
+                "execution_role_arn": "arn:aws:iam::123456789012:role/quickwit-lambda-role",
+                "memory_size": "5 GiB",
+                "invocation_timeout_secs": 15
+            }
         }
     },
     "jaeger": {
diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml
index 0b4e0c30229..ea715dcffe0 100644
--- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml
+++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml
@@ -62,6 +62,16 @@ min_throughtput_bytes_per_secs = 100000
 timeout_millis = 2000
 max_num_retries = 2
 
+[searcher.lambda]
+function_name = "quickwit-lambda-leaf-search"
+max_splits_per_invocation = 10
+offload_threshold = 30
+
+[searcher.lambda.auto_deploy]
+execution_role_arn = "arn:aws:iam::123456789012:role/quickwit-lambda-role"
+memory_size = "5 GiB"
+invocation_timeout_secs = 15
+
 [jaeger]
 enable_endpoint = true
 lookback_period_hours = 24
diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml
index cb16052fbd1..face0852972 100644
--- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml
+++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml
@@ -64,6 +64,14 @@ searcher:
     min_throughtput_bytes_per_secs: 100000
     timeout_millis: 2000
     max_num_retries: 2
+  lambda:
+    function_name: quickwit-lambda-leaf-search
+    max_splits_per_invocation: 10
+    offload_threshold: 30
+    auto_deploy:
+      execution_role_arn: arn:aws:iam::123456789012:role/quickwit-lambda-role
+      memory_size: 5 GiB
+      invocation_timeout_secs: 15
 
 jaeger:
   enable_endpoint: true
diff --git a/quickwit/quickwit-config/src/lib.rs b/quickwit/quickwit-config/src/lib.rs
index df108b844c0..22cdb2538b4 100644
--- a/quickwit/quickwit-config/src/lib.rs
+++ b/quickwit/quickwit-config/src/lib.rs
@@ -74,8 +74,8 @@ pub use crate::metastore_config::{
 };
 pub use crate::node_config::{
     CacheConfig, CachePolicy, DEFAULT_QW_CONFIG_PATH, GrpcConfig, IndexerConfig, IngestApiConfig,
-    JaegerConfig, KeepAliveConfig, NodeConfig, RestConfig, SearcherConfig, SplitCacheLimits,
-    StorageTimeoutPolicy, TlsConfig,
+    JaegerConfig, KeepAliveConfig, LambdaConfig, LambdaDeployConfig, NodeConfig, RestConfig,
+    SearcherConfig, SplitCacheLimits, StorageTimeoutPolicy, TlsConfig,
 };
 use crate::source_config::serialize::{SourceConfigV0_7, SourceConfigV0_8, VersionedSourceConfig};
 pub use crate::storage_config::{
diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs
index e8c347eb4a5..de2dc3bb4a0 100644
--- a/quickwit/quickwit-config/src/node_config/mod.rs
+++ b/quickwit/quickwit-config/src/node_config/mod.rs
@@ -300,6 +300,85 @@ pub struct SearcherConfig {
     pub storage_timeout_policy: Option<StorageTimeoutPolicy>,
     pub warmup_memory_budget: ByteSize,
     pub warmup_single_split_initial_allocation: ByteSize,
+    /// Lambda configuration for serverless leaf search execution.
+    /// If set, enables Lambda execution for leaf search.
+    ///
+    /// If set, and Quickwit cannot access the Lambda (after a deploy attempt if
+    /// auto deploy is set up), Quickwit will log an error and
+    /// fail on startup.
+    #[serde(default)]
+    pub lambda: Option<LambdaConfig>,
+}
+
+/// Configuration for AWS Lambda leaf search execution.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct LambdaConfig {
+    /// AWS Lambda function name.
+    #[serde(default = "LambdaConfig::default_function_name")]
+    pub function_name: String,
+    /// Maximum number of splits per Lambda invocation.
+    #[serde(default = "LambdaConfig::default_max_splits_per_invocation")]
+    pub max_splits_per_invocation: NonZeroUsize,
+    /// Maximum number of splits to process locally before offloading to Lambda.
+    /// When the number of pending split searches exceeds this threshold,
+    /// new splits are offloaded to Lambda instead of being queued locally.
+    /// A value of 0 offloads everything to Lambda.
+    #[serde(default = "LambdaConfig::default_offload_threshold")]
+    pub offload_threshold: usize,
+    /// Auto-deploy configuration. If set, Quickwit will automatically deploy
+    /// the Lambda function at startup.
+    /// If deploying a lambda fails, Quickwit will log an error and fail.
+    #[serde(default)]
+    pub auto_deploy: Option<LambdaDeployConfig>,
+}
+
+impl LambdaConfig {
+    #[cfg(feature = "testsuite")]
+    pub fn for_test() -> Self {
+        Self {
+            function_name: Self::default_function_name(),
+            max_splits_per_invocation: Self::default_max_splits_per_invocation(),
+            offload_threshold: Self::default_offload_threshold(),
+            auto_deploy: None,
+        }
+    }
+
+    fn default_function_name() -> String {
+        "quickwit-lambda-search".to_string()
+    }
+    fn default_max_splits_per_invocation() -> NonZeroUsize {
+        NonZeroUsize::new(10).unwrap()
+    }
+    fn default_offload_threshold() -> usize {
+        100
+    }
+}
+
+/// Configuration for automatic Lambda function deployment.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct LambdaDeployConfig {
+    /// IAM execution role ARN for the Lambda function.
+    /// The role only requires GetObject permission to the targeted S3 bucket.
+    pub execution_role_arn: String,
+    /// Memory size for the Lambda function.
+    /// It will be rounded up to the nearest multiple of 1MiB.
+    #[serde(default = "LambdaDeployConfig::default_memory_size")]
+    pub memory_size: ByteSize,
+    /// Timeout for Lambda invocations in seconds.
+    #[serde(default = "LambdaDeployConfig::default_invocation_timeout_secs")]
+    pub invocation_timeout_secs: u64,
+}
+
+impl LambdaDeployConfig {
+    fn default_memory_size() -> ByteSize {
+        // Empirically this implies between 4 and 6 vCPUs.
+        ByteSize::gib(5)
+    }
+    fn default_invocation_timeout_secs() -> u64 {
+        15
+    }
 }
 
 #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
@@ -316,6 +395,14 @@ pub struct CacheConfig {
 }
 
 impl CacheConfig {
+    pub fn no_cache() -> Self {
+        CacheConfig {
+            capacity: None,
+            policy: None,
+            virtual_caches: Vec::new(),
+        }
+    }
+
     pub fn default_with_capacity(capacity: ByteSize) -> Self {
         CacheConfig {
             capacity: Some(capacity),
@@ -376,7 +463,7 @@ pub enum CachePolicy {
 }
 
 impl std::fmt::Display for CachePolicy {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         match self {
             CachePolicy::Lru => f.write_str("lru"),
             CachePolicy::S3Fifo => f.write_str("s3-fifo"),
@@ -435,6 +522,7 @@ impl Default for SearcherConfig {
             storage_timeout_policy: None,
             warmup_memory_budget: ByteSize::gb(100),
             warmup_single_split_initial_allocation: ByteSize::gb(1),
+            lambda: None,
         }
     }
 }
diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs
index 7d3b5f2c64b..e9b9b745e8b 100644
--- a/quickwit/quickwit-config/src/node_config/serialize.rs
+++ b/quickwit/quickwit-config/src/node_config/serialize.rs
@@ -542,8 +542,8 @@ mod tests {
     use itertools::Itertools;
 
     use super::*;
-    use crate::CacheConfig;
     use crate::storage_config::StorageBackendFlavor;
+    use crate::{CacheConfig, LambdaConfig, LambdaDeployConfig};
 
     fn get_config_filepath(config_filename: &str) -> String {
         format!(
@@ -687,6 +687,17 @@ mod tests {
                 }),
                 warmup_memory_budget: ByteSize::gb(100),
                 warmup_single_split_initial_allocation: ByteSize::gb(1),
+                lambda: Some(LambdaConfig {
+                    function_name: "quickwit-lambda-leaf-search".to_string(),
+                    max_splits_per_invocation: NonZeroUsize::new(10).unwrap(),
+                    offload_threshold: 30,
+                    auto_deploy: Some(LambdaDeployConfig {
+                        execution_role_arn: "arn:aws:iam::123456789012:role/quickwit-lambda-role"
+                            .to_string(),
+                        memory_size: ByteSize::gib(5),
+                        invocation_timeout_secs: 15,
+                    }),
+                }),
             }
         );
         assert_eq!(
diff --git a/quickwit/quickwit-lambda-client/Cargo.toml b/quickwit/quickwit-lambda-client/Cargo.toml
new file mode 100644
index 00000000000..257172068b3
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/Cargo.toml
@@ -0,0 +1,45 @@
+[package]
+name = "quickwit-lambda-client"
+description = "AWS Lambda client for Quickwit leaf search invocation and deployment"
+
+version.workspace = true
+edition.workspace = true
+homepage.workspace = true
+documentation.workspace = true
+repository.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+aws-config = { workspace = true }
+aws-sdk-lambda = { workspace = true }
+base64 = { workspace = true }
+md5 = { workspace = true }
+prost = { workspace = true }
+serde_json = { workspace = true }
+once_cell = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+
+quickwit-common = { workspace = true }
+quickwit-config = { workspace = true }
+quickwit-lambda-server = { workspace = true }
+quickwit-proto = { workspace = true }
+quickwit-search = { workspace = true }
+
+[dev-dependencies]
+aws-smithy-mocks = { workspace = true }
+aws-sdk-lambda = { workspace = true, features = ["test-util"] }
+bytesize = { workspace = true }
+tokio = { workspace = true, features = ["test-util", "macros"] }
+
+# Required for complicated reasons. quickwit-storage checks that we
+# do use preserve order with serde. aws forces that feature. We disable
+# the check by switching on its testsuite feature.
+quickwit-storage = { workspace = true, features = ["testsuite"] }
+
+[build-dependencies]
+sha2 = { workspace = true }
+ureq = { workspace = true }
diff --git a/quickwit/quickwit-lambda-client/README.md b/quickwit/quickwit-lambda-client/README.md
new file mode 100644
index 00000000000..94d4fbf965b
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/README.md
@@ -0,0 +1,51 @@
+# Quickwit Lambda
+
+Quickwit supports offloading leaf search to AWS Lambda for horizontal scaling.
+The Lambda function is built separately and embedded into Quickwit's binary,
+allowing Quickwit to auto-deploy the function at startup.
+
+## Architecture
+
+- **quickwit-lambda-server**: The Lambda function binary that executes leaf searches
+- **quickwit-lambda-client**: The client that invokes Lambda and embeds the Lambda zip for auto-deployment
+
+## Release Process
+
+### 1. Tag the release
+
+Push a tag with the `lambda-` prefix:
+
+```bash
+git tag lambda-v0.8.0
+git push origin lambda-v0.8.0
+```
+
+This triggers the `publish_lambda.yaml` GitHub Action which:
+- Cross-compiles the Lambda binary for ARM64
+- Creates a zip file named `quickwit-aws-lambda-v0.8.0-aarch64.zip`
+- Uploads it as a **draft** GitHub release
+
+### 2. Publish the release
+
+Go to GitHub releases and manually publish the draft release to make the
+artifact URL publicly accessible.
+
+### 3. Update the embedded Lambda URL
+
+Update `LAMBDA_ZIP_URL` in `quickwit-lambda-client/build.rs` to point to the
+new release:
+
+```rust
+const LAMBDA_ZIP_URL: &str = "https://github.com/quickwit-oss/quickwit/releases/download/lambda-v0.8.0/quickwit-aws-lambda-v0.8.0-aarch64.zip";
+```
+
+### 4. Versioning
+
+The Lambda client uses content-based versioning:
+- An MD5 hash of the Lambda zip is computed at build time
+- This hash is embedded in the Lambda function description as `quickwit:{version}-{hash_short}`
+- When Quickwit starts, it checks if a matching version exists before deploying
+
+This ensures that:
+- Different Quickwit builds with the same Lambda binary share the same Lambda version
+- Updating the Lambda binary automatically triggers a new deployment
diff --git a/quickwit/quickwit-lambda-client/build.rs b/quickwit/quickwit-lambda-client/build.rs
new file mode 100644
index 00000000000..6223ed7668c
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/build.rs
@@ -0,0 +1,122 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Build script for quickwit-lambda-client.
+//!
+//! This script downloads the pre-built Lambda zip from a GitHub release
+//! and places it in OUT_DIR for embedding via include_bytes!
+//!
+//! The Lambda binary is built separately in CI and published as a GitHub release.
+
+use std::env;
+use std::path::{Path, PathBuf};
+
+use sha2::{Digest, Sha256};
+
+/// URL to download the pre-built Lambda zip from GitHub releases.
+/// This should be updated when a new Lambda binary is released.
+const LAMBDA_ZIP_URL: &str = "https://github.com/quickwit-oss/quickwit/releases/download/lambda-ff6fdfa5/quickwit-aws-lambda--aarch64.zip";
+
+/// Expected SHA256 hash of the Lambda zip artifact.
+/// Must be updated alongside LAMBDA_ZIP_URL when a new Lambda binary is released.
+const LAMBDA_ZIP_SHA256: &str = "fa940f44178e28460c21e44bb2610b776542b9b97db66a53bc65b10cad653b90";
+
+/// AWS Lambda direct upload limit is 50MB.
+/// Larger artifacts must be uploaded via S3.
+const MAX_LAMBDA_ZIP_SIZE: usize = 50 * 1024 * 1024;
+
+fn main() {
+    println!("cargo:rerun-if-changed=build.rs");
+
+    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set"));
+    let zip_path = out_dir.join("lambda_bootstrap.zip");
+
+    fetch_lambda_zip(&zip_path);
+
+    // Export first 8 hex chars of the SHA256 as environment variable.
+    // This is used to create a unique qualifier for Lambda versioning.
+    let hash_short = &LAMBDA_ZIP_SHA256[..8];
+    println!("cargo:rustc-env=LAMBDA_BINARY_HASH={}", hash_short);
+    println!("lambda binary hash (short): {}", hash_short);
+}
+
+/// Fetch the Lambda zip and save it to `local_cache_path`.
+///
+/// If a cached file already exists with the correct SHA256, this is a no-op.
+/// If the hash doesn't match (stale artifact), the file is deleted and re-downloaded.
+/// If no cached file exists, the artifact is downloaded fresh.
+///
+/// This function panics if a problem happens.
+fn fetch_lambda_zip(local_cache_path: &Path) {
+    // Try the cache first.
+    if let Ok(data) = std::fs::read(local_cache_path) {
+        let actual_hash = sha256_hex(&data);
+        if actual_hash == LAMBDA_ZIP_SHA256 {
+            println!("using cached Lambda zip from {:?}", local_cache_path);
+            return;
+        }
+        println!("cargo:warning=cached Lambda zip has wrong SHA256, re-downloading");
+        std::fs::remove_file(local_cache_path).expect("failed to delete stale cached zip");
+    }
+
+    // Download from the remote URL.
+    println!(
+        "cargo:warning=downloading Lambda zip from: {}",
+        LAMBDA_ZIP_URL
+    );
+    let data = download_lambda_zip(LAMBDA_ZIP_URL).expect("failed to download Lambda zip");
+
+    // Verify SHA256 BEFORE writing to disk.
+    let actual_hash = sha256_hex(&data);
+    if actual_hash != LAMBDA_ZIP_SHA256 {
+        panic!(
+            "SHA256 mismatch for Lambda zip!\n  expected: {LAMBDA_ZIP_SHA256}\n  actual:   \
+             {actual_hash}\nThe artifact at {LAMBDA_ZIP_URL} may have been tampered with."
+        );
+    }
+
+    std::fs::write(local_cache_path, &data).expect("failed to write zip file");
+    println!(
+        "cargo:warning=downloaded Lambda zip to {:?} ({} bytes)",
+        local_cache_path,
+        data.len()
+    );
+}
+
+fn sha256_hex(data: &[u8]) -> String {
+    format!("{:x}", Sha256::digest(data))
+}
+
+fn download_lambda_zip(url: &str) -> Result<Vec<u8>, String> {
+    let response = ureq::get(url)
+        .call()
+        .map_err(|err| format!("HTTP request failed: {err}"))?;
+    // Set limit higher than MAX_LAMBDA_ZIP_SIZE so we can detect oversized artifacts.
+    let data = response
+        .into_body()
+        .with_config()
+        .limit(MAX_LAMBDA_ZIP_SIZE as u64 + 1)
+        .read_to_vec()
+        .map_err(|err| format!("failed to read response body: {err}"))?;
+    if data.len() > MAX_LAMBDA_ZIP_SIZE {
+        return Err(format!(
+            "Lambda zip is too large ({} bytes, max {} bytes).\nAWS Lambda does not support \
+             direct upload of binaries larger than 50MB.\nWorkaround: upload the Lambda zip to S3 \
+             and deploy from there instead.",
+            data.len(),
+            MAX_LAMBDA_ZIP_SIZE
+        ));
+    }
+    Ok(data)
+}
diff --git a/quickwit/quickwit-lambda-client/src/deploy.rs b/quickwit/quickwit-lambda-client/src/deploy.rs
new file mode 100644
index 00000000000..e189e4a7ec0
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/src/deploy.rs
@@ -0,0 +1,1010 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Lambda function deployment for auto-deploy feature.
+//!
+//! This module provides functionality to automatically deploy or update
+//! the Lambda function used for leaf search operations.
+//!
+//! # Versioning Strategy
+//!
+//! We use AWS Lambda published versions with description-based identification:
+//! - Each published version has a description like `quickwit:0_8_0-fa752891`
+//! - We list versions to find one matching our qualifier
+//! - We invoke the specific version number (not $LATEST)
+//! - Old versions are garbage collected (keep current + top 5 most recent)
+
+use std::collections::HashMap;
+use std::sync::{Arc, OnceLock};
+
+use anyhow::{Context, anyhow};
+use aws_sdk_lambda::Client as LambdaClient;
+use aws_sdk_lambda::error::SdkError;
+use aws_sdk_lambda::primitives::Blob;
+use aws_sdk_lambda::types::{
+    Architecture, Environment, FunctionCode, LastUpdateStatus, Runtime, State,
+};
+use quickwit_config::{LambdaConfig, LambdaDeployConfig};
+use quickwit_search::LambdaLeafSearchInvoker;
+use tracing::{debug, info};
+
+use crate::invoker::create_lambda_invoker_for_version;
+
+/// Embedded Lambda binary (arm64, compressed).
+/// This is included at compile time.
+const LAMBDA_BINARY: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/lambda_bootstrap.zip"));
+
+/// Prefix for version descriptions to identify Quickwit-managed versions.
+const VERSION_DESCRIPTION_PREFIX: &str = "quickwit";
+
+/// Number of recent versions to keep during garbage collection (in addition to current).
+const GC_KEEP_RECENT_VERSIONS: usize = 5;
+
+/// Returns the Lambda qualifier combining version and binary hash.
+/// Format: "{quickwit_version}-{hash_short}" with dots replaced by underscores.
+/// Example: "0_8_0-fa752891"
+fn lambda_qualifier() -> &'static str {
+    static LAMBDA_QUALIFIER: OnceLock<String> = OnceLock::new();
+    LAMBDA_QUALIFIER
+        .get_or_init(|| {
+            format!(
+                "{}_{}",
+                env!("CARGO_PKG_VERSION").replace('.', "_"),
+                env!("LAMBDA_BINARY_HASH")
+            )
+        })
+        .as_str()
+}
+
+/// Returns the version description for our qualifier.
+///
+/// We also pass the deploy config, as we want the function to be redeployed
+/// if the deploy config is changed.
+fn version_description(deploy_config_opt: Option<&LambdaDeployConfig>) -> String {
+    if let Some(deploy_config) = deploy_config_opt {
+        let memory_size_mib = deploy_config.memory_size.as_mib() as u64;
+        let execution_role_arn_digest: String = format!(
+            "{:x}",
+            md5::compute(deploy_config.execution_role_arn.as_bytes())
+        );
+        format!(
+            "{}_{}_{}_{}s_{}",
+            VERSION_DESCRIPTION_PREFIX,
+            lambda_qualifier(),
+            memory_size_mib,
+            deploy_config.invocation_timeout_secs,
+            &execution_role_arn_digest[..5]
+        )
+    } else {
+        format!(
+            "{}_{}_nodeploy",
+            VERSION_DESCRIPTION_PREFIX,
+            lambda_qualifier()
+        )
+    }
+}
+
+/// Get or deploy the Lambda function and return an invoker.
+///
+/// This function:
+/// 1. Lists existing Lambda versions to find one matching our description
+/// 2. If not found, (and if a deploy config is provided) attempt to deploy the embedded Lambda
+///    binary
+/// 3. Garbage collects old versions (keeps current + 5 most recent)
+/// 4. Returns an invoker configured to call the specific version
+///
+/// The qualifier is computed from the Quickwit version and Lambda binary hash,
+/// ensuring the deployed Lambda matches the embedded binary.
+pub async fn try_get_or_deploy_invoker(
+    lambda_config: &LambdaConfig,
+) -> anyhow::Result<Arc<dyn LambdaLeafSearchInvoker>> {
+    let aws_config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
+    let client = LambdaClient::new(&aws_config);
+    let function_name = &lambda_config.function_name;
+    let target_description = version_description(lambda_config.auto_deploy.as_ref());
+
+    info!(
+        function_name = %function_name,
+        qualifier = %lambda_qualifier(),
+        "looking for Lambda function version"
+    );
+
+    let version = find_or_deploy_version(
+        &client,
+        function_name,
+        &target_description,
+        lambda_config.auto_deploy.as_ref(),
+    )
+    .await?;
+
+    // Spawn background garbage collection (best effort, non-blocking)
+    let gc_client = client.clone();
+    let gc_function_name = function_name.clone();
+    let gc_version = version.clone();
+    tokio::spawn(async move {
+        if let Err(e) =
+            garbage_collect_old_versions(&gc_client, &gc_function_name, &gc_version).await
+        {
+            info!(error = %e, "failed to garbage collect old Lambda versions");
+        }
+    });
+
+    // Create and return the invoker
+    let invoker = create_lambda_invoker_for_version(function_name.clone(), version)
+        .await
+        .context("failed to create Lambda invoker")?;
+
+    info!("created the lambda invoker");
+
+    Ok(invoker)
+}
+
+/// Find a Lambda version with a description matching our qualifier.
+///
+/// If none is found and a deploy config is provided, attempt to deploy a new version.
+///
+/// Returns the version number as a string (because it is a string on AWS side, e.g.: "7") if found.
+async fn find_or_deploy_version(
+    client: &LambdaClient,
+    function_name: &str,
+    target_description: &str,
+    deploy_config: Option<&LambdaDeployConfig>,
+) -> anyhow::Result<String> {
+    if let Some(version) = find_matching_version(client, function_name, target_description).await? {
+        info!(
+            function_name = %function_name,
+            version = %version,
+            "found existing Lambda version"
+        );
+        return Ok(version);
+    }
+
+    let deploy_config = deploy_config.with_context(|| {
+        format!(
+            "no Lambda version found with description '{}' and auto_deploy is not configured. \
+             Either deploy the Lambda function manually or enable auto_deploy.",
+            target_description
+        )
+    })?;
+
+    info!(
+        function_name = %function_name,
+        "no matching version found, deploying Lambda function"
+    );
+
+    deploy_lambda_function(client, function_name, deploy_config).await
+}
+
+async fn find_matching_version(
+    client: &LambdaClient,
+    function_name: &str,
+    target_description: &str,
+) -> anyhow::Result<Option<String>> {
+    let mut marker: Option<String> = None;
+
+    loop {
+        let mut request = client
+            .list_versions_by_function()
+            .function_name(function_name);
+
+        if let Some(m) = marker {
+            request = request.marker(m);
+        }
+
+        let response = match request.send().await {
+            Ok(resp) => resp,
+            Err(SdkError::ServiceError(err)) if err.err().is_resource_not_found_exception() => {
+                info!(
+                    function_name = %function_name,
+                    "lambda function does not exist yet"
+                );
+                return Ok(None);
+            }
+            Err(e) => {
+                return Err(anyhow!(
+                    "failed to list Lambda versions for '{}': {}",
+                    function_name,
+                    e
+                ));
+            }
+        };
+
+        for version in response.versions() {
+            if let Some(description) = version.description()
+                && description == target_description
+                && let Some(ver) = version.version()
+                && ver != "$LATEST"
+            {
+                return Ok(Some(ver.to_string()));
+            }
+        }
+
+        marker = response.next_marker().map(|s| s.to_string());
+        if marker.is_none() {
+            break;
+        }
+    }
+
+    Ok(None)
+}
+
+/// Deploy the Lambda function and publish a new version.
+/// AWS's API is pretty terrible.
+///
+/// Lambda's version are integer generated by AWS (we don't have control over them).
+/// To publish a new version, we need to implement two paths:
+/// - If the function doesn't exist yet, `create_function(publish=true)` atomically creates it and
+///   publishes a version in one call.
+/// - If the function already exists, we first update the code. We do not publish because strangely
+///   the API call does not make it possible to change the description. Updating the code has the
+///   effect of create a version $LATEST.
+/// - We publish the version $LATEST. That's the moment AWS attributes a version number. That call
+///   allows us to change the description. We pass the sha256 hash of the code to ensure that
+///   $LATEST has not been overwritten by another concurrent update.
+async fn deploy_lambda_function(
+    client: &LambdaClient,
+    function_name: &str,
+    deploy_config: &LambdaDeployConfig,
+) -> anyhow::Result<String> {
+    // This looks overly complicated but this is not AI slop.
+    // The AWS API forces us to go through a bunch of hoops to update our function
+    // in a safe manner.
+    let description = version_description(Some(deploy_config));
+
+    // Fast path: if the function does not exist, we can create and publish the function atomically.
+    if let Some(version) =
+        try_create_function(client, function_name, deploy_config, &description).await?
+    {
+        return Ok(version);
+    }
+
+    // Function already exists — we need to update the code.
+    // This will create or update a version called "$LATEST" (that's the actual string)
+    //
+    // We cannot directly publish here, because updating the function code does not allow
+    // us to pass a different description.
+    let code_sha256 = update_function_code(client, function_name).await?;
+
+    // We can now publish that new uploaded version.
+    // We pass the code_sha256 guard to make sure a race condition does not cause
+    // us to publish a different version.
+    //
+    // Publishing will create an actual version (a number as a string) and return it.
+    publish_version(client, function_name, &code_sha256, &description).await
+}
+
+/// Try to create the Lambda function with `publish=true`.
+///
+/// Returns `Some(version)` if the function was created and published.
+/// Returns `None` if the function already exists (`ResourceConflictException`).
+async fn try_create_function(
+    client: &LambdaClient,
+    function_name: &str,
+    deploy_config: &LambdaDeployConfig,
+    description: &str,
+) -> anyhow::Result<Option<String>> {
+    let memory_size_mb = deploy_config
+        .memory_size
+        .as_u64()
+        .div_ceil(1024u64 * 1024u64) as i32;
+    let timeout_secs = deploy_config.invocation_timeout_secs as i32;
+
+    info!(
+        function_name = %function_name,
+        memory_size_mb = memory_size_mb,
+        timeout_secs = timeout_secs,
+        "attempting to create Lambda function"
+    );
+
+    let function_code = FunctionCode::builder()
+        .zip_file(Blob::new(LAMBDA_BINARY))
+        .build();
+
+    let create_result = client
+        .create_function()
+        .function_name(function_name)
+        .runtime(Runtime::Providedal2023)
+        .role(&deploy_config.execution_role_arn)
+        .handler("bootstrap")
+        .description(description)
+        .code(function_code)
+        .architectures(Architecture::Arm64)
+        .memory_size(memory_size_mb)
+        .timeout(timeout_secs)
+        .environment(build_environment())
+        .set_tags(Some(build_tags()))
+        .publish(true)
+        .send()
+        .await;
+
+    match create_result {
+        Ok(output) => {
+            let version = output
+                .version()
+                .ok_or_else(|| anyhow!("created function has no version number"))?
+                .to_string();
+            info!(
+                function_name = %function_name,
+                version = %version,
+                "lambda function created and published"
+            );
+            Ok(Some(version))
+        }
+        Err(SdkError::ServiceError(err)) if err.err().is_resource_conflict_exception() => {
+            debug!(
+                function_name = %function_name,
+                "lambda function already exists"
+            );
+            Ok(None)
+        }
+        Err(e) => Err(anyhow!(
+            "failed to create Lambda function '{}': {}",
+            function_name,
+            e
+        )),
+    }
+}
+
+/// Update `$LATEST` to our embedded binary.
+///
+/// Returns the `code_sha256` of the uploaded code, to be used as a guard
+/// when publishing the version (detects if another process overwrote `$LATEST`
+/// between our update and publish).
+async fn update_function_code(
+    client: &LambdaClient,
+    function_name: &str,
+) -> anyhow::Result<String> {
+    info!(
+        function_name = %function_name,
+        "updating Lambda function code to current binary"
+    );
+
+    let response = client
+        .update_function_code()
+        .function_name(function_name)
+        .zip_file(Blob::new(LAMBDA_BINARY))
+        .architectures(Architecture::Arm64)
+        .send()
+        .await
+        .context("failed to update Lambda function code")?;
+
+    let code_sha256 = response
+        .code_sha256()
+        .ok_or_else(|| anyhow!("update_function_code response missing code_sha256"))?
+        .to_string();
+
+    wait_for_function_ready(client, function_name).await?;
+
+    Ok(code_sha256)
+}
+
+/// Publish a new immutable version from `$LATEST` with our description.
+///
+/// The `code_sha256` parameter guards against races: if another process
+/// overwrote `$LATEST` since our `update_function_code` call, AWS will
+/// reject the publish.
+///
+/// Returns the version number (e.g., "8").
+async fn publish_version(
+    client: &LambdaClient,
+    function_name: &str,
+    code_sha256: &str,
+    description: &str,
+) -> anyhow::Result<String> {
+    info!(
+        function_name = %function_name,
+        description = %description,
+        "publishing new Lambda version"
+    );
+
+    let publish_response = client
+        .publish_version()
+        .function_name(function_name)
+        .description(description)
+        .code_sha256(code_sha256)
+        .send()
+        .await
+        .context(
+            "failed to publish Lambda version (code_sha256 mismatch means a concurrent deploy \
+             race)",
+        )?;
+
+    let version = publish_response
+        .version()
+        .context("published version has no version number")?
+        .to_string();
+
+    info!(
+        function_name = %function_name,
+        version = %version,
+        "lambda version published successfully"
+    );
+
+    Ok(version)
+}
+
+/// Wait for the Lambda function to be ready.
+///
+/// "Ready" means `State == Active` and no update is in progress
+/// (`LastUpdateStatus` is absent or `Successful`).
+///
+/// This matters because:
+/// - After `create_function`: `State` transitions `Pending → Active`
+/// - After `update_function_code`: `State` stays `Active` but `LastUpdateStatus` transitions
+///   `InProgress → Successful`
+async fn wait_for_function_ready(client: &LambdaClient, function_name: &str) -> anyhow::Result<()> {
+    const MAX_WAIT_ATTEMPTS: u32 = 30;
+    const WAIT_INTERVAL: tokio::time::Duration = tokio::time::Duration::from_secs(1);
+
+    let mut interval = tokio::time::interval(WAIT_INTERVAL);
+
+    for attempt in 0..MAX_WAIT_ATTEMPTS {
+        interval.tick().await;
+
+        let response = client
+            .get_function()
+            .function_name(function_name)
+            .send()
+            .await
+            .context("failed to get function status")?;
+
+        let Some(config) = response.configuration() else {
+            continue;
+        };
+
+        // Check for terminal failure states.
+        if config.state() == Some(&State::Failed) {
+            let reason = config.state_reason().unwrap_or("unknown reason");
+            anyhow::bail!(
+                "lambda function '{}' is in Failed state: {}",
+                function_name,
+                reason
+            );
+        }
+
+        let last_update_status: &LastUpdateStatus = config
+            .last_update_status()
+            .unwrap_or(&LastUpdateStatus::Successful);
+
+        if last_update_status == &LastUpdateStatus::Failed {
+            let reason = config
+                .last_update_status_reason()
+                .unwrap_or("unknown reason");
+            anyhow::bail!(
+                "lambda function '{}' last update failed: {}",
+                function_name,
+                reason
+            );
+        }
+
+        // Ready = Active state with no update in progress.
+        let is_active = config.state() == Some(&State::Active);
+        if is_active && last_update_status == &LastUpdateStatus::Successful {
+            info!(
+                function_name = %function_name,
+                attempts = attempt + 1,
+                "lambda function is ready"
+            );
+            return Ok(());
+        }
+
+        info!(
+            function_name = %function_name,
+            state = ?config.state(),
+            last_update_status = ?config.last_update_status(),
+            attempt = attempt + 1,
+            "waiting for Lambda function to be ready"
+        );
+    }
+
+    anyhow::bail!(
+        "lambda function '{}' did not become ready within {} seconds",
+        function_name,
+        MAX_WAIT_ATTEMPTS as u64 * WAIT_INTERVAL.as_secs()
+    )
+}
+
+/// Garbage collect old Lambda versions, keeping the current + 5 most recent.
+async fn garbage_collect_old_versions(
+    client: &LambdaClient,
+    function_name: &str,
+    current_version: &str,
+) -> anyhow::Result<()> {
+    let mut quickwit_lambda_versions: Vec<(u64, String)> = Vec::new();
+    let mut marker: Option<String> = None;
+
+    // Collect all Quickwit-managed versions
+    loop {
+        let mut request = client
+            .list_versions_by_function()
+            .function_name(function_name);
+
+        if let Some(m) = marker {
+            request = request.marker(m);
+        }
+
+        let response = request
+            .send()
+            .await
+            .context("failed to list Lambda versions for garbage collection")?;
+
+        for version in response.versions() {
+            let Some(version_str) = version.version() else {
+                continue;
+            };
+            if version_str == "$LATEST" {
+                continue;
+            }
+            // Only consider Quickwit-managed versions
+            let Some(description) = version.description() else {
+                continue;
+            };
+            if description.starts_with(VERSION_DESCRIPTION_PREFIX)
+                && let Ok(version_num) = version_str.parse::<u64>()
+            {
+                quickwit_lambda_versions.push((version_num, version_str.to_string()));
+            }
+        }
+
+        marker = response.next_marker().map(ToString::to_string);
+        if marker.is_none() {
+            break;
+        }
+    }
+
+    // Sort by version number ascending (oldest first)
+    quickwit_lambda_versions.sort();
+
+    // We keep the last 5 versions.
+    quickwit_lambda_versions.truncate(
+        quickwit_lambda_versions
+            .len()
+            .saturating_sub(GC_KEEP_RECENT_VERSIONS),
+    );
+
+    if let Some(pos) = quickwit_lambda_versions
+        .iter()
+        .position(|(_version, version_str)| version_str == current_version)
+    {
+        quickwit_lambda_versions.swap_remove(pos);
+    }
+
+    // Delete old versions
+    for (version, version_str) in quickwit_lambda_versions {
+        info!(
+            function_name = %function_name,
+            version = %version_str,
+            "deleting old Lambda version"
+        );
+
+        if let Err(e) = client
+            .delete_function()
+            .function_name(function_name)
+            .qualifier(&version_str)
+            .send()
+            .await
+        {
+            info!(
+                function_name = %function_name,
+                version = %version,
+                error = %e,
+                "failed to delete old Lambda version"
+            );
+        }
+    }
+
+    Ok(())
+}
+
+/// Build environment variables for the Lambda function.
+fn build_environment() -> Environment {
+    let mut env_vars = HashMap::new();
+    env_vars.insert("RUST_LOG".to_string(), "info".to_string());
+    env_vars.insert("RUST_BACKTRACE".to_string(), "1".to_string());
+    Environment::builder().set_variables(Some(env_vars)).build()
+}
+
+/// Build tags for the Lambda function.
+fn build_tags() -> HashMap<String, String> {
+    let mut tags = HashMap::new();
+    tags.insert("managed_by".to_string(), "quickwit".to_string());
+    tags
+}
+
+#[cfg(test)]
+mod tests {
+    use aws_sdk_lambda::operation::create_function::{CreateFunctionError, CreateFunctionOutput};
+    use aws_sdk_lambda::operation::delete_function::DeleteFunctionOutput;
+    use aws_sdk_lambda::operation::get_function::GetFunctionOutput;
+    use aws_sdk_lambda::operation::list_versions_by_function::{
+        ListVersionsByFunctionError, ListVersionsByFunctionOutput,
+    };
+    use aws_sdk_lambda::operation::publish_version::PublishVersionOutput;
+    use aws_sdk_lambda::operation::update_function_code::UpdateFunctionCodeOutput;
+    use aws_sdk_lambda::types::FunctionConfiguration;
+    use aws_sdk_lambda::types::error::{ResourceConflictException, ResourceNotFoundException};
+    use aws_smithy_mocks::{RuleMode, mock, mock_client};
+    use bytesize::ByteSize;
+
+    use super::*;
+
+    fn make_version(version: &str, description: &str) -> FunctionConfiguration {
+        FunctionConfiguration::builder()
+            .version(version)
+            .description(description)
+            .build()
+    }
+
+    fn test_deploy_config() -> LambdaDeployConfig {
+        LambdaDeployConfig {
+            execution_role_arn: "arn:aws:iam::123456789:role/test-role".to_string(),
+            memory_size: ByteSize::gib(5),
+            invocation_timeout_secs: 60,
+        }
+    }
+
+    fn test_description() -> String {
+        version_description(None)
+    }
+
+    #[test]
+    fn test_version_description() {
+        let lambda_deploy_config = test_deploy_config();
+        let description = version_description(Some(&lambda_deploy_config));
+        assert!(description.ends_with("_60s_6c3b2"));
+        let description = version_description(None);
+        assert!(description.ends_with("_nodeploy"));
+    }
+
+    // --- find_matching_version tests ---
+
+    #[tokio::test]
+    async fn test_find_matching_version_found() {
+        let target = "quickwit:test_version";
+        let rule = mock!(aws_sdk_lambda::Client::list_versions_by_function).then_output(|| {
+            ListVersionsByFunctionOutput::builder()
+                .versions(make_version("$LATEST", ""))
+                .versions(make_version("1", "quickwit:old_version"))
+                .versions(make_version("7", "quickwit:test_version"))
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        let matching_version_opt = find_matching_version(&client, "my-fn", target)
+            .await
+            .unwrap();
+        assert_eq!(matching_version_opt, Some("7".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_find_matching_version_not_found() {
+        let rule = mock!(aws_sdk_lambda::Client::list_versions_by_function).then_output(|| {
+            ListVersionsByFunctionOutput::builder()
+                .versions(make_version("$LATEST", ""))
+                .versions(make_version("1", "quickwit:other"))
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        let result = find_matching_version(&client, "my-fn", "quickwit:no_match")
+            .await
+            .unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[tokio::test]
+    async fn test_find_matching_version_function_does_not_exist() {
+        let rule = mock!(aws_sdk_lambda::Client::list_versions_by_function).then_error(|| {
+            ListVersionsByFunctionError::ResourceNotFoundException(
+                ResourceNotFoundException::builder().build(),
+            )
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        let result = find_matching_version(&client, "no-such-fn", "quickwit:x")
+            .await
+            .unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[tokio::test]
+    async fn test_find_matching_version_skips_latest_even_if_description_matches() {
+        let rule = mock!(aws_sdk_lambda::Client::list_versions_by_function).then_output(|| {
+            ListVersionsByFunctionOutput::builder()
+                .versions(make_version("$LATEST", "quickwit:match"))
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        let result = find_matching_version(&client, "my-fn", "quickwit:match")
+            .await
+            .unwrap();
+        assert_eq!(result, None);
+    }
+
+    // --- try_create_function tests ---
+
+    #[tokio::test]
+    async fn test_try_create_function_success() {
+        let rule = mock!(aws_sdk_lambda::Client::create_function).then_output(|| {
+            CreateFunctionOutput::builder()
+                .version("1")
+                .function_name("my-fn")
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+        let config = test_deploy_config();
+
+        let result = try_create_function(&client, "my-fn", &config, &test_description())
+            .await
+            .unwrap();
+        assert_eq!(result, Some("1".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_try_create_function_already_exists() {
+        let rule = mock!(aws_sdk_lambda::Client::create_function).then_error(|| {
+            CreateFunctionError::ResourceConflictException(
+                ResourceConflictException::builder().build(),
+            )
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+        let config = test_deploy_config();
+
+        let result = try_create_function(&client, "my-fn", &config, &test_description())
+            .await
+            .unwrap();
+        assert_eq!(result, None);
+    }
+
+    // --- deploy (update path) tests ---
+
+    #[tokio::test]
+    async fn test_deploy_update_path() {
+        // create_function → conflict (function exists)
+        let create_rule = mock!(aws_sdk_lambda::Client::create_function).then_error(|| {
+            CreateFunctionError::ResourceConflictException(
+                ResourceConflictException::builder().build(),
+            )
+        });
+        // update_function_code → success with code_sha256
+        let update_rule = mock!(aws_sdk_lambda::Client::update_function_code).then_output(|| {
+            UpdateFunctionCodeOutput::builder()
+                .code_sha256("abc123hash")
+                .build()
+        });
+        // get_function → active and ready (for wait_for_function_ready)
+        let get_rule = mock!(aws_sdk_lambda::Client::get_function).then_output(|| {
+            GetFunctionOutput::builder()
+                .configuration(
+                    FunctionConfiguration::builder()
+                        .state(State::Active)
+                        .last_update_status(LastUpdateStatus::Successful)
+                        .build(),
+                )
+                .build()
+        });
+        // publish_version → success
+        let publish_rule = mock!(aws_sdk_lambda::Client::publish_version)
+            .then_output(|| PublishVersionOutput::builder().version("8").build());
+
+        let client = mock_client!(
+            aws_sdk_lambda,
+            RuleMode::MatchAny,
+            [&create_rule, &update_rule, &get_rule, &publish_rule]
+        );
+        let config = test_deploy_config();
+
+        tokio::time::pause();
+        let version = deploy_lambda_function(&client, "my-fn", &config)
+            .await
+            .unwrap();
+        assert_eq!(version, "8");
+    }
+
+    // --- wait_for_function_ready tests ---
+
+    #[tokio::test]
+    async fn test_wait_for_function_ready_immediate() {
+        let rule = mock!(aws_sdk_lambda::Client::get_function).then_output(|| {
+            GetFunctionOutput::builder()
+                .configuration(
+                    FunctionConfiguration::builder()
+                        .state(State::Active)
+                        .last_update_status(LastUpdateStatus::Successful)
+                        .build(),
+                )
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        tokio::time::pause();
+        wait_for_function_ready(&client, "my-fn").await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_wait_for_function_ready_after_update_in_progress() {
+        let rule = mock!(aws_sdk_lambda::Client::get_function)
+            .sequence()
+            .output(|| {
+                GetFunctionOutput::builder()
+                    .configuration(
+                        FunctionConfiguration::builder()
+                            .state(State::Active)
+                            .last_update_status(LastUpdateStatus::InProgress)
+                            .build(),
+                    )
+                    .build()
+            })
+            .output(|| {
+                GetFunctionOutput::builder()
+                    .configuration(
+                        FunctionConfiguration::builder()
+                            .state(State::Active)
+                            .last_update_status(LastUpdateStatus::Successful)
+                            .build(),
+                    )
+                    .build()
+            })
+            .build();
+        let client = mock_client!(aws_sdk_lambda, RuleMode::Sequential, [&rule]);
+
+        tokio::time::pause();
+        wait_for_function_ready(&client, "my-fn").await.unwrap();
+        assert_eq!(rule.num_calls(), 2);
+    }
+
+    #[tokio::test]
+    async fn test_wait_for_function_ready_fails_on_failed_state() {
+        let rule = mock!(aws_sdk_lambda::Client::get_function).then_output(|| {
+            GetFunctionOutput::builder()
+                .configuration(
+                    FunctionConfiguration::builder()
+                        .state(State::Failed)
+                        .state_reason("Something broke")
+                        .build(),
+                )
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        tokio::time::pause();
+        let err = wait_for_function_ready(&client, "my-fn").await.unwrap_err();
+        assert!(
+            err.to_string().contains("Failed state"),
+            "unexpected error: {}",
+            err
+        );
+    }
+
+    #[tokio::test]
+    async fn test_wait_for_function_ready_fails_on_last_update_failed() {
+        let rule = mock!(aws_sdk_lambda::Client::get_function).then_output(|| {
+            GetFunctionOutput::builder()
+                .configuration(
+                    FunctionConfiguration::builder()
+                        .state(State::Active)
+                        .last_update_status(LastUpdateStatus::Failed)
+                        .last_update_status_reason("Update broke")
+                        .build(),
+                )
+                .build()
+        });
+        let client = mock_client!(aws_sdk_lambda, [&rule]);
+
+        tokio::time::pause();
+        let err = wait_for_function_ready(&client, "my-fn").await.unwrap_err();
+        assert!(
+            err.to_string().contains("last update failed"),
+            "unexpected error: {}",
+            err
+        );
+    }
+
+    // --- garbage_collect_old_versions tests ---
+
+    #[tokio::test]
+    async fn test_gc_deletes_old_versions_keeps_recent() {
+        // 8 quickwit versions (1..=8) + $LATEST + one non-quickwit version
+        let list_rule =
+            mock!(aws_sdk_lambda::Client::list_versions_by_function).then_output(|| {
+                let mut builder = ListVersionsByFunctionOutput::builder()
+                    .versions(make_version("$LATEST", ""))
+                    .versions(make_version("99", "not-quickwit"));
+                for i in 1..=8 {
+                    builder = builder
+                        .versions(make_version(&i.to_string(), &format!("quickwit:ver_{}", i)));
+                }
+                builder.build()
+            });
+
+        let delete_rule = mock!(aws_sdk_lambda::Client::delete_function)
+            .then_output(|| DeleteFunctionOutput::builder().build());
+
+        let client = mock_client!(
+            aws_sdk_lambda,
+            RuleMode::MatchAny,
+            [&list_rule, &delete_rule]
+        );
+
+        // Current version is "7", so keep 7 + the 5 most recent (4,5,6,7,8).
+        // Should delete versions 1, 2, 3.
+        garbage_collect_old_versions(&client, "my-fn", "7")
+            .await
+            .unwrap();
+
+        assert_eq!(delete_rule.num_calls(), 3);
+    }
+
+    #[tokio::test]
+    async fn test_gc_nothing_to_delete() {
+        // Only 3 quickwit versions — below the GC_KEEP_RECENT_VERSIONS threshold.
+        let list_rule =
+            mock!(aws_sdk_lambda::Client::list_versions_by_function).then_output(|| {
+                ListVersionsByFunctionOutput::builder()
+                    .versions(make_version("$LATEST", ""))
+                    .versions(make_version("1", "quickwit:v1"))
+                    .versions(make_version("2", "quickwit:v2"))
+                    .versions(make_version("3", "quickwit:v3"))
+                    .build()
+            });
+
+        let delete_rule = mock!(aws_sdk_lambda::Client::delete_function)
+            .then_output(|| DeleteFunctionOutput::builder().build());
+
+        let client = mock_client!(
+            aws_sdk_lambda,
+            RuleMode::MatchAny,
+            [&list_rule, &delete_rule]
+        );
+
+        garbage_collect_old_versions(&client, "my-fn", "3")
+            .await
+            .unwrap();
+
+        assert_eq!(delete_rule.num_calls(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_gc_does_not_delete_current_version() {
+        // 7 quickwit versions, current is "1" (the oldest).
+        // Without the current-version guard, version 1 would be deleted.
+        let list_rule =
+            mock!(aws_sdk_lambda::Client::list_versions_by_function).then_output(|| {
+                let mut builder =
+                    ListVersionsByFunctionOutput::builder().versions(make_version("$LATEST", ""));
+                for i in 1..=7 {
+                    builder = builder
+                        .versions(make_version(&i.to_string(), &format!("quickwit:ver_{}", i)));
+                }
+                builder.build()
+            });
+
+        let delete_rule = mock!(aws_sdk_lambda::Client::delete_function)
+            .then_output(|| DeleteFunctionOutput::builder().build());
+
+        let client = mock_client!(
+            aws_sdk_lambda,
+            RuleMode::MatchAny,
+            [&list_rule, &delete_rule]
+        );
+
+        // Current version is "1". Without guard: would delete 1,2. With guard: only deletes 2.
+        garbage_collect_old_versions(&client, "my-fn", "1")
+            .await
+            .unwrap();
+
+        assert_eq!(delete_rule.num_calls(), 1);
+    }
+}
diff --git a/quickwit/quickwit-lambda-client/src/invoker.rs b/quickwit/quickwit-lambda-client/src/invoker.rs
new file mode 100644
index 00000000000..7f29a64ba60
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/src/invoker.rs
@@ -0,0 +1,185 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use anyhow::Context as _;
+use async_trait::async_trait;
+use aws_sdk_lambda::Client as LambdaClient;
+use aws_sdk_lambda::primitives::Blob;
+use aws_sdk_lambda::types::InvocationType;
+use base64::prelude::*;
+use prost::Message;
+use quickwit_lambda_server::{LambdaSearchRequestPayload, LambdaSearchResponsePayload};
+use quickwit_proto::search::{LambdaSearchResponses, LambdaSingleSplitResult, LeafSearchRequest};
+use quickwit_search::{LambdaLeafSearchInvoker, SearchError};
+use tracing::{debug, info, instrument};
+
+use crate::metrics::LAMBDA_METRICS;
+
+/// Create a Lambda invoker for a specific version.
+///
+/// The version number is used as the qualifier when invoking, ensuring we call
+/// the exact published version (not $LATEST).
+pub(crate) async fn create_lambda_invoker_for_version(
+    function_name: String,
+    version: String,
+) -> anyhow::Result<Arc<dyn LambdaLeafSearchInvoker>> {
+    let aws_config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
+    let client = LambdaClient::new(&aws_config);
+    let invoker = AwsLambdaInvoker {
+        client,
+        function_name,
+        version,
+    };
+    invoker.validate().await?;
+    Ok(Arc::new(invoker))
+}
+
+/// AWS Lambda implementation of RemoteFunctionInvoker.
+struct AwsLambdaInvoker {
+    client: LambdaClient,
+    function_name: String,
+    /// The version number to invoke (e.g., "7", "12").
+    version: String,
+}
+
+impl AwsLambdaInvoker {
+    /// Validate that the Lambda function version exists and is invocable.
+    /// Uses DryRun invocation type - validates without executing.
+    async fn validate(&self) -> anyhow::Result<()> {
+        info!("lambda invoker dry run");
+        let request = self
+            .client
+            .invoke()
+            .function_name(&self.function_name)
+            .qualifier(&self.version)
+            .invocation_type(InvocationType::DryRun);
+
+        request.send().await.with_context(|| {
+            format!(
+                "failed to validate Lambda function '{}:{}'",
+                self.function_name, self.version
+            )
+        })?;
+
+        info!("the lambda invoker dry run was successful");
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl LambdaLeafSearchInvoker for AwsLambdaInvoker {
+    #[instrument(skip(self, request), fields(function_name = %self.function_name, version = %self.version))]
+    async fn invoke_leaf_search(
+        &self,
+        request: LeafSearchRequest,
+    ) -> Result<Vec<LambdaSingleSplitResult>, SearchError> {
+        let start = std::time::Instant::now();
+
+        let result = self.invoke_leaf_search_inner(request).await;
+
+        let elapsed = start.elapsed().as_secs_f64();
+        let status = if result.is_ok() { "success" } else { "error" };
+        LAMBDA_METRICS
+            .leaf_search_requests_total
+            .with_label_values([status])
+            .inc();
+        LAMBDA_METRICS
+            .leaf_search_duration_seconds
+            .with_label_values([status])
+            .observe(elapsed);
+
+        result
+    }
+}
+
+impl AwsLambdaInvoker {
+    async fn invoke_leaf_search_inner(
+        &self,
+        request: LeafSearchRequest,
+    ) -> Result<Vec<LambdaSingleSplitResult>, SearchError> {
+        // Serialize request to protobuf bytes, then base64 encode
+        let request_bytes = request.encode_to_vec();
+        let payload = LambdaSearchRequestPayload {
+            payload: BASE64_STANDARD.encode(&request_bytes),
+        };
+
+        let payload_json = serde_json::to_vec(&payload)
+            .map_err(|e| SearchError::Internal(format!("JSON serialization error: {}", e)))?;
+
+        LAMBDA_METRICS
+            .leaf_search_request_payload_size_bytes
+            .observe(payload_json.len() as f64);
+
+        debug!(
+            payload_size = payload_json.len(),
+            version = %self.version,
+            "invoking Lambda function"
+        );
+
+        // Invoke the specific version
+        let invoke_builder = self
+            .client
+            .invoke()
+            .function_name(&self.function_name)
+            .qualifier(&self.version)
+            .invocation_type(InvocationType::RequestResponse)
+            .payload(Blob::new(payload_json));
+
+        let response = invoke_builder
+            .send()
+            .await
+            .map_err(|e| SearchError::Internal(format!("Lambda invocation error: {}", e)))?;
+
+        // Check for function error
+        if let Some(error) = response.function_error() {
+            let error_payload = response
+                .payload()
+                .map(|b| String::from_utf8_lossy(b.as_ref()).to_string())
+                .unwrap_or_default();
+            return Err(SearchError::Internal(format!(
+                "lambda function error: {}: {}",
+                error, error_payload
+            )));
+        }
+
+        // Deserialize response
+        let response_payload = response
+            .payload()
+            .ok_or_else(|| SearchError::Internal("no response payload from Lambda".into()))?;
+
+        LAMBDA_METRICS
+            .leaf_search_response_payload_size_bytes
+            .observe(response_payload.as_ref().len() as f64);
+
+        let lambda_response: LambdaSearchResponsePayload =
+            serde_json::from_slice(response_payload.as_ref())
+                .map_err(|e| SearchError::Internal(format!("json deserialization error: {}", e)))?;
+
+        let response_bytes = BASE64_STANDARD
+            .decode(&lambda_response.payload)
+            .map_err(|e| SearchError::Internal(format!("base64 decode error: {}", e)))?;
+
+        let leaf_responses = LambdaSearchResponses::decode(&response_bytes[..])
+            .map_err(|e| SearchError::Internal(format!("protobuf decode error: {}", e)))?;
+
+        debug!(
+            num_results = leaf_responses.split_results.len(),
+            "lambda invocation completed"
+        );
+
+        Ok(leaf_responses.split_results)
+    }
+}
diff --git a/quickwit/quickwit-lambda-client/src/lib.rs b/quickwit/quickwit-lambda-client/src/lib.rs
new file mode 100644
index 00000000000..aebf264df8c
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/src/lib.rs
@@ -0,0 +1,37 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! AWS Lambda client for Quickwit leaf search operations.
+//!
+//! This crate provides:
+//! - An AWS Lambda implementation of the `LambdaLeafSearchInvoker` trait used by `quickwit-search`
+//! - Auto-deployment functionality for Lambda functions
+//!
+//! # Usage
+//!
+//! Use `try_get_or_deploy_invoker` to get an invoker that will automatically deploy
+//! the Lambda function if needed:
+//!
+//! ```ignore
+//! let invoker = try_get_or_deploy_invoker(&function_name, &deploy_config).await?;
+//! ```
+
+mod deploy;
+mod invoker;
+mod metrics;
+
+pub use deploy::try_get_or_deploy_invoker;
+pub use metrics::LAMBDA_METRICS;
+// Re-export payload types from server crate for convenience
+pub use quickwit_lambda_server::{LambdaSearchRequestPayload, LambdaSearchResponsePayload};
diff --git a/quickwit/quickwit-lambda-client/src/metrics.rs b/quickwit/quickwit-lambda-client/src/metrics.rs
new file mode 100644
index 00000000000..4325c9b3a95
--- /dev/null
+++ b/quickwit/quickwit-lambda-client/src/metrics.rs
@@ -0,0 +1,74 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// See https://prometheus.io/docs/practices/naming/
+
+use once_cell::sync::Lazy;
+use quickwit_common::metrics::{
+    Histogram, HistogramVec, IntCounterVec, exponential_buckets, new_counter_vec, new_histogram,
+    new_histogram_vec,
+};
+
+/// From 100ms to 73s seconds
+fn duration_buckets() -> Vec<f64> {
+    exponential_buckets(0.100, 3f64.sqrt(), 13).unwrap()
+}
+
+/// From 1KB to 16MB
+fn payload_size_buckets() -> Vec<f64> {
+    exponential_buckets(1024.0, 4.0, 8).unwrap()
+}
+
+pub struct LambdaMetrics {
+    pub leaf_search_requests_total: IntCounterVec<1>,
+    pub leaf_search_duration_seconds: HistogramVec<1>,
+    pub leaf_search_request_payload_size_bytes: Histogram,
+    pub leaf_search_response_payload_size_bytes: Histogram,
+}
+
+impl Default for LambdaMetrics {
+    fn default() -> Self {
+        LambdaMetrics {
+            leaf_search_requests_total: new_counter_vec(
+                "leaf_search_requests_total",
+                "Total number of Lambda leaf search invocations.",
+                "lambda",
+                &[],
+                ["status"],
+            ),
+            leaf_search_duration_seconds: new_histogram_vec(
+                "leaf_search_duration_seconds",
+                "Duration of Lambda leaf search invocations in seconds.",
+                "lambda",
+                &[],
+                ["status"],
+                duration_buckets(),
+            ),
+            leaf_search_request_payload_size_bytes: new_histogram(
+                "leaf_search_request_payload_size_bytes",
+                "Size of the request payload sent to Lambda in bytes.",
+                "lambda",
+                payload_size_buckets(),
+            ),
+            leaf_search_response_payload_size_bytes: new_histogram(
+                "leaf_search_response_payload_size_bytes",
+                "Size of the response payload received from Lambda in bytes.",
+                "lambda",
+                payload_size_buckets(),
+            ),
+        }
+    }
+}
+
+pub static LAMBDA_METRICS: Lazy<LambdaMetrics> = Lazy::new(LambdaMetrics::default);
diff --git a/quickwit/quickwit-lambda-server/Cargo.toml b/quickwit/quickwit-lambda-server/Cargo.toml
new file mode 100644
index 00000000000..e362c329e1f
--- /dev/null
+++ b/quickwit/quickwit-lambda-server/Cargo.toml
@@ -0,0 +1,54 @@
+[package]
+name = "quickwit-lambda-server"
+description = "AWS Lambda handler for Quickwit leaf search"
+
+version.workspace = true
+edition.workspace = true
+homepage.workspace = true
+documentation.workspace = true
+repository.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[package.metadata.cargo-machete]
+# Its here even though it is not useful, in order to enable its "vendor" feature,
+# allowing the cross-build.
+ignored = ["openssl"]
+
+[dependencies]
+anyhow = { workspace = true }
+base64 = { workspace = true }
+bytesize = { workspace = true }
+lambda_runtime = { workspace = true }
+prost = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+tokio = { workspace = true }
+tracing = { workspace = true }
+tracing-subscriber = { workspace = true, features = ["env-filter", "json"] }
+
+openssl = { workspace = true, optional = true }
+
+quickwit-common = { workspace = true }
+quickwit-config = { workspace = true }
+quickwit-doc-mapper = { workspace = true }
+quickwit-proto = { workspace = true }
+quickwit-search = { workspace = true }
+quickwit-storage = { workspace = true }
+
+[[bin]]
+name = "quickwit-aws-lambda-leaf-search"
+path = "src/bin/leaf_search.rs"
+
+[features]
+default = []
+testsuite = []
+
+# Keep this in sync with quickwit-cli!
+lambda-release = [
+    # The vendored OpenSSL will be compiled from source during the
+    # build, avoiding the pkg-config dependency issue during
+    # cross-compilation.
+    "openssl/vendored",
+]
diff --git a/quickwit/quickwit-lambda-server/src/bin/leaf_search.rs b/quickwit/quickwit-lambda-server/src/bin/leaf_search.rs
new file mode 100644
index 00000000000..0ccdbc741e0
--- /dev/null
+++ b/quickwit/quickwit-lambda-server/src/bin/leaf_search.rs
@@ -0,0 +1,52 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! AWS Lambda binary entry point for Quickwit leaf search.
+
+use std::sync::Arc;
+
+use lambda_runtime::{Error, LambdaEvent, service_fn};
+use quickwit_lambda_server::{
+    LambdaSearchRequestPayload, LambdaSearcherContext, handle_leaf_search,
+};
+use tracing::info;
+use tracing_subscriber::EnvFilter;
+
+#[tokio::main]
+async fn main() -> Result<(), Error> {
+    // Initialize tracing with JSON output for CloudWatch
+    tracing_subscriber::fmt()
+        .with_env_filter(EnvFilter::from_default_env())
+        .json()
+        .init();
+
+    // Initialize context on cold start (wrapped in Arc for sharing across invocations)
+    let context = Arc::new(LambdaSearcherContext::try_from_env()?);
+
+    info!("lambda context initialized, starting handler loop");
+
+    // Run the Lambda handler
+    lambda_runtime::run(service_fn(
+        |event: LambdaEvent<LambdaSearchRequestPayload>| {
+            let ctx = Arc::clone(&context);
+            async move {
+                let (payload, _event_ctx) = event.into_parts();
+                handle_leaf_search(payload, &ctx)
+                    .await
+                    .map_err(|e| lambda_runtime::Error::from(e.to_string()))
+            }
+        },
+    ))
+    .await
+}
diff --git a/quickwit/quickwit-lambda-server/src/context.rs b/quickwit/quickwit-lambda-server/src/context.rs
new file mode 100644
index 00000000000..879572e5f8d
--- /dev/null
+++ b/quickwit/quickwit-lambda-server/src/context.rs
@@ -0,0 +1,69 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use anyhow::Context as _;
+use bytesize::ByteSize;
+use quickwit_config::{CacheConfig, SearcherConfig};
+use quickwit_search::SearcherContext;
+use quickwit_storage::StorageResolver;
+use tracing::info;
+
+/// Lambda-specific searcher context that holds resources for search execution.
+pub struct LambdaSearcherContext {
+    pub searcher_context: Arc<SearcherContext>,
+    pub storage_resolver: StorageResolver,
+}
+
+impl LambdaSearcherContext {
+    /// Create a new Lambda searcher context from environment variables.
+    pub fn try_from_env() -> anyhow::Result<Self> {
+        info!("initializing lambda searcher context");
+
+        let searcher_config = try_searcher_config_from_env()?;
+        let searcher_context = Arc::new(SearcherContext::new(searcher_config, None, None));
+        let storage_resolver = StorageResolver::configured(&Default::default());
+
+        Ok(Self {
+            searcher_context,
+            storage_resolver,
+        })
+    }
+}
+
+/// Create a Lambda-optimized searcher config based on the `AWS_LAMBDA_FUNCTION_MEMORY_SIZE`
+/// environment variable.
+fn try_searcher_config_from_env() -> anyhow::Result<SearcherConfig> {
+    let lambda_memory_mib: u64 = quickwit_common::get_from_env_opt(
+        "AWS_LAMBDA_FUNCTION_MEMORY_SIZE",
+        /* sensitive */ false,
+    )
+    .context("could not get aws lambda function memory size from ENV")?;
+    let lambda_memory = ByteSize::mib(lambda_memory_mib);
+    anyhow::ensure!(
+        lambda_memory >= ByteSize::gib(1u64),
+        "lambda memory must be at least 1GB"
+    );
+    let warmup_memory_budget = ByteSize::b(lambda_memory.as_u64() - ByteSize::mib(500).as_u64());
+
+    let mut searcher_config = SearcherConfig::default();
+    searcher_config.max_num_concurrent_split_searches = 20;
+    searcher_config.warmup_memory_budget = warmup_memory_budget;
+    searcher_config.fast_field_cache = CacheConfig::no_cache();
+    searcher_config.split_footer_cache = CacheConfig::no_cache();
+    searcher_config.predicate_cache = CacheConfig::no_cache();
+    searcher_config.partial_request_cache = CacheConfig::no_cache();
+    Ok(searcher_config)
+}
diff --git a/quickwit/quickwit-lambda-server/src/error.rs b/quickwit/quickwit-lambda-server/src/error.rs
new file mode 100644
index 00000000000..7f60f33b9b8
--- /dev/null
+++ b/quickwit/quickwit-lambda-server/src/error.rs
@@ -0,0 +1,69 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use quickwit_search::SearchError;
+use thiserror::Error;
+
+/// Result type for Lambda operations.
+pub type LambdaResult<T> = Result<T, LambdaError>;
+
+/// Errors that can occur during Lambda handler operations.
+#[derive(Debug, Error)]
+pub enum LambdaError {
+    /// Error serializing/deserializing protobuf.
+    #[error("serialization error: {0}")]
+    Serialization(String),
+    /// Error from the search operation.
+    #[error("search error: {0}")]
+    Search(#[from] SearchError),
+    /// Internal error.
+    #[error("internal error: {0}")]
+    Internal(String),
+    /// Task was cancelled.
+    #[error("cancelled")]
+    Cancelled,
+}
+
+impl From<prost::DecodeError> for LambdaError {
+    fn from(err: prost::DecodeError) -> Self {
+        LambdaError::Serialization(format!("protobuf decode error: {}", err))
+    }
+}
+
+impl From<prost::EncodeError> for LambdaError {
+    fn from(err: prost::EncodeError) -> Self {
+        LambdaError::Serialization(format!("protobuf encode error: {}", err))
+    }
+}
+
+impl From<base64::DecodeError> for LambdaError {
+    fn from(err: base64::DecodeError) -> Self {
+        LambdaError::Serialization(format!("base64 decode error: {}", err))
+    }
+}
+
+impl From<serde_json::Error> for LambdaError {
+    fn from(err: serde_json::Error) -> Self {
+        LambdaError::Serialization(format!("json error: {}", err))
+    }
+}
+
+impl From<LambdaError> for SearchError {
+    fn from(err: LambdaError) -> Self {
+        match err {
+            LambdaError::Search(search_err) => search_err,
+            other => SearchError::Internal(other.to_string()),
+        }
+    }
+}
diff --git a/quickwit/quickwit-lambda-server/src/handler.rs b/quickwit/quickwit-lambda-server/src/handler.rs
new file mode 100644
index 00000000000..e0467af21ef
--- /dev/null
+++ b/quickwit/quickwit-lambda-server/src/handler.rs
@@ -0,0 +1,200 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::str::FromStr;
+use std::sync::Arc;
+
+use base64::prelude::*;
+use prost::Message;
+use quickwit_common::uri::Uri;
+use quickwit_doc_mapper::DocMapper;
+use quickwit_proto::search::lambda_single_split_result::Outcome;
+use quickwit_proto::search::{
+    LambdaSearchResponses, LambdaSingleSplitResult, LeafRequestRef, LeafSearchRequest,
+    SearchRequest,
+};
+use quickwit_search::leaf::single_doc_mapping_leaf_search;
+use quickwit_storage::Storage;
+use serde::{Deserialize, Serialize};
+use tracing::{error, info, instrument, warn};
+
+use crate::context::LambdaSearcherContext;
+use crate::error::{LambdaError, LambdaResult};
+
+/// Payload for leaf search Lambda invocation.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct LambdaSearchRequestPayload {
+    /// Base64-encoded serialized LeafSearchRequest protobuf.
+    pub payload: String,
+}
+
+/// Response from leaf search Lambda invocation.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct LambdaSearchResponsePayload {
+    /// Base64-encoded serialized `LambdaSearchResponses` protobuf (one per split).
+    pub payload: String,
+}
+
+/// Handle a leaf search request in Lambda.
+///
+/// Returns one `LambdaSingleSplitResult` per split, each tagged with its
+/// split_id. Individual split failures are reported per-split rather than
+/// failing the entire invocation, so the caller can retry only failed splits.
+#[instrument(skip(ctx), fields(request_id))]
+pub async fn handle_leaf_search(
+    event: LambdaSearchRequestPayload,
+    ctx: &LambdaSearcherContext,
+) -> LambdaResult<LambdaSearchResponsePayload> {
+    // Decode base64 payload
+    let request_bytes: Vec<u8> = BASE64_STANDARD
+        .decode(&event.payload)
+        .map_err(|err| LambdaError::Serialization(format!("base64 decode error: {}", err)))?;
+
+    // Deserialize LeafSearchRequest
+    let leaf_search_request = LeafSearchRequest::decode(&request_bytes[..])?;
+
+    // Unpack the shared fields once instead of cloning per split.
+    let search_request: Arc<SearchRequest> = leaf_search_request
+        .search_request
+        .ok_or_else(|| LambdaError::Internal("no search request".to_string()))?
+        .into();
+
+    let doc_mappers: Vec<Arc<DocMapper>> = leaf_search_request
+        .doc_mappers
+        .iter()
+        .map(String::as_str)
+        .map(serde_json::from_str::<Arc<DocMapper>>)
+        .collect::<Result<Vec<_>, _>>()
+        .map_err(|err| {
+            LambdaError::Internal(format!("failed to deserialize doc mapper: `{err}`"))
+        })?;
+
+    // Resolve storage for every index URI upfront.
+    let mut storages: Vec<Arc<dyn quickwit_storage::Storage>> =
+        Vec::with_capacity(leaf_search_request.index_uris.len());
+    for uri_str in &leaf_search_request.index_uris {
+        let uri = Uri::from_str(uri_str)
+            .map_err(|err| LambdaError::Internal(format!("invalid index uri: {err}")))?;
+        let storage =
+            ctx.storage_resolver.resolve(&uri).await.map_err(|err| {
+                LambdaError::Internal(format!("failed to resolve storage: {err}"))
+            })?;
+        storages.push(storage);
+    }
+
+    let split_results: Vec<LambdaSingleSplitResult> = lambda_leaf_search(
+        search_request,
+        leaf_search_request.leaf_requests,
+        &doc_mappers[..],
+        &storages[..],
+        ctx,
+    )
+    .await?;
+    let wrapper = LambdaSearchResponses { split_results };
+    let response_bytes = wrapper.encode_to_vec();
+    let payload = BASE64_STANDARD.encode(&response_bytes);
+
+    Ok(LambdaSearchResponsePayload { payload })
+}
+
+/// Lambda leaf search returns individual split results.
+async fn lambda_leaf_search(
+    search_request: Arc<SearchRequest>,
+    leaf_req_ref: Vec<LeafRequestRef>,
+    doc_mappers: &[Arc<DocMapper>],
+    storages: &[Arc<dyn Storage>],
+    ctx: &LambdaSearcherContext,
+) -> LambdaResult<Vec<LambdaSingleSplitResult>> {
+    // Flatten leaf_requests into per-split tasks using pre-resolved Arc references.
+    let mut split_search_joinset: tokio::task::JoinSet<(String, Result<_, String>)> =
+        tokio::task::JoinSet::new();
+
+    for leaf_req in leaf_req_ref {
+        let doc_mapper = doc_mappers
+            .get(leaf_req.doc_mapper_ord as usize)
+            .ok_or_else(|| {
+                LambdaError::Internal(format!(
+                    "doc_mapper_ord out of bounds: {}",
+                    leaf_req.doc_mapper_ord
+                ))
+            })?
+            .clone();
+        let storage = storages[leaf_req.index_uri_ord as usize].clone();
+
+        for split_id_and_footer_offsets in leaf_req.split_offsets {
+            let split_id = split_id_and_footer_offsets.split_id.clone();
+            let searcher_context = ctx.searcher_context.clone();
+            let search_request = search_request.clone();
+            let doc_mapper = doc_mapper.clone();
+            let storage = storage.clone();
+            let split = split_id_and_footer_offsets.clone();
+            split_search_joinset.spawn(async move {
+                let result = single_doc_mapping_leaf_search(
+                    searcher_context,
+                    search_request,
+                    storage,
+                    vec![split],
+                    doc_mapper,
+                )
+                .await
+                .map_err(|err| format!("{err}"));
+                (split_id, result)
+            });
+        }
+    }
+
+    let num_splits = split_search_joinset.len();
+    info!(num_splits, "processing leaf search request (per-split)");
+
+    // Collect results. Order is irrelevant: each result is tagged with its split_id.
+    let mut split_results: Vec<LambdaSingleSplitResult> = Vec::with_capacity(num_splits);
+    let mut num_successes: usize = 0;
+    let mut num_failures: usize = 0;
+    while let Some(join_result) = split_search_joinset.join_next().await {
+        match join_result {
+            Ok((split_id, Ok(response))) => {
+                num_successes += 1;
+                split_results.push(LambdaSingleSplitResult {
+                    split_id,
+                    outcome: Some(Outcome::Response(response)),
+                });
+            }
+            Ok((split_id, Err(error_msg))) => {
+                num_failures += 1;
+                warn!(split_id = %split_id, error = %error_msg, "split search failed");
+                split_results.push(LambdaSingleSplitResult {
+                    split_id,
+                    outcome: Some(Outcome::Error(error_msg)),
+                });
+            }
+            Err(join_error) if join_error.is_cancelled() => {
+                warn!("search task was cancelled");
+                return Err(LambdaError::Cancelled);
+            }
+            Err(join_error) => {
+                // Panics lose the captured split_id, so we fail the entire invocation.
+                error!(error = %join_error, "search task panicked");
+                return Err(LambdaError::Internal(format!(
+                    "search task panicked: {join_error}"
+                )));
+            }
+        }
+    }
+    info!(
+        num_successes,
+        num_failures, "leaf search completed (per-split)"
+    );
+
+    Ok(split_results)
+}
diff --git a/quickwit/quickwit-lambda-server/src/lib.rs b/quickwit/quickwit-lambda-server/src/lib.rs
new file mode 100644
index 00000000000..da2519b3b33
--- /dev/null
+++ b/quickwit/quickwit-lambda-server/src/lib.rs
@@ -0,0 +1,26 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! AWS Lambda handler for Quickwit leaf search operations.
+//!
+//! This crate provides the Lambda handler that executes leaf search requests.
+//! It is designed to be deployed as an AWS Lambda function.
+
+mod context;
+mod error;
+mod handler;
+
+pub use context::LambdaSearcherContext;
+pub use error::{LambdaError, LambdaResult};
+pub use handler::{LambdaSearchRequestPayload, LambdaSearchResponsePayload, handle_leaf_search};
diff --git a/quickwit/quickwit-lambda/README.md b/quickwit/quickwit-lambda/README.md
deleted file mode 100644
index 88fa9c8748a..00000000000
--- a/quickwit/quickwit-lambda/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Deprecation
-
-This package was removed in Q3 2025. The maintenance burden was high and the
-feature was unused.
\ No newline at end of file
diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto
index 12443855239..ae3442fe1aa 100644
--- a/quickwit/quickwit-proto/protos/quickwit/search.proto
+++ b/quickwit/quickwit-proto/protos/quickwit/search.proto
@@ -333,8 +333,7 @@ message SplitSearchError {
   bool retryable_error = 3;
 }
 
-/// A LeafSearchRequest can span multiple indices.
-///
+// A LeafSearchRequest can span multiple indices.
 message LeafSearchRequest {
   // Search request. This is a perfect copy of the original search request
   // that was sent to root apart from the start_offset, max_hits params and index_id_patterns.
@@ -361,7 +360,7 @@ message ResourceStats {
     uint64 cpu_microsecs = 5;
 }
 
-/// LeafRequestRef references data in LeafSearchRequest to deduplicate data.
+// LeafRequestRef references data in LeafSearchRequest to deduplicate data.
 message LeafRequestRef {
   // The ordinal of the doc_mapper in `LeafSearchRequest.doc_mappers`
   uint32 doc_mapper_ord = 1;
@@ -497,6 +496,25 @@ message LeafSearchResponse {
   ResourceStats resource_stats = 8;
 }
 
+// The result of searching a single split in a Lambda invocation.
+// Each result is tagged with its split_id so that ordering is irrelevant.
+message LambdaSingleSplitResult {
+  // The split that was searched.
+  string split_id = 1;
+  oneof outcome {
+    // On success, the leaf search response for this split.
+    LeafSearchResponse response = 2;
+    // On failure, the error message.
+    string error = 3;
+  }
+}
+
+// Wrapper for per-split results from a Lambda invocation.
+message LambdaSearchResponses {
+  reserved 1; // was: repeated LeafSearchResponse responses
+  repeated LambdaSingleSplitResult split_results = 2;
+}
+
 message SnippetRequest {
   repeated string snippet_fields = 1;
   string query_ast_resolved = 2;
diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs
index 191110365f5..1e933055cd3 100644
--- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs
+++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs
@@ -256,8 +256,7 @@ pub struct SplitSearchError {
     #[prost(bool, tag = "3")]
     pub retryable_error: bool,
 }
-/// / A LeafSearchRequest can span multiple indices.
-/// /
+/// A LeafSearchRequest can span multiple indices.
 #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct LeafSearchRequest {
@@ -292,7 +291,7 @@ pub struct ResourceStats {
     #[prost(uint64, tag = "5")]
     pub cpu_microsecs: u64,
 }
-/// / LeafRequestRef references data in LeafSearchRequest to deduplicate data.
+/// LeafRequestRef references data in LeafSearchRequest to deduplicate data.
 #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct LeafRequestRef {
@@ -460,6 +459,38 @@ pub struct LeafSearchResponse {
     #[prost(message, optional, tag = "8")]
     pub resource_stats: ::core::option::Option<ResourceStats>,
 }
+/// The result of searching a single split in a Lambda invocation.
+/// Each result is tagged with its split_id so that ordering is irrelevant.
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct LambdaSingleSplitResult {
+    /// The split that was searched.
+    #[prost(string, tag = "1")]
+    pub split_id: ::prost::alloc::string::String,
+    #[prost(oneof = "lambda_single_split_result::Outcome", tags = "2, 3")]
+    pub outcome: ::core::option::Option<lambda_single_split_result::Outcome>,
+}
+/// Nested message and enum types in `LambdaSingleSplitResult`.
+pub mod lambda_single_split_result {
+    #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+    #[serde(rename_all = "snake_case")]
+    #[derive(Clone, PartialEq, ::prost::Oneof)]
+    pub enum Outcome {
+        /// On success, the leaf search response for this split.
+        #[prost(message, tag = "2")]
+        Response(super::LeafSearchResponse),
+        /// On failure, the error message.
+        #[prost(string, tag = "3")]
+        Error(::prost::alloc::string::String),
+    }
+}
+/// Wrapper for per-split results from a Lambda invocation.
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct LambdaSearchResponses {
+    #[prost(message, repeated, tag = "2")]
+    pub split_results: ::prost::alloc::vec::Vec<LambdaSingleSplitResult>,
+}
 #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
 #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct SnippetRequest {
diff --git a/quickwit/quickwit-proto/src/error.rs b/quickwit/quickwit-proto/src/error.rs
index b03536d0283..aa3905135a1 100644
--- a/quickwit/quickwit-proto/src/error.rs
+++ b/quickwit/quickwit-proto/src/error.rs
@@ -97,7 +97,7 @@ where E: ServiceError
 }
 
 /// A trait for encoding/decoding service errors to/from gRPC statuses. Errors are stored in JSON
-/// in the gRPC header [`QW_ERROR_HEADER_NAME`]. This allows for propagating them transparently
+/// in the gRPC header `qw-error-bin`. This allows for propagating them transparently
 /// between clients and servers over the network without being semantically limited to a status code
 /// and a message. However, it also means that modifying the serialization format of existing errors
 /// or introducing new ones is not backward compatible.
diff --git a/quickwit/quickwit-query/src/query_ast/range_query.rs b/quickwit/quickwit-query/src/query_ast/range_query.rs
index 3f5c4471ae4..346f02feed3 100644
--- a/quickwit/quickwit-query/src/query_ast/range_query.rs
+++ b/quickwit/quickwit-query/src/query_ast/range_query.rs
@@ -181,8 +181,8 @@ impl BuildTantivyAst for RangeQuery {
                     convert_bounds(&self.lower_bound, &self.upper_bound, field_entry.name())?;
                 let truncate_datetime =
                     |date: &DateTime| date.truncate(date_options.get_precision());
-                let lower_bound = map_bound(&lower_bound, truncate_datetime);
-                let upper_bound = map_bound(&upper_bound, truncate_datetime);
+                let lower_bound = lower_bound.as_ref().map(truncate_datetime);
+                let upper_bound = upper_bound.as_ref().map(truncate_datetime);
                 FastFieldRangeQuery::new(
                     lower_bound.map(|val| Term::from_field_date(field, val)),
                     upper_bound.map(|val| Term::from_field_date(field, val)),
@@ -277,14 +277,6 @@ impl BuildTantivyAst for RangeQuery {
     }
 }
 
-fn map_bound<TFrom, TTo>(bound: &Bound<TFrom>, transform: impl Fn(&TFrom) -> TTo) -> Bound<TTo> {
-    match bound {
-        Bound::Excluded(from_val) => Bound::Excluded(transform(from_val)),
-        Bound::Included(from_val) => Bound::Included(transform(from_val)),
-        Bound::Unbounded => Bound::Unbounded,
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::ops::Bound;
diff --git a/quickwit/quickwit-search/src/invoker.rs b/quickwit/quickwit-search/src/invoker.rs
new file mode 100644
index 00000000000..8a5ad183b63
--- /dev/null
+++ b/quickwit/quickwit-search/src/invoker.rs
@@ -0,0 +1,38 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Trait for invoking remote serverless functions for leaf search.
+
+use async_trait::async_trait;
+use quickwit_proto::search::{LambdaSingleSplitResult, LeafSearchRequest};
+
+use crate::SearchError;
+
+/// Trait for invoking remote serverless functions (e.g., AWS Lambda) for leaf search.
+///
+/// This abstraction allows different cloud providers to be supported.
+/// Implementations are provided by the `quickwit-lambda` crate.
+#[async_trait]
+pub trait LambdaLeafSearchInvoker: Send + Sync + 'static {
+    /// Invoke the remote function with a LeafSearchRequest.
+    ///
+    /// Returns one `LambdaSingleSplitResult` per split in the request.
+    /// Each result is tagged with its split_id so ordering is irrelevant.
+    /// Individual split failures are reported per-split; the outer `Result`
+    /// only represents transport-level errors.
+    async fn invoke_leaf_search(
+        &self,
+        request: LeafSearchRequest,
+    ) -> Result<Vec<LambdaSingleSplitResult>, SearchError>;
+}
diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs
index 4caf2587909..37f99a36241 100644
--- a/quickwit/quickwit-search/src/leaf.rs
+++ b/quickwit/quickwit-search/src/leaf.rs
@@ -12,7 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::{HashMap, HashSet};
+use std::cmp::Reverse;
+use std::collections::binary_heap::PeekMut;
+use std::collections::{BinaryHeap, HashMap, HashSet};
+use std::num::NonZeroUsize;
 use std::ops::Bound;
 use std::path::PathBuf;
 use std::str::FromStr;
@@ -23,8 +26,10 @@ use anyhow::Context;
 use bytesize::ByteSize;
 use futures::future::try_join_all;
 use quickwit_common::pretty::PrettySample;
+use quickwit_common::uri::Uri;
 use quickwit_directories::{CachingDirectory, HotDirectory, StorageDirectory};
 use quickwit_doc_mapper::{Automaton, DocMapper, FastFieldWarmupInfo, TermRange, WarmupInfo};
+use quickwit_proto::search::lambda_single_split_result::Outcome;
 use quickwit_proto::search::{
     CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, ResourceStats, SearchRequest,
     SortOrder, SortValue, SplitIdAndFooterOffsets, SplitSearchError,
@@ -37,8 +42,8 @@ use quickwit_storage::{
     BundleStorage, ByteRangeCache, MemorySizedCache, OwnedBytes, SplitCache, Storage,
     StorageResolver, TimeoutAndRetryStorage, wrap_storage_with_cache,
 };
+use tantivy::aggregation::AggContextParams;
 use tantivy::aggregation::agg_req::{AggregationVariants, Aggregations};
-use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard};
 use tantivy::collector::Collector;
 use tantivy::directory::FileSlice;
 use tantivy::fastfield::FastFieldReaders;
@@ -48,12 +53,68 @@ use tokio::task::{JoinError, JoinSet};
 use tracing::*;
 
 use crate::collector::{IncrementalCollector, make_collector_for_split, make_merge_collector};
+use crate::leaf_cache::LeafSearchCache;
 use crate::metrics::SplitSearchOutcomeCounters;
 use crate::root::is_metadata_count_request_with_ast;
-use crate::search_permit_provider::{SearchPermit, compute_initial_memory_allocation};
+use crate::search_permit_provider::{
+    SearchPermit, SearchPermitFuture, compute_initial_memory_allocation,
+};
 use crate::service::{SearcherContext, deserialize_doc_mapper};
 use crate::{QuickwitAggregations, SearchError};
 
+/// Distributes items across batches using a greedy LPT (Longest Processing Time)
+/// algorithm to balance total weight across batches.
+///
+/// Items are sorted by weight descending, then each item is assigned to the
+/// batch with the smallest current total weight. This produces a good
+/// approximation of balanced batches.
+fn greedy_batch_split<T>(
+    items: Vec<T>,
+    weight_fn: impl Fn(&T) -> u64,
+    max_items_per_batch: NonZeroUsize,
+) -> Vec<Vec<T>> {
+    if items.is_empty() {
+        return Vec::new();
+    }
+
+    let num_items = items.len();
+    let max_items_per_batch: usize = max_items_per_batch.get();
+    let num_batches = num_items.div_ceil(max_items_per_batch);
+
+    // Compute weights, then sort descending by weight
+    let mut weighted_items: Vec<(u64, T)> = Vec::with_capacity(num_items);
+    for item in items {
+        let weight = weight_fn(&item);
+        weighted_items.push((weight, item));
+    }
+    weighted_items.sort_unstable_by_key(|(weight, _)| std::cmp::Reverse(*weight));
+
+    let mut batches: Vec<Vec<T>> = std::iter::repeat_with(Vec::new).take(num_batches).collect();
+
+    // Min-heap of (weight, item_count, batch_index).
+    // Reverse turns BinaryHeap into a min-heap.
+    // Ties break naturally: lighter weight → fewer items → lower index.
+    let mut heap: BinaryHeap<Reverse<(u64, usize, usize)>> = BinaryHeap::with_capacity(num_batches);
+    for batch_idx in 0..num_batches {
+        heap.push(Reverse((0, 0, batch_idx)));
+    }
+
+    // Greedily assign each item to the lightest batch.
+    // Full batches are removed via PeekMut::pop().
+    for (weight, item) in weighted_items {
+        let mut top = heap.peek_mut().unwrap();
+        let Reverse((ref mut batch_weight, ref mut batch_count, batch_idx)) = *top;
+        batches[batch_idx].push(item);
+        *batch_weight += weight;
+        *batch_count += 1;
+        if *batch_count >= max_items_per_batch {
+            PeekMut::pop(top);
+        }
+    }
+
+    batches
+}
+
 async fn get_split_footer_from_cache_or_fetch(
     index_storage: Arc<dyn Storage>,
     split_and_footer_offsets: &SplitIdAndFooterOffsets,
@@ -435,21 +496,25 @@ fn compute_index_size(hot_directory: &HotDirectory) -> ByteSize {
 /// Apply a leaf search on a single split.
 #[allow(clippy::too_many_arguments)]
 async fn leaf_search_single_split(
-    mut search_request: SearchRequest,
+    search_request: SearchRequest,
     ctx: Arc<LeafSearchContext>,
     storage: Arc<dyn Storage>,
     split: SplitIdAndFooterOffsets,
-    aggregations_limits: AggregationLimitsGuard,
     search_permit: &mut SearchPermit,
 ) -> crate::Result<Option<LeafSearchResponse>> {
     let mut leaf_search_state_guard =
         SplitSearchStateGuard::new(ctx.split_outcome_counters.clone());
 
-    rewrite_request(
-        &mut search_request,
-        &split,
-        ctx.doc_mapper.timestamp_field_name(),
-    );
+    // We already checked if the result was already in the partial result cache,
+    // but it's not a bad idea to check again.
+    if let Some(cached_answer) = ctx
+        .searcher_context
+        .leaf_search_cache
+        .get(split.clone(), search_request.clone())
+    {
+        leaf_search_state_guard.set_state(SplitSearchState::CacheHit);
+        return Ok(Some(cached_answer));
+    }
 
     let query_ast: QueryAst = serde_json::from_str(search_request.query_ast.as_str())
         .map_err(|err| SearchError::InvalidQuery(err.to_string()))?;
@@ -457,21 +522,11 @@ async fn leaf_search_single_split(
     // CanSplitDoBetter or rewrite_request may have changed the request to be a count only request
     // This may be the case for AllQuery with a sort by date and time filter, where the current
     // split can't have better results.
-    //
     if is_metadata_count_request_with_ast(&query_ast, &search_request) {
         leaf_search_state_guard.set_state(SplitSearchState::PrunedBeforeWarmup);
         return Ok(Some(get_leaf_resp_from_count(split.num_docs)));
     }
 
-    if let Some(cached_answer) = ctx
-        .searcher_context
-        .leaf_search_cache
-        .get(split.clone(), search_request.clone())
-    {
-        leaf_search_state_guard.set_state(SplitSearchState::CacheHit);
-        return Ok(Some(cached_answer));
-    }
-
     let split_id = split.split_id.to_string();
     let byte_range_cache =
         ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache);
@@ -489,14 +544,14 @@ async fn leaf_search_single_split(
         search_permit.update_memory_usage(index_size);
     }
 
-    let reader = index
+    let searcher = index
         .reader_builder()
         .reload_policy(ReloadPolicy::Manual)
-        .try_into()?;
-    let searcher = reader.searcher();
+        .try_into()?
+        .searcher();
 
     let agg_context_params = AggContextParams {
-        limits: aggregations_limits,
+        limits: ctx.searcher_context.get_aggregation_limits(),
         tokenizers: ctx.doc_mapper.tokenizer_manager().tantivy_manager().clone(),
     };
     let mut collector =
@@ -595,16 +650,14 @@ async fn leaf_search_single_split(
             })??;
 
     // Let's cache this result in the partial result cache.
-    if let Some((leaf_search_req, leaf_search_resp)) = search_request_and_result {
-        ctx.searcher_context.leaf_search_cache.put(
-            split,
-            leaf_search_req,
-            leaf_search_resp.clone(),
-        );
-        Ok(Some(leaf_search_resp))
-    } else {
-        Ok(None)
-    }
+    let Some((leaf_search_req, leaf_search_resp)) = search_request_and_result else {
+        return Ok(None);
+    };
+    // We save our result in the cache.
+    ctx.searcher_context
+        .leaf_search_cache
+        .put(split, leaf_search_req, leaf_search_resp.clone());
+    Ok(Some(leaf_search_resp))
 }
 
 /// Rewrite a request removing parts which incur additional download or computation with no
@@ -685,16 +738,6 @@ fn visit_aggregation_mut(
     modified_something
 }
 
-// equivalent to Bound::map, which is unstable
-pub fn map_bound<T, U>(bound: Bound<T>, f: impl FnOnce(T) -> U) -> Bound<U> {
-    use Bound::*;
-    match bound {
-        Unbounded => Unbounded,
-        Included(x) => Included(f(x)),
-        Excluded(x) => Excluded(f(x)),
-    }
-}
-
 // returns the max of left and right, that isn't unbounded. Useful for making
 // the intersection of lower bound of ranges
 fn max_bound<T: Ord + Copy>(left: Bound<T>, right: Bound<T>) -> Bound<T> {
@@ -827,12 +870,8 @@ fn remove_redundant_timestamp_range(
     if final_start_timestamp != Bound::Unbounded || final_end_timestamp != Bound::Unbounded {
         let range = RangeQuery {
             field: timestamp_field.to_string(),
-            lower_bound: map_bound(final_start_timestamp, |bound| {
-                bound.into_timestamp_nanos().into()
-            }),
-            upper_bound: map_bound(final_end_timestamp, |bound| {
-                bound.into_timestamp_nanos().into()
-            }),
+            lower_bound: final_start_timestamp.map(|bound| bound.into_timestamp_nanos().into()),
+            upper_bound: final_end_timestamp.map(|bound| bound.into_timestamp_nanos().into()),
         };
         new_ast = if let QueryAst::Bool(mut bool_query) = new_ast {
             if bool_query.must.is_empty()
@@ -1061,12 +1100,12 @@ impl CanSplitDoBetter {
     /// Returns the search_requests with their split.
     fn optimize(
         &self,
-        request: Arc<SearchRequest>,
+        request: &SearchRequest,
         mut splits: Vec<SplitIdAndFooterOffsets>,
     ) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
         self.optimize_split_order(&mut splits);
 
-        if !is_simple_all_query(&request) {
+        if !is_simple_all_query(request) {
             // no optimization opportunity here.
             return Ok(splits
                 .into_iter()
@@ -1211,7 +1250,7 @@ impl CanSplitDoBetter {
 pub async fn multi_index_leaf_search(
     searcher_context: Arc<SearcherContext>,
     leaf_search_request: LeafSearchRequest,
-    storage_resolver: &StorageResolver,
+    storage_resolver: StorageResolver,
 ) -> Result<LeafSearchResponse, SearchError> {
     let search_request: Arc<SearchRequest> = leaf_search_request
         .search_request
@@ -1223,8 +1262,7 @@ pub async fn multi_index_leaf_search(
         .iter()
         .map(|doc_mapper| deserialize_doc_mapper(doc_mapper))
         .collect::<crate::Result<_>>()?;
-    // Creates a collector which merges responses into one
-    let aggregation_limits = searcher_context.get_aggregation_limits();
+
     // TODO: to avoid lockstep, we should pull up the future creation over the list of split ids
     // and have the semaphore on this level.
     // This will lower resource consumption due to less in-flight futures and avoid contention.
@@ -1255,11 +1293,11 @@ pub async fn multi_index_leaf_search(
             })?
             .clone();
 
+        let storage_resolver = storage_resolver.clone();
+        let searcher_context = searcher_context.clone();
+        let search_request = search_request.clone();
+
         leaf_request_futures.spawn({
-            let storage_resolver = storage_resolver.clone();
-            let searcher_context = searcher_context.clone();
-            let search_request = search_request.clone();
-            let aggregation_limits = aggregation_limits.clone();
             async move {
                 let storage = storage_resolver.resolve(&index_uri).await?;
                 single_doc_mapping_leaf_search(
@@ -1268,7 +1306,6 @@ pub async fn multi_index_leaf_search(
                     storage,
                     leaf_search_request_ref.split_offsets,
                     doc_mapper,
-                    aggregation_limits,
                 )
                 .in_current_span()
                 .await
@@ -1276,8 +1313,11 @@ pub async fn multi_index_leaf_search(
         });
     }
 
-    let merge_collector = make_merge_collector(&search_request, aggregation_limits)?;
+    // Creates a collector which merges responses into one
+    let merge_collector =
+        make_merge_collector(&search_request, searcher_context.get_aggregation_limits())?;
     let mut incremental_merge_collector = IncrementalCollector::new(merge_collector);
+
     while let Some(leaf_response_join_result) = leaf_request_futures.join_next().await {
         // abort the search on join errors
         let leaf_response_result = leaf_response_join_result?;
@@ -1352,9 +1392,213 @@ fn disable_search_request_hits(search_request: &mut SearchRequest) {
 }
 
 /// Searches multiple splits for a specific index and a single doc mapping
-///
+/// Offloads splits to Lambda invocations, distributing them across batches
+/// balanced by document count. Each batch is invoked independently; a failure
+/// in one batch does not affect others.
+async fn run_offloaded_search_tasks(
+    searcher_context: &SearcherContext,
+    search_request: &SearchRequest,
+    doc_mapper: &DocMapper,
+    index_uri: Uri,
+    splits_with_requests: Vec<(SplitIdAndFooterOffsets, SearchRequest)>,
+    incremental_merge_collector: &Mutex<IncrementalCollector>,
+) -> Result<(), SearchError> {
+    if splits_with_requests.is_empty() {
+        return Ok(());
+    }
+
+    info!(
+        num_offloaded_splits = splits_with_requests.len(),
+        "offloading to lambda"
+    );
+
+    let lambda_invoker = searcher_context.lambda_invoker.as_ref().expect(
+        "did not receive enough permit futures despite not having any lambda invoker to offload to",
+    );
+    let lambda_config = searcher_context.searcher_config.lambda.as_ref().unwrap();
+
+    let doc_mapper_str = serde_json::to_string(doc_mapper)
+        .map_err(|err| SearchError::Internal(format!("failed to serialize doc mapper: {err}")))?;
+
+    // Build a lookup so we can match lambda results (tagged by split_id) back to the
+    // split metadata and per-split SearchRequest needed for caching.
+    let mut split_lookup: HashMap<String, (SplitIdAndFooterOffsets, SearchRequest)> =
+        HashMap::with_capacity(splits_with_requests.len());
+    let splits: Vec<SplitIdAndFooterOffsets> = splits_with_requests
+        .into_iter()
+        .map(|(split, search_req)| {
+            split_lookup.insert(split.split_id.clone(), (split.clone(), search_req));
+            split
+        })
+        .collect();
+
+    let batches: Vec<Vec<SplitIdAndFooterOffsets>> = greedy_batch_split(
+        splits,
+        |split| split.num_docs,
+        lambda_config.max_splits_per_invocation,
+    );
+
+    let mut lambda_tasks_joinset = JoinSet::new();
+    for batch in batches {
+        let batch_split_ids: Vec<String> =
+            batch.iter().map(|split| split.split_id.clone()).collect();
+        let leaf_request = LeafSearchRequest {
+            // Note this is not the split-specific rewritten request, we ship the main request,
+            // and the leaf will apply the split specific rewrite on its own.
+            search_request: Some(search_request.clone()),
+            doc_mappers: vec![doc_mapper_str.clone()],
+            index_uris: vec![index_uri.as_str().to_string()], //< careful here. Calling to_string() directly would return a redacted uri.
+            leaf_requests: vec![quickwit_proto::search::LeafRequestRef {
+                index_uri_ord: 0,
+                doc_mapper_ord: 0,
+                split_offsets: batch,
+            }],
+        };
+        let invoker = lambda_invoker.clone();
+        lambda_tasks_joinset.spawn(async move {
+            (
+                batch_split_ids,
+                invoker.invoke_leaf_search(leaf_request).await,
+            )
+        });
+    }
+
+    while let Some(join_res) = lambda_tasks_joinset.join_next().await {
+        let Ok((batch_split_ids, result)) = join_res else {
+            error!("lambda join error");
+            return Err(SearchError::Internal("lambda join error".to_string()));
+        };
+        match result {
+            Ok(split_results) => {
+                let mut locked = incremental_merge_collector.lock().unwrap();
+                for split_result in split_results {
+                    match split_result.outcome {
+                        Some(Outcome::Response(response)) => {
+                            if let Some((split_info, single_split_search_req)) =
+                                split_lookup.remove(&split_result.split_id)
+                            {
+                                // We use the single_split_search_req to perform the search
+                                searcher_context.leaf_search_cache.put(
+                                    split_info,
+                                    single_split_search_req,
+                                    response.clone(),
+                                );
+                            }
+                            if let Err(err) = locked.add_result(response) {
+                                error!(error = %err, "failed to add lambda result to collector");
+                            }
+                        }
+                        Some(Outcome::Error(error_msg)) => {
+                            locked.add_failed_split(SplitSearchError {
+                                split_id: split_result.split_id,
+                                error: format!("lambda split error: {error_msg}"),
+                                retryable_error: true,
+                            });
+                        }
+                        None => {
+                            locked.add_failed_split(SplitSearchError {
+                                split_id: split_result.split_id,
+                                error: "lambda returned empty outcome".to_string(),
+                                retryable_error: true,
+                            });
+                        }
+                    }
+                }
+            }
+            Err(err) => {
+                // Transport-level failure: the Lambda invocation itself failed.
+                // Mark all splits in this batch as failed.
+                error!(
+                    error = %err,
+                    num_splits = batch_split_ids.len(),
+                    "lambda invocation failed for batch"
+                );
+                let mut locked = incremental_merge_collector.lock().unwrap();
+                for split_id in batch_split_ids {
+                    locked.add_failed_split(SplitSearchError {
+                        split_id,
+                        error: format!("lambda invocation error: {err}"),
+                        retryable_error: true,
+                    });
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+struct LocalSearchTask {
+    split: SplitIdAndFooterOffsets,
+    search_request: SearchRequest,
+    search_permit_future: SearchPermitFuture,
+}
+
+struct ScheduleSearchTaskResult {
+    // The search permit futures associated to each local_search_task are
+    // guaranteed to resolve in order.
+    local_search_tasks: Vec<LocalSearchTask>,
+    // The per-split SearchRequest (already rewritten by `rewrite_request()`) is preserved
+    // so that lambda results can be cached with the correct cache key in `leaf_search_cache`.
+    offloaded_search_tasks: Vec<(SplitIdAndFooterOffsets, SearchRequest)>,
+}
+
+/// Schedule search tasks, either:
+/// - locally
+/// - remotely on lambdas, if lambda are configured, and the number of tasks scheduled exceed the
+///   offload threshold.
+async fn schedule_search_tasks(
+    mut splits: Vec<(SplitIdAndFooterOffsets, SearchRequest)>,
+    searcher_context: &SearcherContext,
+) -> ScheduleSearchTaskResult {
+    let permit_sizes: Vec<ByteSize> = splits
+        .iter()
+        .map(|(split, _)| {
+            compute_initial_memory_allocation(
+                split,
+                searcher_context
+                    .searcher_config
+                    .warmup_single_split_initial_allocation,
+            )
+        })
+        .collect();
+
+    let offload_threshold: usize = if searcher_context.lambda_invoker.is_some()
+        && let Some(lambda_config) = &searcher_context.searcher_config.lambda
+    {
+        lambda_config.offload_threshold
+    } else {
+        usize::MAX
+    };
+
+    let search_permit_futures = searcher_context
+        .search_permit_provider
+        .get_permits_with_offload(permit_sizes, offload_threshold)
+        .await;
+
+    let splits_to_run_on_lambda: Vec<(SplitIdAndFooterOffsets, SearchRequest)> =
+        splits.drain(search_permit_futures.len()..).collect();
+
+    let splits_to_run_locally: Vec<LocalSearchTask> = splits
+        .into_iter()
+        .zip(search_permit_futures)
+        .map(
+            |((split, search_request), search_permit_future)| LocalSearchTask {
+                split,
+                search_request,
+                search_permit_future,
+            },
+        )
+        .collect();
+
+    ScheduleSearchTaskResult {
+        local_search_tasks: splits_to_run_locally,
+        offloaded_search_tasks: splits_to_run_on_lambda,
+    }
+}
+
 /// The leaf search collects all kind of information, and returns a set of
-/// [PartialHit](quickwit_proto::search::PartialHit) candidates. The root will be in
+/// [PartialHit] candidates. The root will be in
 /// charge to consolidate, identify the actual final top hits to display, and
 /// fetch the actual documents to convert the partial hits into actual Hits.
 pub async fn single_doc_mapping_leaf_search(
@@ -1363,55 +1607,117 @@ pub async fn single_doc_mapping_leaf_search(
     index_storage: Arc<dyn Storage>,
     splits: Vec<SplitIdAndFooterOffsets>,
     doc_mapper: Arc<DocMapper>,
-    aggregations_limits: AggregationLimitsGuard,
 ) -> Result<LeafSearchResponse, SearchError> {
     let num_docs: u64 = splits.iter().map(|split| split.num_docs).sum();
     let num_splits = splits.len();
     info!(num_docs, num_splits, split_offsets = ?PrettySample::new(&splits, 5));
 
-    let split_filter = CanSplitDoBetter::from_request(&request, doc_mapper.timestamp_field_name());
-    let split_with_req = split_filter.optimize(request.clone(), splits)?;
+    // We simplify the request as much as possible.
+    let split_filter: CanSplitDoBetter =
+        CanSplitDoBetter::from_request(&request, doc_mapper.timestamp_field_name());
+    let mut split_with_req: Vec<(SplitIdAndFooterOffsets, SearchRequest)> =
+        split_filter.optimize(&request, splits)?;
+    for (split, single_split_search_request) in &mut split_with_req {
+        rewrite_request(
+            single_split_search_request,
+            split,
+            doc_mapper.timestamp_field_name(),
+        );
+    }
+    let split_filter_arc: Arc<RwLock<CanSplitDoBetter>> = Arc::new(RwLock::new(split_filter));
 
-    let split_filter = Arc::new(RwLock::new(split_filter));
+    let merge_collector =
+        make_merge_collector(&request, searcher_context.get_aggregation_limits())?;
+    let mut incremental_merge_collector = IncrementalCollector::new(merge_collector);
 
-    let merge_collector = make_merge_collector(&request, aggregations_limits.clone())?;
-    let incremental_merge_collector = IncrementalCollector::new(merge_collector);
-    let incremental_merge_collector = Arc::new(Mutex::new(incremental_merge_collector));
+    let split_outcome_counters = Arc::new(SplitSearchOutcomeCounters::new_unregistered());
 
-    // We acquire all of the leaf search permits to make sure our single split search tasks
-    // do no interleave with other leaf search requests.
-    let permit_sizes = split_with_req.iter().map(|(split, _)| {
-        compute_initial_memory_allocation(
-            split,
-            searcher_context
-                .searcher_config
-                .warmup_single_split_initial_allocation,
-        )
-    });
-    let permit_futures = searcher_context
-        .search_permit_provider
-        .get_permits(permit_sizes)
-        .await;
+    // Sort out the splits that are already in the partial result cache.
+    let uncached_splits: Vec<(SplitIdAndFooterOffsets, SearchRequest)> =
+        process_partial_result_cache(
+            &searcher_context.leaf_search_cache,
+            split_with_req,
+            split_outcome_counters.clone(),
+            &mut incremental_merge_collector,
+        )?;
+    let incremental_merge_collector_arc: Arc<Mutex<IncrementalCollector>> =
+        Arc::new(Mutex::new(incremental_merge_collector));
+
+    // Determine which uncached splits to process locally vs offload.
+    let ScheduleSearchTaskResult {
+        local_search_tasks,
+        offloaded_search_tasks,
+    } = schedule_search_tasks(uncached_splits, &searcher_context).await;
+
+    // Offload splits to Lambda.
+    let run_offloaded_search_tasks_fut = run_offloaded_search_tasks(
+        &searcher_context,
+        &request,
+        &doc_mapper,
+        index_storage.uri().clone(),
+        offloaded_search_tasks,
+        &incremental_merge_collector_arc,
+    );
 
+    // Spawn local split search tasks.
     let leaf_search_context = Arc::new(LeafSearchContext {
         searcher_context: searcher_context.clone(),
-        split_outcome_counters: Arc::new(SplitSearchOutcomeCounters::new_unregistered()),
-        incremental_merge_collector: incremental_merge_collector.clone(),
+        split_outcome_counters,
+        incremental_merge_collector: incremental_merge_collector_arc.clone(),
         doc_mapper: doc_mapper.clone(),
-        split_filter: split_filter.clone(),
+        split_filter: split_filter_arc.clone(),
     });
+    let run_local_search_tasks_fut = run_local_search_tasks(
+        local_search_tasks,
+        index_storage,
+        split_filter_arc,
+        leaf_search_context,
+    );
+
+    let (offloaded_res, _) =
+        tokio::join!(run_offloaded_search_tasks_fut, run_local_search_tasks_fut);
+    offloaded_res?;
+
+    // we can't use unwrap_or_clone because mutexes aren't Clone
+    let incremental_merge_collector = match Arc::try_unwrap(incremental_merge_collector_arc) {
+        Ok(filter_merger) => filter_merger.into_inner().unwrap(),
+        Err(filter_merger) => filter_merger.lock().unwrap().clone(),
+    };
 
-    let mut split_search_futures = JoinSet::new();
-    let mut task_id_to_split_id_map = HashMap::with_capacity(split_with_req.len());
-    for ((split, search_request), permit_fut) in
-        split_with_req.into_iter().zip(permit_futures.into_iter())
+    let leaf_search_response_result: tantivy::Result<LeafSearchResponse> =
+        crate::search_thread_pool()
+            .run_cpu_intensive(|| incremental_merge_collector.finalize())
+            .instrument(info_span!("incremental_merge_intermediate"))
+            .await
+            .context("failed to merge split search responses: thread panicked")?;
+
+    Ok(leaf_search_response_result?)
+}
+
+async fn run_local_search_tasks(
+    local_search_tasks: Vec<LocalSearchTask>,
+    index_storage: Arc<dyn Storage + 'static>,
+    split_filter_arc: Arc<RwLock<CanSplitDoBetter>>,
+    leaf_search_context: Arc<LeafSearchContext>,
+) {
+    let mut split_search_joinset = JoinSet::new();
+    let mut task_id_to_split_id_map = HashMap::with_capacity(local_search_tasks.len());
+
+    for LocalSearchTask {
+        split,
+        search_request,
+        search_permit_future,
+    } in local_search_tasks
     {
-        let leaf_split_search_permit = permit_fut
+        let leaf_split_search_permit = search_permit_future
             .instrument(info_span!("waiting_for_leaf_search_split_semaphore"))
             .await;
 
+        // We run simplify search request again: as we push split into the merge collector,
+        // we may have discovered that we won't find any better candidates for top hits in this
+        // split, in which case we can remove top hits collection.
         let Some(simplified_search_request) =
-            simplify_search_request(search_request, &split, &split_filter)
+            simplify_search_request(search_request, &split, &split_filter_arc)
         else {
             let mut leaf_search_state_guard =
                 SplitSearchStateGuard::new(leaf_search_context.split_outcome_counters.clone());
@@ -1419,29 +1725,25 @@ pub async fn single_doc_mapping_leaf_search(
             continue;
         };
         let split_id = split.split_id.clone();
-        let handle = split_search_futures.spawn(
+        let handle = split_search_joinset.spawn(
             leaf_search_single_split_wrapper(
                 simplified_search_request,
                 leaf_search_context.clone(),
                 index_storage.clone(),
-                split,
+                split.clone(),
                 leaf_split_search_permit,
-                aggregations_limits.clone(),
             )
             .in_current_span(),
         );
         task_id_to_split_id_map.insert(handle.id(), split_id);
     }
 
-    // TODO we could cancel running splits when !run_all_splits and the running split can no
-    // longer give better results after some other split answered.
+    // Await all local tasks.
     let mut split_search_join_errors: Vec<(String, JoinError)> = Vec::new();
 
-    while let Some(leaf_search_join_result) = split_search_futures.join_next().await {
-        // splits that did not panic were already added to the collector
+    while let Some(leaf_search_join_result) = split_search_joinset.join_next().await {
         if let Err(join_error) = leaf_search_join_result {
             if join_error.is_cancelled() {
-                // An explicit task cancellation is not an error.
                 continue;
             }
             let split_id = task_id_to_split_id_map.get(&join_error.id()).unwrap();
@@ -1454,30 +1756,44 @@ pub async fn single_doc_mapping_leaf_search(
         }
     }
 
-    info!(split_outcome_counters=%leaf_search_context.split_outcome_counters, "leaf split search finished");
-
-    // we can't use unwrap_or_clone because mutexes aren't Clone
-    let mut incremental_merge_collector = match Arc::try_unwrap(incremental_merge_collector) {
-        Ok(filter_merger) => filter_merger.into_inner().unwrap(),
-        Err(filter_merger) => filter_merger.lock().unwrap().clone(),
-    };
-
+    let mut incremental_merge_collector_lock = leaf_search_context
+        .incremental_merge_collector
+        .lock()
+        .unwrap();
     for (split_id, split_search_join_error) in split_search_join_errors {
-        incremental_merge_collector.add_failed_split(SplitSearchError {
+        incremental_merge_collector_lock.add_failed_split(SplitSearchError {
             split_id,
             error: SearchError::from(split_search_join_error).to_string(),
             retryable_error: true,
         });
     }
 
-    let leaf_search_response_reresult: Result<Result<LeafSearchResponse, _>, _> =
-        crate::search_thread_pool()
-            .run_cpu_intensive(|| incremental_merge_collector.finalize())
-            .instrument(info_span!("incremental_merge_intermediate"))
-            .await
-            .context("failed to merge split search responses");
+    info!(split_outcome_counters=%leaf_search_context.split_outcome_counters, "leaf split search finished");
+}
 
-    Ok(leaf_search_response_reresult??)
+/// We identify the splits that are in the cache and append them to the incremental merge collector.
+/// The (split, request) that are yet to be processed are returned.
+fn process_partial_result_cache(
+    leaf_search_cache: &LeafSearchCache,
+    split_with_req: Vec<(SplitIdAndFooterOffsets, SearchRequest)>,
+    split_outcome_counters: Arc<SplitSearchOutcomeCounters>,
+    incremental_merge_collector: &mut IncrementalCollector,
+) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
+    let mut uncached_splits: Vec<(SplitIdAndFooterOffsets, SearchRequest)> =
+        Vec::with_capacity(split_with_req.len());
+    for (split, search_request) in split_with_req {
+        if let Some(cached_response) = leaf_search_cache
+            // TODO remove the clone here.
+            .get(split.clone(), search_request.clone())
+        {
+            let mut split_search_guard = SplitSearchStateGuard::new(split_outcome_counters.clone());
+            split_search_guard.set_state(SplitSearchState::CacheHit);
+            incremental_merge_collector.add_result(cached_response)?;
+        } else {
+            uncached_splits.push((split, search_request));
+        }
+    }
+    Ok(uncached_splits)
 }
 
 #[derive(Copy, Clone)]
@@ -1549,7 +1865,6 @@ async fn leaf_search_single_split_wrapper(
     index_storage: Arc<dyn Storage>,
     split: SplitIdAndFooterOffsets,
     mut search_permit: SearchPermit,
-    aggregations_limits: AggregationLimitsGuard,
 ) {
     let timer = crate::SEARCH_METRICS
         .leaf_search_split_duration_secs
@@ -1560,7 +1875,6 @@ async fn leaf_search_single_split_wrapper(
             ctx.clone(),
             index_storage,
             split.clone(),
-            aggregations_limits,
             &mut search_permit,
         )
         .await;
@@ -1605,8 +1919,11 @@ async fn leaf_search_single_split_wrapper(
 mod tests {
     use std::ops::Bound;
 
+    use async_trait::async_trait;
     use bytes::BufMut;
+    use quickwit_config::{LambdaConfig, SearcherConfig};
     use quickwit_directories::write_hotcache;
+    use quickwit_proto::search::LambdaSingleSplitResult;
     use rand::Rng;
     use tantivy::TantivyDocument;
     use tantivy::directory::RamDirectory;
@@ -1615,6 +1932,7 @@ mod tests {
     };
 
     use super::*;
+    use crate::LambdaLeafSearchInvoker;
 
     fn bool_filter(ast: impl Into<QueryAst>) -> QueryAst {
         BoolQuery {
@@ -2144,4 +2462,241 @@ mod tests {
         assert!(directory_size_larger > directory_size_smaller + 100);
         assert!(larger_size > smaller_size + 100);
     }
+
+    fn nz(n: usize) -> std::num::NonZeroUsize {
+        std::num::NonZeroUsize::new(n).unwrap()
+    }
+
+    #[test]
+    fn test_greedy_batch_split_empty() {
+        let items: Vec<u64> = vec![];
+        let batches = super::greedy_batch_split(items, |&x| x, nz(5));
+        assert!(batches.is_empty());
+    }
+
+    #[test]
+    fn test_greedy_batch_split_single_batch() {
+        let items = vec![10u64, 20, 30];
+        let batches = super::greedy_batch_split(items, |&x| x, nz(10));
+        assert_eq!(batches.len(), 1);
+        assert_eq!(batches[0].len(), 3);
+    }
+
+    #[test]
+    fn test_greedy_batch_split_balances_weights() {
+        // 7 items with weights, max 3 per batch -> 3 batches
+        let items = vec![100u64, 80, 60, 50, 40, 30, 20];
+        let batches = super::greedy_batch_split(items, |&x| x, nz(3));
+
+        assert_eq!(batches.len(), 3);
+
+        // All items should be present
+        let mut all_items: Vec<u64> = batches.iter().flatten().copied().collect();
+        all_items.sort_unstable();
+        assert_eq!(all_items, vec![20, 30, 40, 50, 60, 80, 100]);
+
+        // Check weights are reasonably balanced
+        let weights: Vec<u64> = batches.iter().map(|b| b.iter().sum()).collect();
+        let max_weight = *weights.iter().max().unwrap();
+        let min_weight = *weights.iter().min().unwrap();
+        // With greedy LPT, the imbalance should be bounded
+        assert!(
+            max_weight <= min_weight * 2,
+            "weights should be reasonably balanced: {:?}",
+            weights
+        );
+    }
+
+    #[test]
+    fn test_greedy_batch_split_count_balance() {
+        // 10 items, max 3 per batch -> 4 batches
+        // counts should be either 2 or 3 per batch
+        let items: Vec<u64> = (0..10).collect();
+        let batches = super::greedy_batch_split(items, |&x| x, nz(3));
+
+        assert_eq!(batches.len(), 4);
+        let counts: Vec<usize> = batches.iter().map(|b| b.len()).collect();
+        for count in &counts {
+            assert!(
+                *count >= 2 && *count <= 3,
+                "count should be 2 or 3, got {}",
+                count
+            );
+        }
+        assert_eq!(counts.iter().sum::<usize>(), 10);
+    }
+
+    fn make_splits_with_requests(
+        num_splits: usize,
+    ) -> Vec<(SplitIdAndFooterOffsets, SearchRequest)> {
+        (0..num_splits)
+            .map(|idx| {
+                let split = SplitIdAndFooterOffsets {
+                    split_id: format!("split_{idx}"),
+                    num_docs: 100,
+                    ..Default::default()
+                };
+                (split, SearchRequest::default())
+            })
+            .collect()
+    }
+
+    #[tokio::test]
+    async fn test_schedule_search_tasks_no_lambda_all_local() {
+        let searcher_context = SearcherContext::for_test();
+        let splits = make_splits_with_requests(5);
+        let result = super::schedule_search_tasks(splits, &searcher_context).await;
+        assert_eq!(result.local_search_tasks.len(), 5);
+        assert!(result.offloaded_search_tasks.is_empty());
+        for (idx, task) in result.local_search_tasks.iter().enumerate() {
+            assert_eq!(task.split.split_id, format!("split_{idx}"));
+        }
+    }
+
+    struct DummyInvoker;
+    #[async_trait]
+    impl LambdaLeafSearchInvoker for DummyInvoker {
+        async fn invoke_leaf_search(
+            &self,
+            _req: LeafSearchRequest,
+        ) -> Result<Vec<LambdaSingleSplitResult>, SearchError> {
+            todo!()
+        }
+    }
+
+    #[tokio::test]
+    async fn test_schedule_search_tasks_lambda_offloads_excess() {
+        let mut config = SearcherConfig::default();
+        config.lambda = Some(LambdaConfig {
+            offload_threshold: 3,
+            ..LambdaConfig::for_test()
+        });
+        let searcher_context = SearcherContext::new(config, None, Some(Arc::new(DummyInvoker)));
+        let splits = make_splits_with_requests(7);
+        let result = super::schedule_search_tasks(splits, &searcher_context).await;
+        assert_eq!(result.local_search_tasks.len(), 3);
+        assert_eq!(result.offloaded_search_tasks.len(), 4);
+        for (idx, task) in result.local_search_tasks.iter().enumerate() {
+            assert_eq!(task.split.split_id, format!("split_{idx}"));
+        }
+        for (idx, (split, _req)) in result.offloaded_search_tasks.iter().enumerate() {
+            assert_eq!(split.split_id, format!("split_{}", idx + 3));
+        }
+    }
+
+    #[tokio::test]
+    async fn test_schedule_search_tasks_lambda_threshold_zero_offloads_all() {
+        let mut config = SearcherConfig::default();
+        config.lambda = Some(LambdaConfig {
+            offload_threshold: 0,
+            ..LambdaConfig::for_test()
+        });
+        let searcher_context = SearcherContext::new(config, None, Some(Arc::new(DummyInvoker)));
+        let splits = make_splits_with_requests(5);
+        let result = super::schedule_search_tasks(splits, &searcher_context).await;
+        assert!(result.local_search_tasks.is_empty());
+        assert_eq!(result.offloaded_search_tasks.len(), 5);
+    }
+
+    #[tokio::test]
+    async fn test_schedule_search_tasks_lambda_threshold_above_split_count() {
+        let mut config = SearcherConfig::default();
+        config.lambda = Some(LambdaConfig {
+            offload_threshold: 100,
+            ..LambdaConfig::for_test()
+        });
+        let searcher_context = SearcherContext::new(config, None, Some(Arc::new(DummyInvoker)));
+        let splits = make_splits_with_requests(5);
+        let result = super::schedule_search_tasks(splits, &searcher_context).await;
+        assert_eq!(result.local_search_tasks.len(), 5);
+        assert!(result.offloaded_search_tasks.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_schedule_search_tasks_empty() {
+        let searcher_context = SearcherContext::for_test();
+        let result = super::schedule_search_tasks(Vec::new(), &searcher_context).await;
+        assert!(result.local_search_tasks.is_empty());
+        assert!(result.offloaded_search_tasks.is_empty());
+    }
+
+    mod proptest_greedy_batch {
+        use std::num::NonZeroUsize;
+
+        use proptest::prelude::*;
+
+        proptest! {
+            #[test]
+            fn all_items_preserved(
+                items in prop::collection::vec(0u64..1000, 0..100),
+                max_per_batch in 1usize..20
+            ) {
+                let original: Vec<u64> = items.clone();
+                let max_per_batch = NonZeroUsize::new(max_per_batch).unwrap();
+                let batches = super::super::greedy_batch_split(items, |&x| x, max_per_batch);
+
+                // All items should be present exactly once
+                let mut result: Vec<u64> = batches.into_iter().flatten().collect();
+                result.sort_unstable();
+                let mut expected = original;
+                expected.sort_unstable();
+                prop_assert_eq!(result, expected);
+            }
+
+            #[test]
+            fn batch_count_correct(
+                items in prop::collection::vec(0u64..1000, 1..100),
+                max_per_batch in 1usize..20
+            ) {
+                let n = items.len();
+                let max_per_batch_nz = NonZeroUsize::new(max_per_batch).unwrap();
+                let batches = super::super::greedy_batch_split(items, |&x| x, max_per_batch_nz);
+
+                let expected_batches = n.div_ceil(max_per_batch);
+                prop_assert_eq!(batches.len(), expected_batches);
+            }
+
+            #[test]
+            fn total_items_matches(
+                items in prop::collection::vec(0u64..1000, 1..100),
+                max_per_batch in 1usize..20
+            ) {
+                let n = items.len();
+                let max_per_batch = NonZeroUsize::new(max_per_batch).unwrap();
+                let batches = super::super::greedy_batch_split(items, |&x| x, max_per_batch);
+
+                // Total items across all batches equals input
+                let total: usize = batches.iter().map(|b| b.len()).sum();
+                prop_assert_eq!(total, n);
+            }
+
+            #[test]
+            fn greedy_balances_by_weight_not_count(
+                // Use items with significant weights to test weight balancing
+                items in prop::collection::vec(100u64..1000, 4..30),
+                max_per_batch in 2usize..10
+            ) {
+                let max_per_batch = NonZeroUsize::new(max_per_batch).unwrap();
+                let batches = super::super::greedy_batch_split(items, |&x| x, max_per_batch);
+
+                if batches.len() >= 2 {
+                    let weights: Vec<u64> = batches.iter().map(|b| b.iter().sum()).collect();
+                    let total_weight: u64 = weights.iter().sum();
+                    let avg_weight = total_weight / batches.len() as u64;
+
+                    // LPT guarantees max makespan <= (4/3) * optimal
+                    // With balanced input, max should be close to average
+                    let max_weight = *weights.iter().max().unwrap();
+
+                    // Max weight should be at most 2x average (generous bound)
+                    prop_assert!(
+                        max_weight <= avg_weight * 2 + 1000, // +1000 for rounding slack
+                        "max weight {} too far from average {}",
+                        max_weight,
+                        avg_weight
+                    );
+                }
+            }
+        }
+    }
 }
diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs
index f9be57222c8..cc4d27e464e 100644
--- a/quickwit/quickwit-search/src/leaf_cache.rs
+++ b/quickwit/quickwit-search/src/leaf_cache.rs
@@ -70,7 +70,6 @@ impl LeafSearchCache {
         result: LeafSearchResponse,
     ) {
         let key = CacheKey::from_split_meta_and_request(split_info, search_request);
-
         let encoded_result = result.encode_to_vec();
         self.content.put(key, OwnedBytes::new(encoded_result));
     }
diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs
index 008556d595f..89b45a69014 100644
--- a/quickwit/quickwit-search/src/lib.rs
+++ b/quickwit/quickwit-search/src/lib.rs
@@ -23,7 +23,10 @@ mod collector;
 mod error;
 mod fetch_docs;
 mod find_trace_ids_collector;
-mod leaf;
+
+mod invoker;
+/// Leaf search operations.
+pub mod leaf;
 mod leaf_cache;
 mod list_fields;
 mod list_fields_cache;
@@ -80,6 +83,7 @@ pub use crate::client::{
 pub use crate::cluster_client::ClusterClient;
 pub use crate::error::{SearchError, parse_grpc_error};
 use crate::fetch_docs::fetch_docs;
+pub use crate::invoker::LambdaLeafSearchInvoker;
 pub use crate::root::{
     IndexMetasForLeafSearch, SearchJob, check_all_index_metadata_found, jobs_to_leaf_request,
     root_search, search_plan,
@@ -283,7 +287,7 @@ pub async fn single_node_search(
     let search_job_placer = SearchJobPlacer::new(searcher_pool.clone());
     let cluster_client = ClusterClient::new(search_job_placer);
     let searcher_config = SearcherConfig::default();
-    let searcher_context = Arc::new(SearcherContext::new(searcher_config, None));
+    let searcher_context = Arc::new(SearcherContext::new(searcher_config, None, None));
     let search_service = Arc::new(SearchServiceImpl::new(
         metastore.clone(),
         storage_resolver,
diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs
index 3a72f26d595..5c69972728e 100644
--- a/quickwit/quickwit-search/src/list_terms.rs
+++ b/quickwit/quickwit-search/src/list_terms.rs
@@ -17,6 +17,7 @@ use std::ops::Bound;
 use std::sync::Arc;
 
 use anyhow::Context;
+use bytesize::ByteSize;
 use futures::future::try_join_all;
 use itertools::{Either, Itertools};
 use quickwit_common::pretty::PrettySample;
@@ -326,14 +327,20 @@ pub async fn leaf_list_terms(
     splits: &[SplitIdAndFooterOffsets],
 ) -> Result<LeafListTermsResponse, SearchError> {
     info!(split_offsets = ?PrettySample::new(splits, 5));
-    let permit_sizes = splits.iter().map(|split| {
-        compute_initial_memory_allocation(
-            split,
-            searcher_context
-                .searcher_config
-                .warmup_single_split_initial_allocation,
-        )
-    });
+    let permit_sizes: Vec<ByteSize> = splits
+        .iter()
+        .map(|split| {
+            compute_initial_memory_allocation(
+                split,
+                searcher_context
+                    .searcher_config
+                    .warmup_single_split_initial_allocation,
+            )
+        })
+        .collect();
+    // We have added offloading leaf search to lambdas, but not for list_terms yet.
+    // TODO (Add it)
+    // https://github.com/quickwit-oss/quickwit/issues/6150
     let permits = searcher_context
         .search_permit_provider
         .get_permits(permit_sizes)
diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs
index b5b03882ec0..5d5610ca4d9 100644
--- a/quickwit/quickwit-search/src/root.rs
+++ b/quickwit/quickwit-search/src/root.rs
@@ -45,7 +45,7 @@ use tantivy::aggregation::agg_result::AggregationResults;
 use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults;
 use tantivy::collector::Collector;
 use tantivy::schema::{Field, FieldEntry, FieldType, Schema};
-use tracing::{debug, info, info_span, instrument};
+use tracing::{debug, error, info, info_span, instrument};
 
 use crate::cluster_client::ClusterClient;
 use crate::collector::{QuickwitAggregations, make_merge_collector};
@@ -1237,7 +1237,7 @@ pub async fn root_search(
     if let Some(max_total_split_searches) = searcher_context.searcher_config.max_splits_per_search
         && max_total_split_searches < num_splits
     {
-        tracing::error!(
+        error!(
             num_splits,
             max_total_split_searches,
             index=?search_request.index_id_patterns,
diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs
index 3a4c0602326..4cd5e99918f 100644
--- a/quickwit/quickwit-search/src/search_permit_provider.rs
+++ b/quickwit/quickwit-search/src/search_permit_provider.rs
@@ -16,13 +16,12 @@ use std::collections::BinaryHeap;
 use std::collections::binary_heap::PeekMut;
 use std::future::Future;
 use std::pin::Pin;
+use std::sync::Arc;
 use std::task::{Context, Poll};
 
 use bytesize::ByteSize;
 use quickwit_common::metrics::GaugeGuard;
 use quickwit_proto::search::SplitIdAndFooterOffsets;
-#[cfg(test)]
-use tokio::sync::watch;
 use tokio::sync::{mpsc, oneshot};
 
 /// Distributor of permits to perform split search operation.
@@ -35,14 +34,21 @@ use tokio::sync::{mpsc, oneshot};
 #[derive(Clone)]
 pub struct SearchPermitProvider {
     message_sender: mpsc::UnboundedSender<SearchPermitMessage>,
-    #[cfg(test)]
-    actor_stopped: watch::Receiver<bool>,
+    #[allow(dead_code)]
+    actor_join_handle: Arc<tokio::task::JoinHandle<SearchPermitActor>>,
 }
 
 pub enum SearchPermitMessage {
-    Request {
-        permit_sender: oneshot::Sender<Vec<SearchPermitFuture>>,
+    RequestWithOffload {
         permit_sizes: Vec<u64>,
+        /// Maximum number of pending requests. If granting all
+        /// requested permits would cause the number of pending requests to exceed this threshold,
+        /// some permits will be offloaded to Lambda.
+        offload_threshold: usize,
+        /// Channel to return the result message from the actor.
+        /// When offloading permits, the number of futures can be < to the number of requested
+        /// permits.
+        permit_resp_tx: oneshot::Sender<Vec<SearchPermitFuture>>,
     },
     UpdateMemory {
         memory_delta: i64,
@@ -81,8 +87,6 @@ pub fn compute_initial_memory_allocation(
 impl SearchPermitProvider {
     pub fn new(num_download_slots: usize, memory_budget: ByteSize) -> Self {
         let (message_sender, message_receiver) = mpsc::unbounded_channel();
-        #[cfg(test)]
-        let (state_sender, state_receiver) = watch::channel(false);
         let actor = SearchPermitActor {
             msg_receiver: message_receiver,
             msg_sender: message_sender.downgrade(),
@@ -90,34 +94,57 @@ impl SearchPermitProvider {
             total_memory_budget: memory_budget.as_u64(),
             permits_requests: BinaryHeap::new(),
             total_memory_allocated: 0u64,
-            #[cfg(test)]
-            stopped: state_sender,
         };
-        tokio::spawn(actor.run());
+        let actor_join_handle = Arc::new(tokio::spawn(actor.run()));
         Self {
             message_sender,
-            #[cfg(test)]
-            actor_stopped: state_receiver,
+            actor_join_handle,
         }
     }
 
-    /// Returns one permit future for each provided split metadata.
+    #[cfg(test)]
+    async fn stop_and_unwrap(self) -> SearchPermitActor {
+        let SearchPermitProvider {
+            message_sender,
+            actor_join_handle,
+            ..
+        } = self;
+        drop(message_sender);
+        Arc::into_inner(actor_join_handle).unwrap().await.unwrap()
+    }
+
+    /// Returns permits for local splits
+    ///
+    /// The returned futures are guaranteed to resolve in order.
+    pub async fn get_permits(&self, splits: Vec<ByteSize>) -> Vec<SearchPermitFuture> {
+        self.get_permits_with_offload(splits, usize::MAX).await
+    }
+
+    /// Returns permits for local splits and a list of split indices to offload.
+    ///
+    /// The actor checks the current pending queue depth. If adding all splits
+    /// would exceed `offload_threshold` pending requests, only enough splits
+    /// to fill up to the threshold are processed locally; the rest are offloaded.
     ///
-    /// The permits returned are guaranteed to be resolved in order. In
-    /// addition, the permits are guaranteed to be resolved before permits
-    /// returned by subsequent calls to this function.
+    /// The returned futures are guaranteed to resolve in order.
     ///
-    /// The permit memory size is capped by per_permit_initial_memory_allocation.
-    pub async fn get_permits(
+    /// If `offload_threshold` is 0, all splits are offloaded.
+    /// If `offload_threshold` is usize::MAX, all splits are processed locally.
+    pub async fn get_permits_with_offload(
         &self,
-        splits: impl IntoIterator<Item = ByteSize>,
+        splits: Vec<ByteSize>,
+        offload_threshold: usize,
     ) -> Vec<SearchPermitFuture> {
+        if splits.is_empty() {
+            return Vec::new();
+        }
         let (permit_sender, permit_receiver) = oneshot::channel();
         let permit_sizes = splits.into_iter().map(|size| size.as_u64()).collect();
         self.message_sender
-            .send(SearchPermitMessage::Request {
-                permit_sender,
+            .send(SearchPermitMessage::RequestWithOffload {
+                permit_resp_tx: permit_sender,
                 permit_sizes,
+                offload_threshold,
             })
             .expect("Receiver lives longer than sender");
         permit_receiver
@@ -136,8 +163,6 @@ struct SearchPermitActor {
     total_memory_budget: u64,
     total_memory_allocated: u64,
     permits_requests: BinaryHeap<LeafPermitRequest>,
-    #[cfg(test)]
-    stopped: watch::Sender<bool>,
 }
 
 struct SingleSplitPermitRequest {
@@ -177,7 +202,9 @@ impl PartialEq for LeafPermitRequest {
 impl Eq for LeafPermitRequest {}
 
 impl LeafPermitRequest {
+    // `permit_sizes` must not be empty.
     fn from_estimated_costs(permit_sizes: Vec<u64>) -> (Self, Vec<SearchPermitFuture>) {
+        assert!(!permit_sizes.is_empty(), "permit_sizes must not be empty");
         let mut permits = Vec::with_capacity(permit_sizes.len());
         let mut single_split_permit_requests = Vec::with_capacity(permit_sizes.len());
         for permit_size in permit_sizes {
@@ -200,13 +227,11 @@ impl LeafPermitRequest {
     }
 
     fn pop_if_smaller_than(&mut self, max_size: u64) -> Option<SingleSplitPermitRequest> {
-        // IntoIter::as_slice() allows us to peek at the next element without consuming it
-        match self.single_split_permit_requests.as_slice().first() {
-            Some(request) if request.permit_size <= max_size => {
-                self.single_split_permit_requests.next()
-            }
-            _ => None,
+        let peeked_single_split_req = self.single_split_permit_requests.as_slice().first()?;
+        if peeked_single_split_req.permit_size > max_size {
+            return None;
         }
+        self.single_split_permit_requests.next()
     }
 
     fn is_empty(&self) -> bool {
@@ -215,29 +240,45 @@ impl LeafPermitRequest {
 }
 
 impl SearchPermitActor {
-    async fn run(mut self) {
+    async fn run(mut self) -> Self {
         // Stops when the last clone of SearchPermitProvider is dropped.
         while let Some(msg) = self.msg_receiver.recv().await {
             self.handle_message(msg);
         }
-        #[cfg(test)]
-        self.stopped.send(true).ok();
+        self
     }
 
     fn handle_message(&mut self, msg: SearchPermitMessage) {
         match msg {
-            SearchPermitMessage::Request {
-                permit_sizes,
-                permit_sender,
+            SearchPermitMessage::RequestWithOffload {
+                mut permit_sizes,
+                permit_resp_tx: permit_sender,
+                offload_threshold,
             } => {
-                let (leaf_permit_request, permits) =
+                let current_pending = self
+                    .permits_requests
+                    .iter()
+                    .map(|req| req.single_split_permit_requests.as_slice().len())
+                    .sum();
+                // How many new splits can we accept locally before hitting the threshold.
+                let local_capacity = offload_threshold.saturating_sub(current_pending);
+
+                // If this indeed truncates the permit_sizes vector, other splits will be offloaded
+                // to lambdas.
+                permit_sizes.truncate(local_capacity);
+
+                // We special case here in order to avoid pushing empty request in the queue.
+                // (they would never be removed)
+                if permit_sizes.is_empty() {
+                    let _ = permit_sender.send(Vec::new());
+                    return;
+                }
+
+                let (leaf_permit_request, permit_futures) =
                     LeafPermitRequest::from_estimated_costs(permit_sizes);
                 self.permits_requests.push(leaf_permit_request);
                 self.assign_available_permits();
-                // The receiver could be dropped in the (unlikely) situation
-                // where the future requesting these permits is cancelled before
-                // this message is processed.
-                let _ = permit_sender.send(permits);
+                let _ = permit_sender.send(permit_futures);
             }
             SearchPermitMessage::UpdateMemory { memory_delta } => {
                 if self.total_memory_allocated as i64 + memory_delta < 0 {
@@ -276,6 +317,10 @@ impl SearchPermitActor {
             .checked_sub(self.total_memory_allocated)?;
         let mut peeked = self.permits_requests.peek_mut()?;
 
+        assert!(
+            !peeked.is_empty(),
+            "unexpected empty permits_requests present in the search permit provider queue"
+        );
         if let Some(permit_request) = peeked.pop_if_smaller_than(available_memory) {
             if peeked.is_empty() {
                 PeekMut::pop(peeked);
@@ -380,7 +425,6 @@ impl Future for SearchPermitFuture {
 
 #[cfg(test)]
 mod tests {
-    use std::iter::repeat_n;
     use std::time::Duration;
 
     use futures::StreamExt;
@@ -394,7 +438,7 @@ mod tests {
         let permit_provider = SearchPermitProvider::new(1, ByteSize::mb(100));
         let mut all_futures = Vec::new();
         let first_batch_of_permits = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(10), 10))
+            .get_permits(vec![ByteSize::mb(10); 10])
             .await;
         assert_eq!(first_batch_of_permits.len(), 10);
         all_futures.extend(
@@ -405,7 +449,7 @@ mod tests {
         );
 
         let second_batch_of_permits = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(10), 10))
+            .get_permits(vec![ByteSize::mb(10); 10])
             .await;
         assert_eq!(second_batch_of_permits.len(), 10);
         all_futures.extend(
@@ -443,9 +487,7 @@ mod tests {
     async fn test_search_permit_order_with_concurrent_search() {
         let permit_provider = SearchPermitProvider::new(4, ByteSize::mb(100));
         let mut all_futures = Vec::new();
-        let first_batch_of_permits = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(10), 8))
-            .await;
+        let first_batch_of_permits = permit_provider.get_permits(vec![ByteSize::mb(10); 8]).await;
         assert_eq!(first_batch_of_permits.len(), 8);
         all_futures.extend(
             first_batch_of_permits
@@ -454,9 +496,7 @@ mod tests {
                 .map(move |(i, fut)| ((1, i), fut)),
         );
 
-        let second_batch_of_permits = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(10), 2))
-            .await;
+        let second_batch_of_permits = permit_provider.get_permits(vec![ByteSize::mb(10); 2]).await;
         all_futures.extend(
             second_batch_of_permits
                 .into_iter()
@@ -464,9 +504,7 @@ mod tests {
                 .map(move |(i, fut)| ((2, i), fut)),
         );
 
-        let third_batch_of_permits = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(10), 6))
-            .await;
+        let third_batch_of_permits = permit_provider.get_permits(vec![ByteSize::mb(10); 6]).await;
         all_futures.extend(
             third_batch_of_permits
                 .into_iter()
@@ -518,7 +556,7 @@ mod tests {
             .next()
             .unwrap();
         let permit_fut2 = permit_provider
-            .get_permits([ByteSize::mb(10)])
+            .get_permits(vec![ByteSize::mb(10)])
             .await
             .into_iter()
             .next()
@@ -526,20 +564,20 @@ mod tests {
         drop(permit_fut1);
         let permit = permit_fut2.await;
         assert_eq!(permit.memory_allocation, ByteSize::mb(10).as_u64());
-        assert_eq!(*permit_provider.actor_stopped.borrow(), false);
+        assert!(!permit_provider.actor_join_handle.is_finished());
 
         let _permit_fut3 = permit_provider
-            .get_permits([ByteSize::mb(10)])
+            .get_permits(vec![ByteSize::mb(10)])
             .await
             .into_iter()
             .next()
             .unwrap();
-        let mut actor_stopped = permit_provider.actor_stopped.clone();
-        drop(permit_provider);
-        {
-            actor_stopped.changed().await.unwrap();
-            assert!(*actor_stopped.borrow());
-        }
+        let SearchPermitProvider {
+            message_sender,
+            actor_join_handle,
+        } = permit_provider;
+        drop(message_sender);
+        Arc::into_inner(actor_join_handle).unwrap().await.unwrap();
     }
 
     /// Tries to wait for a permit
@@ -553,7 +591,7 @@ mod tests {
     async fn test_memory_budget() {
         let permit_provider = SearchPermitProvider::new(100, ByteSize::mb(100));
         let mut permit_futs = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(10), 14))
+            .get_permits(vec![ByteSize::mb(10); 14])
             .await;
         let mut remaining_permit_futs = permit_futs.split_off(10).into_iter();
         assert_eq!(remaining_permit_futs.len(), 4);
@@ -579,12 +617,87 @@ mod tests {
         try_get(next_permit_fut).await.unwrap();
     }
 
+    #[tokio::test]
+    async fn test_get_permits_with_offload_threshold_max_returns_all() {
+        let permit_provider = SearchPermitProvider::new(100, ByteSize::mb(100));
+        let permits = permit_provider
+            .get_permits_with_offload(vec![ByteSize::mb(1); 8], usize::MAX)
+            .await;
+        assert_eq!(permits.len(), 8);
+    }
+
+    #[tokio::test]
+    async fn test_get_permits_with_offload_threshold_zero_returns_none() {
+        let permit_provider = SearchPermitProvider::new(100, ByteSize::mb(100));
+        let permits = permit_provider
+            .get_permits_with_offload(vec![ByteSize::mb(1); 5], 0)
+            .await;
+        assert!(permits.is_empty());
+        let permit_actor = permit_provider.stop_and_unwrap().await;
+        assert!(permit_actor.permits_requests.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_get_permits_with_offload_truncates_to_threshold() {
+        let permit_provider = SearchPermitProvider::new(100, ByteSize::mb(100));
+        let permits = permit_provider
+            .get_permits_with_offload(vec![ByteSize::mb(1); 10], 4)
+            .await;
+        assert_eq!(permits.len(), 4);
+    }
+
+    #[tokio::test]
+    async fn test_get_permits_with_offload_futures_resolve_in_order() {
+        // We use a search permit provider with a capacity of 1 to make sure that the permits are
+        // resolved in order.
+        let permit_provider = SearchPermitProvider::new(1, ByteSize::mb(100));
+        let permits = permit_provider
+            .get_permits_with_offload(vec![ByteSize::mb(1); 4], 10)
+            .await;
+        assert_eq!(permits.len(), 4);
+        let mut futs: Vec<_> = permits
+            .into_iter()
+            .enumerate()
+            .map(|(i, permit_fut)| async move {
+                permit_fut.await;
+                i
+            })
+            .collect();
+        futs.shuffle(&mut rand::rng());
+        let mut join_set = JoinSet::new();
+        for fut in futs {
+            join_set.spawn(fut);
+        }
+        let mut results = Vec::new();
+        while let Some(result) = join_set.join_next().await {
+            results.push(result.unwrap());
+        }
+        assert_eq!(results, vec![0, 1, 2, 3]);
+    }
+
+    #[tokio::test]
+    async fn test_get_permits_with_offload_pending_consumed_frees_capacity() {
+        let permit_provider = SearchPermitProvider::new(100, ByteSize::mb(100));
+        // First call: 4 splits, threshold 6.
+        let first_permits = permit_provider
+            .get_permits_with_offload(vec![ByteSize::mb(1); 4], 6)
+            .await;
+        assert_eq!(first_permits.len(), 4);
+        // Consume all permits from the first batch (they resolve and get dropped).
+        for permit_fut in first_permits {
+            let _permit = permit_fut.await;
+        }
+        // Second call: the consumed permits no longer count as pending.
+        let second_permits = permit_provider
+            .get_permits_with_offload(vec![ByteSize::mb(1); 5], 6)
+            .await;
+        assert_eq!(second_permits.len(), 5);
+    }
+
     #[tokio::test]
     async fn test_warmup_slot() {
         let permit_provider = SearchPermitProvider::new(10, ByteSize::mb(100));
-        let mut permit_futs = permit_provider
-            .get_permits(repeat_n(ByteSize::mb(1), 16))
-            .await;
+        let mut permit_futs = permit_provider.get_permits(vec![ByteSize::mb(1); 16]).await;
         let mut remaining_permit_futs = permit_futs.split_off(10).into_iter();
         assert_eq!(remaining_permit_futs.len(), 6);
         // we should be able to obtain 10 permits right away
diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs
index cf2c0699b81..5ee74c16556 100644
--- a/quickwit/quickwit-search/src/service.rs
+++ b/quickwit/quickwit-search/src/service.rs
@@ -33,6 +33,7 @@ use quickwit_storage::{
 };
 use tantivy::aggregation::AggregationLimitsGuard;
 
+use crate::invoker::LambdaLeafSearchInvoker;
 use crate::leaf::multi_index_leaf_search;
 use crate::leaf_cache::{LeafSearchCache, PredicateCacheImpl};
 use crate::list_fields::{leaf_list_fields, root_list_fields};
@@ -152,7 +153,8 @@ impl SearchServiceImpl {
     }
 }
 
-pub fn deserialize_doc_mapper(doc_mapper_str: &str) -> crate::Result<Arc<DocMapper>> {
+/// Deserializes a JSON-encoded doc mapper string into an `Arc<DocMapper>`.
+pub(crate) fn deserialize_doc_mapper(doc_mapper_str: &str) -> crate::Result<Arc<DocMapper>> {
     let doc_mapper = serde_json::from_str::<Arc<DocMapper>>(doc_mapper_str).map_err(|err| {
         SearchError::Internal(format!("failed to deserialize doc mapper: `{err}`"))
     })?;
@@ -190,7 +192,7 @@ impl SearchService for SearchServiceImpl {
             tracked: multi_index_leaf_search(
                 self.searcher_context.clone(),
                 leaf_search_request,
-                &self.storage_resolver,
+                self.storage_resolver.clone(),
             ),
             start: Instant::now(),
             targeted_splits: num_splits,
@@ -415,7 +417,10 @@ pub struct SearcherContext {
     /// List fields cache. Caches the list fields response for a given split.
     pub list_fields_cache: ListFieldsCache,
     /// The aggregation limits are passed to limit the memory usage.
+    /// This object is shared across all request.
     pub aggregation_limit: AggregationLimitsGuard,
+    /// Optional Lambda invoker for offloading leaf search to serverless functions.
+    pub lambda_invoker: Option<Arc<dyn LambdaLeafSearchInvoker>>,
 }
 
 impl std::fmt::Debug for SearcherContext {
@@ -431,11 +436,15 @@ impl SearcherContext {
     #[cfg(test)]
     pub fn for_test() -> SearcherContext {
         let searcher_config = SearcherConfig::default();
-        SearcherContext::new(searcher_config, None)
+        SearcherContext::new(searcher_config, None, None)
     }
 
     /// Creates a new searcher context, given a searcher config, and an optional `SplitCache`.
-    pub fn new(searcher_config: SearcherConfig, split_cache_opt: Option<Arc<SplitCache>>) -> Self {
+    pub fn new(
+        searcher_config: SearcherConfig,
+        split_cache_opt: Option<Arc<SplitCache>>,
+        lambda_invoker: Option<Arc<dyn LambdaLeafSearchInvoker>>,
+    ) -> Self {
         let global_split_footer_cache = MemorySizedCache::from_config(
             &searcher_config.split_footer_cache,
             &quickwit_storage::STORAGE_METRICS.split_footer_cache,
@@ -464,6 +473,7 @@ impl SearcherContext {
             list_fields_cache,
             split_cache_opt,
             aggregation_limit,
+            lambda_invoker,
         }
     }
 
diff --git a/quickwit/quickwit-search/src/tests.rs b/quickwit/quickwit-search/src/tests.rs
index 37acf663a1e..61751e3d253 100644
--- a/quickwit/quickwit-search/src/tests.rs
+++ b/quickwit/quickwit-search/src/tests.rs
@@ -1029,9 +1029,7 @@ async fn test_search_util(test_sandbox: &TestSandbox, query: &str) -> Vec<u32> {
         ..Default::default()
     });
     let searcher_context: Arc<SearcherContext> =
-        Arc::new(SearcherContext::new(SearcherConfig::default(), None));
-
-    let agg_limits = searcher_context.get_aggregation_limits();
+        Arc::new(SearcherContext::new(SearcherConfig::default(), None, None));
 
     let search_response = single_doc_mapping_leaf_search(
         searcher_context,
@@ -1039,7 +1037,6 @@ async fn test_search_util(test_sandbox: &TestSandbox, query: &str) -> Vec<u32> {
         test_sandbox.storage(),
         splits_offsets,
         test_sandbox.doc_mapper(),
-        agg_limits,
     )
     .await
     .unwrap();
@@ -1669,7 +1666,7 @@ async fn test_single_node_list_terms() -> anyhow::Result<()> {
         .into_iter()
         .map(|split| extract_split_and_footer_offsets(&split.split_metadata))
         .collect();
-    let searcher_context = Arc::new(SearcherContext::new(SearcherConfig::default(), None));
+    let searcher_context = Arc::new(SearcherContext::new(SearcherConfig::default(), None, None));
 
     {
         let request = ListTermsRequest {
diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml
index 363065a3403..16f8a36002d 100644
--- a/quickwit/quickwit-serve/Cargo.toml
+++ b/quickwit/quickwit-serve/Cargo.toml
@@ -72,6 +72,7 @@ quickwit-opentelemetry = { workspace = true }
 quickwit-proto = { workspace = true }
 quickwit-query = { workspace = true }
 quickwit-search = { workspace = true }
+quickwit-lambda-client = { workspace = true }
 quickwit-storage = { workspace = true }
 quickwit-telemetry = { workspace = true }
 
diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs
index ca4520ff0ce..dccaecd3912 100644
--- a/quickwit/quickwit-serve/src/lib.rs
+++ b/quickwit/quickwit-serve/src/lib.rs
@@ -630,9 +630,24 @@ pub async fn serve_quickwit(
             None
         };
 
+    // Initialize Lambda invoker if enabled and searcher service is running
+    let lambda_invoker_opt = if node_config.is_service_enabled(QuickwitService::Searcher) {
+        if let Some(lambda_config) = &node_config.searcher_config.lambda {
+            info!("initializing AWS Lambda invoker for search");
+            warn!("offloading to lambda is EXPERIMENTAL. Use at your own risk");
+            let invoker = quickwit_lambda_client::try_get_or_deploy_invoker(lambda_config).await?;
+            Some(invoker)
+        } else {
+            None
+        }
+    } else {
+        None
+    };
+
     let searcher_context = Arc::new(SearcherContext::new(
         node_config.searcher_config.clone(),
         split_cache_opt,
+        lambda_invoker_opt,
     ));
 
     let (search_job_placer, search_service) = setup_searcher(
@@ -1017,6 +1032,7 @@ async fn setup_searcher(
 ) -> anyhow::Result<(SearchJobPlacer, Arc<dyn SearchService>)> {
     let searcher_pool = SearcherPool::default();
     let search_job_placer = SearchJobPlacer::new(searcher_pool.clone());
+
     let search_service = start_searcher_service(
         metastore,
         storage_resolver,
@@ -1553,7 +1569,8 @@ mod tests {
     #[tokio::test]
     async fn test_setup_searcher() {
         let node_config = NodeConfig::for_test();
-        let searcher_context = Arc::new(SearcherContext::new(SearcherConfig::default(), None));
+        let searcher_context =
+            Arc::new(SearcherContext::new(SearcherConfig::default(), None, None));
         let metastore = metastore_for_test();
         let (change_stream, change_stream_tx) = ClusterChangeStream::new_unbounded();
         let storage_resolver = StorageResolver::unconfigured();
diff --git a/quickwit/quickwit-storage/src/cache/memory_sized_cache.rs b/quickwit/quickwit-storage/src/cache/memory_sized_cache.rs
index 305966d3c04..707d633501f 100644
--- a/quickwit/quickwit-storage/src/cache/memory_sized_cache.rs
+++ b/quickwit/quickwit-storage/src/cache/memory_sized_cache.rs
@@ -218,4 +218,14 @@ mod tests {
         cache.put("hello.seg", data);
         assert_eq!(cache.get(&"hello.seg").unwrap(), &b"werwer"[..]);
     }
+
+    #[test]
+    fn test_cache_no_cache() {
+        let cache =
+            MemorySizedCache::from_config(&CacheConfig::no_cache(), &CACHE_METRICS_FOR_TESTS);
+        assert!(cache.get(&"hello.seg").is_none());
+        let data = OwnedBytes::new(&b"werwer"[..]);
+        cache.put("hello.seg", data);
+        assert!(cache.get(&"hello.seg").is_none());
+    }
 }