diff --git a/CLAUDE.md b/CLAUDE.md
index 56e02ae..f54bd09 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -390,15 +390,10 @@ See `agent_instructions/adding_admin_endpoint.md` for implementation patterns (r
 - `[features.file_processing]` — RAG document ingestion (text extraction, OCR, chunking)
 - `[features.guardrails]` — Input/output guardrails (blocklist, PII detection, moderation APIs)
 - `[features.response_caching]` — Response caching with optional semantic similarity matching
-- `[features.prompt_caching]` — Anthropic prompt caching support
 - `[features.image_fetching]` — Fetch images from URLs for vision models
-- `[features.web_search]` — Web search tool integration
-- `[features.code_execution]` — Server-side code execution
 - `[features.model_catalog]` — Model metadata enrichment from models.dev
 - `[features.websocket]` — WebSocket for real-time events
 - `[features.vector_store_cleanup]` — Background cleanup for soft-deleted vector stores
-- `[features.fallback]` — Fallback and retry configuration
-- `[features.load_balancing]` — Load balancing configuration
 
 ## Caching
 
diff --git a/docs/content/docs/configuration/features/code-execution.mdx b/docs/content/docs/configuration/features/code-execution.mdx
deleted file mode 100644
index 3a771b5..0000000
--- a/docs/content/docs/configuration/features/code-execution.mdx
+++ /dev/null
@@ -1,142 +0,0 @@
----
-title: Code Execution
-description: Configure server-side code execution capabilities
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.code_execution]` section configures server-side code execution. By default, code execution happens client-side via WebAssembly in the browser (Pyodide for Python, QuickJS for JavaScript).
-
-<Callout type="info">
-  Client-side WASM execution requires no server configuration. This section is only needed for
-  server-side sandboxed execution.
-</Callout>
-
-## Configuration Reference
-
-### Main Settings
-
-```toml
-[features.code_execution]
-enabled = true
-mode = "wasm"
-allowed_languages = ["python", "javascript"]
-timeout_secs = 30
-max_memory_mb = 256
-```
-
-| Key                 | Type    | Default                    | Description                  |
-| ------------------- | ------- | -------------------------- | ---------------------------- |
-| `enabled`           | boolean | `true`                     | Enable code execution        |
-| `mode`              | string  | `"wasm"`                   | Execution mode               |
-| `allowed_languages` | array   | `["python", "javascript"]` | Allowed languages            |
-| `timeout_secs`      | integer | `30`                       | Execution timeout            |
-| `max_memory_mb`     | integer | `256`                      | Maximum memory per execution |
-
-## Execution Modes
-
-### WASM (Default)
-
-Client-side execution in the browser:
-
-```toml
-[features.code_execution]
-enabled = true
-mode = "wasm"
-```
-
-No server-side configuration needed. Runs via:
-
-- **Python**: Pyodide (numpy, pandas, matplotlib available)
-- **JavaScript**: QuickJS (sandboxed)
-
-### Sandboxed
-
-Server-side execution in containers:
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "bash"]
-timeout_secs = 60
-max_memory_mb = 512
-
-[features.code_execution.mode]
-sandboxed = { runtime = "docker" }
-```
-
-| Runtime       | Description                 |
-| ------------- | --------------------------- |
-| `docker`      | Docker containers           |
-| `firecracker` | Firecracker microVMs        |
-| `gvisor`      | gVisor sandboxed containers |
-
-### External Service
-
-Delegate to an external code execution service:
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "r", "julia"]
-timeout_secs = 120
-max_memory_mb = 1024
-
-[features.code_execution.mode]
-external = { url = "https://code-exec.example.com", api_key = "${CODE_EXEC_API_KEY}" }
-```
-
-| Key       | Type   | Description                |
-| --------- | ------ | -------------------------- |
-| `url`     | string | External service URL       |
-| `api_key` | string | API key for authentication |
-
-## Complete Examples
-
-### Client-Side Only (Default)
-
-```toml
-[features.code_execution]
-enabled = true
-mode = "wasm"
-allowed_languages = ["python", "javascript"]
-timeout_secs = 30
-max_memory_mb = 256
-```
-
-### Docker Sandboxed
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "bash", "ruby"]
-timeout_secs = 60
-max_memory_mb = 512
-
-[features.code_execution.mode]
-sandboxed = { runtime = "docker" }
-```
-
-### External Service
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "r", "julia", "sql"]
-timeout_secs = 120
-max_memory_mb = 2048
-
-[features.code_execution.mode]
-external = { url = "https://code-exec.internal.company.com", api_key = "${CODE_EXEC_API_KEY}" }
-```
-
-## Security Considerations
-
-<Callout type="warn">
-  Server-side code execution requires careful security configuration. Consider: - Network isolation
-  for containers - Resource limits (CPU, memory, disk) - Execution timeouts - Language restrictions
-</Callout>
-
-## See Also
-
-- [Frontend Tools Guide](/docs/features/frontend-tools) - Client-side WASM execution
diff --git a/docs/content/docs/configuration/features/fallback.mdx b/docs/content/docs/configuration/features/fallback.mdx
deleted file mode 100644
index 34c92b2..0000000
--- a/docs/content/docs/configuration/features/fallback.mdx
+++ /dev/null
@@ -1,158 +0,0 @@
----
-title: Fallback & Retry
-description: Configure automatic retries and provider fallbacks
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.fallback]` section configures automatic retry behavior and provider fallback chains for handling transient errors.
-
-## Configuration Reference
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 3
-initial_delay_ms = 1000
-max_delay_ms = 30000
-backoff_multiplier = 2.0
-fallback_enabled = false
-fallback_order = []
-fallback_on = ["rate_limit", "server_error", "timeout"]
-```
-
-| Key                  | Type    | Default   | Description                       |
-| -------------------- | ------- | --------- | --------------------------------- |
-| `retries_enabled`    | boolean | `true`    | Enable automatic retries          |
-| `max_retries`        | integer | `3`       | Maximum retry attempts            |
-| `initial_delay_ms`   | integer | `1000`    | Initial retry delay (1 second)    |
-| `max_delay_ms`       | integer | `30000`   | Maximum retry delay (30 seconds)  |
-| `backoff_multiplier` | float   | `2.0`     | Exponential backoff multiplier    |
-| `fallback_enabled`   | boolean | `false`   | Enable provider fallbacks         |
-| `fallback_order`     | array   | `[]`      | Provider fallback chain           |
-| `fallback_on`        | array   | see below | Error types that trigger fallback |
-
-## Retry Behavior
-
-Retries use exponential backoff with the formula:
-
-```
-delay = min(initial_delay_ms * (backoff_multiplier ^ attempt), max_delay_ms)
-```
-
-Example with defaults:
-
-- Attempt 1: 1000ms delay
-- Attempt 2: 2000ms delay
-- Attempt 3: 4000ms delay
-
-## Fallback Triggers
-
-| Trigger          | Description             |
-| ---------------- | ----------------------- |
-| `rate_limit`     | 429 Too Many Requests   |
-| `server_error`   | 5xx errors              |
-| `timeout`        | Request timeout         |
-| `overloaded`     | Provider overloaded     |
-| `context_length` | Context length exceeded |
-
-```toml
-fallback_on = ["rate_limit", "server_error", "timeout"]
-```
-
-## Complete Examples
-
-### Retries Only (Default)
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 3
-initial_delay_ms = 1000
-max_delay_ms = 30000
-backoff_multiplier = 2.0
-fallback_enabled = false
-```
-
-### With Provider Fallbacks
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 2
-initial_delay_ms = 500
-max_delay_ms = 10000
-backoff_multiplier = 2.0
-fallback_enabled = true
-fallback_order = ["anthropic", "openai", "bedrock"]
-fallback_on = ["rate_limit", "server_error", "timeout", "overloaded"]
-```
-
-Flow: Primary provider fails → retry 2x → try Anthropic → retry 2x → try OpenAI → retry 2x → try Bedrock → fail
-
-### Aggressive Retries
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 5
-initial_delay_ms = 200
-max_delay_ms = 5000
-backoff_multiplier = 1.5
-fallback_enabled = false
-```
-
-### No Retries
-
-```toml
-[features.fallback]
-retries_enabled = false
-fallback_enabled = true
-fallback_order = ["anthropic", "openai"]
-fallback_on = ["server_error", "timeout"]
-```
-
-### Context Length Fallback
-
-Handle models with different context limits:
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 1
-fallback_enabled = true
-fallback_order = ["gpt-4o", "claude-sonnet"]
-fallback_on = ["context_length"]
-```
-
-## Fallback Chain Behavior
-
-```
-Request to primary provider
-         │
-         ▼
-    ┌─────────┐     ┌─────────────────┐
-    │ Success │ ←── │ Retry if failed │
-    └─────────┘     └─────────────────┘
-         │                   │
-         │              max_retries
-         │                   │
-         │                   ▼
-         │         ┌─────────────────┐
-         │         │ Next in fallback│
-         │         │     order       │
-         │         └─────────────────┘
-         │                   │
-         ▼                   ▼
-    Return response    Repeat until
-                       chain exhausted
-```
-
-<Callout type="info">
-Per-provider retry and circuit breaker settings (in `[providers.<name>]`) override global fallback settings for that provider.
-</Callout>
-
-## See Also
-
-- [Load Balancing](/docs/configuration/features/load-balancing) - Provider selection
-- [Provider Configuration](/docs/configuration/providers) - Per-provider retries
diff --git a/docs/content/docs/configuration/features/index.mdx b/docs/content/docs/configuration/features/index.mdx
index 4e58b9f..1422f73 100644
--- a/docs/content/docs/configuration/features/index.mdx
+++ b/docs/content/docs/configuration/features/index.mdx
@@ -15,12 +15,8 @@ The `[features]` section enables and configures optional gateway capabilities. A
 | [File Processing](/docs/configuration/features/file-processing)   | `[features.file_processing]`  | Document chunking, OCR, virus scanning            |
 | [Response Caching](/docs/configuration/features/response-caching) | `[features.response_caching]` | Exact and semantic response caching               |
 | [Guardrails](/docs/configuration/features/guardrails)             | `[features.guardrails]`       | Content filtering, PII detection, safety          |
-| [Web Search](/docs/configuration/features/web-search)             | `[features.web_search]`       | Web search tool providers                         |
-| [Code Execution](/docs/configuration/features/code-execution)     | `[features.code_execution]`   | Server-side code execution                        |
 | [Image Fetching](/docs/configuration/features/image-fetching)     | `[features.image_fetching]`   | URL-to-base64 conversion for non-OpenAI providers |
 | [WebSocket](/docs/configuration/features/websocket)               | `[features.websocket]`        | Real-time event subscriptions                     |
-| [Load Balancing](/docs/configuration/features/load-balancing)     | `[features.load_balancing]`   | Provider selection strategies                     |
-| [Fallback](/docs/configuration/features/fallback)                 | `[features.fallback]`         | Retry and provider fallback                       |
 | Model Catalog                                                     | `[features.model_catalog]`    | Enrich models with capabilities and pricing       |
 
 ## Minimal Configuration
@@ -107,21 +103,6 @@ timeout_secs = 30
 enabled = true
 require_auth = true
 
-# Load Balancing
-[features.load_balancing]
-strategy = "round_robin"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 30
-
-# Fallback & Retry
-[features.fallback]
-retries_enabled = true
-max_retries = 3
-fallback_enabled = true
-fallback_order = ["anthropic", "openai"]
-
 # Model Catalog
 [features.model_catalog]
 enabled = true
diff --git a/docs/content/docs/configuration/features/load-balancing.mdx b/docs/content/docs/configuration/features/load-balancing.mdx
deleted file mode 100644
index c994e22..0000000
--- a/docs/content/docs/configuration/features/load-balancing.mdx
+++ /dev/null
@@ -1,141 +0,0 @@
----
-title: Load Balancing
-description: Configure provider selection strategies and health checks
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.load_balancing]` section configures how requests are distributed across providers when multiple providers support the same model.
-
-## Configuration Reference
-
-### Main Settings
-
-```toml
-[features.load_balancing]
-strategy = "round_robin"
-```
-
-| Key        | Type   | Default         | Description             |
-| ---------- | ------ | --------------- | ----------------------- |
-| `strategy` | string | `"round_robin"` | Load balancing strategy |
-
-### Strategies
-
-| Strategy            | Description                                   |
-| ------------------- | --------------------------------------------- |
-| `round_robin`       | Cycle through providers sequentially          |
-| `least_connections` | Route to provider with fewest active requests |
-| `random`            | Random provider selection                     |
-| `weighted`          | Distribute based on configured weights        |
-| `latency_based`     | Route to lowest-latency provider              |
-| `cost_based`        | Route to lowest-cost provider                 |
-
-```toml
-# Round Robin (default)
-[features.load_balancing]
-strategy = "round_robin"
-
-# Latency-based
-[features.load_balancing]
-strategy = "latency_based"
-
-# Cost-based
-[features.load_balancing]
-strategy = "cost_based"
-```
-
-### Health Checks
-
-Configure provider health monitoring:
-
-```toml
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 30
-unhealthy_threshold = 3
-healthy_threshold = 2
-```
-
-| Key                   | Type    | Default | Description                            |
-| --------------------- | ------- | ------- | -------------------------------------- |
-| `enabled`             | boolean | `true`  | Enable health checks                   |
-| `interval_secs`       | integer | `30`    | Check interval in seconds              |
-| `unhealthy_threshold` | integer | `3`     | Consecutive failures to mark unhealthy |
-| `healthy_threshold`   | integer | `2`     | Consecutive successes to mark healthy  |
-
-## Complete Examples
-
-### Round Robin with Health Checks
-
-```toml
-[features.load_balancing]
-strategy = "round_robin"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 30
-unhealthy_threshold = 3
-healthy_threshold = 2
-```
-
-### Latency-Based
-
-```toml
-[features.load_balancing]
-strategy = "latency_based"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 15
-unhealthy_threshold = 2
-healthy_threshold = 1
-```
-
-### Cost-Based
-
-```toml
-[features.load_balancing]
-strategy = "cost_based"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 60
-unhealthy_threshold = 5
-healthy_threshold = 2
-```
-
-### Disabled Health Checks
-
-```toml
-[features.load_balancing]
-strategy = "random"
-
-[features.load_balancing.health_check]
-enabled = false
-```
-
-## Strategy Selection
-
-| Use Case          | Recommended Strategy |
-| ----------------- | -------------------- |
-| General workloads | `round_robin`        |
-| Latency-sensitive | `latency_based`      |
-| Cost optimization | `cost_based`         |
-| Variable load     | `least_connections`  |
-| Testing/debugging | `random`             |
-
-## Health Check Behavior
-
-```
-Provider A: ● ● ● ✗ ✗ ✗ → Unhealthy (3 consecutive failures)
-Provider B: ● ● ● ● ● ● → Healthy
-Provider C: ✗ ✗ ● ● ● ● → Healthy (2 consecutive successes)
-```
-
-Unhealthy providers are excluded from load balancing until they pass `healthy_threshold` consecutive checks.
-
-## See Also
-
-- [Fallback Configuration](/docs/configuration/features/fallback) - Retry and fallback settings
-- [Provider Configuration](/docs/configuration/providers) - Provider setup
diff --git a/docs/content/docs/configuration/features/meta.json b/docs/content/docs/configuration/features/meta.json
index 433c034..42aabe6 100644
--- a/docs/content/docs/configuration/features/meta.json
+++ b/docs/content/docs/configuration/features/meta.json
@@ -6,11 +6,7 @@
     "file-processing",
     "response-caching",
     "guardrails",
-    "web-search",
-    "code-execution",
     "image-fetching",
-    "websocket",
-    "load-balancing",
-    "fallback"
+    "websocket"
   ]
 }
diff --git a/docs/content/docs/configuration/features/web-search.mdx b/docs/content/docs/configuration/features/web-search.mdx
deleted file mode 100644
index fd0022e..0000000
--- a/docs/content/docs/configuration/features/web-search.mdx
+++ /dev/null
@@ -1,135 +0,0 @@
----
-title: Web Search
-description: Configure web search providers for the web_search tool
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.web_search]` section configures web search providers that can be used by the `web_search` tool in chat conversations.
-
-## Configuration Reference
-
-### Main Settings
-
-```toml
-[features.web_search]
-enabled = true
-default_provider = "tavily"
-max_results = 10
-```
-
-| Key                | Type    | Default | Description               |
-| ------------------ | ------- | ------- | ------------------------- |
-| `enabled`          | boolean | `true`  | Enable web search         |
-| `default_provider` | string  | none    | Default provider name     |
-| `max_results`      | integer | `10`    | Maximum results to return |
-
-## Providers
-
-### Tavily
-
-AI-optimized search API:
-
-```toml
-[[features.web_search.providers]]
-type = "tavily"
-api_key = "${TAVILY_API_KEY}"
-```
-
-| Key       | Type   | Description    |
-| --------- | ------ | -------------- |
-| `api_key` | string | Tavily API key |
-
-### Brave Search
-
-Privacy-focused search API:
-
-```toml
-[[features.web_search.providers]]
-type = "brave"
-api_key = "${BRAVE_SEARCH_API_KEY}"
-```
-
-| Key       | Type   | Description          |
-| --------- | ------ | -------------------- |
-| `api_key` | string | Brave Search API key |
-
-### Google Custom Search
-
-Google search via Custom Search JSON API:
-
-```toml
-[[features.web_search.providers]]
-type = "google"
-api_key = "${GOOGLE_API_KEY}"
-search_engine_id = "${GOOGLE_CSE_ID}"
-```
-
-| Key                | Type   | Description             |
-| ------------------ | ------ | ----------------------- |
-| `api_key`          | string | Google API key          |
-| `search_engine_id` | string | Custom Search Engine ID |
-
-### Bing Search
-
-Microsoft Bing Web Search API:
-
-```toml
-[[features.web_search.providers]]
-type = "bing"
-api_key = "${BING_SEARCH_API_KEY}"
-```
-
-| Key       | Type   | Description         |
-| --------- | ------ | ------------------- |
-| `api_key` | string | Bing Search API key |
-
-### SerpAPI
-
-Scraping-based search API (Google, Bing, etc.):
-
-```toml
-[[features.web_search.providers]]
-type = "serp"
-api_key = "${SERPAPI_KEY}"
-```
-
-| Key       | Type   | Description |
-| --------- | ------ | ----------- |
-| `api_key` | string | SerpAPI key |
-
-## Complete Example
-
-```toml
-[features.web_search]
-enabled = true
-default_provider = "tavily"
-max_results = 10
-
-[[features.web_search.providers]]
-type = "tavily"
-api_key = "${TAVILY_API_KEY}"
-
-[[features.web_search.providers]]
-type = "brave"
-api_key = "${BRAVE_SEARCH_API_KEY}"
-
-[[features.web_search.providers]]
-type = "google"
-api_key = "${GOOGLE_API_KEY}"
-search_engine_id = "${GOOGLE_CSE_ID}"
-```
-
-## Provider Comparison
-
-| Provider | Pricing       | Best For                                |
-| -------- | ------------- | --------------------------------------- |
-| Tavily   | Freemium      | AI/LLM applications, clean results      |
-| Brave    | Freemium      | Privacy-conscious, web search           |
-| Google   | Pay-per-query | Comprehensive results, custom filtering |
-| Bing     | Pay-per-query | Microsoft ecosystem                     |
-| SerpAPI  | Pay-per-query | Multiple search engines, SERP data      |
-
-<Callout type="info">
-  Web search results are formatted for LLM consumption and injected into the conversation context.
-</Callout>
diff --git a/src/app.rs b/src/app.rs
index 0bad0b0..1a76833 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -1357,34 +1357,22 @@ impl AppState {
         // Get embedding configuration with priority:
         // 1. file_search.embedding (explicit RAG config)
         // 2. response_caching.semantic.embedding (semantic cache config)
-        // 3. vector_search.embedding (legacy vector search config)
-        let embedding_config = file_search_config
-            .embedding
-            .as_ref()
-            .or_else(|| {
-                config
-                    .features
-                    .response_caching
-                    .as_ref()
-                    .and_then(|rc| rc.semantic.as_ref())
-                    .map(|sc| &sc.embedding)
-            })
-            .or_else(|| {
-                config
-                    .features
-                    .vector_search
-                    .as_ref()
-                    .map(|vs| &vs.embedding)
-            });
+        let embedding_config = file_search_config.embedding.as_ref().or_else(|| {
+            config
+                .features
+                .response_caching
+                .as_ref()
+                .and_then(|rc| rc.semantic.as_ref())
+                .map(|sc| &sc.embedding)
+        });
 
         let embedding_config = match embedding_config {
             Some(cfg) => cfg,
             None => {
                 tracing::warn!(
                     "File search is enabled but no embedding configuration found. \
-                     Configure [features.file_search.embedding], \
-                     [features.response_caching.semantic.embedding], or \
-                     [features.vector_search.embedding] to enable file search."
+                     Configure [features.file_search.embedding] or \
+                     [features.response_caching.semantic.embedding] to enable file search."
                 );
                 return None;
             }
diff --git a/src/cache/memory.rs b/src/cache/memory.rs
index b50587c..c4e8478 100644
--- a/src/cache/memory.rs
+++ b/src/cache/memory.rs
@@ -12,6 +12,10 @@ use std::{
 /// This prevents infinite spinning under extreme contention.
 const MAX_CAS_RETRIES: usize = 100;
 
+/// Upper bound on the eviction batch size used when the cache reaches capacity
+/// (small caches use 10% of `max_entries`); expired entries are evicted first, then LRU.
+const EVICTION_BATCH_SIZE: usize = 100;
+
 use async_trait::async_trait;
 use dashmap::DashMap;
 
@@ -89,7 +93,6 @@ pub struct MemoryCache {
     counters: Arc<DashMap<String, Arc<AtomicI64>>>,
     sets: Arc<DashMap<String, SetEntry>>,
     max_entries: usize,
-    eviction_batch_size: usize,
 }
 
 impl MemoryCache {
@@ -99,7 +102,6 @@ impl MemoryCache {
             counters: Arc::new(DashMap::new()),
             sets: Arc::new(DashMap::new()),
             max_entries: config.max_entries,
-            eviction_batch_size: config.eviction_batch_size.max(1),
         }
     }
 
@@ -117,8 +119,10 @@ impl MemoryCache {
             return;
         }
 
-        // Calculate how many entries to evict
-        let target_size = self.max_entries.saturating_sub(self.eviction_batch_size);
+        // Size the eviction batch at 10% of max_entries, clamped to [1, EVICTION_BATCH_SIZE],
+        // so that a small cache is not largely emptied by a single eviction pass.
+        let batch = (self.max_entries / 10).clamp(1, EVICTION_BATCH_SIZE);
+        let target_size = self.max_entries.saturating_sub(batch);
         let to_evict = current_len.saturating_sub(target_size);
 
         if to_evict == 0 {
@@ -487,13 +491,9 @@ mod tests {
         }
     }
 
-    fn test_config_with_eviction(
-        max_entries: usize,
-        eviction_batch_size: usize,
-    ) -> MemoryCacheConfig {
+    fn test_config_with_eviction(max_entries: usize) -> MemoryCacheConfig {
         MemoryCacheConfig {
             max_entries,
-            eviction_batch_size,
             ..Default::default()
         }
     }
@@ -969,8 +969,8 @@ mod tests {
 
     #[tokio::test]
     async fn test_lru_eviction_evicts_oldest() {
-        // max_entries=5, eviction_batch_size=2
-        let cache = MemoryCache::new(&test_config_with_eviction(5, 2));
+        // max_entries=5; eviction batch = max(1, 5/10) = 1, target_size = 4
+        let cache = MemoryCache::new(&test_config_with_eviction(5));
 
         // Fill cache with entries (with delays to ensure distinct access times)
         for i in 0..5 {
@@ -1021,8 +1021,8 @@ mod tests {
         .filter(|&&x| x)
         .count();
 
-        // After eviction, we should have fewer than 5 entries
-        // eviction_batch_size=2 means target is max_entries - 2 = 3 entries after eviction
+        // Eviction trims toward target_size = 4 (batch of 1 for max_entries=5), dropping the
+        // least recently accessed keys first, so at most 2 of key2/key3/key4 remain.
         assert!(
             remaining <= 2,
             "Expected at most 2 of key2/key3/key4 to remain, got {}",
@@ -1032,7 +1032,7 @@ mod tests {
 
     #[tokio::test]
     async fn test_lru_eviction_prefers_expired_first() {
-        let cache = MemoryCache::new(&test_config_with_eviction(4, 2));
+        let cache = MemoryCache::new(&test_config_with_eviction(4));
 
         // Add entries: some expired, some not
         cache
@@ -1088,7 +1088,7 @@ mod tests {
 
     #[tokio::test]
     async fn test_lru_no_eviction_below_capacity() {
-        let cache = MemoryCache::new(&test_config_with_eviction(10, 2));
+        let cache = MemoryCache::new(&test_config_with_eviction(10));
 
         // Add entries below capacity
         for i in 0..5 {
@@ -1114,7 +1114,7 @@ mod tests {
 
     #[tokio::test]
     async fn test_get_updates_last_accessed() {
-        let cache = MemoryCache::new(&test_config_with_eviction(3, 1));
+        let cache = MemoryCache::new(&test_config_with_eviction(3));
 
         // Add entries
         cache
diff --git a/src/config/cache.rs b/src/config/cache.rs
index 74b523a..ab5db97 100644
--- a/src/config/cache.rs
+++ b/src/config/cache.rs
@@ -59,11 +59,6 @@ pub struct MemoryCacheConfig {
     #[serde(default = "default_max_entries")]
     pub max_entries: usize,
 
-    /// Number of entries to evict when cache is full.
-    /// Eviction removes expired entries first, then uses LRU.
-    #[serde(default = "default_eviction_batch_size")]
-    pub eviction_batch_size: usize,
-
     /// Default TTL for cache entries in seconds.
     #[serde(default = "default_ttl")]
     pub default_ttl_secs: u64,
@@ -77,7 +72,6 @@ impl Default for MemoryCacheConfig {
     fn default() -> Self {
         Self {
             max_entries: default_max_entries(),
-            eviction_batch_size: default_eviction_batch_size(),
             default_ttl_secs: default_ttl(),
             ttl: CacheTtlConfig::default(),
         }
@@ -99,10 +93,6 @@ fn default_max_entries() -> usize {
     100_000
 }
 
-fn default_eviction_batch_size() -> usize {
-    100 // Evict 100 entries at a time when cache is full
-}
-
 fn default_ttl() -> u64 {
     3600 // 1 hour
 }
@@ -214,14 +204,6 @@ pub struct CacheTtlConfig {
     /// TTL for dynamic provider cache in seconds.
     #[serde(default = "default_provider_ttl")]
     pub provider_secs: u64,
-
-    /// TTL for daily spend cache in seconds.
-    #[serde(default = "default_daily_spend_ttl")]
-    pub daily_spend_secs: u64,
-
-    /// TTL for monthly spend cache in seconds.
-    #[serde(default = "default_monthly_spend_ttl")]
-    pub monthly_spend_secs: u64,
 }
 
 impl Default for CacheTtlConfig {
@@ -230,8 +212,6 @@ impl Default for CacheTtlConfig {
             api_key_secs: default_api_key_ttl(),
             rate_limit_secs: default_rate_limit_ttl(),
             provider_secs: default_provider_ttl(),
-            daily_spend_secs: default_daily_spend_ttl(),
-            monthly_spend_secs: default_monthly_spend_ttl(),
         }
     }
 }
@@ -247,11 +227,3 @@ fn default_rate_limit_ttl() -> u64 {
 fn default_provider_ttl() -> u64 {
     300 // 5 minutes
 }
-
-fn default_daily_spend_ttl() -> u64 {
-    86400 // 1 day
-}
-
-fn default_monthly_spend_ttl() -> u64 {
-    86400 * 32 // ~32 days
-}
diff --git a/src/config/features.rs b/src/config/features.rs
index 3a7caba..faa3d69 100644
--- a/src/config/features.rs
+++ b/src/config/features.rs
@@ -7,49 +7,20 @@ use super::{CircuitBreakerConfig, RetryConfig};
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
 #[serde(deny_unknown_fields)]
 pub struct FeaturesConfig {
-    /// Vector search / RAG features.
-    #[serde(default)]
-    pub vector_search: Option<VectorSearchConfig>,
-
     /// File search configuration for the Responses API.
     /// Enables server-side file_search tool execution for RAG.
     #[serde(default)]
     pub file_search: Option<FileSearchConfig>,
 
-    /// Web search features.
-    #[serde(default)]
-    pub web_search: Option<WebSearchConfig>,
-
-    /// Code execution features.
-    #[serde(default)]
-    pub code_execution: Option<CodeExecutionConfig>,
-
-    /// Content moderation (legacy - prefer guardrails for new deployments).
-    #[serde(default)]
-    pub moderation: Option<ModerationConfig>,
-
     /// Guardrails for content filtering, PII detection, and safety.
-    /// More comprehensive than the legacy moderation config, with support
-    /// for multiple providers, execution modes, and fine-grained actions.
+    /// Supports multiple providers, execution modes, and fine-grained actions.
     #[serde(default)]
     pub guardrails: Option<GuardrailsConfig>,
 
-    /// Prompt caching.
-    #[serde(default)]
-    pub prompt_caching: Option<PromptCachingConfig>,
-
     /// Response caching.
     #[serde(default)]
     pub response_caching: Option<ResponseCachingConfig>,
 
-    /// Fallback and retry configuration.
-    #[serde(default)]
-    pub fallback: FallbackConfig,
-
-    /// Load balancing configuration.
-    #[serde(default)]
-    pub load_balancing: LoadBalancingConfig,
-
     /// HTTP image URL fetching configuration.
     /// Controls how non-OpenAI providers (Anthropic, Bedrock, Vertex) handle
     /// HTTP image URLs in chat completion requests.
@@ -88,80 +59,6 @@ impl FeaturesConfig {
     }
 }
 
-// ─────────────────────────────────────────────────────────────────────────────
-// Vector Search
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Vector search configuration for RAG.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct VectorSearchConfig {
-    /// Enable vector search.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Vector database backend.
-    pub backend: VectorBackend,
-
-    /// Default number of results to retrieve.
-    #[serde(default = "default_top_k")]
-    pub default_top_k: usize,
-
-    /// Default similarity threshold (0.0-1.0).
-    #[serde(default = "default_similarity_threshold")]
-    pub similarity_threshold: f64,
-
-    /// Embedding configuration.
-    #[serde(default)]
-    pub embedding: EmbeddingConfig,
-}
-
-fn default_top_k() -> usize {
-    5
-}
-
-fn default_similarity_threshold() -> f64 {
-    0.7
-}
-
-/// Vector database backend.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(tag = "type", rename_all = "snake_case")]
-#[serde(deny_unknown_fields)]
-pub enum VectorBackend {
-    /// PostgreSQL with pgvector extension.
-    Pgvector,
-
-    /// Qdrant vector database.
-    Qdrant {
-        url: String,
-        #[serde(default)]
-        api_key: Option<String>,
-    },
-
-    /// Pinecone vector database.
-    Pinecone {
-        api_key: String,
-        environment: String,
-    },
-
-    /// Weaviate vector database.
-    Weaviate {
-        url: String,
-        #[serde(default)]
-        api_key: Option<String>,
-    },
-
-    /// ChromaDB.
-    Chroma {
-        url: String,
-        #[serde(default)]
-        api_key: Option<String>,
-    },
-}
-
 /// Embedding configuration.
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
@@ -776,10 +673,7 @@ pub struct FileProcessingQueueConfig {
     pub backend: FileProcessingQueueBackend,
 
     /// Connection URL for the queue backend.
-    /// Examples:
-    /// - Redis: "redis://localhost:6379"
-    /// - RabbitMQ: "amqp://guest:guest@localhost:5672"
-    /// - SQS: "https://sqs.us-east-1.amazonaws.com/123456789/queue-name"
+    /// Example: "redis://localhost:6379"
     pub url: String,
 
     /// Queue/topic name for processing jobs.
@@ -789,14 +683,6 @@ pub struct FileProcessingQueueConfig {
     /// Consumer group name (for Redis Streams).
     #[serde(default = "default_file_processing_consumer_group")]
     pub consumer_group: String,
-
-    /// AWS region (for SQS).
-    #[serde(default)]
-    pub region: Option<String>,
-
-    /// GCP project ID (for Pub/Sub).
-    #[serde(default)]
-    pub project_id: Option<String>,
 }
 
 impl FileProcessingQueueConfig {
@@ -808,15 +694,6 @@ impl FileProcessingQueueConfig {
         if self.queue_name.is_empty() {
             return Err("Queue name cannot be empty".to_string());
         }
-        match self.backend {
-            FileProcessingQueueBackend::Sqs if self.region.is_none() => {
-                return Err("SQS backend requires 'region' to be specified".to_string());
-            }
-            FileProcessingQueueBackend::PubSub if self.project_id.is_none() => {
-                return Err("Pub/Sub backend requires 'project_id' to be specified".to_string());
-            }
-            _ => {}
-        }
         Ok(())
     }
 }
@@ -829,18 +706,6 @@ pub enum FileProcessingQueueBackend {
     /// Redis Streams.
     /// Good for simple deployments, supports consumer groups.
     Redis,
-
-    /// RabbitMQ.
-    /// Full-featured message broker with routing capabilities.
-    RabbitMq,
-
-    /// AWS SQS.
-    /// Managed queue service, good for AWS deployments.
-    Sqs,
-
-    /// Google Cloud Pub/Sub.
-    /// Managed pub/sub service, good for GCP deployments.
-    PubSub,
 }
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -1161,201 +1026,6 @@ fn default_file_processing_consumer_group() -> String {
     "hadrian_workers".to_string()
 }
 
-// ─────────────────────────────────────────────────────────────────────────────
-// Web Search
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Web search configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct WebSearchConfig {
-    /// Enable web search.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Web search providers.
-    pub providers: Vec<WebSearchProvider>,
-
-    /// Default provider.
-    #[serde(default)]
-    pub default_provider: Option<String>,
-
-    /// Maximum results to return.
-    #[serde(default = "default_max_results")]
-    pub max_results: usize,
-}
-
-fn default_max_results() -> usize {
-    10
-}
-
-/// Web search provider configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(tag = "type", rename_all = "snake_case")]
-#[serde(deny_unknown_fields)]
-pub enum WebSearchProvider {
-    /// Tavily search API.
-    Tavily { api_key: String },
-
-    /// Brave Search API.
-    Brave { api_key: String },
-
-    /// Google Custom Search.
-    Google {
-        api_key: String,
-        search_engine_id: String,
-    },
-
-    /// Bing Search API.
-    Bing { api_key: String },
-
-    /// SerpAPI.
-    Serp { api_key: String },
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Code Execution
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Code execution configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct CodeExecutionConfig {
-    /// Enable code execution.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Execution mode.
-    #[serde(default)]
-    pub mode: CodeExecutionMode,
-
-    /// Allowed languages.
-    #[serde(default = "default_languages")]
-    pub allowed_languages: Vec<String>,
-
-    /// Execution timeout in seconds.
-    #[serde(default = "default_execution_timeout")]
-    pub timeout_secs: u64,
-
-    /// Maximum memory in MB.
-    #[serde(default = "default_max_memory")]
-    pub max_memory_mb: u64,
-}
-
-fn default_languages() -> Vec<String> {
-    vec!["python".into(), "javascript".into()]
-}
-
-fn default_execution_timeout() -> u64 {
-    30
-}
-
-fn default_max_memory() -> u64 {
-    256
-}
-
-/// Code execution mode.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum CodeExecutionMode {
-    /// Execute in browser via WASM (Python via Pyodide, JS native).
-    #[default]
-    Wasm,
-
-    /// Execute on server in sandboxed containers.
-    Sandboxed {
-        /// Container runtime.
-        runtime: ContainerRuntime,
-    },
-
-    /// External code execution service.
-    External {
-        /// Service URL.
-        url: String,
-        /// API key.
-        api_key: Option<String>,
-    },
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum ContainerRuntime {
-    Docker,
-    Firecracker,
-    Gvisor,
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Moderation
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Content moderation configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct ModerationConfig {
-    /// Enable moderation.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Check input before sending to provider.
-    #[serde(default = "default_true")]
-    pub check_input: bool,
-
-    /// Check output before returning to user.
-    #[serde(default)]
-    pub check_output: bool,
-
-    /// Moderation provider.
-    #[serde(default)]
-    pub provider: ModerationProvider,
-
-    /// Action to take on flagged content.
-    #[serde(default)]
-    pub action: ModerationAction,
-
-    /// Categories to check.
-    #[serde(default)]
-    pub categories: Vec<String>,
-
-    /// Threshold for flagging (0.0-1.0).
-    #[serde(default = "default_moderation_threshold")]
-    pub threshold: f64,
-}
-
-fn default_moderation_threshold() -> f64 {
-    0.8
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum ModerationProvider {
-    #[default]
-    OpenAi,
-    Custom {
-        url: String,
-    },
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum ModerationAction {
-    /// Block the request and return an error.
-    #[default]
-    Block,
-    /// Allow but log the flagged content.
-    Log,
-    /// Add a warning to the response.
-    Warn,
-}
-
 // ─────────────────────────────────────────────────────────────────────────────
 // Guardrails
 // ─────────────────────────────────────────────────────────────────────────────
@@ -2072,24 +1742,6 @@ fn default_pii_replacement() -> String {
 // Caching
 // ─────────────────────────────────────────────────────────────────────────────
 
-/// Prompt caching configuration (provider-level caching like Anthropic's).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct PromptCachingConfig {
-    /// Enable prompt caching.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Minimum prompt length to cache (in tokens).
-    #[serde(default = "default_min_cache_tokens")]
-    pub min_tokens: u32,
-}
-
-fn default_min_cache_tokens() -> u32 {
-    1024
-}
-
 /// Response caching configuration (gateway-level caching).
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
@@ -2396,181 +2048,6 @@ pub struct CacheKeyComponents {
     pub tools: bool,
 }
 
-// ─────────────────────────────────────────────────────────────────────────────
-// Fallback & Retry
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Fallback and retry configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct FallbackConfig {
-    /// Enable automatic retries.
-    #[serde(default = "default_true")]
-    pub retries_enabled: bool,
-
-    /// Maximum number of retries.
-    #[serde(default = "default_max_retries")]
-    pub max_retries: u32,
-
-    /// Initial retry delay in milliseconds.
-    #[serde(default = "default_retry_delay")]
-    pub initial_delay_ms: u64,
-
-    /// Maximum retry delay in milliseconds.
-    #[serde(default = "default_max_retry_delay")]
-    pub max_delay_ms: u64,
-
-    /// Retry backoff multiplier.
-    #[serde(default = "default_backoff_multiplier")]
-    pub backoff_multiplier: f64,
-
-    /// Enable fallback to alternative providers.
-    #[serde(default)]
-    pub fallback_enabled: bool,
-
-    /// Fallback provider order.
-    #[serde(default)]
-    pub fallback_order: Vec<String>,
-
-    /// Errors that trigger fallback.
-    #[serde(default = "default_fallback_errors")]
-    pub fallback_on: Vec<FallbackTrigger>,
-}
-
-impl Default for FallbackConfig {
-    fn default() -> Self {
-        Self {
-            retries_enabled: true,
-            max_retries: default_max_retries(),
-            initial_delay_ms: default_retry_delay(),
-            max_delay_ms: default_max_retry_delay(),
-            backoff_multiplier: default_backoff_multiplier(),
-            fallback_enabled: false,
-            fallback_order: vec![],
-            fallback_on: default_fallback_errors(),
-        }
-    }
-}
-
-fn default_max_retries() -> u32 {
-    3
-}
-
-fn default_retry_delay() -> u64 {
-    1000
-}
-
-fn default_max_retry_delay() -> u64 {
-    30000
-}
-
-fn default_backoff_multiplier() -> f64 {
-    2.0
-}
-
-fn default_fallback_errors() -> Vec<FallbackTrigger> {
-    vec![
-        FallbackTrigger::RateLimit,
-        FallbackTrigger::ServerError,
-        FallbackTrigger::Timeout,
-    ]
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum FallbackTrigger {
-    RateLimit,
-    ServerError,
-    Timeout,
-    Overloaded,
-    ContextLength,
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Load Balancing
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Load balancing configuration.
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct LoadBalancingConfig {
-    /// Load balancing strategy.
-    #[serde(default)]
-    pub strategy: LoadBalanceStrategy,
-
-    /// Health check configuration.
-    #[serde(default)]
-    pub health_check: HealthCheckConfig,
-}
-
-/// Load balancing strategy.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum LoadBalanceStrategy {
-    /// Round-robin across providers.
-    #[default]
-    RoundRobin,
-    /// Route to least-loaded provider.
-    LeastConnections,
-    /// Random selection.
-    Random,
-    /// Weighted distribution.
-    Weighted,
-    /// Route based on latency.
-    LatencyBased,
-    /// Route based on cost.
-    CostBased,
-}
-
-/// Health check configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct HealthCheckConfig {
-    /// Enable health checks.
-    #[serde(default = "default_true")]
-    pub enabled: bool,
-
-    /// Health check interval in seconds.
-    #[serde(default = "default_health_interval")]
-    pub interval_secs: u64,
-
-    /// Unhealthy threshold (consecutive failures).
-    #[serde(default = "default_unhealthy_threshold")]
-    pub unhealthy_threshold: u32,
-
-    /// Healthy threshold (consecutive successes).
-    #[serde(default = "default_healthy_threshold")]
-    pub healthy_threshold: u32,
-}
-
-impl Default for HealthCheckConfig {
-    fn default() -> Self {
-        Self {
-            enabled: true,
-            interval_secs: default_health_interval(),
-            unhealthy_threshold: default_unhealthy_threshold(),
-            healthy_threshold: default_healthy_threshold(),
-        }
-    }
-}
-
-fn default_health_interval() -> u64 {
-    30
-}
-
-fn default_unhealthy_threshold() -> u32 {
-    3
-}
-
-fn default_healthy_threshold() -> u32 {
-    2
-}
-
 // ─────────────────────────────────────────────────────────────────────────────
 // Image Fetching
 // ─────────────────────────────────────────────────────────────────────────────
@@ -4192,73 +3669,6 @@ mod tests {
         assert_eq!(queue.consumer_group, "my_workers");
     }
 
-    #[test]
-    fn test_file_processing_config_queue_mode_sqs() {
-        let config: FileProcessingConfig = toml::from_str(
-            r#"
-            mode = "queue"
-
-            [queue]
-            backend = "sqs"
-            url = "https://sqs.us-east-1.amazonaws.com/123456789/my-queue"
-            queue_name = "my-queue"
-            region = "us-east-1"
-            "#,
-        )
-        .unwrap();
-
-        assert_eq!(config.mode, FileProcessingMode::Queue);
-        assert!(config.validate().is_ok());
-
-        let queue = config.queue.unwrap();
-        assert_eq!(queue.backend, FileProcessingQueueBackend::Sqs);
-        assert_eq!(queue.region, Some("us-east-1".to_string()));
-    }
-
-    #[test]
-    fn test_file_processing_config_queue_mode_pubsub() {
-        let config: FileProcessingConfig = toml::from_str(
-            r#"
-            mode = "queue"
-
-            [queue]
-            backend = "pub_sub"
-            url = "https://pubsub.googleapis.com"
-            queue_name = "file-processing-topic"
-            project_id = "my-gcp-project"
-            "#,
-        )
-        .unwrap();
-
-        assert_eq!(config.mode, FileProcessingMode::Queue);
-        assert!(config.validate().is_ok());
-
-        let queue = config.queue.unwrap();
-        assert_eq!(queue.backend, FileProcessingQueueBackend::PubSub);
-        assert_eq!(queue.project_id, Some("my-gcp-project".to_string()));
-    }
-
-    #[test]
-    fn test_file_processing_config_queue_mode_rabbitmq() {
-        let config: FileProcessingConfig = toml::from_str(
-            r#"
-            mode = "queue"
-
-            [queue]
-            backend = "rabbit_mq"
-            url = "amqp://guest:guest@localhost:5672"
-            queue_name = "file_processing"
-            "#,
-        )
-        .unwrap();
-
-        assert_eq!(config.mode, FileProcessingMode::Queue);
-        assert!(config.validate().is_ok());
-
-        let queue = config.queue.unwrap();
-        assert_eq!(queue.backend, FileProcessingQueueBackend::RabbitMq);
-    }
-
     #[test]
     fn test_file_processing_config_queue_mode_missing_config() {
         let config: FileProcessingConfig = toml::from_str(
@@ -4272,40 +3682,6 @@ mod tests {
         assert!(config.validate().is_err());
     }
 
-    #[test]
-    fn test_file_processing_config_sqs_missing_region() {
-        let config: FileProcessingConfig = toml::from_str(
-            r#"
-            mode = "queue"
-
-            [queue]
-            backend = "sqs"
-            url = "https://sqs.us-east-1.amazonaws.com/123456789/my-queue"
-            queue_name = "my-queue"
-            "#,
-        )
-        .unwrap();
-
-        assert!(config.validate().is_err());
-    }
-
-    #[test]
-    fn test_file_processing_config_pubsub_missing_project() {
-        let config: FileProcessingConfig = toml::from_str(
-            r#"
-            mode = "queue"
-
-            [queue]
-            backend = "pub_sub"
-            url = "https://pubsub.googleapis.com"
-            queue_name = "file-processing-topic"
-            "#,
-        )
-        .unwrap();
-
-        assert!(config.validate().is_err());
-    }
-
     #[test]
     fn test_file_processing_config_max_size_bytes() {
         let config = FileProcessingConfig {
diff --git a/src/config/limits.rs b/src/config/limits.rs
index 6e02efd..ded9e1e 100644
--- a/src/config/limits.rs
+++ b/src/config/limits.rs
@@ -17,10 +17,6 @@ pub struct LimitsConfig {
     #[serde(default)]
     pub budgets: BudgetDefaults,
 
-    /// Token limits.
-    #[serde(default)]
-    pub tokens: TokenLimitDefaults,
-
     /// Resource limits for entity counts.
     #[serde(default)]
     pub resource_limits: ResourceLimits,
@@ -225,15 +221,6 @@ pub struct BudgetDefaults {
     #[serde(default = "default_warning_threshold")]
     pub warning_threshold: f64,
 
-    /// Hard limit action when budget is exceeded.
-    #[serde(default)]
-    pub exceeded_action: BudgetExceededAction,
-
-    /// Allow overage up to this percentage above the budget.
-    /// E.g., 0.1 means 10% overage is allowed.
-    #[serde(default)]
-    pub allowed_overage: f64,
-
     /// Estimated cost per request in cents for budget reservation.
     /// This is reserved before the request is processed to prevent race conditions.
     /// After the request completes, the actual cost replaces the estimate.
@@ -248,8 +235,6 @@ impl Default for BudgetDefaults {
             monthly_budget_usd: None,
             daily_budget_usd: None,
             warning_threshold: default_warning_threshold(),
-            exceeded_action: BudgetExceededAction::default(),
-            allowed_overage: 0.0,
             estimated_cost_cents: default_estimated_cost_cents(),
         }
     }
@@ -262,54 +247,3 @@ fn default_estimated_cost_cents() -> i64 {
 fn default_warning_threshold() -> f64 {
     0.8 // 80%
 }
-
-/// Action to take when budget is exceeded.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum BudgetExceededAction {
-    /// Block the request.
-    #[default]
-    Block,
-    /// Allow the request but log a warning.
-    Warn,
-    /// Allow but throttle (reduce rate limits).
-    Throttle,
-}
-
-/// Token limit defaults.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct TokenLimitDefaults {
-    /// Maximum input tokens per request.
-    #[serde(default)]
-    pub max_input_tokens: Option<u32>,
-
-    /// Maximum output tokens per request.
-    #[serde(default)]
-    pub max_output_tokens: Option<u32>,
-
-    /// Maximum total tokens per request (input + output).
-    #[serde(default)]
-    pub max_total_tokens: Option<u32>,
-
-    /// Default max_tokens if not specified in the request.
-    #[serde(default = "default_max_tokens")]
-    pub default_max_tokens: u32,
-}
-
-impl Default for TokenLimitDefaults {
-    fn default() -> Self {
-        Self {
-            max_input_tokens: None,
-            max_output_tokens: None,
-            max_total_tokens: None,
-            default_max_tokens: default_max_tokens(),
-        }
-    }
-}
-
-fn default_max_tokens() -> u32 {
-    4096
-}
diff --git a/src/config/observability.rs b/src/config/observability.rs
index 171435a..3deb01e 100644
--- a/src/config/observability.rs
+++ b/src/config/observability.rs
@@ -19,10 +19,6 @@ pub struct ObservabilityConfig {
     #[serde(default)]
     pub metrics: MetricsConfig,
 
-    /// Request/response logging.
-    #[serde(default)]
-    pub request_logging: RequestLoggingConfig,
-
     /// Usage logging configuration.
     #[serde(default)]
     pub usage: UsageConfig,
@@ -384,7 +380,7 @@ pub struct TracingConfig {
 }
 
 fn default_service_name() -> String {
-    "ai-gateway".to_string()
+    "hadrian".to_string()
 }
 
 /// OTLP exporter configuration.
@@ -573,102 +569,6 @@ fn default_metrics_path() -> String {
     "/metrics".to_string()
 }
 
-// ─────────────────────────────────────────────────────────────────────────────
-// Request Logging
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Request/response logging configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct RequestLoggingConfig {
-    /// Enable request logging.
-    #[serde(default)]
-    pub enabled: bool,
-
-    /// Log request bodies.
-    #[serde(default)]
-    pub log_request_body: bool,
-
-    /// Log response bodies.
-    #[serde(default)]
-    pub log_response_body: bool,
-
-    /// Maximum body size to log (in bytes).
-    #[serde(default = "default_max_body_log")]
-    pub max_body_size: usize,
-
-    /// Redact sensitive fields.
-    #[serde(default = "default_true")]
-    pub redact_sensitive: bool,
-
-    /// Fields to redact.
-    #[serde(default = "default_redact_fields")]
-    pub redact_fields: Vec<String>,
-
-    /// Log to separate destination.
-    #[serde(default)]
-    pub destination: Option<LogDestination>,
-}
-
-impl Default for RequestLoggingConfig {
-    fn default() -> Self {
-        Self {
-            enabled: false,
-            log_request_body: false,
-            log_response_body: false,
-            max_body_size: default_max_body_log(),
-            redact_sensitive: true,
-            redact_fields: default_redact_fields(),
-            destination: None,
-        }
-    }
-}
-
-fn default_max_body_log() -> usize {
-    10 * 1024 // 10 KB
-}
-
-fn default_redact_fields() -> Vec<String> {
-    vec![
-        "api_key".into(),
-        "password".into(),
-        "secret".into(),
-        "authorization".into(),
-    ]
-}
-
-/// Log destination for request logging.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(tag = "type", rename_all = "snake_case")]
-#[serde(deny_unknown_fields)]
-pub enum LogDestination {
-    /// Log to file.
-    File {
-        path: String,
-        #[serde(default)]
-        rotation: Option<LogRotation>,
-    },
-    /// Log to stdout/stderr (same as regular logs).
-    Stdout,
-    /// Send to external service.
-    Http {
-        url: String,
-        #[serde(default)]
-        headers: HashMap<String, String>,
-    },
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum LogRotation {
-    Daily,
-    Hourly,
-    Size { max_bytes: usize },
-}
-
 fn default_true() -> bool {
     true
 }
diff --git a/src/config/providers.rs b/src/config/providers.rs
index 0eccba4..4b779de 100644
--- a/src/config/providers.rs
+++ b/src/config/providers.rs
@@ -66,6 +66,7 @@ pub struct ModelFallback {
 /// ```
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `pricing`
 pub struct ModelConfig {
     /// Pricing fields (flattened inline).
     #[serde(flatten)]
@@ -123,6 +124,7 @@ pub struct ModelConfig {
 /// to determine which API protocol to use.
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `providers` HashMap
 pub struct ProvidersConfig {
     /// Default provider name for requests that don't specify one.
     #[serde(default)]
@@ -588,10 +590,6 @@ pub struct OpenAiProviderConfig {
     #[serde(default)]
     pub headers: HashMap<String, String>,
 
-    /// Whether this provider supports streaming (default: true).
-    #[serde(default = "default_true")]
-    pub supports_streaming: bool,
-
     /// Whether this provider supports function/tool calling.
     #[serde(default)]
     pub supports_tools: bool,
@@ -660,7 +658,6 @@ impl std::fmt::Debug for OpenAiProviderConfig {
             .field("allowed_models", &self.allowed_models)
             .field("model_aliases", &self.model_aliases)
             .field("headers", &self.headers)
-            .field("supports_streaming", &self.supports_streaming)
             .field("supports_tools", &self.supports_tools)
             .field("supports_vision", &self.supports_vision)
             .field("models", &self.models)
@@ -2780,7 +2777,6 @@ mod tests {
             allowed_models: vec![],
             model_aliases: HashMap::new(),
             headers: HashMap::new(),
-            supports_streaming: true,
             supports_tools: false,
             supports_vision: false,
             models: HashMap::new(),
diff --git a/src/config/secrets.rs b/src/config/secrets.rs
index 48f6d7d..24a7be5 100644
--- a/src/config/secrets.rs
+++ b/src/config/secrets.rs
@@ -42,6 +42,7 @@ impl SecretsConfig {
 /// Configuration for Vault/OpenBao secrets manager.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `auth`
 pub struct VaultSecretsConfig {
     /// Vault server address (e.g., "https://vault.example.com:8200")
     pub address: String,
@@ -153,6 +154,7 @@ fn default_k8s_token_path() -> String {
 /// Configuration for AWS Secrets Manager.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
 pub struct AwsSecretsConfig {
     /// AWS region (e.g., "us-east-1"). If not set, uses AWS_REGION environment variable.
     #[serde(default)]
@@ -176,6 +178,7 @@ fn default_aws_prefix() -> String {
 /// Configuration for Azure Key Vault.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
 pub struct AzureKeyVaultSecretsConfig {
     /// Key Vault URL (e.g., "https://myvault.vault.azure.net")
     pub vault_url: String,
@@ -195,6 +198,7 @@ fn default_azure_prefix() -> String {
 /// Configuration for GCP Secret Manager.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
 pub struct GcpSecretsConfig {
     /// GCP project ID
     pub project_id: String,
diff --git a/src/config/server.rs b/src/config/server.rs
index bca41a4..fc8be77 100644
--- a/src/config/server.rs
+++ b/src/config/server.rs
@@ -18,11 +18,6 @@ pub struct ServerConfig {
     #[serde(default = "default_port")]
     pub port: u16,
 
-    /// Base path for all API routes (e.g., "/api/v1").
-    /// The UI is always served from "/".
-    #[serde(default)]
-    pub api_base_path: Option<String>,
-
     /// Request body size limit in bytes.
     #[serde(default = "default_body_limit")]
     pub body_limit_bytes: usize,
@@ -51,10 +46,6 @@ pub struct ServerConfig {
     #[serde(default = "default_streaming_idle_timeout")]
     pub streaming_idle_timeout_secs: u64,
 
-    /// Enable HTTP/2 (requires TLS or h2c).
-    #[serde(default)]
-    pub http2: bool,
-
     /// TLS configuration. If omitted, serves plain HTTP.
     /// In production, TLS is typically terminated at the load balancer.
     #[serde(default)]
@@ -100,12 +91,10 @@ impl Default for ServerConfig {
         Self {
             host: default_host(),
             port: default_port(),
-            api_base_path: None,
             body_limit_bytes: default_body_limit(),
             max_response_body_bytes: default_max_response_body(),
             timeout_secs: default_timeout(),
             streaming_idle_timeout_secs: default_streaming_idle_timeout(),
-            http2: false,
             tls: None,
             trusted_proxies: TrustedProxiesConfig::default(),
             cors: CorsConfig::default(),
@@ -424,7 +413,8 @@ pub struct SecurityHeadersConfig {
     pub content_security_policy: Option<String>,
 
     /// X-XSS-Protection header value.
-    /// Legacy header for older browsers. Default: "1; mode=block"
+    /// Legacy header for older browsers. Default: `None` (disabled), since CSP provides protection.
+    /// Set a value such as "1; mode=block" only for legacy browser compatibility.
     #[serde(default = "default_xss_protection")]
     pub xss_protection: Option<String>,
 
@@ -488,7 +478,7 @@ fn default_csp() -> Option<String> {
 }
 
 fn default_xss_protection() -> Option<String> {
-    Some("1; mode=block".to_string())
+    None
 }
 
 fn default_referrer_policy() -> Option<String> {
diff --git a/src/config/ui.rs b/src/config/ui.rs
index d3e07e5..6ed954a 100644
--- a/src/config/ui.rs
+++ b/src/config/ui.rs
@@ -63,14 +63,6 @@ pub struct AssetsConfig {
     /// Cache control header for static assets.
     #[serde(default = "default_cache_control")]
     pub cache_control: String,
-
-    /// Enable gzip compression.
-    #[serde(default = "default_true")]
-    pub gzip: bool,
-
-    /// Enable brotli compression.
-    #[serde(default = "default_true")]
-    pub brotli: bool,
 }
 
 impl Default for AssetsConfig {
@@ -78,8 +70,6 @@ impl Default for AssetsConfig {
         Self {
             source: AssetSource::default(),
             cache_control: default_cache_control(),
-            gzip: true,
-            brotli: true,
         }
     }
 }
@@ -122,25 +112,9 @@ pub struct ChatConfig {
     #[serde(default)]
     pub available_models: Vec<String>,
 
-    /// Enable conversation history.
-    #[serde(default = "default_true")]
-    pub history_enabled: bool,
-
-    /// Maximum conversations to store per user.
-    #[serde(default = "default_max_conversations")]
-    pub max_conversations: usize,
-
     /// Enable file uploads.
     #[serde(default)]
     pub file_uploads: FileUploadConfig,
-
-    /// Enable code execution in chat.
-    #[serde(default)]
-    pub code_execution: bool,
-
-    /// Enable web search in chat.
-    #[serde(default)]
-    pub web_search: bool,
 }
 
 impl Default for ChatConfig {
@@ -149,19 +123,11 @@ impl Default for ChatConfig {
             enabled: true,
             default_model: None,
             available_models: vec![],
-            history_enabled: true,
-            max_conversations: default_max_conversations(),
             file_uploads: FileUploadConfig::default(),
-            code_execution: false,
-            web_search: false,
         }
     }
 }
 
-fn default_max_conversations() -> usize {
-    100
-}
-
 /// File upload configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
@@ -250,11 +216,6 @@ pub struct AdminConfig {
     /// Path for admin panel.
     #[serde(default = "default_admin_path")]
     pub path: String,
-
-    /// Roles that can access the admin panel.
-    /// If empty, any authenticated user with admin flag can access.
-    #[serde(default)]
-    pub allowed_roles: Vec<String>,
 }
 
 impl Default for AdminConfig {
@@ -262,7 +223,6 @@ impl Default for AdminConfig {
         Self {
             enabled: true,
             path: default_admin_path(),
-            allowed_roles: vec![],
         }
     }
 }
diff --git a/src/init.rs b/src/init.rs
index d3466a3..2f4d719 100644
--- a/src/init.rs
+++ b/src/init.rs
@@ -219,24 +219,14 @@ pub(crate) async fn init_worker_embedding_service(
         }
     };
 
-    let embedding_config = file_search_config
-        .embedding
-        .as_ref()
-        .or_else(|| {
-            config
-                .features
-                .response_caching
-                .as_ref()
-                .and_then(|rc| rc.semantic.as_ref())
-                .map(|sc| &sc.embedding)
-        })
-        .or_else(|| {
-            config
-                .features
-                .vector_search
-                .as_ref()
-                .map(|vs| &vs.embedding)
-        });
+    let embedding_config = file_search_config.embedding.as_ref().or_else(|| {
+        config
+            .features
+            .response_caching
+            .as_ref()
+            .and_then(|rc| rc.semantic.as_ref())
+            .map(|sc| &sc.embedding)
+    });
 
     let embedding_config = match embedding_config {
         Some(cfg) => cfg,
diff --git a/src/routing/resolver.rs b/src/routing/resolver.rs
index 58f2ec7..4d7e259 100644
--- a/src/routing/resolver.rs
+++ b/src/routing/resolver.rs
@@ -600,7 +600,6 @@ pub async fn dynamic_provider_to_config(
                 allowed_models: provider.models.clone(),
                 model_aliases: std::collections::HashMap::new(),
                 headers: std::collections::HashMap::new(),
-                supports_streaming: true,
                 supports_tools: false,
                 supports_vision: false,
                 models: std::collections::HashMap::new(),
diff --git a/src/services/document_processor.rs b/src/services/document_processor.rs
index 4d8dd6e..6e55bd4 100644
--- a/src/services/document_processor.rs
+++ b/src/services/document_processor.rs
@@ -130,16 +130,6 @@ pub enum QueueBackend {
         queue_name: String,
         consumer_group: String,
     },
-    /// RabbitMQ
-    RabbitMQ {
-        url: String,
-        exchange: String,
-        queue_name: String,
-    },
-    /// AWS SQS
-    Sqs { queue_url: String, region: String },
-    /// Google Cloud Pub/Sub
-    PubSub { project_id: String, topic: String },
 }
 
 /// Configuration for the document processor.
@@ -212,19 +202,6 @@ fn convert_queue_config(queue: &FileProcessingQueueConfig) -> QueueBackend {
             queue_name: queue.queue_name.clone(),
             consumer_group: queue.consumer_group.clone(),
         },
-        FileProcessingQueueBackend::RabbitMq => QueueBackend::RabbitMQ {
-            url: queue.url.clone(),
-            exchange: "hadrian".to_string(), // Default exchange name
-            queue_name: queue.queue_name.clone(),
-        },
-        FileProcessingQueueBackend::Sqs => QueueBackend::Sqs {
-            queue_url: queue.url.clone(),
-            region: queue.region.clone().unwrap_or_default(),
-        },
-        FileProcessingQueueBackend::PubSub => QueueBackend::PubSub {
-            project_id: queue.project_id.clone().unwrap_or_default(),
-            topic: queue.queue_name.clone(),
-        },
     }
 }
 
@@ -1495,21 +1472,6 @@ impl DocumentProcessor {
                         .to_string(),
                 ));
             }
-            Some(QueueBackend::RabbitMQ { url, .. }) => {
-                return Err(DocumentProcessorError::Configuration(format!(
-                    "RabbitMQ queue publishing not yet implemented (url: {url})"
-                )));
-            }
-            Some(QueueBackend::Sqs { queue_url, .. }) => {
-                return Err(DocumentProcessorError::Configuration(format!(
-                    "SQS queue publishing not yet implemented (queue: {queue_url})"
-                )));
-            }
-            Some(QueueBackend::PubSub { topic, .. }) => {
-                return Err(DocumentProcessorError::Configuration(format!(
-                    "Pub/Sub queue publishing not yet implemented (topic: {topic})"
-                )));
-            }
             None => {
                 error!("Queue mode enabled but no queue backend configured");
                 otel_span_error!("Queue backend not configured");
@@ -1619,15 +1581,6 @@ pub async fn start_file_processing_worker(
                 "Redis queue configured but the 'redis' feature is not enabled. Rebuild with: cargo build --features redis"
             );
         }
-        QueueBackend::RabbitMQ { .. } => {
-            tracing::error!("RabbitMQ worker not yet implemented");
-        }
-        QueueBackend::Sqs { .. } => {
-            tracing::error!("SQS worker not yet implemented");
-        }
-        QueueBackend::PubSub { .. } => {
-            tracing::error!("Pub/Sub worker not yet implemented");
-        }
     }
 }
 
diff --git a/src/tests/provider_e2e.rs b/src/tests/provider_e2e.rs
index 8161cc2..3bdaa58 100644
--- a/src/tests/provider_e2e.rs
+++ b/src/tests/provider_e2e.rs
@@ -822,7 +822,6 @@ type = "{}"
 base_url = "{}"
 api_key = "test-api-key"
 timeout_secs = 30
-supports_streaming = true
 supports_tools = true
 supports_vision = true
 
@@ -2459,7 +2458,6 @@ type = "open_ai"
 base_url = "{}"
 api_key = "test-api-key"
 timeout_secs = 30
-supports_streaming = true
 supports_tools = true
 
 # Circuit breaker configuration