diff --git a/CLAUDE.md b/CLAUDE.md
index 56e02ae..f54bd09 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -390,15 +390,10 @@ See `agent_instructions/adding_admin_endpoint.md` for implementation patterns (r
- `[features.file_processing]` — RAG document ingestion (text extraction, OCR, chunking)
- `[features.guardrails]` — Input/output guardrails (blocklist, PII detection, moderation APIs)
- `[features.response_caching]` — Response caching with optional semantic similarity matching
-- `[features.prompt_caching]` — Anthropic prompt caching support
- `[features.image_fetching]` — Fetch images from URLs for vision models
-- `[features.web_search]` — Web search tool integration
-- `[features.code_execution]` — Server-side code execution
- `[features.model_catalog]` — Model metadata enrichment from models.dev
- `[features.websocket]` — WebSocket for real-time events
- `[features.vector_store_cleanup]` — Background cleanup for soft-deleted vector stores
-- `[features.fallback]` — Fallback and retry configuration
-- `[features.load_balancing]` — Load balancing configuration
## Caching
diff --git a/docs/content/docs/configuration/features/code-execution.mdx b/docs/content/docs/configuration/features/code-execution.mdx
deleted file mode 100644
index 3a771b5..0000000
--- a/docs/content/docs/configuration/features/code-execution.mdx
+++ /dev/null
@@ -1,142 +0,0 @@
----
-title: Code Execution
-description: Configure server-side code execution capabilities
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.code_execution]` section configures server-side code execution. By default, code execution happens client-side via WebAssembly in the browser (Pyodide for Python, QuickJS for JavaScript).
-
-
- Client-side WASM execution requires no server configuration. This section is only needed for
- server-side sandboxed execution.
-
-
-## Configuration Reference
-
-### Main Settings
-
-```toml
-[features.code_execution]
-enabled = true
-mode = "wasm"
-allowed_languages = ["python", "javascript"]
-timeout_secs = 30
-max_memory_mb = 256
-```
-
-| Key | Type | Default | Description |
-| ------------------- | ------- | -------------------------- | ---------------------------- |
-| `enabled` | boolean | `true` | Enable code execution |
-| `mode` | string | `"wasm"` | Execution mode |
-| `allowed_languages` | array | `["python", "javascript"]` | Allowed languages |
-| `timeout_secs` | integer | `30` | Execution timeout |
-| `max_memory_mb` | integer | `256` | Maximum memory per execution |
-
-## Execution Modes
-
-### WASM (Default)
-
-Client-side execution in the browser:
-
-```toml
-[features.code_execution]
-enabled = true
-mode = "wasm"
-```
-
-No server-side configuration needed. Runs via:
-
-- **Python**: Pyodide (numpy, pandas, matplotlib available)
-- **JavaScript**: QuickJS (sandboxed)
-
-### Sandboxed
-
-Server-side execution in containers:
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "bash"]
-timeout_secs = 60
-max_memory_mb = 512
-
-[features.code_execution.mode]
-sandboxed = { runtime = "docker" }
-```
-
-| Runtime | Description |
-| ------------- | --------------------------- |
-| `docker` | Docker containers |
-| `firecracker` | Firecracker microVMs |
-| `gvisor` | gVisor sandboxed containers |
-
-### External Service
-
-Delegate to an external code execution service:
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "r", "julia"]
-timeout_secs = 120
-max_memory_mb = 1024
-
-[features.code_execution.mode]
-external = { url = "https://code-exec.example.com", api_key = "${CODE_EXEC_API_KEY}" }
-```
-
-| Key | Type | Description |
-| --------- | ------ | -------------------------- |
-| `url` | string | External service URL |
-| `api_key` | string | API key for authentication |
-
-## Complete Examples
-
-### Client-Side Only (Default)
-
-```toml
-[features.code_execution]
-enabled = true
-mode = "wasm"
-allowed_languages = ["python", "javascript"]
-timeout_secs = 30
-max_memory_mb = 256
-```
-
-### Docker Sandboxed
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "bash", "ruby"]
-timeout_secs = 60
-max_memory_mb = 512
-
-[features.code_execution.mode]
-sandboxed = { runtime = "docker" }
-```
-
-### External Service
-
-```toml
-[features.code_execution]
-enabled = true
-allowed_languages = ["python", "javascript", "r", "julia", "sql"]
-timeout_secs = 120
-max_memory_mb = 2048
-
-[features.code_execution.mode]
-external = { url = "https://code-exec.internal.company.com", api_key = "${CODE_EXEC_API_KEY}" }
-```
-
-## Security Considerations
-
-
- Server-side code execution requires careful security configuration. Consider: - Network isolation
- for containers - Resource limits (CPU, memory, disk) - Execution timeouts - Language restrictions
-
-
-## See Also
-
-- [Frontend Tools Guide](/docs/features/frontend-tools) - Client-side WASM execution
diff --git a/docs/content/docs/configuration/features/fallback.mdx b/docs/content/docs/configuration/features/fallback.mdx
deleted file mode 100644
index 34c92b2..0000000
--- a/docs/content/docs/configuration/features/fallback.mdx
+++ /dev/null
@@ -1,158 +0,0 @@
----
-title: Fallback & Retry
-description: Configure automatic retries and provider fallbacks
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.fallback]` section configures automatic retry behavior and provider fallback chains for handling transient errors.
-
-## Configuration Reference
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 3
-initial_delay_ms = 1000
-max_delay_ms = 30000
-backoff_multiplier = 2.0
-fallback_enabled = false
-fallback_order = []
-fallback_on = ["rate_limit", "server_error", "timeout"]
-```
-
-| Key | Type | Default | Description |
-| -------------------- | ------- | --------- | --------------------------------- |
-| `retries_enabled` | boolean | `true` | Enable automatic retries |
-| `max_retries` | integer | `3` | Maximum retry attempts |
-| `initial_delay_ms` | integer | `1000` | Initial retry delay (1 second) |
-| `max_delay_ms` | integer | `30000` | Maximum retry delay (30 seconds) |
-| `backoff_multiplier` | float | `2.0` | Exponential backoff multiplier |
-| `fallback_enabled` | boolean | `false` | Enable provider fallbacks |
-| `fallback_order` | array | `[]` | Provider fallback chain |
-| `fallback_on` | array | see below | Error types that trigger fallback |
-
-## Retry Behavior
-
-Retries use exponential backoff with the formula:
-
-```
-delay = min(initial_delay_ms * (backoff_multiplier ^ attempt), max_delay_ms)
-```
-
-Example with defaults:
-
-- Attempt 1: 1000ms delay
-- Attempt 2: 2000ms delay
-- Attempt 3: 4000ms delay
-
-## Fallback Triggers
-
-| Trigger | Description |
-| ---------------- | ----------------------- |
-| `rate_limit` | 429 Too Many Requests |
-| `server_error` | 5xx errors |
-| `timeout` | Request timeout |
-| `overloaded` | Provider overloaded |
-| `context_length` | Context length exceeded |
-
-```toml
-fallback_on = ["rate_limit", "server_error", "timeout"]
-```
-
-## Complete Examples
-
-### Retries Only (Default)
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 3
-initial_delay_ms = 1000
-max_delay_ms = 30000
-backoff_multiplier = 2.0
-fallback_enabled = false
-```
-
-### With Provider Fallbacks
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 2
-initial_delay_ms = 500
-max_delay_ms = 10000
-backoff_multiplier = 2.0
-fallback_enabled = true
-fallback_order = ["anthropic", "openai", "bedrock"]
-fallback_on = ["rate_limit", "server_error", "timeout", "overloaded"]
-```
-
-Flow: Primary provider fails → retry 2x → try Anthropic → retry 2x → try OpenAI → retry 2x → try Bedrock → fail
-
-### Aggressive Retries
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 5
-initial_delay_ms = 200
-max_delay_ms = 5000
-backoff_multiplier = 1.5
-fallback_enabled = false
-```
-
-### No Retries
-
-```toml
-[features.fallback]
-retries_enabled = false
-fallback_enabled = true
-fallback_order = ["anthropic", "openai"]
-fallback_on = ["server_error", "timeout"]
-```
-
-### Context Length Fallback
-
-Handle models with different context limits:
-
-```toml
-[features.fallback]
-retries_enabled = true
-max_retries = 1
-fallback_enabled = true
-fallback_order = ["gpt-4o", "claude-sonnet"]
-fallback_on = ["context_length"]
-```
-
-## Fallback Chain Behavior
-
-```
-Request to primary provider
- │
- ▼
- ┌─────────┐ ┌─────────────────┐
- │ Success │ ←── │ Retry if failed │
- └─────────┘ └─────────────────┘
- │ │
- │ max_retries
- │ │
- │ ▼
- │ ┌─────────────────┐
- │ │ Next in fallback│
- │ │ order │
- │ └─────────────────┘
- │ │
- ▼ ▼
- Return response Repeat until
- chain exhausted
-```
-
-
-Per-provider retry and circuit breaker settings (in `[providers.]`) override global fallback settings for that provider.
-
-
-## See Also
-
-- [Load Balancing](/docs/configuration/features/load-balancing) - Provider selection
-- [Provider Configuration](/docs/configuration/providers) - Per-provider retries
diff --git a/docs/content/docs/configuration/features/index.mdx b/docs/content/docs/configuration/features/index.mdx
index 4e58b9f..1422f73 100644
--- a/docs/content/docs/configuration/features/index.mdx
+++ b/docs/content/docs/configuration/features/index.mdx
@@ -15,12 +15,8 @@ The `[features]` section enables and configures optional gateway capabilities. A
| [File Processing](/docs/configuration/features/file-processing) | `[features.file_processing]` | Document chunking, OCR, virus scanning |
| [Response Caching](/docs/configuration/features/response-caching) | `[features.response_caching]` | Exact and semantic response caching |
| [Guardrails](/docs/configuration/features/guardrails) | `[features.guardrails]` | Content filtering, PII detection, safety |
-| [Web Search](/docs/configuration/features/web-search) | `[features.web_search]` | Web search tool providers |
-| [Code Execution](/docs/configuration/features/code-execution) | `[features.code_execution]` | Server-side code execution |
| [Image Fetching](/docs/configuration/features/image-fetching) | `[features.image_fetching]` | URL-to-base64 conversion for non-OpenAI providers |
| [WebSocket](/docs/configuration/features/websocket) | `[features.websocket]` | Real-time event subscriptions |
-| [Load Balancing](/docs/configuration/features/load-balancing) | `[features.load_balancing]` | Provider selection strategies |
-| [Fallback](/docs/configuration/features/fallback) | `[features.fallback]` | Retry and provider fallback |
| Model Catalog | `[features.model_catalog]` | Enrich models with capabilities and pricing |
## Minimal Configuration
@@ -107,21 +103,6 @@ timeout_secs = 30
enabled = true
require_auth = true
-# Load Balancing
-[features.load_balancing]
-strategy = "round_robin"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 30
-
-# Fallback & Retry
-[features.fallback]
-retries_enabled = true
-max_retries = 3
-fallback_enabled = true
-fallback_order = ["anthropic", "openai"]
-
# Model Catalog
[features.model_catalog]
enabled = true
diff --git a/docs/content/docs/configuration/features/load-balancing.mdx b/docs/content/docs/configuration/features/load-balancing.mdx
deleted file mode 100644
index c994e22..0000000
--- a/docs/content/docs/configuration/features/load-balancing.mdx
+++ /dev/null
@@ -1,141 +0,0 @@
----
-title: Load Balancing
-description: Configure provider selection strategies and health checks
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.load_balancing]` section configures how requests are distributed across providers when multiple providers support the same model.
-
-## Configuration Reference
-
-### Main Settings
-
-```toml
-[features.load_balancing]
-strategy = "round_robin"
-```
-
-| Key | Type | Default | Description |
-| ---------- | ------ | --------------- | ----------------------- |
-| `strategy` | string | `"round_robin"` | Load balancing strategy |
-
-### Strategies
-
-| Strategy | Description |
-| ------------------- | --------------------------------------------- |
-| `round_robin` | Cycle through providers sequentially |
-| `least_connections` | Route to provider with fewest active requests |
-| `random` | Random provider selection |
-| `weighted` | Distribute based on configured weights |
-| `latency_based` | Route to lowest-latency provider |
-| `cost_based` | Route to lowest-cost provider |
-
-```toml
-# Round Robin (default)
-[features.load_balancing]
-strategy = "round_robin"
-
-# Latency-based
-[features.load_balancing]
-strategy = "latency_based"
-
-# Cost-based
-[features.load_balancing]
-strategy = "cost_based"
-```
-
-### Health Checks
-
-Configure provider health monitoring:
-
-```toml
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 30
-unhealthy_threshold = 3
-healthy_threshold = 2
-```
-
-| Key | Type | Default | Description |
-| --------------------- | ------- | ------- | -------------------------------------- |
-| `enabled` | boolean | `true` | Enable health checks |
-| `interval_secs` | integer | `30` | Check interval in seconds |
-| `unhealthy_threshold` | integer | `3` | Consecutive failures to mark unhealthy |
-| `healthy_threshold` | integer | `2` | Consecutive successes to mark healthy |
-
-## Complete Examples
-
-### Round Robin with Health Checks
-
-```toml
-[features.load_balancing]
-strategy = "round_robin"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 30
-unhealthy_threshold = 3
-healthy_threshold = 2
-```
-
-### Latency-Based
-
-```toml
-[features.load_balancing]
-strategy = "latency_based"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 15
-unhealthy_threshold = 2
-healthy_threshold = 1
-```
-
-### Cost-Based
-
-```toml
-[features.load_balancing]
-strategy = "cost_based"
-
-[features.load_balancing.health_check]
-enabled = true
-interval_secs = 60
-unhealthy_threshold = 5
-healthy_threshold = 2
-```
-
-### Disabled Health Checks
-
-```toml
-[features.load_balancing]
-strategy = "random"
-
-[features.load_balancing.health_check]
-enabled = false
-```
-
-## Strategy Selection
-
-| Use Case | Recommended Strategy |
-| ----------------- | -------------------- |
-| General workloads | `round_robin` |
-| Latency-sensitive | `latency_based` |
-| Cost optimization | `cost_based` |
-| Variable load | `least_connections` |
-| Testing/debugging | `random` |
-
-## Health Check Behavior
-
-```
-Provider A: ● ● ● ✗ ✗ ✗ → Unhealthy (3 consecutive failures)
-Provider B: ● ● ● ● ● ● → Healthy
-Provider C: ✗ ✗ ● ● ● ● → Healthy (2 consecutive successes)
-```
-
-Unhealthy providers are excluded from load balancing until they pass `healthy_threshold` consecutive checks.
-
-## See Also
-
-- [Fallback Configuration](/docs/configuration/features/fallback) - Retry and fallback settings
-- [Provider Configuration](/docs/configuration/providers) - Provider setup
diff --git a/docs/content/docs/configuration/features/meta.json b/docs/content/docs/configuration/features/meta.json
index 433c034..42aabe6 100644
--- a/docs/content/docs/configuration/features/meta.json
+++ b/docs/content/docs/configuration/features/meta.json
@@ -6,11 +6,7 @@
"file-processing",
"response-caching",
"guardrails",
- "web-search",
- "code-execution",
"image-fetching",
- "websocket",
- "load-balancing",
- "fallback"
+ "websocket"
]
}
diff --git a/docs/content/docs/configuration/features/web-search.mdx b/docs/content/docs/configuration/features/web-search.mdx
deleted file mode 100644
index fd0022e..0000000
--- a/docs/content/docs/configuration/features/web-search.mdx
+++ /dev/null
@@ -1,135 +0,0 @@
----
-title: Web Search
-description: Configure web search providers for the web_search tool
----
-
-import { Callout } from "fumadocs-ui/components/callout";
-
-The `[features.web_search]` section configures web search providers that can be used by the `web_search` tool in chat conversations.
-
-## Configuration Reference
-
-### Main Settings
-
-```toml
-[features.web_search]
-enabled = true
-default_provider = "tavily"
-max_results = 10
-```
-
-| Key | Type | Default | Description |
-| ------------------ | ------- | ------- | ------------------------- |
-| `enabled` | boolean | `true` | Enable web search |
-| `default_provider` | string | none | Default provider name |
-| `max_results` | integer | `10` | Maximum results to return |
-
-## Providers
-
-### Tavily
-
-AI-optimized search API:
-
-```toml
-[[features.web_search.providers]]
-type = "tavily"
-api_key = "${TAVILY_API_KEY}"
-```
-
-| Key | Type | Description |
-| --------- | ------ | -------------- |
-| `api_key` | string | Tavily API key |
-
-### Brave Search
-
-Privacy-focused search API:
-
-```toml
-[[features.web_search.providers]]
-type = "brave"
-api_key = "${BRAVE_SEARCH_API_KEY}"
-```
-
-| Key | Type | Description |
-| --------- | ------ | -------------------- |
-| `api_key` | string | Brave Search API key |
-
-### Google Custom Search
-
-Google search via Custom Search JSON API:
-
-```toml
-[[features.web_search.providers]]
-type = "google"
-api_key = "${GOOGLE_API_KEY}"
-search_engine_id = "${GOOGLE_CSE_ID}"
-```
-
-| Key | Type | Description |
-| ------------------ | ------ | ----------------------- |
-| `api_key` | string | Google API key |
-| `search_engine_id` | string | Custom Search Engine ID |
-
-### Bing Search
-
-Microsoft Bing Web Search API:
-
-```toml
-[[features.web_search.providers]]
-type = "bing"
-api_key = "${BING_SEARCH_API_KEY}"
-```
-
-| Key | Type | Description |
-| --------- | ------ | ------------------- |
-| `api_key` | string | Bing Search API key |
-
-### SerpAPI
-
-Scraping-based search API (Google, Bing, etc.):
-
-```toml
-[[features.web_search.providers]]
-type = "serp"
-api_key = "${SERPAPI_KEY}"
-```
-
-| Key | Type | Description |
-| --------- | ------ | ----------- |
-| `api_key` | string | SerpAPI key |
-
-## Complete Example
-
-```toml
-[features.web_search]
-enabled = true
-default_provider = "tavily"
-max_results = 10
-
-[[features.web_search.providers]]
-type = "tavily"
-api_key = "${TAVILY_API_KEY}"
-
-[[features.web_search.providers]]
-type = "brave"
-api_key = "${BRAVE_SEARCH_API_KEY}"
-
-[[features.web_search.providers]]
-type = "google"
-api_key = "${GOOGLE_API_KEY}"
-search_engine_id = "${GOOGLE_CSE_ID}"
-```
-
-## Provider Comparison
-
-| Provider | Pricing | Best For |
-| -------- | ------------- | --------------------------------------- |
-| Tavily | Freemium | AI/LLM applications, clean results |
-| Brave | Freemium | Privacy-conscious, web search |
-| Google | Pay-per-query | Comprehensive results, custom filtering |
-| Bing | Pay-per-query | Microsoft ecosystem |
-| SerpAPI | Pay-per-query | Multiple search engines, SERP data |
-
-
- Web search results are formatted for LLM consumption and injected into the conversation context.
-
diff --git a/src/app.rs b/src/app.rs
index 0bad0b0..1a76833 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -1357,34 +1357,22 @@ impl AppState {
// Get embedding configuration with priority:
// 1. file_search.embedding (explicit RAG config)
// 2. response_caching.semantic.embedding (semantic cache config)
- // 3. vector_search.embedding (legacy vector search config)
- let embedding_config = file_search_config
- .embedding
- .as_ref()
- .or_else(|| {
- config
- .features
- .response_caching
- .as_ref()
- .and_then(|rc| rc.semantic.as_ref())
- .map(|sc| &sc.embedding)
- })
- .or_else(|| {
- config
- .features
- .vector_search
- .as_ref()
- .map(|vs| &vs.embedding)
- });
+ let embedding_config = file_search_config.embedding.as_ref().or_else(|| {
+ config
+ .features
+ .response_caching
+ .as_ref()
+ .and_then(|rc| rc.semantic.as_ref())
+ .map(|sc| &sc.embedding)
+ });
let embedding_config = match embedding_config {
Some(cfg) => cfg,
None => {
tracing::warn!(
"File search is enabled but no embedding configuration found. \
- Configure [features.file_search.embedding], \
- [features.response_caching.semantic.embedding], or \
- [features.vector_search.embedding] to enable file search."
+ Configure [features.file_search.embedding] or \
+ [features.response_caching.semantic.embedding] to enable file search."
);
return None;
}
diff --git a/src/cache/memory.rs b/src/cache/memory.rs
index b50587c..c4e8478 100644
--- a/src/cache/memory.rs
+++ b/src/cache/memory.rs
@@ -12,6 +12,10 @@ use std::{
/// This prevents infinite spinning under extreme contention.
const MAX_CAS_RETRIES: usize = 100;
+/// Upper bound on the eviction batch size used when the cache reaches capacity.
+/// Smaller caches use 10% of `max_entries` (minimum 1); expired entries go first, then LRU.
+const EVICTION_BATCH_SIZE: usize = 100;
+
use async_trait::async_trait;
use dashmap::DashMap;
@@ -89,7 +93,6 @@ pub struct MemoryCache {
counters: Arc>>,
sets: Arc>,
max_entries: usize,
- eviction_batch_size: usize,
}
impl MemoryCache {
@@ -99,7 +102,6 @@ impl MemoryCache {
counters: Arc::new(DashMap::new()),
sets: Arc::new(DashMap::new()),
max_entries: config.max_entries,
- eviction_batch_size: config.eviction_batch_size.max(1),
}
}
@@ -117,8 +119,10 @@ impl MemoryCache {
return;
}
- // Calculate how many entries to evict
- let target_size = self.max_entries.saturating_sub(self.eviction_batch_size);
+ // Eviction batch: 10% of max_entries, clamped to 1..=EVICTION_BATCH_SIZE, so small
+ // caches are not emptied at once. Evict down to max_entries minus that batch.
+ let batch = (self.max_entries / 10).clamp(1, EVICTION_BATCH_SIZE);
+ let target_size = self.max_entries.saturating_sub(batch);
let to_evict = current_len.saturating_sub(target_size);
if to_evict == 0 {
@@ -487,13 +491,9 @@ mod tests {
}
}
- fn test_config_with_eviction(
- max_entries: usize,
- eviction_batch_size: usize,
- ) -> MemoryCacheConfig {
+ fn test_config_with_eviction(max_entries: usize) -> MemoryCacheConfig {
MemoryCacheConfig {
max_entries,
- eviction_batch_size,
..Default::default()
}
}
@@ -969,8 +969,8 @@ mod tests {
#[tokio::test]
async fn test_lru_eviction_evicts_oldest() {
- // max_entries=5, eviction_batch_size=2
- let cache = MemoryCache::new(&test_config_with_eviction(5, 2));
+ // max_entries=5; eviction batch = max(1, 5/10) = 1, target_size = 4
+ let cache = MemoryCache::new(&test_config_with_eviction(5));
// Fill cache with entries (with delays to ensure distinct access times)
for i in 0..5 {
@@ -1021,8 +1021,8 @@ mod tests {
.filter(|&&x| x)
.count();
- // After eviction, we should have fewer than 5 entries
- // eviction_batch_size=2 means target is max_entries - 2 = 3 entries after eviction
+ // After eviction: target_size = 4, current_len was 6, so 2 entries are evicted.
+ // key2 and key3 are the least recently accessed, so they should be evicted first; the assertion below only requires <= 2 survivors.
assert!(
remaining <= 2,
"Expected at most 2 of key2/key3/key4 to remain, got {}",
@@ -1032,7 +1032,7 @@ mod tests {
#[tokio::test]
async fn test_lru_eviction_prefers_expired_first() {
- let cache = MemoryCache::new(&test_config_with_eviction(4, 2));
+ let cache = MemoryCache::new(&test_config_with_eviction(4));
// Add entries: some expired, some not
cache
@@ -1088,7 +1088,7 @@ mod tests {
#[tokio::test]
async fn test_lru_no_eviction_below_capacity() {
- let cache = MemoryCache::new(&test_config_with_eviction(10, 2));
+ let cache = MemoryCache::new(&test_config_with_eviction(10));
// Add entries below capacity
for i in 0..5 {
@@ -1114,7 +1114,7 @@ mod tests {
#[tokio::test]
async fn test_get_updates_last_accessed() {
- let cache = MemoryCache::new(&test_config_with_eviction(3, 1));
+ let cache = MemoryCache::new(&test_config_with_eviction(3));
// Add entries
cache
diff --git a/src/config/cache.rs b/src/config/cache.rs
index 74b523a..ab5db97 100644
--- a/src/config/cache.rs
+++ b/src/config/cache.rs
@@ -59,11 +59,6 @@ pub struct MemoryCacheConfig {
#[serde(default = "default_max_entries")]
pub max_entries: usize,
- /// Number of entries to evict when cache is full.
- /// Eviction removes expired entries first, then uses LRU.
- #[serde(default = "default_eviction_batch_size")]
- pub eviction_batch_size: usize,
-
/// Default TTL for cache entries in seconds.
#[serde(default = "default_ttl")]
pub default_ttl_secs: u64,
@@ -77,7 +72,6 @@ impl Default for MemoryCacheConfig {
fn default() -> Self {
Self {
max_entries: default_max_entries(),
- eviction_batch_size: default_eviction_batch_size(),
default_ttl_secs: default_ttl(),
ttl: CacheTtlConfig::default(),
}
@@ -99,10 +93,6 @@ fn default_max_entries() -> usize {
100_000
}
-fn default_eviction_batch_size() -> usize {
- 100 // Evict 100 entries at a time when cache is full
-}
-
fn default_ttl() -> u64 {
3600 // 1 hour
}
@@ -214,14 +204,6 @@ pub struct CacheTtlConfig {
/// TTL for dynamic provider cache in seconds.
#[serde(default = "default_provider_ttl")]
pub provider_secs: u64,
-
- /// TTL for daily spend cache in seconds.
- #[serde(default = "default_daily_spend_ttl")]
- pub daily_spend_secs: u64,
-
- /// TTL for monthly spend cache in seconds.
- #[serde(default = "default_monthly_spend_ttl")]
- pub monthly_spend_secs: u64,
}
impl Default for CacheTtlConfig {
@@ -230,8 +212,6 @@ impl Default for CacheTtlConfig {
api_key_secs: default_api_key_ttl(),
rate_limit_secs: default_rate_limit_ttl(),
provider_secs: default_provider_ttl(),
- daily_spend_secs: default_daily_spend_ttl(),
- monthly_spend_secs: default_monthly_spend_ttl(),
}
}
}
@@ -247,11 +227,3 @@ fn default_rate_limit_ttl() -> u64 {
fn default_provider_ttl() -> u64 {
300 // 5 minutes
}
-
-fn default_daily_spend_ttl() -> u64 {
- 86400 // 1 day
-}
-
-fn default_monthly_spend_ttl() -> u64 {
- 86400 * 32 // ~32 days
-}
diff --git a/src/config/features.rs b/src/config/features.rs
index 3a7caba..faa3d69 100644
--- a/src/config/features.rs
+++ b/src/config/features.rs
@@ -7,49 +7,20 @@ use super::{CircuitBreakerConfig, RetryConfig};
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
#[serde(deny_unknown_fields)]
pub struct FeaturesConfig {
- /// Vector search / RAG features.
- #[serde(default)]
- pub vector_search: Option,
-
/// File search configuration for the Responses API.
/// Enables server-side file_search tool execution for RAG.
#[serde(default)]
pub file_search: Option,
- /// Web search features.
- #[serde(default)]
- pub web_search: Option,
-
- /// Code execution features.
- #[serde(default)]
- pub code_execution: Option,
-
- /// Content moderation (legacy - prefer guardrails for new deployments).
- #[serde(default)]
- pub moderation: Option,
-
/// Guardrails for content filtering, PII detection, and safety.
- /// More comprehensive than the legacy moderation config, with support
- /// for multiple providers, execution modes, and fine-grained actions.
+ /// Supports multiple providers, execution modes, and fine-grained actions.
#[serde(default)]
pub guardrails: Option,
- /// Prompt caching.
- #[serde(default)]
- pub prompt_caching: Option,
-
/// Response caching.
#[serde(default)]
pub response_caching: Option,
- /// Fallback and retry configuration.
- #[serde(default)]
- pub fallback: FallbackConfig,
-
- /// Load balancing configuration.
- #[serde(default)]
- pub load_balancing: LoadBalancingConfig,
-
/// HTTP image URL fetching configuration.
/// Controls how non-OpenAI providers (Anthropic, Bedrock, Vertex) handle
/// HTTP image URLs in chat completion requests.
@@ -88,80 +59,6 @@ impl FeaturesConfig {
}
}
-// ─────────────────────────────────────────────────────────────────────────────
-// Vector Search
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Vector search configuration for RAG.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct VectorSearchConfig {
- /// Enable vector search.
- #[serde(default = "default_true")]
- pub enabled: bool,
-
- /// Vector database backend.
- pub backend: VectorBackend,
-
- /// Default number of results to retrieve.
- #[serde(default = "default_top_k")]
- pub default_top_k: usize,
-
- /// Default similarity threshold (0.0-1.0).
- #[serde(default = "default_similarity_threshold")]
- pub similarity_threshold: f64,
-
- /// Embedding configuration.
- #[serde(default)]
- pub embedding: EmbeddingConfig,
-}
-
-fn default_top_k() -> usize {
- 5
-}
-
-fn default_similarity_threshold() -> f64 {
- 0.7
-}
-
-/// Vector database backend.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(tag = "type", rename_all = "snake_case")]
-#[serde(deny_unknown_fields)]
-pub enum VectorBackend {
- /// PostgreSQL with pgvector extension.
- Pgvector,
-
- /// Qdrant vector database.
- Qdrant {
- url: String,
- #[serde(default)]
- api_key: Option,
- },
-
- /// Pinecone vector database.
- Pinecone {
- api_key: String,
- environment: String,
- },
-
- /// Weaviate vector database.
- Weaviate {
- url: String,
- #[serde(default)]
- api_key: Option,
- },
-
- /// ChromaDB.
- Chroma {
- url: String,
- #[serde(default)]
- api_key: Option,
- },
-}
-
/// Embedding configuration.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
@@ -776,10 +673,7 @@ pub struct FileProcessingQueueConfig {
pub backend: FileProcessingQueueBackend,
/// Connection URL for the queue backend.
- /// Examples:
- /// - Redis: "redis://localhost:6379"
- /// - RabbitMQ: "amqp://guest:guest@localhost:5672"
- /// - SQS: "https://sqs.us-east-1.amazonaws.com/123456789/queue-name"
+ /// Example: "redis://localhost:6379"
pub url: String,
/// Queue/topic name for processing jobs.
@@ -789,14 +683,6 @@ pub struct FileProcessingQueueConfig {
/// Consumer group name (for Redis Streams).
#[serde(default = "default_file_processing_consumer_group")]
pub consumer_group: String,
-
- /// AWS region (for SQS).
- #[serde(default)]
- pub region: Option,
-
- /// GCP project ID (for Pub/Sub).
- #[serde(default)]
- pub project_id: Option,
}
impl FileProcessingQueueConfig {
@@ -808,15 +694,6 @@ impl FileProcessingQueueConfig {
if self.queue_name.is_empty() {
return Err("Queue name cannot be empty".to_string());
}
- match self.backend {
- FileProcessingQueueBackend::Sqs if self.region.is_none() => {
- return Err("SQS backend requires 'region' to be specified".to_string());
- }
- FileProcessingQueueBackend::PubSub if self.project_id.is_none() => {
- return Err("Pub/Sub backend requires 'project_id' to be specified".to_string());
- }
- _ => {}
- }
Ok(())
}
}
@@ -829,18 +706,6 @@ pub enum FileProcessingQueueBackend {
/// Redis Streams.
/// Good for simple deployments, supports consumer groups.
Redis,
-
- /// RabbitMQ.
- /// Full-featured message broker with routing capabilities.
- RabbitMq,
-
- /// AWS SQS.
- /// Managed queue service, good for AWS deployments.
- Sqs,
-
- /// Google Cloud Pub/Sub.
- /// Managed pub/sub service, good for GCP deployments.
- PubSub,
}
// ─────────────────────────────────────────────────────────────────────────────
@@ -1161,201 +1026,6 @@ fn default_file_processing_consumer_group() -> String {
"hadrian_workers".to_string()
}
-// ─────────────────────────────────────────────────────────────────────────────
-// Web Search
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Web search configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct WebSearchConfig {
- /// Enable web search.
- #[serde(default = "default_true")]
- pub enabled: bool,
-
- /// Web search providers.
- pub providers: Vec,
-
- /// Default provider.
- #[serde(default)]
- pub default_provider: Option,
-
- /// Maximum results to return.
- #[serde(default = "default_max_results")]
- pub max_results: usize,
-}
-
-fn default_max_results() -> usize {
- 10
-}
-
-/// Web search provider configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(tag = "type", rename_all = "snake_case")]
-#[serde(deny_unknown_fields)]
-pub enum WebSearchProvider {
- /// Tavily search API.
- Tavily { api_key: String },
-
- /// Brave Search API.
- Brave { api_key: String },
-
- /// Google Custom Search.
- Google {
- api_key: String,
- search_engine_id: String,
- },
-
- /// Bing Search API.
- Bing { api_key: String },
-
- /// SerpAPI.
- Serp { api_key: String },
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Code Execution
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Code execution configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct CodeExecutionConfig {
- /// Enable code execution.
- #[serde(default = "default_true")]
- pub enabled: bool,
-
- /// Execution mode.
- #[serde(default)]
- pub mode: CodeExecutionMode,
-
- /// Allowed languages.
- #[serde(default = "default_languages")]
- pub allowed_languages: Vec<String>,
-
- /// Execution timeout in seconds.
- #[serde(default = "default_execution_timeout")]
- pub timeout_secs: u64,
-
- /// Maximum memory in MB.
- #[serde(default = "default_max_memory")]
- pub max_memory_mb: u64,
-}
-
-fn default_languages() -> Vec<String> {
- vec!["python".into(), "javascript".into()]
-}
-
-fn default_execution_timeout() -> u64 {
- 30
-}
-
-fn default_max_memory() -> u64 {
- 256
-}
-
-/// Code execution mode.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum CodeExecutionMode {
- /// Execute in browser via WASM (Python via Pyodide, JS native).
- #[default]
- Wasm,
-
- /// Execute on server in sandboxed containers.
- Sandboxed {
- /// Container runtime.
- runtime: ContainerRuntime,
- },
-
- /// External code execution service.
- External {
- /// Service URL.
- url: String,
- /// API key.
- api_key: Option,
- },
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum ContainerRuntime {
- Docker,
- Firecracker,
- Gvisor,
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Moderation
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Content moderation configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct ModerationConfig {
- /// Enable moderation.
- #[serde(default = "default_true")]
- pub enabled: bool,
-
- /// Check input before sending to provider.
- #[serde(default = "default_true")]
- pub check_input: bool,
-
- /// Check output before returning to user.
- #[serde(default)]
- pub check_output: bool,
-
- /// Moderation provider.
- #[serde(default)]
- pub provider: ModerationProvider,
-
- /// Action to take on flagged content.
- #[serde(default)]
- pub action: ModerationAction,
-
- /// Categories to check.
- #[serde(default)]
- pub categories: Vec,
-
- /// Threshold for flagging (0.0-1.0).
- #[serde(default = "default_moderation_threshold")]
- pub threshold: f64,
-}
-
-fn default_moderation_threshold() -> f64 {
- 0.8
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum ModerationProvider {
- #[default]
- OpenAi,
- Custom {
- url: String,
- },
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum ModerationAction {
- /// Block the request and return an error.
- #[default]
- Block,
- /// Allow but log the flagged content.
- Log,
- /// Add a warning to the response.
- Warn,
-}
-
// ─────────────────────────────────────────────────────────────────────────────
// Guardrails
// ─────────────────────────────────────────────────────────────────────────────
@@ -2072,24 +1742,6 @@ fn default_pii_replacement() -> String {
// Caching
// ─────────────────────────────────────────────────────────────────────────────
-/// Prompt caching configuration (provider-level caching like Anthropic's).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct PromptCachingConfig {
- /// Enable prompt caching.
- #[serde(default = "default_true")]
- pub enabled: bool,
-
- /// Minimum prompt length to cache (in tokens).
- #[serde(default = "default_min_cache_tokens")]
- pub min_tokens: u32,
-}
-
-fn default_min_cache_tokens() -> u32 {
- 1024
-}
-
/// Response caching configuration (gateway-level caching).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
@@ -2396,181 +2048,6 @@ pub struct CacheKeyComponents {
pub tools: bool,
}
-// ─────────────────────────────────────────────────────────────────────────────
-// Fallback & Retry
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Fallback and retry configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct FallbackConfig {
- /// Enable automatic retries.
- #[serde(default = "default_true")]
- pub retries_enabled: bool,
-
- /// Maximum number of retries.
- #[serde(default = "default_max_retries")]
- pub max_retries: u32,
-
- /// Initial retry delay in milliseconds.
- #[serde(default = "default_retry_delay")]
- pub initial_delay_ms: u64,
-
- /// Maximum retry delay in milliseconds.
- #[serde(default = "default_max_retry_delay")]
- pub max_delay_ms: u64,
-
- /// Retry backoff multiplier.
- #[serde(default = "default_backoff_multiplier")]
- pub backoff_multiplier: f64,
-
- /// Enable fallback to alternative providers.
- #[serde(default)]
- pub fallback_enabled: bool,
-
- /// Fallback provider order.
- #[serde(default)]
- pub fallback_order: Vec,
-
- /// Errors that trigger fallback.
- #[serde(default = "default_fallback_errors")]
- pub fallback_on: Vec<FallbackTrigger>,
-}
-
-impl Default for FallbackConfig {
- fn default() -> Self {
- Self {
- retries_enabled: true,
- max_retries: default_max_retries(),
- initial_delay_ms: default_retry_delay(),
- max_delay_ms: default_max_retry_delay(),
- backoff_multiplier: default_backoff_multiplier(),
- fallback_enabled: false,
- fallback_order: vec![],
- fallback_on: default_fallback_errors(),
- }
- }
-}
-
-fn default_max_retries() -> u32 {
- 3
-}
-
-fn default_retry_delay() -> u64 {
- 1000
-}
-
-fn default_max_retry_delay() -> u64 {
- 30000
-}
-
-fn default_backoff_multiplier() -> f64 {
- 2.0
-}
-
-fn default_fallback_errors() -> Vec<FallbackTrigger> {
- vec![
- FallbackTrigger::RateLimit,
- FallbackTrigger::ServerError,
- FallbackTrigger::Timeout,
- ]
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum FallbackTrigger {
- RateLimit,
- ServerError,
- Timeout,
- Overloaded,
- ContextLength,
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Load Balancing
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Load balancing configuration.
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct LoadBalancingConfig {
- /// Load balancing strategy.
- #[serde(default)]
- pub strategy: LoadBalanceStrategy,
-
- /// Health check configuration.
- #[serde(default)]
- pub health_check: HealthCheckConfig,
-}
-
-/// Load balancing strategy.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum LoadBalanceStrategy {
- /// Round-robin across providers.
- #[default]
- RoundRobin,
- /// Route to least-loaded provider.
- LeastConnections,
- /// Random selection.
- Random,
- /// Weighted distribution.
- Weighted,
- /// Route based on latency.
- LatencyBased,
- /// Route based on cost.
- CostBased,
-}
-
-/// Health check configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct HealthCheckConfig {
- /// Enable health checks.
- #[serde(default = "default_true")]
- pub enabled: bool,
-
- /// Health check interval in seconds.
- #[serde(default = "default_health_interval")]
- pub interval_secs: u64,
-
- /// Unhealthy threshold (consecutive failures).
- #[serde(default = "default_unhealthy_threshold")]
- pub unhealthy_threshold: u32,
-
- /// Healthy threshold (consecutive successes).
- #[serde(default = "default_healthy_threshold")]
- pub healthy_threshold: u32,
-}
-
-impl Default for HealthCheckConfig {
- fn default() -> Self {
- Self {
- enabled: true,
- interval_secs: default_health_interval(),
- unhealthy_threshold: default_unhealthy_threshold(),
- healthy_threshold: default_healthy_threshold(),
- }
- }
-}
-
-fn default_health_interval() -> u64 {
- 30
-}
-
-fn default_unhealthy_threshold() -> u32 {
- 3
-}
-
-fn default_healthy_threshold() -> u32 {
- 2
-}
-
// ─────────────────────────────────────────────────────────────────────────────
// Image Fetching
// ─────────────────────────────────────────────────────────────────────────────
@@ -4192,73 +3669,6 @@ mod tests {
assert_eq!(queue.consumer_group, "my_workers");
}
- #[test]
- fn test_file_processing_config_queue_mode_sqs() {
- let config: FileProcessingConfig = toml::from_str(
- r#"
- mode = "queue"
-
- [queue]
- backend = "sqs"
- url = "https://sqs.us-east-1.amazonaws.com/123456789/my-queue"
- queue_name = "my-queue"
- region = "us-east-1"
- "#,
- )
- .unwrap();
-
- assert_eq!(config.mode, FileProcessingMode::Queue);
- assert!(config.validate().is_ok());
-
- let queue = config.queue.unwrap();
- assert_eq!(queue.backend, FileProcessingQueueBackend::Sqs);
- assert_eq!(queue.region, Some("us-east-1".to_string()));
- }
-
- #[test]
- fn test_file_processing_config_queue_mode_pubsub() {
- let config: FileProcessingConfig = toml::from_str(
- r#"
- mode = "queue"
-
- [queue]
- backend = "pub_sub"
- url = "https://pubsub.googleapis.com"
- queue_name = "file-processing-topic"
- project_id = "my-gcp-project"
- "#,
- )
- .unwrap();
-
- assert_eq!(config.mode, FileProcessingMode::Queue);
- assert!(config.validate().is_ok());
-
- let queue = config.queue.unwrap();
- assert_eq!(queue.backend, FileProcessingQueueBackend::PubSub);
- assert_eq!(queue.project_id, Some("my-gcp-project".to_string()));
- }
-
- #[test]
- fn test_file_processing_config_queue_mode_rabbitmq() {
- let config: FileProcessingConfig = toml::from_str(
- r#"
- mode = "queue"
-
- [queue]
- backend = "rabbit_mq"
- url = "amqp://guest:guest@localhost:5672"
- queue_name = "file_processing"
- "#,
- )
- .unwrap();
-
- assert_eq!(config.mode, FileProcessingMode::Queue);
- assert!(config.validate().is_ok());
-
- let queue = config.queue.unwrap();
- assert_eq!(queue.backend, FileProcessingQueueBackend::RabbitMq);
- }
-
#[test]
fn test_file_processing_config_queue_mode_missing_config() {
let config: FileProcessingConfig = toml::from_str(
@@ -4272,40 +3682,6 @@ mod tests {
assert!(config.validate().is_err());
}
- #[test]
- fn test_file_processing_config_sqs_missing_region() {
- let config: FileProcessingConfig = toml::from_str(
- r#"
- mode = "queue"
-
- [queue]
- backend = "sqs"
- url = "https://sqs.us-east-1.amazonaws.com/123456789/my-queue"
- queue_name = "my-queue"
- "#,
- )
- .unwrap();
-
- assert!(config.validate().is_err());
- }
-
- #[test]
- fn test_file_processing_config_pubsub_missing_project() {
- let config: FileProcessingConfig = toml::from_str(
- r#"
- mode = "queue"
-
- [queue]
- backend = "pub_sub"
- url = "https://pubsub.googleapis.com"
- queue_name = "file-processing-topic"
- "#,
- )
- .unwrap();
-
- assert!(config.validate().is_err());
- }
-
#[test]
fn test_file_processing_config_max_size_bytes() {
let config = FileProcessingConfig {
diff --git a/src/config/limits.rs b/src/config/limits.rs
index 6e02efd..ded9e1e 100644
--- a/src/config/limits.rs
+++ b/src/config/limits.rs
@@ -17,10 +17,6 @@ pub struct LimitsConfig {
#[serde(default)]
pub budgets: BudgetDefaults,
- /// Token limits.
- #[serde(default)]
- pub tokens: TokenLimitDefaults,
-
/// Resource limits for entity counts.
#[serde(default)]
pub resource_limits: ResourceLimits,
@@ -225,15 +221,6 @@ pub struct BudgetDefaults {
#[serde(default = "default_warning_threshold")]
pub warning_threshold: f64,
- /// Hard limit action when budget is exceeded.
- #[serde(default)]
- pub exceeded_action: BudgetExceededAction,
-
- /// Allow overage up to this percentage above the budget.
- /// E.g., 0.1 means 10% overage is allowed.
- #[serde(default)]
- pub allowed_overage: f64,
-
/// Estimated cost per request in cents for budget reservation.
/// This is reserved before the request is processed to prevent race conditions.
/// After the request completes, the actual cost replaces the estimate.
@@ -248,8 +235,6 @@ impl Default for BudgetDefaults {
monthly_budget_usd: None,
daily_budget_usd: None,
warning_threshold: default_warning_threshold(),
- exceeded_action: BudgetExceededAction::default(),
- allowed_overage: 0.0,
estimated_cost_cents: default_estimated_cost_cents(),
}
}
@@ -262,54 +247,3 @@ fn default_estimated_cost_cents() -> i64 {
fn default_warning_threshold() -> f64 {
0.8 // 80%
}
-
-/// Action to take when budget is exceeded.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum BudgetExceededAction {
- /// Block the request.
- #[default]
- Block,
- /// Allow the request but log a warning.
- Warn,
- /// Allow but throttle (reduce rate limits).
- Throttle,
-}
-
-/// Token limit defaults.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct TokenLimitDefaults {
- /// Maximum input tokens per request.
- #[serde(default)]
- pub max_input_tokens: Option,
-
- /// Maximum output tokens per request.
- #[serde(default)]
- pub max_output_tokens: Option,
-
- /// Maximum total tokens per request (input + output).
- #[serde(default)]
- pub max_total_tokens: Option,
-
- /// Default max_tokens if not specified in the request.
- #[serde(default = "default_max_tokens")]
- pub default_max_tokens: u32,
-}
-
-impl Default for TokenLimitDefaults {
- fn default() -> Self {
- Self {
- max_input_tokens: None,
- max_output_tokens: None,
- max_total_tokens: None,
- default_max_tokens: default_max_tokens(),
- }
- }
-}
-
-fn default_max_tokens() -> u32 {
- 4096
-}
diff --git a/src/config/observability.rs b/src/config/observability.rs
index 171435a..3deb01e 100644
--- a/src/config/observability.rs
+++ b/src/config/observability.rs
@@ -19,10 +19,6 @@ pub struct ObservabilityConfig {
#[serde(default)]
pub metrics: MetricsConfig,
- /// Request/response logging.
- #[serde(default)]
- pub request_logging: RequestLoggingConfig,
-
/// Usage logging configuration.
#[serde(default)]
pub usage: UsageConfig,
@@ -384,7 +380,7 @@ pub struct TracingConfig {
}
fn default_service_name() -> String {
- "ai-gateway".to_string()
+ "hadrian".to_string()
}
/// OTLP exporter configuration.
@@ -573,102 +569,6 @@ fn default_metrics_path() -> String {
"/metrics".to_string()
}
-// ─────────────────────────────────────────────────────────────────────────────
-// Request Logging
-// ─────────────────────────────────────────────────────────────────────────────
-
-/// Request/response logging configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(deny_unknown_fields)]
-pub struct RequestLoggingConfig {
- /// Enable request logging.
- #[serde(default)]
- pub enabled: bool,
-
- /// Log request bodies.
- #[serde(default)]
- pub log_request_body: bool,
-
- /// Log response bodies.
- #[serde(default)]
- pub log_response_body: bool,
-
- /// Maximum body size to log (in bytes).
- #[serde(default = "default_max_body_log")]
- pub max_body_size: usize,
-
- /// Redact sensitive fields.
- #[serde(default = "default_true")]
- pub redact_sensitive: bool,
-
- /// Fields to redact.
- #[serde(default = "default_redact_fields")]
- pub redact_fields: Vec<String>,
-
- /// Log to separate destination.
- #[serde(default)]
- pub destination: Option<LogDestination>,
-}
-
-impl Default for RequestLoggingConfig {
- fn default() -> Self {
- Self {
- enabled: false,
- log_request_body: false,
- log_response_body: false,
- max_body_size: default_max_body_log(),
- redact_sensitive: true,
- redact_fields: default_redact_fields(),
- destination: None,
- }
- }
-}
-
-fn default_max_body_log() -> usize {
- 10 * 1024 // 10 KB
-}
-
-fn default_redact_fields() -> Vec<String> {
- vec![
- "api_key".into(),
- "password".into(),
- "secret".into(),
- "authorization".into(),
- ]
-}
-
-/// Log destination for request logging.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(tag = "type", rename_all = "snake_case")]
-#[serde(deny_unknown_fields)]
-pub enum LogDestination {
- /// Log to file.
- File {
- path: String,
- #[serde(default)]
- rotation: Option<LogRotation>,
- },
- /// Log to stdout/stderr (same as regular logs).
- Stdout,
- /// Send to external service.
- Http {
- url: String,
- #[serde(default)]
- headers: HashMap,
- },
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
-#[serde(rename_all = "snake_case")]
-pub enum LogRotation {
- Daily,
- Hourly,
- Size { max_bytes: usize },
-}
-
fn default_true() -> bool {
true
}
diff --git a/src/config/providers.rs b/src/config/providers.rs
index 0eccba4..4b779de 100644
--- a/src/config/providers.rs
+++ b/src/config/providers.rs
@@ -66,6 +66,7 @@ pub struct ModelFallback {
/// ```
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `pricing`
pub struct ModelConfig {
/// Pricing fields (flattened inline).
#[serde(flatten)]
@@ -123,6 +124,7 @@ pub struct ModelConfig {
/// to determine which API protocol to use.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `providers` HashMap
pub struct ProvidersConfig {
/// Default provider name for requests that don't specify one.
#[serde(default)]
@@ -588,10 +590,6 @@ pub struct OpenAiProviderConfig {
#[serde(default)]
pub headers: HashMap,
- /// Whether this provider supports streaming (default: true).
- #[serde(default = "default_true")]
- pub supports_streaming: bool,
-
/// Whether this provider supports function/tool calling.
#[serde(default)]
pub supports_tools: bool,
@@ -660,7 +658,6 @@ impl std::fmt::Debug for OpenAiProviderConfig {
.field("allowed_models", &self.allowed_models)
.field("model_aliases", &self.model_aliases)
.field("headers", &self.headers)
- .field("supports_streaming", &self.supports_streaming)
.field("supports_tools", &self.supports_tools)
.field("supports_vision", &self.supports_vision)
.field("models", &self.models)
@@ -2780,7 +2777,6 @@ mod tests {
allowed_models: vec![],
model_aliases: HashMap::new(),
headers: HashMap::new(),
- supports_streaming: true,
supports_tools: false,
supports_vision: false,
models: HashMap::new(),
diff --git a/src/config/secrets.rs b/src/config/secrets.rs
index 48f6d7d..24a7be5 100644
--- a/src/config/secrets.rs
+++ b/src/config/secrets.rs
@@ -42,6 +42,7 @@ impl SecretsConfig {
/// Configuration for Vault/OpenBao secrets manager.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `auth`
pub struct VaultSecretsConfig {
/// Vault server address (e.g., "https://vault.example.com:8200")
pub address: String,
@@ -153,6 +154,7 @@ fn default_k8s_token_path() -> String {
/// Configuration for AWS Secrets Manager.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
pub struct AwsSecretsConfig {
/// AWS region (e.g., "us-east-1"). If not set, uses AWS_REGION environment variable.
#[serde(default)]
@@ -176,6 +178,7 @@ fn default_aws_prefix() -> String {
/// Configuration for Azure Key Vault.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
pub struct AzureKeyVaultSecretsConfig {
/// Key Vault URL (e.g., "https://myvault.vault.azure.net")
pub vault_url: String,
@@ -195,6 +198,7 @@ fn default_azure_prefix() -> String {
/// Configuration for GCP Secret Manager.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
+#[serde(deny_unknown_fields)]
pub struct GcpSecretsConfig {
/// GCP project ID
pub project_id: String,
diff --git a/src/config/server.rs b/src/config/server.rs
index bca41a4..fc8be77 100644
--- a/src/config/server.rs
+++ b/src/config/server.rs
@@ -18,11 +18,6 @@ pub struct ServerConfig {
#[serde(default = "default_port")]
pub port: u16,
- /// Base path for all API routes (e.g., "/api/v1").
- /// The UI is always served from "/".
- #[serde(default)]
- pub api_base_path: Option,
-
/// Request body size limit in bytes.
#[serde(default = "default_body_limit")]
pub body_limit_bytes: usize,
@@ -51,10 +46,6 @@ pub struct ServerConfig {
#[serde(default = "default_streaming_idle_timeout")]
pub streaming_idle_timeout_secs: u64,
- /// Enable HTTP/2 (requires TLS or h2c).
- #[serde(default)]
- pub http2: bool,
-
/// TLS configuration. If omitted, serves plain HTTP.
/// In production, TLS is typically terminated at the load balancer.
#[serde(default)]
@@ -100,12 +91,10 @@ impl Default for ServerConfig {
Self {
host: default_host(),
port: default_port(),
- api_base_path: None,
body_limit_bytes: default_body_limit(),
max_response_body_bytes: default_max_response_body(),
timeout_secs: default_timeout(),
streaming_idle_timeout_secs: default_streaming_idle_timeout(),
- http2: false,
tls: None,
trusted_proxies: TrustedProxiesConfig::default(),
cors: CorsConfig::default(),
@@ -424,7 +413,8 @@ pub struct SecurityHeadersConfig {
pub content_security_policy: Option,
/// X-XSS-Protection header value.
- /// Legacy header for older browsers. Default: "1; mode=block"
+ /// Legacy header for older browsers. Disabled by default (`None`) because CSP provides
+ /// the protection; set a value such as "1; mode=block" only for legacy browser compatibility.
#[serde(default = "default_xss_protection")]
pub xss_protection: Option<String>,
@@ -488,7 +478,7 @@ fn default_csp() -> Option {
}
fn default_xss_protection() -> Option<String> {
- Some("1; mode=block".to_string())
+ None
}
fn default_referrer_policy() -> Option {
diff --git a/src/config/ui.rs b/src/config/ui.rs
index d3e07e5..6ed954a 100644
--- a/src/config/ui.rs
+++ b/src/config/ui.rs
@@ -63,14 +63,6 @@ pub struct AssetsConfig {
/// Cache control header for static assets.
#[serde(default = "default_cache_control")]
pub cache_control: String,
-
- /// Enable gzip compression.
- #[serde(default = "default_true")]
- pub gzip: bool,
-
- /// Enable brotli compression.
- #[serde(default = "default_true")]
- pub brotli: bool,
}
impl Default for AssetsConfig {
@@ -78,8 +70,6 @@ impl Default for AssetsConfig {
Self {
source: AssetSource::default(),
cache_control: default_cache_control(),
- gzip: true,
- brotli: true,
}
}
}
@@ -122,25 +112,9 @@ pub struct ChatConfig {
#[serde(default)]
pub available_models: Vec,
- /// Enable conversation history.
- #[serde(default = "default_true")]
- pub history_enabled: bool,
-
- /// Maximum conversations to store per user.
- #[serde(default = "default_max_conversations")]
- pub max_conversations: usize,
-
/// Enable file uploads.
#[serde(default)]
pub file_uploads: FileUploadConfig,
-
- /// Enable code execution in chat.
- #[serde(default)]
- pub code_execution: bool,
-
- /// Enable web search in chat.
- #[serde(default)]
- pub web_search: bool,
}
impl Default for ChatConfig {
@@ -149,19 +123,11 @@ impl Default for ChatConfig {
enabled: true,
default_model: None,
available_models: vec![],
- history_enabled: true,
- max_conversations: default_max_conversations(),
file_uploads: FileUploadConfig::default(),
- code_execution: false,
- web_search: false,
}
}
}
-fn default_max_conversations() -> usize {
- 100
-}
-
/// File upload configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))]
@@ -250,11 +216,6 @@ pub struct AdminConfig {
/// Path for admin panel.
#[serde(default = "default_admin_path")]
pub path: String,
-
- /// Roles that can access the admin panel.
- /// If empty, any authenticated user with admin flag can access.
- #[serde(default)]
- pub allowed_roles: Vec,
}
impl Default for AdminConfig {
@@ -262,7 +223,6 @@ impl Default for AdminConfig {
Self {
enabled: true,
path: default_admin_path(),
- allowed_roles: vec![],
}
}
}
diff --git a/src/init.rs b/src/init.rs
index d3466a3..2f4d719 100644
--- a/src/init.rs
+++ b/src/init.rs
@@ -219,24 +219,14 @@ pub(crate) async fn init_worker_embedding_service(
}
};
- let embedding_config = file_search_config
- .embedding
- .as_ref()
- .or_else(|| {
- config
- .features
- .response_caching
- .as_ref()
- .and_then(|rc| rc.semantic.as_ref())
- .map(|sc| &sc.embedding)
- })
- .or_else(|| {
- config
- .features
- .vector_search
- .as_ref()
- .map(|vs| &vs.embedding)
- });
+ let embedding_config = file_search_config.embedding.as_ref().or_else(|| {
+ config
+ .features
+ .response_caching
+ .as_ref()
+ .and_then(|rc| rc.semantic.as_ref())
+ .map(|sc| &sc.embedding)
+ });
let embedding_config = match embedding_config {
Some(cfg) => cfg,
diff --git a/src/routing/resolver.rs b/src/routing/resolver.rs
index 58f2ec7..4d7e259 100644
--- a/src/routing/resolver.rs
+++ b/src/routing/resolver.rs
@@ -600,7 +600,6 @@ pub async fn dynamic_provider_to_config(
allowed_models: provider.models.clone(),
model_aliases: std::collections::HashMap::new(),
headers: std::collections::HashMap::new(),
- supports_streaming: true,
supports_tools: false,
supports_vision: false,
models: std::collections::HashMap::new(),
diff --git a/src/services/document_processor.rs b/src/services/document_processor.rs
index 4d8dd6e..6e55bd4 100644
--- a/src/services/document_processor.rs
+++ b/src/services/document_processor.rs
@@ -130,16 +130,6 @@ pub enum QueueBackend {
queue_name: String,
consumer_group: String,
},
- /// RabbitMQ
- RabbitMQ {
- url: String,
- exchange: String,
- queue_name: String,
- },
- /// AWS SQS
- Sqs { queue_url: String, region: String },
- /// Google Cloud Pub/Sub
- PubSub { project_id: String, topic: String },
}
/// Configuration for the document processor.
@@ -212,19 +202,6 @@ fn convert_queue_config(queue: &FileProcessingQueueConfig) -> QueueBackend {
queue_name: queue.queue_name.clone(),
consumer_group: queue.consumer_group.clone(),
},
- FileProcessingQueueBackend::RabbitMq => QueueBackend::RabbitMQ {
- url: queue.url.clone(),
- exchange: "hadrian".to_string(), // Default exchange name
- queue_name: queue.queue_name.clone(),
- },
- FileProcessingQueueBackend::Sqs => QueueBackend::Sqs {
- queue_url: queue.url.clone(),
- region: queue.region.clone().unwrap_or_default(),
- },
- FileProcessingQueueBackend::PubSub => QueueBackend::PubSub {
- project_id: queue.project_id.clone().unwrap_or_default(),
- topic: queue.queue_name.clone(),
- },
}
}
@@ -1495,21 +1472,6 @@ impl DocumentProcessor {
.to_string(),
));
}
- Some(QueueBackend::RabbitMQ { url, .. }) => {
- return Err(DocumentProcessorError::Configuration(format!(
- "RabbitMQ queue publishing not yet implemented (url: {url})"
- )));
- }
- Some(QueueBackend::Sqs { queue_url, .. }) => {
- return Err(DocumentProcessorError::Configuration(format!(
- "SQS queue publishing not yet implemented (queue: {queue_url})"
- )));
- }
- Some(QueueBackend::PubSub { topic, .. }) => {
- return Err(DocumentProcessorError::Configuration(format!(
- "Pub/Sub queue publishing not yet implemented (topic: {topic})"
- )));
- }
None => {
error!("Queue mode enabled but no queue backend configured");
otel_span_error!("Queue backend not configured");
@@ -1619,15 +1581,6 @@ pub async fn start_file_processing_worker(
"Redis queue configured but the 'redis' feature is not enabled. Rebuild with: cargo build --features redis"
);
}
- QueueBackend::RabbitMQ { .. } => {
- tracing::error!("RabbitMQ worker not yet implemented");
- }
- QueueBackend::Sqs { .. } => {
- tracing::error!("SQS worker not yet implemented");
- }
- QueueBackend::PubSub { .. } => {
- tracing::error!("Pub/Sub worker not yet implemented");
- }
}
}
diff --git a/src/tests/provider_e2e.rs b/src/tests/provider_e2e.rs
index 8161cc2..3bdaa58 100644
--- a/src/tests/provider_e2e.rs
+++ b/src/tests/provider_e2e.rs
@@ -822,7 +822,6 @@ type = "{}"
base_url = "{}"
api_key = "test-api-key"
timeout_secs = 30
-supports_streaming = true
supports_tools = true
supports_vision = true
@@ -2459,7 +2458,6 @@ type = "open_ai"
base_url = "{}"
api_key = "test-api-key"
timeout_secs = 30
-supports_streaming = true
supports_tools = true
# Circuit breaker configuration