diff --git a/CLAUDE.md b/CLAUDE.md index 56e02ae..f54bd09 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -390,15 +390,10 @@ See `agent_instructions/adding_admin_endpoint.md` for implementation patterns (r - `[features.file_processing]` — RAG document ingestion (text extraction, OCR, chunking) - `[features.guardrails]` — Input/output guardrails (blocklist, PII detection, moderation APIs) - `[features.response_caching]` — Response caching with optional semantic similarity matching -- `[features.prompt_caching]` — Anthropic prompt caching support - `[features.image_fetching]` — Fetch images from URLs for vision models -- `[features.web_search]` — Web search tool integration -- `[features.code_execution]` — Server-side code execution - `[features.model_catalog]` — Model metadata enrichment from models.dev - `[features.websocket]` — WebSocket for real-time events - `[features.vector_store_cleanup]` — Background cleanup for soft-deleted vector stores -- `[features.fallback]` — Fallback and retry configuration -- `[features.load_balancing]` — Load balancing configuration ## Caching diff --git a/docs/content/docs/configuration/features/code-execution.mdx b/docs/content/docs/configuration/features/code-execution.mdx deleted file mode 100644 index 3a771b5..0000000 --- a/docs/content/docs/configuration/features/code-execution.mdx +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: Code Execution -description: Configure server-side code execution capabilities ---- - -import { Callout } from "fumadocs-ui/components/callout"; - -The `[features.code_execution]` section configures server-side code execution. By default, code execution happens client-side via WebAssembly in the browser (Pyodide for Python, QuickJS for JavaScript). - - - Client-side WASM execution requires no server configuration. This section is only needed for - server-side sandboxed execution. 
- - -## Configuration Reference - -### Main Settings - -```toml -[features.code_execution] -enabled = true -mode = "wasm" -allowed_languages = ["python", "javascript"] -timeout_secs = 30 -max_memory_mb = 256 -``` - -| Key | Type | Default | Description | -| ------------------- | ------- | -------------------------- | ---------------------------- | -| `enabled` | boolean | `true` | Enable code execution | -| `mode` | string | `"wasm"` | Execution mode | -| `allowed_languages` | array | `["python", "javascript"]` | Allowed languages | -| `timeout_secs` | integer | `30` | Execution timeout | -| `max_memory_mb` | integer | `256` | Maximum memory per execution | - -## Execution Modes - -### WASM (Default) - -Client-side execution in the browser: - -```toml -[features.code_execution] -enabled = true -mode = "wasm" -``` - -No server-side configuration needed. Runs via: - -- **Python**: Pyodide (numpy, pandas, matplotlib available) -- **JavaScript**: QuickJS (sandboxed) - -### Sandboxed - -Server-side execution in containers: - -```toml -[features.code_execution] -enabled = true -allowed_languages = ["python", "javascript", "bash"] -timeout_secs = 60 -max_memory_mb = 512 - -[features.code_execution.mode] -sandboxed = { runtime = "docker" } -``` - -| Runtime | Description | -| ------------- | --------------------------- | -| `docker` | Docker containers | -| `firecracker` | Firecracker microVMs | -| `gvisor` | gVisor sandboxed containers | - -### External Service - -Delegate to an external code execution service: - -```toml -[features.code_execution] -enabled = true -allowed_languages = ["python", "javascript", "r", "julia"] -timeout_secs = 120 -max_memory_mb = 1024 - -[features.code_execution.mode] -external = { url = "https://code-exec.example.com", api_key = "${CODE_EXEC_API_KEY}" } -``` - -| Key | Type | Description | -| --------- | ------ | -------------------------- | -| `url` | string | External service URL | -| `api_key` | string | API key for authentication | - -## 
Complete Examples - -### Client-Side Only (Default) - -```toml -[features.code_execution] -enabled = true -mode = "wasm" -allowed_languages = ["python", "javascript"] -timeout_secs = 30 -max_memory_mb = 256 -``` - -### Docker Sandboxed - -```toml -[features.code_execution] -enabled = true -allowed_languages = ["python", "javascript", "bash", "ruby"] -timeout_secs = 60 -max_memory_mb = 512 - -[features.code_execution.mode] -sandboxed = { runtime = "docker" } -``` - -### External Service - -```toml -[features.code_execution] -enabled = true -allowed_languages = ["python", "javascript", "r", "julia", "sql"] -timeout_secs = 120 -max_memory_mb = 2048 - -[features.code_execution.mode] -external = { url = "https://code-exec.internal.company.com", api_key = "${CODE_EXEC_API_KEY}" } -``` - -## Security Considerations - - - Server-side code execution requires careful security configuration. Consider: - Network isolation - for containers - Resource limits (CPU, memory, disk) - Execution timeouts - Language restrictions - - -## See Also - -- [Frontend Tools Guide](/docs/features/frontend-tools) - Client-side WASM execution diff --git a/docs/content/docs/configuration/features/fallback.mdx b/docs/content/docs/configuration/features/fallback.mdx deleted file mode 100644 index 34c92b2..0000000 --- a/docs/content/docs/configuration/features/fallback.mdx +++ /dev/null @@ -1,158 +0,0 @@ ---- -title: Fallback & Retry -description: Configure automatic retries and provider fallbacks ---- - -import { Callout } from "fumadocs-ui/components/callout"; - -The `[features.fallback]` section configures automatic retry behavior and provider fallback chains for handling transient errors. 
- -## Configuration Reference - -```toml -[features.fallback] -retries_enabled = true -max_retries = 3 -initial_delay_ms = 1000 -max_delay_ms = 30000 -backoff_multiplier = 2.0 -fallback_enabled = false -fallback_order = [] -fallback_on = ["rate_limit", "server_error", "timeout"] -``` - -| Key | Type | Default | Description | -| -------------------- | ------- | --------- | --------------------------------- | -| `retries_enabled` | boolean | `true` | Enable automatic retries | -| `max_retries` | integer | `3` | Maximum retry attempts | -| `initial_delay_ms` | integer | `1000` | Initial retry delay (1 second) | -| `max_delay_ms` | integer | `30000` | Maximum retry delay (30 seconds) | -| `backoff_multiplier` | float | `2.0` | Exponential backoff multiplier | -| `fallback_enabled` | boolean | `false` | Enable provider fallbacks | -| `fallback_order` | array | `[]` | Provider fallback chain | -| `fallback_on` | array | see below | Error types that trigger fallback | - -## Retry Behavior - -Retries use exponential backoff with the formula: - -``` -delay = min(initial_delay_ms * (backoff_multiplier ^ attempt), max_delay_ms) -``` - -Example with defaults: - -- Attempt 1: 1000ms delay -- Attempt 2: 2000ms delay -- Attempt 3: 4000ms delay - -## Fallback Triggers - -| Trigger | Description | -| ---------------- | ----------------------- | -| `rate_limit` | 429 Too Many Requests | -| `server_error` | 5xx errors | -| `timeout` | Request timeout | -| `overloaded` | Provider overloaded | -| `context_length` | Context length exceeded | - -```toml -fallback_on = ["rate_limit", "server_error", "timeout"] -``` - -## Complete Examples - -### Retries Only (Default) - -```toml -[features.fallback] -retries_enabled = true -max_retries = 3 -initial_delay_ms = 1000 -max_delay_ms = 30000 -backoff_multiplier = 2.0 -fallback_enabled = false -``` - -### With Provider Fallbacks - -```toml -[features.fallback] -retries_enabled = true -max_retries = 2 -initial_delay_ms = 500 -max_delay_ms = 10000 
-backoff_multiplier = 2.0 -fallback_enabled = true -fallback_order = ["anthropic", "openai", "bedrock"] -fallback_on = ["rate_limit", "server_error", "timeout", "overloaded"] -``` - -Flow: Primary provider fails → retry 2x → try Anthropic → retry 2x → try OpenAI → retry 2x → try Bedrock → fail - -### Aggressive Retries - -```toml -[features.fallback] -retries_enabled = true -max_retries = 5 -initial_delay_ms = 200 -max_delay_ms = 5000 -backoff_multiplier = 1.5 -fallback_enabled = false -``` - -### No Retries - -```toml -[features.fallback] -retries_enabled = false -fallback_enabled = true -fallback_order = ["anthropic", "openai"] -fallback_on = ["server_error", "timeout"] -``` - -### Context Length Fallback - -Handle models with different context limits: - -```toml -[features.fallback] -retries_enabled = true -max_retries = 1 -fallback_enabled = true -fallback_order = ["gpt-4o", "claude-sonnet"] -fallback_on = ["context_length"] -``` - -## Fallback Chain Behavior - -``` -Request to primary provider - │ - ▼ - ┌─────────┐ ┌─────────────────┐ - │ Success │ ←── │ Retry if failed │ - └─────────┘ └─────────────────┘ - │ │ - │ max_retries - │ │ - │ ▼ - │ ┌─────────────────┐ - │ │ Next in fallback│ - │ │ order │ - │ └─────────────────┘ - │ │ - ▼ ▼ - Return response Repeat until - chain exhausted -``` - - -Per-provider retry and circuit breaker settings (in `[providers.]`) override global fallback settings for that provider. - - -## See Also - -- [Load Balancing](/docs/configuration/features/load-balancing) - Provider selection -- [Provider Configuration](/docs/configuration/providers) - Per-provider retries diff --git a/docs/content/docs/configuration/features/index.mdx b/docs/content/docs/configuration/features/index.mdx index 4e58b9f..1422f73 100644 --- a/docs/content/docs/configuration/features/index.mdx +++ b/docs/content/docs/configuration/features/index.mdx @@ -15,12 +15,8 @@ The `[features]` section enables and configures optional gateway capabilities. 
A | [File Processing](/docs/configuration/features/file-processing) | `[features.file_processing]` | Document chunking, OCR, virus scanning | | [Response Caching](/docs/configuration/features/response-caching) | `[features.response_caching]` | Exact and semantic response caching | | [Guardrails](/docs/configuration/features/guardrails) | `[features.guardrails]` | Content filtering, PII detection, safety | -| [Web Search](/docs/configuration/features/web-search) | `[features.web_search]` | Web search tool providers | -| [Code Execution](/docs/configuration/features/code-execution) | `[features.code_execution]` | Server-side code execution | | [Image Fetching](/docs/configuration/features/image-fetching) | `[features.image_fetching]` | URL-to-base64 conversion for non-OpenAI providers | | [WebSocket](/docs/configuration/features/websocket) | `[features.websocket]` | Real-time event subscriptions | -| [Load Balancing](/docs/configuration/features/load-balancing) | `[features.load_balancing]` | Provider selection strategies | -| [Fallback](/docs/configuration/features/fallback) | `[features.fallback]` | Retry and provider fallback | | Model Catalog | `[features.model_catalog]` | Enrich models with capabilities and pricing | ## Minimal Configuration @@ -107,21 +103,6 @@ timeout_secs = 30 enabled = true require_auth = true -# Load Balancing -[features.load_balancing] -strategy = "round_robin" - -[features.load_balancing.health_check] -enabled = true -interval_secs = 30 - -# Fallback & Retry -[features.fallback] -retries_enabled = true -max_retries = 3 -fallback_enabled = true -fallback_order = ["anthropic", "openai"] - # Model Catalog [features.model_catalog] enabled = true diff --git a/docs/content/docs/configuration/features/load-balancing.mdx b/docs/content/docs/configuration/features/load-balancing.mdx deleted file mode 100644 index c994e22..0000000 --- a/docs/content/docs/configuration/features/load-balancing.mdx +++ /dev/null @@ -1,141 +0,0 @@ ---- -title: Load 
Balancing -description: Configure provider selection strategies and health checks ---- - -import { Callout } from "fumadocs-ui/components/callout"; - -The `[features.load_balancing]` section configures how requests are distributed across providers when multiple providers support the same model. - -## Configuration Reference - -### Main Settings - -```toml -[features.load_balancing] -strategy = "round_robin" -``` - -| Key | Type | Default | Description | -| ---------- | ------ | --------------- | ----------------------- | -| `strategy` | string | `"round_robin"` | Load balancing strategy | - -### Strategies - -| Strategy | Description | -| ------------------- | --------------------------------------------- | -| `round_robin` | Cycle through providers sequentially | -| `least_connections` | Route to provider with fewest active requests | -| `random` | Random provider selection | -| `weighted` | Distribute based on configured weights | -| `latency_based` | Route to lowest-latency provider | -| `cost_based` | Route to lowest-cost provider | - -```toml -# Round Robin (default) -[features.load_balancing] -strategy = "round_robin" - -# Latency-based -[features.load_balancing] -strategy = "latency_based" - -# Cost-based -[features.load_balancing] -strategy = "cost_based" -``` - -### Health Checks - -Configure provider health monitoring: - -```toml -[features.load_balancing.health_check] -enabled = true -interval_secs = 30 -unhealthy_threshold = 3 -healthy_threshold = 2 -``` - -| Key | Type | Default | Description | -| --------------------- | ------- | ------- | -------------------------------------- | -| `enabled` | boolean | `true` | Enable health checks | -| `interval_secs` | integer | `30` | Check interval in seconds | -| `unhealthy_threshold` | integer | `3` | Consecutive failures to mark unhealthy | -| `healthy_threshold` | integer | `2` | Consecutive successes to mark healthy | - -## Complete Examples - -### Round Robin with Health Checks - -```toml 
-[features.load_balancing] -strategy = "round_robin" - -[features.load_balancing.health_check] -enabled = true -interval_secs = 30 -unhealthy_threshold = 3 -healthy_threshold = 2 -``` - -### Latency-Based - -```toml -[features.load_balancing] -strategy = "latency_based" - -[features.load_balancing.health_check] -enabled = true -interval_secs = 15 -unhealthy_threshold = 2 -healthy_threshold = 1 -``` - -### Cost-Based - -```toml -[features.load_balancing] -strategy = "cost_based" - -[features.load_balancing.health_check] -enabled = true -interval_secs = 60 -unhealthy_threshold = 5 -healthy_threshold = 2 -``` - -### Disabled Health Checks - -```toml -[features.load_balancing] -strategy = "random" - -[features.load_balancing.health_check] -enabled = false -``` - -## Strategy Selection - -| Use Case | Recommended Strategy | -| ----------------- | -------------------- | -| General workloads | `round_robin` | -| Latency-sensitive | `latency_based` | -| Cost optimization | `cost_based` | -| Variable load | `least_connections` | -| Testing/debugging | `random` | - -## Health Check Behavior - -``` -Provider A: ● ● ● ✗ ✗ ✗ → Unhealthy (3 consecutive failures) -Provider B: ● ● ● ● ● ● → Healthy -Provider C: ✗ ✗ ● ● ● ● → Healthy (2 consecutive successes) -``` - -Unhealthy providers are excluded from load balancing until they pass `healthy_threshold` consecutive checks. 
- -## See Also - -- [Fallback Configuration](/docs/configuration/features/fallback) - Retry and fallback settings -- [Provider Configuration](/docs/configuration/providers) - Provider setup diff --git a/docs/content/docs/configuration/features/meta.json b/docs/content/docs/configuration/features/meta.json index 433c034..42aabe6 100644 --- a/docs/content/docs/configuration/features/meta.json +++ b/docs/content/docs/configuration/features/meta.json @@ -6,11 +6,7 @@ "file-processing", "response-caching", "guardrails", - "web-search", - "code-execution", "image-fetching", - "websocket", - "load-balancing", - "fallback" + "websocket" ] } diff --git a/docs/content/docs/configuration/features/web-search.mdx b/docs/content/docs/configuration/features/web-search.mdx deleted file mode 100644 index fd0022e..0000000 --- a/docs/content/docs/configuration/features/web-search.mdx +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: Web Search -description: Configure web search providers for the web_search tool ---- - -import { Callout } from "fumadocs-ui/components/callout"; - -The `[features.web_search]` section configures web search providers that can be used by the `web_search` tool in chat conversations. 
- -## Configuration Reference - -### Main Settings - -```toml -[features.web_search] -enabled = true -default_provider = "tavily" -max_results = 10 -``` - -| Key | Type | Default | Description | -| ------------------ | ------- | ------- | ------------------------- | -| `enabled` | boolean | `true` | Enable web search | -| `default_provider` | string | none | Default provider name | -| `max_results` | integer | `10` | Maximum results to return | - -## Providers - -### Tavily - -AI-optimized search API: - -```toml -[[features.web_search.providers]] -type = "tavily" -api_key = "${TAVILY_API_KEY}" -``` - -| Key | Type | Description | -| --------- | ------ | -------------- | -| `api_key` | string | Tavily API key | - -### Brave Search - -Privacy-focused search API: - -```toml -[[features.web_search.providers]] -type = "brave" -api_key = "${BRAVE_SEARCH_API_KEY}" -``` - -| Key | Type | Description | -| --------- | ------ | -------------------- | -| `api_key` | string | Brave Search API key | - -### Google Custom Search - -Google search via Custom Search JSON API: - -```toml -[[features.web_search.providers]] -type = "google" -api_key = "${GOOGLE_API_KEY}" -search_engine_id = "${GOOGLE_CSE_ID}" -``` - -| Key | Type | Description | -| ------------------ | ------ | ----------------------- | -| `api_key` | string | Google API key | -| `search_engine_id` | string | Custom Search Engine ID | - -### Bing Search - -Microsoft Bing Web Search API: - -```toml -[[features.web_search.providers]] -type = "bing" -api_key = "${BING_SEARCH_API_KEY}" -``` - -| Key | Type | Description | -| --------- | ------ | ------------------- | -| `api_key` | string | Bing Search API key | - -### SerpAPI - -Scraping-based search API (Google, Bing, etc.): - -```toml -[[features.web_search.providers]] -type = "serp" -api_key = "${SERPAPI_KEY}" -``` - -| Key | Type | Description | -| --------- | ------ | ----------- | -| `api_key` | string | SerpAPI key | - -## Complete Example - -```toml 
-[features.web_search] -enabled = true -default_provider = "tavily" -max_results = 10 - -[[features.web_search.providers]] -type = "tavily" -api_key = "${TAVILY_API_KEY}" - -[[features.web_search.providers]] -type = "brave" -api_key = "${BRAVE_SEARCH_API_KEY}" - -[[features.web_search.providers]] -type = "google" -api_key = "${GOOGLE_API_KEY}" -search_engine_id = "${GOOGLE_CSE_ID}" -``` - -## Provider Comparison - -| Provider | Pricing | Best For | -| -------- | ------------- | --------------------------------------- | -| Tavily | Freemium | AI/LLM applications, clean results | -| Brave | Freemium | Privacy-conscious, web search | -| Google | Pay-per-query | Comprehensive results, custom filtering | -| Bing | Pay-per-query | Microsoft ecosystem | -| SerpAPI | Pay-per-query | Multiple search engines, SERP data | - - - Web search results are formatted for LLM consumption and injected into the conversation context. - diff --git a/src/app.rs b/src/app.rs index 0bad0b0..1a76833 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1357,34 +1357,22 @@ impl AppState { // Get embedding configuration with priority: // 1. file_search.embedding (explicit RAG config) // 2. response_caching.semantic.embedding (semantic cache config) - // 3. vector_search.embedding (legacy vector search config) - let embedding_config = file_search_config - .embedding - .as_ref() - .or_else(|| { - config - .features - .response_caching - .as_ref() - .and_then(|rc| rc.semantic.as_ref()) - .map(|sc| &sc.embedding) - }) - .or_else(|| { - config - .features - .vector_search - .as_ref() - .map(|vs| &vs.embedding) - }); + let embedding_config = file_search_config.embedding.as_ref().or_else(|| { + config + .features + .response_caching + .as_ref() + .and_then(|rc| rc.semantic.as_ref()) + .map(|sc| &sc.embedding) + }); let embedding_config = match embedding_config { Some(cfg) => cfg, None => { tracing::warn!( "File search is enabled but no embedding configuration found. 
\ - Configure [features.file_search.embedding], \ - [features.response_caching.semantic.embedding], or \ - [features.vector_search.embedding] to enable file search." + Configure [features.file_search.embedding] or \ + [features.response_caching.semantic.embedding] to enable file search." ); return None; } diff --git a/src/cache/memory.rs b/src/cache/memory.rs index b50587c..c4e8478 100644 --- a/src/cache/memory.rs +++ b/src/cache/memory.rs @@ -12,6 +12,10 @@ use std::{ /// This prevents infinite spinning under extreme contention. const MAX_CAS_RETRIES: usize = 100; +/// Maximum number of entries to evict in one pass when the cache reaches capacity (small caches evict fewer). +/// Eviction removes expired entries first, then uses LRU. +const EVICTION_BATCH_SIZE: usize = 100; + use async_trait::async_trait; use dashmap::DashMap; @@ -89,7 +93,6 @@ pub struct MemoryCache { counters: Arc>>, sets: Arc>, max_entries: usize, - eviction_batch_size: usize, } impl MemoryCache { @@ -99,7 +102,6 @@ impl MemoryCache { counters: Arc::new(DashMap::new()), sets: Arc::new(DashMap::new()), max_entries: config.max_entries, - eviction_batch_size: config.eviction_batch_size.max(1), } } @@ -117,8 +119,10 @@ impl MemoryCache { return; } - // Calculate how many entries to evict - let target_size = self.max_entries.saturating_sub(self.eviction_batch_size); + // Calculate how many entries to evict: at least 1, at most EVICTION_BATCH_SIZE. + // Use 10% of max_entries for small caches to avoid evicting everything at once. 
+ let batch = (self.max_entries / 10).clamp(1, EVICTION_BATCH_SIZE); + let target_size = self.max_entries.saturating_sub(batch); let to_evict = current_len.saturating_sub(target_size); if to_evict == 0 { @@ -487,13 +491,9 @@ mod tests { } } - fn test_config_with_eviction( - max_entries: usize, - eviction_batch_size: usize, - ) -> MemoryCacheConfig { + fn test_config_with_eviction(max_entries: usize) -> MemoryCacheConfig { MemoryCacheConfig { max_entries, - eviction_batch_size, ..Default::default() } } @@ -969,8 +969,8 @@ mod tests { #[tokio::test] async fn test_lru_eviction_evicts_oldest() { - // max_entries=5, eviction_batch_size=2 - let cache = MemoryCache::new(&test_config_with_eviction(5, 2)); + // max_entries=5; eviction batch = max(1, 5/10) = 1, target_size = 4 + let cache = MemoryCache::new(&test_config_with_eviction(5)); // Fill cache with entries (with delays to ensure distinct access times) for i in 0..5 { @@ -1021,8 +1021,8 @@ mod tests { .filter(|&&x| x) .count(); - // After eviction, we should have fewer than 5 entries - // eviction_batch_size=2 means target is max_entries - 2 = 3 entries after eviction + // After eviction: target_size = 4, current_len was 6, so 2 entries are evicted. + // key2 and key3 are the least recently accessed, so at most 1 of key2/key3/key4 remains. 
assert!( remaining <= 2, "Expected at most 2 of key2/key3/key4 to remain, got {}", @@ -1032,7 +1032,7 @@ mod tests { #[tokio::test] async fn test_lru_eviction_prefers_expired_first() { - let cache = MemoryCache::new(&test_config_with_eviction(4, 2)); + let cache = MemoryCache::new(&test_config_with_eviction(4)); // Add entries: some expired, some not cache @@ -1088,7 +1088,7 @@ mod tests { #[tokio::test] async fn test_lru_no_eviction_below_capacity() { - let cache = MemoryCache::new(&test_config_with_eviction(10, 2)); + let cache = MemoryCache::new(&test_config_with_eviction(10)); // Add entries below capacity for i in 0..5 { @@ -1114,7 +1114,7 @@ mod tests { #[tokio::test] async fn test_get_updates_last_accessed() { - let cache = MemoryCache::new(&test_config_with_eviction(3, 1)); + let cache = MemoryCache::new(&test_config_with_eviction(3)); // Add entries cache diff --git a/src/config/cache.rs b/src/config/cache.rs index 74b523a..ab5db97 100644 --- a/src/config/cache.rs +++ b/src/config/cache.rs @@ -59,11 +59,6 @@ pub struct MemoryCacheConfig { #[serde(default = "default_max_entries")] pub max_entries: usize, - /// Number of entries to evict when cache is full. - /// Eviction removes expired entries first, then uses LRU. - #[serde(default = "default_eviction_batch_size")] - pub eviction_batch_size: usize, - /// Default TTL for cache entries in seconds. #[serde(default = "default_ttl")] pub default_ttl_secs: u64, @@ -77,7 +72,6 @@ impl Default for MemoryCacheConfig { fn default() -> Self { Self { max_entries: default_max_entries(), - eviction_batch_size: default_eviction_batch_size(), default_ttl_secs: default_ttl(), ttl: CacheTtlConfig::default(), } @@ -99,10 +93,6 @@ fn default_max_entries() -> usize { 100_000 } -fn default_eviction_batch_size() -> usize { - 100 // Evict 100 entries at a time when cache is full -} - fn default_ttl() -> u64 { 3600 // 1 hour } @@ -214,14 +204,6 @@ pub struct CacheTtlConfig { /// TTL for dynamic provider cache in seconds. 
#[serde(default = "default_provider_ttl")] pub provider_secs: u64, - - /// TTL for daily spend cache in seconds. - #[serde(default = "default_daily_spend_ttl")] - pub daily_spend_secs: u64, - - /// TTL for monthly spend cache in seconds. - #[serde(default = "default_monthly_spend_ttl")] - pub monthly_spend_secs: u64, } impl Default for CacheTtlConfig { @@ -230,8 +212,6 @@ impl Default for CacheTtlConfig { api_key_secs: default_api_key_ttl(), rate_limit_secs: default_rate_limit_ttl(), provider_secs: default_provider_ttl(), - daily_spend_secs: default_daily_spend_ttl(), - monthly_spend_secs: default_monthly_spend_ttl(), } } } @@ -247,11 +227,3 @@ fn default_rate_limit_ttl() -> u64 { fn default_provider_ttl() -> u64 { 300 // 5 minutes } - -fn default_daily_spend_ttl() -> u64 { - 86400 // 1 day -} - -fn default_monthly_spend_ttl() -> u64 { - 86400 * 32 // ~32 days -} diff --git a/src/config/features.rs b/src/config/features.rs index 3a7caba..faa3d69 100644 --- a/src/config/features.rs +++ b/src/config/features.rs @@ -7,49 +7,20 @@ use super::{CircuitBreakerConfig, RetryConfig}; #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] #[serde(deny_unknown_fields)] pub struct FeaturesConfig { - /// Vector search / RAG features. - #[serde(default)] - pub vector_search: Option, - /// File search configuration for the Responses API. /// Enables server-side file_search tool execution for RAG. #[serde(default)] pub file_search: Option, - /// Web search features. - #[serde(default)] - pub web_search: Option, - - /// Code execution features. - #[serde(default)] - pub code_execution: Option, - - /// Content moderation (legacy - prefer guardrails for new deployments). - #[serde(default)] - pub moderation: Option, - /// Guardrails for content filtering, PII detection, and safety. - /// More comprehensive than the legacy moderation config, with support - /// for multiple providers, execution modes, and fine-grained actions. 
+ /// Supports multiple providers, execution modes, and fine-grained actions. #[serde(default)] pub guardrails: Option, - /// Prompt caching. - #[serde(default)] - pub prompt_caching: Option, - /// Response caching. #[serde(default)] pub response_caching: Option, - /// Fallback and retry configuration. - #[serde(default)] - pub fallback: FallbackConfig, - - /// Load balancing configuration. - #[serde(default)] - pub load_balancing: LoadBalancingConfig, - /// HTTP image URL fetching configuration. /// Controls how non-OpenAI providers (Anthropic, Bedrock, Vertex) handle /// HTTP image URLs in chat completion requests. @@ -88,80 +59,6 @@ impl FeaturesConfig { } } -// ───────────────────────────────────────────────────────────────────────────── -// Vector Search -// ───────────────────────────────────────────────────────────────────────────── - -/// Vector search configuration for RAG. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct VectorSearchConfig { - /// Enable vector search. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Vector database backend. - pub backend: VectorBackend, - - /// Default number of results to retrieve. - #[serde(default = "default_top_k")] - pub default_top_k: usize, - - /// Default similarity threshold (0.0-1.0). - #[serde(default = "default_similarity_threshold")] - pub similarity_threshold: f64, - - /// Embedding configuration. - #[serde(default)] - pub embedding: EmbeddingConfig, -} - -fn default_top_k() -> usize { - 5 -} - -fn default_similarity_threshold() -> f64 { - 0.7 -} - -/// Vector database backend. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(tag = "type", rename_all = "snake_case")] -#[serde(deny_unknown_fields)] -pub enum VectorBackend { - /// PostgreSQL with pgvector extension. 
- Pgvector, - - /// Qdrant vector database. - Qdrant { - url: String, - #[serde(default)] - api_key: Option, - }, - - /// Pinecone vector database. - Pinecone { - api_key: String, - environment: String, - }, - - /// Weaviate vector database. - Weaviate { - url: String, - #[serde(default)] - api_key: Option, - }, - - /// ChromaDB. - Chroma { - url: String, - #[serde(default)] - api_key: Option, - }, -} - /// Embedding configuration. #[derive(Debug, Clone, Serialize, Deserialize, Default)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] @@ -776,10 +673,7 @@ pub struct FileProcessingQueueConfig { pub backend: FileProcessingQueueBackend, /// Connection URL for the queue backend. - /// Examples: - /// - Redis: "redis://localhost:6379" - /// - RabbitMQ: "amqp://guest:guest@localhost:5672" - /// - SQS: "https://sqs.us-east-1.amazonaws.com/123456789/queue-name" + /// Example: "redis://localhost:6379" pub url: String, /// Queue/topic name for processing jobs. @@ -789,14 +683,6 @@ pub struct FileProcessingQueueConfig { /// Consumer group name (for Redis Streams). #[serde(default = "default_file_processing_consumer_group")] pub consumer_group: String, - - /// AWS region (for SQS). - #[serde(default)] - pub region: Option, - - /// GCP project ID (for Pub/Sub). - #[serde(default)] - pub project_id: Option, } impl FileProcessingQueueConfig { @@ -808,15 +694,6 @@ impl FileProcessingQueueConfig { if self.queue_name.is_empty() { return Err("Queue name cannot be empty".to_string()); } - match self.backend { - FileProcessingQueueBackend::Sqs if self.region.is_none() => { - return Err("SQS backend requires 'region' to be specified".to_string()); - } - FileProcessingQueueBackend::PubSub if self.project_id.is_none() => { - return Err("Pub/Sub backend requires 'project_id' to be specified".to_string()); - } - _ => {} - } Ok(()) } } @@ -829,18 +706,6 @@ pub enum FileProcessingQueueBackend { /// Redis Streams. /// Good for simple deployments, supports consumer groups. 
Redis, - - /// RabbitMQ. - /// Full-featured message broker with routing capabilities. - RabbitMq, - - /// AWS SQS. - /// Managed queue service, good for AWS deployments. - Sqs, - - /// Google Cloud Pub/Sub. - /// Managed pub/sub service, good for GCP deployments. - PubSub, } // ───────────────────────────────────────────────────────────────────────────── @@ -1161,201 +1026,6 @@ fn default_file_processing_consumer_group() -> String { "hadrian_workers".to_string() } -// ───────────────────────────────────────────────────────────────────────────── -// Web Search -// ───────────────────────────────────────────────────────────────────────────── - -/// Web search configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct WebSearchConfig { - /// Enable web search. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Web search providers. - pub providers: Vec, - - /// Default provider. - #[serde(default)] - pub default_provider: Option, - - /// Maximum results to return. - #[serde(default = "default_max_results")] - pub max_results: usize, -} - -fn default_max_results() -> usize { - 10 -} - -/// Web search provider configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(tag = "type", rename_all = "snake_case")] -#[serde(deny_unknown_fields)] -pub enum WebSearchProvider { - /// Tavily search API. - Tavily { api_key: String }, - - /// Brave Search API. - Brave { api_key: String }, - - /// Google Custom Search. - Google { - api_key: String, - search_engine_id: String, - }, - - /// Bing Search API. - Bing { api_key: String }, - - /// SerpAPI. 
- Serp { api_key: String }, -} - -// ───────────────────────────────────────────────────────────────────────────── -// Code Execution -// ───────────────────────────────────────────────────────────────────────────── - -/// Code execution configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct CodeExecutionConfig { - /// Enable code execution. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Execution mode. - #[serde(default)] - pub mode: CodeExecutionMode, - - /// Allowed languages. - #[serde(default = "default_languages")] - pub allowed_languages: Vec, - - /// Execution timeout in seconds. - #[serde(default = "default_execution_timeout")] - pub timeout_secs: u64, - - /// Maximum memory in MB. - #[serde(default = "default_max_memory")] - pub max_memory_mb: u64, -} - -fn default_languages() -> Vec { - vec!["python".into(), "javascript".into()] -} - -fn default_execution_timeout() -> u64 { - 30 -} - -fn default_max_memory() -> u64 { - 256 -} - -/// Code execution mode. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum CodeExecutionMode { - /// Execute in browser via WASM (Python via Pyodide, JS native). - #[default] - Wasm, - - /// Execute on server in sandboxed containers. - Sandboxed { - /// Container runtime. - runtime: ContainerRuntime, - }, - - /// External code execution service. - External { - /// Service URL. - url: String, - /// API key. 
- api_key: Option, - }, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum ContainerRuntime { - Docker, - Firecracker, - Gvisor, -} - -// ───────────────────────────────────────────────────────────────────────────── -// Moderation -// ───────────────────────────────────────────────────────────────────────────── - -/// Content moderation configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct ModerationConfig { - /// Enable moderation. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Check input before sending to provider. - #[serde(default = "default_true")] - pub check_input: bool, - - /// Check output before returning to user. - #[serde(default)] - pub check_output: bool, - - /// Moderation provider. - #[serde(default)] - pub provider: ModerationProvider, - - /// Action to take on flagged content. - #[serde(default)] - pub action: ModerationAction, - - /// Categories to check. - #[serde(default)] - pub categories: Vec, - - /// Threshold for flagging (0.0-1.0). - #[serde(default = "default_moderation_threshold")] - pub threshold: f64, -} - -fn default_moderation_threshold() -> f64 { - 0.8 -} - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum ModerationProvider { - #[default] - OpenAi, - Custom { - url: String, - }, -} - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum ModerationAction { - /// Block the request and return an error. - #[default] - Block, - /// Allow but log the flagged content. - Log, - /// Add a warning to the response. 
- Warn, -} - // ───────────────────────────────────────────────────────────────────────────── // Guardrails // ───────────────────────────────────────────────────────────────────────────── @@ -2072,24 +1742,6 @@ fn default_pii_replacement() -> String { // Caching // ───────────────────────────────────────────────────────────────────────────── -/// Prompt caching configuration (provider-level caching like Anthropic's). -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct PromptCachingConfig { - /// Enable prompt caching. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Minimum prompt length to cache (in tokens). - #[serde(default = "default_min_cache_tokens")] - pub min_tokens: u32, -} - -fn default_min_cache_tokens() -> u32 { - 1024 -} - /// Response caching configuration (gateway-level caching). #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] @@ -2396,181 +2048,6 @@ pub struct CacheKeyComponents { pub tools: bool, } -// ───────────────────────────────────────────────────────────────────────────── -// Fallback & Retry -// ───────────────────────────────────────────────────────────────────────────── - -/// Fallback and retry configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct FallbackConfig { - /// Enable automatic retries. - #[serde(default = "default_true")] - pub retries_enabled: bool, - - /// Maximum number of retries. - #[serde(default = "default_max_retries")] - pub max_retries: u32, - - /// Initial retry delay in milliseconds. - #[serde(default = "default_retry_delay")] - pub initial_delay_ms: u64, - - /// Maximum retry delay in milliseconds. - #[serde(default = "default_max_retry_delay")] - pub max_delay_ms: u64, - - /// Retry backoff multiplier. 
- #[serde(default = "default_backoff_multiplier")] - pub backoff_multiplier: f64, - - /// Enable fallback to alternative providers. - #[serde(default)] - pub fallback_enabled: bool, - - /// Fallback provider order. - #[serde(default)] - pub fallback_order: Vec, - - /// Errors that trigger fallback. - #[serde(default = "default_fallback_errors")] - pub fallback_on: Vec, -} - -impl Default for FallbackConfig { - fn default() -> Self { - Self { - retries_enabled: true, - max_retries: default_max_retries(), - initial_delay_ms: default_retry_delay(), - max_delay_ms: default_max_retry_delay(), - backoff_multiplier: default_backoff_multiplier(), - fallback_enabled: false, - fallback_order: vec![], - fallback_on: default_fallback_errors(), - } - } -} - -fn default_max_retries() -> u32 { - 3 -} - -fn default_retry_delay() -> u64 { - 1000 -} - -fn default_max_retry_delay() -> u64 { - 30000 -} - -fn default_backoff_multiplier() -> f64 { - 2.0 -} - -fn default_fallback_errors() -> Vec { - vec![ - FallbackTrigger::RateLimit, - FallbackTrigger::ServerError, - FallbackTrigger::Timeout, - ] -} - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum FallbackTrigger { - RateLimit, - ServerError, - Timeout, - Overloaded, - ContextLength, -} - -// ───────────────────────────────────────────────────────────────────────────── -// Load Balancing -// ───────────────────────────────────────────────────────────────────────────── - -/// Load balancing configuration. -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct LoadBalancingConfig { - /// Load balancing strategy. - #[serde(default)] - pub strategy: LoadBalanceStrategy, - - /// Health check configuration. - #[serde(default)] - pub health_check: HealthCheckConfig, -} - -/// Load balancing strategy. 
-#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum LoadBalanceStrategy { - /// Round-robin across providers. - #[default] - RoundRobin, - /// Route to least-loaded provider. - LeastConnections, - /// Random selection. - Random, - /// Weighted distribution. - Weighted, - /// Route based on latency. - LatencyBased, - /// Route based on cost. - CostBased, -} - -/// Health check configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct HealthCheckConfig { - /// Enable health checks. - #[serde(default = "default_true")] - pub enabled: bool, - - /// Health check interval in seconds. - #[serde(default = "default_health_interval")] - pub interval_secs: u64, - - /// Unhealthy threshold (consecutive failures). - #[serde(default = "default_unhealthy_threshold")] - pub unhealthy_threshold: u32, - - /// Healthy threshold (consecutive successes). 
- #[serde(default = "default_healthy_threshold")] - pub healthy_threshold: u32, -} - -impl Default for HealthCheckConfig { - fn default() -> Self { - Self { - enabled: true, - interval_secs: default_health_interval(), - unhealthy_threshold: default_unhealthy_threshold(), - healthy_threshold: default_healthy_threshold(), - } - } -} - -fn default_health_interval() -> u64 { - 30 -} - -fn default_unhealthy_threshold() -> u32 { - 3 -} - -fn default_healthy_threshold() -> u32 { - 2 -} - // ───────────────────────────────────────────────────────────────────────────── // Image Fetching // ───────────────────────────────────────────────────────────────────────────── @@ -4192,73 +3669,6 @@ mod tests { assert_eq!(queue.consumer_group, "my_workers"); } - #[test] - fn test_file_processing_config_queue_mode_sqs() { - let config: FileProcessingConfig = toml::from_str( - r#" - mode = "queue" - - [queue] - backend = "sqs" - url = "https://sqs.us-east-1.amazonaws.com/123456789/my-queue" - queue_name = "my-queue" - region = "us-east-1" - "#, - ) - .unwrap(); - - assert_eq!(config.mode, FileProcessingMode::Queue); - assert!(config.validate().is_ok()); - - let queue = config.queue.unwrap(); - assert_eq!(queue.backend, FileProcessingQueueBackend::Sqs); - assert_eq!(queue.region, Some("us-east-1".to_string())); - } - - #[test] - fn test_file_processing_config_queue_mode_pubsub() { - let config: FileProcessingConfig = toml::from_str( - r#" - mode = "queue" - - [queue] - backend = "pub_sub" - url = "https://pubsub.googleapis.com" - queue_name = "file-processing-topic" - project_id = "my-gcp-project" - "#, - ) - .unwrap(); - - assert_eq!(config.mode, FileProcessingMode::Queue); - assert!(config.validate().is_ok()); - - let queue = config.queue.unwrap(); - assert_eq!(queue.backend, FileProcessingQueueBackend::PubSub); - assert_eq!(queue.project_id, Some("my-gcp-project".to_string())); - } - - #[test] - fn test_file_processing_config_queue_mode_rabbitmq() { - let config: FileProcessingConfig 
= toml::from_str( - r#" - mode = "queue" - - [queue] - backend = "rabbit_mq" - url = "amqp://guest:guest@localhost:5672" - queue_name = "file_processing" - "#, - ) - .unwrap(); - - assert_eq!(config.mode, FileProcessingMode::Queue); - assert!(config.validate().is_ok()); - - let queue = config.queue.unwrap(); - assert_eq!(queue.backend, FileProcessingQueueBackend::RabbitMq); - } - #[test] fn test_file_processing_config_queue_mode_missing_config() { let config: FileProcessingConfig = toml::from_str( @@ -4272,40 +3682,6 @@ mod tests { assert!(config.validate().is_err()); } - #[test] - fn test_file_processing_config_sqs_missing_region() { - let config: FileProcessingConfig = toml::from_str( - r#" - mode = "queue" - - [queue] - backend = "sqs" - url = "https://sqs.us-east-1.amazonaws.com/123456789/my-queue" - queue_name = "my-queue" - "#, - ) - .unwrap(); - - assert!(config.validate().is_err()); - } - - #[test] - fn test_file_processing_config_pubsub_missing_project() { - let config: FileProcessingConfig = toml::from_str( - r#" - mode = "queue" - - [queue] - backend = "pub_sub" - url = "https://pubsub.googleapis.com" - queue_name = "file-processing-topic" - "#, - ) - .unwrap(); - - assert!(config.validate().is_err()); - } - #[test] fn test_file_processing_config_max_size_bytes() { let config = FileProcessingConfig { diff --git a/src/config/limits.rs b/src/config/limits.rs index 6e02efd..ded9e1e 100644 --- a/src/config/limits.rs +++ b/src/config/limits.rs @@ -17,10 +17,6 @@ pub struct LimitsConfig { #[serde(default)] pub budgets: BudgetDefaults, - /// Token limits. - #[serde(default)] - pub tokens: TokenLimitDefaults, - /// Resource limits for entity counts. #[serde(default)] pub resource_limits: ResourceLimits, @@ -225,15 +221,6 @@ pub struct BudgetDefaults { #[serde(default = "default_warning_threshold")] pub warning_threshold: f64, - /// Hard limit action when budget is exceeded. 
- #[serde(default)] - pub exceeded_action: BudgetExceededAction, - - /// Allow overage up to this percentage above the budget. - /// E.g., 0.1 means 10% overage is allowed. - #[serde(default)] - pub allowed_overage: f64, - /// Estimated cost per request in cents for budget reservation. /// This is reserved before the request is processed to prevent race conditions. /// After the request completes, the actual cost replaces the estimate. @@ -248,8 +235,6 @@ impl Default for BudgetDefaults { monthly_budget_usd: None, daily_budget_usd: None, warning_threshold: default_warning_threshold(), - exceeded_action: BudgetExceededAction::default(), - allowed_overage: 0.0, estimated_cost_cents: default_estimated_cost_cents(), } } @@ -262,54 +247,3 @@ fn default_estimated_cost_cents() -> i64 { fn default_warning_threshold() -> f64 { 0.8 // 80% } - -/// Action to take when budget is exceeded. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum BudgetExceededAction { - /// Block the request. - #[default] - Block, - /// Allow the request but log a warning. - Warn, - /// Allow but throttle (reduce rate limits). - Throttle, -} - -/// Token limit defaults. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct TokenLimitDefaults { - /// Maximum input tokens per request. - #[serde(default)] - pub max_input_tokens: Option, - - /// Maximum output tokens per request. - #[serde(default)] - pub max_output_tokens: Option, - - /// Maximum total tokens per request (input + output). - #[serde(default)] - pub max_total_tokens: Option, - - /// Default max_tokens if not specified in the request. 
- #[serde(default = "default_max_tokens")] - pub default_max_tokens: u32, -} - -impl Default for TokenLimitDefaults { - fn default() -> Self { - Self { - max_input_tokens: None, - max_output_tokens: None, - max_total_tokens: None, - default_max_tokens: default_max_tokens(), - } - } -} - -fn default_max_tokens() -> u32 { - 4096 -} diff --git a/src/config/observability.rs b/src/config/observability.rs index 171435a..3deb01e 100644 --- a/src/config/observability.rs +++ b/src/config/observability.rs @@ -19,10 +19,6 @@ pub struct ObservabilityConfig { #[serde(default)] pub metrics: MetricsConfig, - /// Request/response logging. - #[serde(default)] - pub request_logging: RequestLoggingConfig, - /// Usage logging configuration. #[serde(default)] pub usage: UsageConfig, @@ -384,7 +380,7 @@ pub struct TracingConfig { } fn default_service_name() -> String { - "ai-gateway".to_string() + "hadrian".to_string() } /// OTLP exporter configuration. @@ -573,102 +569,6 @@ fn default_metrics_path() -> String { "/metrics".to_string() } -// ───────────────────────────────────────────────────────────────────────────── -// Request Logging -// ───────────────────────────────────────────────────────────────────────────── - -/// Request/response logging configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(deny_unknown_fields)] -pub struct RequestLoggingConfig { - /// Enable request logging. - #[serde(default)] - pub enabled: bool, - - /// Log request bodies. - #[serde(default)] - pub log_request_body: bool, - - /// Log response bodies. - #[serde(default)] - pub log_response_body: bool, - - /// Maximum body size to log (in bytes). - #[serde(default = "default_max_body_log")] - pub max_body_size: usize, - - /// Redact sensitive fields. - #[serde(default = "default_true")] - pub redact_sensitive: bool, - - /// Fields to redact. 
- #[serde(default = "default_redact_fields")] - pub redact_fields: Vec, - - /// Log to separate destination. - #[serde(default)] - pub destination: Option, -} - -impl Default for RequestLoggingConfig { - fn default() -> Self { - Self { - enabled: false, - log_request_body: false, - log_response_body: false, - max_body_size: default_max_body_log(), - redact_sensitive: true, - redact_fields: default_redact_fields(), - destination: None, - } - } -} - -fn default_max_body_log() -> usize { - 10 * 1024 // 10 KB -} - -fn default_redact_fields() -> Vec { - vec![ - "api_key".into(), - "password".into(), - "secret".into(), - "authorization".into(), - ] -} - -/// Log destination for request logging. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(tag = "type", rename_all = "snake_case")] -#[serde(deny_unknown_fields)] -pub enum LogDestination { - /// Log to file. - File { - path: String, - #[serde(default)] - rotation: Option, - }, - /// Log to stdout/stderr (same as regular logs). - Stdout, - /// Send to external service. - Http { - url: String, - #[serde(default)] - headers: HashMap, - }, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum LogRotation { - Daily, - Hourly, - Size { max_bytes: usize }, -} - fn default_true() -> bool { true } diff --git a/src/config/providers.rs b/src/config/providers.rs index 0eccba4..4b779de 100644 --- a/src/config/providers.rs +++ b/src/config/providers.rs @@ -66,6 +66,7 @@ pub struct ModelFallback { /// ``` #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `pricing` pub struct ModelConfig { /// Pricing fields (flattened inline). 
#[serde(flatten)] @@ -123,6 +124,7 @@ pub struct ModelConfig { /// to determine which API protocol to use. #[derive(Debug, Clone, Serialize, Deserialize, Default)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `providers` HashMap pub struct ProvidersConfig { /// Default provider name for requests that don't specify one. #[serde(default)] @@ -588,10 +590,6 @@ pub struct OpenAiProviderConfig { #[serde(default)] pub headers: HashMap, - /// Whether this provider supports streaming (default: true). - #[serde(default = "default_true")] - pub supports_streaming: bool, - /// Whether this provider supports function/tool calling. #[serde(default)] pub supports_tools: bool, @@ -660,7 +658,6 @@ impl std::fmt::Debug for OpenAiProviderConfig { .field("allowed_models", &self.allowed_models) .field("model_aliases", &self.model_aliases) .field("headers", &self.headers) - .field("supports_streaming", &self.supports_streaming) .field("supports_tools", &self.supports_tools) .field("supports_vision", &self.supports_vision) .field("models", &self.models) @@ -2780,7 +2777,6 @@ mod tests { allowed_models: vec![], model_aliases: HashMap::new(), headers: HashMap::new(), - supports_streaming: true, supports_tools: false, supports_vision: false, models: HashMap::new(), diff --git a/src/config/secrets.rs b/src/config/secrets.rs index 48f6d7d..24a7be5 100644 --- a/src/config/secrets.rs +++ b/src/config/secrets.rs @@ -42,6 +42,7 @@ impl SecretsConfig { /// Configuration for Vault/OpenBao secrets manager. 
#[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +// Note: cannot use deny_unknown_fields due to #[serde(flatten)] on `auth` pub struct VaultSecretsConfig { /// Vault server address (e.g., "https://vault.example.com:8200") pub address: String, @@ -153,6 +154,7 @@ fn default_k8s_token_path() -> String { /// Configuration for AWS Secrets Manager. #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +#[serde(deny_unknown_fields)] pub struct AwsSecretsConfig { /// AWS region (e.g., "us-east-1"). If not set, uses AWS_REGION environment variable. #[serde(default)] @@ -176,6 +178,7 @@ fn default_aws_prefix() -> String { /// Configuration for Azure Key Vault. #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +#[serde(deny_unknown_fields)] pub struct AzureKeyVaultSecretsConfig { /// Key Vault URL (e.g., "https://myvault.vault.azure.net") pub vault_url: String, @@ -195,6 +198,7 @@ fn default_azure_prefix() -> String { /// Configuration for GCP Secret Manager. #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] +#[serde(deny_unknown_fields)] pub struct GcpSecretsConfig { /// GCP project ID pub project_id: String, diff --git a/src/config/server.rs b/src/config/server.rs index bca41a4..fc8be77 100644 --- a/src/config/server.rs +++ b/src/config/server.rs @@ -18,11 +18,6 @@ pub struct ServerConfig { #[serde(default = "default_port")] pub port: u16, - /// Base path for all API routes (e.g., "/api/v1"). - /// The UI is always served from "/". - #[serde(default)] - pub api_base_path: Option, - /// Request body size limit in bytes. 
#[serde(default = "default_body_limit")] pub body_limit_bytes: usize, @@ -51,10 +46,6 @@ pub struct ServerConfig { #[serde(default = "default_streaming_idle_timeout")] pub streaming_idle_timeout_secs: u64, - /// Enable HTTP/2 (requires TLS or h2c). - #[serde(default)] - pub http2: bool, - /// TLS configuration. If omitted, serves plain HTTP. /// In production, TLS is typically terminated at the load balancer. #[serde(default)] @@ -100,12 +91,10 @@ impl Default for ServerConfig { Self { host: default_host(), port: default_port(), - api_base_path: None, body_limit_bytes: default_body_limit(), max_response_body_bytes: default_max_response_body(), timeout_secs: default_timeout(), streaming_idle_timeout_secs: default_streaming_idle_timeout(), - http2: false, tls: None, trusted_proxies: TrustedProxiesConfig::default(), cors: CorsConfig::default(), @@ -424,7 +413,8 @@ pub struct SecurityHeadersConfig { pub content_security_policy: Option, /// X-XSS-Protection header value. - /// Legacy header for older browsers. Default: "1; mode=block" + /// Legacy header for older browsers. Disabled by default as CSP provides protection. + /// Enable for legacy browser compatibility. #[serde(default = "default_xss_protection")] pub xss_protection: Option, @@ -488,7 +478,7 @@ fn default_csp() -> Option { } fn default_xss_protection() -> Option { - Some("1; mode=block".to_string()) + None } fn default_referrer_policy() -> Option { diff --git a/src/config/ui.rs b/src/config/ui.rs index d3e07e5..6ed954a 100644 --- a/src/config/ui.rs +++ b/src/config/ui.rs @@ -63,14 +63,6 @@ pub struct AssetsConfig { /// Cache control header for static assets. #[serde(default = "default_cache_control")] pub cache_control: String, - - /// Enable gzip compression. - #[serde(default = "default_true")] - pub gzip: bool, - - /// Enable brotli compression. 
- #[serde(default = "default_true")] - pub brotli: bool, } impl Default for AssetsConfig { @@ -78,8 +70,6 @@ impl Default for AssetsConfig { Self { source: AssetSource::default(), cache_control: default_cache_control(), - gzip: true, - brotli: true, } } } @@ -122,25 +112,9 @@ pub struct ChatConfig { #[serde(default)] pub available_models: Vec, - /// Enable conversation history. - #[serde(default = "default_true")] - pub history_enabled: bool, - - /// Maximum conversations to store per user. - #[serde(default = "default_max_conversations")] - pub max_conversations: usize, - /// Enable file uploads. #[serde(default)] pub file_uploads: FileUploadConfig, - - /// Enable code execution in chat. - #[serde(default)] - pub code_execution: bool, - - /// Enable web search in chat. - #[serde(default)] - pub web_search: bool, } impl Default for ChatConfig { @@ -149,19 +123,11 @@ impl Default for ChatConfig { enabled: true, default_model: None, available_models: vec![], - history_enabled: true, - max_conversations: default_max_conversations(), file_uploads: FileUploadConfig::default(), - code_execution: false, - web_search: false, } } } -fn default_max_conversations() -> usize { - 100 -} - /// File upload configuration. #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "json-schema", derive(schemars::JsonSchema))] @@ -250,11 +216,6 @@ pub struct AdminConfig { /// Path for admin panel. #[serde(default = "default_admin_path")] pub path: String, - - /// Roles that can access the admin panel. - /// If empty, any authenticated user with admin flag can access. 
- #[serde(default)] - pub allowed_roles: Vec, } impl Default for AdminConfig { @@ -262,7 +223,6 @@ impl Default for AdminConfig { Self { enabled: true, path: default_admin_path(), - allowed_roles: vec![], } } } diff --git a/src/init.rs b/src/init.rs index d3466a3..2f4d719 100644 --- a/src/init.rs +++ b/src/init.rs @@ -219,24 +219,14 @@ pub(crate) async fn init_worker_embedding_service( } }; - let embedding_config = file_search_config - .embedding - .as_ref() - .or_else(|| { - config - .features - .response_caching - .as_ref() - .and_then(|rc| rc.semantic.as_ref()) - .map(|sc| &sc.embedding) - }) - .or_else(|| { - config - .features - .vector_search - .as_ref() - .map(|vs| &vs.embedding) - }); + let embedding_config = file_search_config.embedding.as_ref().or_else(|| { + config + .features + .response_caching + .as_ref() + .and_then(|rc| rc.semantic.as_ref()) + .map(|sc| &sc.embedding) + }); let embedding_config = match embedding_config { Some(cfg) => cfg, diff --git a/src/routing/resolver.rs b/src/routing/resolver.rs index 58f2ec7..4d7e259 100644 --- a/src/routing/resolver.rs +++ b/src/routing/resolver.rs @@ -600,7 +600,6 @@ pub async fn dynamic_provider_to_config( allowed_models: provider.models.clone(), model_aliases: std::collections::HashMap::new(), headers: std::collections::HashMap::new(), - supports_streaming: true, supports_tools: false, supports_vision: false, models: std::collections::HashMap::new(), diff --git a/src/services/document_processor.rs b/src/services/document_processor.rs index 4d8dd6e..6e55bd4 100644 --- a/src/services/document_processor.rs +++ b/src/services/document_processor.rs @@ -130,16 +130,6 @@ pub enum QueueBackend { queue_name: String, consumer_group: String, }, - /// RabbitMQ - RabbitMQ { - url: String, - exchange: String, - queue_name: String, - }, - /// AWS SQS - Sqs { queue_url: String, region: String }, - /// Google Cloud Pub/Sub - PubSub { project_id: String, topic: String }, } /// Configuration for the document processor. 
@@ -212,19 +202,6 @@ fn convert_queue_config(queue: &FileProcessingQueueConfig) -> QueueBackend { queue_name: queue.queue_name.clone(), consumer_group: queue.consumer_group.clone(), }, - FileProcessingQueueBackend::RabbitMq => QueueBackend::RabbitMQ { - url: queue.url.clone(), - exchange: "hadrian".to_string(), // Default exchange name - queue_name: queue.queue_name.clone(), - }, - FileProcessingQueueBackend::Sqs => QueueBackend::Sqs { - queue_url: queue.url.clone(), - region: queue.region.clone().unwrap_or_default(), - }, - FileProcessingQueueBackend::PubSub => QueueBackend::PubSub { - project_id: queue.project_id.clone().unwrap_or_default(), - topic: queue.queue_name.clone(), - }, } } @@ -1495,21 +1472,6 @@ impl DocumentProcessor { .to_string(), )); } - Some(QueueBackend::RabbitMQ { url, .. }) => { - return Err(DocumentProcessorError::Configuration(format!( - "RabbitMQ queue publishing not yet implemented (url: {url})" - ))); - } - Some(QueueBackend::Sqs { queue_url, .. }) => { - return Err(DocumentProcessorError::Configuration(format!( - "SQS queue publishing not yet implemented (queue: {queue_url})" - ))); - } - Some(QueueBackend::PubSub { topic, .. }) => { - return Err(DocumentProcessorError::Configuration(format!( - "Pub/Sub queue publishing not yet implemented (topic: {topic})" - ))); - } None => { error!("Queue mode enabled but no queue backend configured"); otel_span_error!("Queue backend not configured"); @@ -1619,15 +1581,6 @@ pub async fn start_file_processing_worker( "Redis queue configured but the 'redis' feature is not enabled. Rebuild with: cargo build --features redis" ); } - QueueBackend::RabbitMQ { .. } => { - tracing::error!("RabbitMQ worker not yet implemented"); - } - QueueBackend::Sqs { .. } => { - tracing::error!("SQS worker not yet implemented"); - } - QueueBackend::PubSub { .. 
} => { - tracing::error!("Pub/Sub worker not yet implemented"); - } } } diff --git a/src/tests/provider_e2e.rs b/src/tests/provider_e2e.rs index 8161cc2..3bdaa58 100644 --- a/src/tests/provider_e2e.rs +++ b/src/tests/provider_e2e.rs @@ -822,7 +822,6 @@ type = "{}" base_url = "{}" api_key = "test-api-key" timeout_secs = 30 -supports_streaming = true supports_tools = true supports_vision = true @@ -2459,7 +2458,6 @@ type = "open_ai" base_url = "{}" api_key = "test-api-key" timeout_secs = 30 -supports_streaming = true supports_tools = true # Circuit breaker configuration