From d0612eff9269259d579fdd1f359bf9e55c59e184 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 4 Mar 2026 21:31:33 +1000 Subject: [PATCH] Add config builder component --- .../ConfigBuilder/ConfigBuilder.tsx | 335 + docs/components/ConfigBuilder/TomlPreview.tsx | 92 + docs/components/ConfigBuilder/fields.tsx | 561 + docs/components/ConfigBuilder/schema-utils.ts | 191 + docs/components/ConfigBuilder/toml-utils.ts | 93 + docs/content/docs/configuration/builder.mdx | 9 + docs/content/docs/configuration/index.mdx | 1 + docs/content/docs/configuration/meta.json | 2 +- docs/mdx-components.tsx | 2 + docs/package.json | 6 +- docs/pnpm-lock.yaml | 45 + docs/public/config-schema.json | 9408 +++++++++++++++++ scripts/generate-schema.sh | 9 + 13 files changed, 10751 insertions(+), 3 deletions(-) create mode 100644 docs/components/ConfigBuilder/ConfigBuilder.tsx create mode 100644 docs/components/ConfigBuilder/TomlPreview.tsx create mode 100644 docs/components/ConfigBuilder/fields.tsx create mode 100644 docs/components/ConfigBuilder/schema-utils.ts create mode 100644 docs/components/ConfigBuilder/toml-utils.ts create mode 100644 docs/content/docs/configuration/builder.mdx create mode 100644 docs/public/config-schema.json create mode 100755 scripts/generate-schema.sh diff --git a/docs/components/ConfigBuilder/ConfigBuilder.tsx b/docs/components/ConfigBuilder/ConfigBuilder.tsx new file mode 100644 index 0000000..c70b48c --- /dev/null +++ b/docs/components/ConfigBuilder/ConfigBuilder.tsx @@ -0,0 +1,335 @@ +"use client"; + +import { RotateCcw } from "lucide-react"; +import { useCallback, useMemo, useReducer, useState } from "react"; +import { type FieldProps, ObjectField, SchemaField, TaggedUnionField } from "./fields"; +import { + CONFIG_SECTIONS, + type JsonSchema, + SECTION_LABELS, + fullyResolve, + getPrimaryType, + isTaggedUnion, +} from "./schema-utils"; +import { generateToml } from "./toml-utils"; +import { TomlPreview } from "./TomlPreview"; +/* eslint-disable 
@typescript-eslint/no-explicit-any */ + +import configSchemaRaw from "../../public/config-schema.json"; + +const configSchema = configSchemaRaw as unknown as JsonSchema; + +// ── Default starter config ───────────────────────────────────────────────── + +const DEFAULT_STATE: Record = { + server: { host: "0.0.0.0", port: 8080 }, + database: { type: "sqlite", path: "./hadrian.db" }, + cache: { type: "memory" }, + providers: { + default_provider: "ollama", + ollama: { type: "open_ai", base_url: "http://localhost:11434/v1" }, + }, + ui: { enabled: true, chat: { enabled: true }, admin: { enabled: true } }, + docs: { enabled: true }, +}; + +// ── State management ─────────────────────────────────────────────────────── + +type Action = + | { type: "SET_VALUE"; section: string; value: any } + | { type: "RESET_SECTION"; section: string } + | { type: "RESET" }; + +function reducer(state: Record, action: Action): Record { + switch (action.type) { + case "SET_VALUE": { + const next = { ...state }; + if (action.value === undefined || action.value === null) { + delete next[action.section]; + } else { + next[action.section] = action.value; + } + return next; + } + case "RESET_SECTION": { + const next = { ...state }; + delete next[action.section]; + return next; + } + case "RESET": + return DEFAULT_STATE; + } +} + +// ── Providers section (special handling) ─────────────────────────────────── + +const providerConfigDef = configSchema.definitions?.["ProviderConfig"]; + +function ProvidersSection({ + value, + onChange, + rootSchema, +}: { + value: any; + onChange: (v: any) => void; + rootSchema: JsonSchema; +}) { + const [newName, setNewName] = useState(""); + const current = useMemo(() => value ?? 
{}, [value]); + + const addProvider = useCallback(() => { + const name = newName.trim().toLowerCase().replace(/\s+/g, "_"); + if (!name || name in current) return; + onChange({ ...current, [name]: { type: "open_ai" } }); + setNewName(""); + }, [newName, current, onChange]); + + const removeProvider = useCallback( + (name: string) => { + const next = { ...current }; + delete next[name]; + onChange(Object.keys(next).length > 0 ? next : undefined); + }, + [current, onChange] + ); + + const updateProvider = useCallback( + (name: string, val: any) => { + onChange({ ...current, [name]: val }); + }, + [current, onChange] + ); + + const updateDefaultProvider = useCallback( + (val: any) => { + onChange({ ...current, default_provider: val || undefined }); + }, + [current, onChange] + ); + + // Separate default_provider from named providers + const { default_provider, ...providers } = current; + const providerEntries = Object.entries(providers); + + return ( +
+
+ +

+ Default provider name for requests that don't specify one. +

+ updateDefaultProvider(e.target.value)} + placeholder="e.g. openai" + aria-label="Default provider" + /> +
+ +
+

Providers

+
+ setNewName(e.target.value)} + placeholder="Provider name (e.g. openai, anthropic)" + onKeyDown={(e) => e.key === "Enter" && addProvider()} + aria-label="New provider name" + /> + +
+ + {providerEntries.map(([name, val]) => ( +
+
+ + [providers.{name}] + + +
+ {providerConfigDef ? ( + updateProvider(name, v)} + path={`providers.${name}`} + rootSchema={rootSchema} + /> + ) : ( +

Provider schema not available

+ )} +
+ ))} +
+
+ ); +} + +// ── Section Form (dispatches to correct field type) ──────────────────────── + +function SectionForm(props: Omit & { description?: string }) { + const { schema, rootSchema } = props; + const resolved = fullyResolve(schema, rootSchema); + + // Tagged unions (database, cache, secrets, etc.) → SchemaField handles them + if (isTaggedUnion(resolved)) { + return ; + } + + // Regular objects → ObjectField with collapsible=false for top-level sections + if (getPrimaryType(resolved) === "object" && resolved.properties) { + return ; + } + + // Fallback + return ; +} + +// ── Main Component ───────────────────────────────────────────────────────── + +export function ConfigBuilder() { + const [state, dispatch] = useReducer(reducer, DEFAULT_STATE); + const [activeTab, setActiveTab] = useState(CONFIG_SECTIONS[0]); + + const rootSchema = configSchema; + + const toml = useMemo(() => generateToml(state, rootSchema), [state, rootSchema]); + + const handleSectionChange = useCallback((section: string, value: any) => { + dispatch({ type: "SET_VALUE", section, value }); + }, []); + + const handleResetSection = useCallback((section: string) => { + dispatch({ type: "RESET_SECTION", section }); + }, []); + + const handleReset = useCallback(() => { + dispatch({ type: "RESET" }); + }, []); + + // Resolve schema for the active section + const sectionSchema = useMemo(() => { + const prop = rootSchema.properties?.[activeTab]; + if (!prop) return null; + return fullyResolve(prop, rootSchema); + }, [rootSchema, activeTab]); + + const filledSections = useMemo(() => { + const filled = new Set(); + for (const section of CONFIG_SECTIONS) { + if (state[section] && Object.keys(state[section]).length > 0) { + filled.add(section); + } + } + return filled; + }, [state]); + + return ( +
+ {/* Tab bar */} +
+
+ {CONFIG_SECTIONS.map((section) => ( + + ))} +
+ +
+ + {/* Content: form + preview */} +
+ {/* Form panel */} +
+
+
+

+ {SECTION_LABELS[activeTab] ?? activeTab} +

+ {sectionSchema?.description && ( +

+ {sectionSchema.description} +

+ )} +
+ {filledSections.has(activeTab) && ( + + )} +
+ {activeTab === "providers" ? ( + handleSectionChange("providers", v)} + rootSchema={rootSchema} + /> + ) : sectionSchema ? ( + handleSectionChange(activeTab, v)} + path={activeTab} + rootSchema={rootSchema} + /> + ) : ( +

+ No schema available for this section. +

+ )} +
+ + {/* Preview panel */} +
+ +
+
+
+ ); +} diff --git a/docs/components/ConfigBuilder/TomlPreview.tsx b/docs/components/ConfigBuilder/TomlPreview.tsx new file mode 100644 index 0000000..9772702 --- /dev/null +++ b/docs/components/ConfigBuilder/TomlPreview.tsx @@ -0,0 +1,92 @@ +"use client"; + +import { Check, Copy, Download } from "lucide-react"; +import { useCallback, useEffect, useRef, useState } from "react"; +import { type Highlighter, createHighlighter } from "shiki"; +import { useTheme } from "next-themes"; + +let highlighterPromise: Promise | null = null; + +function getHighlighter() { + if (!highlighterPromise) { + highlighterPromise = createHighlighter({ + themes: ["github-light", "github-dark"], + langs: ["toml"], + }); + } + return highlighterPromise; +} + +export function TomlPreview({ toml }: { toml: string }) { + const [html, setHtml] = useState(""); + const [copied, setCopied] = useState(false); + const debounceRef = useRef>(undefined); + const { resolvedTheme } = useTheme(); + const theme = resolvedTheme === "dark" ? "github-dark" : "github-light"; + + useEffect(() => { + clearTimeout(debounceRef.current); + debounceRef.current = setTimeout(() => { + getHighlighter().then((hl) => { + setHtml(hl.codeToHtml(toml, { lang: "toml", theme })); + }); + }, 150); + return () => clearTimeout(debounceRef.current); + }, [toml, theme]); + + const handleCopy = useCallback(async () => { + await navigator.clipboard.writeText(toml); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + }, [toml]); + + const handleDownload = useCallback(() => { + const blob = new Blob([toml], { type: "application/toml" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = "hadrian.toml"; + a.click(); + URL.revokeObjectURL(url); + }, [toml]); + + return ( +
+
+ hadrian.toml +
+ + +
+
+ {html ? ( +
+ ) : ( +
+
+            {toml}
+          
+
+ )} +
+ ); +} diff --git a/docs/components/ConfigBuilder/fields.tsx b/docs/components/ConfigBuilder/fields.tsx new file mode 100644 index 0000000..204a35b --- /dev/null +++ b/docs/components/ConfigBuilder/fields.tsx @@ -0,0 +1,561 @@ +"use client"; + +import { ChevronDown, ChevronRight, Plus, Trash2 } from "lucide-react"; +import { useCallback, useId, useMemo, useState } from "react"; +import type { JsonSchema } from "./schema-utils"; +import { + fieldLabel, + fullyResolve, + getPrimaryType, + getVariants, + isSensitiveField, + isTaggedUnion, +} from "./schema-utils"; + +/* eslint-disable @typescript-eslint/no-explicit-any */ + +export interface FieldProps { + schema: JsonSchema; + value: any; + onChange: (value: any) => void; + path: string; + rootSchema: JsonSchema; + label?: string; +} + +// ── Shared styles ────────────────────────────────────────────────────────── + +const inputClass = + "w-full rounded-md border border-fd-border bg-fd-card px-3 py-1.5 text-sm text-fd-foreground placeholder:text-fd-muted-foreground/50 focus:border-fd-primary focus:outline-none focus:ring-1 focus:ring-fd-primary"; + +const selectClass = + "w-full rounded-md border border-fd-border bg-fd-card px-3 py-1.5 text-sm text-fd-foreground focus:border-fd-primary focus:outline-none focus:ring-1 focus:ring-fd-primary"; + +// ── String Field ─────────────────────────────────────────────────────────── + +export function StringField({ schema, value, onChange, path, label }: FieldProps) { + const id = useId(); + const sensitive = isSensitiveField(path.split(".").pop() ?? ""); + return ( +
+ + {schema.description && ( +

{schema.description}

+ )} + onChange(e.target.value || undefined)} + placeholder={schema.default != null ? String(schema.default) : undefined} + /> +
+ ); +} + +// ── Number Field ─────────────────────────────────────────────────────────── + +export function NumberField({ schema, value, onChange, path, label }: FieldProps) { + const id = useId(); + return ( +
+ + {schema.description && ( +

{schema.description}

+ )} + onChange(e.target.value === "" ? undefined : Number(e.target.value))} + placeholder={schema.default != null ? String(schema.default) : undefined} + min={schema.minimum} + max={schema.maximum} + /> +
+ ); +} + +// ── Boolean Field ────────────────────────────────────────────────────────── + +export function BooleanField({ schema, value, onChange, path, label }: FieldProps) { + const id = useId(); + const checked = value === true; + return ( +
+ +
+ + {schema.description && ( +

{schema.description}

+ )} +
+
+ ); +} + +// ── Enum Field ───────────────────────────────────────────────────────────── + +export function EnumField({ schema, value, onChange, path, label }: FieldProps) { + const id = useId(); + const options = schema.enum ?? []; + return ( +
+ + {schema.description && ( +

{schema.description}

+ )} + +
+ ); +} + +// ── Tagged Union Field ───────────────────────────────────────────────────── + +export function TaggedUnionField({ + schema, + value, + onChange, + path, + rootSchema, + label, + hideLabel, +}: FieldProps & { hideLabel?: boolean }) { + const variants = getVariants(schema); + const currentType = value?.type ?? ""; + + const activeVariant = variants.find((v) => v.tag === currentType); + + const handleTypeChange = useCallback( + (tag: string) => { + if (tag === currentType) return; + // Reset to just the type when switching variants + onChange(tag ? { type: tag } : undefined); + }, + [currentType, onChange] + ); + + const handleFieldChange = useCallback( + (fieldName: string, fieldValue: any) => { + const next = { ...(value ?? {}), type: currentType }; + if (fieldValue === undefined || fieldValue === null || fieldValue === "") { + delete next[fieldName]; + } else { + next[fieldName] = fieldValue; + } + onChange(next); + }, + [value, currentType, onChange] + ); + + // Get non-type properties for the active variant + const variantProps = activeVariant + ? Object.entries(activeVariant.schema.properties ?? {}).filter(([k]) => k !== "type") + : []; + + return ( +
+ {!hideLabel && ( +

+ {label ?? fieldLabel(path.split(".").pop() ?? "")} +

+ )} + {!hideLabel && schema.description && ( +

{schema.description}

+ )} +
+ {variants.map((v) => ( + + ))} +
+ {activeVariant?.description && ( +

{activeVariant.description}

+ )} + {variantProps.length > 0 && ( +
+ {variantProps.map(([propName, propSchema]) => { + const resolved = fullyResolve(propSchema, rootSchema); + return ( + handleFieldChange(propName, v)} + path={`${path}.${propName}`} + rootSchema={rootSchema} + /> + ); + })} +
+ )} +
+ ); +} + +// ── Array Field ──────────────────────────────────────────────────────────── + +export function ArrayField({ schema, value, onChange, path, rootSchema, label }: FieldProps) { + const items: any[] = useMemo(() => (Array.isArray(value) ? value : []), [value]); + const itemSchema = schema.items ? fullyResolve(schema.items, rootSchema) : { type: "string" }; + const isScalar = + getPrimaryType(itemSchema) === "string" || + getPrimaryType(itemSchema) === "number" || + getPrimaryType(itemSchema) === "integer"; + + const addItem = useCallback(() => { + onChange([...items, isScalar ? "" : {}]); + }, [items, isScalar, onChange]); + + const removeItem = useCallback( + (index: number) => { + const next = items.filter((_, i) => i !== index); + onChange(next.length > 0 ? next : undefined); + }, + [items, onChange] + ); + + const updateItem = useCallback( + (index: number, val: any) => { + const next = [...items]; + next[index] = val; + onChange(next); + }, + [items, onChange] + ); + + return ( +
+
+

+ {label ?? fieldLabel(path.split(".").pop() ?? "")} +

+ +
+ {schema.description && ( +

{schema.description}

+ )} + {items.map((item, i) => ( +
+
+ {isScalar ? ( + updateItem(i, e.target.value || undefined)} + placeholder={`Item ${i + 1}`} + aria-label={`${path.split(".").pop()} item ${i + 1}`} + /> + ) : ( + updateItem(i, v)} + path={`${path}[${i}]`} + rootSchema={rootSchema} + label={`Item ${i + 1}`} + /> + )} +
+ +
+ ))} +
+ ); +} + +// ── Map Field ────────────────────────────────────────────────────────────── + +export function MapField({ schema, value, onChange, path, rootSchema, label }: FieldProps) { + const entries = value && typeof value === "object" ? Object.entries(value) : []; + const valueSchema = + typeof schema.additionalProperties === "object" + ? fullyResolve(schema.additionalProperties, rootSchema) + : { type: "string" as const }; + const [newKey, setNewKey] = useState(""); + + const addEntry = useCallback(() => { + const key = newKey.trim(); + if (!key || (value && key in value)) return; + onChange({ ...(value ?? {}), [key]: {} }); + setNewKey(""); + }, [newKey, value, onChange]); + + const removeEntry = useCallback( + (key: string) => { + const next = { ...(value ?? {}) }; + delete next[key]; + onChange(Object.keys(next).length > 0 ? next : undefined); + }, + [value, onChange] + ); + + const updateEntry = useCallback( + (key: string, val: any) => { + onChange({ ...(value ?? {}), [key]: val }); + }, + [value, onChange] + ); + + return ( +
+

+ {label ?? fieldLabel(path.split(".").pop() ?? "")} +

+ {schema.description && ( +

{schema.description}

+ )} +
+ setNewKey(e.target.value)} + placeholder="Entry name" + onKeyDown={(e) => e.key === "Enter" && addEntry()} + aria-label={`New ${path.split(".").pop()} entry name`} + /> + +
+ {entries.map(([key, val]) => ( +
+
+ {key} + +
+ updateEntry(key, v)} + path={`${path}.${key}`} + rootSchema={rootSchema} + /> +
+ ))} +
+ ); +} + +// ── Object Field ─────────────────────────────────────────────────────────── + +export function ObjectField({ + schema, + value, + onChange, + path, + rootSchema, + label, + collapsible = true, + hideLabel, +}: FieldProps & { collapsible?: boolean; hideLabel?: boolean }) { + const [open, setOpen] = useState(!collapsible); + const properties = schema.properties ?? {}; + const propEntries = Object.entries(properties); + + const handleFieldChange = useCallback( + (fieldName: string, fieldValue: any) => { + const next = { ...(value ?? {}) }; + if (fieldValue === undefined || fieldValue === null) { + delete next[fieldName]; + } else { + next[fieldName] = fieldValue; + } + const hasValues = Object.keys(next).length > 0; + onChange(hasValues ? next : undefined); + }, + [value, onChange] + ); + + if (propEntries.length === 0) return null; + + const showHeader = !hideLabel; + + const header = showHeader ? ( +
+ {collapsible && + (open ? ( + + ) : ( + + ))} + + {label ?? fieldLabel(path.split(".").pop() ?? "")} + +
+ ) : null; + + return ( +
+ {showHeader && + (collapsible ? ( + + ) : ( + <> + {header} + {schema.description && ( +

{schema.description}

+ )} + + ))} + {open && ( +
+ {propEntries.map(([propName, propSchema]) => { + const resolved = fullyResolve(propSchema, rootSchema); + return ( + handleFieldChange(propName, v)} + path={`${path}.${propName}`} + rootSchema={rootSchema} + /> + ); + })} +
+ )} +
+ ); +} + +// ── Schema Field (dispatcher) ────────────────────────────────────────────── + +export function SchemaField(props: FieldProps) { + const { schema, rootSchema } = props; + const resolved = fullyResolve(schema, rootSchema); + const type = getPrimaryType(resolved); + + // Tagged union (oneOf with type discriminator) + if (isTaggedUnion(resolved)) { + return ; + } + + // Object with only additionalProperties (map) + if ( + type === "object" && + resolved.additionalProperties && + typeof resolved.additionalProperties === "object" && + Object.keys(resolved.properties ?? {}).length === 0 + ) { + return ; + } + + // Object with properties + if (type === "object" && resolved.properties) { + return ; + } + + // String enum + if ((type === "string" || !type) && resolved.enum) { + return ; + } + + // String + if (type === "string") { + return ; + } + + // Number / integer + if (type === "integer" || type === "number") { + return ; + } + + // Boolean + if (type === "boolean") { + return ; + } + + // Array + if (type === "array") { + return ; + } + + // Fallback: render as string input + return ; +} diff --git a/docs/components/ConfigBuilder/schema-utils.ts b/docs/components/ConfigBuilder/schema-utils.ts new file mode 100644 index 0000000..54d91ae --- /dev/null +++ b/docs/components/ConfigBuilder/schema-utils.ts @@ -0,0 +1,191 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ + +export interface JsonSchema { + $ref?: string; + $schema?: string; + title?: string; + description?: string; + type?: string | string[]; + properties?: Record; + additionalProperties?: JsonSchema | boolean; + required?: string[]; + default?: any; + enum?: any[]; + const?: any; + oneOf?: JsonSchema[]; + anyOf?: JsonSchema[]; + allOf?: JsonSchema[]; + items?: JsonSchema; + definitions?: Record; + format?: string; + minimum?: number; + maximum?: number; + nullable?: boolean; +} + +export function resolveRef(schema: JsonSchema, root: JsonSchema): JsonSchema { + if (!schema.$ref) return 
schema; + const path = schema.$ref.replace(/^#\//, "").split("/"); + let current: any = root; + for (const segment of path) { + current = current?.[segment]; + } + return (current as JsonSchema) ?? schema; +} + +export function mergeAllOf(schema: JsonSchema, root: JsonSchema): JsonSchema { + if (!schema.allOf) return schema; + const merged: JsonSchema = { ...schema }; + delete merged.allOf; + for (const sub of schema.allOf) { + const resolved = resolveRef(sub, root); + if (resolved.properties) { + merged.properties = { ...merged.properties, ...resolved.properties }; + } + if (resolved.required) { + merged.required = [...(merged.required ?? []), ...resolved.required]; + } + if (resolved.additionalProperties !== undefined) { + merged.additionalProperties = resolved.additionalProperties; + } + if (resolved.description && !merged.description) { + merged.description = resolved.description; + } + if (resolved.type && !merged.type) { + merged.type = resolved.type; + } + if (resolved.oneOf && !merged.oneOf) { + merged.oneOf = resolved.oneOf; + } + } + return merged; +} + +/** Fully resolve a schema: follow $ref, merge allOf, unwrap nullable anyOf */ +export function fullyResolve(schema: JsonSchema, root: JsonSchema): JsonSchema { + let s = resolveRef(schema, root); + s = mergeAllOf(s, root); + + // Unwrap nullable anyOf: [{ $ref: ... }, { type: "null" }] + if (s.anyOf && s.anyOf.length === 2) { + const nonNull = s.anyOf.find( + (v) => !(v.type === "null" || (Array.isArray(v.type) && v.type.includes("null"))) + ); + if (nonNull) { + const resolved = fullyResolve(nonNull, root); + return { + ...resolved, + description: s.description ?? resolved.description, + default: s.default ?? 
resolved.default, + nullable: true, + }; + } + } + + return s; +} + +/** Detect tagged union: oneOf where each variant has properties.type with enum/const */ +export function isTaggedUnion(schema: JsonSchema): boolean { + if (!schema.oneOf || schema.oneOf.length < 2) return false; + return schema.oneOf.every((variant) => { + const typeField = variant.properties?.type; + return typeField && (typeField.enum || typeField.const !== undefined); + }); +} + +/** Extract variant tag values and their sub-schemas from a tagged union */ +export function getVariants( + schema: JsonSchema +): { tag: string; schema: JsonSchema; description?: string }[] { + if (!schema.oneOf) return []; + return schema.oneOf.map((variant) => { + const typeField = variant.properties?.type; + const tag = typeField?.const ?? typeField?.enum?.[0] ?? "unknown"; + return { tag, schema: variant, description: variant.description }; + }); +} + +const SENSITIVE_PATTERNS = /(?:api_key|password|secret|token|credential|private_key)/i; + +export function isSensitiveField(name: string): boolean { + return SENSITIVE_PATTERNS.test(name); +} + +/** Get the primary scalar type from a schema, handling nullable type arrays */ +export function getPrimaryType(schema: JsonSchema): string | undefined { + if (typeof schema.type === "string") return schema.type; + if (Array.isArray(schema.type)) { + return schema.type.find((t) => t !== "null") ?? schema.type[0]; + } + return undefined; +} + +/** Check if a schema represents a map (object with additionalProperties but few/no fixed properties) */ +export function isMapSchema(schema: JsonSchema): boolean { + if (getPrimaryType(schema) !== "object") return false; + if (!schema.additionalProperties || schema.additionalProperties === true) return false; + const propCount = Object.keys(schema.properties ?? 
{}).length; + return propCount === 0; +} + +/** Human-readable label from a snake_case field name */ +export function fieldLabel(name: string): string { + return name + .replace(/_/g, " ") + .replace(/\b\w/g, (c) => c.toUpperCase()) + .replace(/\bUrl\b/g, "URL") + .replace(/\bApi\b/g, "API") + .replace(/\bIp\b/g, "IP") + .replace(/\bTtl\b/g, "TTL") + .replace(/\bSso\b/g, "SSO") + .replace(/\bMs\b/g, "ms") + .replace(/\bSecs\b/g, "Seconds") + .replace(/\bMb\b/g, "MB") + .replace(/\bOcr\b/g, "OCR") + .replace(/\bDpi\b/g, "DPI") + .replace(/\bSsl\b/g, "SSL") + .replace(/\bTls\b/g, "TLS") + .replace(/\bHsts\b/g, "HSTS") + .replace(/\bOtlp\b/g, "OTLP") + .replace(/\bCel\b/g, "CEL") + .replace(/\bRbac\b/g, "RBAC") + .replace(/\bOidc\b/g, "OIDC") + .replace(/\bSaml\b/g, "SAML") + .replace(/\bDlq\b/g, "DLQ"); +} + +/** Top-level config sections in the desired tab order */ +export const CONFIG_SECTIONS = [ + "server", + "database", + "cache", + "providers", + "auth", + "features", + "limits", + "observability", + "pricing", + "ui", + "docs", + "secrets", + "storage", + "retention", +] as const; + +export const SECTION_LABELS: Record = { + server: "Server", + database: "Database", + cache: "Cache", + providers: "Providers", + auth: "Auth", + features: "Features", + limits: "Limits", + observability: "Observability", + pricing: "Pricing", + ui: "UI", + docs: "Docs", + secrets: "Secrets", + storage: "Storage", + retention: "Retention", +}; diff --git a/docs/components/ConfigBuilder/toml-utils.ts b/docs/components/ConfigBuilder/toml-utils.ts new file mode 100644 index 0000000..b853a22 --- /dev/null +++ b/docs/components/ConfigBuilder/toml-utils.ts @@ -0,0 +1,93 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { stringify } from "smol-toml"; +import { type JsonSchema, fullyResolve, getPrimaryType, isTaggedUnion } from "./schema-utils"; + +/** Recursively remove null, undefined, empty strings, and empty objects/arrays */ +export function stripEmpty(obj: any): 
any { + if (obj === null || obj === undefined || obj === "") return undefined; + if (Array.isArray(obj)) { + const filtered = obj.map(stripEmpty).filter((v) => v !== undefined); + return filtered.length > 0 ? filtered : undefined; + } + if (typeof obj === "object") { + const result: Record = {}; + let hasKeys = false; + for (const [k, v] of Object.entries(obj)) { + const stripped = stripEmpty(v); + if (stripped !== undefined) { + result[k] = stripped; + hasKeys = true; + } + } + return hasKeys ? result : undefined; + } + return obj; +} + +/** Coerce string form values to their schema types (numbers, booleans) */ +export function coerceTypes(obj: any, schema: JsonSchema | undefined, root: JsonSchema): any { + if (obj === null || obj === undefined || !schema) return obj; + + const resolved = fullyResolve(schema, root); + const type = getPrimaryType(resolved); + + if (typeof obj === "string") { + if (type === "integer" || type === "number") { + const n = Number(obj); + return isNaN(n) ? obj : n; + } + if (type === "boolean") { + return obj === "true"; + } + return obj; + } + + if (Array.isArray(obj)) { + return obj.map((item) => coerceTypes(item, resolved.items, root)); + } + + if (typeof obj === "object") { + const result: Record = {}; + + // For tagged unions, find the active variant schema + if (isTaggedUnion(resolved) && obj.type) { + const variant = resolved.oneOf?.find((v) => { + const t = v.properties?.type; + return t?.const === obj.type || t?.enum?.[0] === obj.type; + }); + if (variant) { + for (const [k, v] of Object.entries(obj)) { + const propSchema = variant.properties?.[k]; + result[k] = coerceTypes(v, propSchema, root); + } + return result; + } + } + + for (const [k, v] of Object.entries(obj)) { + const propSchema = + resolved.properties?.[k] ?? + (typeof resolved.additionalProperties === "object" + ? 
resolved.additionalProperties + : undefined); + result[k] = coerceTypes(v, propSchema, root); + } + return result; + } + + return obj; +} + +/** Generate TOML string from the config state */ +export function generateToml(state: Record, schema: JsonSchema): string { + const stripped = stripEmpty(state); + if (!stripped || Object.keys(stripped).length === 0) { + return "# Empty configuration — select a section and fill in fields"; + } + const coerced = coerceTypes(stripped, { type: "object", properties: schema.properties }, schema); + try { + return stringify(coerced); + } catch { + return "# Error: unable to generate valid TOML from current values"; + } +} diff --git a/docs/content/docs/configuration/builder.mdx b/docs/content/docs/configuration/builder.mdx new file mode 100644 index 0000000..dc07143 --- /dev/null +++ b/docs/content/docs/configuration/builder.mdx @@ -0,0 +1,9 @@ +--- +title: Config Builder +description: Interactive configuration builder for hadrian.toml +full: true +--- + +Build your `hadrian.toml` configuration file interactively. Select a section, fill in the fields, and copy or download the generated TOML. 
+ + diff --git a/docs/content/docs/configuration/index.mdx b/docs/content/docs/configuration/index.mdx index 0bca383..49853f8 100644 --- a/docs/content/docs/configuration/index.mdx +++ b/docs/content/docs/configuration/index.mdx @@ -24,6 +24,7 @@ api_key = "${OPENAI_API_KEY}" ## Configuration Sections + diff --git a/docs/content/docs/configuration/meta.json b/docs/content/docs/configuration/meta.json index 9731d8a..23d80be 100644 --- a/docs/content/docs/configuration/meta.json +++ b/docs/content/docs/configuration/meta.json @@ -1,4 +1,4 @@ { "title": "Configuration", - "pages": ["index", "server", "database", "providers", "auth", "features", "observability"] + "pages": ["index", "builder", "server", "database", "providers", "auth", "features", "observability"] } diff --git a/docs/mdx-components.tsx b/docs/mdx-components.tsx index 064acd0..9c75236 100644 --- a/docs/mdx-components.tsx +++ b/docs/mdx-components.tsx @@ -4,6 +4,7 @@ import { APIPage } from "@/components/api-page"; import { StoryEmbed } from "@/components/story-embed"; import { Mermaid } from "@/components/mdx/mermaid"; import { QuickStartSelector } from "@/components/quick-start-selector"; +import { ConfigBuilder } from "@/components/ConfigBuilder/ConfigBuilder"; export function getMDXComponents(components?: MDXComponents): MDXComponents { return { @@ -12,6 +13,7 @@ export function getMDXComponents(components?: MDXComponents): MDXComponents { StoryEmbed, Mermaid, QuickStartSelector, + ConfigBuilder, ...components, }; } diff --git a/docs/package.json b/docs/package.json index 06079ee..a4c8d97 100644 --- a/docs/package.json +++ b/docs/package.json @@ -13,7 +13,8 @@ "lint:fix": "eslint . --fix", "format": "prettier --write \"**/*.{ts,tsx,js,mjs,css,mdx}\"", "format:check": "prettier --check \"**/*.{ts,tsx,js,mjs,css,mdx}\"", - "generate:openapi": "tsx scripts/generate-openapi-docs.ts" + "generate:openapi": "tsx scripts/generate-openapi-docs.ts", + "generate:schema": "cd .. 
&& ./scripts/generate-schema.sh" }, "dependencies": { "@orama/orama": "^3.1.18", @@ -27,7 +28,8 @@ "next-themes": "^0.4.6", "react": "^19.2.3", "react-dom": "^19.2.3", - "shiki": "^3.20.0" + "shiki": "^3.20.0", + "smol-toml": "^1.6.0" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.18", diff --git a/docs/pnpm-lock.yaml b/docs/pnpm-lock.yaml index c5107fe..420be29 100644 --- a/docs/pnpm-lock.yaml +++ b/docs/pnpm-lock.yaml @@ -44,6 +44,9 @@ importers: shiki: specifier: ^3.20.0 version: 3.20.0 + smol-toml: + specifier: ^1.6.0 + version: 1.6.0 devDependencies: '@tailwindcss/postcss': specifier: ^4.1.18 @@ -489,89 +492,105 @@ packages: resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} cpu: [arm64] os: [linux] + libc: [glibc] '@img/sharp-libvips-linux-arm@1.2.4': resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} cpu: [arm] os: [linux] + libc: [glibc] '@img/sharp-libvips-linux-ppc64@1.2.4': resolution: {integrity: sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==} cpu: [ppc64] os: [linux] + libc: [glibc] '@img/sharp-libvips-linux-riscv64@1.2.4': resolution: {integrity: sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==} cpu: [riscv64] os: [linux] + libc: [glibc] '@img/sharp-libvips-linux-s390x@1.2.4': resolution: {integrity: sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==} cpu: [s390x] os: [linux] + libc: [glibc] '@img/sharp-libvips-linux-x64@1.2.4': resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} cpu: [x64] os: [linux] + libc: [glibc] '@img/sharp-libvips-linuxmusl-arm64@1.2.4': resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} cpu: [arm64] os: 
[linux] + libc: [musl] '@img/sharp-libvips-linuxmusl-x64@1.2.4': resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} cpu: [x64] os: [linux] + libc: [musl] '@img/sharp-linux-arm64@0.34.5': resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm64] os: [linux] + libc: [glibc] '@img/sharp-linux-arm@0.34.5': resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm] os: [linux] + libc: [glibc] '@img/sharp-linux-ppc64@0.34.5': resolution: {integrity: sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [ppc64] os: [linux] + libc: [glibc] '@img/sharp-linux-riscv64@0.34.5': resolution: {integrity: sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [riscv64] os: [linux] + libc: [glibc] '@img/sharp-linux-s390x@0.34.5': resolution: {integrity: sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [s390x] os: [linux] + libc: [glibc] '@img/sharp-linux-x64@0.34.5': resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [x64] os: [linux] + libc: [glibc] '@img/sharp-linuxmusl-arm64@0.34.5': resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm64] os: [linux] + libc: [musl] '@img/sharp-linuxmusl-x64@0.34.5': resolution: {integrity: 
sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [x64] os: [linux] + libc: [musl] '@img/sharp-wasm32@0.34.5': resolution: {integrity: sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==} @@ -644,24 +663,28 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] '@next/swc-linux-arm64-musl@16.1.1': resolution: {integrity: sha512-MFHrgL4TXNQbBPzkKKur4Fb5ICEJa87HM7fczFs2+HWblM7mMLdco3dvyTI+QmLBU9xgns/EeeINSZD6Ar+oLg==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] '@next/swc-linux-x64-gnu@16.1.1': resolution: {integrity: sha512-20bYDfgOQAPUkkKBnyP9PTuHiJGM7HzNBbuqmD0jiFVZ0aOldz+VnJhbxzjcSabYsnNjMPsE0cyzEudpYxsrUQ==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] '@next/swc-linux-x64-musl@16.1.1': resolution: {integrity: sha512-9pRbK3M4asAHQRkwaXwu601oPZHghuSC8IXNENgbBSyImHv/zY4K5udBusgdHkvJ/Tcr96jJwQYOll0qU8+fPA==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] '@next/swc-win32-arm64-msvc@16.1.1': resolution: {integrity: sha512-bdfQkggaLgnmYrFkSQfsHfOhk/mCYmjnrbRCGgkMcoOBZ4n+TRRSLmT/CU5SATzlBJ9TpioUyBW/vWFXTqQRiA==} @@ -1171,24 +1194,28 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-arm64-musl@4.1.18': resolution: {integrity: sha512-1px92582HkPQlaaCkdRcio71p8bc8i/ap5807tPRDK/uw953cauQBT8c5tVGkOwrHMfc2Yh6UuxaH4vtTjGvHg==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] '@tailwindcss/oxide-linux-x64-gnu@4.1.18': resolution: {integrity: sha512-v3gyT0ivkfBLoZGF9LyHmts0Isc8jHZyVcbzio6Wpzifg/+5ZJpDiRiUhDLkcr7f/r38SWNe7ucxmGW3j3Kb/g==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-x64-musl@4.1.18': resolution: {integrity: sha512-bhJ2y2OQNlcRwwgOAGMY0xTFStt4/wyU6pvI6LSuZpRgKQwxTec0/3Scu91O8ir7qCR3AuepQKLU/kX99FouqQ==} engines: {node: '>= 10'} 
cpu: [x64] os: [linux] + libc: [musl] '@tailwindcss/oxide-wasm32-wasi@4.1.18': resolution: {integrity: sha512-LffYTvPjODiP6PT16oNeUQJzNVyJl1cjIebq/rWWBF+3eDst5JGEFSc5cWxyRCJ0Mxl+KyIkqRxk1XPEs9x8TA==} @@ -1468,41 +1495,49 @@ packages: resolution: {integrity: sha512-34gw7PjDGB9JgePJEmhEqBhWvCiiWCuXsL9hYphDF7crW7UgI05gyBAi6MF58uGcMOiOqSJ2ybEeCvHcq0BCmQ==} cpu: [arm64] os: [linux] + libc: [glibc] '@unrs/resolver-binding-linux-arm64-musl@1.11.1': resolution: {integrity: sha512-RyMIx6Uf53hhOtJDIamSbTskA99sPHS96wxVE/bJtePJJtpdKGXO1wY90oRdXuYOGOTuqjT8ACccMc4K6QmT3w==} cpu: [arm64] os: [linux] + libc: [musl] '@unrs/resolver-binding-linux-ppc64-gnu@1.11.1': resolution: {integrity: sha512-D8Vae74A4/a+mZH0FbOkFJL9DSK2R6TFPC9M+jCWYia/q2einCubX10pecpDiTmkJVUH+y8K3BZClycD8nCShA==} cpu: [ppc64] os: [linux] + libc: [glibc] '@unrs/resolver-binding-linux-riscv64-gnu@1.11.1': resolution: {integrity: sha512-frxL4OrzOWVVsOc96+V3aqTIQl1O2TjgExV4EKgRY09AJ9leZpEg8Ak9phadbuX0BA4k8U5qtvMSQQGGmaJqcQ==} cpu: [riscv64] os: [linux] + libc: [glibc] '@unrs/resolver-binding-linux-riscv64-musl@1.11.1': resolution: {integrity: sha512-mJ5vuDaIZ+l/acv01sHoXfpnyrNKOk/3aDoEdLO/Xtn9HuZlDD6jKxHlkN8ZhWyLJsRBxfv9GYM2utQ1SChKew==} cpu: [riscv64] os: [linux] + libc: [musl] '@unrs/resolver-binding-linux-s390x-gnu@1.11.1': resolution: {integrity: sha512-kELo8ebBVtb9sA7rMe1Cph4QHreByhaZ2QEADd9NzIQsYNQpt9UkM9iqr2lhGr5afh885d/cB5QeTXSbZHTYPg==} cpu: [s390x] os: [linux] + libc: [glibc] '@unrs/resolver-binding-linux-x64-gnu@1.11.1': resolution: {integrity: sha512-C3ZAHugKgovV5YvAMsxhq0gtXuwESUKc5MhEtjBpLoHPLYM+iuwSj3lflFwK3DPm68660rZ7G8BMcwSro7hD5w==} cpu: [x64] os: [linux] + libc: [glibc] '@unrs/resolver-binding-linux-x64-musl@1.11.1': resolution: {integrity: sha512-rV0YSoyhK2nZ4vEswT/QwqzqQXw5I6CjoaYMOX0TqBlWhojUf8P94mvI7nuJTeaCkkds3QE4+zS8Ko+GdXuZtA==} cpu: [x64] os: [linux] + libc: [musl] '@unrs/resolver-binding-wasm32-wasi@1.11.1': resolution: {integrity: 
sha512-5u4RkfxJm+Ng7IWgkzi3qrFOvLvQYnPBmjmZQ8+szTK/b31fQCnleNl1GgEt7nIsZRIf5PLhPwT0WM+q45x/UQ==} @@ -2790,24 +2825,28 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [glibc] lightningcss-linux-arm64-musl@1.30.2: resolution: {integrity: sha512-5Vh9dGeblpTxWHpOx8iauV02popZDsCYMPIgiuw97OJ5uaDsL86cnqSFs5LZkG3ghHoX5isLgWzMs+eD1YzrnA==} engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [musl] lightningcss-linux-x64-gnu@1.30.2: resolution: {integrity: sha512-Cfd46gdmj1vQ+lR6VRTTadNHu6ALuw2pKR9lYq4FnhvgBc4zWY1EtZcAc6EffShbb1MFrIPfLDXD6Xprbnni4w==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [glibc] lightningcss-linux-x64-musl@1.30.2: resolution: {integrity: sha512-XJaLUUFXb6/QG2lGIW6aIk6jKdtjtcffUT0NKvIqhSBY3hh9Ch+1LCeH80dR9q9LBjG3ewbDjnumefsLsP6aiA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [musl] lightningcss-win32-arm64-msvc@1.30.2: resolution: {integrity: sha512-FZn+vaj7zLv//D/192WFFVA0RgHawIcHqLX9xuWiQt7P0PtdFEVaxgF9rjM/IRYHQXNnk61/H/gb2Ei+kUQ4xQ==} @@ -3474,6 +3513,10 @@ packages: resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==} engines: {node: '>= 0.4'} + smol-toml@1.6.0: + resolution: {integrity: sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw==} + engines: {node: '>= 18'} + source-map-js@1.2.1: resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} engines: {node: '>=0.10.0'} @@ -7815,6 +7858,8 @@ snapshots: side-channel-map: 1.0.1 side-channel-weakmap: 1.0.2 + smol-toml@1.6.0: {} + source-map-js@1.2.1: {} source-map@0.7.6: {} diff --git a/docs/public/config-schema.json b/docs/public/config-schema.json new file mode 100644 index 0000000..2006118 --- /dev/null +++ b/docs/public/config-schema.json @@ -0,0 +1,9408 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GatewayConfig", + 
"description": "Root configuration for the AI Gateway.\n\nThis struct represents the complete configuration file. All sections are optional with sensible defaults, allowing minimal configuration for simple deployments.", + "type": "object", + "properties": { + "auth": { + "description": "Authentication and authorization configuration.", + "default": { + "api_key": null, + "bootstrap": null, + "emergency": null, + "mode": { + "type": "none" + }, + "rbac": { + "audit": { + "log_allowed": false, + "log_denied": true + }, + "default_effect": "deny", + "enabled": false, + "fail_on_evaluation_error": false, + "gateway": { + "default_effect": "allow", + "enabled": false + }, + "lazy_load_policies": false, + "max_cached_orgs": 0, + "max_expression_length": 0, + "org_claim": null, + "policies": [], + "policy_cache_ttl_ms": 0, + "policy_eviction_batch_size": 0, + "project_claim": null, + "role_claim": "", + "role_mapping": {}, + "team_claim": null + }, + "session": null + }, + "allOf": [ + { + "$ref": "#/definitions/AuthConfig" + } + ] + }, + "cache": { + "description": "Cache configuration for rate limiting and session data.", + "default": { + "type": "none" + }, + "allOf": [ + { + "$ref": "#/definitions/CacheConfig" + } + ] + }, + "database": { + "description": "Database configuration for persistent storage. 
If omitted, the gateway runs in stateless mode (local dev only).", + "default": { + "type": "none" + }, + "allOf": [ + { + "$ref": "#/definitions/DatabaseConfig" + } + ] + }, + "docs": { + "description": "Documentation site configuration.", + "default": { + "assets": { + "cache_control": "public, max-age=3600", + "source": { + "type": "embedded" + } + }, + "enabled": false, + "path": "/docs" + }, + "allOf": [ + { + "$ref": "#/definitions/DocsConfig" + } + ] + }, + "features": { + "description": "Feature flags for optional capabilities.", + "default": { + "file_processing": { + "callback_url": null, + "circuit_breaker": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "default_max_chunk_tokens": 800, + "default_overlap_tokens": 200, + "document_extraction": { + "enable_ocr": false, + "force_ocr": false, + "ocr_language": "eng", + "pdf_extract_images": false, + "pdf_image_dpi": 300 + }, + "max_concurrent_tasks": 4, + "max_file_size_mb": 10, + "mode": "inline", + "queue": null, + "retry": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "stale_processing_timeout_secs": 1800, + "virus_scan": { + "backend": "clamav", + "clamav": null, + "enabled": false + } + }, + "file_search": null, + "guardrails": null, + "image_fetching": { + "allowed_content_types": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp" + ], + "enabled": true, + "max_size_mb": 20, + "timeout_secs": 30 + }, + "model_catalog": { + "api_url": "https://models.dev/api.json", + "enabled": true, + "sync_interval_secs": 1800 + }, + "response_caching": null, + "vector_store_cleanup": { + "batch_size": 100, + "cleanup_delay_secs": 3600, + "dry_run": false, + 
"enabled": false, + "interval_secs": 300, + "max_duration_secs": 60 + }, + "websocket": { + "channel_capacity": 1024, + "enabled": true, + "max_connections": 1000, + "ping_interval_secs": 30, + "pong_timeout_secs": 60, + "require_auth": false + } + }, + "allOf": [ + { + "$ref": "#/definitions/FeaturesConfig" + } + ] + }, + "limits": { + "description": "Default rate limits and budgets. These can be overridden at the org, project, and user levels.", + "default": { + "budgets": { + "daily_budget_usd": null, + "estimated_cost_cents": 10, + "monthly_budget_usd": null, + "warning_threshold": 0.8 + }, + "rate_limits": { + "allow_per_key_above_global": false, + "concurrent_requests": 10, + "estimated_tokens_per_request": 1000, + "ip_rate_limits": { + "enabled": true, + "requests_per_hour": null, + "requests_per_minute": 120 + }, + "requests_per_day": null, + "requests_per_minute": 60, + "tokens_per_day": null, + "tokens_per_minute": 100000, + "window_type": "sliding" + }, + "resource_limits": { + "max_api_keys_per_user": 25, + "max_policies_per_org": 100, + "max_providers_per_user": 10 + } + }, + "allOf": [ + { + "$ref": "#/definitions/LimitsConfig" + } + ] + }, + "observability": { + "description": "Observability configuration (logging, tracing, metrics).", + "default": { + "dead_letter_queue": null, + "logging": { + "file_line": false, + "filter": null, + "format": "compact", + "include_spans": true, + "level": "info", + "siem": { + "app_name": "hadrian", + "device_product": "Gateway", + "device_vendor": "Hadrian", + "device_version": null, + "facility": "local0", + "hostname": null, + "leef_version": "2.0" + }, + "timestamps": true + }, + "metrics": { + "enabled": true, + "latency_buckets_ms": [ + 10.0, + 50.0, + 100.0, + 250.0, + 500.0, + 1000.0, + 2500.0, + 5000.0, + 10000.0 + ], + "otlp": null, + "prometheus": null, + "prometheus_query_url": null, + "token_buckets": [ + 10.0, + 50.0, + 100.0, + 500.0, + 1000.0, + 5000.0, + 10000.0, + 50000.0, + 100000.0 + ] + }, + 
"response_validation": { + "enabled": false, + "mode": "warn" + }, + "tracing": { + "enabled": false, + "environment": null, + "otlp": null, + "propagation": "trace_context", + "resource_attributes": {}, + "sampling": { + "rate": 1.0, + "strategy": "always_on" + }, + "service_name": "", + "service_version": null + }, + "usage": { + "buffer": { + "flush_interval_ms": 1000, + "max_pending_entries": 10000, + "max_size": 1000 + }, + "database": true, + "otlp": null + } + }, + "allOf": [ + { + "$ref": "#/definitions/ObservabilityConfig" + } + ] + }, + "pricing": { + "description": "Pricing configuration for cost calculation.", + "default": { + "cost_source": "prefer_provider", + "pricing": {} + }, + "allOf": [ + { + "$ref": "#/definitions/PricingConfig" + } + ] + }, + "providers": { + "description": "Static provider configurations. Additional providers can be added dynamically via the database at the org/project level.", + "default": { + "default_provider": null + }, + "allOf": [ + { + "$ref": "#/definitions/ProvidersConfig" + } + ] + }, + "retention": { + "description": "Data retention configuration for automatic purging of old data.", + "default": { + "enabled": false, + "interval_hours": 24, + "periods": { + "audit_logs_days": 730, + "conversations_deleted_days": 30, + "daily_spend_days": 365, + "usage_records_days": 90 + }, + "safety": { + "batch_size": 1000, + "dry_run": false, + "max_deletes_per_run": 100000 + } + }, + "allOf": [ + { + "$ref": "#/definitions/RetentionConfig" + } + ] + }, + "secrets": { + "description": "Secrets manager configuration for provider API keys.", + "default": { + "type": "none" + }, + "allOf": [ + { + "$ref": "#/definitions/SecretsConfig" + } + ] + }, + "server": { + "description": "HTTP server configuration.", + "default": { + "allow_loopback_urls": false, + "allow_private_urls": false, + "body_limit_bytes": 10485760, + "cors": { + "allow_credentials": false, + "allowed_headers": [ + "Content-Type", + "Authorization", + "X-API-Key" + 
], + "allowed_methods": [ + "GET", + "POST", + "PUT", + "DELETE", + "OPTIONS" + ], + "allowed_origins": [], + "enabled": true, + "max_age_secs": 86400 + }, + "host": "0.0.0.0", + "http_client": { + "connect_timeout_secs": 10, + "http2_adaptive_window": true, + "http2_prior_knowledge": false, + "pool_idle_timeout_secs": 90, + "pool_max_idle_per_host": 32, + "tcp_keepalive_secs": 60, + "tcp_nodelay": true, + "timeout_secs": 300, + "user_agent": "hadrian/0.0.0-alpha.7", + "verbose": false + }, + "max_response_body_bytes": 104857600, + "port": 8080, + "security_headers": { + "content_security_policy": "default-src 'self'; script-src 'self' blob: 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:; media-src 'self' blob:; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://www.wikidata.org; worker-src 'self' blob:; frame-src 'self' blob:; object-src 'none'; base-uri 'self'", + "content_type_options": "nosniff", + "enabled": true, + "frame_options": "DENY", + "hsts": { + "enabled": true, + "include_subdomains": true, + "max_age_secs": 31536000, + "preload": false + }, + "permissions_policy": null, + "referrer_policy": "strict-origin-when-cross-origin", + "xss_protection": null + }, + "streaming_idle_timeout_secs": 120, + "timeout_secs": 300, + "tls": null, + "trusted_proxies": { + "cidrs": [], + "dangerously_trust_all": false, + "real_ip_header": "" + } + }, + "allOf": [ + { + "$ref": "#/definitions/ServerConfig" + } + ] + }, + "storage": { + "description": "Storage configuration for files and binary data.", + "default": { + "files": { + "backend": "database", + "filesystem": null, + "s3": null + } + }, + "allOf": [ + { + "$ref": "#/definitions/StorageConfig" + } + ] + }, + "ui": { + "description": "UI configuration.", + "default": { + "admin": { + "enabled": true, + "path": "/admin" + }, + "assets": { + "cache_control": "public, max-age=31536000, immutable", + "source": { + 
"type": "embedded" + } + }, + "branding": { + "colors": null, + "colors_dark": null, + "custom_css_url": null, + "favicon_url": null, + "fonts": null, + "footer_links": [], + "footer_text": null, + "login": null, + "logo_dark_url": null, + "logo_url": null, + "show_version": false, + "tagline": null, + "title": null + }, + "chat": { + "available_models": [], + "default_model": null, + "enabled": true, + "file_uploads": { + "allowed_types": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "application/pdf", + "text/plain", + "text/markdown" + ], + "enabled": false, + "max_size_bytes": 10485760, + "storage": { + "type": "database" + } + } + }, + "enabled": false, + "path": "/" + }, + "allOf": [ + { + "$ref": "#/definitions/UiConfig" + } + ] + } + }, + "additionalProperties": false, + "definitions": { + "AdminConfig": { + "description": "Admin panel configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Enable admin panel.", + "default": true, + "type": "boolean" + }, + "path": { + "description": "Path for admin panel.", + "default": "/admin", + "type": "string" + } + }, + "additionalProperties": false + }, + "ApiKeyAuthConfig": { + "description": "API key authentication configuration.", + "type": "object", + "properties": { + "cache_ttl_secs": { + "description": "Cache API key lookups for this many seconds. Set to 0 to disable caching (every request hits the database).", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "generation_prefix": { + "description": "Prefix for generating new API keys (e.g., \"gw_live_\" for production). 
If not specified, uses key_prefix with \"_live\" appended if it doesn't end with \"_\".", + "default": null, + "type": [ + "string", + "null" + ] + }, + "hash_algorithm": { + "description": "Hash algorithm for storing keys.", + "default": "sha256", + "allOf": [ + { + "$ref": "#/definitions/HashAlgorithm" + } + ] + }, + "header_name": { + "description": "Header name for the API key.", + "default": "X-API-Key", + "type": "string" + }, + "key_prefix": { + "description": "Prefix for validating API keys (e.g., \"gw_\" to accept any gw_* key).", + "default": "gw_", + "type": "string" + } + }, + "additionalProperties": false + }, + "AssetSource": { + "description": "Source for static assets.", + "oneOf": [ + { + "description": "Assets embedded in the binary.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "embedded" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Assets served from the filesystem.", + "type": "object", + "required": [ + "path", + "type" + ], + "properties": { + "path": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "filesystem" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Assets served from a CDN (UI makes requests directly).", + "type": "object", + "required": [ + "base_url", + "type" + ], + "properties": { + "base_url": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "cdn" + ] + } + }, + "additionalProperties": false + } + ] + }, + "AssetsConfig": { + "description": "Static assets configuration.", + "type": "object", + "properties": { + "cache_control": { + "description": "Cache control header for static assets.", + "default": "public, max-age=31536000, immutable", + "type": "string" + }, + "source": { + "description": "Source of static assets.", + "default": { + "type": "embedded" + }, + "allOf": [ + { + "$ref": "#/definitions/AssetSource" + } + ] + } + }, + "additionalProperties": false 
+ }, + "AuthConfig": { + "description": "Authentication and authorization configuration.\n\nUses a single `mode` to control authentication for all endpoints: - `none` — No authentication (local dev, all access is anonymous) - `api_key` — API key required everywhere (admin shows \"enter key\" login) - `idp` — Per-org SSO + session cookies + JWT + API keys - `iap` — Reverse proxy headers + API keys", + "type": "object", + "properties": { + "api_key": { + "description": "Shared API key settings (used by api_key, idp, and iap modes).", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/ApiKeyAuthConfig" + }, + { + "type": "null" + } + ] + }, + "bootstrap": { + "description": "Bootstrap admin configuration. Used to create the initial admin user/org on first run.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/BootstrapConfig" + }, + { + "type": "null" + } + ] + }, + "emergency": { + "description": "Emergency access configuration. Provides break-glass admin access when SSO is unavailable.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/EmergencyAccessConfig" + }, + { + "type": "null" + } + ] + }, + "mode": { + "description": "Authentication mode. 
Exactly one of: none, api_key, idp, iap.", + "default": { + "type": "none" + }, + "allOf": [ + { + "$ref": "#/definitions/AuthMode" + } + ] + }, + "rbac": { + "description": "Authorization (RBAC) configuration.", + "default": { + "audit": { + "log_allowed": false, + "log_denied": true + }, + "default_effect": "deny", + "enabled": false, + "fail_on_evaluation_error": false, + "gateway": { + "default_effect": "allow", + "enabled": false + }, + "lazy_load_policies": false, + "max_cached_orgs": 0, + "max_expression_length": 0, + "org_claim": null, + "policies": [], + "policy_cache_ttl_ms": 0, + "policy_eviction_batch_size": 0, + "project_claim": null, + "role_claim": "", + "role_mapping": {}, + "team_claim": null + }, + "allOf": [ + { + "$ref": "#/definitions/RbacConfig" + } + ] + }, + "session": { + "description": "Session settings (used by idp mode for SSO cookie management).", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/SessionConfig" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "AuthMode": { + "description": "Authentication mode for the gateway.\n\nControls how both API (`/v1/*`) and admin (`/admin/*`) endpoints are protected:\n\n- **none** — No authentication. Suitable for local development only. API keys may still be used optionally for cost attribution. - **api_key** — API key required for all requests. The admin panel shows an \"enter key\" login prompt. - **idp** — Per-org SSO via OIDC/SAML. Session cookies for the web UI, JWTs for programmatic access, and API keys for machine clients. - **iap** — Identity-Aware Proxy. Identity is extracted from headers set by a reverse proxy (Cloudflare Access, oauth2-proxy, Tailscale, etc.). API keys are also accepted.", + "oneOf": [ + { + "description": "No authentication. 
Any request is allowed.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "none" + ] + } + }, + "additionalProperties": false + }, + { + "description": "API key authentication required everywhere.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "api_key" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Per-org SSO + session cookies + JWT + API keys.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "idp" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Identity-Aware Proxy (reverse proxy headers) + API keys.", + "type": "object", + "required": [ + "identity_header", + "type" + ], + "properties": { + "email_header": { + "description": "Header containing the user's email (if different from identity).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "groups_header": { + "description": "Header containing groups/roles (comma-separated or JSON array).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "identity_header": { + "description": "Header containing the authenticated user's identity.", + "type": "string" + }, + "jwt_assertion": { + "description": "Optional: JWT assertion header for additional validation. If set, the JWT is validated and claims are extracted.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/ProxyAuthJwtConfig" + }, + { + "type": "null" + } + ] + }, + "name_header": { + "description": "Header containing the user's name.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "require_identity": { + "description": "Require all requests to have identity headers. 
If false, unauthenticated requests are allowed to public endpoints.", + "default": true, + "type": "boolean" + }, + "type": { + "type": "string", + "enum": [ + "iap" + ] + } + }, + "additionalProperties": false + } + ] + }, + "AuthzAuditConfig": { + "description": "Configuration for authorization decision audit logging.", + "type": "object", + "properties": { + "log_allowed": { + "description": "Log allowed authorization decisions. Defaults to false (only denied decisions are logged).", + "default": false, + "type": "boolean" + }, + "log_denied": { + "description": "Log denied authorization decisions. Defaults to true for security monitoring.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "AwsCredentials": { + "description": "AWS credential configuration.", + "oneOf": [ + { + "description": "Use the default credential chain (env, profile, IMDS, etc.)", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "default" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Use static credentials.", + "type": "object", + "required": [ + "access_key_id", + "secret_access_key", + "type" + ], + "properties": { + "access_key_id": { + "type": "string" + }, + "secret_access_key": { + "type": "string" + }, + "session_token": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "static" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Assume an IAM role.", + "type": "object", + "required": [ + "role_arn", + "type" + ], + "properties": { + "external_id": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "role_arn": { + "type": "string" + }, + "session_name": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "assume_role" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Use a specific AWS 
profile.", + "type": "object", + "required": [ + "name", + "type" + ], + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "profile" + ] + } + }, + "additionalProperties": false + } + ] + }, + "AzureAuth": { + "description": "Azure authentication configuration.", + "oneOf": [ + { + "description": "API key authentication.", + "type": "object", + "required": [ + "api_key", + "type" + ], + "properties": { + "api_key": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "api_key" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Azure AD / Entra ID authentication.", + "type": "object", + "required": [ + "client_id", + "client_secret", + "tenant_id", + "type" + ], + "properties": { + "client_id": { + "description": "Client ID.", + "type": "string" + }, + "client_secret": { + "description": "Client secret.", + "type": "string" + }, + "tenant_id": { + "description": "Tenant ID.", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "azure_ad" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Managed identity authentication.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "client_id": { + "description": "Client ID of the managed identity (optional for system-assigned).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "managed_identity" + ] + } + }, + "additionalProperties": false + } + ] + }, + "AzureDeployment": { + "description": "Azure deployment configuration.", + "type": "object", + "required": [ + "model" + ], + "properties": { + "default": { + "description": "Whether this is the default deployment for the model.", + "default": false, + "type": "boolean" + }, + "model": { + "description": "Model name this deployment serves (for routing).", + "type": "string" + } + }, + "additionalProperties": false + }, + "BlocklistPattern": { + "description": "A pattern for 
the blocklist guardrails provider.", + "type": "object", + "required": [ + "pattern" + ], + "properties": { + "category": { + "description": "Category to assign when this pattern matches.", + "default": "blocked_content", + "type": "string" + }, + "is_regex": { + "description": "Whether the pattern is a regex (default: false, treated as literal string).", + "default": false, + "type": "boolean" + }, + "message": { + "description": "Human-readable description of why this pattern is blocked.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "pattern": { + "description": "The pattern to match. Can be a literal string or regex (if `is_regex` is true).", + "type": "string" + }, + "severity": { + "description": "Severity level for matches (default: high).", + "default": "high", + "type": "string" + } + }, + "additionalProperties": false + }, + "BootstrapApiKey": { + "description": "API key to create during bootstrap.", + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "description": "Display name for the API key.", + "type": "string" + } + }, + "additionalProperties": false + }, + "BootstrapConfig": { + "description": "Bootstrap configuration for initial setup.\n\nProvides mechanisms for bootstrapping a new deployment: - `api_key`: Pre-shared key for admin API access before first user exists - `auto_verify_domains`: Domains to auto-verify when SSO config is created - `admin_identities`: Identity IDs to grant system admin role\n\nThe bootstrap API key uses a special `_system_bootstrap` role that: - Is only valid when the database has no users (orgs can exist) - Allows creating org + SSO config, then first IdP login disables bootstrap - Cannot be assigned by IdPs (roles starting with `_` are reserved) - Grants full admin access for initial setup via RBAC policy", + "type": "object", + "properties": { + "admin_identities": { + "description": "Admin identity IDs that should be granted system admin role. 
These are the external identity IDs from your IdP.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "api_key": { + "description": "Pre-shared API key for initial setup before first user exists.\n\nThis key provides admin access ONLY when the database has no users. Organizations can exist - bootstrap remains active until first IdP login.\n\nUse this for: - Automated deployments (Terraform, Ansible, etc.) - E2E testing - Initial SSO configuration before users can authenticate\n\nExample: `api_key = \"${HADRIAN_BOOTSTRAP_KEY}\"`", + "default": null, + "type": [ + "string", + "null" + ] + }, + "auto_verify_domains": { + "description": "Domains to automatically verify when SSO config is created.\n\nWhen an SSO configuration is created with `allowed_email_domains` that match entries in this list, those domains are automatically marked as verified without requiring DNS TXT record verification.\n\nThis is useful for: - E2E testing where DNS verification is impossible - Development environments - Pre-verified enterprise domains\n\nExample: `auto_verify_domains = [\"university.edu\", \"example.com\"]`", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "initial_api_key": { + "description": "Initial API key to create (owned by the initial org). 
The raw key is printed to stdout on first creation.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/BootstrapApiKey" + }, + { + "type": "null" + } + ] + }, + "initial_org": { + "description": "Initial organization to create.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/BootstrapOrg" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "BootstrapOrg": { + "type": "object", + "required": [ + "name", + "slug" + ], + "properties": { + "admin_identities": { + "description": "Identity IDs to add as org admins.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "name": { + "description": "Organization display name.", + "type": "string" + }, + "slug": { + "description": "Organization slug (URL-safe identifier).", + "type": "string" + }, + "sso": { + "description": "Optional SSO configuration to create for this organization.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/BootstrapSsoConfig" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "BootstrapSsoConfig": { + "description": "SSO configuration for bootstrap.\n\nCreates an OIDC or SAML SSO configuration for the initial organization. 
Client secrets are stored via the configured secrets manager.", + "type": "object", + "properties": { + "allowed_email_domains": { + "description": "Allowed email domains for SSO users.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "client_id": { + "description": "OIDC client ID.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "client_secret": { + "description": "OIDC client secret.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "discovery_url": { + "description": "OIDC discovery URL (if different from issuer).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "issuer": { + "description": "OIDC issuer URL.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "provider_type": { + "description": "SSO provider type: \"oidc\" or \"saml\".", + "default": "", + "type": "string" + }, + "redirect_uri": { + "description": "OAuth redirect URI.", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "BrandingConfig": { + "description": "Branding customization.", + "type": "object", + "properties": { + "colors": { + "description": "Color palette for light mode.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/ColorPalette" + }, + { + "type": "null" + } + ] + }, + "colors_dark": { + "description": "Color palette overrides for dark mode.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/ColorPalette" + }, + { + "type": "null" + } + ] + }, + "custom_css_url": { + "description": "Custom CSS URL.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "favicon_url": { + "description": "Favicon URL.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "fonts": { + "description": "Typography configuration.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/FontsConfig" + }, + { + "type": "null" + } + ] + }, + "footer_links": { + "description": "Footer links.", + "default": [], + 
"type": "array", + "items": { + "$ref": "#/definitions/FooterLink" + } + }, + "footer_text": { + "description": "Footer text.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "login": { + "description": "Login page customization.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/LoginConfig" + }, + { + "type": "null" + } + ] + }, + "logo_dark_url": { + "description": "Logo URL for dark mode. Falls back to logo_url if not specified.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "logo_url": { + "description": "Logo URL.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "show_version": { + "description": "Show version in footer.", + "default": false, + "type": "boolean" + }, + "tagline": { + "description": "Tagline shown below the title (e.g., \"Powering research with AI\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "title": { + "description": "Application title.", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "BudgetDefaults": { + "description": "Budget defaults.", + "type": "object", + "properties": { + "daily_budget_usd": { + "description": "Default daily budget in USD. None means unlimited.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "estimated_cost_cents": { + "description": "Estimated cost per request in cents for budget reservation. This is reserved before the request is processed to prevent race conditions. After the request completes, the actual cost replaces the estimate. Default is 10 cents ($0.10) which is conservative for most models.", + "default": 10, + "type": "integer", + "format": "int64" + }, + "monthly_budget_usd": { + "description": "Default monthly budget in USD. None means unlimited.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "warning_threshold": { + "description": "Warning threshold as a percentage (0.0-1.0). 
Notifications are sent when this threshold is reached.", + "default": 0.8, + "type": "number", + "format": "double" + } + }, + "additionalProperties": false + }, + "CacheConfig": { + "description": "Cache configuration.\n\nThe cache is used for: - Rate limiting counters - Budget enforcement (current spend) - Session data - API key lookups (to reduce database load)", + "oneOf": [ + { + "description": "No caching. Rate limiting and budget enforcement are disabled. Only suitable for local development.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "none" + ] + } + }, + "additionalProperties": false + }, + { + "description": "In-memory cache. Good for single-node deployments. Data is lost on restart. Not suitable for multi-node.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "default_ttl_secs": { + "description": "Default TTL for cache entries in seconds.", + "default": 3600, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_entries": { + "description": "Maximum number of entries in the cache.", + "default": 100000, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "ttl": { + "description": "TTL settings for specific cache types.", + "default": { + "api_key_secs": 300, + "provider_secs": 300, + "rate_limit_secs": 60 + }, + "allOf": [ + { + "$ref": "#/definitions/CacheTtlConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "memory" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Redis cache. 
Required for multi-node deployments.", + "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "cluster": { + "description": "Cluster mode configuration.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/RedisClusterConfig" + }, + { + "type": "null" + } + ] + }, + "connect_timeout_secs": { + "description": "Connection timeout in seconds.", + "default": 5, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "key_prefix": { + "description": "Key prefix for all cache keys. Useful when sharing a Redis instance with other applications.", + "default": "gw:", + "type": "string" + }, + "tls": { + "description": "Enable TLS for Redis connections.", + "default": false, + "type": "boolean" + }, + "ttl": { + "description": "TTL settings for specific cache types.", + "default": { + "api_key_secs": 300, + "provider_secs": 300, + "rate_limit_secs": 60 + }, + "allOf": [ + { + "$ref": "#/definitions/CacheTtlConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "redis" + ] + }, + "url": { + "description": "Redis connection URL. 
Format: redis://[user:password@]host:port[/database] For clusters: redis+cluster://host1:port1,host2:port2,...", + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "CacheKeyComponents": { + "description": "Components to include in the cache key.", + "type": "object", + "properties": { + "model": { + "description": "Include model name in cache key.", + "default": true, + "type": "boolean" + }, + "system_prompt": { + "description": "Include system prompt in cache key.", + "default": true, + "type": "boolean" + }, + "temperature": { + "description": "Include temperature in cache key.", + "default": true, + "type": "boolean" + }, + "tools": { + "description": "Include tools in cache key.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "CacheTtlConfig": { + "description": "TTL configuration for different cache types.", + "type": "object", + "properties": { + "api_key_secs": { + "description": "TTL for API key cache in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "provider_secs": { + "description": "TTL for dynamic provider cache in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "rate_limit_secs": { + "description": "TTL for rate limit counters in seconds.", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ChatConfig": { + "description": "Chat interface configuration.", + "type": "object", + "properties": { + "available_models": { + "description": "Available models in the UI (if empty, all models are shown).", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "default_model": { + "description": "Default model for new chats.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "enabled": { + "description": "Enable chat interface.", + "default": true, + "type": "boolean" + }, + "file_uploads": { 
+ "description": "Enable file uploads.", + "default": { + "allowed_types": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "application/pdf", + "text/plain", + "text/markdown" + ], + "enabled": false, + "max_size_bytes": 10485760, + "storage": { + "type": "database" + } + }, + "allOf": [ + { + "$ref": "#/definitions/FileUploadConfig" + } + ] + } + }, + "additionalProperties": false + }, + "CircuitBreakerConfig": { + "description": "Configuration for circuit breaker pattern on providers.\n\nThe circuit breaker prevents hammering unhealthy providers by tracking failures and temporarily rejecting requests after a threshold is exceeded.\n\nStates: - **Closed**: Normal operation, requests pass through. Failures are tracked. - **Open**: After threshold failures, requests are rejected immediately. - **Half-Open**: After timeout, limited probe requests are allowed to test recovery.\n\n# Adaptive Backoff\n\nWhen a provider repeatedly fails (circuit opens, half-open probe fails, circuit reopens), the open timeout increases exponentially to avoid hammering an unhealthy provider:\n\n```text timeout = min(open_timeout_secs * backoff_multiplier^consecutive_opens, max_open_timeout_secs) ```\n\nFor example, with defaults (30s base, 2.0 multiplier, 300s max): - First open: 30s - Second open (probe failed): 60s - Third open: 120s - Fourth open: 240s - Fifth+ open: 300s (capped)\n\nThe counter resets when the circuit successfully closes (provider recovers).", + "type": "object", + "properties": { + "backoff_multiplier": { + "description": "Multiplier for exponential backoff on repeated circuit opens. When a half-open probe fails, the next open timeout is multiplied by this value. 
Set to 1.0 to disable adaptive backoff (fixed timeout).", + "default": 2.0, + "type": "number", + "format": "double" + }, + "enabled": { + "description": "Whether circuit breaker is enabled.", + "default": false, + "type": "boolean" + }, + "failure_status_codes": { + "description": "Status codes that count as failures for the circuit breaker. Default: 500, 502, 503, 504 (server errors). Note: 429 is NOT included because rate limits are expected behavior, not provider failure.", + "default": [ + 500, + 502, + 503, + 504 + ], + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + }, + "failure_threshold": { + "description": "Number of consecutive failures to trigger the circuit breaker.", + "default": 5, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "max_open_timeout_secs": { + "description": "Maximum open timeout in seconds after repeated failures. Caps the exponential backoff to prevent excessively long waits.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "open_timeout_secs": { + "description": "Base duration in seconds to keep the circuit open before attempting recovery. This is the initial timeout; subsequent opens may be longer due to adaptive backoff.", + "default": 30, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "success_threshold": { + "description": "Number of successful probe requests required to close the circuit.", + "default": 2, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ClamAvConfig": { + "description": "ClamAV daemon (clamd) configuration.\n\nClamd must be running and accessible at the configured host:port. The gateway connects via TCP to scan file contents.", + "type": "object", + "properties": { + "host": { + "description": "Host where clamd is running. 
Default: \"localhost\"", + "default": "localhost", + "type": "string" + }, + "max_file_size_mb": { + "description": "Maximum file size to scan in megabytes. Files larger than this are rejected without scanning. Should match or be less than clamd's StreamMaxLength setting. Default: 25 MB (ClamAV default)", + "default": 25, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "port": { + "description": "Port where clamd is listening. Default: 3310 (standard clamd port)", + "default": 3310, + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "socket_path": { + "description": "Use Unix socket instead of TCP. When set, host and port are ignored.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "timeout_ms": { + "description": "Timeout for scan operations in milliseconds. Large files may need longer timeouts. Default: 30000 (30 seconds)", + "default": 30000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ColorPalette": { + "description": "Color palette for branding customization.", + "type": "object", + "properties": { + "accent": { + "description": "Accent color for highlights and CTAs (hex).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "background": { + "description": "Background color (hex).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "border": { + "description": "Border color (hex).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "foreground": { + "description": "Foreground/text color (hex).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "muted": { + "description": "Muted color for subtle backgrounds (hex).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "primary": { + "description": "Primary brand color (hex, e.g., \"#3b82f6\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "primary_foreground": { + "description": "Text color on primary backgrounds 
(hex, e.g., \"#ffffff\"). Used for text on primary buttons like \"New Chat\".", + "default": null, + "type": [ + "string", + "null" + ] + }, + "secondary": { + "description": "Secondary color for secondary actions (hex).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "secondary_foreground": { + "description": "Text color on secondary backgrounds (hex).", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "CorsConfig": { + "description": "CORS configuration.", + "type": "object", + "properties": { + "allow_credentials": { + "description": "Whether to allow credentials.", + "default": false, + "type": "boolean" + }, + "allowed_headers": { + "description": "Allowed headers.", + "default": [ + "Content-Type", + "Authorization", + "X-API-Key" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "allowed_methods": { + "description": "Allowed HTTP methods.", + "default": [ + "GET", + "POST", + "PUT", + "DELETE", + "OPTIONS" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "allowed_origins": { + "description": "Allowed origins. 
Use [\"*\"] for any origin (not recommended for production).", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "enabled": { + "description": "Enable CORS.", + "default": true, + "type": "boolean" + }, + "max_age_secs": { + "description": "Max age for preflight cache in seconds.", + "default": 86400, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "CostSource": { + "description": "Source preference for cost calculation", + "oneOf": [ + { + "description": "Prefer provider-reported cost (from API response), fall back to calculated", + "type": "string", + "enum": [ + "prefer_provider" + ] + }, + { + "description": "Always use calculated cost based on configured pricing", + "type": "string", + "enum": [ + "calculated_only" + ] + }, + { + "description": "Always use provider-reported cost, fail if not available", + "type": "string", + "enum": [ + "provider_only" + ] + } + ] + }, + "CustomFont": { + "description": "Custom font definition for loading external fonts.", + "type": "object", + "required": [ + "name", + "url" + ], + "properties": { + "name": { + "description": "Font family name to use in CSS.", + "type": "string" + }, + "style": { + "description": "Font style (\"normal\" or \"italic\").", + "default": "normal", + "type": "string" + }, + "url": { + "description": "URL to the font file (woff2, woff, ttf, otf).", + "type": "string" + }, + "weight": { + "description": "Font weight (e.g., \"400\", \"700\", \"100 900\" for variable fonts).", + "default": "400", + "type": "string" + } + }, + "additionalProperties": false + }, + "CustomGuardrailsConfig": { + "description": "Custom guardrails configuration for external webhook-based evaluation.", + "type": "object", + "required": [ + "provider" + ], + "properties": { + "apply_to": { + "description": "Apply to input, output, or both.", + "default": "both", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsApplyTo" + } + ] + }, + 
"enabled": { + "description": "Enable custom guardrails.", + "default": false, + "type": "boolean" + }, + "on_error": { + "description": "Behavior when custom guardrails provider fails.", + "default": "block", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsErrorAction" + } + ] + }, + "provider": { + "description": "Custom guardrails provider configuration.", + "allOf": [ + { + "$ref": "#/definitions/CustomGuardrailsProvider" + } + ] + }, + "timeout_ms": { + "description": "Timeout for custom guardrails evaluation in milliseconds.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "CustomGuardrailsProvider": { + "description": "Custom HTTP guardrails provider configuration.", + "type": "object", + "required": [ + "url" + ], + "properties": { + "api_key": { + "description": "API key for authentication.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "headers": { + "description": "Custom headers to include in requests.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "max_retries": { + "description": "Maximum number of retries.", + "default": 2, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "retry_enabled": { + "description": "Enable retry on failure.", + "default": false, + "type": "boolean" + }, + "timeout_ms": { + "description": "Request timeout in milliseconds.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "url": { + "description": "Guardrails service URL.", + "type": "string" + } + }, + "additionalProperties": false + }, + "DatabaseConfig": { + "description": "Database configuration.\n\nThe database stores persistent data: API keys, usage logs, budgets, org/project configurations, and dynamic provider credentials.", + "oneOf": [ + { + "description": "No database. The gateway runs in stateless/local mode. Only static providers from config are available. 
No authentication, tracking, or UI.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "none" + ] + } + }, + "additionalProperties": false + }, + { + "description": "SQLite database. Good for single-node deployments.", + "type": "object", + "required": [ + "path", + "type" + ], + "properties": { + "busy_timeout_ms": { + "description": "Busy timeout in milliseconds.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "create_if_missing": { + "description": "Create the database file if it doesn't exist.", + "default": true, + "type": "boolean" + }, + "max_connections": { + "description": "Maximum number of connections in the pool.", + "default": 5, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "path": { + "description": "Path to the SQLite database file. Use `:memory:` for an in-memory database (testing only).", + "type": "string" + }, + "run_migrations": { + "description": "Run migrations on startup.", + "default": true, + "type": "boolean" + }, + "type": { + "type": "string", + "enum": [ + "sqlite" + ] + }, + "wal_mode": { + "description": "Enable WAL mode for better concurrency.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + { + "description": "PostgreSQL database. Required for multi-node deployments.", + "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "connect_timeout_secs": { + "description": "Connection timeout in seconds.", + "default": 10, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "idle_timeout_secs": { + "description": "Idle connection timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_connections": { + "description": "Maximum number of connections in each pool.\n\nDefault: 20. For high-concurrency deployments, use the formula: `(cpu_cores × 2) + max_background_jobs`. 
Background jobs include document processing, health checks, and usage flushing.", + "default": 20, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "min_connections": { + "description": "Minimum number of connections in each pool.", + "default": 1, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "read_url": { + "description": "Optional read replica URL for read-heavy queries. When configured, read operations will be routed to this pool. This can point to a single replica or a load balancer (e.g., PgBouncer) that distributes across multiple replicas.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "run_migrations": { + "description": "Run migrations on startup.", + "default": true, + "type": "boolean" + }, + "ssl_mode": { + "description": "SSL mode.", + "default": "prefer", + "allOf": [ + { + "$ref": "#/definitions/PostgresSslMode" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "postgres" + ] + }, + "url": { + "description": "PostgreSQL connection URL for the primary (write) database. 
Format: postgres://user:password@host:port/database", + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "DeadLetterQueueConfig": { + "description": "Dead-letter queue configuration for failed operations.", + "oneOf": [ + { + "description": "File-based dead-letter queue.", + "type": "object", + "required": [ + "path", + "type" + ], + "properties": { + "max_file_size_mb": { + "description": "Maximum file size in MB before rotation.", + "default": 100, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_files": { + "description": "Maximum number of files to keep.", + "default": 10, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "path": { + "description": "Path to the dead-letter directory.", + "type": "string" + }, + "retry": { + "description": "Retry configuration.", + "default": { + "backoff_multiplier": 2.0, + "batch_size": 100, + "enabled": true, + "initial_delay_secs": 60, + "interval_secs": 60, + "max_delay_secs": 3600, + "max_retries": 10, + "prune_enabled": true + }, + "allOf": [ + { + "$ref": "#/definitions/DlqRetryConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "file" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Redis-based dead-letter queue.", + "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "key_prefix": { + "description": "Key prefix for DLQ entries.", + "default": "gw:dlq:", + "type": "string" + }, + "max_entries": { + "description": "Maximum entries to keep.", + "default": 100000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "retry": { + "description": "Retry configuration.", + "default": { + "backoff_multiplier": 2.0, + "batch_size": 100, + "enabled": true, + "initial_delay_secs": 60, + "interval_secs": 60, + "max_delay_secs": 3600, + "max_retries": 10, + "prune_enabled": true + }, + "allOf": [ + { + "$ref": "#/definitions/DlqRetryConfig" + } + ] + }, + "ttl_secs": { + "description": "TTL for 
DLQ entries in seconds.", + "default": 604800, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "redis" + ] + }, + "url": { + "description": "Redis URL (can reuse cache URL).", + "type": "string" + } + }, + "additionalProperties": false + }, + { + "description": "Database-based dead-letter queue.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "max_entries": { + "description": "Maximum entries to keep.", + "default": 100000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "retry": { + "description": "Retry configuration.", + "default": { + "backoff_multiplier": 2.0, + "batch_size": 100, + "enabled": true, + "initial_delay_secs": 60, + "interval_secs": 60, + "max_delay_secs": 3600, + "max_retries": 10, + "prune_enabled": true + }, + "allOf": [ + { + "$ref": "#/definitions/DlqRetryConfig" + } + ] + }, + "table_name": { + "description": "Table name for DLQ entries.", + "default": "dead_letter_queue", + "type": "string" + }, + "ttl_secs": { + "description": "TTL for DLQ entries in seconds.", + "default": 604800, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "database" + ] + } + }, + "additionalProperties": false + } + ] + }, + "DistanceMetric": { + "description": "Distance metric for vector similarity search.\n\nDifferent metrics are suited for different embedding models and use cases:\n\n- **Cosine**: Measures the angle between vectors. Best for normalized embeddings (most text embedding models). Score range: 0.0-1.0 (higher = more similar). This is the default and recommended for most use cases.\n\n- **DotProduct** (Inner Product): Measures the projection of one vector onto another. Best for embeddings where magnitude carries meaning. Requires normalized vectors to produce bounded scores. 
Score range: varies by implementation.\n\n- **Euclidean** (L2): Measures the straight-line distance between vectors. Best for embeddings in metric spaces. Score range: 0.0-∞ (lower = more similar, converted to similarity score internally).\n\n# When to Use Each Metric\n\n| Metric | Best For | Embedding Models | |-------------|----------|------------------| | **Cosine** (default) | Text similarity, semantic search | OpenAI `text-embedding-3-*`, Cohere `embed-v3`, Voyage, most text models | | **DotProduct** | Maximum inner product search (MIPS), retrieval-augmented generation | Models trained with contrastive loss, some custom models | | **Euclidean** | Clustering, when absolute distances matter | Image embeddings, some scientific/domain-specific models |\n\n**Recommendation:** Use **Cosine** unless you have a specific reason not to. Most text embedding models produce normalized vectors optimized for cosine similarity.\n\n# Backend Support\n\n| Metric | pgvector Operator | Qdrant Distance | |-------------|-------------------|-----------------| | Cosine | `<=>` (cosine) | `Cosine` | | DotProduct | `<#>` (neg. IP) | `Dot` | | Euclidean | `<->` (L2) | `Euclid` |\n\n# Score Normalization\n\nAll metrics are normalized to return similarity scores in the 0.0-1.0 range where higher values indicate more similar vectors. The conversion formulas:\n\n- Cosine: `similarity = 1.0 - cosine_distance` (pgvector returns distance) - DotProduct: `similarity = (1.0 + dot_product) / 2.0` (normalized embeddings) - Euclidean: `similarity = 1.0 / (1.0 + euclidean_distance)`\n\n# Caveats\n\n- **DotProduct requires normalized embeddings**: The score normalization formula `(1 + score) / 2` assumes unit vectors. 
Non-normalized embeddings may produce scores outside the 0.0-1.0 range (clamped for safety but semantically incorrect).\n\n- **Changing metrics requires re-indexing**: If you change the distance metric after data has been indexed, you must recreate the vector index for correct results.\n\n# Configuration Example\n\n```toml # RAG vector backend with explicit distance metric [features.file_search.vector_backend] type = \"pgvector\" table_name = \"rag_chunks\" distance_metric = \"cosine\" # or \"dot_product\", \"euclidean\"\n\n# Semantic caching with Qdrant [features.response_caching.semantic.vector_backend] type = \"qdrant\" url = \"http://localhost:6333\" qdrant_collection_name = \"semantic_cache\" distance_metric = \"cosine\" ```", + "oneOf": [ + { + "description": "Cosine similarity - best for text embeddings (default). Measures the angle between vectors, ignoring magnitude.", + "type": "string", + "enum": [ + "cosine" + ] + }, + { + "description": "Dot product (inner product) - for embeddings where magnitude matters. Requires normalized vectors for bounded scores.", + "type": "string", + "enum": [ + "dot_product" + ] + }, + { + "description": "Euclidean distance (L2) - for metric space embeddings. 
Measures straight-line distance between vector endpoints.", + "type": "string", + "enum": [ + "euclidean" + ] + } + ] + }, + "DlqRetryConfig": { + "description": "Configuration for DLQ retry processing.", + "type": "object", + "properties": { + "backoff_multiplier": { + "description": "Backoff multiplier for exponential backoff.", + "default": 2.0, + "type": "number", + "format": "double" + }, + "batch_size": { + "description": "Batch size for retry processing.", + "default": 100, + "type": "integer", + "format": "int64" + }, + "enabled": { + "description": "Enable automatic retry processing.", + "default": true, + "type": "boolean" + }, + "initial_delay_secs": { + "description": "Initial delay before first retry in seconds.", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "interval_secs": { + "description": "Interval between retry processing runs in seconds.", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_delay_secs": { + "description": "Maximum delay between retries in seconds.", + "default": 3600, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_retries": { + "description": "Maximum number of retry attempts before giving up.", + "default": 10, + "type": "integer", + "format": "int32" + }, + "prune_enabled": { + "description": "Enable automatic pruning of old entries.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "DocsAssetsConfig": { + "description": "Documentation static assets configuration.", + "type": "object", + "properties": { + "cache_control": { + "description": "Cache control header for static assets.", + "default": "public, max-age=3600", + "type": "string" + }, + "source": { + "description": "Source of static assets.", + "default": { + "type": "embedded" + }, + "allOf": [ + { + "$ref": "#/definitions/AssetSource" + } + ] + } + }, + "additionalProperties": false + }, + "DocsConfig": { + "description": 
"Documentation site configuration.", + "type": "object", + "properties": { + "assets": { + "description": "Static assets configuration.", + "default": { + "cache_control": "public, max-age=3600", + "source": { + "type": "embedded" + } + }, + "allOf": [ + { + "$ref": "#/definitions/DocsAssetsConfig" + } + ] + }, + "enabled": { + "description": "Enable the documentation site.", + "default": false, + "type": "boolean" + }, + "path": { + "description": "Path to serve the documentation from (default: /docs).", + "default": "/docs", + "type": "string" + } + }, + "additionalProperties": false + }, + "DocumentExtractionConfig": { + "description": "Document extraction configuration for processing rich documents.\n\nControls how PDF, Office, and other document formats are processed, including OCR settings for scanned documents and images.\n\nUses [Kreuzberg](https://github.com/Goldziher/kreuzberg) for document extraction.\n\n# Example Configuration\n\n```toml [features.file_processing.document_extraction] enable_ocr = true ocr_language = \"eng\" force_ocr = false pdf_extract_images = true pdf_image_dpi = 300 ```\n\n# OCR Requirements\n\nOCR requires Tesseract to be installed on the system: - **Linux**: `apt install tesseract-ocr tesseract-ocr-eng` - **macOS**: `brew install tesseract` - **Windows**: Install from \n\nAdditional language packs can be installed for non-English documents: - `tesseract-ocr-fra` (French) - `tesseract-ocr-deu` (German) - `tesseract-ocr-spa` (Spanish) - etc.", + "type": "object", + "properties": { + "enable_ocr": { + "description": "Enable OCR (Optical Character Recognition) for scanned documents and images.\n\nWhen enabled, Kreuzberg will use Tesseract to extract text from: - Scanned PDF documents (no embedded text layer) - Images embedded in documents - Image files (PNG, JPG, TIFF, etc.) if supported\n\nRequires Tesseract to be installed on the system. 
Default: false", + "default": false, + "type": "boolean" + }, + "force_ocr": { + "description": "Force OCR processing even for documents that have embedded text.\n\nUseful when: - The embedded text is known to be unreliable or incomplete - Documents were generated from scanned images with poor OCR - You want consistent processing regardless of text layer presence\n\nHas no effect if `enable_ocr` is false. Default: false", + "default": false, + "type": "boolean" + }, + "ocr_language": { + "description": "Language code for OCR processing (ISO 639-3 format).\n\nCommon values: - `eng` - English (default) - `fra` - French - `deu` - German - `spa` - Spanish - `chi_sim` - Simplified Chinese - `jpn` - Japanese\n\nThe corresponding Tesseract language pack must be installed. Default: \"eng\"", + "default": "eng", + "type": "string" + }, + "pdf_extract_images": { + "description": "Extract images from PDF documents for OCR processing.\n\nWhen enabled, embedded images in PDFs will be extracted and processed with OCR to capture text that may only exist in images.\n\nThis increases processing time but improves text extraction for documents with charts, diagrams, or embedded scanned pages.\n\nHas no effect if `enable_ocr` is false. 
Default: false", + "default": false, + "type": "boolean" + }, + "pdf_image_dpi": { + "description": "DPI (dots per inch) for image extraction from PDFs.\n\nHigher values produce better OCR quality but increase processing time and memory usage.\n\nRecommended values: - 150: Fast processing, acceptable quality - 300: Good balance (default) - 600: High quality for small text\n\nDefault: 300", + "default": 300, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "EmbeddingConfig": { + "description": "Embedding configuration.", + "type": "object", + "properties": { + "dimensions": { + "description": "Embedding dimensions.", + "default": 1536, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "model": { + "description": "Model to use for embeddings.", + "default": "text-embedding-3-small", + "type": "string" + }, + "provider": { + "description": "Provider to use for embeddings.", + "default": "openai", + "type": "string" + } + }, + "additionalProperties": false + }, + "EmergencyAccessConfig": { + "description": "Emergency access configuration for break-glass admin access.\n\nProvides a way for designated administrators to access Hadrian when SSO is unavailable due to IdP outages or misconfigurations. 
This is a critical disaster recovery feature.\n\n**Security:** - Emergency keys are compared using constant-time comparison - The `_emergency_admin` role cannot be assigned by IdPs (reserved prefix) - All access attempts are logged at WARN level - IP restrictions and rate limiting provide defense in depth - Config-only approach works even if database is corrupted\n\n**Example:** ```toml [auth.emergency] enabled = true allowed_ips = [\"10.0.0.0/8\"] # Optional: restrict to admin network\n\n[[auth.emergency.accounts]] id = \"emergency-admin-1\" name = \"Primary Emergency Admin\" key = \"${EMERGENCY_KEY_1}\" email = \"emergency@company.com\" roles = [\"_emergency_admin\", \"super_admin\"]\n\n[auth.emergency.rate_limit] max_attempts = 5 window_secs = 900 lockout_secs = 3600 ```", + "type": "object", + "properties": { + "accounts": { + "description": "Emergency admin accounts. Each account has a unique key for authentication.", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/EmergencyAccount" + } + }, + "allowed_ips": { + "description": "Global IP allowlist for emergency access (CIDR notation). If specified, emergency access is only allowed from these IPs. 
Individual accounts can have additional IP restrictions.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "enabled": { + "description": "Whether emergency access is enabled.", + "default": false, + "type": "boolean" + }, + "rate_limit": { + "description": "Rate limiting configuration for emergency access attempts.", + "default": { + "lockout_secs": 3600, + "max_attempts": 5, + "window_secs": 900 + }, + "allOf": [ + { + "$ref": "#/definitions/EmergencyRateLimit" + } + ] + } + }, + "additionalProperties": false + }, + "EmergencyAccount": { + "description": "An emergency admin account for break-glass access.", + "type": "object", + "required": [ + "id", + "key", + "name" + ], + "properties": { + "allowed_ips": { + "description": "Additional IP restrictions for this specific account (CIDR notation). These are in addition to the global `allowed_ips`.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "email": { + "description": "Email address for audit logging and notifications.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "Unique identifier for this emergency account. Used in audit logs and rate limiting.", + "type": "string" + }, + "key": { + "description": "The emergency access key (secret). Should be stored in a secrets manager and referenced via environment variable. Example: `key = \"${EMERGENCY_KEY_1}\"` Must be at least 32 characters long for security.", + "writeOnly": true, + "type": "string" + }, + "name": { + "description": "Human-readable name for this emergency account.", + "type": "string" + }, + "roles": { + "description": "Roles granted when authenticating with this emergency key. 
Should include `_emergency_admin` plus any additional roles needed.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false + }, + "EmergencyRateLimit": { + "description": "Rate limiting configuration for emergency access attempts.", + "type": "object", + "properties": { + "lockout_secs": { + "description": "Lockout duration in seconds after exceeding max_attempts.", + "default": 3600, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_attempts": { + "description": "Maximum failed attempts before lockout.", + "default": 5, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "window_secs": { + "description": "Time window in seconds for counting attempts.", + "default": 900, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "EnhancedSessionConfig": { + "description": "Enhanced session management configuration.\n\nEnables opt-in features for enterprise session management including session listing, device tracking, and user-to-sessions indexing.", + "type": "object", + "properties": { + "activity_update_interval_secs": { + "description": "Minimum interval between last_activity updates in seconds. Reduces write load by only updating last_activity if the previous update was more than this many seconds ago. Defaults to 60 seconds. Set to 0 to update on every request.", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "enabled": { + "description": "Master toggle for enhanced session features. When enabled, sessions are indexed by user ID for listing and management.", + "default": false, + "type": "boolean" + }, + "inactivity_timeout_secs": { + "description": "Inactivity timeout in seconds. 0 = disabled. Sessions inactive for this duration are automatically invalidated. 
Requires `enabled = true`.", + "default": 0, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_concurrent_sessions": { + "description": "Maximum concurrent sessions per user. 0 = unlimited. When exceeded, oldest sessions are automatically invalidated. Requires `enabled = true`. Enforcement is in Phase 2.", + "default": 0, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "track_devices": { + "description": "Track device information (user agent, IP address) with sessions. Requires `enabled = true`.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "FeaturesConfig": { + "description": "Feature flags for optional capabilities.", + "type": "object", + "properties": { + "file_processing": { + "description": "File processing configuration for RAG document ingestion. Controls how uploaded files are chunked and embedded into vector stores.", + "default": { + "callback_url": null, + "circuit_breaker": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "default_max_chunk_tokens": 800, + "default_overlap_tokens": 200, + "document_extraction": { + "enable_ocr": false, + "force_ocr": false, + "ocr_language": "eng", + "pdf_extract_images": false, + "pdf_image_dpi": 300 + }, + "max_concurrent_tasks": 4, + "max_file_size_mb": 10, + "mode": "inline", + "queue": null, + "retry": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "stale_processing_timeout_secs": 1800, + "virus_scan": { + "backend": "clamav", + "clamav": null, + "enabled": false + } + }, + "allOf": [ + { + "$ref": "#/definitions/FileProcessingConfig" + } + ] + }, + "file_search": { + "description": "File 
search configuration for the Responses API. Enables server-side file_search tool execution for RAG.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/FileSearchConfig" + }, + { + "type": "null" + } + ] + }, + "guardrails": { + "description": "Guardrails for content filtering, PII detection, and safety. Supports multiple providers, execution modes, and fine-grained actions.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/GuardrailsConfig" + }, + { + "type": "null" + } + ] + }, + "image_fetching": { + "description": "HTTP image URL fetching configuration. Controls how non-OpenAI providers (Anthropic, Bedrock, Vertex) handle HTTP image URLs in chat completion requests.", + "default": { + "allowed_content_types": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp" + ], + "enabled": true, + "max_size_mb": 20, + "timeout_secs": 30 + }, + "allOf": [ + { + "$ref": "#/definitions/ImageFetchingConfig" + } + ] + }, + "model_catalog": { + "description": "Model catalog configuration for enriching API responses with model metadata. Provides per-model capabilities, pricing, context limits, and modalities from the models.dev catalog.", + "default": { + "api_url": "https://models.dev/api.json", + "enabled": true, + "sync_interval_secs": 1800 + }, + "allOf": [ + { + "$ref": "#/definitions/ModelCatalogConfig" + } + ] + }, + "response_caching": { + "description": "Response caching.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/ResponseCachingConfig" + }, + { + "type": "null" + } + ] + }, + "vector_store_cleanup": { + "description": "Vector store cleanup job configuration. 
Cleans up soft-deleted vector stores, their chunks, and orphaned files.", + "default": { + "batch_size": 100, + "cleanup_delay_secs": 3600, + "dry_run": false, + "enabled": false, + "interval_secs": 300, + "max_duration_secs": 60 + }, + "allOf": [ + { + "$ref": "#/definitions/VectorStoreCleanupConfig" + } + ] + }, + "websocket": { + "description": "WebSocket configuration for real-time event subscriptions. Enables clients to subscribe to server events via `/ws/events`.", + "default": { + "channel_capacity": 1024, + "enabled": true, + "max_connections": 1000, + "ping_interval_secs": 30, + "pong_timeout_secs": 60, + "require_auth": false + }, + "allOf": [ + { + "$ref": "#/definitions/WebSocketConfig" + } + ] + } + }, + "additionalProperties": false + }, + "FileProcessingConfig": { + "description": "Configuration for RAG file processing (chunking and embedding).\n\nControls how uploaded files are processed when added to vector stores. Supports two processing modes:\n\n- **Inline**: Process files synchronously within the gateway process. Simpler setup, but may timeout for large files (>10MB).\n\n- **Queue**: Publish processing jobs to an external queue for worker processes. Better for production deployments with large files or high volume.\n\n# Example Configuration\n\n```toml [features.file_processing] mode = \"inline\" max_file_size_mb = 10 max_concurrent_tasks = 4 default_max_chunk_tokens = 800 default_overlap_tokens = 200 ```\n\nFor queue mode:\n\n```toml [features.file_processing] mode = \"queue\"\n\n[features.file_processing.queue] backend = \"redis\" url = \"redis://localhost:6379\" queue_name = \"file_processing\" ```", + "type": "object", + "properties": { + "callback_url": { + "description": "Callback URL for queue workers to report completion. Workers POST to this URL when file processing completes. 
Only used in queue mode.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for vector store operations.\n\nProtects against unhealthy vector store backends by failing fast after repeated failures. When the circuit is open, requests fail immediately without attempting the operation.\n\nDefault: enabled with 5 failures in 60s to open, 30s recovery.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "default_max_chunk_tokens": { + "description": "Default maximum chunk size in tokens when using auto chunking strategy. Default: 800", + "default": 800, + "type": "integer", + "format": "int32" + }, + "default_overlap_tokens": { + "description": "Default chunk overlap in tokens when using auto chunking strategy. Overlap provides context continuity between chunks. Default: 200", + "default": 200, + "type": "integer", + "format": "int32" + }, + "document_extraction": { + "description": "Document extraction configuration. Controls OCR and PDF-specific processing options for rich documents.", + "default": { + "enable_ocr": false, + "force_ocr": false, + "ocr_language": "eng", + "pdf_extract_images": false, + "pdf_image_dpi": 300 + }, + "allOf": [ + { + "$ref": "#/definitions/DocumentExtractionConfig" + } + ] + }, + "max_concurrent_tasks": { + "description": "Maximum concurrent file processing tasks (inline mode only). Controls how many files can be processed simultaneously. Default: 4", + "default": 4, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "max_file_size_mb": { + "description": "Maximum file size in megabytes. Files larger than this will be rejected. 
Default: 10 MB", + "default": 10, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "mode": { + "description": "Processing mode: inline or queue.", + "default": "inline", + "allOf": [ + { + "$ref": "#/definitions/FileProcessingMode" + } + ] + }, + "queue": { + "description": "Queue configuration (required when mode = \"queue\").", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/FileProcessingQueueConfig" + }, + { + "type": "null" + } + ] + }, + "retry": { + "description": "Retry configuration for vector store operations during file processing.\n\nApplies to: - Storing chunks to vector database (transient DB errors) - Deleting chunks (connection issues)\n\nNote: Embedding API retries are handled separately at the provider level. Default: enabled with 3 retries, 100ms initial delay, 2x backoff.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "stale_processing_timeout_secs": { + "description": "Timeout in seconds for detecting stale in-progress files.\n\nWhen a file has been in `in_progress` status longer than this timeout, it's considered stale (e.g., worker crashed mid-processing). Re-adding the file will reset it for re-processing.\n\nSet to 0 to disable stale detection (files stuck in in_progress will never be automatically re-processed).\n\nDefault: 1800 (30 minutes)", + "default": 1800, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "virus_scan": { + "description": "Virus scanning configuration. 
When enabled, uploaded files are scanned before being stored.", + "default": { + "backend": "clamav", + "clamav": null, + "enabled": false + }, + "allOf": [ + { + "$ref": "#/definitions/VirusScanConfig" + } + ] + } + }, + "additionalProperties": false + }, + "FileProcessingMode": { + "description": "File processing mode.", + "oneOf": [ + { + "description": "Process files inline within the gateway process. Simplest setup, good for small deployments and small files. May timeout for large files (>10MB) or high concurrency.", + "type": "string", + "enum": [ + "inline" + ] + }, + { + "description": "Publish processing jobs to an external queue. Workers consume jobs and process files asynchronously. Better for production with large files or high volume.", + "type": "string", + "enum": [ + "queue" + ] + } + ] + }, + "FileProcessingQueueBackend": { + "description": "Queue backend type for file processing.", + "oneOf": [ + { + "description": "Redis Streams. Good for simple deployments, supports consumer groups.", + "type": "string", + "enum": [ + "redis" + ] + } + ] + }, + "FileProcessingQueueConfig": { + "description": "Queue backend configuration for file processing.", + "type": "object", + "required": [ + "backend", + "url" + ], + "properties": { + "backend": { + "description": "Queue backend type.", + "allOf": [ + { + "$ref": "#/definitions/FileProcessingQueueBackend" + } + ] + }, + "consumer_group": { + "description": "Consumer group name (for Redis Streams).", + "default": "hadrian_workers", + "type": "string" + }, + "queue_name": { + "description": "Queue/topic name for processing jobs.", + "default": "hadrian_file_processing", + "type": "string" + }, + "url": { + "description": "Connection URL for the queue backend. 
Example: \"redis://localhost:6379\"", + "type": "string" + } + }, + "additionalProperties": false + }, + "FileSearchConfig": { + "description": "Configuration for the file_search tool in the Responses API.\n\nWhen enabled, the gateway intercepts `file_search` tool calls from the LLM and executes them against the local vector store, injecting results back into the conversation without exposing the search process to the client.\n\n# Example Configuration\n\n```toml [features.file_search] enabled = true max_iterations = 5 max_results_per_search = 10 timeout_secs = 30 include_annotations = true score_threshold = 0.7\n\n# Optional: Configure vector backend independently from semantic caching [features.file_search.vector_backend] type = \"pgvector\" table_name = \"rag_chunks\" # Separate from semantic cache\n\n# Optional: Configure embeddings (falls back to semantic caching config) [features.file_search.embedding] provider = \"openai\" model = \"text-embedding-3-small\" dimensions = 1536\n\n# Optional: Configure retries for transient failures [features.file_search.retry] enabled = true max_retries = 3 initial_delay_ms = 100 max_delay_ms = 10000 backoff_multiplier = 2.0 jitter = 0.1 ```", + "type": "object", + "properties": { + "circuit_breaker": { + "description": "Circuit breaker configuration for vector store operations.\n\nProtects against unhealthy vector store backends by failing fast after repeated failures. 
When the circuit is open, requests fail immediately without attempting the operation.\n\nDefault: enabled with 5 failures in 60s to open, 30s recovery.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "embedding": { + "description": "Embedding configuration for RAG.\n\nWhen not specified, falls back to: 1. Semantic caching embedding config (if configured) 2. Vector search embedding config (if configured)\n\nMust be configured if neither semantic caching nor vector search is enabled.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/EmbeddingConfig" + }, + { + "type": "null" + } + ] + }, + "enabled": { + "description": "Enable file_search tool interception. When disabled, file_search tools are passed through to the provider (which may not support them).", + "default": true, + "type": "boolean" + }, + "include_annotations": { + "description": "Include file citation annotations in the response. When true, responses include metadata about which files were referenced.", + "default": true, + "type": "boolean" + }, + "max_iterations": { + "description": "Maximum number of tool call iterations before forcing completion. Prevents infinite loops where the model keeps calling file_search.", + "default": 5, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "max_results_per_search": { + "description": "Maximum number of search results to return per search call.", + "default": 10, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "max_search_result_chars": { + "description": "Maximum total characters for search results injected into continuation payload.\n\nPrevents context window overflow when search results are large. 
Results are truncated to fit within this limit, preserving complete result entries where possible (partial results are excluded).\n\nDefault: 50000 characters (~50 chunks of ~1000 chars each)", + "default": 50000, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "rerank": { + "description": "LLM-based re-ranking configuration.\n\nRe-ranking uses a language model to re-score search results based on semantic relevance to the query. Enable this to use `ranker: \"llm\"` in API requests.\n\nDefault: disabled", + "default": { + "batch_size": 10, + "enabled": false, + "fallback_on_error": true, + "max_results_to_rerank": 20, + "model": null, + "timeout_secs": 30 + }, + "allOf": [ + { + "$ref": "#/definitions/RerankConfig" + } + ] + }, + "retry": { + "description": "Retry configuration for RAG operations.\n\nApplies to: - Embedding API calls (transient 429/5xx errors) - Vector database writes (network issues, DB overload) - Vector database searches (connection errors)\n\nUses exponential backoff with configurable jitter. Default: enabled with 3 retries, 100ms initial delay, 2x backoff.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "score_threshold": { + "description": "Minimum similarity score threshold for search results (0.0-1.0). Results below this threshold are excluded.", + "default": 0.7, + "type": "number", + "format": "double" + }, + "timeout_secs": { + "description": "Timeout in seconds for each search operation.", + "default": 30, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "vector_backend": { + "description": "Vector database backend configuration for RAG chunk storage.\n\nWhen not specified, falls back to: 1. Semantic caching vector backend (if configured) 2. 
Default pgvector with table name \"rag_chunks\"\n\nConfiguring this separately from semantic caching ensures RAG data is stored in dedicated tables/collections, avoiding confusion with semantic cache data.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/RagVectorBackend" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "FileStorageBackend": { + "description": "Storage backend type.", + "oneOf": [ + { + "description": "Store file content directly in the database. Simplest option, good for small deployments. Files stored in the `file_data` column.", + "type": "string", + "enum": [ + "database" + ] + }, + { + "description": "Store file content on the local filesystem. Good for single-node deployments with large files. Database stores the file path reference.", + "type": "string", + "enum": [ + "filesystem" + ] + }, + { + "description": "Store file content in S3-compatible object storage. Best for production, multi-node deployments. Supports AWS S3, MinIO, R2, DigitalOcean Spaces, etc.", + "type": "string", + "enum": [ + "s3" + ] + } + ] + }, + "FileStorageConfig": { + "description": "File storage backend configuration for the Files API.\n\nDetermines where file content is stored. 
The database always stores file metadata; this config only affects where the actual file bytes live.\n\nNote: For chat upload storage, see `UploadStorageConfig` in `ui.rs`.", + "type": "object", + "properties": { + "backend": { + "description": "Storage backend to use.", + "default": "database", + "allOf": [ + { + "$ref": "#/definitions/FileStorageBackend" + } + ] + }, + "filesystem": { + "description": "Filesystem configuration (required when backend = \"filesystem\").", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/FilesystemStorageConfig" + }, + { + "type": "null" + } + ] + }, + "s3": { + "description": "S3 configuration (required when backend = \"s3\").", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/S3StorageConfig" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "FileUploadConfig": { + "description": "File upload configuration.", + "type": "object", + "properties": { + "allowed_types": { + "description": "Allowed MIME types.", + "default": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "application/pdf", + "text/plain", + "text/markdown" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "enabled": { + "description": "Enable file uploads.", + "default": false, + "type": "boolean" + }, + "max_size_bytes": { + "description": "Maximum file size in bytes.", + "default": 10485760, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "storage": { + "description": "Storage backend for uploaded files.", + "default": { + "type": "database" + }, + "allOf": [ + { + "$ref": "#/definitions/UploadStorageConfig" + } + ] + } + }, + "additionalProperties": false + }, + "FilesystemStorageConfig": { + "description": "Local filesystem storage configuration.", + "type": "object", + "required": [ + "path" + ], + "properties": { + "create_dir": { + "description": "Create the directory if it doesn't exist. 
Default: true", + "default": true, + "type": "boolean" + }, + "dir_mode": { + "description": "Directory permissions (Unix mode) for new directories. Default: 0o700 (owner read/write/execute only)", + "default": 448, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "file_mode": { + "description": "File permissions (Unix mode) for new files. Default: 0o600 (owner read/write only)", + "default": 384, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "path": { + "description": "Base directory for file storage. Files are stored as `{path}/{file-id}`.", + "type": "string" + } + }, + "additionalProperties": false + }, + "FontsConfig": { + "description": "Typography/font configuration.", + "type": "object", + "properties": { + "body": { + "description": "Font family for body text (e.g., \"Inter\", \"Roboto\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "custom": { + "description": "Custom fonts to load via @font-face.", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/CustomFont" + } + }, + "heading": { + "description": "Font family for headings (e.g., \"Inter\", \"Roboto\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "mono": { + "description": "Font family for monospace/code text (e.g., \"JetBrains Mono\", \"Fira Code\").", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "FooterLink": { + "type": "object", + "required": [ + "label", + "url" + ], + "properties": { + "label": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false + }, + "GatewayRbacConfig": { + "description": "Gateway endpoint authorization configuration.\n\nControls policy-based authorization for `/v1/*` gateway endpoints. 
This is separate from admin RBAC to allow independent rollout.", + "type": "object", + "properties": { + "default_effect": { + "description": "Default effect for gateway endpoints when no policy matches. Defaults to \"allow\" (fail-open). Set to \"deny\" for stricter security (fail-closed).", + "default": "allow", + "allOf": [ + { + "$ref": "#/definitions/PolicyEffect" + } + ] + }, + "enabled": { + "description": "Whether gateway authorization is enabled. When false, gateway endpoints only check authentication (API key validity). When true, policies are evaluated for model access, token limits, etc.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "GcpCredentials": { + "description": "GCP credential configuration.", + "oneOf": [ + { + "description": "Use Application Default Credentials.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "default" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Use a service account key file.", + "type": "object", + "required": [ + "key_path", + "type" + ], + "properties": { + "key_path": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "service_account" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Use a service account key from JSON string (useful with env vars).", + "type": "object", + "required": [ + "json", + "type" + ], + "properties": { + "json": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "service_account_json" + ] + } + }, + "additionalProperties": false + } + ] + }, + "GuardrailsAction": { + "description": "Action to take when content is flagged by guardrails.", + "oneOf": [ + { + "description": "Block the request/response and return an error.", + "type": "string", + "enum": [ + "block" + ] + }, + { + "description": "Allow but add warning headers to the response.", + "type": "string", + "enum": [ + "warn" + ] + }, + { + 
"description": "Allow silently but log the violation.", + "type": "string", + "enum": [ + "log" + ] + }, + { + "description": "Replace flagged content with a placeholder.", + "type": "object", + "required": [ + "redact" + ], + "properties": { + "redact": { + "type": "object", + "properties": { + "replacement": { + "description": "Replacement text for redacted content.", + "default": "[REDACTED]", + "type": "string" + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + }, + { + "description": "Transform/modify the content (provider-specific).", + "type": "string", + "enum": [ + "modify" + ] + } + ] + }, + "GuardrailsApplyTo": { + "description": "Where to apply custom guardrails.", + "oneOf": [ + { + "description": "Apply to input only.", + "type": "string", + "enum": [ + "input" + ] + }, + { + "description": "Apply to output only.", + "type": "string", + "enum": [ + "output" + ] + }, + { + "description": "Apply to both input and output.", + "type": "string", + "enum": [ + "both" + ] + } + ] + }, + "GuardrailsAuditConfig": { + "description": "Audit logging configuration for guardrails events.\n\nControls what guardrails events are logged to the audit log table. Events are logged asynchronously in a fire-and-forget pattern to avoid impacting request latency.\n\n# Example\n\n```toml [features.guardrails.audit] enabled = true log_all_evaluations = false # Only log violations, not all evaluations log_blocked = true # Log blocked requests/responses log_violations = true # Log policy violations log_redacted = true # Log redaction events (hashes, not content) ```", + "type": "object", + "properties": { + "enabled": { + "description": "Enable audit logging for guardrails events.", + "default": true, + "type": "boolean" + }, + "log_all_evaluations": { + "description": "Log all evaluations, not just violations. When false, only violations/blocks are logged. 
When true, every guardrails evaluation is logged (high volume).", + "default": false, + "type": "boolean" + }, + "log_blocked": { + "description": "Log blocked requests/responses.", + "default": true, + "type": "boolean" + }, + "log_redacted": { + "description": "Log redaction events. Includes content hashes (not actual content) for audit trail.", + "default": true, + "type": "boolean" + }, + "log_violations": { + "description": "Log policy violations (even if not blocked).", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "GuardrailsConfig": { + "description": "Comprehensive guardrails configuration for content filtering, PII detection, and safety enforcement.\n\nGuardrails can be applied to: - **Input (pre-request)**: Evaluate user messages before sending to LLM - **Output (post-response)**: Evaluate LLM responses before returning to user\n\nEach stage can use a different provider and have different action policies.\n\n# Example Configuration\n\n```toml [features.guardrails] enabled = true\n\n[features.guardrails.input] enabled = true mode = \"blocking\"\n\n[features.guardrails.input.provider] type = \"bedrock\" guardrail_id = \"abc123\" guardrail_version = \"1\"\n\n[features.guardrails.input.actions] HATE = \"block\" PROMPT_ATTACK = \"block\" VIOLENCE = \"warn\"\n\n[features.guardrails.output] enabled = true\n\n[features.guardrails.output.provider] type = \"openai_moderation\"\n\n[features.guardrails.pii] enabled = true action = \"redact\" types = [\"EMAIL\", \"PHONE\", \"SSN\"] ```", + "type": "object", + "properties": { + "audit": { + "description": "Audit logging configuration for guardrails events. 
Controls what guardrails events are logged to the audit log.", + "default": { + "enabled": true, + "log_all_evaluations": false, + "log_blocked": true, + "log_redacted": true, + "log_violations": true + }, + "allOf": [ + { + "$ref": "#/definitions/GuardrailsAuditConfig" + } + ] + }, + "custom": { + "description": "Custom guardrails via external webhook. Use this for bring-your-own guardrails implementations.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/CustomGuardrailsConfig" + }, + { + "type": "null" + } + ] + }, + "enabled": { + "description": "Enable guardrails globally.", + "default": true, + "type": "boolean" + }, + "input": { + "description": "Input (pre-request) guardrails configuration. Evaluates user input before sending to the LLM.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/InputGuardrailsConfig" + }, + { + "type": "null" + } + ] + }, + "output": { + "description": "Output (post-response) guardrails configuration. Evaluates LLM output before returning to the user.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/OutputGuardrailsConfig" + }, + { + "type": "null" + } + ] + }, + "pii": { + "description": "PII detection and handling configuration. 
Can work independently or in combination with content guardrails.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/PiiGuardrailsConfig" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "GuardrailsErrorAction": { + "description": "Action when guardrails provider encounters an error.", + "oneOf": [ + { + "description": "Block the request on error (fail-closed).", + "type": "string", + "enum": [ + "block" + ] + }, + { + "description": "Allow the request on error (fail-open).", + "type": "string", + "enum": [ + "allow" + ] + }, + { + "description": "Log the error and allow the request.", + "type": "string", + "enum": [ + "log_and_allow" + ] + } + ] + }, + "GuardrailsExecutionMode": { + "description": "Guardrails execution mode for input evaluation.", + "oneOf": [ + { + "description": "Blocking mode: wait for guardrails evaluation before sending to LLM. This is the safest mode but adds latency.", + "type": "string", + "enum": [ + "blocking" + ] + }, + { + "description": "Concurrent mode: start guardrails evaluation and LLM call simultaneously. If guardrails fail before LLM responds, cancel the LLM request. If LLM responds first, wait for guardrails result before returning. Reduces perceived latency while maintaining safety.", + "type": "string", + "enum": [ + "concurrent" + ] + } + ] + }, + "GuardrailsProvider": { + "description": "Guardrails provider configuration.", + "oneOf": [ + { + "description": "OpenAI Moderation API. Free, fast, good for general content moderation.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "api_key": { + "description": "OpenAI API key. If not provided, uses the default OpenAI provider key.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "base_url": { + "description": "Base URL for the moderation API (default: https://api.openai.com/v1). 
Useful for proxies or OpenAI-compatible endpoints.", + "default": "https://api.openai.com/v1", + "type": "string" + }, + "model": { + "description": "Model to use (default: text-moderation-latest).", + "default": "text-moderation-latest", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "openai_moderation" + ] + } + }, + "additionalProperties": false + }, + { + "description": "AWS Bedrock Guardrails. Enterprise-grade with configurable policies, PII detection, and word filters.", + "type": "object", + "required": [ + "guardrail_id", + "guardrail_version", + "type" + ], + "properties": { + "access_key_id": { + "description": "AWS access key ID. If not specified, uses default credentials.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "guardrail_id": { + "description": "Guardrail identifier.", + "type": "string" + }, + "guardrail_version": { + "description": "Guardrail version.", + "type": "string" + }, + "region": { + "description": "AWS region. If not specified, uses default region from environment.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "secret_access_key": { + "description": "AWS secret access key. If not specified, uses default credentials.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "trace_enabled": { + "description": "Enable trace for debugging.", + "default": false, + "type": "boolean" + }, + "type": { + "type": "string", + "enum": [ + "bedrock" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Azure AI Content Safety. 
Enterprise-grade with configurable severity levels.", + "type": "object", + "required": [ + "api_key", + "endpoint", + "type" + ], + "properties": { + "api_key": { + "description": "Azure API key.", + "type": "string" + }, + "api_version": { + "description": "API version (default: 2024-09-01).", + "default": "2024-09-01", + "type": "string" + }, + "blocklist_names": { + "description": "Enable blocklist checking.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "endpoint": { + "description": "Azure Content Safety endpoint URL.", + "type": "string" + }, + "thresholds": { + "description": "Severity thresholds per category (0-6, content above threshold is flagged). If not specified, uses Azure defaults.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + } + }, + "type": { + "type": "string", + "enum": [ + "azure_content_safety" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Built-in blocklist provider. Fast, local pattern matching with no external dependencies.", + "type": "object", + "required": [ + "patterns", + "type" + ], + "properties": { + "case_insensitive": { + "description": "Whether to match patterns case-insensitively (default: true).", + "default": true, + "type": "boolean" + }, + "patterns": { + "description": "List of patterns to match against content.", + "type": "array", + "items": { + "$ref": "#/definitions/BlocklistPattern" + } + }, + "type": { + "type": "string", + "enum": [ + "blocklist" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Built-in regex-based PII detection provider. 
Fast, local detection of common PII types with no external dependencies.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "credit_card": { + "description": "Detect credit card numbers (with Luhn validation).", + "default": true, + "type": "boolean" + }, + "date_of_birth": { + "description": "Detect dates that may be dates of birth.", + "default": true, + "type": "boolean" + }, + "email": { + "description": "Detect email addresses.", + "default": true, + "type": "boolean" + }, + "ip_address": { + "description": "Detect IP addresses (IPv4 and IPv6).", + "default": true, + "type": "boolean" + }, + "phone": { + "description": "Detect phone numbers (US and international formats).", + "default": true, + "type": "boolean" + }, + "ssn": { + "description": "Detect Social Security Numbers.", + "default": true, + "type": "boolean" + }, + "type": { + "type": "string", + "enum": [ + "pii_regex" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Built-in content limits provider. Enforces size constraints on content with no external dependencies.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "max_characters": { + "description": "Maximum number of characters allowed.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + }, + "max_lines": { + "description": "Maximum number of lines allowed.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + }, + "max_words": { + "description": "Maximum number of words allowed.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "content_limits" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Custom HTTP guardrails provider. 
For bring-your-own guardrails implementations.", + "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "api_key": { + "description": "API key for authentication.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "headers": { + "description": "Custom headers to include in requests.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "max_retries": { + "description": "Maximum number of retries.", + "default": 2, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "retry_enabled": { + "description": "Enable retry on failure.", + "default": false, + "type": "boolean" + }, + "timeout_ms": { + "description": "Request timeout in milliseconds.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "custom" + ] + }, + "url": { + "description": "Guardrails service URL.", + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "GuardrailsTimeoutAction": { + "description": "Action when guardrails evaluation times out.", + "oneOf": [ + { + "description": "Block the request on timeout (fail-closed).", + "type": "string", + "enum": [ + "block" + ] + }, + { + "description": "Allow the request on timeout (fail-open). 
Use only when availability is more important than safety.", + "type": "string", + "enum": [ + "allow" + ] + } + ] + }, + "HashAlgorithm": { + "description": "Hash algorithm for API keys.", + "oneOf": [ + { + "description": "SHA-256 (fast, suitable for high-entropy keys).", + "type": "string", + "enum": [ + "sha256" + ] + }, + { + "description": "Argon2id (slow, more secure if keys might be low-entropy).", + "type": "string", + "enum": [ + "argon2" + ] + } + ] + }, + "HstsConfig": { + "description": "HTTP Strict Transport Security (HSTS) configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Enable HSTS header.", + "default": true, + "type": "boolean" + }, + "include_subdomains": { + "description": "Include all subdomains in the HSTS policy.", + "default": true, + "type": "boolean" + }, + "max_age_secs": { + "description": "Max age in seconds browsers should remember to only use HTTPS. Default: 31536000 (1 year)", + "default": 31536000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "preload": { + "description": "Allow preloading into browser HSTS lists. Only enable if you're ready to commit to HTTPS permanently.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "HttpClientConfig": { + "description": "HTTP client configuration for outbound requests.\n\nControls connection pooling, timeouts, and HTTP/2 settings for requests to LLM providers and other external services.\n\n# Architecture: Single Shared Client\n\nThe gateway uses a single `reqwest::Client` instance shared across all providers. This is efficient because:\n\n- **Per-host connection pooling**: reqwest maintains separate connection pools for each host (api.openai.com, api.anthropic.com, etc.), so providers don't compete for connections.\n\n- **HTTP/2 multiplexing**: With `http2_adaptive_window` enabled, each connection can handle hundreds of concurrent request streams. 
At 32 idle connections per host, this supports thousands of concurrent requests per provider.\n\n- **Low overhead**: A single client shares DNS cache, TLS session cache, and connection pools, reducing memory and CPU overhead compared to per-provider clients.\n\nFor extreme workloads (10K+ RPS to a single provider), increase `pool_max_idle_per_host`. Per-provider clients would only help if you need different timeout settings per provider or complete resource isolation between providers.", + "type": "object", + "properties": { + "connect_timeout_secs": { + "description": "Connection timeout in seconds. Time allowed to establish a connection to the remote server.", + "default": 10, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "http2_adaptive_window": { + "description": "Enable HTTP/2 adaptive window sizing. Allows the receive window to grow dynamically based on throughput, improving performance for high-bandwidth connections.", + "default": true, + "type": "boolean" + }, + "http2_prior_knowledge": { + "description": "Enable HTTP/2 with prior knowledge (h2c or h2 without ALPN negotiation). Only enable if you know the target servers support HTTP/2. When false (default), HTTP version is negotiated automatically via ALPN.", + "default": false, + "type": "boolean" + }, + "pool_idle_timeout_secs": { + "description": "Idle connection timeout in seconds. Connections idle longer than this are closed.", + "default": 90, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "pool_max_idle_per_host": { + "description": "Maximum idle connections to keep per host. Higher values reduce connection establishment latency for frequently-used providers. Lower values reduce memory usage when connecting to many different hosts.", + "default": 32, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "tcp_keepalive_secs": { + "description": "TCP keepalive interval in seconds. Sends periodic probes to detect dead connections. 
Set to 0 to disable TCP keepalive.", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "tcp_nodelay": { + "description": "Enable TCP_NODELAY (disable Nagle's algorithm). Reduces latency for small writes at the cost of slightly higher bandwidth usage.", + "default": true, + "type": "boolean" + }, + "timeout_secs": { + "description": "Request timeout in seconds. This is the total time allowed for a request, including connection and response. Set high enough for long-running LLM completions (streaming responses may take minutes).", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "user_agent": { + "description": "User-Agent header to send with requests. Some providers require or prefer specific User-Agent values.", + "default": "hadrian/0.0.0-alpha.7", + "type": "string" + }, + "verbose": { + "description": "Enable verbose connection logging for debugging. Logs connection establishment details to help diagnose network issues.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "ImageFetchingConfig": { + "description": "HTTP image URL fetching configuration.\n\nNon-OpenAI providers (Anthropic, Bedrock, Vertex) only support base64 data URLs for images. When this feature is enabled, HTTP image URLs are automatically fetched and converted to base64 data URLs before being sent to the provider.\n\n# Example Configuration\n\n```toml [features.image_fetching] enabled = true max_size_mb = 20 timeout_secs = 30 allowed_content_types = [\"image/png\", \"image/jpeg\", \"image/gif\", \"image/webp\"] ```", + "type": "object", + "properties": { + "allowed_content_types": { + "description": "Allowed MIME types for fetched images. Empty list means allow all image/* types.", + "default": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "enabled": { + "description": "Enable HTTP image URL fetching. 
When disabled, HTTP image URLs will be passed through unchanged (and likely rejected by non-OpenAI providers).", + "default": true, + "type": "boolean" + }, + "max_size_mb": { + "description": "Maximum image size in megabytes. Images larger than this will not be fetched and will cause an error.", + "default": 20, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "timeout_secs": { + "description": "Timeout for fetching images in seconds.", + "default": 30, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "InputGuardrailsConfig": { + "description": "Input guardrails configuration (pre-request evaluation).", + "type": "object", + "required": [ + "provider" + ], + "properties": { + "actions": { + "description": "Per-category action configuration. Maps category names to actions. Unknown categories use `default_action`.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/GuardrailsAction" + } + }, + "default_action": { + "description": "Default action for categories not specified in `actions`.", + "default": "block", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsAction" + } + ] + }, + "enabled": { + "description": "Enable input guardrails.", + "default": true, + "type": "boolean" + }, + "mode": { + "description": "Execution mode for input guardrails.", + "default": "blocking", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsExecutionMode" + } + ] + }, + "on_error": { + "description": "Behavior when guardrails provider fails (network error, etc.).", + "default": "block", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsErrorAction" + } + ] + }, + "on_timeout": { + "description": "Behavior when guardrails evaluation times out (concurrent mode only).", + "default": "block", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsTimeoutAction" + } + ] + }, + "provider": { + "description": "Guardrails provider to use for input evaluation.", + "allOf": [ 
+ { + "$ref": "#/definitions/GuardrailsProvider" + } + ] + }, + "timeout_ms": { + "description": "Timeout for guardrails evaluation in milliseconds. Only applies to concurrent mode - if evaluation takes longer, the request proceeds based on `on_timeout` setting.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "IpRateLimitConfig": { + "description": "IP-based rate limiting configuration for unauthenticated traffic.", + "type": "object", + "properties": { + "enabled": { + "description": "Enable IP-based rate limiting for unauthenticated requests.", + "default": true, + "type": "boolean" + }, + "requests_per_hour": { + "description": "Requests per hour per IP address. Provides longer-term protection against sustained abuse.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "requests_per_minute": { + "description": "Requests per minute per IP address.", + "default": 120, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "LeefVersion": { + "description": "LEEF format version.", + "oneOf": [ + { + "description": "LEEF version 1.0 (original format).", + "type": "string", + "enum": [ + "1.0" + ] + }, + { + "description": "LEEF version 2.0 (with delimiter specification).", + "type": "string", + "enum": [ + "2.0" + ] + } + ] + }, + "LimitsConfig": { + "description": "Default limits configuration.\n\nThese limits are applied when no specific limits are set at the org, project, or user level.", + "type": "object", + "properties": { + "budgets": { + "description": "Budget defaults.", + "default": { + "daily_budget_usd": null, + "estimated_cost_cents": 10, + "monthly_budget_usd": null, + "warning_threshold": 0.8 + }, + "allOf": [ + { + "$ref": "#/definitions/BudgetDefaults" + } + ] + }, + "rate_limits": { + "description": "Rate limiting defaults.", + "default": { + 
"allow_per_key_above_global": false, + "concurrent_requests": 10, + "estimated_tokens_per_request": 1000, + "ip_rate_limits": { + "enabled": true, + "requests_per_hour": null, + "requests_per_minute": 120 + }, + "requests_per_day": null, + "requests_per_minute": 60, + "tokens_per_day": null, + "tokens_per_minute": 100000, + "window_type": "sliding" + }, + "allOf": [ + { + "$ref": "#/definitions/RateLimitDefaults" + } + ] + }, + "resource_limits": { + "description": "Resource limits for entity counts.", + "default": { + "max_api_keys_per_user": 25, + "max_policies_per_org": 100, + "max_providers_per_user": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ResourceLimits" + } + ] + } + }, + "additionalProperties": false + }, + "LogFormat": { + "oneOf": [ + { + "description": "Human-readable multi-line format.", + "type": "string", + "enum": [ + "pretty" + ] + }, + { + "description": "Compact single-line format.", + "type": "string", + "enum": [ + "compact" + ] + }, + { + "description": "JSON format (for log aggregation).", + "type": "string", + "enum": [ + "json" + ] + }, + { + "description": "CEF (Common Event Format) for ArcSight, Splunk, and most SIEMs.", + "type": "string", + "enum": [ + "cef" + ] + }, + { + "description": "LEEF (Log Event Extended Format) for IBM QRadar.", + "type": "string", + "enum": [ + "leef" + ] + }, + { + "description": "Syslog (RFC 5424) format for standard syslog servers.", + "type": "string", + "enum": [ + "syslog" + ] + } + ] + }, + "LogLevel": { + "type": "string", + "enum": [ + "trace", + "debug", + "info", + "warn", + "error" + ] + }, + "LoggingConfig": { + "description": "Logging configuration.", + "type": "object", + "properties": { + "file_line": { + "description": "Include file/line information.", + "default": false, + "type": "boolean" + }, + "filter": { + "description": "Filter directives (e.g., \"tower_http=debug,sqlx=warn\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "format": { + "description": "Log 
format.", + "default": "compact", + "allOf": [ + { + "$ref": "#/definitions/LogFormat" + } + ] + }, + "include_spans": { + "description": "Include span information for tracing integration.", + "default": true, + "type": "boolean" + }, + "level": { + "description": "Log level.", + "default": "info", + "allOf": [ + { + "$ref": "#/definitions/LogLevel" + } + ] + }, + "siem": { + "description": "SIEM-specific configuration (for CEF, LEEF, Syslog formats).", + "default": { + "app_name": "hadrian", + "device_product": "Gateway", + "device_vendor": "Hadrian", + "device_version": null, + "facility": "local0", + "hostname": null, + "leef_version": "2.0" + }, + "allOf": [ + { + "$ref": "#/definitions/SiemConfig" + } + ] + }, + "timestamps": { + "description": "Include timestamps.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "LoginConfig": { + "description": "Login page customization.", + "type": "object", + "properties": { + "background_image": { + "description": "Background image URL for the login page.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "show_logo": { + "description": "Whether to show the logo on the login page (defaults to true).", + "default": true, + "type": "boolean" + }, + "subtitle": { + "description": "Subtitle shown below the title (e.g., \"Use your university credentials\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "title": { + "description": "Custom title for the login page (e.g., \"Sign in to AI Gateway\").", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "MetricsConfig": { + "description": "Metrics configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Enable metrics gathering.", + "default": true, + "type": "boolean" + }, + "latency_buckets_ms": { + "description": "Histogram buckets for latency metrics (in milliseconds).", + "default": [ + 10.0, + 50.0, + 100.0, + 250.0, + 
500.0, + 1000.0, + 2500.0, + 5000.0, + 10000.0 + ], + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + "otlp": { + "description": "OTLP metrics exporter.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/OtlpConfig" + }, + { + "type": "null" + } + ] + }, + "prometheus": { + "description": "Prometheus endpoint configuration.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/PrometheusConfig" + }, + { + "type": "null" + } + ] + }, + "prometheus_query_url": { + "description": "Prometheus server URL for querying aggregated metrics (e.g., \"http://prometheus:9090\").\n\nWhen configured, provider statistics are fetched from Prometheus using PromQL queries. This enables accurate metrics aggregation in multi-node deployments where each gateway instance exposes its own /metrics endpoint to Prometheus.\n\nWhen not configured (default), provider statistics are derived from the local /metrics endpoint. This works for single-node deployments but won't show aggregate metrics across multiple gateway instances.\n\nNote: Historical stats (time series data) require Prometheus to be configured.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "token_buckets": { + "description": "Histogram buckets for token counts.", + "default": [ + 10.0, + 50.0, + 100.0, + 500.0, + 1000.0, + 5000.0, + 10000.0, + 50000.0, + 100000.0 + ], + "type": "array", + "items": { + "type": "number", + "format": "double" + } + } + }, + "additionalProperties": false + }, + "ModelCapabilities": { + "description": "Model capabilities extracted from the catalog.", + "type": "object", + "required": [ + "reasoning", + "structured_output", + "temperature", + "tool_call", + "vision" + ], + "properties": { + "reasoning": { + "description": "Whether the model supports reasoning/thinking mode", + "type": "boolean" + }, + "structured_output": { + "description": "Whether the model supports structured output (JSON mode)", + "type": "boolean" + }, + 
"temperature": { + "description": "Whether the model supports temperature control", + "type": "boolean" + }, + "tool_call": { + "description": "Whether the model supports tool/function calling", + "type": "boolean" + }, + "vision": { + "description": "Whether the model supports image/file attachments (vision)", + "type": "boolean" + } + } + }, + "ModelCatalogConfig": { + "description": "Configuration for the models.dev model catalog.\n\nThe catalog provides per-model metadata including capabilities, pricing, context limits, and modalities. Data is embedded at build time and optionally synced at runtime via a background job.\n\n# Example\n\n```toml [features.model_catalog] enabled = true sync_interval_secs = 1800 api_url = \"https://models.dev/api.json\" ```", + "type": "object", + "properties": { + "api_url": { + "description": "URL to fetch the catalog from.", + "default": "https://models.dev/api.json", + "type": "string" + }, + "enabled": { + "description": "Whether to enable runtime catalog sync. The embedded catalog is always loaded regardless of this setting. 
This only controls whether the background sync job runs.", + "default": true, + "type": "boolean" + }, + "sync_interval_secs": { + "description": "Interval between sync attempts in seconds.", + "default": 1800, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ModelConfig": { + "description": "Unified per-model configuration combining pricing, metadata, and task support.\n\nPricing fields are flattened inline so they can be specified directly: ```toml [providers.openai.models.\"dall-e-3\"] per_image = 40000 modalities = { input = [\"text\"], output = [\"image\"] } tasks = [\"image_generation\"] family = \"dall-e\" ```", + "type": "object", + "properties": { + "cache_write_per_1m_tokens": { + "description": "Cost per 1M cache write tokens", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "cached_input_per_1m_tokens": { + "description": "Cost per 1M cached input tokens (for providers that support caching)", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "capabilities": { + "description": "Model capabilities (vision, reasoning, tool_call, etc.).", + "anyOf": [ + { + "$ref": "#/definitions/ModelCapabilities" + }, + { + "type": "null" + } + ] + }, + "context_length": { + "description": "Maximum context window size (tokens).", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "family": { + "description": "Model family (e.g., \"dall-e\", \"gpt-4\", \"whisper\").", + "type": [ + "string", + "null" + ] + }, + "image_pricing": { + "description": "Per-image cost by quality and size, keyed as `\"quality:size\"` (e.g. `\"hd:1024x1024\"`). Supports wildcards: `\"*:1024x1024\"`, `\"hd:*\"`, `\"*:*\"`. 
Falls back to `per_image` when no key matches.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "image_qualities": { + "description": "Supported image quality options for image generation models.", + "type": "array", + "items": { + "type": "string" + } + }, + "image_sizes": { + "description": "Supported image sizes for image generation models.", + "type": "array", + "items": { + "type": "string" + } + }, + "input_per_1m_tokens": { + "description": "Cost per 1M input tokens in microcents (divide by 10000 for cents) Using per-1M to match provider APIs and avoid floating point", + "default": 0, + "type": "integer", + "format": "int64" + }, + "max_images": { + "description": "Maximum number of images per request for image generation models.", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "max_output_tokens": { + "description": "Maximum output tokens.", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "modalities": { + "description": "Input/output modalities (e.g., text, image, audio).", + "anyOf": [ + { + "$ref": "#/definitions/ModelModalities" + }, + { + "type": "null" + } + ] + }, + "open_weights": { + "description": "Whether the model has open weights.", + "type": [ + "boolean", + "null" + ] + }, + "output_per_1m_tokens": { + "description": "Cost per 1M output tokens in microcents", + "default": 0, + "type": "integer", + "format": "int64" + }, + "per_1m_characters": { + "description": "Cost per 1M characters (for TTS) in microcents Example: tts-1 at $0.015/1K chars = $15/1M chars = 15_000_000 microcents/1M", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "per_image": { + "description": "Cost per image (for vision models) in microcents (fallback for image_pricing)", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "per_request": { + "description": "Cost per request in microcents (some providers charge per-request)", + "type": 
[ + "integer", + "null" + ], + "format": "int64" + }, + "per_second": { + "description": "Cost per second of audio (for transcription/translation) in microcents Example: Whisper at $0.006/min = $0.0001/sec = 100 microcents/sec", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "reasoning_per_1m_tokens": { + "description": "Cost per 1M internal reasoning tokens (for reasoning models)", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "tasks": { + "description": "Supported tasks / API endpoints (e.g., \"chat\", \"image_generation\", \"tts\", \"transcription\", \"translation\", \"embedding\").", + "type": "array", + "items": { + "type": "string" + } + }, + "voices": { + "description": "Available voices for TTS models.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "ModelFallback": { + "description": "Model-specific fallback configuration.\n\nSpecifies an alternative model to try when the primary model fails. Can specify a different model on the same provider, or a different provider entirely.", + "type": "object", + "required": [ + "model" + ], + "properties": { + "model": { + "description": "Model name to use for fallback.", + "type": "string" + }, + "provider": { + "description": "Provider name to use. 
If not specified, uses the same provider.", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "ModelModalities": { + "description": "Model modalities from the catalog.", + "type": "object", + "required": [ + "input", + "output" + ], + "properties": { + "input": { + "description": "Supported input modalities (e.g., \"text\", \"image\", \"audio\")", + "type": "array", + "items": { + "type": "string" + } + }, + "output": { + "description": "Supported output modalities (e.g., \"text\", \"audio\")", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "ModelPricing": { + "description": "Pricing information for a specific model.\n\nCosts are stored in microcents (1/10000 of a cent) for precision. For example, $0.000002 per token = 0.0002 cents = 2 microcents.\n\nThis allows representing very small costs like: - OpenRouter's Gemini 3 Pro: $0.000002/token = 2 microcents - Cache read pricing: $0.0000002/token = 0.2 microcents", + "type": "object", + "properties": { + "cache_write_per_1m_tokens": { + "description": "Cost per 1M cache write tokens", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "cached_input_per_1m_tokens": { + "description": "Cost per 1M cached input tokens (for providers that support caching)", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "image_pricing": { + "description": "Per-image cost by quality and size, keyed as `\"quality:size\"` (e.g. `\"hd:1024x1024\"`). Supports wildcards: `\"*:1024x1024\"`, `\"hd:*\"`, `\"*:*\"`. 
Falls back to `per_image` when no key matches.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "integer", + "format": "int64" + } + }, + "input_per_1m_tokens": { + "description": "Cost per 1M input tokens in microcents (divide by 10000 for cents) Using per-1M to match provider APIs and avoid floating point", + "default": 0, + "type": "integer", + "format": "int64" + }, + "output_per_1m_tokens": { + "description": "Cost per 1M output tokens in microcents", + "default": 0, + "type": "integer", + "format": "int64" + }, + "per_1m_characters": { + "description": "Cost per 1M characters (for TTS) in microcents Example: tts-1 at $0.015/1K chars = $15/1M chars = 15_000_000 microcents/1M", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "per_image": { + "description": "Cost per image (for vision models) in microcents (fallback for image_pricing)", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "per_request": { + "description": "Cost per request in microcents (some providers charge per-request)", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "per_second": { + "description": "Cost per second of audio (for transcription/translation) in microcents Example: Whisper at $0.006/min = $0.0001/sec = 100 microcents/sec", + "type": [ + "integer", + "null" + ], + "format": "int64" + }, + "reasoning_per_1m_tokens": { + "description": "Cost per 1M internal reasoning tokens (for reasoning models)", + "type": [ + "integer", + "null" + ], + "format": "int64" + } + } + }, + "ObservabilityConfig": { + "description": "Observability configuration.", + "type": "object", + "properties": { + "dead_letter_queue": { + "description": "Dead-letter queue for failed operations (usage logging, etc.).", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/DeadLetterQueueConfig" + }, + { + "type": "null" + } + ] + }, + "logging": { + "description": "Logging configuration.", + "default": { + "file_line": false, + 
"filter": null, + "format": "compact", + "include_spans": true, + "level": "info", + "siem": { + "app_name": "hadrian", + "device_product": "Gateway", + "device_vendor": "Hadrian", + "device_version": null, + "facility": "local0", + "hostname": null, + "leef_version": "2.0" + }, + "timestamps": true + }, + "allOf": [ + { + "$ref": "#/definitions/LoggingConfig" + } + ] + }, + "metrics": { + "description": "Metrics configuration.", + "default": { + "enabled": true, + "latency_buckets_ms": [ + 10.0, + 50.0, + 100.0, + 250.0, + 500.0, + 1000.0, + 2500.0, + 5000.0, + 10000.0 + ], + "otlp": null, + "prometheus": null, + "prometheus_query_url": null, + "token_buckets": [ + 10.0, + 50.0, + 100.0, + 500.0, + 1000.0, + 5000.0, + 10000.0, + 50000.0, + 100000.0 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/MetricsConfig" + } + ] + }, + "response_validation": { + "description": "Response schema validation configuration. Validates API responses against the OpenAI OpenAPI specification.", + "default": { + "enabled": false, + "mode": "warn" + }, + "allOf": [ + { + "$ref": "#/definitions/ResponseValidationConfig" + } + ] + }, + "tracing": { + "description": "Tracing configuration (OpenTelemetry).", + "default": { + "enabled": false, + "environment": null, + "otlp": null, + "propagation": "trace_context", + "resource_attributes": {}, + "sampling": { + "rate": 1.0, + "strategy": "always_on" + }, + "service_name": "", + "service_version": null + }, + "allOf": [ + { + "$ref": "#/definitions/TracingConfig" + } + ] + }, + "usage": { + "description": "Usage logging configuration.", + "default": { + "buffer": { + "flush_interval_ms": 1000, + "max_pending_entries": 10000, + "max_size": 1000 + }, + "database": true, + "otlp": null + }, + "allOf": [ + { + "$ref": "#/definitions/UsageConfig" + } + ] + } + }, + "additionalProperties": false + }, + "OneOrMany_for_String": { + "description": "A value that can be either a single item or a list.", + "anyOf": [ + { + "type": "string" + }, + { + 
"type": "array", + "items": { + "type": "string" + } + } + ] + }, + "OtlpConfig": { + "description": "OTLP exporter configuration.", + "type": "object", + "required": [ + "endpoint" + ], + "properties": { + "compression": { + "description": "Enable compression.", + "default": true, + "type": "boolean" + }, + "endpoint": { + "description": "OTLP endpoint URL.", + "type": "string" + }, + "headers": { + "description": "Headers to include (e.g., for authentication).", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "protocol": { + "description": "Protocol (grpc or http).", + "default": "grpc", + "allOf": [ + { + "$ref": "#/definitions/OtlpProtocol" + } + ] + }, + "timeout_secs": { + "description": "Timeout in seconds.", + "default": 10, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "OtlpProtocol": { + "type": "string", + "enum": [ + "grpc", + "http" + ] + }, + "OutputGuardrailsConfig": { + "description": "Output guardrails configuration (post-response evaluation).", + "type": "object", + "required": [ + "provider" + ], + "properties": { + "actions": { + "description": "Per-category action configuration.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/GuardrailsAction" + } + }, + "default_action": { + "description": "Default action for categories not specified in `actions`.", + "default": "block", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsAction" + } + ] + }, + "enabled": { + "description": "Enable output guardrails.", + "default": true, + "type": "boolean" + }, + "on_error": { + "description": "Behavior when guardrails provider fails.", + "default": "block", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsErrorAction" + } + ] + }, + "provider": { + "description": "Guardrails provider to use for output evaluation.", + "allOf": [ + { + "$ref": "#/definitions/GuardrailsProvider" + } + ] + }, + 
"streaming_mode": { + "description": "Streaming evaluation mode. Controls how output is evaluated during streaming responses.", + "default": { + "buffered": { + "buffer_tokens": 100 + } + }, + "allOf": [ + { + "$ref": "#/definitions/StreamingGuardrailsMode" + } + ] + }, + "timeout_ms": { + "description": "Timeout for guardrails evaluation in milliseconds.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "PgvectorIndexType": { + "description": "Index type for pgvector.", + "oneOf": [ + { + "description": "IVFFlat index - faster to build, good for moderate dataset sizes.", + "type": "string", + "enum": [ + "ivf_flat" + ] + }, + { + "description": "HNSW index - better query performance, slower to build.", + "type": "string", + "enum": [ + "hnsw" + ] + } + ] + }, + "PiiAction": { + "description": "Action to take when PII is detected.", + "oneOf": [ + { + "description": "Block the request/response containing PII.", + "type": "string", + "enum": [ + "block" + ] + }, + { + "description": "Redact PII with placeholder text.", + "type": "string", + "enum": [ + "redact" + ] + }, + { + "description": "Anonymize PII (replace with consistent fake values).", + "type": "string", + "enum": [ + "anonymize" + ] + }, + { + "description": "Log PII detection but allow the content through.", + "type": "string", + "enum": [ + "log" + ] + } + ] + }, + "PiiApplyTo": { + "description": "Where to apply PII detection.", + "oneOf": [ + { + "description": "Apply to input only.", + "type": "string", + "enum": [ + "input" + ] + }, + { + "description": "Apply to output only.", + "type": "string", + "enum": [ + "output" + ] + }, + { + "description": "Apply to both input and output.", + "type": "string", + "enum": [ + "both" + ] + } + ] + }, + "PiiGuardrailsConfig": { + "description": "PII detection and handling configuration.", + "type": "object", + "properties": { + "action": { + "description": "Action to take when PII 
is detected.", + "default": "redact", + "allOf": [ + { + "$ref": "#/definitions/PiiAction" + } + ] + }, + "apply_to": { + "description": "Apply to input, output, or both.", + "default": "both", + "allOf": [ + { + "$ref": "#/definitions/PiiApplyTo" + } + ] + }, + "enabled": { + "description": "Enable PII detection.", + "default": true, + "type": "boolean" + }, + "provider": { + "description": "Provider for PII detection (if not using the main guardrails provider). If not specified, uses the provider from input/output guardrails config.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/PiiProvider" + }, + { + "type": "null" + } + ] + }, + "replacement": { + "description": "Custom replacement text for redaction. Only used when action is `redact`.", + "default": "[PII REDACTED]", + "type": "string" + }, + "types": { + "description": "PII types to detect. Common types: EMAIL, PHONE, SSN, CREDIT_CARD, ADDRESS, NAME, DATE_OF_BIRTH", + "default": [ + "EMAIL", + "PHONE", + "SSN", + "CREDIT_CARD" + ], + "type": "array", + "items": { + "$ref": "#/definitions/PiiType" + } + } + }, + "additionalProperties": false + }, + "PiiProvider": { + "description": "PII detection provider (if not using main guardrails provider).", + "oneOf": [ + { + "description": "Use AWS Bedrock Guardrails for PII detection.", + "type": "object", + "required": [ + "guardrail_id", + "guardrail_version", + "type" + ], + "properties": { + "guardrail_id": { + "type": "string" + }, + "guardrail_version": { + "type": "string" + }, + "region": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "bedrock" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Use a regex-based local PII detector.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "regex" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Use a custom PII detection service.", 
+ "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "api_key": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "custom" + ] + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "PiiType": { + "description": "PII types for detection.", + "oneOf": [ + { + "type": "string", + "enum": [ + "EMAIL", + "PHONE", + "SSN", + "CREDIT_CARD", + "ADDRESS", + "NAME", + "DATE_OF_BIRTH", + "DRIVERS_LICENSE", + "PASSPORT", + "BANK_ACCOUNT", + "IP_ADDRESS", + "MAC_ADDRESS", + "URL", + "USERNAME", + "PASSWORD", + "AWS_ACCESS_KEY", + "AWS_SECRET_KEY", + "API_KEY" + ] + }, + { + "description": "Custom PII type (provider-specific).", + "type": "object", + "required": [ + "CUSTOM" + ], + "properties": { + "CUSTOM": { + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "PolicyConfig": { + "description": "A policy for fine-grained access control.", + "type": "object", + "required": [ + "condition", + "effect", + "name" + ], + "properties": { + "action": { + "description": "Action this policy applies to (e.g., \"read\", \"create\", \"*\").", + "default": "*", + "type": "string" + }, + "condition": { + "description": "CEL expression that must evaluate to true for the policy to apply.\n\nAvailable variables for all endpoints: - `subject.user_id`: User's internal ID - `subject.external_id`: User's IdP ID - `subject.email`: User's email - `subject.roles`: List of role names - `subject.org_ids`: List of organization IDs the user belongs to - `subject.team_ids`: List of team IDs the user belongs to - `subject.project_ids`: List of project IDs the user belongs to - `context.resource_type`: Resource being accessed (e.g., \"model\", \"chat\", \"team\") - `context.action`: Action being performed (e.g., \"use\", \"read\", \"create\") - `context.org_id`: Organization ID scope (if applicable) - `context.team_id`: Team ID scope (if applicable) - 
`context.project_id`: Project ID scope (if applicable) - `context.resource_id`: Specific resource ID being accessed\n\nAdditional variables for API endpoints (`/v1/*`): - `context.model`: Model being requested (e.g., \"gpt-4o\", \"claude-3-opus\") - `context.request.max_tokens`: Maximum tokens requested - `context.request.messages_count`: Number of messages in conversation - `context.request.has_tools`: Whether tools/functions are being used - `context.request.has_file_search`: Whether file_search tool is present - `context.request.stream`: Whether streaming is requested - `context.now.hour`: Current hour (0-23) - `context.now.day_of_week`: Day of week (1=Monday, 7=Sunday) - `context.now.timestamp`: Unix timestamp", + "type": "string" + }, + "description": { + "description": "Human-readable description.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "effect": { + "description": "Whether this policy allows or denies the action.", + "allOf": [ + { + "$ref": "#/definitions/PolicyEffect" + } + ] + }, + "name": { + "description": "Unique name for this policy.", + "type": "string" + }, + "priority": { + "description": "Priority for evaluation order (higher = evaluated first). 
At the same priority, deny policies are evaluated before allow.", + "default": 0, + "type": "integer", + "format": "int32" + }, + "resource": { + "description": "Resource type this policy applies to (e.g., \"organization\", \"project\", \"*\").", + "default": "*", + "type": "string" + } + }, + "additionalProperties": false + }, + "PolicyEffect": { + "description": "Policy effect (allow or deny).", + "type": "string", + "enum": [ + "allow", + "deny" + ] + }, + "PostgresSslMode": { + "description": "PostgreSQL SSL mode.", + "oneOf": [ + { + "description": "No SSL.", + "type": "string", + "enum": [ + "disable" + ] + }, + { + "description": "Try SSL, fall back to non-SSL.", + "type": "string", + "enum": [ + "prefer" + ] + }, + { + "description": "Require SSL.", + "type": "string", + "enum": [ + "require" + ] + }, + { + "description": "Require SSL and verify server certificate.", + "type": "string", + "enum": [ + "verify_ca" + ] + }, + { + "description": "Require SSL and verify server certificate and hostname.", + "type": "string", + "enum": [ + "verify_full" + ] + } + ] + }, + "PricingConfig": { + "description": "Pricing configuration for all providers and models", + "type": "object", + "properties": { + "cost_source": { + "description": "Cost source preference for usage tracking", + "default": "prefer_provider", + "allOf": [ + { + "$ref": "#/definitions/CostSource" + } + ] + }, + "pricing": { + "description": "Pricing by provider and model Structure: pricing[provider][model] = ModelPricing", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelPricing" + } + } + } + } + }, + "PrometheusConfig": { + "description": "Prometheus configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Enable Prometheus endpoint.", + "default": true, + "type": "boolean" + }, + "path": { + "description": "Path for the metrics endpoint.", + "default": "/metrics", + "type": 
"string" + }, + "process_metrics": { + "description": "Include default process metrics.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "PropagationFormat": { + "oneOf": [ + { + "description": "W3C Trace Context.", + "type": "string", + "enum": [ + "trace_context" + ] + }, + { + "description": "B3 (Zipkin).", + "type": "string", + "enum": [ + "b3" + ] + }, + { + "description": "Jaeger.", + "type": "string", + "enum": [ + "jaeger" + ] + }, + { + "description": "Multiple formats.", + "type": "string", + "enum": [ + "multi" + ] + } + ] + }, + "ProviderConfig": { + "description": "Configuration for a single provider.\n\nThe `type` field determines which API protocol to use. Some providers require specific features to be enabled: - `bedrock` requires the `provider-bedrock` feature - `vertex` requires the `provider-vertex` feature - `azure_openai` requires the `provider-azure` feature", + "oneOf": [ + { + "description": "OpenAI API (also works for OpenAI-compatible providers like OpenRouter, Together, Groq, and local servers like Ollama/vLLM).", + "type": "object", + "required": [ + "type" + ], + "properties": { + "allowed_models": { + "description": "Models available through this provider. If empty, all models are allowed.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "api_key": { + "description": "API key. Required for OpenAI and most hosted providers. Optional for local servers like Ollama.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "base_url": { + "description": "Base URL for the API.", + "default": "https://api.openai.com/v1", + "type": "string" + }, + "catalog_provider": { + "description": "Override the catalog provider ID for model enrichment. If not specified, the provider is auto-detected from the base URL. 
Use this for OpenAI-compatible providers that aren't auto-detected.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for unhealthy provider protection.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "fallback_providers": { + "description": "Fallback providers to try when this provider fails. Providers are tried in order on retryable errors (5xx, timeout, circuit breaker open).", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "headers": { + "description": "Custom headers to include in requests. Useful for provider-specific headers like OpenRouter's HTTP-Referer.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "health_check": { + "description": "Health check configuration for proactive provider monitoring.", + "default": { + "enabled": false, + "interval_secs": 60, + "mode": "reachability", + "timeout_secs": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckConfig" + } + ] + }, + "model_aliases": { + "description": "Model aliases (e.g., \"gpt4\" -> \"gpt-4-turbo-preview\").", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model_fallbacks": { + "description": "Model-specific fallback configurations. Model fallbacks are tried before provider-level fallbacks.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/ModelFallback" + } + } + }, + "models": { + "description": "Per-model configuration (pricing, modalities, tasks, metadata). 
If set, overrides default pricing and adds metadata for these models.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelConfig" + } + }, + "organization": { + "description": "Organization ID (OpenAI-specific).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "project": { + "description": "Project ID (OpenAI-specific, for project-based access).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "retry": { + "description": "Retry configuration for transient failures.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "supports_tools": { + "description": "Whether this provider supports function/tool calling.", + "default": false, + "type": "boolean" + }, + "supports_vision": { + "description": "Whether this provider supports vision/image inputs.", + "default": false, + "type": "boolean" + }, + "timeout_secs": { + "description": "Request timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "open_ai" + ] + } + } + }, + { + "description": "Anthropic API.", + "type": "object", + "required": [ + "api_key", + "type" + ], + "properties": { + "allowed_models": { + "description": "Models available through this provider.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "api_key": { + "description": "API key (required).", + "type": "string" + }, + "base_url": { + "description": "Base URL override.", + "default": "https://api.anthropic.com", + "type": "string" + }, + "catalog_provider": { + "description": "Override the catalog provider ID for model enrichment. 
Defaults to \"anthropic\".", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for unhealthy provider protection.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "default_max_tokens": { + "description": "Default max_tokens to use if not specified in requests.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "default_model": { + "description": "Default model to use if not specified in requests.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "fallback_providers": { + "description": "Fallback providers to try when this provider fails.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "health_check": { + "description": "Health check configuration for proactive provider monitoring.", + "default": { + "enabled": false, + "interval_secs": 60, + "mode": "reachability", + "timeout_secs": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckConfig" + } + ] + }, + "model_aliases": { + "description": "Model aliases.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model_fallbacks": { + "description": "Model-specific fallback configurations.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/ModelFallback" + } + } + }, + "models": { + "description": "Per-model configuration (pricing, modalities, tasks, metadata).", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelConfig" + } + }, + "retry": { + "description": "Retry configuration for transient 
failures.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "streaming_buffer": { + "description": "Streaming buffer limits for DoS protection.", + "default": { + "max_input_buffer_bytes": 16777216, + "max_output_buffer_chunks": 1000 + }, + "allOf": [ + { + "$ref": "#/definitions/StreamingBufferConfig" + } + ] + }, + "timeout_secs": { + "description": "Request timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "anthropic" + ] + } + } + }, + { + "description": "AWS Bedrock. Requires the `provider-bedrock` feature.", + "type": "object", + "required": [ + "region", + "type" + ], + "properties": { + "allowed_models": { + "description": "Models available through this provider.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "catalog_provider": { + "description": "Override the catalog provider ID for model enrichment. Defaults to \"amazon-bedrock\".", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for unhealthy provider protection.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "converse_base_url": { + "description": "Custom Converse API base URL override. If not specified, defaults to `https://bedrock-runtime.{region}.amazonaws.com`. 
This is useful for VPC endpoints, testing, or custom deployments.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "credentials": { + "description": "Credential source.", + "default": { + "type": "default" + }, + "allOf": [ + { + "$ref": "#/definitions/AwsCredentials" + } + ] + }, + "fallback_providers": { + "description": "Fallback providers to try when this provider fails.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "health_check": { + "description": "Health check configuration for proactive provider monitoring.", + "default": { + "enabled": false, + "interval_secs": 60, + "mode": "reachability", + "timeout_secs": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckConfig" + } + ] + }, + "inference_profile_arn": { + "description": "Cross-region inference profile ARN (for multi-region routing).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "model_aliases": { + "description": "Model aliases.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model_fallbacks": { + "description": "Model-specific fallback configurations.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/ModelFallback" + } + } + }, + "models": { + "description": "Per-model configuration (pricing, modalities, tasks, metadata).", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelConfig" + } + }, + "region": { + "description": "AWS region.", + "type": "string" + }, + "retry": { + "description": "Retry configuration for transient failures.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "streaming_buffer": { + 
"description": "Streaming buffer limits for DoS protection.", + "default": { + "max_input_buffer_bytes": 16777216, + "max_output_buffer_chunks": 1000 + }, + "allOf": [ + { + "$ref": "#/definitions/StreamingBufferConfig" + } + ] + }, + "timeout_secs": { + "description": "Request timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "bedrock" + ] + } + } + }, + { + "description": "Google Vertex AI. Requires the `provider-vertex` feature.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "allowed_models": { + "description": "Models available through this provider.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "api_key": { + "description": "API key for simple Gemini API access. When set, uses `https://aiplatform.googleapis.com/v1/publishers/{publisher}/models` with `?key=` query parameter authentication. Mutually exclusive with project/region/credentials.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "base_url": { + "description": "Custom base URL override. Useful for VPC endpoints, testing, or custom deployments. If not specified, defaults based on auth mode: - API key: `https://aiplatform.googleapis.com` - OAuth: `https://{region}-aiplatform.googleapis.com`", + "default": null, + "type": [ + "string", + "null" + ] + }, + "catalog_provider": { + "description": "Override the catalog provider ID for model enrichment. 
Defaults to \"google-vertex\".", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for unhealthy provider protection.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "credentials": { + "description": "Credential source for OAuth/ADC mode. Ignored with api_key.", + "default": { + "type": "default" + }, + "allOf": [ + { + "$ref": "#/definitions/GcpCredentials" + } + ] + }, + "fallback_providers": { + "description": "Fallback providers to try when this provider fails.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "health_check": { + "description": "Health check configuration for proactive provider monitoring.", + "default": { + "enabled": false, + "interval_secs": 60, + "mode": "reachability", + "timeout_secs": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckConfig" + } + ] + }, + "model_aliases": { + "description": "Model aliases.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model_fallbacks": { + "description": "Model-specific fallback configurations.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/ModelFallback" + } + } + }, + "models": { + "description": "Per-model configuration (pricing, modalities, tasks, metadata).", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelConfig" + } + }, + "project": { + "description": "GCP project ID. 
Required for OAuth/ADC mode, ignored with api_key.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "publisher": { + "description": "Model publisher. Defaults to \"google\". Use \"anthropic\" for Claude models, \"meta\" for Llama models on Vertex AI.", + "default": "google", + "type": "string" + }, + "region": { + "description": "GCP region. Required for OAuth/ADC mode, ignored with api_key.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "retry": { + "description": "Retry configuration for transient failures.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "streaming_buffer": { + "description": "Streaming buffer limits for DoS protection.", + "default": { + "max_input_buffer_bytes": 16777216, + "max_output_buffer_chunks": 1000 + }, + "allOf": [ + { + "$ref": "#/definitions/StreamingBufferConfig" + } + ] + }, + "timeout_secs": { + "description": "Request timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "vertex" + ] + } + } + }, + { + "description": "Azure OpenAI. Requires the `provider-azure` feature.", + "type": "object", + "required": [ + "auth", + "resource_name", + "type" + ], + "properties": { + "allowed_models": { + "description": "Models available through this provider.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "api_version": { + "description": "API version.", + "default": "2024-02-01", + "type": "string" + }, + "auth": { + "description": "Authentication method.", + "allOf": [ + { + "$ref": "#/definitions/AzureAuth" + } + ] + }, + "catalog_provider": { + "description": "Override the catalog provider ID for model enrichment. 
Defaults to \"azure\".", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for unhealthy provider protection.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "deployments": { + "description": "Deployment configurations. Maps deployment ID to model info for routing.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AzureDeployment" + } + }, + "fallback_providers": { + "description": "Fallback providers to try when this provider fails.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "health_check": { + "description": "Health check configuration for proactive provider monitoring.", + "default": { + "enabled": false, + "interval_secs": 60, + "mode": "reachability", + "timeout_secs": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckConfig" + } + ] + }, + "model_aliases": { + "description": "Model aliases.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model_fallbacks": { + "description": "Model-specific fallback configurations.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/ModelFallback" + } + } + }, + "models": { + "description": "Per-model configuration (pricing, modalities, tasks, metadata).", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelConfig" + } + }, + "resource_name": { + "description": "Azure resource name.", + "type": "string" + }, + "retry": { + "description": "Retry configuration for transient failures.", + "default": { + 
"backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "timeout_secs": { + "description": "Request timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "azure_open_ai" + ] + } + } + }, + { + "description": "Test provider (mock responses, no API calls).", + "type": "object", + "required": [ + "type" + ], + "properties": { + "allowed_models": { + "description": "Models available through this provider. If empty, all models are allowed.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "catalog_provider": { + "description": "Override the catalog provider ID for model enrichment. Test providers typically don't need catalog enrichment.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "circuit_breaker": { + "description": "Circuit breaker configuration for unhealthy provider protection.", + "default": { + "backoff_multiplier": 2.0, + "enabled": false, + "failure_status_codes": [ + 500, + 502, + 503, + 504 + ], + "failure_threshold": 5, + "max_open_timeout_secs": 300, + "open_timeout_secs": 30, + "success_threshold": 2 + }, + "allOf": [ + { + "$ref": "#/definitions/CircuitBreakerConfig" + } + ] + }, + "failure_mode": { + "description": "Failure mode for simulating errors. 
Defaults to `none` (normal operation).", + "default": { + "type": "none" + }, + "allOf": [ + { + "$ref": "#/definitions/TestFailureMode" + } + ] + }, + "fallback_providers": { + "description": "Fallback providers to try when this provider fails.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "health_check": { + "description": "Health check configuration for proactive provider monitoring.", + "default": { + "enabled": false, + "interval_secs": 60, + "mode": "reachability", + "timeout_secs": 10 + }, + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckConfig" + } + ] + }, + "model_aliases": { + "description": "Model aliases.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "model_fallbacks": { + "description": "Model-specific fallback configurations.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/ModelFallback" + } + } + }, + "model_name": { + "description": "Model name to use in responses.", + "default": "test-model", + "type": "string" + }, + "models": { + "description": "Per-model configuration (pricing, modalities, tasks, metadata).", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/ModelConfig" + } + }, + "retry": { + "description": "Retry configuration for transient failures.", + "default": { + "backoff_multiplier": 2.0, + "enabled": true, + "initial_delay_ms": 100, + "jitter": 0.1, + "max_delay_ms": 10000, + "max_retries": 3, + "retryable_status_codes": [ + 429, + 500, + 502, + 503, + 504 + ] + }, + "allOf": [ + { + "$ref": "#/definitions/RetryConfig" + } + ] + }, + "timeout_secs": { + "description": "Request timeout in seconds (ignored, but kept for consistency).", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "test" + ] + } + } + } + ] + }, + "ProviderHealthCheckConfig": { 
+ "description": "Configuration for provider health checks.\n\nHealth checks allow proactive monitoring of provider availability, rather than only reacting to failures via circuit breakers.\n\n# Modes\n\n- **Reachability** (default): Calls the provider's `/models` endpoint. This is free, fast, and verifies basic connectivity and authentication.\n\n- **Inference**: Sends a minimal chat completion request. More thorough (tests the full inference path) but costs money. Requires specifying a model and optional prompt.\n\n# Example\n\n```toml [providers.my-openai.health_check] enabled = true mode = \"reachability\" # or \"inference\" interval_secs = 60 # Check every 60 seconds timeout_secs = 10 # Timeout for health check request\n\n# Only for mode = \"inference\" model = \"gpt-4o-mini\" # Cheap model for health checks prompt = \"Say OK\" # Simple prompt (default: \"ping\") ```", + "type": "object", + "properties": { + "enabled": { + "description": "Whether health checks are enabled for this provider. Default: false (no active health checks)", + "default": false, + "type": "boolean" + }, + "interval_secs": { + "description": "Interval between health checks in seconds. Default: 60 seconds", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "mode": { + "description": "Health check mode. Default: Reachability (free endpoint check)", + "default": "reachability", + "allOf": [ + { + "$ref": "#/definitions/ProviderHealthCheckMode" + } + ] + }, + "model": { + "description": "Model to use for inference health checks. Required when mode = \"inference\".", + "type": [ + "string", + "null" + ] + }, + "prompt": { + "description": "Prompt to send for inference health checks. Default: \"ping\"", + "type": [ + "string", + "null" + ] + }, + "timeout_secs": { + "description": "Timeout for each health check request in seconds. 
Default: 10 seconds", + "default": 10, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ProviderHealthCheckMode": { + "description": "Health check mode determining how provider health is verified.", + "oneOf": [ + { + "description": "Call the provider's list models endpoint. Free, fast, verifies connectivity and authentication.", + "type": "string", + "enum": [ + "reachability" + ] + }, + { + "description": "Send a minimal chat completion request. More thorough but costs money. Requires a model to be specified.", + "type": "string", + "enum": [ + "inference" + ] + } + ] + }, + "ProvidersConfig": { + "description": "Provider configurations container.\n\nEach provider has a unique name (the TOML key) and specifies its type to determine which API protocol to use.", + "type": "object", + "properties": { + "default_provider": { + "description": "Default provider name for requests that don't specify one.", + "default": null, + "type": [ + "string", + "null" + ] + } + } + }, + "ProxyAuthJwtConfig": { + "description": "JWT assertion configuration for proxy auth. Used when the proxy also provides a signed JWT for additional verification.", + "type": "object", + "required": [ + "audience", + "header", + "issuer", + "jwks_url" + ], + "properties": { + "audience": { + "description": "Expected audience.", + "allOf": [ + { + "$ref": "#/definitions/OneOrMany_for_String" + } + ] + }, + "header": { + "description": "Header containing the JWT.", + "type": "string" + }, + "issuer": { + "description": "Expected issuer.", + "type": "string" + }, + "jwks_url": { + "description": "JWKS URL for validating the JWT.", + "type": "string" + } + }, + "additionalProperties": false + }, + "RagVectorBackend": { + "description": "Vector database backend for RAG chunk storage.\n\nThis is separate from `SemanticVectorBackend` to allow independent configuration of RAG storage vs. semantic caching storage. 
Using separate tables/collections for each purpose improves clarity and allows different index configurations.", + "oneOf": [ + { + "description": "PostgreSQL with pgvector extension. Uses the existing database connection pool.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "distance_metric": { + "description": "Distance metric for similarity search. Defaults to cosine, which works best for text embeddings.", + "default": "cosine", + "allOf": [ + { + "$ref": "#/definitions/DistanceMetric" + } + ] + }, + "index_type": { + "description": "Index type for vector similarity search.", + "default": "ivf_flat", + "allOf": [ + { + "$ref": "#/definitions/PgvectorIndexType" + } + ] + }, + "table_name": { + "description": "Table name for storing RAG document chunks. Note: A second table \"{table_name}_chunks\" will NOT be created; this table name IS the chunks table.", + "default": "rag_chunks", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "pgvector" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Qdrant vector database.", + "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "api_key": { + "description": "API key for authentication (optional).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "distance_metric": { + "description": "Distance metric for similarity search. 
Defaults to cosine, which works best for text embeddings.", + "default": "cosine", + "allOf": [ + { + "$ref": "#/definitions/DistanceMetric" + } + ] + }, + "qdrant_collection_name": { + "description": "Qdrant collection name for storing RAG document chunks.", + "default": "rag_chunks", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "qdrant" + ] + }, + "url": { + "description": "Qdrant server URL.", + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "RateLimitDefaults": { + "description": "Rate limiting defaults.", + "type": "object", + "properties": { + "allow_per_key_above_global": { + "description": "Allow per-API-key rate limits to exceed global defaults. When false (default), API keys cannot have higher rate limits than the global config. When true, API keys can have any positive rate limit value.", + "default": false, + "type": "boolean" + }, + "concurrent_requests": { + "description": "Concurrent request limit per identity.", + "default": 10, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "estimated_tokens_per_request": { + "description": "Estimated tokens per request for atomic token rate limit reservation. This is reserved before the request is processed to prevent race conditions. After the request completes, the actual token count replaces the estimate. Default is 1000 tokens which is conservative for most prompts.", + "default": 1000, + "type": "integer", + "format": "int64" + }, + "ip_rate_limits": { + "description": "IP-based rate limiting for unauthenticated requests. 
Protects public endpoints (health, auth) from abuse.", + "default": { + "enabled": true, + "requests_per_hour": null, + "requests_per_minute": 120 + }, + "allOf": [ + { + "$ref": "#/definitions/IpRateLimitConfig" + } + ] + }, + "requests_per_day": { + "description": "Requests per day per identity.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "requests_per_minute": { + "description": "Requests per minute per identity.", + "default": 60, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "tokens_per_day": { + "description": "Tokens per day per identity.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "tokens_per_minute": { + "description": "Tokens per minute per identity.", + "default": 100000, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "window_type": { + "description": "Rate limit window type.", + "default": "sliding", + "allOf": [ + { + "$ref": "#/definitions/RateLimitWindowType" + } + ] + } + }, + "additionalProperties": false + }, + "RateLimitWindowType": { + "description": "Rate limit window type.", + "oneOf": [ + { + "description": "Fixed window (resets at interval boundaries).", + "type": "string", + "enum": [ + "fixed" + ] + }, + { + "description": "Sliding window (rolling count over the interval).", + "type": "string", + "enum": [ + "sliding" + ] + } + ] + }, + "RbacConfig": { + "description": "Role-based access control configuration.\n\nRoles come from the IdP (JWT claims), and policies are defined here with CEL conditions for fine-grained access control.", + "type": "object", + "properties": { + "audit": { + "description": "Audit logging configuration for authorization decisions.", + "default": { + "log_allowed": false, + "log_denied": true + }, + "allOf": [ + { + "$ref": "#/definitions/AuthzAuditConfig" + } + ] + }, + "default_effect": { + "description": "Default effect when no policy matches. 
Defaults to \"deny\".", + "default": "deny", + "allOf": [ + { + "$ref": "#/definitions/PolicyEffect" + } + ] + }, + "enabled": { + "description": "Whether RBAC is enabled for admin endpoints. If false, all admin requests are allowed.", + "default": false, + "type": "boolean" + }, + "fail_on_evaluation_error": { + "description": "Behavior when a CEL policy condition fails to evaluate at runtime.\n\nEven though policies are validated at creation time, runtime errors can occur due to unexpected data shapes (e.g., null values, type mismatches).\n\n- `true` (default): Deny the request on evaluation error (fail-closed). This is the secure option - errors don't create security holes. - `false`: Skip the erroring policy and continue to the next one (fail-open). Use only if availability is more important than security.\n\nErrors are always logged regardless of this setting.", + "default": true, + "type": "boolean" + }, + "gateway": { + "description": "Gateway endpoint authorization configuration. Controls authorization for `/v1/*` endpoints (chat completions, embeddings, etc.).", + "default": { + "default_effect": "allow", + "enabled": false + }, + "allOf": [ + { + "$ref": "#/definitions/GatewayRbacConfig" + } + ] + }, + "lazy_load_policies": { + "description": "Load org policies lazily on first request instead of at startup.\n\n- `false` (default): Load all org policies at startup (eager loading). Good for smaller deployments where startup time isn't critical. - `true`: Load policies on-demand when an org is first accessed. 
Recommended for large deployments with many organizations.\n\nLazy loading eliminates startup memory spikes and reduces initial load time, but the first request for each org may be slightly slower as policies are loaded from the database.", + "default": false, + "type": "boolean" + }, + "max_cached_orgs": { + "description": "Maximum number of organizations to keep in the policy cache.\n\nWhen the cache exceeds this limit, the least recently used (LRU) organizations are evicted to make room for new ones.\n\n- 0 (default): No limit, cache grows unbounded - >0: Enforce LRU eviction when cache size exceeds this value\n\nSetting a limit is recommended for large deployments to bound memory usage. Evicted orgs will have their policies reloaded from the database on next access.", + "default": 0, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "max_expression_length": { + "description": "Maximum allowed length of a CEL expression in bytes.\n\nLimits the size of CEL expressions in both system and organization policies to prevent excessively complex expressions that could cause performance issues during evaluation.\n\nDefault: 4096 bytes. Set to 0 to disable the limit.", + "default": 4096, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "org_claim": { + "description": "JWT claim containing organization IDs the user belongs to. If not set, org membership must be determined from the database.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "policies": { + "description": "Authorization policies evaluated using CEL.", + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/PolicyConfig" + } + }, + "policy_cache_ttl_ms": { + "description": "How often to check Redis for policy version changes (milliseconds).\n\nIn multi-node deployments, each node maintains a local cache of compiled RBAC policies. 
This TTL controls how often nodes check Redis for version changes triggered by other nodes.\n\n- Lower values: Faster policy propagation, more Redis round-trips - Higher values: Slower policy propagation, fewer Redis operations - Set to 0: Check Redis on every authorization request (not recommended)\n\nDefault: 1000 (1 second). This provides a good balance between propagation speed and Redis load.\n\nOnly applies when Redis cache is configured. With in-memory cache only, policies are refreshed immediately on the node that made the change.", + "default": 1000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "policy_eviction_batch_size": { + "description": "Number of organizations to evict when the cache is full.\n\nWhen `max_cached_orgs` is reached, this many least-recently-used organizations are evicted in a single batch to avoid frequent evictions.\n\nDefault: 100. Higher values reduce eviction frequency but may cause more cache misses after eviction.", + "default": 100, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "project_claim": { + "description": "JWT claim containing project IDs the user belongs to.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "role_claim": { + "description": "JWT claim containing user roles (e.g., \"roles\", \"groups\", \"permissions\").", + "default": "roles", + "type": "string" + }, + "role_mapping": { + "description": "Map IdP role names to internal role names. 
Useful when IdP uses different naming conventions.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "team_claim": { + "description": "JWT claim containing team IDs the user belongs to.", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "RedisClusterConfig": { + "description": "Redis cluster configuration.", + "type": "object", + "properties": { + "connection_timeout_secs": { + "description": "Connection timeout for cluster nodes in seconds.", + "default": 5, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "read_from_replicas": { + "description": "Read from replicas for read operations.", + "default": false, + "type": "boolean" + }, + "response_timeout_secs": { + "description": "Response timeout for cluster operations in seconds.", + "default": 1, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "retries": { + "description": "Number of retries for cluster operations.", + "default": 3, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "retry_delay_ms": { + "description": "Retry delay in milliseconds between retries.", + "default": 100, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "RerankConfig": { + "description": "Configuration for LLM-based re-ranking of search results.\n\nRe-ranking is a second-stage retrieval technique that takes initial search results (from vector or hybrid search) and re-scores them using a language model based on semantic relevance to the query. 
This typically improves precision at the cost of additional latency and API calls.\n\n# When to Use Re-ranking\n\n- **High-precision requirements**: When result quality matters more than speed - **Complex queries**: When semantic understanding beyond vector similarity is needed - **Small result sets**: Re-ranking 10-20 results is fast; 100+ becomes slow\n\n# Example Configuration\n\n```toml [features.file_search.rerank] enabled = true model = \"gpt-4o-mini\" # Optional: uses default model if not set max_results_to_rerank = 20 # Re-rank top 20 from initial search batch_size = 10 # Process 10 results per LLM call timeout_secs = 30 # Timeout for re-ranking operation ```\n\n# API Usage\n\nEnable re-ranking per-request using the `ranker` field:\n\n```json { \"ranking_options\": { \"ranker\": \"llm\", \"score_threshold\": 0.5 } } ```", + "type": "object", + "properties": { + "batch_size": { + "description": "Number of results to process per LLM call.\n\nResults are processed in batches to balance latency and context usage. Smaller batches have lower per-call latency but more total calls. Default: 10", + "default": 10, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "enabled": { + "description": "Enable LLM-based re-ranking.\n\nWhen false, requests with `ranker: \"llm\"` will fall back to vector search. Default: false", + "default": false, + "type": "boolean" + }, + "fallback_on_error": { + "description": "Whether to fall back to original vector scores when re-ranking fails.\n\nWhen true (default), if the LLM re-ranking call fails (network error, rate limit, parse error, etc.), the search returns the original vector search results instead of failing the entire request.\n\nWhen false, re-ranking failures propagate as errors, allowing callers to handle them explicitly. 
Default: true", + "default": true, + "type": "boolean" + }, + "max_results_to_rerank": { + "description": "Maximum number of results to pass to the re-ranker.\n\nThe re-ranker receives the top N results from the initial search. Higher values may improve recall but increase latency and cost. Default: 20", + "default": 20, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "model": { + "description": "LLM model to use for re-ranking.\n\nIf not specified, uses the gateway's default model. Recommended: A fast, capable model like `gpt-4o-mini` or `claude-3-haiku`.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "timeout_secs": { + "description": "Timeout in seconds for the entire re-ranking operation.\n\nIf re-ranking exceeds this timeout, returns the original search results without re-ranking (graceful degradation). Default: 30", + "default": 30, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ResourceLimits": { + "description": "Resource limits for entity counts.\n\nThese limits prevent unbounded growth of resources that could cause performance issues or resource exhaustion.", + "type": "object", + "properties": { + "max_api_keys_per_user": { + "description": "Maximum API keys per user (self-service). Set to 0 for unlimited. Default: 25 keys per user.", + "default": 25, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "max_policies_per_org": { + "description": "Maximum RBAC policies per organization. Set to 0 for unlimited. Default: 100 policies per org.\n\nThis limit prevents resource exhaustion from unbounded policy growth. Organizations hitting this limit must delete or disable existing policies before creating new ones.", + "default": 100, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "max_providers_per_user": { + "description": "Maximum dynamic providers per user (BYOK). Set to 0 for unlimited. 
Default: 10 providers per user.", + "default": 10, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ResponseCachingConfig": { + "description": "Response caching configuration (gateway-level caching).", + "type": "object", + "properties": { + "enabled": { + "description": "Enable response caching.", + "default": true, + "type": "boolean" + }, + "key_components": { + "description": "Cache key components.", + "default": { + "model": false, + "system_prompt": false, + "temperature": false, + "tools": false + }, + "allOf": [ + { + "$ref": "#/definitions/CacheKeyComponents" + } + ] + }, + "max_size_bytes": { + "description": "Maximum response size to cache in bytes.", + "default": 1048576, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "only_deterministic": { + "description": "Only cache responses with temperature = 0.", + "default": true, + "type": "boolean" + }, + "semantic": { + "description": "Semantic caching configuration. When enabled, requests are matched based on semantic similarity in addition to exact hash matching.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/SemanticCachingConfig" + }, + { + "type": "null" + } + ] + }, + "ttl_secs": { + "description": "Cache TTL in seconds.", + "default": 3600, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "ResponseValidationConfig": { + "description": "Response schema validation configuration.\n\nWhen enabled, validates API responses against the OpenAI OpenAPI specification before sending them to clients. This helps catch response format issues early, especially from non-OpenAI providers.\n\n# Example\n\n```toml [observability.response_validation] enabled = true mode = \"warn\" ```", + "type": "object", + "properties": { + "enabled": { + "description": "Enable response schema validation. 
When enabled, responses are validated against the OpenAI OpenAPI spec.", + "default": false, + "type": "boolean" + }, + "mode": { + "description": "Validation mode. - `warn`: Log validation failures but return the response anyway. - `error`: Return a 500 error if validation fails.", + "default": "warn", + "allOf": [ + { + "$ref": "#/definitions/ResponseValidationMode" + } + ] + } + }, + "additionalProperties": false + }, + "ResponseValidationMode": { + "description": "Response validation mode.", + "oneOf": [ + { + "description": "Log validation failures but return the response anyway.", + "type": "string", + "enum": [ + "warn" + ] + }, + { + "description": "Return a 500 error if validation fails.", + "type": "string", + "enum": [ + "error" + ] + } + ] + }, + "RetentionConfig": { + "description": "Data retention configuration.\n\nControls automatic purging of old data from the database. When enabled, a background worker periodically deletes records older than their configured retention period.", + "type": "object", + "properties": { + "enabled": { + "description": "Whether retention purging is enabled. Default: false (must be explicitly enabled)", + "default": false, + "type": "boolean" + }, + "interval_hours": { + "description": "How often to run the retention worker (in hours). 
Default: 24 (once per day)", + "default": 24, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "periods": { + "description": "Retention periods for different data types.", + "default": { + "audit_logs_days": 730, + "conversations_deleted_days": 30, + "daily_spend_days": 365, + "usage_records_days": 90 + }, + "allOf": [ + { + "$ref": "#/definitions/RetentionPeriods" + } + ] + }, + "safety": { + "description": "Safety settings to prevent accidental data loss.", + "default": { + "batch_size": 1000, + "dry_run": false, + "max_deletes_per_run": 100000 + }, + "allOf": [ + { + "$ref": "#/definitions/RetentionSafety" + } + ] + } + }, + "additionalProperties": false + }, + "RetentionPeriods": { + "description": "Retention periods for different data types.\n\nEach field specifies the number of days to keep records. Set to 0 to disable retention for that data type (keep forever).", + "type": "object", + "properties": { + "audit_logs_days": { + "description": "Days to keep audit log entries. Audit logs track admin operations and may be required for compliance. Default: 730 days (2 years)", + "default": 730, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "conversations_deleted_days": { + "description": "Days to keep soft-deleted conversations before hard deleting. Conversations are first soft-deleted, then permanently removed after this period. Default: 30 days", + "default": 30, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "daily_spend_days": { + "description": "Days to keep aggregated daily spend records. These are lower-volume summary records (one per API key per model per day). Default: 365 days", + "default": 365, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "usage_records_days": { + "description": "Days to keep individual usage records. These are high-volume records (one per API request). 
Default: 90 days", + "default": 90, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "RetentionSafety": { + "description": "Safety settings for retention operations.\n\nThese settings help prevent accidental data loss and allow testing retention policies before enabling them.", + "type": "object", + "properties": { + "batch_size": { + "description": "Batch size for delete operations. Records are deleted in batches to avoid locking the database. Default: 1000", + "default": 1000, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "dry_run": { + "description": "If true, log what would be deleted without actually deleting. Useful for testing retention policies. Default: false", + "default": false, + "type": "boolean" + }, + "max_deletes_per_run": { + "description": "Maximum number of records to delete per run per table. Prevents long-running delete operations that could impact performance. Set to 0 for unlimited. Default: 100000", + "default": 100000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "RetryConfig": { + "description": "Configuration for provider request retries.\n\nWhen enabled, retries requests on transient failures with exponential backoff. Only retries on status codes that indicate temporary issues (429, 5xx).", + "type": "object", + "properties": { + "backoff_multiplier": { + "description": "Multiplier for exponential backoff.", + "default": 2.0, + "type": "number", + "format": "double" + }, + "embedding_max_retries": { + "description": "Override max_retries for embedding operations. Embeddings are fully idempotent (same input = same output), so aggressive retry is safe. 
Default: 5", + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "enabled": { + "description": "Whether retries are enabled.", + "default": true, + "type": "boolean" + }, + "image_generation_max_retries": { + "description": "Override max_retries for image generation operations. Image generation is NOT idempotent (each attempt creates a different image), so we minimize retries to avoid creating duplicates. Default: 1", + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "initial_delay_ms": { + "description": "Initial delay before first retry in milliseconds.", + "default": 100, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "jitter": { + "description": "Add random jitter to delays (percentage, 0.0-1.0).", + "default": 0.1, + "type": "number", + "format": "double" + }, + "max_delay_ms": { + "description": "Maximum delay between retries in milliseconds.", + "default": 10000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_retries": { + "description": "Maximum number of retry attempts (not including the initial request).", + "default": 3, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "read_only_max_retries": { + "description": "Override max_retries for read-only operations (list_models, etc.). These are fully idempotent with no side effects, so aggressive retry is safe. Default: 5", + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, + "retryable_status_codes": { + "description": "Status codes that should trigger a retry. 
Default: 429 (rate limit), 500, 502, 503, 504 (server errors).", + "default": [ + 429, + 500, + 502, + 503, + 504 + ], + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "additionalProperties": false + }, + "S3ServerSideEncryption": { + "description": "S3 server-side encryption configuration.", + "oneOf": [ + { + "description": "Server-side encryption with Amazon S3-managed keys (SSE-S3).", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "aes256" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Server-side encryption with AWS KMS keys (SSE-KMS).", + "type": "object", + "required": [ + "key_id", + "type" + ], + "properties": { + "key_id": { + "description": "KMS key ID or ARN.", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "kms" + ] + } + }, + "additionalProperties": false + } + ] + }, + "S3StorageConfig": { + "description": "S3-compatible object storage configuration.", + "type": "object", + "required": [ + "bucket" + ], + "properties": { + "access_key_id": { + "description": "AWS access key ID. If not specified, uses environment variables or IAM role.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "bucket": { + "description": "S3 bucket name.", + "type": "string" + }, + "endpoint": { + "description": "Custom endpoint URL for S3-compatible services. Examples: - MinIO: \"http://localhost:9000\" - R2: \"https://<account_id>.r2.cloudflarestorage.com\" - DigitalOcean Spaces: \"https://<region>.digitaloceanspaces.com\"", + "default": null, + "type": [ + "string", + "null" + ] + }, + "force_path_style": { + "description": "Use path-style URLs instead of virtual-hosted style. Required for MinIO and some S3-compatible services. Default: false (use virtual-hosted style)", + "default": false, + "type": "boolean" + }, + "key_prefix": { + "description": "Key prefix for all stored files.
Useful for organizing files in a shared bucket. Example: \"hadrian/files/\" would store files as \"hadrian/files/<file_id>\"", + "default": null, + "type": [ + "string", + "null" + ] + }, + "region": { + "description": "AWS region (e.g., \"us-east-1\"). For non-AWS S3-compatible services, use their region name.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "secret_access_key": { + "description": "AWS secret access key. If not specified, uses environment variables or IAM role.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "server_side_encryption": { + "description": "Enable server-side encryption.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/S3ServerSideEncryption" + }, + { + "type": "null" + } + ] + }, + "storage_class": { + "description": "Storage class for new objects. AWS: STANDARD, REDUCED_REDUNDANCY, STANDARD_IA, ONEZONE_IA, INTELLIGENT_TIERING, GLACIER, DEEP_ARCHIVE", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "SameSite": { + "type": "string", + "enum": [ + "lax", + "strict", + "none" + ] + }, + "SamplingConfig": { + "description": "Sampling configuration.", + "type": "object", + "properties": { + "rate": { + "description": "Sample rate for ratio-based sampling (0.0-1.0).", + "default": 1.0, + "type": "number", + "format": "double" + }, + "strategy": { + "description": "Sampling strategy.", + "default": "always_on", + "allOf": [ + { + "$ref": "#/definitions/SamplingStrategy" + } + ] + } + }, + "additionalProperties": false + }, + "SamplingStrategy": { + "oneOf": [ + { + "description": "Sample all traces.", + "type": "string", + "enum": [ + "always_on" + ] + }, + { + "description": "Sample no traces.", + "type": "string", + "enum": [ + "always_off" + ] + }, + { + "description": "Sample a percentage of traces.", + "type": "string", + "enum": [ + "ratio" + ] + }, + { + "description": "Parent-based sampling (inherit from parent span).", + "type": "string",
+ "enum": [ + "parent_based" + ] + } + ] + }, + "SecretsConfig": { + "description": "Configuration for the secrets manager.", + "oneOf": [ + { + "description": "No secrets manager (secrets are not resolved from external sources)", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "none" + ] + } + } + }, + { + "description": "Environment variable-based secrets Keys are looked up directly as environment variable names.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "env" + ] + } + } + }, + { + "description": "HashiCorp Vault / OpenBao secrets manager. Requires the `vault` feature.", + "type": "object", + "oneOf": [ + { + "description": "Token-based authentication", + "type": "object", + "required": [ + "auth", + "token" + ], + "properties": { + "auth": { + "type": "string", + "enum": [ + "token" + ] + }, + "token": { + "description": "The Vault token", + "type": "string" + } + } + }, + { + "description": "AppRole authentication (recommended for production)", + "type": "object", + "required": [ + "auth", + "role_id", + "secret_id" + ], + "properties": { + "auth": { + "type": "string", + "enum": [ + "app_role" + ] + }, + "auth_mount": { + "description": "Auth mount path (default: \"approle\")", + "default": "approle", + "type": "string" + }, + "role_id": { + "description": "AppRole role ID", + "type": "string" + }, + "secret_id": { + "description": "AppRole secret ID", + "type": "string" + } + } + }, + { + "description": "Kubernetes authentication (for pods running in k8s)", + "type": "object", + "required": [ + "auth", + "role" + ], + "properties": { + "auth": { + "type": "string", + "enum": [ + "kubernetes" + ] + }, + "auth_mount": { + "description": "Auth mount path (default: \"kubernetes\")", + "default": "kubernetes", + "type": "string" + }, + "role": { + "description": "Vault role name", + "type": "string" + }, + "token_path": { + 
"description": "Path to the service account token (default: /var/run/secrets/kubernetes.io/serviceaccount/token)", + "default": "/var/run/secrets/kubernetes.io/serviceaccount/token", + "type": "string" + } + } + } + ], + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "description": "Vault server address (e.g., \"https://vault.example.com:8200\")", + "type": "string" + }, + "mount": { + "description": "KV v2 mount point (default: \"secret\")", + "default": "secret", + "type": "string" + }, + "path_prefix": { + "description": "Path prefix for all secrets (default: \"hadrian\")", + "default": "hadrian", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "vault" + ] + } + } + }, + { + "description": "AWS Secrets Manager. Requires the `secrets-aws` feature.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "endpoint_url": { + "description": "Custom endpoint URL (for localstack or other AWS-compatible services)", + "default": null, + "type": [ + "string", + "null" + ] + }, + "prefix": { + "description": "Prefix for all secret names (default: \"gateway/\")", + "default": "gateway/", + "type": "string" + }, + "region": { + "description": "AWS region (e.g., \"us-east-1\"). If not set, uses AWS_REGION environment variable.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "aws" + ] + } + } + }, + { + "description": "Azure Key Vault. 
Requires the `secrets-azure` feature.", + "type": "object", + "required": [ + "type", + "vault_url" + ], + "properties": { + "prefix": { + "description": "Prefix for all secret names (default: \"gateway-\") Note: Azure Key Vault only allows alphanumeric characters and hyphens in secret names.", + "default": "gateway-", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "azure" + ] + }, + "vault_url": { + "description": "Key Vault URL (e.g., \"https://myvault.vault.azure.net\")", + "type": "string" + } + } + }, + { + "description": "GCP Secret Manager. Requires the `secrets-gcp` feature.", + "type": "object", + "required": [ + "project_id", + "type" + ], + "properties": { + "prefix": { + "description": "Prefix for all secret names (default: \"gateway-\")", + "default": "gateway-", + "type": "string" + }, + "project_id": { + "description": "GCP project ID", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "gcp" + ] + } + } + } + ] + }, + "SecurityHeadersConfig": { + "description": "Security headers configuration.\n\nThese headers protect against common web vulnerabilities like clickjacking, MIME-sniffing, and protocol downgrade attacks.", + "type": "object", + "properties": { + "content_security_policy": { + "description": "Content-Security-Policy header value. Controls resource loading to prevent XSS attacks.", + "default": "default-src 'self'; script-src 'self' blob: 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:; media-src 'self' blob:; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://www.wikidata.org; worker-src 'self' blob:; frame-src 'self' blob:; object-src 'none'; base-uri 'self'", + "type": [ + "string", + "null" + ] + }, + "content_type_options": { + "description": "X-Content-Type-Options header value. Prevents MIME-sniffing attacks. 
Default: \"nosniff\"", + "default": "nosniff", + "type": "string" + }, + "enabled": { + "description": "Enable security headers.", + "default": true, + "type": "boolean" + }, + "frame_options": { + "description": "X-Frame-Options header value. Prevents clickjacking attacks. Options: \"DENY\", \"SAMEORIGIN\", or omit. Default: \"DENY\"", + "default": "DENY", + "type": [ + "string", + "null" + ] + }, + "hsts": { + "description": "Strict-Transport-Security header configuration. Forces HTTPS connections. Only sent over HTTPS connections.", + "default": { + "enabled": true, + "include_subdomains": true, + "max_age_secs": 31536000, + "preload": false + }, + "allOf": [ + { + "$ref": "#/definitions/HstsConfig" + } + ] + }, + "permissions_policy": { + "description": "Permissions-Policy header value. Controls browser features available to the page. Default: None (not set)", + "default": null, + "type": [ + "string", + "null" + ] + }, + "referrer_policy": { + "description": "Referrer-Policy header value. Controls referrer information sent in requests. Default: \"strict-origin-when-cross-origin\"", + "default": "strict-origin-when-cross-origin", + "type": [ + "string", + "null" + ] + }, + "xss_protection": { + "description": "X-XSS-Protection header value. Legacy header for older browsers. Disabled by default as CSP provides protection. 
Enable for legacy browser compatibility.", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "SemanticCachingConfig": { + "description": "Semantic caching configuration for similarity-based cache matching.\n\nWhen enabled, the cache will also look up semantically similar requests using vector embeddings, allowing cache hits for requests that are different in wording but semantically equivalent.\n\n# Configuration Example\n\n```toml [features.response_caching.semantic] enabled = true similarity_threshold = 0.95 # Minimum cosine similarity for cache hit\n\n[features.response_caching.semantic.embedding] provider = \"openai\" model = \"text-embedding-3-small\" dimensions = 1536\n\n[features.response_caching.semantic.vector_backend] type = \"pgvector\" ```", + "type": "object", + "required": [ + "vector_backend" + ], + "properties": { + "embedding": { + "description": "Embedding configuration for generating request embeddings.", + "default": { + "dimensions": 0, + "model": "", + "provider": "" + }, + "allOf": [ + { + "$ref": "#/definitions/EmbeddingConfig" + } + ] + }, + "enabled": { + "description": "Enable semantic caching. When false, only exact-match caching is used.", + "default": false, + "type": "boolean" + }, + "similarity_threshold": { + "description": "Minimum cosine similarity threshold for a semantic cache hit (0.0-1.0). Higher values require closer semantic matches. Recommended: 0.92-0.98 depending on use case.", + "default": 0.95, + "type": "number", + "format": "double" + }, + "top_k": { + "description": "Maximum number of similar results to consider when looking up. 
The closest match above the threshold is used.", + "default": 1, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "vector_backend": { + "description": "Vector database backend for storing and querying embeddings.", + "allOf": [ + { + "$ref": "#/definitions/SemanticVectorBackend" + } + ] + } + }, + "additionalProperties": false + }, + "SemanticVectorBackend": { + "description": "Vector database backend for semantic caching.\n\nUnlike the general `VectorBackend` for RAG, semantic caching only supports backends that can be efficiently queried for single-vector similarity lookups.", + "oneOf": [ + { + "description": "PostgreSQL with pgvector extension. Uses the existing database connection pool.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "distance_metric": { + "description": "Distance metric for similarity search. Defaults to cosine, which works best for text embeddings.", + "default": "cosine", + "allOf": [ + { + "$ref": "#/definitions/DistanceMetric" + } + ] + }, + "index_type": { + "description": "Index type for vector similarity search.", + "default": "ivf_flat", + "allOf": [ + { + "$ref": "#/definitions/PgvectorIndexType" + } + ] + }, + "table_name": { + "description": "Table name for storing cache embeddings.", + "default": "semantic_cache_embeddings", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "pgvector" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Qdrant vector database.", + "type": "object", + "required": [ + "type", + "url" + ], + "properties": { + "api_key": { + "description": "API key for authentication (optional).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "distance_metric": { + "description": "Distance metric for similarity search. 
Defaults to cosine, which works best for text embeddings.", + "default": "cosine", + "allOf": [ + { + "$ref": "#/definitions/DistanceMetric" + } + ] + }, + "qdrant_collection_name": { + "description": "Qdrant collection name for storing cache embeddings.", + "default": "semantic_cache", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "qdrant" + ] + }, + "url": { + "description": "Qdrant server URL.", + "type": "string" + } + }, + "additionalProperties": false + } + ] + }, + "ServerConfig": { + "description": "HTTP server configuration.", + "type": "object", + "properties": { + "allow_loopback_urls": { + "description": "Allow loopback addresses (127.0.0.1, ::1, localhost) in user-supplied URLs.\n\nWhen false (default), URLs targeting loopback addresses are blocked to prevent SSRF. Enable for development only. Private ranges and cloud metadata endpoints are always blocked regardless of this setting.", + "default": false, + "type": "boolean" + }, + "allow_private_urls": { + "description": "Allow private/internal IP ranges (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16) in user-supplied URLs.\n\nWhen false (default), URLs resolving to private IPs are blocked to prevent SSRF. Enable for Docker, Kubernetes, or other environments where services communicate over private networks (e.g., Keycloak at `http://keycloak:8080`).
Cloud metadata endpoints (169.254.169.254) are always blocked.", + "default": false, + "type": "boolean" + }, + "body_limit_bytes": { + "description": "Request body size limit in bytes.", + "default": 10485760, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "cors": { + "description": "CORS configuration.", + "default": { + "allow_credentials": false, + "allowed_headers": [ + "Content-Type", + "Authorization", + "X-API-Key" + ], + "allowed_methods": [ + "GET", + "POST", + "PUT", + "DELETE", + "OPTIONS" + ], + "allowed_origins": [], + "enabled": true, + "max_age_secs": 86400 + }, + "allOf": [ + { + "$ref": "#/definitions/CorsConfig" + } + ] + }, + "host": { + "description": "Host address to bind to.", + "default": "0.0.0.0", + "type": "string", + "format": "ip" + }, + "http_client": { + "description": "HTTP client configuration for outbound requests to LLM providers.", + "default": { + "connect_timeout_secs": 10, + "http2_adaptive_window": true, + "http2_prior_knowledge": false, + "pool_idle_timeout_secs": 90, + "pool_max_idle_per_host": 32, + "tcp_keepalive_secs": 60, + "tcp_nodelay": true, + "timeout_secs": 300, + "user_agent": "hadrian/0.0.0-alpha.7", + "verbose": false + }, + "allOf": [ + { + "$ref": "#/definitions/HttpClientConfig" + } + ] + }, + "max_response_body_bytes": { + "description": "Maximum response body size for buffering provider responses (in bytes). 
This prevents OOM from malicious or malformed provider responses.", + "default": 104857600, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "port": { + "description": "Port to listen on.", + "default": 8080, + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "security_headers": { + "description": "Security headers configuration.", + "default": { + "content_security_policy": "default-src 'self'; script-src 'self' blob: 'unsafe-eval' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:; media-src 'self' blob:; connect-src 'self' https://cdn.jsdelivr.net https://*.wikipedia.org https://www.wikidata.org; worker-src 'self' blob:; frame-src 'self' blob:; object-src 'none'; base-uri 'self'", + "content_type_options": "nosniff", + "enabled": true, + "frame_options": "DENY", + "hsts": { + "enabled": true, + "include_subdomains": true, + "max_age_secs": 31536000, + "preload": false + }, + "permissions_policy": null, + "referrer_policy": "strict-origin-when-cross-origin", + "xss_protection": null + }, + "allOf": [ + { + "$ref": "#/definitions/SecurityHeadersConfig" + } + ] + }, + "streaming_idle_timeout_secs": { + "description": "Streaming response idle timeout in seconds.\n\nThis is the maximum time allowed between chunks in a streaming response. If no chunk is received from the upstream provider within this timeout, the stream is terminated.\n\nThis protects against: - Stalled upstream providers that stop sending data - Connection pool exhaustion from hung streams\n\nSet to 0 to disable idle timeout (not recommended). Default: 120 seconds (2 minutes)", + "default": 120, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "timeout_secs": { + "description": "Request timeout in seconds.", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "tls": { + "description": "TLS configuration. If omitted, serves plain HTTP. 
In production, TLS is typically terminated at the load balancer.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/TlsConfig" + }, + { + "type": "null" + } + ] + }, + "trusted_proxies": { + "description": "Trusted proxy configuration for extracting real client IPs.", + "default": { + "cidrs": [], + "dangerously_trust_all": false, + "real_ip_header": "" + }, + "allOf": [ + { + "$ref": "#/definitions/TrustedProxiesConfig" + } + ] + } + }, + "additionalProperties": false + }, + "SessionConfig": { + "description": "Session cookie configuration.", + "type": "object", + "properties": { + "cookie_name": { + "description": "Cookie name.", + "default": "__gw_session", + "type": "string" + }, + "duration_secs": { + "description": "Session duration in seconds.", + "default": 604800, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "enhanced": { + "description": "Enhanced session management configuration. Enables session listing, device tracking, and user-to-sessions indexing.", + "default": { + "activity_update_interval_secs": 0, + "enabled": false, + "inactivity_timeout_secs": 0, + "max_concurrent_sessions": 0, + "track_devices": false + }, + "allOf": [ + { + "$ref": "#/definitions/EnhancedSessionConfig" + } + ] + }, + "same_site": { + "description": "SameSite cookie attribute.", + "default": "lax", + "allOf": [ + { + "$ref": "#/definitions/SameSite" + } + ] + }, + "secret": { + "description": "Secret key for signing session cookies. 
If not provided, a random key is generated on startup (sessions won't survive restarts).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "secure": { + "description": "Secure cookie (HTTPS only).", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "SiemConfig": { + "description": "SIEM-specific configuration for CEF, LEEF, and Syslog formats.", + "type": "object", + "properties": { + "app_name": { + "description": "Application name for Syslog APP-NAME field.", + "default": "hadrian", + "type": "string" + }, + "device_product": { + "description": "Device product name for CEF/LEEF headers.", + "default": "Gateway", + "type": "string" + }, + "device_vendor": { + "description": "Device vendor name for CEF/LEEF headers.", + "default": "Hadrian", + "type": "string" + }, + "device_version": { + "description": "Device version for CEF/LEEF headers. If not specified, uses the crate version from Cargo.toml.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "facility": { + "description": "Syslog facility (only used for Syslog format).", + "default": "local0", + "allOf": [ + { + "$ref": "#/definitions/SyslogFacility" + } + ] + }, + "hostname": { + "description": "Override hostname for Syslog/CEF/LEEF. 
If not specified, uses the system hostname.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "leef_version": { + "description": "LEEF format version (1.0 or 2.0).", + "default": "2.0", + "allOf": [ + { + "$ref": "#/definitions/LeefVersion" + } + ] + } + }, + "additionalProperties": false + }, + "StorageConfig": { + "description": "Storage configuration for files and other binary data.", + "type": "object", + "properties": { + "files": { + "description": "File storage configuration.", + "default": { + "backend": "database", + "filesystem": null, + "s3": null + }, + "allOf": [ + { + "$ref": "#/definitions/FileStorageConfig" + } + ] + } + }, + "additionalProperties": false + }, + "StreamingBufferConfig": { + "description": "Configuration for streaming response buffer limits.\n\nThese limits prevent DoS attacks from malformed SSE data or slow consumers. Only applies to providers that transform streams (Anthropic, Bedrock, Vertex).\n\nOpenAI-compatible providers (OpenAI, Azure OpenAI) pass through SSE streams directly without buffering or transformation, so these limits don't apply.\n\nUse [`ProviderConfig::streaming_buffer_config()`] to check if a provider supports streaming buffer configuration.", + "type": "object", + "properties": { + "max_input_buffer_bytes": { + "description": "Maximum size of the input buffer in bytes. Protects against malformed SSE data without newlines. Default: 16 MB", + "default": 16777216, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "max_output_buffer_chunks": { + "description": "Maximum number of output chunks to buffer. Protects against slow consumers causing unbounded memory growth. 
Default: 1000 chunks", + "default": 1000, + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "StreamingGuardrailsMode": { + "description": "Streaming output evaluation mode.", + "oneOf": [ + { + "description": "Only evaluate the complete response after streaming finishes. Lowest latency, but harmful content may be partially streamed.", + "type": "string", + "enum": [ + "final_only" + ] + }, + { + "description": "Buffer chunks and evaluate periodically. Balance between latency and safety. This is the default mode.", + "type": "object", + "required": [ + "buffered" + ], + "properties": { + "buffered": { + "type": "object", + "properties": { + "buffer_tokens": { + "description": "Number of tokens to buffer before evaluation.", + "default": 100, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + } + } + } + }, + "additionalProperties": false + }, + { + "description": "Evaluate each chunk individually. Highest safety but significantly increases latency.", + "type": "string", + "enum": [ + "per_chunk" + ] + } + ] + }, + "SyslogFacility": { + "description": "Syslog facility as defined in RFC 5424.", + "oneOf": [ + { + "description": "Kernel messages (0).", + "type": "string", + "enum": [ + "kern" + ] + }, + { + "description": "User-level messages (1).", + "type": "string", + "enum": [ + "user" + ] + }, + { + "description": "Mail system (2).", + "type": "string", + "enum": [ + "mail" + ] + }, + { + "description": "System daemons (3).", + "type": "string", + "enum": [ + "daemon" + ] + }, + { + "description": "Security/authorization messages (4).", + "type": "string", + "enum": [ + "auth" + ] + }, + { + "description": "Messages generated internally by syslogd (5).", + "type": "string", + "enum": [ + "syslog" + ] + }, + { + "description": "Line printer subsystem (6).", + "type": "string", + "enum": [ + "lpr" + ] + }, + { + "description": "Network news subsystem (7).", + "type": "string", + "enum": [ + "news" + 
] + }, + { + "description": "UUCP subsystem (8).", + "type": "string", + "enum": [ + "uucp" + ] + }, + { + "description": "Clock daemon (9).", + "type": "string", + "enum": [ + "cron" + ] + }, + { + "description": "Security/authorization messages (private) (10).", + "type": "string", + "enum": [ + "authpriv" + ] + }, + { + "description": "FTP daemon (11).", + "type": "string", + "enum": [ + "ftp" + ] + }, + { + "description": "NTP subsystem (12).", + "type": "string", + "enum": [ + "ntp" + ] + }, + { + "description": "Log audit (13).", + "type": "string", + "enum": [ + "audit" + ] + }, + { + "description": "Log alert (14).", + "type": "string", + "enum": [ + "alert" + ] + }, + { + "description": "Clock daemon (15).", + "type": "string", + "enum": [ + "clock" + ] + }, + { + "description": "Local use 0 (16).", + "type": "string", + "enum": [ + "local0" + ] + }, + { + "description": "Local use 1 (17).", + "type": "string", + "enum": [ + "local1" + ] + }, + { + "description": "Local use 2 (18).", + "type": "string", + "enum": [ + "local2" + ] + }, + { + "description": "Local use 3 (19).", + "type": "string", + "enum": [ + "local3" + ] + }, + { + "description": "Local use 4 (20).", + "type": "string", + "enum": [ + "local4" + ] + }, + { + "description": "Local use 5 (21).", + "type": "string", + "enum": [ + "local5" + ] + }, + { + "description": "Local use 6 (22).", + "type": "string", + "enum": [ + "local6" + ] + }, + { + "description": "Local use 7 (23).", + "type": "string", + "enum": [ + "local7" + ] + } + ] + }, + "TestFailureMode": { + "description": "Failure mode configuration for test providers.\n\nAllows simulating various failure conditions for testing fallback behavior, circuit breakers, and error handling.", + "oneOf": [ + { + "description": "Normal operation - return successful responses (default).", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "none" + ] + } + } + }, + { + "description": 
"Return an HTTP error status code. Use this to test retry and fallback behavior.", + "type": "object", + "required": [ + "status_code", + "type" + ], + "properties": { + "message": { + "description": "Optional error message to include in the response.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "status_code": { + "description": "HTTP status code to return (e.g., 500, 502, 503, 504 for server errors, 400, 401, 403, 404 for client errors, 429 for rate limiting).", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "http_error" + ] + } + } + }, + { + "description": "Simulate a connection/request error. Useful for testing network failure handling.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "message": { + "description": "Error message describing the connection failure.", + "default": "Connection refused", + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "connection_error" + ] + } + } + }, + { + "description": "Simulate a timeout. Waits for the specified duration before returning an error.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "delay_ms": { + "description": "Delay in milliseconds before timing out.", + "default": 5000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "timeout" + ] + } + } + }, + { + "description": "Fail after N successful requests (for testing circuit breaker). 
Alternates between success and failure based on the counter.", + "type": "object", + "required": [ + "success_count", + "type" + ], + "properties": { + "failure_status": { + "description": "HTTP status code to return when failing.", + "default": 500, + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "success_count": { + "description": "Number of successful requests before starting to fail.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "type": { + "type": "string", + "enum": [ + "fail_after_n" + ] + } + } + } + ] + }, + "TlsConfig": { + "description": "TLS configuration.", + "type": "object", + "required": [ + "cert_path", + "key_path" + ], + "properties": { + "cert_path": { + "description": "Path to the certificate file (PEM format).", + "type": "string" + }, + "key_path": { + "description": "Path to the private key file (PEM format).", + "type": "string" + } + }, + "additionalProperties": false + }, + "TracingConfig": { + "description": "Tracing configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Enable distributed tracing.", + "default": false, + "type": "boolean" + }, + "environment": { + "description": "Environment (e.g., \"production\", \"staging\").", + "default": null, + "type": [ + "string", + "null" + ] + }, + "otlp": { + "description": "OTLP exporter configuration.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/OtlpConfig" + }, + { + "type": "null" + } + ] + }, + "propagation": { + "description": "Propagation format.", + "default": "trace_context", + "allOf": [ + { + "$ref": "#/definitions/PropagationFormat" + } + ] + }, + "resource_attributes": { + "description": "Additional resource attributes.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "sampling": { + "description": "Sampling configuration.", + "default": { + "rate": 1.0, + "strategy": "always_on" + }, + "allOf": [ + { + "$ref": "#/definitions/SamplingConfig" 
+ } + ] + }, + "service_name": { + "description": "Service name.", + "default": "hadrian", + "type": "string" + }, + "service_version": { + "description": "Service version.", + "default": null, + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false + }, + "TrustedProxiesConfig": { + "description": "Configuration for trusted reverse proxies.\n\n**Security Note:** Proxy header spoofing is a serious vulnerability. Only trust proxy headers when the connecting client is from a known proxy IP/CIDR range.\n\n- `dangerously_trust_all: true` - **DANGEROUS**: Trusts proxy headers from ANY source. Only use in controlled environments where the gateway is not directly accessible from the internet (e.g., behind a load balancer that strips/rewrites headers).\n\n- `cidrs: [\"10.0.0.0/8\"]` - Trust proxy headers only when the connecting IP is within one of the specified CIDR ranges. This is the recommended approach.\n\nWhen proxy headers are trusted, X-Forwarded-For is parsed right-to-left, skipping IPs that are within trusted CIDRs, to find the first untrusted (client) IP.", + "type": "object", + "properties": { + "cidrs": { + "description": "List of trusted proxy CIDR ranges (e.g., [\"10.0.0.0/8\", \"172.16.0.0/12\"]).\n\nProxy headers are only trusted when the connecting IP is within one of these ranges. This prevents IP spoofing from untrusted sources.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "dangerously_trust_all": { + "description": "Trust all proxies (use X-Forwarded-For as-is).\n\n**WARNING: This is a security risk!** Only enable this if the gateway is completely isolated behind a trusted load balancer that: 1. Is the only way to reach the gateway 2. 
Properly sets/overwrites the X-Forwarded-For header\n\nIf attackers can connect directly to the gateway, they can spoof any IP and bypass IP-based rate limiting entirely.", + "default": false, + "type": "boolean" + }, + "real_ip_header": { + "description": "Header to use for the real client IP. Common values: \"X-Forwarded-For\", \"X-Real-IP\", \"CF-Connecting-IP\"", + "default": "X-Forwarded-For", + "type": "string" + } + }, + "additionalProperties": false + }, + "UiConfig": { + "description": "UI configuration.", + "type": "object", + "properties": { + "admin": { + "description": "Admin panel configuration.", + "default": { + "enabled": true, + "path": "/admin" + }, + "allOf": [ + { + "$ref": "#/definitions/AdminConfig" + } + ] + }, + "assets": { + "description": "Static assets configuration.", + "default": { + "cache_control": "public, max-age=31536000, immutable", + "source": { + "type": "embedded" + } + }, + "allOf": [ + { + "$ref": "#/definitions/AssetsConfig" + } + ] + }, + "branding": { + "description": "Branding customization.", + "default": { + "colors": null, + "colors_dark": null, + "custom_css_url": null, + "favicon_url": null, + "fonts": null, + "footer_links": [], + "footer_text": null, + "login": null, + "logo_dark_url": null, + "logo_url": null, + "show_version": false, + "tagline": null, + "title": null + }, + "allOf": [ + { + "$ref": "#/definitions/BrandingConfig" + } + ] + }, + "chat": { + "description": "Chat interface configuration.", + "default": { + "available_models": [], + "default_model": null, + "enabled": true, + "file_uploads": { + "allowed_types": [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "application/pdf", + "text/plain", + "text/markdown" + ], + "enabled": false, + "max_size_bytes": 10485760, + "storage": { + "type": "database" + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/ChatConfig" + } + ] + }, + "enabled": { + "description": "Enable the UI.", + "default": false, + "type": "boolean" + }, + "path": 
{ + "description": "Path to serve the UI from (default: /).", + "default": "/", + "type": "string" + } + }, + "additionalProperties": false + }, + "UploadStorageConfig": { + "description": "Storage backend for chat file uploads.\n\nNote: For the Files API storage backend, see `FileStorageConfig` in `storage.rs`.", + "oneOf": [ + { + "description": "Store in database (for small files).", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "database" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Store on local filesystem.", + "type": "object", + "required": [ + "path", + "type" + ], + "properties": { + "path": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "filesystem" + ] + } + }, + "additionalProperties": false + }, + { + "description": "Store in S3-compatible storage.", + "type": "object", + "required": [ + "bucket", + "type" + ], + "properties": { + "bucket": { + "type": "string" + }, + "endpoint": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "prefix": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "region": { + "default": null, + "type": [ + "string", + "null" + ] + }, + "type": { + "type": "string", + "enum": [ + "s3" + ] + } + }, + "additionalProperties": false + } + ] + }, + "UsageBufferConfig": { + "description": "Buffer configuration for usage logging.", + "type": "object", + "properties": { + "flush_interval_ms": { + "description": "Maximum time between flushes in milliseconds.", + "default": 1000, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_pending_entries": { + "description": "Maximum pending entries before dropping oldest. When the sink is slow or unavailable, entries accumulate in the buffer. If pending entries exceed this limit, the oldest entries are dropped to prevent unbounded memory growth (OOM). Set to 0 to disable (not recommended). 
Default: 10x max_size (10,000 entries at ~1KB each = ~10MB max memory).", + "default": 10000, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "max_size": { + "description": "Maximum entries to buffer before flushing.", + "default": 1000, + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "UsageConfig": { + "description": "Usage logging configuration.\n\nControls where API usage data (tokens, costs, latency) is sent. Multiple sinks can be enabled simultaneously.", + "type": "object", + "properties": { + "buffer": { + "description": "Buffer configuration for batched writes.", + "default": { + "flush_interval_ms": 1000, + "max_pending_entries": 10000, + "max_size": 1000 + }, + "allOf": [ + { + "$ref": "#/definitions/UsageBufferConfig" + } + ] + }, + "database": { + "description": "Enable database logging (default: true if database is configured).", + "default": true, + "type": "boolean" + }, + "otlp": { + "description": "OTLP exporter for usage data. Sends usage records as OTLP log records to any OpenTelemetry-compatible backend.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/UsageOtlpConfig" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false + }, + "UsageOtlpConfig": { + "description": "OTLP configuration for usage logging.", + "type": "object", + "properties": { + "compression": { + "description": "Enable compression.", + "default": true, + "type": "boolean" + }, + "enabled": { + "description": "Enable OTLP usage export.", + "default": true, + "type": "boolean" + }, + "endpoint": { + "description": "OTLP endpoint URL. 
If not specified, uses the tracing OTLP endpoint.", + "default": null, + "type": [ + "string", + "null" + ] + }, + "headers": { + "description": "Headers to include (e.g., for authentication).", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "protocol": { + "description": "Protocol (grpc or http).", + "default": "grpc", + "allOf": [ + { + "$ref": "#/definitions/OtlpProtocol" + } + ] + }, + "service_name": { + "description": "Service name override (defaults to tracing service name).", + "default": null, + "type": [ + "string", + "null" + ] + }, + "timeout_secs": { + "description": "Timeout in seconds.", + "default": 10, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "VectorStoreCleanupConfig": { + "description": "Configuration for the vector store cleanup background job.\n\nThe cleanup job periodically removes: 1. Soft-deleted vector stores that have passed the cleanup delay 2. Chunks associated with deleted stores from the vector database 3. Files that are no longer referenced by any vector store\n\n# Example Configuration\n\n```toml [features.vector_store_cleanup] enabled = true interval_secs = 300 cleanup_delay_secs = 3600 batch_size = 100 max_duration_secs = 60 ```", + "type": "object", + "properties": { + "batch_size": { + "description": "Maximum number of stores to clean up per run. Prevents long-running cleanup operations. Default: 100", + "default": 100, + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "cleanup_delay_secs": { + "description": "Time to wait after soft deletion before hard deleting (in seconds). This gives users time to recover accidentally deleted stores. Default: 3600 (1 hour)", + "default": 3600, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "dry_run": { + "description": "Dry run mode - log what would be deleted without actually deleting. Useful for testing cleanup configuration. 
Default: false", + "default": false, + "type": "boolean" + }, + "enabled": { + "description": "Enable the cleanup job. When disabled, soft-deleted stores remain in the database indefinitely.", + "default": false, + "type": "boolean" + }, + "interval_secs": { + "description": "How often to run the cleanup job (in seconds). Default: 300 (5 minutes)", + "default": 300, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "max_duration_secs": { + "description": "Maximum duration for a single cleanup run (in seconds). If exceeded, cleanup stops gracefully and continues next run. Set to 0 for unlimited. Default: 60", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + } + }, + "additionalProperties": false + }, + "VirusScanBackend": { + "description": "Virus scanning backend type.", + "oneOf": [ + { + "description": "ClamAV via clamd daemon. Open-source antivirus with regularly updated signatures.", + "type": "string", + "enum": [ + "clamav" + ] + } + ] + }, + "VirusScanConfig": { + "description": "Virus scanning configuration for file uploads.\n\nWhen enabled, files are scanned for malware before being stored. Currently supports ClamAV via the clamd daemon.\n\n# Example Configuration\n\n```toml [features.file_processing.virus_scan] enabled = true backend = \"clamav\"\n\n[features.file_processing.virus_scan.clamav] host = \"localhost\" port = 3310 timeout_ms = 30000 ```", + "type": "object", + "properties": { + "backend": { + "description": "Virus scanning backend. Currently only \"clamav\" is supported.", + "default": "clamav", + "allOf": [ + { + "$ref": "#/definitions/VirusScanBackend" + } + ] + }, + "clamav": { + "description": "ClamAV-specific configuration. Required when backend = \"clamav\" and enabled = true.", + "default": null, + "anyOf": [ + { + "$ref": "#/definitions/ClamAvConfig" + }, + { + "type": "null" + } + ] + }, + "enabled": { + "description": "Enable virus scanning. 
When false, files are not scanned before storage. Default: false", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "WebSocketConfig": { + "description": "WebSocket configuration for real-time event subscriptions.\n\nWhen enabled, clients can connect to `/ws/events` to receive real-time notifications about server events such as audit logs, usage tracking, circuit breaker state changes, and budget alerts.\n\n# Authentication\n\nWebSocket connections can be authenticated via: - Query parameter `token` - API key for programmatic access - Session cookie - For browser-based access (requires prior OIDC login)\n\nIf `require_auth` is true, unauthenticated connections will be rejected.\n\n# Example Configuration\n\n```toml [features.websocket] enabled = true require_auth = true ping_interval_secs = 30 pong_timeout_secs = 60 ```", + "type": "object", + "properties": { + "channel_capacity": { + "description": "Event bus channel capacity. Determines how many events can be buffered before slow subscribers start missing events (lagging).", + "default": 1024, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "enabled": { + "description": "Enable WebSocket event subscriptions. When disabled, the `/ws/events` endpoint is not registered.", + "default": true, + "type": "boolean" + }, + "max_connections": { + "description": "Maximum number of concurrent WebSocket connections. Set to 0 for unlimited connections (not recommended in production).", + "default": 1000, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "ping_interval_secs": { + "description": "Ping interval for keepalive in seconds. The server sends ping frames at this interval to detect dead connections.", + "default": 30, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "pong_timeout_secs": { + "description": "Pong timeout in seconds. 
If no pong response is received within this time after a ping, the connection is terminated.", + "default": 60, + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "require_auth": { + "description": "Require authentication for WebSocket connections. When true, connections without valid API key or session are rejected. When false, unauthenticated connections are allowed (useful for development).", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + } + } +} \ No newline at end of file diff --git a/scripts/generate-schema.sh b/scripts/generate-schema.sh new file mode 100755 index 0000000..0b0cb87 --- /dev/null +++ b/scripts/generate-schema.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +echo "Generating config JSON schema..." +cargo run --features json-schema -- schema --output "$ROOT_DIR/docs/public/config-schema.json" +echo "Done: docs/public/config-schema.json"