diff --git a/hub/src/web/routes/voice.test.ts b/hub/src/web/routes/voice.test.ts
index 15249c723..9cc3f4232 100644
--- a/hub/src/web/routes/voice.test.ts
+++ b/hub/src/web/routes/voice.test.ts
@@ -1,4 +1,4 @@
-import { describe, expect, it, mock } from 'bun:test'
+import { describe, expect, it, mock, test, afterEach } from 'bun:test'
 import { Hono } from 'hono'
 import { SignJWT } from 'jose'
 import type { WebAppEnv } from '../middleware/auth'
@@ -188,3 +188,155 @@ describe('POST /api/voice/token', () => {
         else delete process.env.ELEVENLABS_AGENT_ID
     })
 })
+
+describe('GET /api/voice/backend', () => {
+    const originalEnv = process.env.VOICE_BACKEND
+
+    afterEach(() => {
+        if (originalEnv === undefined) {
+            delete process.env.VOICE_BACKEND
+        } else {
+            process.env.VOICE_BACKEND = originalEnv
+        }
+    })
+
+    test('returns elevenlabs by default', async () => {
+        delete process.env.VOICE_BACKEND
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/backend', { headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('elevenlabs')
+    })
+
+    test('returns gemini-live when configured', async () => {
+        process.env.VOICE_BACKEND = 'gemini-live'
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/backend', { headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('gemini-live')
+    })
+
+    test('returns qwen-realtime when configured', async () => {
+        process.env.VOICE_BACKEND = 'qwen-realtime'
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/backend', { headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('qwen-realtime')
+    })
+
+    test('falls back to elevenlabs for unknown values', async () => {
+        process.env.VOICE_BACKEND = 'unknown-backend'
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/backend', { headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('elevenlabs')
+    })
+})
+
+describe('POST /api/voice/gemini-token', () => {
+    const origGemini = process.env.GEMINI_API_KEY
+    const origGoogle = process.env.GOOGLE_API_KEY
+
+    afterEach(() => {
+        if (origGemini === undefined) delete process.env.GEMINI_API_KEY
+        else process.env.GEMINI_API_KEY = origGemini
+        if (origGoogle === undefined) delete process.env.GOOGLE_API_KEY
+        else process.env.GOOGLE_API_KEY = origGoogle
+    })
+
+    test('returns 400 when no API key configured', async () => {
+        delete process.env.GEMINI_API_KEY
+        delete process.env.GOOGLE_API_KEY
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/gemini-token', { method: 'POST', headers })
+        expect(res.status).toBe(400)
+        const body = await res.json() as { allowed: boolean; error: string }
+        expect(body.allowed).toBe(false)
+        expect(body.error).toContain('not configured')
+    })
+
+    test('returns proxied wsUrl when GEMINI_API_KEY is set', async () => {
+        process.env.GEMINI_API_KEY = 'test-gemini-key'
+        delete process.env.GOOGLE_API_KEY
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/gemini-token', { method: 'POST', headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; apiKey: string; wsUrl: string }
+        expect(body.allowed).toBe(true)
+        expect(body.apiKey).toBe('proxied')
+        expect(body.wsUrl).toContain('/api/voice/gemini-ws')
+    })
+
+    test('falls back to GOOGLE_API_KEY', async () => {
+        delete process.env.GEMINI_API_KEY
+        process.env.GOOGLE_API_KEY = 'test-google-key'
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/gemini-token', { method: 'POST', headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; apiKey: string; wsUrl: string }
+        expect(body.allowed).toBe(true)
+        expect(body.apiKey).toBe('proxied')
+        expect(body.wsUrl).toContain('/api/voice/gemini-ws')
+    })
+})
+
+describe('POST /api/voice/qwen-token', () => {
+    const origDash = process.env.DASHSCOPE_API_KEY
+    const origQwen = process.env.QWEN_API_KEY
+
+    afterEach(() => {
+        if (origDash === undefined) delete process.env.DASHSCOPE_API_KEY
+        else process.env.DASHSCOPE_API_KEY = origDash
+        if (origQwen === undefined) delete process.env.QWEN_API_KEY
+        else process.env.QWEN_API_KEY = origQwen
+    })
+
+    test('returns 400 when no API key configured', async () => {
+        delete process.env.DASHSCOPE_API_KEY
+        delete process.env.QWEN_API_KEY
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/qwen-token', { method: 'POST', headers })
+        expect(res.status).toBe(400)
+        const body = await res.json() as { allowed: boolean; error: string }
+        expect(body.allowed).toBe(false)
+        expect(body.error).toContain('not configured')
+    })
+
+    test('returns wsUrl when DASHSCOPE_API_KEY is set (no raw key exposed)', async () => {
+        process.env.DASHSCOPE_API_KEY = 'test-dash-key'
+        delete process.env.QWEN_API_KEY
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/qwen-token', { method: 'POST', headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; wsUrl: string }
+        expect(body.allowed).toBe(true)
+        expect(body.wsUrl).toContain('/api/voice/qwen-ws')
+        expect(body).not.toHaveProperty('apiKey')
+    })
+
+    test('falls back to QWEN_API_KEY', async () => {
+        delete process.env.DASHSCOPE_API_KEY
+        process.env.QWEN_API_KEY = 'test-qwen-key'
+        const app = createApp()
+        const headers = await authHeaders()
+        const res = await app.request('/api/voice/qwen-token', { method: 'POST', headers })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; wsUrl: string }
+        expect(body.allowed).toBe(true)
+        expect(body.wsUrl).toContain('/api/voice/qwen-ws')
+        expect(body).not.toHaveProperty('apiKey')
+    })
+})
diff --git a/hub/src/web/routes/voice.ts b/hub/src/web/routes/voice.ts
index 091b9c2ac..3ccc48155 100644
--- a/hub/src/web/routes/voice.ts
+++ b/hub/src/web/routes/voice.ts
@@ -4,8 +4,19 @@ import type { WebAppEnv } from '../middleware/auth'
 import {
     ELEVENLABS_API_BASE,
     VOICE_AGENT_NAME,
-    buildVoiceAgentConfig
+    buildVoiceAgentConfig,
+    DEFAULT_VOICE_BACKEND
 } from '@hapi/protocol/voice'
+import type { VoiceBackendType } from '@hapi/protocol/voice'
+
+function buildVoiceWsUrl(base: string, pathname: string): string {
+    const url = new URL(base)
+    url.protocol = url.protocol === 'https:' ? 'wss:' : 'ws:'
+    url.pathname = pathname
+    url.search = ''
+    url.hash = ''
+    return url.toString()
+}
 
 const tokenRequestSchema = z.object({
     customAgentId: z.string().optional(),
@@ -166,6 +177,65 @@ async function getOrCreateAgentIdForVoice(apiKey: string, voiceId?: string): Pro
 export function createVoiceRoutes(): Hono<WebAppEnv> {
     const app = new Hono<WebAppEnv>()
 
+    // Return the configured voice backend type
+    app.get('/voice/backend', (c) => {
+        const raw = process.env.VOICE_BACKEND
+        const backend: VoiceBackendType =
+            raw === 'gemini-live' ? 'gemini-live'
+            : raw === 'qwen-realtime' ? 'qwen-realtime'
+            : DEFAULT_VOICE_BACKEND
+        return c.json({ backend })
+    })
+
+    // Get Gemini API key for Gemini Live voice sessions
+    // Gemini Live API does not support ephemeral tokens, so we proxy the key.
+    // The key is short-lived in the browser session and never persisted client-side.
+    app.post('/voice/gemini-token', async (c) => {
+        const apiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY
+        if (!apiKey) {
+            return c.json({
+                allowed: false,
+                error: 'Gemini API key not configured (set GEMINI_API_KEY or GOOGLE_API_KEY)'
+            }, 400)
+        }
+
+        // Use server-side WS proxy to avoid region restrictions.
+        // The proxy at /api/voice/gemini-ws handles the API key server-side.
+        // Derive wsUrl from the request origin so remote browsers connect back to the hub,
+        // not to localhost. HAPI_PUBLIC_URL overrides when set (e.g. behind a reverse proxy).
+        const requestOrigin = new URL(c.req.url).origin
+        const publicUrl = process.env.HAPI_PUBLIC_URL || requestOrigin
+        const wsProxyUrl = buildVoiceWsUrl(publicUrl, '/api/voice/gemini-ws')
+
+        return c.json({
+            allowed: true,
+            apiKey: 'proxied', // Dummy — key is handled server-side
+            wsUrl: wsProxyUrl, // Always proxy — env WS URLs are upstream-only (server-side)
+            baseUrl: process.env.GEMINI_API_BASE || undefined
+        })
+    })
+
+    // Check Qwen (DashScope) availability for Qwen Realtime voice sessions
+    // The actual API key is never sent to the browser — it stays server-side in the WS proxy.
+    app.post('/voice/qwen-token', async (c) => {
+        const apiKey = process.env.DASHSCOPE_API_KEY || process.env.QWEN_API_KEY
+        if (!apiKey) {
+            return c.json({
+                allowed: false,
+                error: 'DashScope API key not configured (set DASHSCOPE_API_KEY or QWEN_API_KEY)'
+            }, 400)
+        }
+
+        const requestOrigin = new URL(c.req.url).origin
+        const publicUrl = process.env.HAPI_PUBLIC_URL || requestOrigin
+        const wsProxyUrl = buildVoiceWsUrl(publicUrl, '/api/voice/qwen-ws')
+
+        return c.json({
+            allowed: true,
+            wsUrl: wsProxyUrl // Always proxy — env WS URLs are upstream-only (server-side)
+        })
+    })
+
     // Get ElevenLabs ConvAI conversation token
     app.post('/voice/token', async (c) => {
         const requestId = crypto.randomUUID()
diff --git a/hub/src/web/server.ts b/hub/src/web/server.ts
index e18f3ddc6..b128bd40f 100644
--- a/hub/src/web/server.ts
+++ b/hub/src/web/server.ts
@@ -6,6 +6,7 @@ import { existsSync } from 'node:fs'
 import { serveStatic } from 'hono/bun'
 import { getConfiguration } from '../configuration'
 import { PROTOCOL_VERSION } from '@hapi/protocol'
+import { buildGeminiLiveSetupMessage, buildQwenSessionUpdateMessage, isQwenSafeClientFrame, QWEN_REALTIME_MODEL } from '@hapi/protocol/voice'
 import type { SyncEngine } from '../sync/syncEngine'
 import { createAuthMiddleware, type WebAppEnv } from './middleware/auth'
 import { createAuthRoutes } from './routes/auth'
@@ -21,13 +22,216 @@ import { createPushRoutes } from './routes/push'
 import { createVoiceRoutes } from './routes/voice'
 import type { SSEManager } from '../sse/sseManager'
 import type { VisibilityTracker } from '../visibility/visibilityTracker'
-import type { Server as BunServer } from 'bun'
+import type { Server as BunServer, ServerWebSocket } from 'bun'
 import type { Server as SocketEngine } from '@socket.io/bun-engine'
+import { jwtVerify } from 'jose'
 import type { WebSocketData } from '@socket.io/bun-engine'
 import { loadEmbeddedAssetMap, type EmbeddedWebAsset } from './embeddedAssets'
 import { isBunCompiled } from '../utils/bunCompiled'
 import type { Store } from '../store'
 
+// Normalise upstream close codes before forwarding to the browser client.
+// Codes 1005/1006/1015 are reserved and cannot be sent in a close frame;
+// abnormal upstream drops commonly produce 1006, which would throw on clientWs.close().
+function toClientCloseCode(code: number): number {
+    return code >= 1000 && code <= 4999 && code !== 1005 && code !== 1006 && code !== 1015
+        ? code
+        : 1011
+}
+
+function decodeWsText(message: string | ArrayBuffer | Uint8Array): string {
+    if (typeof message === 'string') return message
+    const bytes = message instanceof Uint8Array ? message : new Uint8Array(message)
+    return new TextDecoder().decode(bytes)
+}
+
+function isGeminiSetupFrame(message: string | ArrayBuffer | Uint8Array): boolean {
+    try {
+        const parsed = JSON.parse(decodeWsText(message)) as unknown
+        return parsed !== null && typeof parsed === 'object' && 'setup' in (parsed as object)
+    } catch {
+        return false
+    }
+}
+
+function isGeminiSetupCompleteFrame(message: string | ArrayBuffer | Uint8Array): boolean {
+    try {
+        const parsed = JSON.parse(decodeWsText(message)) as unknown
+        return parsed !== null && typeof parsed === 'object' && 'setupComplete' in (parsed as object)
+    } catch {
+        return false
+    }
+}
+
+const MAX_GEMINI_PENDING_BYTES = 1024 * 1024 // 1 MiB — rejects setup-window floods
+function frameByteSize(msg: string | ArrayBuffer | Uint8Array): number {
+    return typeof msg === 'string' ? msg.length : (msg as ArrayBuffer | Uint8Array).byteLength
+}
+
+// Gemini Live WebSocket proxy — relays browser WS to Google, bypassing region restrictions
+function createGeminiProxyWebSocketHandler() {
+    const GEMINI_WS_BASE = 'wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent'
+    const upstreamMap = new WeakMap<ServerWebSocket<unknown>, WebSocket>()
+    // pendingMap holds queued client frames until Google acknowledges setup via setupComplete.
+    // Flushed on setupComplete; until then message() queues rather than forwards.
+    const pendingMap = new WeakMap<ServerWebSocket<unknown>, Array<string | ArrayBuffer | Uint8Array>>()
+    const pendingBytesMap = new WeakMap<ServerWebSocket<unknown>, number>()
+
+    return {
+        open(clientWs: ServerWebSocket<unknown>) {
+            const data = clientWs.data as { _geminiProxy: boolean; apiKey: string; language?: string }
+            const upstreamUrl = `${process.env.GEMINI_LIVE_WS_URL || GEMINI_WS_BASE}?key=${encodeURIComponent(data.apiKey)}`
+            const pending: Array<string | ArrayBuffer | Uint8Array> = []
+            pendingMap.set(clientWs, pending)
+            pendingBytesMap.set(clientWs, 0)
+
+            const upstream = new WebSocket(upstreamUrl)
+            upstreamMap.set(clientWs, upstream)
+
+            upstream.onopen = () => {
+                // Hub-owned setup only — never forward client setup (prevents generic Gemini proxy abuse).
+                // Do NOT flush pending here: wait for Google's setupComplete before forwarding client frames.
+                upstream.send(JSON.stringify(buildGeminiLiveSetupMessage(data.language)))
+            }
+            upstream.onmessage = (event) => {
+                try {
+                    if (clientWs.readyState === 1) {
+                        clientWs.send(typeof event.data === 'string' ? event.data : new Uint8Array(event.data as ArrayBuffer))
+                    }
+                } catch { /* client gone */ }
+                // Flush queued client frames only after Google acknowledges setup.
+                const pending = pendingMap.get(clientWs)
+                if (pending && isGeminiSetupCompleteFrame(event.data as string | ArrayBuffer)) {
+                    pendingMap.delete(clientWs)
+                    pendingBytesMap.delete(clientWs)
+                    for (const queued of pending) {
+                        try { upstream.send(queued) } catch { /* upstream gone */ }
+                    }
+                }
+            }
+            upstream.onerror = () => {
+                pendingMap.delete(clientWs)
+                pendingBytesMap.delete(clientWs)
+                try { clientWs.close(1011, 'Upstream error') } catch { /* */ }
+            }
+            upstream.onclose = (event) => {
+                pendingMap.delete(clientWs)
+                pendingBytesMap.delete(clientWs)
+                try { clientWs.close(toClientCloseCode(event.code), event.reason || 'Upstream closed') } catch { /* client gone */ }
+                upstreamMap.delete(clientWs)
+            }
+        },
+        message(clientWs: ServerWebSocket<unknown>, message: string | ArrayBuffer | Uint8Array) {
+            if (isGeminiSetupFrame(message)) {
+                try { clientWs.close(1008, 'Client-provided Gemini setup is not allowed') } catch { /* */ }
+                return
+            }
+            const upstream = upstreamMap.get(clientWs)
+            const pending = pendingMap.get(clientWs)
+            if (pending) {
+                // Still awaiting setupComplete — queue, but cap to prevent setup-window floods.
+                const total = (pendingBytesMap.get(clientWs) ?? 0) + frameByteSize(message)
+                if (total > MAX_GEMINI_PENDING_BYTES) {
+                    try { clientWs.close(1009, 'Setup-window frame budget exceeded') } catch { /* */ }
+                    return
+                }
+                pendingBytesMap.set(clientWs, total)
+                pending.push(message)
+            } else if (upstream?.readyState === WebSocket.OPEN) {
+                upstream.send(message)
+            }
+        },
+        close(clientWs: ServerWebSocket<unknown>, code: number, reason: string) {
+            const upstream = upstreamMap.get(clientWs)
+            pendingMap.delete(clientWs)
+            pendingBytesMap.delete(clientWs)
+            if (upstream) {
+                try { upstream.close(toClientCloseCode(code), (reason || 'Client closed').slice(0, 123)) } catch { /* */ }
+                upstreamMap.delete(clientWs)
+            }
+        }
+    }
+}
+
+// Qwen Realtime WebSocket proxy — bridges browser (no custom headers) to DashScope (requires Authorization header)
+function createQwenProxyWebSocketHandler() {
+    const QWEN_WS_BASE = 'wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime'
+    const upstreamMap = new WeakMap<ServerWebSocket<unknown>, WebSocket>()
+    // Holds the hub-owned session.update payload until session.created arrives from DashScope.
+    // Sending session.update before session.created violates the Qwen Realtime protocol ordering.
+    const pendingSetupMap = new WeakMap<ServerWebSocket<unknown>, string>()
+
+    return {
+        open(clientWs: ServerWebSocket<unknown>) {
+            const data = clientWs.data as { apiKey: string; model: string; language?: string }
+            const upstreamUrl = `${process.env.QWEN_REALTIME_WS_URL || QWEN_WS_BASE}?model=${encodeURIComponent(data.model)}`
+
+            const upstream = new WebSocket(upstreamUrl, {
+                headers: { 'Authorization': `Bearer ${data.apiKey}` }
+            } as unknown as string[])
+
+            upstreamMap.set(clientWs, upstream)
+            pendingSetupMap.set(clientWs, JSON.stringify(buildQwenSessionUpdateMessage(data.language)))
+
+            upstream.onmessage = (event) => {
+                const raw = event.data
+                const text = typeof raw === 'string'
+                    ? raw
+                    : new TextDecoder().decode(raw instanceof Uint8Array ? raw : new Uint8Array(raw as ArrayBuffer))
+
+                // Respect Qwen protocol ordering: relay session.created first, then send hub-owned
+                // session.update. DashScope must receive session.update after session.created.
+                const pendingSetup = pendingSetupMap.get(clientWs)
+                if (pendingSetup) {
+                    try {
+                        const parsed = JSON.parse(text) as { type?: string }
+                        if (parsed.type === 'session.created') {
+                            pendingSetupMap.delete(clientWs)
+                            try { if (clientWs.readyState === 1) clientWs.send(text) } catch { /* client gone */ }
+                            upstream.send(pendingSetup)
+                            return
+                        }
+                    } catch { /* not JSON — relay as-is below */ }
+                }
+
+                try {
+                    if (clientWs.readyState === 1) {
+                        clientWs.send(typeof raw === 'string' ? raw : new Uint8Array(raw as ArrayBuffer))
+                    }
+                } catch { /* client gone */ }
+            }
+            upstream.onerror = () => {
+                pendingSetupMap.delete(clientWs)
+                upstreamMap.delete(clientWs)
+                try { clientWs.close(1011, 'Upstream error') } catch { /* */ }
+            }
+            upstream.onclose = (event) => {
+                pendingSetupMap.delete(clientWs)
+                try { clientWs.close(toClientCloseCode(event.code), event.reason || 'Upstream closed') } catch { /* client gone */ }
+                upstreamMap.delete(clientWs)
+            }
+        },
+        message(clientWs: ServerWebSocket<unknown>, message: string | ArrayBuffer | Uint8Array) {
+            if (!isQwenSafeClientFrame(message)) {
+                try { clientWs.close(1008, 'Client session.update may only modify instructions') } catch { /* */ }
+                return
+            }
+            const upstream = upstreamMap.get(clientWs)
+            if (upstream?.readyState === WebSocket.OPEN) {
+                upstream.send(message)
+            }
+        },
+        close(clientWs: ServerWebSocket<unknown>, code: number, reason: string) {
+            pendingSetupMap.delete(clientWs)
+            const upstream = upstreamMap.get(clientWs)
+            if (upstream) {
+                try { upstream.close(toClientCloseCode(code), (reason || 'Client closed').slice(0, 123)) } catch { /* */ }
+                upstreamMap.delete(clientWs)
+            }
+        }
+    }
+}
+
 function findWebappDistDir(): { distDir: string; indexHtmlPath: string } {
     const candidates = [
         join(process.cwd(), '..', 'web', 'dist'),
@@ -232,17 +436,102 @@ export async function startWebServer(options: {
     const configuration = getConfiguration()
     const socketHandler = options.socketEngine.handler()
 
-    const server = Bun.serve({
+    // Wrap socket.io websocket handler to also support Gemini/Qwen proxy connections
+    const originalWsHandler = socketHandler.websocket
+    const geminiProxyHandler = createGeminiProxyWebSocketHandler()
+    const qwenProxyHandler = createQwenProxyWebSocketHandler()
+
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const server = (Bun.serve as any)({
         hostname: configuration.listenHost,
         port: configuration.listenPort,
         idleTimeout: Math.max(30, socketHandler.idleTimeout),
         maxRequestBodySize: Math.max(socketHandler.maxRequestBodySize, 68 * 1024 * 1024),
-        websocket: socketHandler.websocket,
-        fetch: (req, server) => {
+        websocket: {
+            ...originalWsHandler,
+            open(ws: unknown) {
+                const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean; _geminiProxy?: boolean }>
+                if (wsAny.data?._geminiProxy) {
+                    geminiProxyHandler.open(wsAny)
+                } else if (wsAny.data?._qwenProxy) {
+                    qwenProxyHandler.open(wsAny)
+                } else {
+                    originalWsHandler.open?.(ws as never)
+                }
+            },
+            message(ws: unknown, message: unknown) {
+                const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean; _geminiProxy?: boolean }>
+                if (wsAny.data?._geminiProxy) {
+                    geminiProxyHandler.message(wsAny, message as string)
+                } else if (wsAny.data?._qwenProxy) {
+                    qwenProxyHandler.message(wsAny, message as string)
+                } else {
+                    originalWsHandler.message?.(ws as never, message as never)
+                }
+            },
+            close(ws: unknown, code: number, reason: string) {
+                const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean; _geminiProxy?: boolean }>
+                if (wsAny.data?._geminiProxy) {
+                    geminiProxyHandler.close(wsAny, code, reason)
+                } else if (wsAny.data?._qwenProxy) {
+                    qwenProxyHandler.close(wsAny, code, reason)
+                } else {
+                    originalWsHandler.close?.(ws as never, code as never, reason as never)
+                }
+            }
+        },
+        fetch: async (req: Request, server: { upgrade: (req: Request, opts?: unknown) => boolean }) => {
             const url = new URL(req.url)
             if (url.pathname.startsWith('/socket.io/')) {
-                return socketHandler.fetch(req, server)
+                return socketHandler.fetch(req, server as never)
+            }
+
+            // Voice WebSocket proxies — require JWT auth via query param
+            // (browser WebSocket API cannot set custom headers)
+            if (url.pathname === '/api/voice/gemini-ws' || url.pathname === '/api/voice/qwen-ws') {
+                const token = url.searchParams.get('token')
+                if (!token) {
+                    return new Response('Missing authorization token', { status: 401 })
+                }
+                try {
+                    await jwtVerify(token, options.jwtSecret, { algorithms: ['HS256'] })
+                } catch {
+                    return new Response('Invalid token', { status: 401 })
+                }
+            }
+
+            // Gemini Live WebSocket proxy
+            if (url.pathname === '/api/voice/gemini-ws') {
+                const apiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY
+                if (!apiKey) {
+                    return new Response('Gemini API key not configured', { status: 400 })
+                }
+                const language = url.searchParams.get('language') ?? undefined
+                const upgraded = (server as unknown as { upgrade: (req: Request, opts: unknown) => boolean }).upgrade(req, {
+                    data: { _geminiProxy: true, apiKey, language }
+                })
+                if (!upgraded) {
+                    return new Response('WebSocket upgrade failed', { status: 500 })
+                }
+                return undefined as unknown as Response
             }
+            // Qwen Realtime WebSocket proxy
+            if (url.pathname === '/api/voice/qwen-ws') {
+                const apiKey = process.env.DASHSCOPE_API_KEY || process.env.QWEN_API_KEY
+                const model = QWEN_REALTIME_MODEL
+                const language = url.searchParams.get('language') ?? undefined
+                if (!apiKey) {
+                    return new Response('DashScope API key not configured', { status: 400 })
+                }
+                const upgraded = (server as unknown as { upgrade: (req: Request, opts: unknown) => boolean }).upgrade(req, {
+                    data: { _qwenProxy: true, apiKey, model, language }
+                })
+                if (!upgraded) {
+                    return new Response('WebSocket upgrade failed', { status: 500 })
+                }
+                return undefined as unknown as Response
+            }
+
             return app.fetch(req)
         }
     })
diff --git a/shared/src/voice.gemini.test.ts b/shared/src/voice.gemini.test.ts
new file mode 100644
index 000000000..b9602811a
--- /dev/null
+++ b/shared/src/voice.gemini.test.ts
@@ -0,0 +1,88 @@
+import { describe, expect, test } from 'bun:test'
+import { buildGeminiLiveSetupMessage, buildQwenSessionUpdateMessage, isQwenSafeClientFrame, GEMINI_LIVE_MODEL, GEMINI_LIVE_VOICE, QWEN_REALTIME_VOICE } from './voice'
+
+describe('buildGeminiLiveSetupMessage', () => {
+    test('locks model and voice to HAPI defaults', () => {
+        const msg = buildGeminiLiveSetupMessage()
+        expect(msg.setup.model).toBe(`models/${GEMINI_LIVE_MODEL}`)
+        const speech = msg.setup.generationConfig as {
+            speechConfig?: { voiceConfig?: { prebuiltVoiceConfig?: { voiceName?: string } } }
+        }
+        expect(speech.speechConfig?.voiceConfig?.prebuiltVoiceConfig?.voiceName).toBe(GEMINI_LIVE_VOICE)
+    })
+
+    test('appends Chinese block when language is zh', () => {
+        const en = buildGeminiLiveSetupMessage()
+        const zh = buildGeminiLiveSetupMessage('zh')
+        const enText = (en.setup.systemInstruction as { parts: Array<{ text: string }> }).parts[0].text
+        const zhText = (zh.setup.systemInstruction as { parts: Array<{ text: string }> }).parts[0].text
+        expect(zhText.length).toBeGreaterThan(enText.length)
+    })
+})
+
+describe('buildQwenSessionUpdateMessage', () => {
+    test('locks voice to HAPI default', () => {
+        const msg = buildQwenSessionUpdateMessage()
+        const session = msg.session as { voice: string }
+        expect(session.voice).toBe(QWEN_REALTIME_VOICE)
+    })
+
+    test('includes both tools', () => {
+        const msg = buildQwenSessionUpdateMessage()
+        // Realtime shape: flat {type, name, description, parameters} — NOT chat-completions {function:{...}}
+        const session = msg.session as { tools: Array<{ type: string; name: string }> }
+        const names = session.tools.map(t => t.name)
+        expect(names).toContain('messageCodingAgent')
+        expect(names).toContain('processPermissionRequest')
+        // Ensure no nested function key (would be wrong chat-completions shape)
+        session.tools.forEach(t => expect((t as Record<string, unknown>).function).toBeUndefined())
+    })
+
+    test('appends Chinese block when language is zh', () => {
+        const en = buildQwenSessionUpdateMessage()
+        const zh = buildQwenSessionUpdateMessage('zh')
+        const enInstr = (en.session as { instructions: string }).instructions
+        const zhInstr = (zh.session as { instructions: string }).instructions
+        expect(zhInstr.length).toBeGreaterThan(enInstr.length)
+    })
+})
+
+describe('isQwenSafeClientFrame', () => {
+    test('allows non-session.update frames', () => {
+        expect(isQwenSafeClientFrame(JSON.stringify({ type: 'input_audio_buffer.append', audio: 'abc' }))).toBe(true)
+        expect(isQwenSafeClientFrame(JSON.stringify({ type: 'response.create' }))).toBe(true)
+        expect(isQwenSafeClientFrame(JSON.stringify({ type: 'conversation.item.create', item: {} }))).toBe(true)
+    })
+
+    test('allows session.update with only instructions', () => {
+        expect(isQwenSafeClientFrame(JSON.stringify({
+            type: 'session.update',
+            session: { instructions: 'updated prompt' }
+        }))).toBe(true)
+    })
+
+    test('blocks session.update that includes tools', () => {
+        expect(isQwenSafeClientFrame(JSON.stringify({
+            type: 'session.update',
+            session: { instructions: 'x', tools: [] }
+        }))).toBe(false)
+    })
+
+    test('blocks session.update that includes voice', () => {
+        expect(isQwenSafeClientFrame(JSON.stringify({
+            type: 'session.update',
+            session: { voice: 'Cherry' }
+        }))).toBe(false)
+    })
+
+    test('blocks full config session.update', () => {
+        expect(isQwenSafeClientFrame(JSON.stringify({
+            type: 'session.update',
+            session: { modalities: ['text', 'audio'], voice: 'Cherry', instructions: 'x', tools: [], tool_choice: 'auto' }
+        }))).toBe(false)
+    })
+
+    test('allows non-JSON (binary audio frames pass through)', () => {
+        expect(isQwenSafeClientFrame('not json {')).toBe(true)
+    })
+})
diff --git a/shared/src/voice.ts b/shared/src/voice.ts
index 2843d84eb..3099a9057 100644
--- a/shared/src/voice.ts
+++ b/shared/src/voice.ts
@@ -8,10 +8,16 @@
 export const ELEVENLABS_API_BASE = 'https://api.elevenlabs.io/v1'
 export const VOICE_AGENT_NAME = 'Hapi Voice Assistant'
 
-export const VOICE_SYSTEM_PROMPT = `# Identity
+export const VOICE_SYSTEM_PROMPT = `# CRITICAL RULE - Tool Usage
+
+You MUST call the messageCodingAgent tool for ANY request related to coding, files, development, debugging, or tasks for the agent. Do NOT respond verbally to these requests — call the tool FIRST, then briefly confirm. This is your most important behavior.
+
+# Identity
 
 You are Hapi Voice Assistant. You bridge voice communication between users and their AI coding agents in the Hapi ecosystem.
 
+IMPORTANT: Never refer to yourself as Gemini, Google, or any underlying model or provider name. You are HAPI — always.
+
 You are friendly, proactive, and highly intelligent with a world-class engineering background. Your approach is warm, witty, and relaxed, balancing professionalism with an approachable vibe.
 
 # Environment Overview
@@ -136,9 +142,96 @@ For builds, tests, or large file operations:
 - Treat garbled input as phonetic hints and ask for clarification
 - Correct yourself immediately if you realize you made an error
 - Keep conversations forward-moving with fresh insights
-- Assume a technical software developer audience`
+- Assume a technical software developer audience
+
+# First Interaction
+
+When the user speaks to you for the first time, begin your response with a brief greeting before addressing their request. If their first message is a coding request, greet briefly AND call the tool — do both.`
 
-export const VOICE_FIRST_MESSAGE = "Hey! Hapi here."
+/**
+ * Language blocks appended to VOICE_SYSTEM_PROMPT for Gemini/Qwen backends
+ * (ElevenLabs has its own language field).
+ *
+ * Always append one of these — silence causes models to drift to their training
+ * language (Chinese for Qwen, mixed for Gemini).
+ */
+export const VOICE_CHINESE_LANGUAGE_BLOCK = `
+
+# Language
+
+IMPORTANT: Always respond in Chinese (Mandarin). Use natural spoken Chinese.
+- Greet users in Chinese
+- Summarize technical content in Chinese
+- Use English only for proper nouns, tool names, and code identifiers
+- Keep the same warm, concise conversational style in Chinese`
+
+/** When no language is selected: mirror the user's detected speech language. */
+const VOICE_LANGUAGE_BLOCK_AUTO = `
+
+# Language
+
+Detect the language the user is speaking and respond in that same language.
+Maintain it consistently throughout the session — do not drift between turns.
+If the language cannot be determined, default to English.`
+
+/** BCP-47 code → spoken language name (for explicit-language block). */
+const LANGUAGE_NAMES: Record<string, string> = {
+    en: 'English',
+    es: 'Spanish',
+    fr: 'French',
+    de: 'German',
+    ja: 'Japanese',
+    ko: 'Korean',
+    pt: 'Portuguese',
+    'pt-br': 'Brazilian Portuguese',
+    it: 'Italian',
+    ar: 'Arabic',
+    ru: 'Russian',
+    hi: 'Hindi',
+    th: 'Thai',
+    vi: 'Vietnamese',
+    id: 'Indonesian',
+    nl: 'Dutch',
+    sv: 'Swedish',
+    no: 'Norwegian',
+    da: 'Danish',
+    fi: 'Finnish',
+    pl: 'Polish',
+    tr: 'Turkish',
+    bg: 'Bulgarian',
+    ro: 'Romanian',
+    cs: 'Czech',
+    el: 'Greek',
+    ms: 'Malay',
+    tl: 'Filipino',
+    uk: 'Ukrainian',
+    hu: 'Hungarian',
+    hr: 'Croatian',
+    sk: 'Slovak',
+}
+
+/**
+ * Returns the language instruction block to append to VOICE_SYSTEM_PROMPT.
+ * - Explicit 'zh' → Chinese block
+ * - Other explicit code → "Always respond in [Language]"
+ * - undefined/auto → "detect from user speech and maintain it"
+ */
+export function buildVoiceLanguageBlock(language?: string): string {
+    if (!language) return VOICE_LANGUAGE_BLOCK_AUTO
+    if (language === 'zh' || language.startsWith('zh-')) return VOICE_CHINESE_LANGUAGE_BLOCK
+    const name = LANGUAGE_NAMES[language] ?? language
+    return `
+
+# Language
+
+IMPORTANT: Always respond in ${name}. Maintain ${name} consistently throughout
+the session — do not drift to a different language between turns.
+Use English only for proper nouns, code identifiers, and technical terms with
+no ${name} equivalent.`
+}
+
+/** ElevenLabs first message — language controlled by ElevenLabs language field */
+export const VOICE_FIRST_MESSAGE = "Hey! Hapi here — what can I help you with?"
 
 export const VOICE_TOOLS = [
     {
@@ -261,3 +354,162 @@ export function buildVoiceAgentConfig(): VoiceAgentConfig {
         }
     }
 }
+
+export type VoiceBackendType = 'elevenlabs' | 'gemini-live' | 'qwen-realtime'
+
+export const QWEN_REALTIME_MODEL = 'qwen3.5-omni-flash-realtime'
+export const QWEN_REALTIME_VOICE = 'Tina'
+
+export const DEFAULT_VOICE_BACKEND: VoiceBackendType = 'elevenlabs'
+
+export const GEMINI_LIVE_MODEL = 'gemini-2.5-flash-native-audio-latest'
+export const GEMINI_LIVE_VOICE = 'Aoede'
+
+export interface VoiceToolDefinition {
+    name: string
+    description: string
+    parameters: {
+        type: 'object'
+        required: string[]
+        properties: Record<string, {
+            type: string
+            description: string
+        }>
+    }
+}
+
+type VoiceToolSource = Pick<(typeof VOICE_TOOLS)[number], 'name' | 'description' | 'parameters'>
+
+function cloneVoiceToolDefinition(tool: VoiceToolSource): VoiceToolDefinition {
+    const properties: VoiceToolDefinition['parameters']['properties'] = {}
+
+    for (const [key, value] of Object.entries(tool.parameters.properties)) {
+        properties[key] = {
+            type: value.type,
+            description: value.description
+        }
+    }
+
+    return {
+        name: tool.name,
+        description: tool.description,
+        parameters: {
+            type: 'object',
+            required: [...tool.parameters.required],
+            properties
+        }
+    }
+}
+
+export const VOICE_TOOL_DEFINITIONS: VoiceToolDefinition[] = VOICE_TOOLS.map(cloneVoiceToolDefinition)
+
+export type GeminiLiveFunctionDeclaration = VoiceToolDefinition
+
+export interface GeminiLiveConfig {
+    model: string
+    systemInstruction: string
+    tools: Array<{
+        functionDeclarations: GeminiLiveFunctionDeclaration[]
+    }>
+    responseModalities: ['AUDIO']
+}
+
+export function buildGeminiLiveFunctionDeclarations(): GeminiLiveFunctionDeclaration[] {
+    return VOICE_TOOLS.map(cloneVoiceToolDefinition)
+}
+
+export function buildGeminiLiveConfig(language?: string): GeminiLiveConfig {
+    const systemInstruction = `${VOICE_SYSTEM_PROMPT}${buildVoiceLanguageBlock(language)}`
+    return {
+        model: GEMINI_LIVE_MODEL,
+        systemInstruction,
+        tools: [
+            {
+                functionDeclarations: buildGeminiLiveFunctionDeclarations()
+            }
+        ],
+        responseModalities: ['AUDIO']
+    }
+}
+
+/** Hub-owned initial session.update for Qwen Realtime (hub proxy). */
+export function buildQwenSessionUpdateMessage(language?: string): Record<string, unknown> {
+    const instructions = `${VOICE_SYSTEM_PROMPT}${buildVoiceLanguageBlock(language)}`
+    // Qwen Realtime uses the flat Realtime shape, not the chat-completions nested {function:{...}} shape.
+    const tools = VOICE_TOOL_DEFINITIONS.map((td) => ({
+        type: 'function' as const,
+        name: td.name,
+        description: td.description,
+        parameters: td.parameters
+    }))
+    return {
+        type: 'session.update',
+        session: {
+            modalities: ['text', 'audio'],
+            voice: QWEN_REALTIME_VOICE,
+            input_audio_format: 'pcm',
+            output_audio_format: 'pcm',
+            instructions,
+            temperature: 0.7,
+            turn_detection: {
+                type: 'server_vad',
+                threshold: 0.5,
+                silence_duration_ms: 800,
+                prefix_padding_ms: 300
+            },
+            tools,
+            tool_choice: 'auto'
+        }
+    }
+}
+
+/**
+ * Returns true if a client WebSocket frame is safe to forward to DashScope.
+ * Blocks session.update frames that touch config fields (tools, voice, etc.);
+ * allows instruction-only updates and all runtime event types.
+ */
+export function isQwenSafeClientFrame(message: string | ArrayBuffer | Uint8Array): boolean {
+    try {
+        const text = typeof message === 'string'
+            ? message
+            : new TextDecoder().decode(message instanceof ArrayBuffer ? new Uint8Array(message) : message)
+        const parsed = JSON.parse(text) as unknown
+        if (!parsed || typeof parsed !== 'object') return true
+        const p = parsed as Record<string, unknown>
+        if (p.type !== 'session.update') return true
+        const session = p.session as Record<string, unknown> | undefined
+        if (!session) return false
+        const keys = Object.keys(session)
+        return keys.length === 1 && keys[0] === 'instructions'
+    } catch {
+        return true
+    }
+}
+
+/** Wire-format setup frame for Gemini Live BidiGenerateContent (hub proxy + web client). */
+export function buildGeminiLiveSetupMessage(language?: string): { setup: Record<string, unknown> } {
+    const liveConfig = buildGeminiLiveConfig(language)
+    return {
+        setup: {
+            model: `models/${liveConfig.model}`,
+            generationConfig: {
+                responseModalities: ['AUDIO'],
+                speechConfig: {
+                    voiceConfig: {
+                        prebuiltVoiceConfig: { voiceName: GEMINI_LIVE_VOICE }
+                    }
+                }
+            },
+            systemInstruction: {
+                parts: [{ text: liveConfig.systemInstruction }]
+            },
+            tools: liveConfig.tools.map((t) => ({
+                functionDeclarations: t.functionDeclarations.map((fd) => ({
+                    name: fd.name,
+                    description: fd.description,
+                    parameters: fd.parameters
+                }))
+            }))
+        }
+    }
+}
diff --git a/web/src/api/client.ts b/web/src/api/client.ts
index d8dfa1bb9..0e7883772 100644
--- a/web/src/api/client.ts
+++ b/web/src/api/client.ts
@@ -567,4 +567,37 @@ export class ApiClient {
             body: JSON.stringify(event)
         })
     }
+
+    /** Return the current auth token (for WebSocket query-param auth). */
+    getAuthToken(): string | null {
+        return this.getToken ? this.getToken() : this.token
+    }
+
+    async fetchVoiceBackend(): Promise<{ backend: string }> {
+        return await this.request('/api/voice/backend')
+    }
+
+    async fetchQwenToken(): Promise<{
+        allowed: boolean
+        wsUrl?: string
+        error?: string
+    }> {
+        return await this.request('/api/voice/qwen-token', {
+            method: 'POST',
+            body: JSON.stringify({})
+        })
+    }
+
+    async fetchGeminiToken(): Promise<{
+        allowed: boolean
+        apiKey?: string
+        wsUrl?: string
+        baseUrl?: string
+        error?: string
+    }> {
+        return await this.request('/api/voice/gemini-token', {
+            method: 'POST',
+            body: JSON.stringify({})
+        })
+    }
 }
diff --git a/web/src/api/voice.ts b/web/src/api/voice.ts
index c5e8f27a4..df6828127 100644
--- a/web/src/api/voice.ts
+++ b/web/src/api/voice.ts
@@ -15,6 +15,7 @@ import {
     VOICE_AGENT_NAME,
     buildVoiceAgentConfig
 } from '@hapi/protocol/voice'
+import type { VoiceBackendType } from '@hapi/protocol/voice'
 
 export interface VoiceTokenResponse {
     allowed: boolean
@@ -177,3 +178,64 @@ export async function createOrUpdateHapiAgent(apiKey: string): Promise<CreateAge
         return { success: false, error: e instanceof Error ? e.message : 'Network error' }
     }
 }
+
+// --- Pluggable voice backend API ---
+
+export interface QwenTokenResponse {
+    allowed: boolean
+    wsUrl?: string
+    error?: string
+}
+
+/**
+ * Fetch a DashScope API key from the hub for Qwen Realtime voice sessions.
+ */
+export async function fetchQwenToken(api: ApiClient): Promise<QwenTokenResponse> {
+    try {
+        return await api.fetchQwenToken()
+    } catch (error) {
+        return {
+            allowed: false,
+            error: error instanceof Error ? error.message : 'Network error'
+        }
+    }
+}
+
+export interface VoiceBackendResponse {
+    backend: VoiceBackendType
+}
+
+export interface GeminiTokenResponse {
+    allowed: boolean
+    apiKey?: string
+    wsUrl?: string
+    baseUrl?: string
+    error?: string
+}
+
+/**
+ * Discover which voice backend the hub is configured to use.
+ * Throws on network/server error or unrecognised backend value — callers must handle failures explicitly.
+ */
+export async function fetchVoiceBackend(api: ApiClient): Promise<VoiceBackendResponse> {
+    const result = await api.fetchVoiceBackend()
+    const { backend } = result
+    if (backend === 'elevenlabs' || backend === 'gemini-live' || backend === 'qwen-realtime') {
+        return { backend }
+    }
+    throw new Error(`Unrecognised voice backend: ${backend}`)
+}
+
+/**
+ * Fetch a Gemini API key from the hub for Gemini Live voice sessions.
+ */
+export async function fetchGeminiToken(api: ApiClient): Promise<GeminiTokenResponse> {
+    try {
+        return await api.fetchGeminiToken()
+    } catch (error) {
+        return {
+            allowed: false,
+            error: error instanceof Error ? error.message : 'Network error'
+        }
+    }
+}
diff --git a/web/src/components/SessionChat.tsx b/web/src/components/SessionChat.tsx
index 7cd5fb2c2..39926684b 100644
--- a/web/src/components/SessionChat.tsx
+++ b/web/src/components/SessionChat.tsx
@@ -35,7 +35,7 @@ import { useCodexModels } from '@/hooks/queries/useCodexModels'
 import { useCursorModels } from '@/hooks/queries/useCursorModels'
 import { useOpencodeModels } from '@/hooks/queries/useOpencodeModels'
 import { useVoiceOptional } from '@/lib/voice-context'
-import { RealtimeVoiceSession, registerSessionStore, registerVoiceHooksStore, voiceHooks } from '@/realtime'
+import { VoiceBackendSession, registerSessionStore, registerVoiceHooksStore, voiceHooks } from '@/realtime'
 import { isRemoteTerminalSupported } from '@/utils/terminalSupport'
 
 /**
@@ -207,6 +207,7 @@ export function SessionChat(props: {
 
     // Voice assistant integration
     const voice = useVoiceOptional()
+    const [voiceBackendReady, setVoiceBackendReady] = useState(false)
 
     // Register session store for voice client tools
     useEffect(() => {
@@ -673,18 +674,19 @@ export function SessionChat(props: {
                         autocompleteSuggestions={props.autocompleteSuggestions}
                         voiceStatus={voice?.status}
                         voiceMicMuted={voice?.micMuted}
-                        onVoiceToggle={voice ? handleVoiceToggle : undefined}
-                        onVoiceMicToggle={voice ? handleVoiceMicToggle : undefined}
+                        onVoiceToggle={voice && voiceBackendReady ? handleVoiceToggle : undefined}
+                        onVoiceMicToggle={voice && voiceBackendReady ? handleVoiceMicToggle : undefined}
                     />
                 </div>
             </AssistantRuntimeProvider>
 
-            {/* Voice session component - renders nothing but initializes ElevenLabs */}
+            {/* Voice session component - renders nothing but initializes voice backend */}
             {voice && (
-                <RealtimeVoiceSession
+                <VoiceBackendSession
                     api={props.api}
                     micMuted={voice.micMuted}
                     onStatusChange={voice.setStatus}
+                    onReadyChange={setVoiceBackendReady}
                 />
             )}
         </div>
diff --git a/web/src/lib/locales/en.ts b/web/src/lib/locales/en.ts
index 842097126..a15343052 100644
--- a/web/src/lib/locales/en.ts
+++ b/web/src/lib/locales/en.ts
@@ -418,6 +418,8 @@ export default {
   'settings.voice.autoDetect': 'Auto-detect',
   'settings.voice.voice': 'Voice',
   'settings.voice.voiceDefault': 'Default',
+  'settings.voice.proactive': 'Start voice session with summary',
+  'settings.voice.proactive.description': 'When on, starting a voice session opens with a spoken summary of current agent activity. When off, the assistant greets you and waits for you to speak.',
   'settings.about.title': 'About',
   'settings.about.website': 'Website',
   'settings.about.appVersion': 'App Version',
diff --git a/web/src/lib/locales/zh-CN.ts b/web/src/lib/locales/zh-CN.ts
index c1873a303..dc9049674 100644
--- a/web/src/lib/locales/zh-CN.ts
+++ b/web/src/lib/locales/zh-CN.ts
@@ -420,6 +420,8 @@ export default {
   'settings.voice.autoDetect': '自动检测',
   'settings.voice.voice': '声音',
   'settings.voice.voiceDefault': '默认',
+  'settings.voice.proactive': '以摘要开始语音会话',
+  'settings.voice.proactive.description': '开启后，启动语音会话时将朗读当前代理活动的摘要。关闭后，助手向您打招呼并等待您先开口。',
   'settings.about.title': '关于',
   'settings.about.website': '官方网站',
   'settings.about.appVersion': '应用版本',
diff --git a/web/src/realtime/GeminiLiveVoiceSession.tsx b/web/src/realtime/GeminiLiveVoiceSession.tsx
new file mode 100644
index 000000000..a23f55493
--- /dev/null
+++ b/web/src/realtime/GeminiLiveVoiceSession.tsx
@@ -0,0 +1,412 @@
+import { useEffect, useRef, useCallback } from 'react'
+import { registerVoiceSession, resetRealtimeSessionState } from './RealtimeSession'
+import { registerSessionStore } from './realtimeClientTools'
+import { fetchGeminiToken } from '@/api/voice'
+import { GeminiAudioRecorder } from './gemini/audioRecorder'
+import { GeminiAudioPlayer } from './gemini/audioPlayer'
+import { handleGeminiFunctionCalls } from './gemini/toolAdapter'
+import { buildGeminiLiveSetupMessage } from '@hapi/protocol/voice'
+import type { VoiceSession, VoiceSessionConfig, StatusCallback } from './types'
+import type { ApiClient } from '@/api/client'
+import type { Session } from '@/types/api'
+import type { GeminiFunctionCall } from './gemini/toolAdapter'
+
+const DEBUG = import.meta.env.DEV
+
+// Default Gemini Live WebSocket API endpoint (Google direct)
+const DEFAULT_GEMINI_LIVE_WS_BASE = 'wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent'
+
+interface GeminiLiveState {
+    ws: WebSocket | null
+    recorder: GeminiAudioRecorder | null
+    player: GeminiAudioPlayer | null
+    playbackContext: AudioContext | null
+    statusCallback: StatusCallback | null
+    apiKey: string | null
+    wsBaseUrl: string | null
+    modelSpeaking: boolean
+    micMuted: boolean
+}
+
+const state: GeminiLiveState = {
+    ws: null,
+    recorder: null,
+    player: null,
+    playbackContext: null,
+    statusCallback: null,
+    apiKey: null,
+    wsBaseUrl: null,
+    modelSpeaking: false,
+    micMuted: false
+}
+
+function cleanup() {
+    if (state.recorder) {
+        state.recorder.dispose()
+        state.recorder = null
+    }
+    if (state.player) {
+        state.player.dispose()
+        state.player = null
+    }
+    if (state.playbackContext && state.playbackContext.state !== 'closed') {
+        void state.playbackContext.close()
+    }
+    state.playbackContext = null
+    if (state.ws) {
+        if (state.ws.readyState === WebSocket.OPEN || state.ws.readyState === WebSocket.CONNECTING) {
+            state.ws.close()
+        }
+        state.ws = null
+    }
+    // Always reset modelSpeaking so a restart doesn't begin with audio capture silenced
+    state.modelSpeaking = false
+}
+
+class GeminiLiveVoiceSessionImpl implements VoiceSession {
+    private api: ApiClient
+
+    constructor(api: ApiClient) {
+        this.api = api
+    }
+
+    async startSession(config: VoiceSessionConfig): Promise<void> {
+        cleanup()
+        state.statusCallback?.('connecting')
+
+        // Create playback AudioContext immediately while still inside the user
+        // gesture (click/tap). Mobile browsers require this for autoplay policy.
+        // Store in state so cleanup() can close it on failure or stop.
+        state.playbackContext = new AudioContext({ sampleRate: 24000 })
+        await state.playbackContext.resume()
+
+        // Get API key from hub
+        console.log('[GeminiLive] Fetching token...')
+        const tokenResp = await fetchGeminiToken(this.api)
+        console.log('[GeminiLive] Token response:', { allowed: tokenResp.allowed, hasKey: !!tokenResp.apiKey, error: tokenResp.error })
+        if (!tokenResp.allowed || !tokenResp.apiKey) {
+            const msg = tokenResp.error ?? 'Gemini API key not available'
+            console.error('[GeminiLive] Token failed:', msg)
+            state.statusCallback?.('error', msg)
+            cleanup()
+            throw new Error(msg)
+        }
+        state.apiKey = tokenResp.apiKey
+        state.wsBaseUrl = tokenResp.wsUrl || null
+        if (!state.wsBaseUrl) {
+            const msg = 'Hub must provide wsUrl for Gemini connections — direct key connection is not supported'
+            state.statusCallback?.('error', msg)
+            cleanup()
+            throw new Error(msg)
+        }
+
+        // Request microphone
+        console.log('[GeminiLive] Requesting microphone...')
+        let permissionStream: MediaStream | null = null
+        try {
+            permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true })
+            console.log('[GeminiLive] Microphone granted')
+        } catch (error) {
+            console.error('[GeminiLive] Microphone denied:', error)
+            state.statusCallback?.('error', 'Microphone permission denied')
+            cleanup()
+            throw error
+        } finally {
+            permissionStream?.getTracks().forEach((t) => t.stop())
+        }
+
+        // Connect WebSocket — use proxy URL if provided (avoids region restrictions)
+        const wsBase = state.wsBaseUrl || DEFAULT_GEMINI_LIVE_WS_BASE
+        const isProxy = !!state.wsBaseUrl
+        const authToken = this.api.getAuthToken() || ''
+        const languageParam = config.language ? `&language=${encodeURIComponent(config.language)}` : ''
+        const wsUrl = isProxy
+            ? `${wsBase}${wsBase.includes('?') ? '&' : '?'}token=${encodeURIComponent(authToken)}${languageParam}`
+            : `${wsBase}?key=${encodeURIComponent(state.apiKey)}`
+        console.log('[GeminiLive] Connecting WebSocket to:', wsBase, isProxy ? '(proxied)' : '(direct)')
+        const ws = new WebSocket(wsUrl)
+        state.ws = ws
+
+        return new Promise<void>((resolve, reject) => {
+            let setupDone = false
+
+            ws.onopen = () => {
+                if (DEBUG) console.log('[GeminiLive] WebSocket connected', isProxy ? '(hub sends setup)' : ', sending setup')
+
+                // Proxied sessions: hub sends HAPI-owned setup server-side (see gemini-ws proxy).
+                if (!isProxy) {
+                    ws.send(JSON.stringify(buildGeminiLiveSetupMessage(config.language)))
+                }
+            }
+
+            ws.onmessage = async (event) => {
+                let data: Record<string, unknown>
+                try {
+                    if (event.data instanceof Blob) {
+                        const text = await event.data.text()
+                        data = JSON.parse(text) as Record<string, unknown>
+                    } else {
+                        data = JSON.parse(event.data as string) as Record<string, unknown>
+                    }
+                } catch {
+                    if (DEBUG) console.warn('[GeminiLive] Failed to parse message')
+                    return
+                }
+
+                // Log all message types for debugging
+                const msgKeys = Object.keys(data).filter(k => k !== 'serverContent' || !('modelTurn' in (data.serverContent as Record<string, unknown> || {})))
+                if (!data.serverContent) {
+                    console.log('[GeminiLive] Message:', msgKeys.join(', '), JSON.stringify(data).slice(0, 200))
+                }
+
+                // Setup complete
+                if (data.setupComplete && !setupDone) {
+                    setupDone = true
+                    if (DEBUG) console.log('[GeminiLive] Setup complete')
+
+                    // Await audio capture so setMuted runs after getUserMedia resolves.
+                    // Wrap so a mic failure rejects the outer startSession promise.
+                    try {
+                        await startAudioCapture(state.playbackContext!)
+                    } catch (error) {
+                        const message = error instanceof Error ? error.message : 'Microphone error'
+                        cleanup()
+                        state.statusCallback?.('error', message)
+                        reject(error instanceof Error ? error : new Error(message))
+                        return
+                    }
+                    state.statusCallback?.('connected')
+
+                    const proactive = localStorage.getItem('hapi-voice-proactive') === 'true'
+
+                    if (proactive && config.initialContext) {
+                        // Proactive with context: speak the summary immediately.
+                        sendClientContent(`[Context] ${config.initialContext}`, true)
+                    } else {
+                        // Reactive, or proactive with no context: feed context silently if
+                        // available, then trigger a greeting so Gemini doesn't sit silent.
+                        if (config.initialContext) {
+                            sendClientContent(`[Context] ${config.initialContext}`, false)
+                        }
+                        sendClientContent('[Greet the user as HAPI. Say a brief hello and invite them to speak. Do not mention Gemini or any model name. Do not reference any context or recent activity.]', true)
+                    }
+
+                    resolve()
+                    return
+                }
+
+                // Server content (audio / text / turn complete)
+                const serverContent = data.serverContent as {
+                    modelTurn?: { parts?: Array<{ inlineData?: { data: string; mimeType: string }; text?: string }> }
+                    turnComplete?: boolean
+                } | undefined
+
+                if (serverContent) {
+                    if (serverContent.modelTurn?.parts) {
+                        // Model is generating — mute mic to prevent barge-in from noise
+                        if (!state.modelSpeaking) {
+                            state.modelSpeaking = true
+                            state.recorder?.setMuted(true)
+                        }
+                        for (const part of serverContent.modelTurn.parts) {
+                            if (part.inlineData?.data) {
+                                state.player?.enqueue(part.inlineData.data)
+                            }
+                            if (part.text) {
+                                console.log('[GeminiLive] Text:', part.text)
+                            }
+                        }
+                    }
+                    if (serverContent.turnComplete) {
+                        console.log('[GeminiLive] Turn complete')
+                        // Restore to user's chosen mute state, not unconditionally unmuted
+                        state.modelSpeaking = false
+                        state.recorder?.setMuted(state.micMuted)
+                    }
+                }
+
+                // Tool calls
+                const toolCall = data.toolCall as {
+                    functionCalls?: Array<{ name: string; args: Record<string, unknown>; id: string }>
+                } | undefined
+
+                if (toolCall?.functionCalls && toolCall.functionCalls.length > 0) {
+                    console.log('[GeminiLive] Tool calls:', toolCall.functionCalls.map((c) => c.name))
+
+                    const responses = await handleGeminiFunctionCalls(
+                        toolCall.functionCalls as GeminiFunctionCall[]
+                    )
+
+                    // Send tool responses back
+                    if (state.ws?.readyState === WebSocket.OPEN) {
+                        state.ws.send(JSON.stringify({
+                            toolResponse: {
+                                functionResponses: responses.map((r) => ({
+                                    id: r.id,
+                                    name: r.name,
+                                    response: r.response
+                                }))
+                            }
+                        }))
+                    }
+                }
+            }
+
+            ws.onerror = (event) => {
+                console.error('[GeminiLive] WebSocket error:', event)
+                if (!setupDone) {
+                    setupDone = true
+                    cleanup()
+                    state.statusCallback?.('error', 'WebSocket connection failed')
+                    reject(new Error('WebSocket connection failed'))
+                }
+            }
+
+            ws.onclose = (event) => {
+                if (state.ws !== ws) return
+                if (DEBUG) console.log('[GeminiLive] WebSocket closed:', event.code, event.reason)
+                cleanup()
+                resetRealtimeSessionState()
+                if (!setupDone) {
+                    const message = event.reason || 'WebSocket closed before setup completed'
+                    state.statusCallback?.('error', message)
+                    reject(new Error(message))
+                    return
+                }
+                state.statusCallback?.('disconnected')
+            }
+        })
+    }
+
+    async endSession(): Promise<void> {
+        cleanup()
+        resetRealtimeSessionState()
+        state.statusCallback?.('disconnected')
+    }
+
+    sendTextMessage(message: string): void {
+        sendClientContent(message)
+    }
+
+    sendContextualUpdate(update: string): void {
+        // Append context without triggering a response — turnComplete: false accumulates
+        // silently until the next sendTextMessage fires with turnComplete: true
+        sendClientContent(`[System Context Update] ${update}`, false)
+    }
+}
+
+function sendClientContent(text: string, turnComplete = true): void {
+    if (!state.ws || state.ws.readyState !== WebSocket.OPEN) return
+    state.ws.send(JSON.stringify({
+        clientContent: {
+            turns: [{ role: 'user', parts: [{ text }] }],
+            turnComplete
+        }
+    }))
+}
+
+function sendAudioChunk(base64Pcm: string): void {
+    if (!state.ws || state.ws.readyState !== WebSocket.OPEN) return
+    // Don't send audio while model is speaking
+    if (state.modelSpeaking) return
+    state.ws.send(JSON.stringify({
+        realtimeInput: {
+            mediaChunks: [{
+                mimeType: 'audio/pcm;rate=16000',
+                data: base64Pcm
+            }]
+        }
+    }))
+}
+
+async function startAudioCapture(playbackContext: AudioContext): Promise<void> {
+    state.player = new GeminiAudioPlayer(playbackContext)
+    state.recorder = new GeminiAudioRecorder()
+
+    await state.recorder.start(
+        (pcm16Chunk) => sendAudioChunk(pcm16Chunk),
+        (error) => {
+            console.error('[GeminiLive] Audio capture error:', error)
+            state.statusCallback?.('error', 'Microphone error')
+        }
+    )
+
+    // Apply mute state after recorder has a stream — safe to call either way
+    state.recorder.setMuted(state.micMuted)
+}
+
+// --- React component ---
+
+export interface GeminiLiveVoiceSessionProps {
+    api: ApiClient
+    micMuted?: boolean
+    onStatusChange?: StatusCallback
+    onRegistered?: () => void
+    getSession?: (sessionId: string) => Session | null
+    sendMessage?: (sessionId: string, message: string) => void
+    approvePermission?: (sessionId: string, requestId: string) => Promise<void>
+    denyPermission?: (sessionId: string, requestId: string) => Promise<void>
+}
+
+export function GeminiLiveVoiceSession({
+    api,
+    micMuted = false,
+    onStatusChange,
+    onRegistered,
+    getSession,
+    sendMessage,
+    approvePermission,
+    denyPermission
+}: GeminiLiveVoiceSessionProps) {
+    const hasRegistered = useRef(false)
+
+    // Store status callback
+    useEffect(() => {
+        state.statusCallback = onStatusChange || null
+        return () => { state.statusCallback = null }
+    }, [onStatusChange])
+
+    // Register session store for client tools
+    useEffect(() => {
+        if (getSession && sendMessage && approvePermission && denyPermission) {
+            registerSessionStore({
+                getSession: (sessionId: string) =>
+                    getSession(sessionId) as { agentState?: { requests?: Record<string, unknown> } } | null,
+                sendMessage,
+                approvePermission,
+                denyPermission
+            })
+        }
+    }, [getSession, sendMessage, approvePermission, denyPermission])
+
+    // Register voice session once
+    useEffect(() => {
+        if (!hasRegistered.current) {
+            try {
+                registerVoiceSession(new GeminiLiveVoiceSessionImpl(api))
+                hasRegistered.current = true
+                onRegistered?.()
+            } catch (error) {
+                console.error('[GeminiLive] Failed to register voice session:', error)
+            }
+        }
+    }, [api]) // eslint-disable-line react-hooks/exhaustive-deps
+
+    // Sync mic mute state — also persist to module state so startAudioCapture can apply it
+    useEffect(() => {
+        state.micMuted = micMuted
+        if (state.recorder) {
+            state.recorder.setMuted(micMuted)
+        }
+    }, [micMuted])
+
+
+    // Cleanup on unmount
+    useEffect(() => {
+        return () => {
+            cleanup()
+        }
+    }, [])
+
+    return null
+}
diff --git a/web/src/realtime/QwenVoiceSession.tsx b/web/src/realtime/QwenVoiceSession.tsx
new file mode 100644
index 000000000..f99ee3976
--- /dev/null
+++ b/web/src/realtime/QwenVoiceSession.tsx
@@ -0,0 +1,412 @@
+import { useEffect, useRef } from 'react'
+import { registerVoiceSession, resetRealtimeSessionState } from './RealtimeSession'
+import { registerSessionStore } from './realtimeClientTools'
+import { fetchQwenToken } from '@/api/voice'
+import { GeminiAudioRecorder } from './gemini/audioRecorder'
+import { GeminiAudioPlayer } from './gemini/audioPlayer'
+import { realtimeClientTools } from './realtimeClientTools'
+import {
+    VOICE_SYSTEM_PROMPT,
+    buildVoiceLanguageBlock,
+} from '@hapi/protocol/voice'
+import type { VoiceSession, VoiceSessionConfig, StatusCallback } from './types'
+import type { ApiClient } from '@/api/client'
+import type { Session } from '@/types/api'
+
+const DEBUG = import.meta.env.DEV
+
+// Qwen WebSocket connects via Hub proxy (browser can't set Authorization header)
+
+interface QwenState {
+    ws: WebSocket | null
+    recorder: GeminiAudioRecorder | null
+    player: GeminiAudioPlayer | null
+    playbackContext: AudioContext | null
+    statusCallback: StatusCallback | null
+    apiKey: string | null
+    wsBaseUrl: string | null
+    micMuted: boolean
+}
+
+const state: QwenState = {
+    ws: null,
+    recorder: null,
+    player: null,
+    playbackContext: null,
+    statusCallback: null,
+    apiKey: null,
+    wsBaseUrl: null,
+    micMuted: false
+}
+
+let eventCounter = 0
+function nextEventId(): string {
+    return `evt_${++eventCounter}`
+}
+
+function cleanup() {
+    if (state.recorder) {
+        state.recorder.dispose()
+        state.recorder = null
+    }
+    if (state.player) {
+        state.player.dispose()
+        state.player = null
+    }
+    if (state.playbackContext && state.playbackContext.state !== 'closed') {
+        void state.playbackContext.close()
+    }
+    state.playbackContext = null
+    if (state.ws) {
+        if (state.ws.readyState === WebSocket.OPEN || state.ws.readyState === WebSocket.CONNECTING) {
+            state.ws.close()
+        }
+        state.ws = null
+    }
+}
+
+function sendEvent(type: string, payload?: Record<string, unknown>): void {
+    if (!state.ws || state.ws.readyState !== WebSocket.OPEN) return
+    state.ws.send(JSON.stringify({
+        event_id: nextEventId(),
+        type,
+        ...payload
+    }))
+}
+
+
+class QwenVoiceSessionImpl implements VoiceSession {
+    private api: ApiClient
+    private currentInstructions: string | null = null
+
+    constructor(api: ApiClient) {
+        this.api = api
+    }
+
+    private updateInstructions(update: string): void {
+        if (this.currentInstructions === null) return
+        this.currentInstructions = `${this.currentInstructions}\n\n${update}`
+        // Hub filter allows only instruction-only session.update frames.
+        sendEvent('session.update', { session: { instructions: this.currentInstructions } })
+    }
+
+    async startSession(config: VoiceSessionConfig): Promise<void> {
+        // Mirror the base instructions the hub will send so subsequent updates accumulate correctly.
+        this.currentInstructions = `${VOICE_SYSTEM_PROMPT}${buildVoiceLanguageBlock(config.language)}`
+        cleanup()
+        state.statusCallback?.('connecting')
+
+        // Create playback AudioContext immediately while still inside the user
+        // gesture (click/tap). Mobile browsers require this for autoplay policy.
+        // Store in state so cleanup() can close it on failure or stop.
+        state.playbackContext = new AudioContext({ sampleRate: 24000 })
+        await state.playbackContext.resume()
+
+        // Check Qwen availability (hub no longer sends the raw API key)
+        const tokenResp = await fetchQwenToken(this.api)
+        if (!tokenResp.allowed) {
+            const msg = tokenResp.error ?? 'DashScope API key not available'
+            state.statusCallback?.('error', msg)
+            cleanup()
+            throw new Error(msg)
+        }
+        state.apiKey = null // key stays server-side
+        state.wsBaseUrl = tokenResp.wsUrl || null
+
+        // Request microphone
+        let permissionStream: MediaStream | null = null
+        try {
+            permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true })
+        } catch (error) {
+            state.statusCallback?.('error', 'Microphone permission denied')
+            cleanup()
+            throw error
+        } finally {
+            permissionStream?.getTracks().forEach((t) => t.stop())
+        }
+
+        // Connect via Hub WebSocket proxy (DashScope requires Authorization header,
+        // which browser WebSocket API doesn't support)
+        const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
+        const defaultProxyUrl = `${protocol}//${window.location.host}/api/voice/qwen-ws`
+        const proxyUrl = state.wsBaseUrl || defaultProxyUrl
+        const authToken = this.api.getAuthToken() || ''
+        const separator = proxyUrl.includes('?') ? '&' : '?'
+        const langParam = config.language ? `&language=${encodeURIComponent(config.language)}` : ''
+        const wsUrl = `${proxyUrl}${separator}token=${encodeURIComponent(authToken)}${langParam}`
+        const ws = new WebSocket(wsUrl)
+        state.ws = ws
+
+        return new Promise<void>((resolve, reject) => {
+            let sessionReady = false
+
+            ws.onopen = () => {
+                if (DEBUG) console.log('[Qwen] WebSocket connected')
+            }
+
+            ws.onmessage = async (event) => {
+                let data: Record<string, unknown>
+                try {
+                    data = JSON.parse(event.data as string) as Record<string, unknown>
+                } catch {
+                    if (DEBUG) console.warn('[Qwen] Failed to parse message')
+                    return
+                }
+
+                const eventType = data.type as string
+
+                // Session created — hub sends the initial session.update; browser waits for session.updated.
+                if (eventType === 'session.created') {
+                    if (DEBUG) console.log('[Qwen] Session created (hub owns setup)')
+                    return
+                }
+
+                // Session updated - only act on the first one (initial config ack).
+                // Subsequent session.update calls (for instruction appends) also
+                // echo session.updated — ignore those after setup is complete.
+                if (eventType === 'session.updated') {
+                    if (sessionReady) return
+                    sessionReady = true
+                    if (DEBUG) console.log('[Qwen] Session configured')
+                    try {
+                        await startAudioCapture(state.playbackContext!)
+                    } catch (error) {
+                        const message = error instanceof Error ? error.message : 'Microphone error'
+                        cleanup()
+                        state.statusCallback?.('error', message)
+                        reject(error instanceof Error ? error : new Error(message))
+                        return
+                    }
+                    state.statusCallback?.('connected')
+
+                    const proactive = localStorage.getItem('hapi-voice-proactive') === 'true'
+                    if (proactive && config.initialContext) {
+                        this.sendTextMessage(`[Context] ${config.initialContext}`)
+                    } else {
+                        if (config.initialContext) {
+                            this.sendContextualUpdate(config.initialContext)
+                        }
+                        this.sendTextMessage('[Greet the user as HAPI. Say a brief hello and invite them to speak. Do not mention Qwen or any model name. Do not reference any context or recent activity.]')
+                    }
+
+                    resolve()
+                    return
+                }
+
+                // Audio output streaming
+                if (eventType === 'response.audio.delta') {
+                    const delta = data.delta as string
+                    if (delta) {
+                        state.player?.enqueue(delta)
+                    }
+                    return
+                }
+
+                // Text transcript (for debug)
+                if (eventType === 'response.audio_transcript.delta' && DEBUG) {
+                    console.log('[Qwen] Transcript:', data.delta)
+                    return
+                }
+
+                // Function call complete
+                if (eventType === 'response.function_call_arguments.done') {
+                    const callId = data.call_id as string
+                    const fnName = data.name as string
+                    const argsStr = data.arguments as string
+
+                    if (DEBUG) console.log('[Qwen] Tool call:', fnName, argsStr)
+
+                    let args: Record<string, unknown> = {}
+                    try { args = JSON.parse(argsStr) } catch { /* empty */ }
+
+                    // Execute the tool
+                    const handler = fnName === 'messageCodingAgent'
+                        ? realtimeClientTools.messageCodingAgent
+                        : fnName === 'processPermissionRequest'
+                        ? realtimeClientTools.processPermissionRequest
+                        : null
+
+                    const result = handler
+                        ? await handler(args)
+                        : `error (unknown tool: ${fnName})`
+
+                    // Send function result back
+                    sendEvent('conversation.item.create', {
+                        item: {
+                            type: 'function_call_output',
+                            call_id: callId,
+                            output: typeof result === 'string' ? result : JSON.stringify(result)
+                        }
+                    })
+                    // Trigger model to continue
+                    sendEvent('response.create')
+                    return
+                }
+
+                // VAD: user started speaking - barge-in
+                if (eventType === 'input_audio_buffer.speech_started') {
+                    if (state.player?.isPlaying()) {
+                        state.player.clearQueue()
+                    }
+                    return
+                }
+
+                // Response done
+                if (eventType === 'response.done' && DEBUG) {
+                    const resp = data.response as Record<string, unknown> | undefined
+                    const usage = resp?.usage as Record<string, unknown> | undefined
+                    if (usage) console.log('[Qwen] Usage:', usage)
+                    return
+                }
+
+                // Error
+                if (eventType === 'error') {
+                    const err = data.error as { message?: string } | undefined
+                    const message = err?.message || 'Realtime session setup failed'
+                    console.error('[Qwen] Server error:', message)
+                    state.statusCallback?.('error', message)
+                    if (!sessionReady) {
+                        reject(new Error(message))
+                        ws.close()
+                    }
+                    return
+                }
+            }
+
+            ws.onerror = (event) => {
+                console.error('[Qwen] WebSocket error:', event)
+                if (!sessionReady) {
+                    sessionReady = true
+                    cleanup()
+                    state.statusCallback?.('error', 'WebSocket connection failed')
+                    reject(new Error('WebSocket connection failed'))
+                }
+            }
+
+            ws.onclose = (event) => {
+                if (state.ws !== ws) return
+                if (DEBUG) console.log('[Qwen] WebSocket closed:', event.code, event.reason)
+                cleanup()
+                resetRealtimeSessionState()
+                if (!sessionReady) {
+                    const message = event.reason || 'WebSocket closed before setup completed'
+                    state.statusCallback?.('error', message)
+                    reject(new Error(message))
+                    return
+                }
+                state.statusCallback?.('disconnected')
+            }
+        })
+    }
+
+    async endSession(): Promise<void> {
+        this.currentInstructions = null
+        cleanup()
+        resetRealtimeSessionState()
+        state.statusCallback?.('disconnected')
+    }
+
+    sendTextMessage(message: string): void {
+        // Qwen Realtime requires a user conversation item before response.create.
+        sendEvent('conversation.item.create', {
+            item: {
+                type: 'message',
+                role: 'user',
+                content: [{ type: 'input_text', text: message }]
+            }
+        })
+        sendEvent('response.create')
+    }
+
+    sendContextualUpdate(update: string): void {
+        // Append context silently — no response.create, so model doesn't speak yet.
+        this.updateInstructions(`[System Context Update] ${update}`)
+    }
+}
+
+async function startAudioCapture(playbackContext: AudioContext): Promise<void> {
+    state.player = new GeminiAudioPlayer(playbackContext)
+    state.recorder = new GeminiAudioRecorder()
+
+    await state.recorder.start(
+        (base64Pcm) => {
+            sendEvent('input_audio_buffer.append', { audio: base64Pcm })
+        },
+        (error) => {
+            console.error('[Qwen] Audio capture error:', error)
+            state.statusCallback?.('error', 'Microphone error')
+        }
+    )
+
+    // Apply mute state after recorder has a stream — safe to call either way
+    state.recorder.setMuted(state.micMuted)
+}
+
+// --- React component ---
+
+export interface QwenVoiceSessionProps {
+    api: ApiClient
+    micMuted?: boolean
+    onStatusChange?: StatusCallback
+    onRegistered?: () => void
+    getSession?: (sessionId: string) => Session | null
+    sendMessage?: (sessionId: string, message: string) => void
+    approvePermission?: (sessionId: string, requestId: string) => Promise<void>
+    denyPermission?: (sessionId: string, requestId: string) => Promise<void>
+}
+
+export function QwenVoiceSession({
+    api,
+    micMuted = false,
+    onStatusChange,
+    onRegistered,
+    getSession,
+    sendMessage,
+    approvePermission,
+    denyPermission
+}: QwenVoiceSessionProps) {
+    const hasRegistered = useRef(false)
+
+    useEffect(() => {
+        state.statusCallback = onStatusChange || null
+        return () => { state.statusCallback = null }
+    }, [onStatusChange])
+
+    useEffect(() => {
+        if (getSession && sendMessage && approvePermission && denyPermission) {
+            registerSessionStore({
+                getSession: (sessionId: string) =>
+                    getSession(sessionId) as { agentState?: { requests?: Record<string, unknown> } } | null,
+                sendMessage,
+                approvePermission,
+                denyPermission
+            })
+        }
+    }, [getSession, sendMessage, approvePermission, denyPermission])
+
+    useEffect(() => {
+        if (!hasRegistered.current) {
+            try {
+                registerVoiceSession(new QwenVoiceSessionImpl(api))
+                hasRegistered.current = true
+                onRegistered?.()
+            } catch (error) {
+                console.error('[Qwen] Failed to register voice session:', error)
+            }
+        }
+    }, [api]) // eslint-disable-line react-hooks/exhaustive-deps
+
+    // Sync mic mute state — also persist to module state so startAudioCapture can apply it
+    useEffect(() => {
+        state.micMuted = micMuted
+        if (state.recorder) {
+            state.recorder.setMuted(micMuted)
+        }
+    }, [micMuted])
+
+    useEffect(() => {
+        return () => { cleanup() }
+    }, [])
+
+    return null
+}
diff --git a/web/src/realtime/RealtimeVoiceSession.tsx b/web/src/realtime/RealtimeVoiceSession.tsx
index 428d1c171..89e438b74 100644
--- a/web/src/realtime/RealtimeVoiceSession.tsx
+++ b/web/src/realtime/RealtimeVoiceSession.tsx
@@ -136,6 +136,7 @@ export interface RealtimeVoiceSessionProps {
     api: ApiClient
     micMuted?: boolean
     onStatusChange?: StatusCallback
+    onRegistered?: () => void
     getSession?: (sessionId: string) => Session | null
     sendMessage?: (sessionId: string, message: string) => void
     approvePermission?: (sessionId: string, requestId: string) => Promise<void>
@@ -146,6 +147,7 @@ export function RealtimeVoiceSession({
     api,
     micMuted: micMutedProp = false,
     onStatusChange,
+    onRegistered,
     getSession,
     sendMessage,
     approvePermission,
@@ -241,6 +243,7 @@ export function RealtimeVoiceSession({
             try {
                 registerVoiceSession(new RealtimeVoiceSessionImpl(api))
                 hasRegistered.current = true
+                onRegistered?.()
             } catch (error) {
                 console.error('[Voice] Failed to register voice session:', error)
             }
diff --git a/web/src/realtime/VoiceBackendSession.tsx b/web/src/realtime/VoiceBackendSession.tsx
new file mode 100644
index 000000000..a30cf488f
--- /dev/null
+++ b/web/src/realtime/VoiceBackendSession.tsx
@@ -0,0 +1,69 @@
+import { lazy, Suspense, useCallback, useEffect, useState } from 'react'
+import { RealtimeVoiceSession } from './RealtimeVoiceSession'
+import type { RealtimeVoiceSessionProps } from './RealtimeVoiceSession'
+import { fetchVoiceBackend } from '@/api/voice'
+import type { ApiClient } from '@/api/client'
+import type { VoiceBackendType } from '@hapi/protocol/voice'
+
+// Lazy-load alternative backends to avoid bundling when using ElevenLabs
+const GeminiLiveVoiceSession = lazy(() =>
+    import('./GeminiLiveVoiceSession').then((m) => ({ default: m.GeminiLiveVoiceSession }))
+)
+const QwenVoiceSession = lazy(() =>
+    import('./QwenVoiceSession').then((m) => ({ default: m.QwenVoiceSession }))
+)
+
+export type VoiceBackendSessionProps = RealtimeVoiceSessionProps & {
+    api: ApiClient
+    onReadyChange?: (ready: boolean) => void
+}
+
+/**
+ * Dynamically selects the voice session component based on the hub's configured backend.
+ * Queries GET /voice/backend once on mount and renders the appropriate component.
+ * Only signals readiness after the selected backend has mounted and registered its session.
+ */
+export function VoiceBackendSession(props: VoiceBackendSessionProps) {
+    const [backend, setBackend] = useState<VoiceBackendType | null>(null)
+
+    useEffect(() => {
+        let cancelled = false
+        setBackend(null)
+        fetchVoiceBackend(props.api).then((resp) => {
+            if (!cancelled) setBackend(resp.backend)
+        }).catch((err: unknown) => {
+            if (!cancelled) {
+                const msg = err instanceof Error ? err.message : 'Could not detect voice backend'
+                props.onStatusChange?.('error', msg)
+            }
+        })
+        return () => {
+            cancelled = true
+            props.onReadyChange?.(false)
+        }
+    }, [props.api]) // eslint-disable-line react-hooks/exhaustive-deps
+
+    const handleRegistered = useCallback(() => {
+        props.onReadyChange?.(true)
+    }, [props.onReadyChange])
+
+    if (!backend) return null
+
+    if (backend === 'gemini-live') {
+        return (
+            <Suspense fallback={null}>
+                <GeminiLiveVoiceSession {...props} onRegistered={handleRegistered} />
+            </Suspense>
+        )
+    }
+
+    if (backend === 'qwen-realtime') {
+        return (
+            <Suspense fallback={null}>
+                <QwenVoiceSession {...props} onRegistered={handleRegistered} />
+            </Suspense>
+        )
+    }
+
+    return <RealtimeVoiceSession {...props} onRegistered={handleRegistered} />
+}
diff --git a/web/src/realtime/gemini/audioPlayer.ts b/web/src/realtime/gemini/audioPlayer.ts
new file mode 100644
index 000000000..23d1d341e
--- /dev/null
+++ b/web/src/realtime/gemini/audioPlayer.ts
@@ -0,0 +1,75 @@
+import { base64ToArrayBuffer, pcm16ToFloat32 } from './pcmUtils';
+
+export class GeminiAudioPlayer {
+  private audioContext: AudioContext;
+  private ownsContext: boolean;
+  private lastEndTime: number = 0;
+  private activeSources: AudioBufferSourceNode[] = [];
+
+  constructor(audioContext?: AudioContext) {
+    if (audioContext) {
+      this.audioContext = audioContext;
+      this.ownsContext = false;
+    } else {
+      this.audioContext = new AudioContext({ sampleRate: 24000 });
+      this.ownsContext = true;
+    }
+    this.lastEndTime = this.audioContext.currentTime;
+  }
+
+  enqueue(base64Pcm: string): void {
+    if (this.audioContext.state === 'suspended') {
+      this.audioContext.resume();
+    }
+
+    const arrayBuffer = base64ToArrayBuffer(base64Pcm);
+    const float32Data = pcm16ToFloat32(arrayBuffer);
+    
+    if (float32Data.length === 0) return;
+
+    const audioBuffer = this.audioContext.createBuffer(1, float32Data.length, 24000);
+    audioBuffer.copyToChannel(new Float32Array(float32Data), 0);
+
+    const source = this.audioContext.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(this.audioContext.destination);
+
+    const startTime = Math.max(this.audioContext.currentTime, this.lastEndTime);
+    
+    source.onended = () => {
+      const index = this.activeSources.indexOf(source);
+      if (index > -1) {
+        this.activeSources.splice(index, 1);
+      }
+    };
+
+    source.start(startTime);
+    this.activeSources.push(source);
+
+    this.lastEndTime = startTime + audioBuffer.duration;
+  }
+
+  clearQueue(): void {
+    this.activeSources.forEach(source => {
+      try {
+        source.stop();
+      } catch (e) {
+        // Ignore if already stopped
+      }
+      source.disconnect();
+    });
+    this.activeSources = [];
+    this.lastEndTime = this.audioContext.currentTime;
+  }
+
+  isPlaying(): boolean {
+    return this.lastEndTime > this.audioContext.currentTime;
+  }
+
+  dispose(): void {
+    this.clearQueue();
+    if (this.ownsContext && this.audioContext.state !== 'closed') {
+      this.audioContext.close();
+    }
+  }
+}
diff --git a/web/src/realtime/gemini/audioRecorder.ts b/web/src/realtime/gemini/audioRecorder.ts
new file mode 100644
index 000000000..98813212a
--- /dev/null
+++ b/web/src/realtime/gemini/audioRecorder.ts
@@ -0,0 +1,139 @@
+import { float32ToPcm16, arrayBufferToBase64 } from './pcmUtils';
+
+// Inline worklet source to avoid Vite bundling issues with ?url imports.
+// AudioWorklet.addModule() requires a URL to valid JS, so we create a Blob URL.
+const WORKLET_SOURCE = `
+class PcmRecorderProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.buffer = new Float32Array(4096);
+    this.idx = 0;
+  }
+  process(inputs) {
+    const input = inputs[0];
+    if (input && input.length > 0) {
+      const channel = input[0];
+      for (let i = 0; i < channel.length; i++) {
+        this.buffer[this.idx++] = channel[i];
+        if (this.idx >= 4096) {
+          this.port.postMessage({ samples: this.buffer.slice() });
+          this.idx = 0;
+        }
+      }
+    }
+    return true;
+  }
+}
+registerProcessor('pcm-recorder-processor', PcmRecorderProcessor);
+`;
+
+function createWorkletUrl(): string {
+  const blob = new Blob([WORKLET_SOURCE], { type: 'application/javascript' });
+  return URL.createObjectURL(blob);
+}
+
+export class GeminiAudioRecorder {
+  private audioContext: AudioContext | null = null;
+  private mediaStream: MediaStream | null = null;
+  private sourceNode: MediaStreamAudioSourceNode | null = null;
+  private workletNode: AudioWorkletNode | null = null;
+  private scriptNode: ScriptProcessorNode | null = null;
+
+  async start(onChunk: (base64Pcm: string) => void, onError?: (error: Error) => void): Promise<void> {
+    try {
+      this.mediaStream = await navigator.mediaDevices.getUserMedia({
+        audio: { sampleRate: 16000, channelCount: 1 }
+      });
+
+      this.mediaStream.getTracks().forEach((track) => {
+        track.onended = () => {
+          if (onError) onError(new Error('Microphone disconnected'));
+        };
+      });
+
+      this.audioContext = new AudioContext({ sampleRate: 16000 });
+      if (this.audioContext.state === 'suspended') {
+        await this.audioContext.resume();
+      }
+
+      this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
+
+      try {
+        const workletUrl = createWorkletUrl();
+        await this.audioContext.audioWorklet.addModule(workletUrl);
+        URL.revokeObjectURL(workletUrl);
+
+        this.workletNode = new AudioWorkletNode(this.audioContext, 'pcm-recorder-processor');
+        this.workletNode.port.onmessage = (event) => {
+          const pcm16 = float32ToPcm16(event.data.samples);
+          const base64 = arrayBufferToBase64(pcm16);
+          onChunk(base64);
+        };
+        // Connect source → worklet → silent sink → destination.
+        // The downstream connection is required so the audio graph pulls
+        // frames through the worklet node and port.onmessage fires.
+        const sink = this.audioContext.createGain();
+        sink.gain.value = 0;
+        this.sourceNode.connect(this.workletNode);
+        this.workletNode.connect(sink);
+        sink.connect(this.audioContext.destination);
+      } catch (e) {
+        console.warn('[GeminiLive] AudioWorklet failed, falling back to ScriptProcessorNode', e);
+        this.scriptNode = this.audioContext.createScriptProcessor(4096, 1, 1);
+        this.scriptNode.onaudioprocess = (event) => {
+          const inputData = event.inputBuffer.getChannelData(0);
+          const pcm16 = float32ToPcm16(new Float32Array(inputData));
+          const base64 = arrayBufferToBase64(pcm16);
+          onChunk(base64);
+        };
+        this.sourceNode.connect(this.scriptNode);
+        this.scriptNode.connect(this.audioContext.destination);
+      }
+    } catch (e) {
+      if (onError) onError(e instanceof Error ? e : new Error(String(e)));
+      throw e;
+    }
+  }
+
+  stop(): void {
+    if (this.mediaStream) {
+      this.mediaStream.getTracks().forEach(track => {
+        track.onended = null;
+        track.stop();
+      });
+      this.mediaStream = null;
+    }
+
+    if (this.scriptNode) {
+      this.scriptNode.disconnect();
+      this.scriptNode = null;
+    }
+
+    if (this.workletNode) {
+      this.workletNode.disconnect();
+      this.workletNode = null;
+    }
+
+    if (this.sourceNode) {
+      this.sourceNode.disconnect();
+      this.sourceNode = null;
+    }
+
+    if (this.audioContext) {
+      this.audioContext.close();
+      this.audioContext = null;
+    }
+  }
+
+  setMuted(muted: boolean): void {
+    if (this.mediaStream) {
+      this.mediaStream.getAudioTracks().forEach(track => {
+        track.enabled = !muted;
+      });
+    }
+  }
+
+  dispose(): void {
+    this.stop();
+  }
+}
diff --git a/web/src/realtime/gemini/pcm-recorder.worklet.ts b/web/src/realtime/gemini/pcm-recorder.worklet.ts
new file mode 100644
index 000000000..404f65445
--- /dev/null
+++ b/web/src/realtime/gemini/pcm-recorder.worklet.ts
@@ -0,0 +1,35 @@
+// AudioWorklet processor runs in a separate scope with its own globals.
+// These declarations satisfy TypeScript without pulling in DOM lib types.
+declare class AudioWorkletProcessor {
+  readonly port: MessagePort
+  constructor()
+}
+declare function registerProcessor(name: string, ctor: new () => AudioWorkletProcessor): void
+
+class PcmRecorderProcessor extends AudioWorkletProcessor {
+  private buffer: Float32Array;
+  private bufferSize = 4096;
+  private bufferIndex = 0;
+
+  constructor() {
+    super();
+    this.buffer = new Float32Array(this.bufferSize);
+  }
+
+  process(inputs: Float32Array[][]): boolean {
+    const input = inputs[0];
+    if (input && input.length > 0) {
+      const channel = input[0];
+      for (let i = 0; i < channel.length; i++) {
+        this.buffer[this.bufferIndex++] = channel[i];
+        if (this.bufferIndex >= this.bufferSize) {
+          this.port.postMessage({ samples: this.buffer.slice() });
+          this.bufferIndex = 0;
+        }
+      }
+    }
+    return true;
+  }
+}
+
+registerProcessor('pcm-recorder-processor', PcmRecorderProcessor);
diff --git a/web/src/realtime/gemini/pcmUtils.test.ts b/web/src/realtime/gemini/pcmUtils.test.ts
new file mode 100644
index 000000000..1b2159f5c
--- /dev/null
+++ b/web/src/realtime/gemini/pcmUtils.test.ts
@@ -0,0 +1,60 @@
+import { describe, test, expect } from 'vitest'
+import {
+    float32ToPcm16,
+    pcm16ToFloat32,
+    arrayBufferToBase64,
+    base64ToArrayBuffer
+} from './pcmUtils'
+
+describe('pcmUtils', () => {
+    describe('float32ToPcm16 / pcm16ToFloat32 round-trip', () => {
+        test('preserves signal within quantization error', () => {
+            const input = new Float32Array([0, 0.5, -0.5, 1.0, -1.0])
+            const pcm16 = float32ToPcm16(input)
+            const output = pcm16ToFloat32(pcm16)
+
+            expect(output.length).toBe(input.length)
+            for (let i = 0; i < input.length; i++) {
+                expect(Math.abs(output[i] - input[i])).toBeLessThan(0.001)
+            }
+        })
+
+        test('clamps values outside [-1, 1]', () => {
+            const input = new Float32Array([2.0, -2.0])
+            const pcm16 = float32ToPcm16(input)
+            const output = pcm16ToFloat32(pcm16)
+
+            expect(Math.abs(output[0] - 1.0)).toBeLessThan(0.001)
+            expect(Math.abs(output[1] - (-1.0))).toBeLessThan(0.001)
+        })
+
+        test('handles empty input', () => {
+            const input = new Float32Array(0)
+            const pcm16 = float32ToPcm16(input)
+            expect(pcm16.byteLength).toBe(0)
+            const output = pcm16ToFloat32(pcm16)
+            expect(output.length).toBe(0)
+        })
+    })
+
+    describe('arrayBufferToBase64 / base64ToArrayBuffer round-trip', () => {
+        test('preserves binary data', () => {
+            const original = new Uint8Array([0, 1, 127, 128, 255])
+            const base64 = arrayBufferToBase64(original.buffer)
+            const restored = new Uint8Array(base64ToArrayBuffer(base64))
+
+            expect(restored.length).toBe(original.length)
+            for (let i = 0; i < original.length; i++) {
+                expect(restored[i]).toBe(original[i])
+            }
+        })
+
+        test('handles empty buffer', () => {
+            const empty = new ArrayBuffer(0)
+            const base64 = arrayBufferToBase64(empty)
+            expect(base64).toBe('')
+            const restored = base64ToArrayBuffer(base64)
+            expect(restored.byteLength).toBe(0)
+        })
+    })
+})
diff --git a/web/src/realtime/gemini/pcmUtils.ts b/web/src/realtime/gemini/pcmUtils.ts
new file mode 100644
index 000000000..67e2928fc
--- /dev/null
+++ b/web/src/realtime/gemini/pcmUtils.ts
@@ -0,0 +1,39 @@
+export function float32ToPcm16(samples: Float32Array): ArrayBuffer {
+  const buffer = new ArrayBuffer(samples.length * 2);
+  const view = new DataView(buffer);
+  for (let i = 0; i < samples.length; i++) {
+    let s = Math.max(-1, Math.min(1, samples[i]));
+    s = s < 0 ? s * 0x8000 : s * 0x7FFF;
+    view.setInt16(i * 2, s, true);
+  }
+  return buffer;
+}
+
+export function pcm16ToFloat32(buffer: ArrayBuffer): Float32Array {
+  const int16Array = new Int16Array(buffer);
+  const float32Array = new Float32Array(int16Array.length);
+  for (let i = 0; i < int16Array.length; i++) {
+    const s = int16Array[i];
+    float32Array[i] = s < 0 ? s / 0x8000 : s / 0x7FFF;
+  }
+  return float32Array;
+}
+
+export function arrayBufferToBase64(buffer: ArrayBuffer): string {
+  let binary = '';
+  const bytes = new Uint8Array(buffer);
+  const len = bytes.byteLength;
+  for (let i = 0; i < len; i++) {
+    binary += String.fromCharCode(bytes[i]);
+  }
+  return btoa(binary);
+}
+
+export function base64ToArrayBuffer(base64: string): ArrayBuffer {
+  const binary = atob(base64);
+  const bytes = new Uint8Array(binary.length);
+  for (let i = 0; i < binary.length; i++) {
+    bytes[i] = binary.charCodeAt(i);
+  }
+  return bytes.buffer;
+}
diff --git a/web/src/realtime/gemini/toolAdapter.test.ts b/web/src/realtime/gemini/toolAdapter.test.ts
new file mode 100644
index 000000000..651e890a1
--- /dev/null
+++ b/web/src/realtime/gemini/toolAdapter.test.ts
@@ -0,0 +1,28 @@
+import { describe, test, expect } from 'vitest'
+import { handleGeminiFunctionCall, handleGeminiFunctionCalls } from './toolAdapter'
+import type { GeminiFunctionCall } from './toolAdapter'
+
+describe('toolAdapter', () => {
+    test('returns error for unknown tool', async () => {
+        const call: GeminiFunctionCall = {
+            name: 'unknownTool',
+            args: {},
+            id: 'call-1'
+        }
+        const resp = await handleGeminiFunctionCall(call)
+        expect(resp.name).toBe('unknownTool')
+        expect(resp.id).toBe('call-1')
+        expect(resp.response.result).toContain('unknown tool')
+    })
+
+    test('handles multiple calls in parallel', async () => {
+        const calls: GeminiFunctionCall[] = [
+            { name: 'unknownA', args: {}, id: 'a' },
+            { name: 'unknownB', args: {}, id: 'b' }
+        ]
+        const responses = await handleGeminiFunctionCalls(calls)
+        expect(responses.length).toBe(2)
+        expect(responses[0].id).toBe('a')
+        expect(responses[1].id).toBe('b')
+    })
+})
diff --git a/web/src/realtime/gemini/toolAdapter.ts b/web/src/realtime/gemini/toolAdapter.ts
new file mode 100644
index 000000000..dbf4dee9c
--- /dev/null
+++ b/web/src/realtime/gemini/toolAdapter.ts
@@ -0,0 +1,76 @@
+import { realtimeClientTools } from '../realtimeClientTools'
+
+/**
+ * Gemini Live API function call from server.
+ * Matches the `toolCall` shape in a BidiGenerateContent serverMessage.
+ */
+export interface GeminiFunctionCall {
+    name: string
+    args: Record<string, unknown>
+    id: string
+}
+
+/**
+ * Response sent back to Gemini Live via `toolResponse`.
+ */
+export interface GeminiFunctionResponse {
+    name: string
+    id: string
+    response: { result: string }
+}
+
+type ClientToolHandler = (parameters: unknown) => Promise<string>
+
+const toolHandlers: Record<string, ClientToolHandler> = {
+    messageCodingAgent: realtimeClientTools.messageCodingAgent,
+    processPermissionRequest: realtimeClientTools.processPermissionRequest
+}
+
+/**
+ * Execute a Gemini Live function call using the existing client tool handlers.
+ * Returns a GeminiFunctionResponse ready to send back over the WebSocket.
+ */
+export async function handleGeminiFunctionCall(
+    call: GeminiFunctionCall
+): Promise<GeminiFunctionResponse> {
+    const handler = toolHandlers[call.name]
+
+    if (!handler) {
+        return {
+            name: call.name,
+            id: call.id,
+            response: { result: `error (unknown tool: ${call.name})` }
+        }
+    }
+
+    try {
+        const result = await handler(call.args)
+        return {
+            name: call.name,
+            id: call.id,
+            response: { result }
+        }
+    } catch (error) {
+        const message = error instanceof Error ? error.message : 'unknown error'
+        return {
+            name: call.name,
+            id: call.id,
+            response: { result: `error (${message})` }
+        }
+    }
+}
+
+/**
+ * Process multiple function calls sequentially to avoid racing on shared
+ * session state (e.g. processPermissionRequest resolving the same pending
+ * request twice when calls run in parallel).
+ */
+export async function handleGeminiFunctionCalls(
+    calls: GeminiFunctionCall[]
+): Promise<GeminiFunctionResponse[]> {
+    const responses: GeminiFunctionResponse[] = []
+    for (const call of calls) {
+        responses.push(await handleGeminiFunctionCall(call))
+    }
+    return responses
+}
diff --git a/web/src/realtime/hooks/voiceHooks.ts b/web/src/realtime/hooks/voiceHooks.ts
index c6318d3c1..e87630294 100644
--- a/web/src/realtime/hooks/voiceHooks.ts
+++ b/web/src/realtime/hooks/voiceHooks.ts
@@ -151,7 +151,13 @@ export const voiceHooks = {
         reportSession(sessionId)
         const messages = messagesGetter?.(sessionId) ?? []
         const lastAssistantText = extractLastAssistantSpeakable(messages)
-        reportTextUpdate(formatReadyEvent(sessionId, lastAssistantText))
+        const update = formatReadyEvent(sessionId, lastAssistantText)
+        const proactive = localStorage.getItem('hapi-voice-proactive') === 'true'
+        if (proactive) {
+            reportTextUpdate(update)
+        } else {
+            reportContextualUpdate(update)
+        }
     },
 
     /**
diff --git a/web/src/realtime/index.ts b/web/src/realtime/index.ts
index 58b7b229e..d3e58c686 100644
--- a/web/src/realtime/index.ts
+++ b/web/src/realtime/index.ts
@@ -15,8 +15,9 @@ export {
 // Client tools
 export { realtimeClientTools, registerSessionStore } from './realtimeClientTools'
 
-// Voice session component
+// Voice session components
 export { RealtimeVoiceSession, type RealtimeVoiceSessionProps } from './RealtimeVoiceSession'
+export { VoiceBackendSession, type VoiceBackendSessionProps } from './VoiceBackendSession'
 
 // Voice hooks
 export { voiceHooks, registerVoiceHooksStore } from './hooks/voiceHooks'
diff --git a/web/src/routes/settings/index.tsx b/web/src/routes/settings/index.tsx
index d754e9ad7..d798a64e4 100644
--- a/web/src/routes/settings/index.tsx
+++ b/web/src/routes/settings/index.tsx
@@ -349,6 +349,20 @@ export default function SettingsPage() {
     const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null)
     const currentAudioRef = useRef<HTMLAudioElement | null>(null)
 
+    // Voice proactive mode - read from localStorage, default false (reactive)
+    const [voiceProactive, setVoiceProactive] = useState<boolean>(() => {
+        return localStorage.getItem('hapi-voice-proactive') === 'true'
+    })
+
+    const handleVoiceProactiveChange = (value: boolean) => {
+        setVoiceProactive(value)
+        if (value) {
+            localStorage.setItem('hapi-voice-proactive', 'true')
+        } else {
+            localStorage.removeItem('hapi-voice-proactive')
+        }
+    }
+
     const fontScaleOptions = getFontScaleOptions()
     const terminalFontSizeOptions = getTerminalFontSizeOptions()
     const composerEnterBehaviorOptions = getComposerEnterBehaviorOptions()
@@ -988,7 +1002,6 @@ export default function SettingsPage() {
                                 </div>
                             )}
                         </div>
-
                         <div ref={voicePickerContainerRef} className="relative">
                             <button
                                 type="button"
@@ -1075,6 +1088,28 @@ export default function SettingsPage() {
                                 </div>
                             )}
                         </div>
+
+                        <div className="border-t border-[var(--app-divider)] px-3 py-3">
+                            <div className="flex items-center justify-between">
+                                <span className="text-[var(--app-fg)]">{t('settings.voice.proactive')}</span>
+                                <button
+                                    type="button"
+                                    role="switch"
+                                    aria-checked={voiceProactive}
+                                    onClick={() => handleVoiceProactiveChange(!voiceProactive)}
+                                    className={`relative inline-flex h-6 w-11 shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors focus-visible:outline-none ${
+                                        voiceProactive ? 'bg-[var(--app-link)]' : 'bg-[var(--app-border)]'
+                                    }`}
+                                >
+                                    <span
+                                        className={`pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow-lg transition-transform ${
+                                            voiceProactive ? 'translate-x-5' : 'translate-x-0'
+                                        }`}
+                                    />
+                                </button>
+                            </div>
+                            <p className="mt-1 text-xs text-[var(--app-hint)]">{t('settings.voice.proactive.description')}</p>
+                        </div>
                     </div>
 
                     {/* About section */}
diff --git a/web/tsconfig.json b/web/tsconfig.json
index 8b0682a4b..de7bcdca5 100644
--- a/web/tsconfig.json
+++ b/web/tsconfig.json
@@ -11,5 +11,6 @@
             "@/*": ["./src/*"]
         }
     },
-    "include": ["src"]
+    "include": ["src"],
+    "exclude": ["src/**/*.test.ts", "src/**/*.test.tsx", "src/**/*.spec.ts", "src/**/*.spec.tsx"]
 }