diff --git a/cli/src/utils/spawnHappyCLI.test.ts b/cli/src/utils/spawnHappyCLI.test.ts index bfc097e80b..eae409d788 100644 --- a/cli/src/utils/spawnHappyCLI.test.ts +++ b/cli/src/utils/spawnHappyCLI.test.ts @@ -157,6 +157,8 @@ describe('spawnHappyCLI windowsHide behavior', () => { it('falls back to a real argv0 executable before process.execPath in compiled mode', async () => { isBunCompiledMock.mockReturnValue(true); + const previousCliExecutable = process.env.HAPI_CLI_EXECUTABLE; + delete process.env.HAPI_CLI_EXECUTABLE; const previousArgv0 = process.argv[0]; process.argv[0] = 'C:\\Users\\Administrator\\.hapi\\patched\\resume-recovery-0.17.2\\hapi.exe'; const { resolveHappyCliExecutable } = await import('./spawnHappyCLI'); @@ -165,6 +167,11 @@ describe('spawnHappyCLI windowsHide behavior', () => { expect(resolveHappyCliExecutable()).toBe(process.argv[0]); } finally { process.argv[0] = previousArgv0; + if (previousCliExecutable === undefined) { + delete process.env.HAPI_CLI_EXECUTABLE; + } else { + process.env.HAPI_CLI_EXECUTABLE = previousCliExecutable; + } } }); diff --git a/hub/src/web/routes/voice.test.ts b/hub/src/web/routes/voice.test.ts new file mode 100644 index 0000000000..15249c7239 --- /dev/null +++ b/hub/src/web/routes/voice.test.ts @@ -0,0 +1,190 @@ +import { describe, expect, it, mock } from 'bun:test' +import { Hono } from 'hono' +import { SignJWT } from 'jose' +import type { WebAppEnv } from '../middleware/auth' +import { createAuthMiddleware } from '../middleware/auth' +import { createVoiceRoutes } from './voice' + +const JWT_SECRET = new TextEncoder().encode('test-secret') + +async function authHeaders() { + const token = await new SignJWT({ uid: 1, ns: 'default' }) + .setProtectedHeader({ alg: 'HS256' }) + .setIssuedAt() + .setExpirationTime('1h') + .sign(JWT_SECRET) + return { authorization: `Bearer ${token}` } +} + +function createApp() { + const app = new Hono() + app.use('*', createAuthMiddleware(JWT_SECRET)) + app.route('/api', createVoiceRoutes()) + return app +} + +describe('GET /api/voice/voices', () => { + it('returns 401 without auth', async () => { + const app = createApp() + const res = await app.request('/api/voice/voices') + expect(res.status).toBe(401) + }) + + it('returns empty list when ELEVENLABS_API_KEY is not set', async () => { + const app = createApp() + const headers = await authHeaders() + const prev = process.env.ELEVENLABS_API_KEY + delete process.env.ELEVENLABS_API_KEY + + const res = await app.request('/api/voice/voices', { headers }) + expect(res.status).toBe(200) + expect(await res.json()).toEqual({ voices: [] }) + + if (prev) process.env.ELEVENLABS_API_KEY = prev + }) + + it('maps ElevenLabs voice fields correctly', async () => { + const app = createApp() + const headers = await authHeaders() + const prev = process.env.ELEVENLABS_API_KEY + process.env.ELEVENLABS_API_KEY = 'test-key' + + const fetchMock = mock(() => Promise.resolve(new Response(JSON.stringify({ + voices: [ + { voice_id: 'v1', name: 'Alice', preview_url: 'https://cdn.example/a.mp3', category: 'premade' }, + { voice_id: 'v2', name: 'MyClone', preview_url: 'https://cdn.example/c.mp3', category: 'cloned' }, + ] + }), { status: 200 }))) + + const originalFetch = global.fetch + // @ts-expect-error test override + global.fetch = fetchMock + + const res = await app.request('/api/voice/voices', { headers }) + expect(res.status).toBe(200) + expect(await res.json()).toEqual({ + voices: [ + { id: 'v1', name: 'Alice', previewUrl: 'https://cdn.example/a.mp3', category: 'premade' }, + { id: 'v2', name: 'MyClone', previewUrl: 'https://cdn.example/c.mp3', category: 'cloned' }, + ] + }) + + global.fetch = originalFetch + if (prev) process.env.ELEVENLABS_API_KEY = prev + else delete process.env.ELEVENLABS_API_KEY + }) +}) + +describe('POST /api/voice/token', () => { + it('creates/selects voice-specific agent when voiceId is provided', async () => { + const app = createApp() + const headers = { + ...(await authHeaders()), + 'content-type': 'application/json' + } + + const prevKey = process.env.ELEVENLABS_API_KEY + const prevAgent = process.env.ELEVENLABS_AGENT_ID + process.env.ELEVENLABS_API_KEY = 'test-key' + delete process.env.ELEVENLABS_AGENT_ID + + const requests: Array<{ url: string; init?: RequestInit }> = [] + const originalFetch = global.fetch + // @ts-expect-error test override + global.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + const url = String(input) + requests.push({ url, init }) + + if (url.endsWith('/convai/agents') && init?.method === 'GET') { + return new Response(JSON.stringify({ agents: [] }), { status: 200 }) + } + if (url.endsWith('/convai/agents/create') && init?.method === 'POST') { + return new Response(JSON.stringify({ agent_id: 'agent_voice_alice' }), { status: 200 }) + } + if (url.includes('/convai/conversation/token?agent_id=')) { + return new Response(JSON.stringify({ token: 'tok_alice' }), { status: 200 }) + } + return new Response('not found', { status: 404 }) + }) as typeof fetch + + const res = await app.request('/api/voice/token', { + method: 'POST', + headers, + body: JSON.stringify({ voiceId: 'alice-voice-id' }) + }) + + expect(res.status).toBe(200) + expect(await res.json()).toEqual({ + allowed: true, + token: 'tok_alice', + agentId: 'agent_voice_alice' + }) + + const createCall = requests.find(r => r.url.endsWith('/convai/agents/create')) + expect(createCall).toBeTruthy() + const createBody = JSON.parse(String(createCall?.init?.body)) + expect(createBody.name).toContain('[voice:alice-voice-id]') + expect(createBody.conversation_config?.tts?.voice_id).toBe('alice-voice-id') + + global.fetch = originalFetch + if (prevKey) process.env.ELEVENLABS_API_KEY = prevKey + else delete process.env.ELEVENLABS_API_KEY + if (prevAgent) process.env.ELEVENLABS_AGENT_ID = prevAgent + else delete process.env.ELEVENLABS_AGENT_ID + }) + + it('prefers voice-specific agent over ELEVENLABS_AGENT_ID when voiceId is provided', async () => { + const app = createApp() + const headers = { + ...(await authHeaders()), + 'content-type': 'application/json' + } + + const prevKey = process.env.ELEVENLABS_API_KEY + const prevAgent = process.env.ELEVENLABS_AGENT_ID + process.env.ELEVENLABS_API_KEY = 'test-key' + process.env.ELEVENLABS_AGENT_ID = 'env_default_agent' + + const requests: Array<{ url: string; init?: RequestInit }> = [] + const originalFetch = global.fetch + // @ts-expect-error test override + global.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + const url = String(input) + requests.push({ url, init }) + + if (url.endsWith('/convai/agents') && init?.method === 'GET') { + return new Response(JSON.stringify({ agents: [] }), { status: 200 }) + } + if (url.endsWith('/convai/agents/create') && init?.method === 'POST') { + return new Response(JSON.stringify({ agent_id: 'agent_voice_jessicax' }), { status: 200 }) + } + if (url.includes('/convai/conversation/token?agent_id=')) { + return new Response(JSON.stringify({ token: 'tok_jessicax' }), { status: 200 }) + } + return new Response('not found', { status: 404 }) + }) as typeof fetch + + const res = await app.request('/api/voice/token', { + method: 'POST', + headers, + body: JSON.stringify({ voiceId: 'jessicax-voice-id' }) + }) + + expect(res.status).toBe(200) + expect(await res.json()).toEqual({ + allowed: true, + token: 'tok_jessicax', + agentId: 'agent_voice_jessicax' + }) + + const tokenCall = requests.find(r => r.url.includes('/convai/conversation/token?agent_id=')) + expect(tokenCall?.url).toContain('agent_id=agent_voice_jessicax') + expect(tokenCall?.url).not.toContain('agent_id=env_default_agent') + + global.fetch = originalFetch + if (prevKey) process.env.ELEVENLABS_API_KEY = prevKey + else delete process.env.ELEVENLABS_API_KEY + if (prevAgent) process.env.ELEVENLABS_AGENT_ID = prevAgent + else delete process.env.ELEVENLABS_AGENT_ID + }) +}) diff --git a/hub/src/web/routes/voice.ts b/hub/src/web/routes/voice.ts index 1a55f83639..091b9c2ac5 100644 --- a/hub/src/web/routes/voice.ts +++ b/hub/src/web/routes/voice.ts @@ -9,7 +9,17 @@ import { const tokenRequestSchema = z.object({ customAgentId: z.string().optional(), - customApiKey: z.string().optional() + customApiKey: z.string().optional(), + voiceId: z.string().optional() +}) + +const telemetryEventSchema = z.object({ + stage: z.string().min(1), + message: z.string().min(1), + sessionId: z.string().optional(), + voiceId: z.string().optional(), + language: z.string().optional(), + details: z.record(z.string(), z.unknown()).optional() }) // Cache for auto-created agent IDs (keyed by API key hash) @@ -20,10 +30,26 @@ interface ElevenLabsAgent { name: string } +function parseVoiceAgentMap(): Record { + const raw = process.env.ELEVENLABS_VOICE_AGENT_MAP + if (!raw) return {} + try { + const parsed = JSON.parse(raw) as unknown + if (!parsed || typeof parsed !== 'object') return {} + return Object.fromEntries( + Object.entries(parsed as Record) + .filter(([k, v]) => typeof k === 'string' && typeof v === 'string') + .map(([k, v]) => [k, v as string]) + ) + } catch { + return {} + } +} + /** * Find an existing "Hapi Voice Assistant" agent */ -async function findHapiAgent(apiKey: string): Promise { +async function findHapiAgent(apiKey: string, agentName: string = VOICE_AGENT_NAME): Promise { try { const response = await fetch(`${ELEVENLABS_API_BASE}/convai/agents`, { method: 'GET', @@ -39,7 +65,7 @@ async function findHapiAgent(apiKey: string): Promise { const data = await response.json() as { agents?: ElevenLabsAgent[] } const agents: ElevenLabsAgent[] = data.agents || [] - const hapiAgent = agents.find(agent => agent.name === VOICE_AGENT_NAME) + const hapiAgent = agents.find(agent => agent.name === agentName) return hapiAgent?.agent_id || null } catch { @@ -51,7 +77,17 @@ async function findHapiAgent(apiKey: string): Promise { * Create a new "Hapi Voice Assistant" agent */ async function createHapiAgent(apiKey: string): Promise { + return createNamedHapiAgent(apiKey, VOICE_AGENT_NAME) +} + +async function createNamedHapiAgent(apiKey: string, agentName: string, voiceId?: string): Promise { try { + const config = buildVoiceAgentConfig() + config.name = agentName + if (voiceId) { + config.conversation_config.tts.voice_id = voiceId + } + const response = await fetch(`${ELEVENLABS_API_BASE}/convai/agents/create`, { method: 'POST', headers: { @@ -59,7 +95,7 @@ async function createHapiAgent(apiKey: string): Promise { 'Content-Type': 'application/json', 'Accept': 'application/json' }, - body: JSON.stringify(buildVoiceAgentConfig()) + body: JSON.stringify(config) }) if (!response.ok) { @@ -83,23 +119,37 @@ async function createHapiAgent(apiKey: string): Promise { * Get or create agent ID - finds existing or creates new "Hapi Voice Assistant" agent */ async function getOrCreateAgentId(apiKey: string): Promise { + return getOrCreateAgentIdForVoice(apiKey) +} + +function getVoiceAgentName(voiceId?: string): string { + if (!voiceId || voiceId.trim().length === 0) return VOICE_AGENT_NAME + return `${VOICE_AGENT_NAME} [voice:${voiceId}]` +} + +async function getOrCreateAgentIdForVoice(apiKey: string, voiceId?: string): Promise { // Check cache first (simple hash of first/last chars of API key) - const cacheKey = `${apiKey.slice(0, 4)}...${apiKey.slice(-4)}` + const cacheKey = `${apiKey.slice(0, 4)}...${apiKey.slice(-4)}::${voiceId ?? 'default'}` const cached = agentIdCache.get(cacheKey) if (cached) { return cached } + const agentName = getVoiceAgentName(voiceId) + // Try to find existing agent - console.log('[Voice] No agent ID configured, searching for existing agent...') - let agentId = await findHapiAgent(apiKey) + console.log('[Voice] No agent ID configured, searching for existing agent...', { + voiceId, + agentName + }) + let agentId = await findHapiAgent(apiKey, agentName) if (agentId) { console.log('[Voice] Found existing agent:', agentId) } else { // Create new agent console.log('[Voice] No existing agent found, creating new one...') - agentId = await createHapiAgent(apiKey) + agentId = await createNamedHapiAgent(apiKey, agentName, voiceId) if (agentId) { console.log('[Voice] Created new agent:', agentId) } @@ -118,37 +168,64 @@ export function createVoiceRoutes(): Hono { // Get ElevenLabs ConvAI conversation token app.post('/voice/token', async (c) => { + const requestId = crypto.randomUUID() const json = await c.req.json().catch(() => null) const parsed = tokenRequestSchema.safeParse(json ?? {}) if (!parsed.success) { + console.warn('[Voice][Token] Invalid request body', { requestId }) return c.json({ allowed: false, error: 'Invalid request body' }, 400) } - const { customAgentId, customApiKey } = parsed.data + const { customAgentId, customApiKey, voiceId } = parsed.data // Use custom credentials if provided, otherwise fall back to env vars const apiKey = customApiKey || process.env.ELEVENLABS_API_KEY - let agentId = customAgentId || process.env.ELEVENLABS_AGENT_ID + const voiceAgentMap = parseVoiceAgentMap() + const mappedAgentId = voiceId ? voiceAgentMap[voiceId] : undefined + let agentId = customAgentId || mappedAgentId if (!apiKey) { + console.warn('[Voice][Token] Missing API key', { requestId }) return c.json({ allowed: false, error: 'ElevenLabs API key not configured' }, 400) } - // Auto-create agent if not configured + // If a voice was selected and no explicit mapping/custom agent is set, + // resolve/create a dedicated per-voice agent so selection always takes effect. + if (!agentId && voiceId) { + agentId = await getOrCreateAgentIdForVoice(apiKey, voiceId) ?? undefined + } + + // Fallback to environment default agent only when no voice-specific route applies. if (!agentId) { - agentId = await getOrCreateAgentId(apiKey) ?? undefined - if (!agentId) { - return c.json({ - allowed: false, - error: 'Failed to create ElevenLabs agent automatically' - }, 500) - } + agentId = process.env.ELEVENLABS_AGENT_ID + } + + // Final fallback for setups without configured agent id. + if (!agentId) { + agentId = await getOrCreateAgentIdForVoice(apiKey, undefined) ?? undefined + } + + if (!agentId) { + console.error('[Voice][Token] Failed to resolve/create agent ID', { requestId }) + return c.json({ + allowed: false, + error: 'Failed to create ElevenLabs agent automatically' + }, 500) } try { + console.log('[Voice][Token] Requesting ElevenLabs conversation token', { + requestId, + agentId, + voiceId, + hasCustomAgentId: Boolean(customAgentId), + hasMappedAgentId: Boolean(mappedAgentId), + hasCustomApiKey: Boolean(customApiKey) + }) + // Fetch conversation token from ElevenLabs const response = await fetch( `https://api.elevenlabs.io/v1/convai/conversation/token?agent_id=${encodeURIComponent(agentId)}`, @@ -164,7 +241,12 @@ export function createVoiceRoutes(): Hono { if (!response.ok) { const errorData = await response.json().catch(() => ({})) as { detail?: { message?: string }; error?: string } const errorMessage = errorData.detail?.message || errorData.error || `ElevenLabs API error: ${response.status}` - console.error('[Voice] Failed to get token from ElevenLabs:', errorMessage) + console.error('[Voice][Token] Failed to get token from ElevenLabs', { + requestId, + agentId, + status: response.status, + errorMessage + }) return c.json({ allowed: false, error: errorMessage @@ -173,19 +255,29 @@ export function createVoiceRoutes(): Hono { const data = await response.json() as { token?: string } if (!data.token) { + console.error('[Voice][Token] Token response missing token field', { + requestId, + agentId + }) return c.json({ allowed: false, error: 'No token in ElevenLabs response' }, 500) } + console.log('[Voice][Token] Token issued successfully', { requestId, agentId }) + return c.json({ allowed: true, token: data.token, agentId }) } catch (error) { - console.error('[Voice] Error fetching token:', error) + console.error('[Voice][Token] Error fetching token', { + requestId, + agentId, + error: error instanceof Error ? error.message : String(error) + }) return c.json({ allowed: false, error: error instanceof Error ? error.message : 'Network error' @@ -193,5 +285,84 @@ export function createVoiceRoutes(): Hono { } }) + // Get available ElevenLabs voices (includes user's voice clones) + app.get('/voice/voices', async (c) => { + const requestId = crypto.randomUUID() + const apiKey = process.env.ELEVENLABS_API_KEY + if (!apiKey) { + console.warn('[Voice][Voices] Missing API key, returning empty voices list', { requestId }) + return c.json({ voices: [] }) + } + + try { + const response = await fetch(`${ELEVENLABS_API_BASE}/voices`, { + headers: { + 'xi-api-key': apiKey, + 'Accept': 'application/json' + } + }) + + if (!response.ok) { + console.error('[Voice][Voices] ElevenLabs voices request failed', { + requestId, + status: response.status + }) + return c.json({ voices: [] }) + } + + const data = await response.json() as { + voices?: Array<{ + voice_id: string + name: string + preview_url: string + category: string + }> + } + + const voices = (data.voices ?? []).map(v => ({ + id: v.voice_id, + name: v.name, + previewUrl: v.preview_url, + category: v.category + })) + + console.log('[Voice][Voices] Voices fetched', { + requestId, + count: voices.length + }) + + return c.json({ voices }) + } catch (error) { + console.error('[Voice][Voices] Unexpected error fetching voices', { + requestId, + error: error instanceof Error ? error.message : String(error) + }) + return c.json({ voices: [] }) + } + }) + + app.post('/voice/telemetry', async (c) => { + const requestId = crypto.randomUUID() + const json = await c.req.json().catch(() => null) + const parsed = telemetryEventSchema.safeParse(json ?? {}) + if (!parsed.success) { + console.warn('[Voice][Telemetry] Invalid payload', { requestId }) + return c.json({ ok: false, error: 'Invalid telemetry payload' }, 400) + } + + const { stage, message, sessionId, voiceId, language, details } = parsed.data + console.log('[Voice][Telemetry]', { + requestId, + stage, + message, + sessionId, + voiceId, + language, + details + }) + + return c.json({ ok: true }) + }) + return app } diff --git a/shared/src/voice.ts b/shared/src/voice.ts index 6751f0eba4..2843d84eb4 100644 --- a/shared/src/voice.ts +++ b/shared/src/voice.ts @@ -208,6 +208,9 @@ export interface VoiceAgentConfig { language?: boolean first_message?: boolean } + tts?: { + voice_id?: boolean + } } } } @@ -249,6 +252,9 @@ export function buildVoiceAgentConfig(): VoiceAgentConfig { conversation_config_override: { agent: { language: true + }, + tts: { + voice_id: true } } } diff --git a/web/src/api/client.ts b/web/src/api/client.ts index 52c25619b0..f8bd941abd 100644 --- a/web/src/api/client.ts +++ b/web/src/api/client.ts @@ -525,7 +525,7 @@ export class ApiClient { }) } - async fetchVoiceToken(options?: { customAgentId?: string; customApiKey?: string }): Promise<{ + async fetchVoiceToken(options?: { customAgentId?: string; customApiKey?: string; voiceId?: string }): Promise<{ allowed: boolean token?: string agentId?: string @@ -536,4 +536,22 @@ export class ApiClient { body: JSON.stringify(options || {}) }) } + + async fetchVoices(): Promise<{ voices: Array<{ id: string; name: string; previewUrl: string; category: string }> }> { + return await this.request('/api/voice/voices') + } + + async sendVoiceTelemetry(event: { + stage: string + message: string + sessionId?: string + voiceId?: string + language?: string + details?: Record + }): Promise { + await this.request('/api/voice/telemetry', { + method: 'POST', + body: JSON.stringify(event) + }) + } } diff --git a/web/src/api/voice.ts b/web/src/api/voice.ts index 66cee443f1..c5e8f27a4d 100644 --- a/web/src/api/voice.ts +++ b/web/src/api/voice.ts @@ -26,6 +26,7 @@ export interface VoiceTokenResponse { export interface VoiceTokenRequest { customAgentId?: string customApiKey?: string + voiceId?: string } /** @@ -50,6 +51,22 @@ export async function fetchVoiceToken( } } +export interface VoiceInfo { + id: string + name: string + previewUrl: string + category: string +} + +export async function fetchVoices(api: ApiClient): Promise { + try { + const result = await api.fetchVoices() + return result.voices + } catch { + return [] + } +} + export interface ElevenLabsAgent { agent_id: string name: string diff --git a/web/src/lib/locales/en.ts b/web/src/lib/locales/en.ts index 905cda7c2a..3615adcc75 100644 --- a/web/src/lib/locales/en.ts +++ b/web/src/lib/locales/en.ts @@ -403,6 +403,8 @@ export default { 'settings.voice.title': 'Voice Assistant', 'settings.voice.language': 'Voice Language', 'settings.voice.autoDetect': 'Auto-detect', + 'settings.voice.voice': 'Voice', + 'settings.voice.voiceDefault': 'Default', 'settings.about.title': 'About', 'settings.about.website': 'Website', 'settings.about.appVersion': 'App Version', diff --git a/web/src/lib/locales/zh-CN.ts b/web/src/lib/locales/zh-CN.ts index cf5c7d73a8..50ae69eeab 100644 --- a/web/src/lib/locales/zh-CN.ts +++ b/web/src/lib/locales/zh-CN.ts @@ -405,6 +405,8 @@ export default { 'settings.voice.title': '语音助手', 'settings.voice.language': '语音语言', 'settings.voice.autoDetect': '自动检测', + 'settings.voice.voice': '声音', + 'settings.voice.voiceDefault': '默认', 'settings.about.title': '关于', 'settings.about.website': '官方网站', 'settings.about.appVersion': '应用版本', diff --git a/web/src/lib/voice-context.tsx b/web/src/lib/voice-context.tsx index 71424a4362..cebc030beb 100644 --- a/web/src/lib/voice-context.tsx +++ b/web/src/lib/voice-context.tsx @@ -40,11 +40,12 @@ export function VoiceProvider({ children }: { children: ReactNode }) { setCurrentSessionId(sessionId) const initialContext = voiceHooks.onVoiceStarted(sessionId) - // Read voice language preference from localStorage + // Read voice preferences from localStorage const voiceLang = localStorage.getItem('hapi-voice-lang') const elevenLabsLang = getElevenLabsCodeFromPreference(voiceLang) + const voiceId = localStorage.getItem('hapi-voice-id') ?? undefined - await startRealtimeSession(sessionId, initialContext, elevenLabsLang) + await startRealtimeSession(sessionId, initialContext, elevenLabsLang, voiceId) }, []) const stopVoice = useCallback(async () => { diff --git a/web/src/lib/voices.test.ts b/web/src/lib/voices.test.ts new file mode 100644 index 0000000000..d6d70888e5 --- /dev/null +++ b/web/src/lib/voices.test.ts @@ -0,0 +1,14 @@ +import { describe, it, expect } from 'vitest' +import { getFallbackVoices } from './voices' + +describe('getFallbackVoices', () => { + it('returns localized Chinese aliases for zh-CN fallback list', () => { + const voices = getFallbackVoices('zh-CN') + expect(voices.some(v => /杰西卡|瑞秋|贝拉|乔什|亚当/.test(v.name))).toBe(true) + }) + + it('keeps canonical English names for en fallback list', () => { + const voices = getFallbackVoices('en') + expect(voices.some(v => v.name === 'Jessica')).toBe(true) + }) +}) diff --git a/web/src/lib/voices.ts b/web/src/lib/voices.ts new file mode 100644 index 0000000000..6092cea23b --- /dev/null +++ b/web/src/lib/voices.ts @@ -0,0 +1,65 @@ +import type { Locale } from '@/lib/use-translation' + +export interface Voice { + id: string + name: string + gender: 'female' | 'male' + description: string + aliases?: Partial> +} + +export const VOICES: Voice[] = [ + { + id: 'cgSgspJ2msm6clMCkdW9', + name: 'Jessica', + aliases: { 'zh-CN': '杰西卡' }, + gender: 'female', + description: 'Default — warm, conversational', + }, + { + id: '21m00Tcm4TlvDq8ikWAM', + name: 'Rachel', + aliases: { 'zh-CN': '瑞秋' }, + gender: 'female', + description: 'Calm, professional', + }, + { + id: 'EXAVITQu4vr4xnSDxMaL', + name: 'Bella', + aliases: { 'zh-CN': '贝拉' }, + gender: 'female', + description: 'Soft, warm', + }, + { + id: 'TxGEqnHWrfWFTfGW9XjX', + name: 'Josh', + aliases: { 'zh-CN': '乔什' }, + gender: 'male', + description: 'Deep, smooth', + }, + { + id: 'pNInz6obpgDQGcFmaJgB', + name: 'Adam', + aliases: { 'zh-CN': '亚当' }, + gender: 'male', + description: 'Narration, clear', + }, + { id: 'AZnzlk1XvdvUeBnXmlld', name: 'Domi', gender: 'female', description: 'Strong, confident' }, + { id: 'MF3mGyEYCl7XYWbV9V6O', name: 'Elli', gender: 'female', description: 'Young, clear' }, + { id: 'VR6AewLTigWG4xSOukaG', name: 'Arnold', gender: 'male', description: 'Crisp, authoritative' }, + { id: 'ErXwobaYiN019PkySvjV', name: 'Antoni', gender: 'male', description: 'Well-rounded' }, + { id: 'yoZ06aMxZJJ28mfd3POQ', name: 'Sam', gender: 'male', description: 'Raspy, dynamic' }, +] + +export const DEFAULT_VOICE_ID = 'cgSgspJ2msm6clMCkdW9' + +export function getVoiceById(id: string | null): Voice | undefined { + return VOICES.find(v => v.id === id) +} + +export function getFallbackVoices(locale: Locale): Voice[] { + return VOICES.map((voice) => ({ + ...voice, + name: voice.aliases?.[locale] ?? voice.name, + })) +} diff --git a/web/src/realtime/RealtimeSession.ts b/web/src/realtime/RealtimeSession.ts index 132304f86e..ee9883d45d 100644 --- a/web/src/realtime/RealtimeSession.ts +++ b/web/src/realtime/RealtimeSession.ts @@ -8,7 +8,8 @@ let currentSessionId: string | null = null export async function startRealtimeSession( sessionId: string, initialContext?: string, - language?: ElevenLabsLanguage + language?: ElevenLabsLanguage, + voiceId?: string ) { if (!voiceSession) { console.warn('[Voice] No voice session registered') @@ -20,7 +21,8 @@ export async function startRealtimeSession( await voiceSession.startSession({ sessionId, initialContext, - language + language, + voiceId }) voiceSessionStarted = true } catch (error) { diff --git a/web/src/realtime/RealtimeVoiceSession.tsx b/web/src/realtime/RealtimeVoiceSession.tsx index fff9b7b44b..428d1c1715 100644 --- a/web/src/realtime/RealtimeVoiceSession.tsx +++ b/web/src/realtime/RealtimeVoiceSession.tsx @@ -49,7 +49,9 @@ class RealtimeVoiceSessionImpl implements VoiceSession { // Fetch conversation token from server let tokenResponse: Awaited> try { - tokenResponse = await fetchVoiceToken(this.api) + tokenResponse = await fetchVoiceToken(this.api, { + voiceId: config.voiceId + }) } catch (error) { console.error('[Voice] Failed to fetch voice token:', error) statusCallback?.('error', 'Network error') @@ -62,30 +64,38 @@ class RealtimeVoiceSessionImpl implements VoiceSession { throw error } + const baseSessionConfig = { + conversationToken: tokenResponse.token, + connectionType: 'webrtc' as const, + dynamicVariables: { + sessionId: config.sessionId, + initialConversationContext: config.initialContext || '' + }, + // Language override — requires override permissions enabled on the agent + // See: https://elevenlabs.io/docs/agents-platform/customization/personalization/overrides + overrides: { + agent: { + language: config.language + } + } + } + // Use conversation token from server (private agent flow) try { - const conversationId = await conversationInstance.startSession({ - conversationToken: tokenResponse.token, - connectionType: 'webrtc', - dynamicVariables: { - sessionId: config.sessionId, - initialConversationContext: config.initialContext || '' - }, - // Language override - requires agent to have platform_settings.overrides enabled - // See: https://elevenlabs.io/docs/agents-platform/customization/personalization/overrides - overrides: { - agent: { - language: config.language - } - } - }) + const conversationId = await conversationInstance.startSession(baseSessionConfig) if (DEBUG) { console.log('[Voice] Started conversation with ID:', conversationId) } } catch (error) { - console.error('[Voice] Failed to start realtime session:', error) - statusCallback?.('error', 'Failed to start voice session') + const errorMessage = error instanceof Error ? error.message : String(error) + console.error('[Voice] Failed to start realtime session:', { + error: errorMessage, + sessionId: config.sessionId, + language: config.language, + voiceId: config.voiceId + }) + statusCallback?.('error', `Failed to start voice session: ${errorMessage}`) throw error } } diff --git a/web/src/realtime/types.ts b/web/src/realtime/types.ts index 47753c5286..32b4b75dae 100644 --- a/web/src/realtime/types.ts +++ b/web/src/realtime/types.ts @@ -4,6 +4,7 @@ export interface VoiceSessionConfig { sessionId: string initialContext?: string language?: ElevenLabsLanguage + voiceId?: string } export interface VoiceSession { diff --git a/web/src/routes/settings/index.test.tsx b/web/src/routes/settings/index.test.tsx index daf059a4a5..0f3c894ae1 100644 --- a/web/src/routes/settings/index.test.tsx +++ b/web/src/routes/settings/index.test.tsx @@ -1,5 +1,5 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest' -import { render, screen } from '@testing-library/react' +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import { render, screen, fireEvent, waitFor, act, cleanup } from '@testing-library/react' import { I18nContext, I18nProvider } from '@/lib/i18n-context' import { en } from '@/lib/locales' import { PROTOCOL_VERSION } from '@hapi/protocol' @@ -95,6 +95,40 @@ vi.mock('@/lib/languages', () => ({ getLanguageDisplayName: (lang: { code: string | null; name: string }) => lang.name, })) +// Use vi.hoisted so these mocks are available when vi.mock factories run +const { mockFetchVoices, mockApi } = vi.hoisted(() => { + const mockFetchVoices = vi.fn(() => Promise.resolve([])) + const mockApi = { + fetchVoices: vi.fn(() => Promise.resolve({ voices: [] })), + } + return { mockFetchVoices, mockApi } +}) + +// Mock static voices list +vi.mock('@/lib/voices', () => ({ + VOICES: [{ id: 'voice1', name: 'Jessica', gender: 'female', description: 'Default' }], + DEFAULT_VOICE_ID: 'voice1', + getVoiceById: (id: string | null) => + id === 'voice1' ? { id: 'voice1', name: 'Jessica', gender: 'female', description: 'Default' } : undefined, + getFallbackVoices: () => [{ id: 'voice1', name: 'Jessica', gender: 'female', description: 'Default' }], +})) + +// Mock fetchVoices to return a resolved list by default +vi.mock('@/api/voice', () => ({ + fetchVoices: mockFetchVoices, + fetchVoiceToken: vi.fn(() => Promise.resolve({ allowed: true, token: 'tok' })), +})) + +// Mock useAppContext so the page doesn't throw "AppContext is not available" +vi.mock('@/lib/app-context', () => ({ + useAppContext: () => ({ api: mockApi, token: 'test', baseUrl: '' }), + AppContextProvider: ({ children }: { children: React.ReactNode }) => children, +})) + + +afterEach(() => { + cleanup() +}) function renderWithProviders(ui: React.ReactElement) { return render( @@ -117,9 +151,11 @@ function renderWithSpyT(ui: React.ReactElement) { describe('SettingsPage', () => { beforeEach(() => { vi.clearAllMocks() + // Reset fetchVoices mock to return empty list by default + mockFetchVoices.mockResolvedValue([]) // Mock localStorage const localStorageMock = { - getItem: vi.fn(() => 'en'), + getItem: vi.fn(() => null), setItem: vi.fn(), removeItem: vi.fn(), clear: vi.fn(), @@ -231,4 +267,139 @@ describe('SettingsPage', () => { expect(calledKeys).toContain('settings.chat.userMessageBackground') expect(calledKeys).toContain('settings.chat.surfaceColor.default') }) + + // Voice picker tests + it('renders the Voice section with "Voice" label', () => { + renderWithProviders() + expect(screen.getAllByText('Voice').length).toBeGreaterThanOrEqual(1) + }) + + it('uses correct i18n keys for the voice picker', () => { + const spyT = renderWithSpyT() + const calledKeys = spyT.mock.calls.map((call) => call[0]) + expect(calledKeys).toContain('settings.voice.voice') + expect(calledKeys).toContain('settings.voice.voiceDefault') + }) + + it('voice picker shows "Default" option when opened', () => { + renderWithProviders() + // The current value "Default" is shown in the closed picker button + expect(screen.getAllByText('Default').length).toBeGreaterThanOrEqual(1) + }) + + it('opens voice picker and shows "Default" option in the list', () => { + renderWithProviders() + // Click the voice picker button (aria-label target via the label text) + const voiceButtons = screen.getAllByRole('button', { name: /Default/i }) + // Find the button that has aria-haspopup — that's the voice picker trigger + const pickerButton = voiceButtons.find(btn => btn.getAttribute('aria-haspopup') === 'listbox') + expect(pickerButton).toBeTruthy() + fireEvent.click(pickerButton!) + // The listbox should appear with a "Default" option inside + const listbox = screen.getByRole('listbox', { name: 'Voice' }) + expect(listbox).toBeInTheDocument() + expect(listbox.textContent).toContain('Default') + }) + + it('shows dynamic voices in picker when fetchVoices returns a list', async () => { + mockFetchVoices.mockResolvedValue([ + { id: 'dyn1', name: 'Alice', previewUrl: '', category: 'premade' }, + { id: 'dyn2', name: 'Bob', previewUrl: 'https://example.com/bob.mp3', category: 'premade' }, + ]) + + renderWithProviders() + + const pickerButton = screen.getByRole('button', { name: /Voice\s*Default/i }) + fireEvent.click(pickerButton) + + await waitFor(() => { + expect(screen.getByText('Alice')).toBeInTheDocument() + expect(screen.getByText('Bob')).toBeInTheDocument() + }) + }) + + + it('shows a disabled preview button with tooltip when previewUrl is missing', async () => { + mockFetchVoices.mockResolvedValue([ + { id: 'dyn1', name: 'Alice', previewUrl: '', category: 'premade' }, + ]) + + renderWithProviders() + + const pickerButton = screen.getByRole('button', { name: /Voice\s*Default/i }) + fireEvent.click(pickerButton) + + const previewButton = await screen.findByLabelText('Preview voice') + expect(previewButton).toBeDisabled() + expect(previewButton).toHaveAttribute('title', 'Preview unavailable without an ElevenLabs API key') + }) + + it('shows a play button for voices with a previewUrl', async () => { + mockFetchVoices.mockResolvedValue([ + { id: 'dyn1', name: 'Alice', previewUrl: 'https://example.com/alice.mp3', category: 'premade' }, + ]) + + renderWithProviders() + + const pickerButton = screen.getByRole('button', { name: /Voice\s*Default/i }) + fireEvent.click(pickerButton) + + await screen.findByText('Alice') + expect(screen.getByLabelText('Preview voice')).toBeInTheDocument() + expect(screen.getByLabelText('Preview voice')).not.toBeDisabled() + }) + + it('stops preview audio on unmount', async () => { + mockFetchVoices.mockResolvedValue([ + { id: 'dyn1', name: 'Alice', previewUrl: 'https://example.com/alice.mp3', category: 'premade' }, + ]) + + const pause = vi.fn() + const play = vi.fn(() => Promise.resolve()) + const addEventListener = vi.fn() + class MockAudio { + pause = pause + play = play + addEventListener = addEventListener + constructor(_url: string) {} + } + const OriginalAudio = globalThis.Audio + const OriginalWindowAudio = window.Audio + // @ts-expect-error test override + globalThis.Audio = MockAudio + // @ts-expect-error test override + window.Audio = MockAudio + + const view = renderWithProviders() + const pickerButton = screen.getByRole('button', { name: /Voice\s*Default/i }) + fireEvent.click(pickerButton) + const aliceLabel = await screen.findByText('Alice') + const optionRow = aliceLabel.closest('[role="option"]') + expect(optionRow).toBeTruthy() + const enabledPreview = optionRow?.querySelector('button[aria-label="Preview voice"]') as HTMLButtonElement | null + expect(enabledPreview).toBeTruthy() + expect(enabledPreview?.disabled).toBe(false) + fireEvent.click(enabledPreview as HTMLElement) + + view.unmount() + expect(pause).toHaveBeenCalled() + + globalThis.Audio = OriginalAudio + window.Audio = OriginalWindowAudio + }) + + it('selecting a voice calls localStorage.setItem with the voice id', async () => { + mockFetchVoices.mockResolvedValue([ + { id: 'dyn1', name: 'Alice', previewUrl: '', category: 'premade' }, + ]) + + renderWithProviders() + + const pickerButton = screen.getByRole('button', { name: /Voice\s*Default/i }) + fireEvent.click(pickerButton) + + const alice = await screen.findByText('Alice') + fireEvent.click(alice) + expect(window.localStorage.setItem).toHaveBeenCalledWith('hapi-voice-id', 'dyn1') + }) }) diff --git a/web/src/routes/settings/index.tsx b/web/src/routes/settings/index.tsx index 78dc263d88..5faf696af1 100644 --- a/web/src/routes/settings/index.tsx +++ b/web/src/routes/settings/index.tsx @@ -2,6 +2,9 @@ import { useState, useRef, useEffect } from 'react' import { useTranslation, type Locale } from '@/lib/use-translation' import { useAppGoBack } from '@/hooks/useAppGoBack' import { getElevenLabsSupportedLanguages, getLanguageDisplayName, type Language } from '@/lib/languages' +import { VOICES, getFallbackVoices } from '@/lib/voices' +import { useAppContext } from '@/lib/app-context' +import { fetchVoices, type VoiceInfo } from '@/api/voice' import { getFontScaleOptions, useFontScale, type FontScale } from '@/hooks/useFontScale' import { getTerminalFontSizeOptions, useTerminalFontSize, type TerminalFontSize } from '@/hooks/useTerminalFontSize' import { getComposerEnterBehaviorOptions, useComposerEnterBehavior, type ComposerEnterBehavior } from '@/hooks/useComposerEnterBehavior' @@ -88,6 +91,36 @@ function ChevronDownIcon(props: { className?: string }) { ) } +function PlayIcon(props: { className?: string }) { + return ( + + + + ) +} + +function StopIcon(props: { className?: string }) { + return ( + + + + ) +} + function MinusIcon(props: { className?: string }) { return ( (null) const appearanceContainerRef = useRef(null) const fontContainerRef = useRef(null) @@ -281,6 +316,7 @@ export default function SettingsPage() { const chatContainerRef = useRef(null) const terminalToolDisplayContainerRef = useRef(null) const voiceContainerRef = useRef(null) + const voicePickerContainerRef = useRef(null) const { fontScale, setFontScale } = useFontScale() const { terminalFontSize, setTerminalFontSize } = useTerminalFontSize() const { sessionPreviewLimit, setSessionPreviewLimit } = useSessionPreviewLimit() @@ -299,6 +335,16 @@ export default function SettingsPage() { return localStorage.getItem('hapi-voice-lang') }) + // Voice ID state - read from localStorage + const [voiceId, setVoiceId] = useState(() => { + return localStorage.getItem('hapi-voice-id') + }) + + // Dynamic voice list fetched from hub (includes user's cloned voices) + const [dynamicVoices, setDynamicVoices] = useState(null) + const [playingVoiceId, setPlayingVoiceId] = useState(null) + const currentAudioRef = useRef(null) + const fontScaleOptions = getFontScaleOptions() const terminalFontSizeOptions = getTerminalFontSizeOptions() const composerEnterBehaviorOptions = getComposerEnterBehaviorOptions() @@ -312,6 +358,16 @@ export default function SettingsPage() { const currentTerminalToolDisplayModeLabel = terminalToolDisplayModeOptions.find((opt) => opt.value === terminalToolDisplayMode)?.labelKey ?? 'settings.chat.terminalToolDisplay.compact' const currentVoiceLanguage = voiceLanguages.find((lang) => lang.code === voiceLanguage) + // Voice list: dynamic (from ElevenLabs API, includes clones) or static fallback + const fallbackVoices = getFallbackVoices(locale) + const voiceOptions: VoiceInfo[] = dynamicVoices && dynamicVoices.length > 0 + ? dynamicVoices + : fallbackVoices.map(v => ({ id: v.id, name: v.name, previewUrl: '', category: 'premade' })) + + const currentVoiceName = voiceId + ? (voiceOptions.find(v => v.id === voiceId)?.name ?? fallbackVoices.find(v => v.id === voiceId)?.name ?? voiceId) + : null + const handleLocaleChange = (newLocale: Locale) => { setLocale(newLocale) setIsOpen(false) @@ -352,9 +408,56 @@ export default function SettingsPage() { setIsVoiceOpen(false) } + const handleVoiceChange = (id: string | null) => { + setVoiceId(id) + if (id === null) { + localStorage.removeItem('hapi-voice-id') + } else { + localStorage.setItem('hapi-voice-id', id) + } + setIsVoicePickerOpen(false) + } + + // Fetch available voices from hub on mount + useEffect(() => { + fetchVoices(api).then(voices => { + if (voices.length > 0) setDynamicVoices(voices) + }) + }, [api]) + + const handleVoicePreview = (previewUrl: string, voiceId: string, event: React.MouseEvent) => { + event.stopPropagation() + if (!previewUrl) return + + if (playingVoiceId === voiceId) { + currentAudioRef.current?.pause() + currentAudioRef.current = null + setPlayingVoiceId(null) + return + } + + currentAudioRef.current?.pause() + const audio = new Audio(previewUrl) + currentAudioRef.current = audio + setPlayingVoiceId(voiceId) + audio.play().catch(() => setPlayingVoiceId(null)) + audio.addEventListener('ended', () => { + setPlayingVoiceId(null) + currentAudioRef.current = null + }) + } + + useEffect(() => { + return () => { + currentAudioRef.current?.pause() + currentAudioRef.current = null + setPlayingVoiceId(null) + } + }, []) + // Close dropdown when clicking outside useEffect(() => { - if (!isOpen && !isAppearanceOpen && !isFontOpen && !isTerminalFontOpen && !isChatOpen && !isTerminalToolDisplayOpen && !isVoiceOpen) return + if (!isOpen && !isAppearanceOpen && !isFontOpen && !isTerminalFontOpen && !isChatOpen && !isTerminalToolDisplayOpen && !isVoiceOpen && !isVoicePickerOpen) return const handleClickOutside = (event: MouseEvent) => { if (isOpen && containerRef.current && !containerRef.current.contains(event.target as Node)) { @@ -378,15 +481,18 @@ export default function SettingsPage() { if (isVoiceOpen && voiceContainerRef.current && !voiceContainerRef.current.contains(event.target as Node)) { setIsVoiceOpen(false) } + if (isVoicePickerOpen && voicePickerContainerRef.current && !voicePickerContainerRef.current.contains(event.target as Node)) { + setIsVoicePickerOpen(false) + } } document.addEventListener('mousedown', handleClickOutside) return () => document.removeEventListener('mousedown', handleClickOutside) - }, [isOpen, isAppearanceOpen, isFontOpen, isTerminalFontOpen, isChatOpen, isTerminalToolDisplayOpen, isVoiceOpen]) + }, [isOpen, isAppearanceOpen, isFontOpen, isTerminalFontOpen, isChatOpen, isTerminalToolDisplayOpen, isVoiceOpen, isVoicePickerOpen]) // Close on escape key useEffect(() => { - if (!isOpen && !isAppearanceOpen && !isFontOpen && !isTerminalFontOpen && !isChatOpen && !isTerminalToolDisplayOpen && !isVoiceOpen) return + if (!isOpen && !isAppearanceOpen && !isFontOpen && !isTerminalFontOpen && !isChatOpen && !isTerminalToolDisplayOpen && !isVoiceOpen && !isVoicePickerOpen) return const handleEscape = (event: KeyboardEvent) => { if (event.key === 'Escape') { @@ -397,12 +503,13 @@ export default function SettingsPage() { setIsChatOpen(false) setIsTerminalToolDisplayOpen(false) setIsVoiceOpen(false) + setIsVoicePickerOpen(false) } } document.addEventListener('keydown', handleEscape) return () => document.removeEventListener('keydown', handleEscape) - }, [isOpen, isAppearanceOpen, isFontOpen, isTerminalFontOpen, isChatOpen, isTerminalToolDisplayOpen, isVoiceOpen]) + }, [isOpen, isAppearanceOpen, isFontOpen, isTerminalFontOpen, isChatOpen, isTerminalToolDisplayOpen, isVoiceOpen, isVoicePickerOpen]) return (
@@ -813,6 +920,93 @@ export default function SettingsPage() {
)} + +
+ + + {isVoicePickerOpen && ( +
+
+ +
+ {voiceOptions.map((voice) => { + const isSelected = voiceId === voice.id + const isPlaying = playingVoiceId === voice.id + return ( +
+ + +
+ ) + })} +
+ )} +
{/* About section */}