Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions web/src/realtime/hooks/contextFormatters.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import { describe, expect, it } from 'vitest'
import type { DecryptedMessage } from '@/types/api'
import { extractLastAssistantSpeakable, formatMessage, formatNewMessages, formatReadyEvent } from './contextFormatters'

/**
 * Test fixture builder: expands the three fields a test cares about
 * (`id`, `seq`, `content`) into a message object, filling the remaining
 * fields with fixed placeholder values.
 */
function msg(partial: Pick<DecryptedMessage, 'id' | 'seq' | 'content'>): DecryptedMessage {
    const { id, seq, content } = partial
    // The cast is kept from the original fixture; only the fields listed here are populated.
    return { id, seq, localId: null, content, createdAt: 0, sessionId: 'session-1' } as DecryptedMessage
}

describe('extractLastAssistantSpeakable', () => {
    // Role-wrapped fixture; every case in this suite uses an id equal to the stringified seq.
    const wrapped = (seq: number, role: string, content: unknown): DecryptedMessage =>
        msg({ id: String(seq), seq, content: { role, content } })

    it('returns null for empty history', () => {
        const spoken = extractLastAssistantSpeakable([])
        expect(spoken).toBeNull()
    })

    it('returns the latest assistant plain string', () => {
        const history = [
            wrapped(1, 'user', 'hello'),
            wrapped(2, 'assistant', 'first reply'),
            // Surrounding whitespace is expected to be stripped from the result.
            wrapped(3, 'assistant', ' latest reply ')
        ]

        expect(extractLastAssistantSpeakable(history)).toBe('latest reply')
    })

    it('skips trailing user messages and reads earlier assistant text', () => {
        const history = [
            wrapped(1, 'assistant', 'done with the refactor'),
            wrapped(2, 'user', 'thanks')
        ]

        expect(extractLastAssistantSpeakable(history)).toBe('done with the refactor')
    })

    it('extracts text blocks from assistant content arrays', () => {
        const textBlocks = [
            { type: 'text', text: 'Part one.' },
            { type: 'text', text: 'Part two.' }
        ]
        const history = [wrapped(1, 'assistant', textBlocks)]

        // Text blocks are expected to be joined with a blank line between them.
        expect(extractLastAssistantSpeakable(history)).toBe('Part one.\n\nPart two.')
    })

    it('extracts codex stream-json assistant messages', () => {
        const spokenPayload = {
            type: 'codex',
            data: {
                type: 'message',
                message: 'Indexed 5,018 items in the search database.'
            }
        }
        // The newer `ready` payload carries no text, so the earlier message must win.
        const readyPayload = { type: 'codex', data: { type: 'ready' } }
        const history = [
            wrapped(1, 'agent', spokenPayload),
            wrapped(2, 'agent', readyPayload)
        ]

        expect(extractLastAssistantSpeakable(history)).toBe('Indexed 5,018 items in the search database.')
    })

    it('unwraps codex-style output envelopes', () => {
        // This envelope is not role-wrapped: the role comes from `data.type`.
        const envelope = {
            type: 'output',
            data: {
                type: 'assistant',
                message: { content: 'Codex finished the refactor.' }
            }
        }
        const history = [msg({ id: '1', seq: 1, content: envelope })]

        expect(extractLastAssistantSpeakable(history)).toBe('Codex finished the refactor.')
    })
})

describe('formatReadyEvent', () => {
    const sessionId = '9d04335d-2b90-4941-98a7-eb414823f0e0'
    // Phrase emitted when there is no assistant text to embed.
    const fallbackInstruction = 'Use the latest agent message already present in context'

    it('embeds assistant text when provided', () => {
        const assistantText = 'Added full-text search to the API module.'

        const readyEvent = formatReadyEvent(sessionId, assistantText)

        expect(readyEvent).toContain('coding agent finished working')
        expect(readyEvent).toContain(`<text>${assistantText}</text>`)
        // The wording must stay agent-neutral.
        expect(readyEvent).not.toContain('Claude Code')
    })

    it('falls back when assistant text is missing', () => {
        const readyEvent = formatReadyEvent(sessionId, null)

        expect(readyEvent).toContain(fallbackInstruction)
        expect(readyEvent).not.toContain('Claude Code')
    })

    it('treats blank assistant text as missing', () => {
        const readyEvent = formatReadyEvent(sessionId, ' ')

        expect(readyEvent).toContain(fallbackInstruction)
    })
})

describe('formatMessage', () => {
    it('formats codex stream-json assistant messages for voice context', () => {
        const formatted = formatMessage(msg({
            id: '1',
            seq: 1,
            content: {
                role: 'agent',
                content: {
                    type: 'codex',
                    data: {
                        type: 'message',
                        message: 'Indexed 5,018 items in the search database.'
                    }
                }
            }
        }))

        expect(formatted).toContain('Claude Code:')
        expect(formatted).toContain('<text>Indexed 5,018 items in the search database.</text>')
    })

    it('ignores codex ready and tool-call payloads', () => {
        // The title promises tool-call coverage, so exercise every codex payload
        // type named there rather than `ready` alone.
        for (const type of ['ready', 'tool-call', 'tool-call-result']) {
            expect(formatMessage(msg({
                id: '1',
                seq: 1,
                content: {
                    role: 'agent',
                    content: {
                        type: 'codex',
                        data: { type }
                    }
                }
            }))).toBeNull()
        }
    })

    it('does not treat session status events as speakable assistant text', () => {
        // Same `data` shape as a codex message, but the envelope type is `event`,
        // so the message text must not be surfaced.
        expect(formatMessage(msg({
            id: '1',
            seq: 1,
            content: {
                role: 'agent',
                content: {
                    id: 'some-uuid',
                    type: 'event',
                    data: { type: 'message', message: 'Aborting task.' }
                }
            }
        }))).toBeNull()
    })

    it('preserves tool-call context for mixed text+tool_use content array', () => {
        const formatted = formatMessage(msg({
            id: '1',
            seq: 1,
            content: {
                role: 'assistant',
                content: [
                    { type: 'text', text: 'Here is the result.' },
                    { type: 'tool_use', name: 'Bash', input: { command: 'ls' } }
                ]
            }
        }))

        // Both the spoken text and the tool-use notice must survive formatting.
        expect(formatted).toContain('Here is the result.')
        expect(formatted).toContain('Claude Code is using Bash')
    })
})

describe('formatNewMessages', () => {
    it('includes codex assistant replies in contextual updates', () => {
        const replyText = 'Local database file size is 2.43 GiB.'
        const codexReply = msg({
            id: '1',
            seq: 1,
            content: {
                role: 'agent',
                content: {
                    type: 'codex',
                    data: { type: 'message', message: replyText }
                }
            }
        })

        const contextualUpdate = formatNewMessages('session-1', [codexReply])

        // The update names the session and carries the codex reply text.
        expect(contextualUpdate).toContain('New messages in session: session-1')
        expect(contextualUpdate).toContain(replyText)
    })
})
102 changes: 93 additions & 9 deletions web/src/realtime/hooks/contextFormatters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,21 +92,26 @@ export function formatPermissionRequest(
* Format a single message for voice context
*/
export function formatMessage(message: DecryptedMessage): string | null {
const lines: string[] = []
const { role, content: wrappedContent } = unwrapRoleWrappedContent(message)
const { roleOverride, content } = unwrapOutputContent(wrappedContent)
const normalizedRole = roleOverride ?? role

if (isNonSpeakableAgentPayload(wrappedContent) || isNonSpeakableAgentPayload(content)) {
return null
}

const speakable = !isContentArray(content) ? extractSpeakableFromContent(content) : null
if (speakable) {
const roleForFormat = normalizedRole === 'user' ? 'user' : 'assistant'
return formatPlainText(roleForFormat, speakable)
}

if (!isContentArray(content)) {
if (typeof content === 'string') {
return formatPlainText(normalizedRole, content)
}
if (isObject(content) && content.type === 'text' && typeof content.text === 'string') {
return formatPlainText(normalizedRole, content.text)
}
return null
}

const lines: string[] = []

// Determine message type by checking for tool_use (assistant) vs user content
const hasToolUse = content.some(item => item.type === 'tool_use')
const isAssistant = normalizedRole === 'assistant'
Expand Down Expand Up @@ -134,6 +139,81 @@ export function formatMessage(message: DecryptedMessage): string | null {
return lines.join('\n\n')
}

/**
 * Pull the speakable text out of a message payload.
 *
 * Shapes are tried in this order; the first one yielding non-blank text wins:
 *  1. a plain string
 *  2. a single text item: `{ type: 'text', text: '...' }`
 *  3. a codex / stream-json message: `{ type: 'codex', data: { type: 'message', message: '...' } }`
 *  4. a content array, whose non-blank `text` items are joined with a blank line
 *
 * @param content Payload of unknown shape.
 * @returns The trimmed text, or `null` when nothing speakable is found.
 */
function extractSpeakableFromContent(content: unknown): string | null {
    if (typeof content === 'string') {
        const trimmed = content.trim()
        if (trimmed) {
            return trimmed
        }
    }

    if (isObject(content)) {
        if (content.type === 'text' && typeof content.text === 'string') {
            const trimmed = content.text.trim()
            if (trimmed) {
                return trimmed
            }
        }

        // Codex / stream-json agent messages: { type: 'codex', data: { type: 'message', message: '...' } }
        if (content.type === 'codex' && isObject(content.data)) {
            const data = content.data
            if (data.type === 'message' && typeof data.message === 'string') {
                const trimmed = data.message.trim()
                if (trimmed) {
                    return trimmed
                }
            }
        }
    }

    if (!isContentArray(content)) {
        return null
    }

    // Check `typeof` rather than truthiness: payloads are untrusted, and a truthy
    // non-string `text` would otherwise throw on `.trim()`.
    const textParts: string[] = []
    content.forEach((item) => {
        if (item.type === 'text' && typeof item.text === 'string') {
            const trimmed = item.text.trim()
            if (trimmed) {
                textParts.push(trimmed)
            }
        }
    })

    return textParts.length > 0 ? textParts.join('\n\n') : null
}

/**
 * Report whether a payload is a codex envelope whose `data.type` marks it as
 * something that should not be read aloud (`ready`, `tool-call`,
 * `tool-call-result` or `event`).
 *
 * @param content Payload of unknown shape.
 * @returns `true` only for the codex payload types listed above; `false` for
 *          everything else, including non-objects and non-codex envelopes.
 */
function isNonSpeakableAgentPayload(content: unknown): boolean {
    if (!isObject(content)) {
        return false
    }
    // Only codex envelopes with an object `data` are classified here.
    if (content.type !== 'codex') {
        return false
    }
    if (!isObject(content.data)) {
        return false
    }

    switch (content.data.type) {
        case 'ready':
        case 'tool-call':
        case 'tool-call-result':
        case 'event':
            return true
        default:
            return false
    }
}

/**
 * Find the most recent speakable non-user text in a message history.
 *
 * Messages are ordered by `seq` (a missing `seq` counts as 0) and inspected
 * from newest to oldest. A message is skipped when its effective role is
 * `user`, when it is a non-speakable agent payload, or when no speakable
 * text can be extracted from it.
 *
 * @param messages Message history in any order; the input array is not mutated.
 * @returns The speakable text of the newest qualifying message, or `null`
 *          when none qualifies.
 */
export function extractLastAssistantSpeakable(messages: DecryptedMessage[]): string | null {
    // Sort ascending and then reverse (rather than sorting descending) so that
    // messages with equal seq are visited in the same order as a backwards walk.
    const newestFirst = [...messages]
        .sort((left, right) => (left.seq ?? 0) - (right.seq ?? 0))
        .reverse()

    for (const candidate of newestFirst) {
        const { role, content: wrappedContent } = unwrapRoleWrappedContent(candidate)
        const { roleOverride, content } = unwrapOutputContent(wrappedContent)

        // A role override from the output envelope takes precedence over the wrapper role.
        const fromUser = (roleOverride ?? role) === 'user'
        const skip = fromUser
            || isNonSpeakableAgentPayload(wrappedContent)
            || isNonSpeakableAgentPayload(content)
        if (skip) {
            continue
        }

        const text = extractSpeakableFromContent(content)
        if (text) {
            return text
        }
    }

    return null
}

export function formatNewSingleMessage(sessionId: string, message: DecryptedMessage): string | null {
const formatted = formatMessage(message)
if (!formatted) {
Expand Down Expand Up @@ -199,6 +279,10 @@ export function formatSessionFocus(sessionId: string, _metadata?: SessionMetadat
return `Session became focused: ${sessionId}`
}

export function formatReadyEvent(sessionId: string): string {
return `Claude Code done working in session: ${sessionId}. The previous message(s) are the summary of the work done. Report this to the human immediately.`
/**
 * Build the text update announcing that the coding agent has finished working.
 *
 * @param sessionId Identifier of the session that became ready.
 * @param lastAssistantText Optional text of the agent's last message. When it
 *        is non-blank it is trimmed and embedded in a `<text>` element.
 * @returns An instruction to summarize the embedded text, or, when the text
 *          is missing or blank, an instruction to summarize the latest agent
 *          message already in context.
 */
export function formatReadyEvent(sessionId: string, lastAssistantText?: string | null): string {
    const summary = typeof lastAssistantText === 'string' ? lastAssistantText.trim() : ''

    // Missing and whitespace-only text both take the fallback wording.
    if (summary === '') {
        return `The coding agent finished working in session: ${sessionId}. Use the latest agent message already present in context and summarize it for the human immediately.`
    }

    return `The coding agent finished working in session: ${sessionId}. Summarize this for the human immediately:\n<text>${summary}</text>`
}
8 changes: 6 additions & 2 deletions web/src/realtime/hooks/voiceHooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import {
formatSessionFocus,
formatSessionFull,
formatSessionOffline,
formatSessionOnline
formatSessionOnline,
extractLastAssistantSpeakable
} from './contextFormatters'
import { VOICE_CONFIG } from '../voiceConfig'
import type { DecryptedMessage, Session } from '@/types/api'
Expand Down Expand Up @@ -68,6 +69,7 @@ function reportSession(sessionId: string) {
reportContextualUpdate(contextUpdate)
}


export const voiceHooks = {
/**
* Called when a session comes online/connects
Expand Down Expand Up @@ -147,7 +149,9 @@ export const voiceHooks = {
if (VOICE_CONFIG.DISABLE_READY_EVENTS) return

reportSession(sessionId)
reportTextUpdate(formatReadyEvent(sessionId))
const messages = messagesGetter?.(sessionId) ?? []
const lastAssistantText = extractLastAssistantSpeakable(messages)
reportTextUpdate(formatReadyEvent(sessionId, lastAssistantText))
},

/**
Expand Down
3 changes: 2 additions & 1 deletion web/src/realtime/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ export {
formatSessionOffline,
formatSessionFocus,
formatPermissionRequest,
formatReadyEvent
formatReadyEvent,
extractLastAssistantSpeakable
} from './hooks/contextFormatters'

// Config
Expand Down
Loading