YourGPT · ankushchhabradelta4infotech-ai · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/packages/llm-sdk/src/adapters/anthropic.ts b/packages/llm-sdk/src/adapters/anthropic.ts
@@ -10,6 +10,8 @@ import type {
   LLMAdapter,
   ChatCompletionRequest,
   CompletionResult,
+  ResponseRequest,
+  ResponseResult,
 } from "./base";
 import {
   formatMessagesForAnthropic,
@@ -803,6 +805,115 @@ export class AnthropicAdapter implements LLMAdapter {
       };
     }
   }
+
+  /**
+   * Responses API — MCP tools + reasoning + structured output via Anthropic Messages API.
+   * Sends the mcp-client-2025-11-20 beta header only when MCP servers are attached.
+   *
+   * @param request - Prompt plus optional MCP servers, reasoning effort, output schema and token cap.
+   * @returns Concatenated text blocks of the reply plus normalized token usage.
+   */
+  async respond(request: ResponseRequest): Promise<ResponseResult> {
+    const client = await this.getClient();
+    const servers = request.mcpServers ?? [];
+
+    const mcpServers = servers.map((mcp) => {
+      // Header names are case-insensitive; authorization_token expects the bare token,
+      // so a leading "Bearer " scheme copied from the header value is stripped.
+      const authKey = Object.keys(mcp.headers ?? {}).find(
+        (key) => key.toLowerCase() === "authorization",
+      );
+      const token = authKey
+        ? mcp.headers?.[authKey]?.replace(/^Bearer\s+/i, "")
+        : undefined;
+      return {
+        type: "url",
+        url: mcp.server_url,
+        name: mcp.server_label,
+        ...(token ? { authorization_token: token } : {}),
+      };
+    });
+
+    // Under mcp-client-2025-11-20 tool selection lives in the tools array: each server is
+    // referenced by one mcp_toolset, and an allowlist is "disable all, enable the listed".
+    const mcpToolsets = servers.map((mcp) => ({
+      type: "mcp_toolset",
+      mcp_server_name: mcp.server_label,
+      ...(mcp.allowed_tools
+        ? {
+            default_config: { enabled: false },
+            configs: Object.fromEntries(
+              mcp.allowed_tools.map((tool) => [tool, { enabled: true }]),
+            ),
+          }
+        : {}),
+    }));
+
+    const thinkingBudget =
+      request.reasoningEffort === "high"
+        ? 16000
+        : request.reasoningEffort === "medium"
+          ? 8000
+          : 4000;
+    // budget_tokens must be at least 1024 and below max_tokens. The default cap therefore
+    // grows with the budget, while an explicit small cap shrinks (or disables) thinking.
+    const maxTokens =
+      request.maxTokens ?? Math.max(8192, thinkingBudget + 4096);
+    const budgetTokens = Math.min(thinkingBudget, maxTokens - 1024);
+    const useThinking =
+      Boolean(request.reasoningEffort) && budgetTokens >= 1024;
+
+    const betas: string[] = [];
+    if (mcpServers.length) betas.push("mcp-client-2025-11-20");
+    // interleaved-thinking-2025-05-14 is intentionally not sent.
+    // NOTE(review): the original author states it is deprecated on Claude 4.x — confirm.
+
+    const payload: Record<string, unknown> = {
+      model: this.model,
+      max_tokens: maxTokens,
+      messages: [{ role: "user", content: request.prompt }],
+      ...(mcpServers.length
+        ? { mcp_servers: mcpServers, tools: mcpToolsets }
+        : {}),
+      ...(useThinking
+        ? { thinking: { type: "enabled", budget_tokens: budgetTokens } }
+        : {}),
+      // The json_schema format takes the schema directly; there is no named wrapper object.
+      ...(request.outputSchema
+        ? {
+            output_config: {
+              format: {
+                type: "json_schema",
+                schema: request.outputSchema.schema,
+              },
+            },
+          }
+        : {}),
+    };
+
+    const response = await client.beta.messages.create(payload as any, {
+      headers: betas.length ? { "anthropic-beta": betas.join(",") } : {},
+    });
+
+    let text = "";
+    for (const block of response.content ?? []) {
+      if ((block as any).type === "text") {
+        text += (block as any).text;
+      }
+    }
+
+    const inputTokens = (response.usage as any)?.input_tokens ?? 0;
+    const outputTokens = (response.usage as any)?.output_tokens ?? 0;
+
+    return {
+      text,
+      usage: {
+        prompt_tokens: inputTokens,
+        completion_tokens: outputTokens,
+        total_tokens: inputTokens + outputTokens,
+      },
+    };
+  }
 }
 
 /**

diff --git a/packages/llm-sdk/src/adapters/base.ts b/packages/llm-sdk/src/adapters/base.ts
@@ -76,6 +76,51 @@ export interface CompletionResult {
   rawResponse: Record<string, unknown>;
 }
 
+/**
+ * MCP server configuration for the Responses API (field names match what the OpenAI adapter sends)
+ */
+export interface McpServerConfig {
+  type: "mcp"; // literal tag; the OpenAI adapter emits "mcp", the Anthropic adapter emits its own "url" type
+  server_label: string; // server name: sent as `server_label` (OpenAI) or `name` (Anthropic)
+  server_url: string; // server endpoint: sent as `server_url` (OpenAI) or `url` (Anthropic)
+  headers?: Record<string, string>; // forwarded whole by the OpenAI adapter; the Anthropic adapter reads only the Authorization entry
+  allowed_tools?: string[]; // optional allowlist of tool names for this server
+  require_approval?: "never" | "always"; // forwarded by the OpenAI adapter only, which defaults it to "never"
+}
+
+/**
+ * Provider-neutral request for `respond` (OpenAI Responses / Anthropic Messages with MCP)
+ */
+export interface ResponseRequest {
+  /** Prompt text; sent as one `developer` input (OpenAI adapter) or one `user` message (Anthropic adapter) */
+  prompt: string;
+  /** MCP server(s) to attach; when omitted or empty no MCP configuration is sent */
+  mcpServers?: McpServerConfig[];
+  /** Reasoning effort: low | medium | high; when omitted no reasoning/thinking option is sent */
+  reasoningEffort?: "low" | "medium" | "high";
+  /** Named JSON Schema for structured output; `schema` is forwarded as-is, so pass plain JSON Schema rather than a Zod object */
+  outputSchema?: {
+    name: string;
+    schema: Record<string, unknown>;
+  };
+  /** Max tokens for the response; optional upper bound */
+  maxTokens?: number;
+}
+
+/**
+ * Normalized result returned by `respond`, independent of the provider's response shape
+ */
+export interface ResponseResult {
+  /** Generated text; an empty string when the reply contains no text output */
+  text: string;
+  /** Token usage; the adapters compute total_tokens as prompt_tokens + completion_tokens */
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
 /**
  * Base LLM adapter interface
  */
@@ -95,6 +140,12 @@ export interface LLMAdapter {
    * Non-streaming chat completion (for debugging/comparison)
    */
   complete?(request: ChatCompletionRequest): Promise<CompletionResult>;
+
+  /**
+   * Responses API — MCP tools + reasoning + structured output.
+   * OpenAI: uses /v1/responses. Anthropic: uses /v1/messages with beta headers.
+   */
+  respond?(request: ResponseRequest): Promise<ResponseResult>;
 }
 
 /**
@@ -754,11 +805,23 @@ export function messageToOpenAIContent(
   const attachments = message.metadata?.attachments;
   const content = message.content ?? "";
 
-  // If no image attachments, return simple string
-  if (!hasImageAttachments(message)) {
+  // Check for audio parts in content array
+  const hasAudio =
+    Array.isArray(message.content) &&
+    (message.content as Array<{ type: string }>).some(
+      (p) => p.type === "input_audio",
+    );
+
+  // If no image attachments and no audio parts, return simple string
+  if (!hasImageAttachments(message) && !hasAudio) {
     return content;
   }
 
+  // If content is already an array of parts (e.g. audio + text), pass through directly
+  if (Array.isArray(message.content)) {
+    return message.content as unknown as OpenAIContentBlock[];
+  }
+
   // Build content blocks array
   const blocks: OpenAIContentBlock[] = [];
 

diff --git a/packages/llm-sdk/src/adapters/google.ts b/packages/llm-sdk/src/adapters/google.ts
@@ -174,8 +174,34 @@ function messageToGeminiContent(msg: Message): GeminiContent | null {
     return { role: "user", parts };
   }
 
-  // Add text content
-  if (msg.content) {
+  // Handle content as array of parts (e.g. input_audio + text from OpenAI format)
+  if (Array.isArray(msg.content)) {
+    for (const part of msg.content as Array<{
+      type: string;
+      text?: string;
+      input_audio?: { data: string; format: string };
+    }>) {
+      if (part.type === "text" && part.text) {
+        parts.push({ text: part.text });
+      } else if (part.type === "input_audio" && part.input_audio) {
+        const mimeMap: Record<string, string> = {
+          mp3: "audio/mp3",
+          wav: "audio/wav",
+          ogg: "audio/ogg",
+          webm: "audio/webm",
+          m4a: "audio/mp4",
+          flac: "audio/flac",
+        };
+        parts.push({
+          inlineData: {
+            mimeType: mimeMap[part.input_audio.format] || "audio/mp3",
+            data: part.input_audio.data,
+          },
+        });
+      }
+    }
+  } else if (msg.content) {
+    // Add text content
     parts.push({ text: msg.content });
   }
 

diff --git a/packages/llm-sdk/src/adapters/openai.ts b/packages/llm-sdk/src/adapters/openai.ts
@@ -10,6 +10,8 @@ import type {
   LLMAdapter,
   ChatCompletionRequest,
   CompletionResult,
+  ResponseRequest,
+  ResponseResult,
 } from "./base";
 import {
   buildOpenAITokenParams,
@@ -716,6 +718,86 @@ export class OpenAIAdapter implements LLMAdapter {
       rawResponse: response as Record<string, unknown>,
     };
   }
+
+  /**
+   * Responses API — MCP tools + reasoning + structured output via OpenAI /v1/responses.
+   *
+   * @param request - Prompt plus optional MCP servers, reasoning effort, output schema and token cap.
+   * @returns Every output_text part of the reply joined together, plus normalized token usage
+   *   (usage is undefined when the response reports none).
+   */
+  async respond(request: ResponseRequest): Promise<ResponseResult> {
+    const client = await this.getClient();
+
+    const tools: Array<Record<string, unknown>> = (
+      request.mcpServers ?? []
+    ).map((mcp) => ({
+      type: "mcp",
+      server_label: mcp.server_label,
+      server_url: mcp.server_url,
+      ...(mcp.headers ? { headers: mcp.headers } : {}),
+      ...(mcp.allowed_tools ? { allowed_tools: mcp.allowed_tools } : {}),
+      require_approval: mcp.require_approval ?? "never",
+    }));
+
+    const payload: Record<string, unknown> = {
+      model: this.model,
+      input: [
+        {
+          role: "developer",
+          content: [{ type: "input_text", text: request.prompt }],
+        },
+      ],
+      ...(tools.length ? { tools } : {}),
+      ...(request.reasoningEffort
+        ? { reasoning: { effort: request.reasoningEffort, summary: "auto" } }
+        : {}),
+      ...(request.outputSchema
+        ? {
+            text: {
+              format: {
+                type: "json_schema",
+                name: request.outputSchema.name,
+                schema: request.outputSchema.schema,
+                strict: true,
+              },
+            },
+          }
+        : {}),
+      // Forward the caller's cap; max_output_tokens bounds visible output plus reasoning tokens.
+      ...(request.maxTokens != null
+        ? { max_output_tokens: request.maxTokens }
+        : {}),
+      store: false,
+    };
+
+    const response = await client.responses.create(payload);
+
+    const output: Array<{
+      type: string;
+      content?: Array<{ type: string; text?: string }>;
+    }> = response.output ?? [];
+    // With tools or reasoning the output holds several items and may hold more than one
+    // message, so join every output_text part instead of reading only the first one.
+    const text = output
+      .filter((item) => item.type === "message")
+      .flatMap((item) => item.content ?? [])
+      .filter((part) => part.type === "output_text")
+      .map((part) => part.text ?? "")
+      .join("");
+
+    const usage = response.usage
+      ? {
+          prompt_tokens: response.usage.input_tokens ?? 0,
+          completion_tokens: response.usage.output_tokens ?? 0,
+          total_tokens:
+            (response.usage.input_tokens ?? 0) +
+            (response.usage.output_tokens ?? 0),
+        }
+      : undefined;
+
+    return { text, usage };
+  }
 }
 
 /**

diff --git a/packages/llm-sdk/src/core/types.ts b/packages/llm-sdk/src/core/types.ts
@@ -111,7 +111,7 @@ export interface ToolMessage {
 /**
  * Content parts for multimodal user messages
  */
-export type UserContentPart = TextPart | ImagePart | FilePart;
+export type UserContentPart = TextPart | ImagePart | FilePart | AudioPart;
 
 export interface TextPart {
   type: "text";
@@ -134,6 +134,16 @@ export interface FilePart {
   mimeType: string;
 }
 
+export interface AudioPart {
+  type: "input_audio"; // discriminator the adapters check to detect audio parts in a content array
+  input_audio: {
+    /** Base64-encoded audio data */
+    data: string;
+    /** Audio format (e.g., 'mp3', 'wav', 'ogg', 'webm', 'm4a', 'flac'); the Gemini adapter falls back to audio/mp3 for any other value */
+    format: string;
+  };
+}
+
 // ============================================
 // Tool Types
 // ============================================

diff --git a/packages/llm-sdk/src/index.ts b/packages/llm-sdk/src/index.ts
@@ -51,6 +51,9 @@ export type {
   ImagePart,
   FilePart,
 
+  // Content Parts
+  AudioPart,
+
   // Tools
   Tool,
   ToolContext,
@@ -159,6 +162,9 @@ export type {
   LLMAdapter,
   ChatCompletionRequest,
   AdapterFactory,
+  ResponseRequest,
+  ResponseResult,
+  McpServerConfig,
 } from "./adapters/base";
 
 // Provider types (no implementations - use subpath imports)