diff --git a/apps/docs/ai-sdk/overview.mdx b/apps/docs/ai-sdk/overview.mdx index 0c9a48f49..535f0cef2 100644 --- a/apps/docs/ai-sdk/overview.mdx +++ b/apps/docs/ai-sdk/overview.mdx @@ -40,6 +40,7 @@ const result = await generateText({ ```typescript const modelWithMemory = withSupermemory(openai("gpt-5"), "user-123", { + conversationId: "conv-1", addMemory: "always" }) ``` diff --git a/apps/docs/ai-sdk/user-profiles.mdx b/apps/docs/ai-sdk/user-profiles.mdx index df8c5430f..09977272d 100644 --- a/apps/docs/ai-sdk/user-profiles.mdx +++ b/apps/docs/ai-sdk/user-profiles.mdx @@ -50,6 +50,7 @@ All of this happens transparently - you write code as if using a normal model, b ```typescript const model = withSupermemory(openai("gpt-5"), "user-123", { + conversationId: "conv-1", addMemory: "always" }) ``` @@ -117,6 +118,32 @@ const result = await generateText({ // Uses both profile (user's expertise) AND search (previous debugging sessions) ``` +### Hybrid Search Mode + +Use `searchMode: "hybrid"` to search both memories AND document chunks. + +```typescript +const model = withSupermemory(openai("gpt-4"), "user-123", { + mode: "full", + searchMode: "hybrid", // Search memories + document chunks + searchLimit: 15 // Max results (default: 10) +}) + +const result = await generateText({ + model, + messages: [{ + role: "user", + content: "What's in my documents about quarterly goals?" + }] +}) +// Searches both extracted memories AND raw document content +``` + +**Search Mode Options:** +- `"memories"` (default) - Search only memory entries +- `"hybrid"` - Search memories + document chunks +- `"documents"` - Search only document chunks + ## Custom Prompt Templates Customize how memories are formatted and injected into the system prompt using the `promptTemplate` option. 
This is useful for: diff --git a/apps/docs/integrations/ai-sdk.mdx b/apps/docs/integrations/ai-sdk.mdx index a9e2d6f8a..c87cb0a7b 100644 --- a/apps/docs/integrations/ai-sdk.mdx +++ b/apps/docs/integrations/ai-sdk.mdx @@ -48,6 +48,7 @@ const result = await generateText({ ```typescript const modelWithMemory = withSupermemory(openai("gpt-5"), "user-123", { + conversationId: "conv-1", addMemory: "always" }) ``` diff --git a/packages/tools/README.md b/packages/tools/README.md index 2d03411d3..391981c0a 100644 --- a/packages/tools/README.md +++ b/packages/tools/README.md @@ -184,6 +184,29 @@ const result = await generateText({ }) ``` +**Hybrid Search Mode (RAG)** - Search both memories AND document chunks: +```typescript +import { generateText } from "ai" +import { withSupermemory } from "@supermemory/tools/ai-sdk" +import { openai } from "@ai-sdk/openai" + +const modelWithHybrid = withSupermemory(openai("gpt-4"), "user-123", { + mode: "full", + searchMode: "hybrid", // Search memories + document chunks + searchLimit: 15 // Max results (default: 10) +}) + +const result = await generateText({ + model: modelWithHybrid, + messages: [{ role: "user", content: "What's in my documents about quarterly goals?" }], +}) +``` + +Search mode options: +- `"memories"` (default) - Search only memory entries +- `"hybrid"` - Search memories + document chunks (recommended for RAG) +- `"documents"` - Search only document chunks + #### Automatic Memory Capture The middleware can automatically save user messages as memories: @@ -653,6 +676,8 @@ interface WithSupermemoryOptions { conversationId?: string verbose?: boolean mode?: "profile" | "query" | "full" + searchMode?: "memories" | "hybrid" | "documents" + searchLimit?: number addMemory?: "always" | "never" /** Optional Supermemory API key. Use this in browser environments. 
*/ apiKey?: string @@ -662,6 +687,8 @@ interface WithSupermemoryOptions { - **conversationId**: Optional conversation ID to group messages into a single document for contextual memory generation - **verbose**: Enable detailed logging of memory search and injection process (default: false) - **mode**: Memory search mode - "profile" (default), "query", or "full" +- **searchMode**: Search mode - "memories" (default), "hybrid", or "documents". Use "hybrid" for RAG applications +- **searchLimit**: Maximum number of search results when using hybrid/documents mode (default: 10) - **addMemory**: Automatic memory storage mode - "always" or "never" (default: "never") ## Available Tools diff --git a/packages/tools/package.json b/packages/tools/package.json index 8d192aba6..1575c7f4b 100644 --- a/packages/tools/package.json +++ b/packages/tools/package.json @@ -1,7 +1,7 @@ { "name": "@supermemory/tools", "type": "module", - "version": "1.4.01", + "version": "1.5.0", "description": "Memory tools for AI SDK and OpenAI function calling with supermemory", "scripts": { "build": "tsdown", diff --git a/packages/tools/src/shared/index.ts b/packages/tools/src/shared/index.ts index 5a6e0f7ba..f866f3cdd 100644 --- a/packages/tools/src/shared/index.ts +++ b/packages/tools/src/shared/index.ts @@ -3,6 +3,7 @@ export type { MemoryPromptData, PromptTemplate, MemoryMode, + SearchMode, AddMemoryMode, Logger, ProfileStructure, @@ -34,9 +35,12 @@ export { // Memory client export { supermemoryProfileSearch, + supermemoryHybridSearch, buildMemoriesText, extractQueryText, getLastUserMessageText, type BuildMemoriesTextOptions, type GenericMessage, + type SearchResultItem, + type SearchResponse, } from "./memory-client" diff --git a/packages/tools/src/shared/memory-client.ts b/packages/tools/src/shared/memory-client.ts index 58754c895..b1fdf3016 100644 --- a/packages/tools/src/shared/memory-client.ts +++ b/packages/tools/src/shared/memory-client.ts @@ -5,12 +5,36 @@ import type { MemoryPromptData, 
ProfileStructure, PromptTemplate, + SearchMode, } from "./types" import { convertProfileToMarkdown, defaultPromptTemplate, } from "./prompt-builder" +/** + * Search result item from the Supermemory search API. + * Contains either a memory field (for memory results) or a chunk field (for document chunks). + */ +export interface SearchResultItem { + id: string + similarity: number + memory?: string + chunk?: string + title?: string + content?: string + metadata?: Record<string, unknown> +} + +/** + * Response structure from the Supermemory search API. + */ +export interface SearchResponse { + results: SearchResultItem[] + total: number + timing: number +} + /** * Fetches profile and search results from the Supermemory API. * @@ -61,6 +85,59 @@ export const supermemoryProfileSearch = async ( } } +/** + * Performs a hybrid search using the Supermemory search API. + * Hybrid search returns both memories AND document chunks. + * + * @param containerTag - The container tag/user ID for scoping memories + * @param queryText - The search query text + * @param searchMode - The search mode: "memories", "hybrid", or "documents" + * @param baseUrl - The API base URL + * @param apiKey - The API key for authentication + * @param limit - Maximum number of results to return (default: 10) + * @returns The search response with results containing either memory or chunk fields + */ +export const supermemoryHybridSearch = async ( + containerTag: string, + queryText: string, + searchMode: SearchMode, + baseUrl: string, + apiKey: string, + limit = 10, +): Promise<SearchResponse> => { + const payload = JSON.stringify({ + q: queryText, + containerTag: containerTag, + searchMode: searchMode, + limit: limit, + }) + + try { + const response = await fetch(`${baseUrl}/v4/search`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: payload, + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "Unknown error") + throw new Error( + 
`Supermemory search failed: ${response.status} ${response.statusText}. ${errorText}`, + ) + } + + return await response.json() + } catch (error) { + if (error instanceof Error) { + throw error + } + throw new Error(`Supermemory API request failed: ${error}`) + } +} + /** * Options for building memories text. */ @@ -72,12 +149,48 @@ export interface BuildMemoriesTextOptions { apiKey: string logger: Logger promptTemplate?: PromptTemplate + /** + * Search mode for memory retrieval: + * - "memories": Search only memory entries (default) + * - "hybrid": Search both memories AND document chunks (recommended for RAG) + * - "documents": Search only document chunks + */ + searchMode?: SearchMode + /** Maximum number of search results to return (default: 10) */ + searchLimit?: number +} + +/** + * Formats search results (memories and/or chunks) into a readable string. + */ +const formatSearchResults = ( + results: SearchResultItem[], + includeChunks: boolean, +): string => { + if (results.length === 0) return "" + + const formattedResults = results + .map((result) => { + if (result.memory) { + return `- ${result.memory}` + } + if (result.chunk && includeChunks) { + return `- [Document] ${result.chunk}` + } + return null + }) + .filter(Boolean) + + return formattedResults.join("\n") } /** * Fetches memories from the API, deduplicates them, and formats them into * the final string to be injected into the system prompt. * + * When searchMode is "hybrid" or "documents", uses the search API to retrieve + * both memories and document chunks. Otherwise, uses the profile API. 
+ * * @param options - Configuration for building memories text * @returns The final formatted memories string ready for injection */ @@ -92,69 +205,144 @@ export const buildMemoriesText = async ( apiKey, logger, promptTemplate = defaultPromptTemplate, + searchMode = "memories", + searchLimit = 10, } = options - const memoriesResponse = await supermemoryProfileSearch( - containerTag, - queryText, - baseUrl, - apiKey, - ) + const useHybridSearch = searchMode === "hybrid" || searchMode === "documents" - const memoryCountStatic = memoriesResponse.profile.static?.length || 0 - const memoryCountDynamic = memoriesResponse.profile.dynamic?.length || 0 + let userMemories = "" + let generalSearchMemories = "" + let rawSearchResults: Array<{ + memory: string + metadata?: Record<string, unknown> + }> = [] - logger.info("Memory search completed", { - containerTag, - memoryCountStatic, - memoryCountDynamic, - queryText: - queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""), - mode, - }) + if (useHybridSearch && queryText) { + logger.info("Using hybrid search mode", { + containerTag, + searchMode, + queryText: + queryText.substring(0, 100) + (queryText.length > 100 ? "..." 
: ""), + }) - const deduplicated = deduplicateMemories({ - static: memoriesResponse.profile.static, - dynamic: memoriesResponse.profile.dynamic, - searchResults: memoriesResponse.searchResults?.results, - }) + const searchResponse = await supermemoryHybridSearch( + containerTag, + queryText, + searchMode, + baseUrl, + apiKey, + searchLimit, + ) - logger.debug("Memory deduplication completed", { - static: { - original: memoryCountStatic, - deduplicated: deduplicated.static.length, - }, - dynamic: { - original: memoryCountDynamic, - deduplicated: deduplicated.dynamic.length, - }, - searchResults: { - original: memoriesResponse.searchResults?.results?.length, - deduplicated: deduplicated.searchResults?.length, - }, - }) + logger.info("Hybrid search completed", { + containerTag, + resultCount: searchResponse.results.length, + timing: searchResponse.timing, + searchMode, + }) + + const includeChunks = searchMode === "hybrid" || searchMode === "documents" + generalSearchMemories = formatSearchResults( + searchResponse.results, + includeChunks, + ) - const userMemories = - mode !== "query" - ? convertProfileToMarkdown({ - profile: { - static: deduplicated.static, - dynamic: deduplicated.dynamic, - }, - searchResults: { results: [] }, - }) - : "" - const generalSearchMemories = - mode !== "profile" - ? 
`Search results for user's recent message: \n${deduplicated.searchResults - .map((memory) => `- ${memory}`) - .join("\n")}` - : "" + if (generalSearchMemories) { + generalSearchMemories = `Search results for user's recent message:\n${generalSearchMemories}` + } + + rawSearchResults = searchResponse.results.map((r) => ({ + memory: r.memory || r.chunk || "", + metadata: r.metadata, + })) + + if (mode !== "query") { + const profileResponse = await supermemoryProfileSearch( + containerTag, + "", + baseUrl, + apiKey, + ) + + const deduplicated = deduplicateMemories({ + static: profileResponse.profile.static, + dynamic: profileResponse.profile.dynamic, + searchResults: [], + }) + + userMemories = convertProfileToMarkdown({ + profile: { + static: deduplicated.static, + dynamic: deduplicated.dynamic, + }, + searchResults: { results: [] }, + }) + } + } else { + const memoriesResponse = await supermemoryProfileSearch( + containerTag, + queryText, + baseUrl, + apiKey, + ) + + const memoryCountStatic = memoriesResponse.profile.static?.length || 0 + const memoryCountDynamic = memoriesResponse.profile.dynamic?.length || 0 + + logger.info("Memory search completed", { + containerTag, + memoryCountStatic, + memoryCountDynamic, + queryText: + queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""), + mode, + }) + + const deduplicated = deduplicateMemories({ + static: memoriesResponse.profile.static, + dynamic: memoriesResponse.profile.dynamic, + searchResults: memoriesResponse.searchResults?.results, + }) + + logger.debug("Memory deduplication completed", { + static: { + original: memoryCountStatic, + deduplicated: deduplicated.static.length, + }, + dynamic: { + original: memoryCountDynamic, + deduplicated: deduplicated.dynamic.length, + }, + searchResults: { + original: memoriesResponse.searchResults?.results?.length, + deduplicated: deduplicated.searchResults?.length, + }, + }) + + userMemories = + mode !== "query" + ? 
convertProfileToMarkdown({ + profile: { + static: deduplicated.static, + dynamic: deduplicated.dynamic, + }, + searchResults: { results: [] }, + }) + : "" + generalSearchMemories = + mode !== "profile" + ? `Search results for user's recent message: \n${deduplicated.searchResults + .map((memory) => `- ${memory}`) + .join("\n")}` + : "" + rawSearchResults = memoriesResponse.searchResults?.results ?? [] + } const promptData: MemoryPromptData = { userMemories, generalSearchMemories, - searchResults: memoriesResponse.searchResults?.results ?? [], + searchResults: rawSearchResults, } const memories = promptTemplate(promptData) diff --git a/packages/tools/src/shared/types.ts b/packages/tools/src/shared/types.ts index 421785f52..70427eae6 100644 --- a/packages/tools/src/shared/types.ts +++ b/packages/tools/src/shared/types.ts @@ -47,6 +47,14 @@ export type PromptTemplate = (data: MemoryPromptData) => string */ export type MemoryMode = "profile" | "query" | "full" +/** + * Search mode for memory retrieval: + * - "memories": Search only memory entries (default) + * - "hybrid": Search both memories AND document chunks (recommended for RAG) + * - "documents": Search only document chunks + */ +export type SearchMode = "memories" | "hybrid" | "documents" + /** * Memory persistence mode: * - "always": Automatically save conversations as memories @@ -117,6 +125,15 @@ export interface SupermemoryBaseOptions { threadId?: string /** Memory retrieval mode */ mode?: MemoryMode + /** + * Search mode for memory retrieval: + * - "memories": Search only memory entries (default) + * - "hybrid": Search both memories AND document chunks (recommended for RAG) + * - "documents": Search only document chunks + */ + searchMode?: SearchMode + /** Maximum number of search results to return when using hybrid/documents mode (default: 10) */ + searchLimit?: number /** Memory persistence mode */ addMemory?: AddMemoryMode /** Enable detailed logging of memory search and injection */ diff --git 
a/packages/tools/src/vercel/index.ts b/packages/tools/src/vercel/index.ts index beeef093b..be74b7306 100644 --- a/packages/tools/src/vercel/index.ts +++ b/packages/tools/src/vercel/index.ts @@ -13,7 +13,7 @@ import { import type { PromptTemplate, MemoryPromptData } from "./memory-prompt" interface WrapVercelLanguageModelOptions { - /** Optional conversation ID to group messages for contextual memory generation */ + /** Conversation ID to group messages into a single document (maps to customId in Supermemory). Required when addMemory is "always". */ conversationId?: string /** Enable detailed logging of memory search and injection */ verbose?: boolean @@ -24,10 +24,19 @@ interface WrapVercelLanguageModelOptions { * - "full": Combines both profile and query-based results */ mode?: "profile" | "query" | "full" + /** + * Search mode for memory retrieval: + * - "memories": Search only memory entries (default) + * - "hybrid": Search both memories AND document chunks (recommended for RAG) + * - "documents": Search only document chunks + */ + searchMode?: "memories" | "hybrid" | "documents" + /** Maximum number of search results to return when using hybrid/documents mode (default: 10) */ + searchLimit?: number /** * Memory persistence mode: - * - "always": Automatically save conversations as memories - * - "never": Only retrieve memories, don't store new ones + * - "always": Automatically save conversations as memories (requires conversationId) + * - "never": Only retrieve memories, don't store new ones (default) */ addMemory?: "always" | "never" /** Supermemory API key (falls back to SUPERMEMORY_API_KEY env var) */ @@ -65,11 +74,13 @@ interface WrapVercelLanguageModelOptions { * * @param model - The language model to wrap with supermemory capabilities (V2 or V3) * @param containerTag - The container tag/identifier for memory search (e.g., user ID, project ID) - * @param options - Optional configuration options for the middleware - * @param options.conversationId - 
Optional conversation ID to group messages into a single document for contextual memory generation + * @param options - Configuration options for the middleware + * @param options.conversationId - Conversation ID to group messages into a single document (maps to customId in Supermemory) * @param options.verbose - Optional flag to enable detailed logging of memory search and injection process (default: false) * @param options.mode - Optional mode for memory search: "profile", "query", or "full" (default: "profile") - * @param options.addMemory - Optional mode for memory search: "always", "never" (default: "never") + * @param options.searchMode - Optional search mode: "memories" (default), "hybrid" (memories + chunks), or "documents" (chunks only) + * @param options.searchLimit - Optional maximum number of search results when using hybrid/documents mode (default: 10) + * @param options.addMemory - Optional mode for memory persistence: "always" (requires conversationId), "never" (default) * @param options.apiKey - Optional Supermemory API key to use instead of the environment variable * @param options.baseUrl - Optional base URL for the Supermemory API (default: "https://api.supermemory.ai") * @@ -80,15 +91,24 @@ interface WrapVercelLanguageModelOptions { * import { withSupermemory } from "@supermemory/tools/ai-sdk" * import { openai } from "@ai-sdk/openai" * + * // Basic usage with profile memories * const modelWithMemory = withSupermemory(openai("gpt-4"), "user-123", { - * conversationId: "conversation-456", + * conversationId: "conv-456", * mode: "full", * addMemory: "always" * }) * + * // RAG usage with hybrid search (memories + document chunks) + * const ragModel = withSupermemory(openai("gpt-4"), "user-123", { + * conversationId: "conv-789", + * mode: "full", + * searchMode: "hybrid", // Search both memories and document chunks + * searchLimit: 15, + * }) + * * const result = await generateText({ - * model: modelWithMemory, - * messages: [{ role: "user", 
content: "What's my favorite programming language?" }] + * model: ragModel, + * messages: [{ role: "user", content: "What's in my documents about quarterly goals?" }] * }) * ``` * @@ -108,12 +128,23 @@ const wrapVercelLanguageModel = ( ) } + if ( + (options?.addMemory ?? "never") === "always" && + !options?.conversationId + ) { + throw new Error( + 'conversationId is required when addMemory is "always" — provide it via options.conversationId to group messages into a single document', + ) + } + const ctx = createSupermemoryContext({ containerTag, apiKey: providedApiKey, conversationId: options?.conversationId, verbose: options?.verbose ?? false, mode: options?.mode ?? "profile", + searchMode: options?.searchMode ?? "memories", + searchLimit: options?.searchLimit ?? 10, addMemory: options?.addMemory ?? "never", baseUrl: options?.baseUrl, promptTemplate: options?.promptTemplate, @@ -130,7 +161,12 @@ const wrapVercelLanguageModel = ( const result = await model.doGenerate(transformedParams as any) const userMessage = getLastUserMessage(params) - if (ctx.addMemory === "always" && userMessage && userMessage.trim()) { + if ( + ctx.addMemory === "always" && + ctx.conversationId && + userMessage && + userMessage.trim() + ) { const assistantResponseText = extractAssistantResponseText( result.content as unknown[], ) @@ -180,6 +216,7 @@ const wrapVercelLanguageModel = ( const userMessage = getLastUserMessage(params) if ( ctx.addMemory === "always" && + ctx.conversationId && userMessage && userMessage.trim() ) { diff --git a/packages/tools/src/vercel/middleware.ts b/packages/tools/src/vercel/middleware.ts index 8c31b86fd..66ce007f2 100644 --- a/packages/tools/src/vercel/middleware.ts +++ b/packages/tools/src/vercel/middleware.ts @@ -11,6 +11,7 @@ import { type Logger, type PromptTemplate, type MemoryMode, + type SearchMode, } from "../shared" import { type LanguageModelCallOptions, @@ -19,7 +20,7 @@ import { } from "./util" import { extractQueryText, injectMemoriesIntoParams } 
from "./memory-prompt" -const getConversationContent = (params: LanguageModelCallOptions) => { +const _getConversationContent = (params: LanguageModelCallOptions) => { return params.prompt .filter((msg) => msg.role !== "system" && msg.role !== "tool") .map((msg) => { @@ -99,58 +100,34 @@ const convertToConversationMessages = ( } export const saveMemoryAfterResponse = async ( - client: Supermemory, + _client: Supermemory, containerTag: string, - conversationId: string | undefined, + conversationId: string, assistantResponseText: string, params: LanguageModelCallOptions, logger: Logger, apiKey: string, baseUrl: string, ): Promise<void> => { - const customId = conversationId ? `conversation:${conversationId}` : undefined - try { - if (customId && conversationId) { - const conversationMessages = convertToConversationMessages( - params, - assistantResponseText, - ) - - const response = await addConversation({ - conversationId, - messages: conversationMessages, - containerTags: [containerTag], - apiKey, - baseUrl, - }) - - logger.info("Conversation saved successfully via /v4/conversations", { - containerTag, - conversationId, - messageCount: conversationMessages.length, - responseId: response.id, - }) - return - } - - const userMessage = getLastUserMessage(params) - const content = conversationId - ? 
`${getConversationContent(params)} \n\n Assistant: ${assistantResponseText}` - : `User: ${userMessage} \n\n Assistant: ${assistantResponseText}` - - const response = await client.add({ - content, + const conversationMessages = convertToConversationMessages( + params, + assistantResponseText, + ) + + const response = await addConversation({ + conversationId, + messages: conversationMessages, containerTags: [containerTag], - customId, + apiKey, + baseUrl, }) - logger.info("Memory saved successfully via /v3/documents", { + logger.info("Conversation saved successfully via /v4/conversations", { containerTag, - customId, - content, - contentLength: content.length, - memoryId: response.id, + conversationId, + messageCount: conversationMessages.length, + responseId: response.id, }) } catch (error) { logger.error("Error saving memory", { @@ -167,7 +144,7 @@ interface SupermemoryMiddlewareOptions { containerTag: string /** Supermemory API key */ apiKey: string - /** Optional conversation ID to group messages for contextual memory generation */ + /** Conversation ID to group messages into a single document (maps to customId in Supermemory). Required when addMemory is "always". 
*/ conversationId?: string /** Enable detailed logging of memory search and injection */ verbose?: boolean @@ -178,6 +155,15 @@ interface SupermemoryMiddlewareOptions { * - "full": Combines both profile and query-based results */ mode?: MemoryMode + /** + * Search mode for memory retrieval: + * - "memories": Search only memory entries (default) + * - "hybrid": Search both memories AND document chunks (recommended for RAG) + * - "documents": Search only document chunks + */ + searchMode?: SearchMode + /** Maximum number of search results to return (default: 10) */ + searchLimit?: number /** * Memory persistence mode: * - "always": Automatically save conversations as memories @@ -196,6 +182,8 @@ interface SupermemoryMiddlewareContext { containerTag: string conversationId?: string mode: MemoryMode + searchMode: SearchMode + searchLimit: number addMemory: "always" | "never" normalizedBaseUrl: string apiKey: string @@ -216,6 +204,8 @@ export const createSupermemoryContext = ( conversationId, verbose = false, mode = "profile", + searchMode = "memories", + searchLimit = 10, addMemory = "never", baseUrl, promptTemplate, @@ -237,6 +227,8 @@ export const createSupermemoryContext = ( containerTag, conversationId, mode, + searchMode, + searchLimit, addMemory, normalizedBaseUrl, apiKey, @@ -298,6 +290,7 @@ export const transformParamsWithMemory = async ( containerTag: ctx.containerTag, conversationId: ctx.conversationId, mode: ctx.mode, + searchMode: ctx.searchMode, isNewTurn, cacheHit: false, }) @@ -312,6 +305,8 @@ export const transformParamsWithMemory = async ( apiKey: ctx.apiKey, logger: ctx.logger, promptTemplate: ctx.promptTemplate, + searchMode: ctx.searchMode, + searchLimit: ctx.searchLimit, }) ctx.memoryCache.set(turnKey, memories)