27 changes: 27 additions & 0 deletions packages/components/credentials/OllamaCloudApi.credential.ts
@@ -0,0 +1,27 @@
import { INodeParams, INodeCredential } from '../src/Interface'

class OllamaCloudApi implements INodeCredential {
label: string
name: string
version: number
description?: string
inputs: INodeParams[]

constructor() {
this.label = 'Ollama Cloud API'
this.name = 'ollamaCloudApi'
this.version = 1.0
this.description = 'API key for Ollama Cloud (https://ollama.com)'
this.inputs = [
{
label: 'Ollama Cloud API Key',
name: 'ollamaCloudApiKey',
type: 'password',
placeholder: 'sk-...'
}
]
}
}

module.exports = { credClass: OllamaCloudApi }
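
The credential above only defines how the key is collected. As a point of reference, here is a minimal sketch of how such a key is typically presented to Ollama Cloud — the https://ollama.com/api/chat endpoint, the bearer-token header, and the model tag are assumptions for illustration, not something this file establishes:

```typescript
// Hedged sketch: exercising an Ollama Cloud API key directly.
// Endpoint, auth header, and model tag are assumptions, not defined by this PR.
const apiKey = process.env.OLLAMA_CLOUD_API_KEY ?? '' // the stored 'ollamaCloudApiKey' value

async function cloudChat(prompt: string): Promise<string> {
    const response = await fetch('https://ollama.com/api/chat', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`
        },
        body: JSON.stringify({
            model: 'gpt-oss:120b', // placeholder model tag
            messages: [{ role: 'user', content: prompt }],
            stream: false
        })
    })
    if (!response.ok) throw new Error(`Ollama Cloud error: ${response.status}`)
    const data = await response.json()
    return data.message?.content ?? ''
}
```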

32 changes: 28 additions & 4 deletions packages/components/nodes/chatmodels/ChatOllama/ChatOllama.ts
@@ -1,9 +1,8 @@
import { ChatOllamaInput } from '@langchain/ollama'
import { BaseChatModelParams } from '@langchain/core/language_models/chat_models'
import { BaseCache } from '@langchain/core/caches'
import { IMultiModalOption, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { ChatOllama } from './FlowiseChatOllama'
import { ChatOllama, FlowiseChatOllamaInput } from './FlowiseChatOllama'

class ChatOllama_ChatModels implements INode {
label: string
@@ -20,7 +19,7 @@ class ChatOllama_ChatModels implements INode {
constructor() {
this.label = 'ChatOllama'
this.name = 'chatOllama'
this.version = 5.0
this.version = 5.1
this.type = 'ChatOllama'
this.icon = 'Ollama.svg'
this.category = 'Chat Models'
@@ -210,6 +209,29 @@ class ChatOllama_ChatModels implements INode {
step: 0.1,
optional: true,
additionalParams: true
},
{
label: 'Reasoning Effort',
name: 'reasoningEffort',
type: 'options',
description:
'Controls the thinking/reasoning depth for reasoning models (e.g., GPT-OSS, DeepSeek-R1, Qwen3). Higher effort = more thorough reasoning but slower responses.',
options: [
{
label: 'Low',
name: 'low'
},
{
label: 'Medium',
name: 'medium'
},
{
label: 'High',
name: 'high'
}
],
optional: true,
additionalParams: true
}
]
}
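
The Reasoning Effort option added above is ultimately forwarded to Ollama's native `think` request field (see FlowiseChatOllama.ts further down). A minimal sketch of that mapping, under the assumptions spelled out in the PR's own comments — the per-model boolean normalization shown here is illustrative; the implementation forwards the string unchanged and lets Ollama interpret it:

```typescript
// Illustrative only: how the three option values could be normalized per model
// family. GPT-OSS takes an effort level; DeepSeek-R1 / Qwen3 style models
// toggle thinking with a boolean (assumption drawn from the comments below).
type ReasoningEffort = 'low' | 'medium' | 'high'

function thinkParam(model: string, effort?: ReasoningEffort): ReasoningEffort | boolean | undefined {
    if (!effort) return undefined
    return model.startsWith('gpt-oss') ? effort : true
}

// thinkParam('gpt-oss:20b', 'high')   -> 'high'
// thinkParam('deepseek-r1:8b', 'low') -> true
```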
@@ -230,13 +252,14 @@ class ChatOllama_ChatModels implements INode {
const repeatLastN = nodeData.inputs?.repeatLastN as string
const repeatPenalty = nodeData.inputs?.repeatPenalty as string
const tfsZ = nodeData.inputs?.tfsZ as string
const reasoningEffort = nodeData.inputs?.reasoningEffort as string
const allowImageUploads = nodeData.inputs?.allowImageUploads as boolean
const jsonMode = nodeData.inputs?.jsonMode as boolean
const streaming = nodeData.inputs?.streaming as boolean

const cache = nodeData.inputs?.cache as BaseCache

const obj: ChatOllamaInput & BaseChatModelParams = {
const obj: FlowiseChatOllamaInput & BaseChatModelParams = {
baseUrl,
temperature: parseFloat(temperature),
model: modelName,
@@ -257,6 +280,7 @@
if (keepAlive) obj.keepAlive = keepAlive
if (cache) obj.cache = cache
if (jsonMode) obj.format = 'json'
if (reasoningEffort) obj.reasoningEffort = reasoningEffort as 'low' | 'medium' | 'high'

const multiModalOption: IMultiModalOption = {
image: {
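
Putting the changes to init() together: the node now assembles a FlowiseChatOllamaInput and hands it to the subclass in FlowiseChatOllama.ts. A hedged usage sketch of the resulting construction — the node id and model tag are placeholders:

```typescript
import { ChatOllama } from './FlowiseChatOllama'

// Illustrative only: mirrors what init() builds from the inputs above.
const model = new ChatOllama('chatOllama_0', {
    baseUrl: 'http://localhost:11434',
    model: 'gpt-oss:20b', // placeholder reasoning-capable model tag
    temperature: 0.7,
    reasoningEffort: 'high' // new field introduced by this PR
})

// const reply = await model.invoke([new HumanMessage('Why is the sky blue?')])
```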
139 changes: 138 additions & 1 deletion packages/components/nodes/chatmodels/ChatOllama/FlowiseChatOllama.ts
@@ -1,16 +1,25 @@
import { ChatOllama as LCChatOllama, ChatOllamaInput } from '@langchain/ollama'
import { IMultiModalOption, IVisionChatModal } from '../../../src'
import { BaseMessage, AIMessageChunk } from '@langchain/core/messages'
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager'
import { ChatGenerationChunk } from '@langchain/core/outputs'

export interface FlowiseChatOllamaInput extends ChatOllamaInput {
reasoningEffort?: 'low' | 'medium' | 'high'
}

export class ChatOllama extends LCChatOllama implements IVisionChatModal {
configuredModel: string
configuredMaxToken?: number
multiModalOption: IMultiModalOption
id: string
reasoningEffort?: 'low' | 'medium' | 'high'

constructor(id: string, fields?: ChatOllamaInput) {
constructor(id: string, fields?: FlowiseChatOllamaInput) {
super(fields)
this.id = id
this.configuredModel = fields?.model ?? ''
this.reasoningEffort = fields?.reasoningEffort
}

revertToOriginalModel(): void {
@@ -24,4 +33,132 @@ export class ChatOllama extends LCChatOllama implements IVisionChatModal {
setVisionModel(): void {
// pass
}

/**
* Override _streamResponseChunks to inject the 'think' parameter for reasoning models
*/
async *_streamResponseChunks(
messages: BaseMessage[],
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): AsyncGenerator<ChatGenerationChunk> {
// If reasoningEffort is set, we need to use non-streaming with think parameter
// because streaming with think requires special handling
if (this.reasoningEffort) {
try {
// Call the non-streaming version and yield the result as a single chunk
const result = await this._generateNonStreaming(messages, options, runManager)
if (result) {
yield result
}
return
} catch (error: any) {
// If we get a 405 error, it means the endpoint doesn't support native Ollama API
// Fall back to regular streaming without the think parameter
if (error?.message?.includes('405')) {
console.warn(
'Ollama reasoning effort requires native Ollama API endpoint. Falling back to standard mode.'
)
// Fall through to use parent's streaming implementation
} else {
throw error
}
}
}

// Otherwise, use the parent's streaming implementation
for await (const chunk of super._streamResponseChunks(messages, options, runManager)) {
yield chunk
}
}

/**
* Non-streaming generation with think parameter support
*/
private async _generateNonStreaming(
messages: BaseMessage[],
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): Promise<ChatGenerationChunk | undefined> {
let baseUrl = this.baseUrl || 'http://localhost:11434'
// Remove trailing slash if present
baseUrl = baseUrl.replace(/\/+$/, '')
const url = `${baseUrl}/api/chat`

// Convert messages to Ollama format
const ollamaMessages = messages.map((msg) => ({
role: msg._getType() === 'human' ? 'user' : msg._getType() === 'ai' ? 'assistant' : 'system',
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
}))

const requestBody: any = {
model: this.model,
messages: ollamaMessages,
stream: false,
options: {}
}

// Add think parameter for reasoning effort
// GPT-OSS model requires effort level: 'low', 'medium', 'high'
// Other models (DeepSeek R1, Qwen3) accept boolean true/false
// We pass the effort level string - Ollama handles this appropriately per model
if (this.reasoningEffort) {
requestBody.think = this.reasoningEffort
}

// Add other Ollama options
if (this.temperature !== undefined) requestBody.options.temperature = this.temperature
if (this.topP !== undefined) requestBody.options.top_p = this.topP
if (this.topK !== undefined) requestBody.options.top_k = this.topK
if (this.numCtx !== undefined) requestBody.options.num_ctx = this.numCtx
if (this.repeatPenalty !== undefined) requestBody.options.repeat_penalty = this.repeatPenalty
if (this.mirostat !== undefined) requestBody.options.mirostat = this.mirostat
if (this.mirostatEta !== undefined) requestBody.options.mirostat_eta = this.mirostatEta
if (this.mirostatTau !== undefined) requestBody.options.mirostat_tau = this.mirostatTau
if (this.numGpu !== undefined) requestBody.options.num_gpu = this.numGpu
if (this.numThread !== undefined) requestBody.options.num_thread = this.numThread
if (this.repeatLastN !== undefined) requestBody.options.repeat_last_n = this.repeatLastN
if (this.tfsZ !== undefined) requestBody.options.tfs_z = this.tfsZ
if (this.format) requestBody.format = this.format
if (this.keepAlive) requestBody.keep_alive = this.keepAlive

try {
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(requestBody)
})

if (!response.ok) {
const errorText = await response.text()
throw new Error(`Ollama API error: ${response.status} ${errorText}`)
}

const data = await response.json()

// Extract content and thinking from response
let content = data.message?.content || ''
const thinking = data.message?.thinking || ''

// If there's thinking content, optionally prepend it (or handle separately)
// For now, we just return the main content
// The thinking is available in data.message.thinking if needed

const chunk = new ChatGenerationChunk({
message: new AIMessageChunk({
content,
additional_kwargs: thinking ? { thinking } : {}
}),
text: content
})

await runManager?.handleLLMNewToken(content)

return chunk
} catch (error) {
throw error
}
}
}
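
For reference, a sketch of the native /api/chat exchange that _generateNonStreaming builds and parses. The payload values are illustrative; the field names (`think` on the request, `message.thinking` on the response) are the ones the code above writes and reads:

```typescript
// Request body assembled when reasoningEffort is set (abridged, illustrative values)
const requestBody = {
    model: 'gpt-oss:20b',
    messages: [{ role: 'user', content: 'Why is the sky blue?' }],
    stream: false,
    think: 'high', // injected from reasoningEffort
    options: { temperature: 0.7 }
}

// Expected response shape (abridged):
// {
//   "message": {
//     "role": "assistant",
//     "content": "...final answer...",
//     "thinking": "...reasoning trace, when the model emits one..."
//   },
//   "done": true
// }
```

Because the override stores the trace in additional_kwargs, a caller could surface it roughly as `(await model.invoke(messages)).additional_kwargs?.thinking`; whether it propagates unchanged through a given chain depends on how LangChain merges chunk kwargs, so treat that as a starting point rather than a guarantee.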