diff --git a/packages/opencode/src/cli/cmd/models.ts b/packages/opencode/src/cli/cmd/models.ts
index 156dae91c67..4d950b10157 100644
--- a/packages/opencode/src/cli/cmd/models.ts
+++ b/packages/opencode/src/cli/cmd/models.ts
@@ -24,6 +24,10 @@ export const ModelsCommand = cmd({
         describe: "refresh the models cache from models.dev",
         type: "boolean",
       })
+      .option("local", {
+        describe: "show only locally detected models (e.g. Ollama)",
+        type: "boolean",
+      })
   },
   handler: async (args) => {
     if (args.refresh) {
@@ -60,7 +64,7 @@ export const ModelsCommand = cmd({
       return
     }
 
-    const providerIDs = Object.keys(providers).sort((a, b) => {
+    let providerIDs = Object.keys(providers).sort((a, b) => {
       const aIsOpencode = a.startsWith("opencode")
       const bIsOpencode = b.startsWith("opencode")
       if (aIsOpencode && !bIsOpencode) return -1
@@ -68,6 +72,17 @@ export const ModelsCommand = cmd({
       return a.localeCompare(b)
     })
 
+    if (args.local) {
+      const localProviders = Object.entries(providers)
+        .filter(([, p]) => p.source === "api")
+        .map(([id]) => id)
+      providerIDs = providerIDs.filter((id) => localProviders.includes(id))
+      if (providerIDs.length === 0) {
+        UI.println(UI.Style.TEXT_WARNING + "No local models detected. Make sure Ollama is running." + UI.Style.TEXT_NORMAL)
+        return
+      }
+    }
+
     for (const providerID of providerIDs) {
       printModels(providerID, args.verbose)
     }
diff --git a/packages/opencode/src/provider/ollama.ts b/packages/opencode/src/provider/ollama.ts
new file mode 100644
index 00000000000..4c20f1dc388
--- /dev/null
+++ b/packages/opencode/src/provider/ollama.ts
@@ -0,0 +1,83 @@
+import { Log } from "../util/log"
+
+const log = Log.create({ service: "ollama" })
+
+export interface OllamaModel {
+  name: string
+  model: string
+  modified_at: string
+  size: number
+  digest: string
+  details?: {
+    parent_model: string
+    format: string
+    family: string
+    families: string[]
+    parameter_size: string
+    quantization_level: string
+  }
+}
+
+export interface OllamaTagsResponse {
+  models: OllamaModel[]
+}
+
+export interface OllamaStatus {
+  running: boolean
+  url: string
+  models: OllamaModel[]
+}
+
+const OLLAMA_DEFAULT_URL = "http://localhost:11434"
+const OLLAMA_API_TAGS = "/api/tags"
+
+export async function detect(url: string = OLLAMA_DEFAULT_URL): Promise<OllamaStatus> {
+  const start = Date.now()
+
+  try {
+    const controller = new AbortController()
+    const timeout = setTimeout(() => controller.abort(), 3000)
+
+    const response = await fetch(`${url}${OLLAMA_API_TAGS}`, {
+      signal: controller.signal,
+      headers: { Accept: "application/json" },
+    })
+
+    clearTimeout(timeout)
+
+    if (!response.ok) {
+      log.info("ollama not responding", { url, status: response.status })
+      return { running: false, url, models: [] }
+    }
+
+    const data = (await response.json()) as OllamaTagsResponse
+    const models = data.models ?? []
+
+    log.info("ollama detected", { url, modelCount: models.length, ms: Date.now() - start })
+
+    return {
+      running: true,
+      url,
+      models,
+    }
+  } catch (error) {
+    const err = error as Error
+    if (err.name === "AbortError") {
+      log.info("ollama timeout", { url })
+    } else {
+      log.info("ollama not running", { url, error: err.message })
+    }
+    return { running: false, url, models: [] }
+  }
+}
+
+export function parseModelName(fullName: string): { model: string; tag?: string } {
+  const colonIndex = fullName.lastIndexOf(":")
+  if (colonIndex === -1) {
+    return { model: fullName }
+  }
+  return {
+    model: fullName.substring(0, colonIndex),
+    tag: fullName.substring(colonIndex + 1),
+  }
+}
diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts
index 6d5c9d1ad37..960f433acc9 100644
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@@ -45,6 +45,7 @@ import { fromNodeProviderChain } from "@aws-sdk/credential-providers"
 import { GoogleAuth } from "google-auth-library"
 import { ProviderTransform } from "./transform"
 import { Installation } from "../installation"
+import { detect as detectOllama, parseModelName } from "./ollama"
 
 export namespace Provider {
   const log = Log.create({ service: "provider" })
@@ -180,6 +181,17 @@
         options: {},
       }
     },
+    ollama: async () => {
+      // Ollama is a local server and requires no API key; it is always
+      // eligible for autoload once the provider is present.
+      return {
+        autoload: true,
+        async getModel(sdk: any, modelID: string, _options?: Record<string, unknown>) {
+          return sdk.chat(modelID)
+        },
+        options: { apiKey: "ollama" },
+      }
+    },
     azure: async () => {
       return {
         autoload: false,
@@ -1007,6 +1019,102 @@
     mergeProvider(providerID, partial)
   }
 
+  // Patterns to detect reasoning-capable Ollama models
+  const OLLAMA_REASONING_PATTERNS = [
+    /qwen[_\-]?3/i, // qwen3, qwen-3, qwen_3
+    /phi[_\-]?4/i, // phi4, phi-4
+    /gemma[_\-]?3/i, // gemma3, gemma-3
+    /llama[_\-]?3/i, // llama3, llama-3
+    /r1$/i, // deepseek-r1, etc.
+    /qwq/i, // QwQ
+    /deepseek/i, // DeepSeek family
+    /gpt-?oss/i, // GPT-OSS
+  ]
+
+  function isOllamaReasoningModel(modelName: string, family?: string, families?: string[]): boolean {
+    const searchText = `${modelName} ${family ?? ""} ${families?.join(" ") ?? ""}`
+    return OLLAMA_REASONING_PATTERNS.some((pattern) => pattern.test(searchText))
+  }
+
+  // Auto-detect Ollama if not already configured
+  const ollamaConfigured = providers["ollama"] || configProviders.some(([id]) => id === "ollama")
+  if (!ollamaConfigured) {
+    const ollama = await detectOllama()
+    if (ollama.running && ollama.models.length > 0) {
+      const ollamaProviderID = "ollama"
+      const ollamaModels: Record<string, any> = {}
+
+      for (const ollamaModel of ollama.models) {
+        const { model, tag } = parseModelName(ollamaModel.name)
+        const modelID = tag ? `${model}:${tag}` : model
+
+        // Detect if this is a reasoning model based on patterns
+        const isReasoning = isOllamaReasoningModel(
+          model,
+          ollamaModel.details?.family,
+          ollamaModel.details?.families,
+        )
+
+        // Check for config overrides - allow forcing reasoning on/off
+        const configModel = config.provider?.ollama?.models?.[modelID]
+        const configForceReasoning = configModel?.reasoning // undefined = auto, true = force on, false = force off
+        const finalReasoning = configForceReasoning !== undefined ? configForceReasoning : isReasoning
+
+        ollamaModels[modelID] = {
+          id: modelID,
+          providerID: ollamaProviderID,
+          name: model,
+          family: ollamaModel.details?.family ?? model,
+          api: {
+            id: modelID,
+            url: ollama.url,
+            npm: "@ai-sdk/openai-compatible",
+          },
+          status: "active",
+          capabilities: {
+            temperature: true,
+            reasoning: finalReasoning,
+            attachment: false,
+            toolcall: true,
+            input: { text: true, audio: false, image: false, video: false, pdf: false },
+            output: { text: true, audio: false, image: false, video: false, pdf: false },
+            interleaved: finalReasoning
+              ? configModel?.interleaved ?? { field: "reasoning_content" }
+              : false,
+          },
+          cost: { input: 0, output: 0, cache: { read: 0, write: 0 } },
+          options:
+            finalReasoning
+              ? { reasoningEffort: configModel?.options?.reasoningEffort ?? "medium", ...configModel?.options }
+              : {},
+          limit: {
+            context:
+              configModel?.limit?.context ??
+              (finalReasoning ? 200000 : ollamaModel.details?.parameter_size ? 128000 : 8192),
+            output: configModel?.limit?.output ?? (finalReasoning ? 32768 : 8192),
+          },
+          headers: {},
+          release_date: "",
+          variants: {},
+        }
+        // ProviderTransform.variants() already returns a plain record; an
+        // identity mapValues() pass over it would add nothing.
+        ollamaModels[modelID].variants = ProviderTransform.variants(ollamaModels[modelID])
+      }
+
+      providers[ollamaProviderID] = {
+        id: ollamaProviderID,
+        name: "Ollama",
+        source: "api",
+        env: [],
+        options: { baseURL: `${ollama.url}/v1`, apiKey: "ollama" },
+        models: ollamaModels,
+      }
+      // ollamaModels is a record, not an array: count its keys
+      log.info("ollama auto-detected", { modelCount: Object.keys(ollamaModels).length })
+    }
+  }
+
   for (const [providerID, provider] of Object.entries(providers)) {
     if (!isProviderAllowed(providerID)) {
       delete providers[providerID]
@@ -1016,6 +1124,28 @@
 
     const configProvider = config.provider?.[providerID]
     for (const [modelID, model] of Object.entries(provider.models)) {
+      // Apply reasoning detection for Ollama models (both auto-detected and config-loaded)
+      if (providerID === "ollama") {
+        const isReasoning = isOllamaReasoningModel(modelID)
+        const configModel = config.provider?.ollama?.models?.[modelID]
+        const configForceReasoning = configModel?.reasoning
+        const finalReasoning = configForceReasoning !== undefined ? configForceReasoning : isReasoning
+
+        if (finalReasoning) {
+          model.capabilities.reasoning = true
+          if (!model.capabilities.interleaved) {
+            model.capabilities.interleaved = configModel?.interleaved ?? { field: "reasoning_content" }
+          }
+          model.options = { reasoningEffort: configModel?.options?.reasoningEffort ?? "medium", ...model.options }
+          if (!model.limit.output || model.limit.output < 32768) {
+            model.limit.output = configModel?.limit?.output ?? 32768
+          }
+          if (!model.limit.context || model.limit.context < 200000) {
+            model.limit.context = configModel?.limit?.context ?? 200000
+          }
+        }
+      }
+
       model.api.id = model.api.id ?? model.id ?? modelID
       if (modelID === "gpt-5-chat-latest" || (providerID === "openrouter" && modelID === "openai/gpt-5-chat"))
         delete provider.models[modelID]
diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 6980be05188..d195100f8bf 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -345,8 +345,14 @@
       id.includes("kimi") ||
       // TODO: Remove this after models.dev data is fixed to use "kimi-k2.5" instead of "k2p5"
       id.includes("k2p5")
-    )
+    ) {
+      // Skip variants for certain providers that need special handling in options()
+      // but allow variants for Ollama models since they use @ai-sdk/openai-compatible
+      if (model.providerID === "ollama") {
+        return Object.fromEntries(WIDELY_SUPPORTED_EFFORTS.map((effort) => [effort, { reasoningEffort: effort }]))
+      }
       return {}
+    }
 
     // see: https://docs.x.ai/docs/guides/reasoning#control-how-hard-the-model-thinks
     if (id.includes("grok") && id.includes("grok-3-mini")) {
@@ -756,6 +762,28 @@
       result["enable_thinking"] = true
     }
 
+    // Enable thinking for Ollama reasoning models via the "think" parameter
+    // Ollama supports thinking for: DeepSeek R1, DeepSeek v3.1, Qwen 3, GPT-OSS
+    // Most models accept true/false, GPT-OSS accepts low/medium/high
+    if (
+      input.model.providerID === "ollama" &&
+      input.model.capabilities.reasoning &&
+      input.model.api.npm === "@ai-sdk/openai-compatible"
+    ) {
+      // Check if it's a GPT-OSS model (supports thinking levels)
+      if (input.model.id.toLowerCase().includes("gpt-oss")) {
+        result["think"] = input.model.options?.reasoningEffort ?? "medium"
+      } else {
+        // For most models, enable thinking when not disabled
+        const effort = input.model.options?.reasoningEffort
+        if (effort === "none") {
+          result["think"] = false
+        } else {
+          result["think"] = true
+        }
+      }
+    }
+
     if (input.model.api.id.includes("gpt-5") && !input.model.api.id.includes("gpt-5-chat")) {
       if (!input.model.api.id.includes("gpt-5-pro")) {
         result["reasoningEffort"] = "medium"