Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mobilerun/agent/providers/setup_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
# Default kwargs keyed by provider/variant name; each value is a mapping of
# integer-valued options for that variant.
# NOTE(review): presumably seeded into newly created profiles by the setup
# flow — confirm against the callers of this mapping.
DEFAULT_KWARGS_BY_VARIANT: dict[str, dict[str, int]] = {
"anthropic_oauth": {"max_tokens": 1024},
"gemini_oauth_code_assist": {"max_tokens": 1024},
# Without this, llama-index asks Ollama for the model's MAXIMUM context,
# which allocates the full KV cache (256K-context models -> ~19 GB) and
# spills to CPU on typical machines. -1 restores model-max for big GPUs.
"Ollama": {"context_window": 32768},
}

HIDDEN_ROLE_FALLBACKS: tuple[str, ...] = ("app_opener", "structured_output")
Expand Down
73 changes: 73 additions & 0 deletions mobilerun/agent/utils/llm_picker.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,78 @@ def _validate_gemini_oauth_model(model: object) -> None:
)


# Default Ollama context size. llama-index's own default (-1) resolves to the
# model's maximum context, which allocates the full KV cache up front (e.g. a
# 256K-context model -> ~19 GB, spilling to CPU) — and because mobilerun sends
# num_ctx per request, it overrides every Ollama-side setting
# (OLLAMA_CONTEXT_LENGTH, Modelfile, /set parameter), so users cannot fix it
# server-side. ``context_window: -1`` in profile kwargs restores model-max.
_OLLAMA_DEFAULT_CONTEXT_WINDOW = 32768

_warned_ollama_kwargs: set[str] = set()


def _prepare_ollama_kwargs(kwargs: dict[str, Any], llm_class: Any) -> dict[str, Any]:
"""Translate provider-portable kwargs for llama-index's Ollama class.

``max_tokens`` is not an Ollama constructor field and pydantic silently
drops it; translate it to ``additional_kwargs.num_predict`` (an explicit
``num_predict`` wins). Also default ``context_window`` (see
``_OLLAMA_DEFAULT_CONTEXT_WINDOW``) and keep it aligned with an explicit
``additional_kwargs.num_ctx`` so the -1 path's hidden ``client.show()``
lookup is never triggered by mobilerun defaults.
"""
kwargs = dict(kwargs)
additional_kwargs = dict(kwargs.get("additional_kwargs") or {})

if "max_tokens" in kwargs and "max_tokens" not in llm_class.model_fields:
max_tokens = kwargs.pop("max_tokens")
if isinstance(max_tokens, bool) or max_tokens is None:
valid_max_tokens = None
else:
try:
valid_max_tokens = int(max_tokens)
except (TypeError, ValueError):
valid_max_tokens = None
if valid_max_tokens is None:
logger.warning(
f"Ignoring non-integer max_tokens={max_tokens!r} for Ollama."
)
elif "num_predict" in additional_kwargs:
if additional_kwargs["num_predict"] != valid_max_tokens:
logger.warning(
f"Both max_tokens={valid_max_tokens} and "
f"additional_kwargs.num_predict="
f"{additional_kwargs['num_predict']} are set for Ollama; "
f"num_predict wins."
)
else:
additional_kwargs["num_predict"] = valid_max_tokens

if kwargs.get("context_window") is None:
context_window = _OLLAMA_DEFAULT_CONTEXT_WINDOW
if "num_ctx" in additional_kwargs:
try:
context_window = int(additional_kwargs["num_ctx"])
except (TypeError, ValueError):
pass
kwargs["context_window"] = context_window

if additional_kwargs:
kwargs["additional_kwargs"] = additional_kwargs

for key, value in kwargs.items():
if value is None or key in llm_class.model_fields:
continue
if key not in _warned_ollama_kwargs:
_warned_ollama_kwargs.add(key)
logger.warning(
f"Ollama does not accept the {key!r} option; it will be ignored."
)

return kwargs


def _load_openai_responses(**kwargs: Any) -> LLM:
from llama_index.llms.openai.responses import OpenAIResponses

Expand Down Expand Up @@ -232,6 +304,7 @@ def load_llm(provider_name: str, model: str | None = None, **kwargs: Any) -> LLM
from llama_index.llms.ollama import Ollama

llm_class = Ollama
kwargs = _prepare_ollama_kwargs(kwargs, Ollama)
elif provider_name == "Anthropic":
return _load_anthropic(**kwargs)
elif provider_name == "OpenRouter":
Expand Down
81 changes: 58 additions & 23 deletions mobilerun/cli/configure_wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,18 @@ def _set_profile_max_tokens(profile: Any, value: int) -> None:
profile.kwargs["max_tokens"] = value


def _set_profile_context_window(profile: Any, value: int) -> None:
    """Store ``value`` as ``context_window`` in a fresh copy of ``profile.kwargs``.

    The existing kwargs mapping is copied rather than mutated, so any other
    holder of the old mapping does not see the change.
    """
    profile.kwargs = dict(profile.kwargs, context_window=value)


def _any_ollama_profile(config) -> bool:
    """Return True when at least one profile in ``config.llm_profiles`` uses Ollama.

    Profiles without a ``provider`` attribute are treated as non-Ollama; the
    comparison against ``"Ollama"`` is exact (case-sensitive).
    """
    for candidate in config.llm_profiles.values():
        if getattr(candidate, "provider", "") == "Ollama":
            return True
    return False


def _toggle_label(enabled: bool) -> str:
    """Render a boolean setting as its menu indicator, ``[ON]`` or ``[OFF]``."""
    if enabled:
        return "[ON]"
    return "[OFF]"
Expand Down Expand Up @@ -289,31 +301,40 @@ def _configure_advanced_settings(
vision_on = _is_vision_enabled(config)
reasoning_on = config.agent.reasoning

choices = [
SelectChoice(
value="vision",
label=f"Vision {_toggle_label(vision_on)}",
),
SelectChoice(
value="reasoning",
label=f"Reasoning {_toggle_label(reasoning_on)}",
),
SelectChoice(
value="max_steps",
label="Maximum steps",
),
SelectChoice(
value="temperature",
label="Temperature",
),
SelectChoice(
value="max_tokens",
label="Max tokens",
),
]
if _any_ollama_profile(config):
choices.append(
SelectChoice(
value="context_window",
label="Context window (Ollama)",
)
)
choices.append(SelectChoice(value="done", label="Done"))

selected = _select_with_back(
"Advanced settings",
[
SelectChoice(
value="vision",
label=f"Vision {_toggle_label(vision_on)}",
),
SelectChoice(
value="reasoning",
label=f"Reasoning {_toggle_label(reasoning_on)}",
),
SelectChoice(
value="max_steps",
label="Maximum steps",
),
SelectChoice(
value="temperature",
label="Temperature",
),
SelectChoice(
value="max_tokens",
label="Max tokens",
),
SelectChoice(value="done", label="Done"),
],
choices,
default=default_selection,
)

Expand Down Expand Up @@ -346,6 +367,20 @@ def _configure_advanced_settings(
for role in _ALL_CONFIG_ROLES:
if role in config.llm_profiles:
_set_profile_max_tokens(config.llm_profiles[role], value)
elif selected == "context_window":
current_value = config.llm_profiles[_ALL_CONFIG_ROLES[0]].kwargs.get(
"context_window", 32768
)
try:
current_default = int(current_value)
except (TypeError, ValueError):
current_default = 32768
value = _prompt_int(
console, "Context window (-1 = model max)", default=current_default
)
for role in _ALL_CONFIG_ROLES:
if role in config.llm_profiles:
_set_profile_context_window(config.llm_profiles[role], value)

default_selection = selected

Expand Down
10 changes: 10 additions & 0 deletions mobilerun/config_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ llm_profiles:
# api_key_source: auto # auto = saved env file first, then shell env; env = shell only; file = saved env file
# kwargs: # optional kwargs, add api_key in kwargs if not already in .env
# max_tokens: 8192
#
# Ollama example — max_tokens is translated to Ollama's num_predict, and
# context_window controls num_ctx (defaults to 32768; -1 = model maximum,
# which preallocates the full KV cache and can spill to CPU):
# provider: Ollama
# model: qwen3:8b
# base_url: http://localhost:11434
# kwargs:
# max_tokens: 2048
# context_window: 32768

# Executor: Selects and executes atomic actions
executor:
Expand Down
4 changes: 3 additions & 1 deletion mobilerun/config_manager/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def load(cls, config_path: Optional[str] = None) -> MobileConfig:
Load config with resolution order:
1. Explicit config_path argument
2. MOBILERUN_CONFIG env var
3. User config (~/.config/mobilerun/config.yaml)
3. User config (platformdirs config dir for "droidrun", e.g.
~/Library/Application Support/droidrun/config.yaml on macOS,
~/.config/droidrun/config.yaml on Linux)
4. Package defaults (creates user config)
"""
if config_path:
Expand Down
144 changes: 144 additions & 0 deletions tests/test_llm_picker.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,147 @@ def test_anthropic_current_catalog_models_have_metadata(model: str) -> None:

assert metadata.model_name == model
assert metadata.context_window > 0


# --- Ollama kwarg translation (max_tokens / context_window) ------------------


import logging


@pytest.fixture
def mobilerun_caplog(caplog):
    """Provide ``caplog`` with the "mobilerun" logger propagating at WARNING.

    Propagation is switched on for the duration of the test and the previous
    setting is put back afterwards.
    """
    mobilerun_logger = logging.getLogger("mobilerun")
    saved_propagate = mobilerun_logger.propagate
    mobilerun_logger.propagate = True
    caplog.set_level(logging.WARNING, logger="mobilerun")
    yield caplog
    mobilerun_logger.propagate = saved_propagate


def _ollama_class():
    """Return llama-index's ``Ollama`` class, importing it only when called."""
    import llama_index.llms.ollama as ollama_module

    return ollama_module.Ollama


def _prepare(kwargs):
    """Run ``_prepare_ollama_kwargs`` on ``kwargs`` against the real Ollama class."""
    from mobilerun.agent.utils import llm_picker

    return llm_picker._prepare_ollama_kwargs(kwargs, _ollama_class())


def test_ollama_max_tokens_translates_to_num_predict() -> None:
    """``max_tokens`` is removed and re-emitted as ``additional_kwargs.num_predict``."""
    prepared = _prepare({"model": "qwen3:0.6b", "max_tokens": 2048})

    assert prepared["additional_kwargs"]["num_predict"] == 2048
    assert "max_tokens" not in prepared


def test_ollama_explicit_num_predict_wins_over_max_tokens(mobilerun_caplog) -> None:
    """A conflicting explicit ``num_predict`` is kept and the conflict is logged."""
    raw_kwargs = {
        "model": "qwen3:0.6b",
        "max_tokens": 2048,
        "additional_kwargs": {"num_predict": 512},
    }

    prepared = _prepare(raw_kwargs)
    logged = [record.message for record in mobilerun_caplog.records]

    assert prepared["additional_kwargs"]["num_predict"] == 512
    assert any("num_predict wins" in message for message in logged)


def test_ollama_equal_num_predict_and_max_tokens_no_warning(mobilerun_caplog) -> None:
    """Matching ``max_tokens`` and ``num_predict`` values produce no conflict warning."""
    raw_kwargs = {
        "model": "qwen3:0.6b",
        "max_tokens": 512,
        "additional_kwargs": {"num_predict": 512},
    }

    prepared = _prepare(raw_kwargs)
    logged = [record.message for record in mobilerun_caplog.records]

    assert prepared["additional_kwargs"]["num_predict"] == 512
    assert all("num_predict wins" not in message for message in logged)


def test_ollama_numeric_string_max_tokens_is_converted() -> None:
    """A digit-only string ``max_tokens`` ends up as an integer ``num_predict``."""
    prepared = _prepare({"model": "qwen3:0.6b", "max_tokens": "1024"})
    num_predict = prepared["additional_kwargs"]["num_predict"]

    assert num_predict == 1024


@pytest.mark.parametrize("bad", ["lots", True, None])
def test_ollama_invalid_max_tokens_warns_and_skips(bad, mobilerun_caplog) -> None:
    """Unusable ``max_tokens`` values are dropped with a warning and never translated."""
    prepared = _prepare({"model": "qwen3:0.6b", "max_tokens": bad})
    logged = [record.message for record in mobilerun_caplog.records]

    assert "max_tokens" not in prepared
    assert "num_predict" not in prepared.get("additional_kwargs", {})
    assert any("Ignoring non-integer max_tokens" in message for message in logged)


def test_ollama_context_window_defaults_to_32k() -> None:
    """With no context settings supplied, ``context_window`` is 32768."""
    prepared = _prepare({"model": "qwen3:0.6b"})
    context_window = prepared["context_window"]

    assert context_window == 32768


@pytest.mark.parametrize("explicit", [8192, -1])
def test_ollama_explicit_context_window_is_preserved(explicit) -> None:
    """A caller-supplied ``context_window`` (including -1) is passed through as-is."""
    raw_kwargs = {"model": "qwen3:0.6b", "context_window": explicit}

    prepared = _prepare(raw_kwargs)

    assert prepared["context_window"] == explicit


def test_ollama_num_ctx_mirrors_into_context_window() -> None:
    """An explicit ``num_ctx`` is copied into ``context_window`` and left in place."""
    raw_kwargs = {"model": "qwen3:0.6b", "additional_kwargs": {"num_ctx": 16384}}

    prepared = _prepare(raw_kwargs)

    assert prepared["context_window"] == 16384
    assert prepared["additional_kwargs"]["num_ctx"] == 16384


def test_ollama_non_numeric_num_ctx_falls_back_to_default() -> None:
    """A ``num_ctx`` that cannot be read as an integer yields the 32768 default."""
    raw_kwargs = {"model": "qwen3:0.6b", "additional_kwargs": {"num_ctx": "max"}}

    prepared = _prepare(raw_kwargs)

    assert prepared["context_window"] == 32768


def test_ollama_unknown_kwarg_warns_once(mobilerun_caplog) -> None:
    """Repeated use of the same unsupported kwarg is reported a single time."""
    from mobilerun.agent.utils import llm_picker

    # Clear any earlier record of this key so the first call below warns.
    llm_picker._warned_ollama_kwargs.discard("frobnicate")
    for _ in range(2):
        _prepare({"model": "qwen3:0.6b", "frobnicate": 1})

    matching = [
        record
        for record in mobilerun_caplog.records
        if "'frobnicate'" in record.message
    ]
    assert len(matching) == 1


def test_ollama_translation_disabled_if_class_grows_max_tokens_field() -> None:
    """When the target class declares ``max_tokens``, the value is passed through."""
    from mobilerun.agent.utils import llm_picker

    class FakeOllama:
        model_fields = dict.fromkeys(("model", "max_tokens", "context_window"))

    prepared = llm_picker._prepare_ollama_kwargs(
        {"model": "m", "max_tokens": 99}, FakeOllama
    )

    assert "additional_kwargs" not in prepared
    assert prepared["max_tokens"] == 99


def test_load_llm_ollama_end_to_end_applies_translation() -> None:
    """``load_llm`` applies the Ollama kwarg translation to the constructed LLM."""
    ollama_llm = load_llm("Ollama", model="qwen3:0.6b", max_tokens=256)

    assert ollama_llm.additional_kwargs["num_predict"] == 256
    assert ollama_llm.context_window == 32768
    assert not (
        hasattr(ollama_llm, "max_tokens")
        and "max_tokens" in type(ollama_llm).model_fields
    )


def test_ollama_wizard_default_includes_context_window() -> None:
    """The default kwargs registered for Ollama are exactly ``context_window: 32768``."""
    from mobilerun.agent.providers import setup_service

    ollama_defaults = setup_service.DEFAULT_KWARGS_BY_VARIANT["Ollama"]

    assert ollama_defaults == {"context_window": 32768}
Loading