Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/run-eval/resolve_model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
# Keep this set in sync with the SDK LLM config parameters that are
# SDK-internal.
SDK_ONLY_PARAMS = {"disable_vision"}

# Model IDs used by default for scheduled and label-triggered integration
# test runs.
#
# This constant is the single source of truth for that default set. The
# integration-runner workflow imports it from the checked-out branch through
# `resolve_model_config.py`, which lets a release branch change its defaults
# even though `pull_request_target` evaluates workflow-level env vars from
# the default branch.
DEFAULT_INTEGRATION_MODEL_IDS: list[str] = [
    "claude-sonnet-4-6",
    "deepseek-v4-flash",
    "kimi-k2.6",
    "gemini-3.1-pro",
]

# Model configurations dictionary
MODELS = {
Expand Down
34 changes: 15 additions & 19 deletions .github/workflows/integration-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ on:

env:
N_PROCESSES: 4 # Global configuration for number of parallel processes for evaluation
# Default models for scheduled/label-triggered runs (subset of models from resolve_model_config.py)
DEFAULT_MODEL_IDS: claude-sonnet-4-6,deepseek-v4-flash,kimi-k2.6,gemini-3.1-pro

jobs:
setup-matrix:
Expand All @@ -75,27 +73,26 @@ jobs:
id: resolve-models
env:
MODEL_IDS_INPUT: ${{ github.event.inputs.model_ids || '' }}
DEFAULT_MODEL_IDS: ${{ env.DEFAULT_MODEL_IDS }}
run: |
# Use input model_ids if provided, otherwise use defaults
if [ -z "$MODEL_IDS_INPUT" ]; then
MODEL_IDS="$DEFAULT_MODEL_IDS"
echo "No model_ids specified, using defaults: $MODEL_IDS"
else
MODEL_IDS="$MODEL_IDS_INPUT"
echo "Using specified model_ids: $MODEL_IDS"
fi

# Resolve model configs using resolve_model_config.py
# Transform output to matrix format for integration tests
MATRIX=$(python3 << EOF
# Resolve model configs using resolve_model_config.py from the
# checked-out branch. DEFAULT_INTEGRATION_MODEL_IDS lives in
# that file so release branches can update the default set
# without waiting for main to be updated (pull_request_target
# evaluates workflow-level env from the default branch).
MATRIX=$(python3 << 'EOF'
import json
import os
import sys
sys.path.insert(0, '.github/run-eval')
from resolve_model_config import MODELS
from resolve_model_config import DEFAULT_INTEGRATION_MODEL_IDS, MODELS

model_ids = "$MODEL_IDS".split(",")
model_ids = [m.strip() for m in model_ids if m.strip()]
model_ids_input = os.environ.get("MODEL_IDS_INPUT", "").strip()
if model_ids_input:
model_ids = [m.strip() for m in model_ids_input.split(",") if m.strip()]
print(f"Using specified model_ids: {','.join(model_ids)}", file=sys.stderr)
else:
model_ids = list(DEFAULT_INTEGRATION_MODEL_IDS)
print(f"No model_ids specified, using defaults: {','.join(model_ids)}", file=sys.stderr)

matrix = []
for model_id in model_ids:
Expand All @@ -104,7 +101,6 @@ jobs:
print(f"Error: Model ID '{model_id}' not found. Available: {available}", file=sys.stderr)
sys.exit(1)
model = MODELS[model_id]
# Create run-suffix from model id (replace special chars with underscore)
run_suffix = model_id.replace("-", "_").replace(".", "_") + "_run"
matrix.append({
"id": model_id,
Expand Down
10 changes: 10 additions & 0 deletions tests/cross/test_resolve_model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
run_eval_path = Path(__file__).parent.parent.parent / ".github" / "run-eval"
sys.path.append(str(run_eval_path))
from resolve_model_config import ( # noqa: E402 # type: ignore[import-not-found]
DEFAULT_INTEGRATION_MODEL_IDS,
MODELS,
check_model,
find_models_by_id,
Expand Down Expand Up @@ -659,3 +660,12 @@ def test_deepseek_v4_flash_config():
assert model["id"] == "deepseek-v4-flash"
assert model["display_name"] == "DeepSeek V4 Flash"
assert model["llm_config"]["model"] == "litellm_proxy/deepseek/deepseek-v4-flash"


def test_default_integration_model_ids_all_exist():
    """Check that each default integration model ID is a key of MODELS."""
    for default_id in DEFAULT_INTEGRATION_MODEL_IDS:
        # Name the offending ID in the failure so a stale default is easy
        # to spot in the test output.
        failure_message = (
            f"DEFAULT_INTEGRATION_MODEL_IDS contains '{default_id}' "
            f"which is not in MODELS"
        )
        assert default_id in MODELS, failure_message
Loading