From 185fdfa6f565e67ef435e7cd9427d83d4858ac98 Mon Sep 17 00:00:00 2001 From: Simon Rosenberg Date: Mon, 2 Mar 2026 17:24:05 -0300 Subject: [PATCH] Replace `uv run python` with `sys.executable` in eval scripts Simpler, removes the hard dependency on uv being available at eval time, and uses the correct Python interpreter in both uv and NeMo environments. Co-Authored-By: Claude Opus 4.6 --- benchmarks/multiswebench/eval_infer.py | 5 ++--- benchmarks/swebench/eval_infer.py | 6 +----- benchmarks/swtbench/eval_infer.py | 19 +------------------ 3 files changed, 4 insertions(+), 26 deletions(-) diff --git a/benchmarks/multiswebench/eval_infer.py b/benchmarks/multiswebench/eval_infer.py index 6bbf6953d..43fc61bc5 100644 --- a/benchmarks/multiswebench/eval_infer.py +++ b/benchmarks/multiswebench/eval_infer.py @@ -12,6 +12,7 @@ import argparse import shutil import subprocess +import sys from pathlib import Path from benchmarks.multiswebench.download_dataset import download_and_concat_dataset @@ -74,9 +75,7 @@ def run_multi_swebench_evaluation( logger.info("Running Multi-SWE-Bench evaluation harness...") cmd = [ - "uv", - "run", - "python", + sys.executable, "-m", "multi_swe_bench.harness.run_evaluation", "--config", diff --git a/benchmarks/swebench/eval_infer.py b/benchmarks/swebench/eval_infer.py index 85862687d..ab52b26ec 100644 --- a/benchmarks/swebench/eval_infer.py +++ b/benchmarks/swebench/eval_infer.py @@ -143,12 +143,8 @@ def run_swebench_evaluation( predictions_dir = predictions_path.parent predictions_filename = predictions_path.name - # Run SWE-Bench evaluation using global python (not UV environment) - # since swebench is installed globally cmd = [ - "uv", - "run", - "python", + sys.executable, "-m", "swebench.harness.run_evaluation", "--dataset_name", diff --git a/benchmarks/swtbench/eval_infer.py b/benchmarks/swtbench/eval_infer.py index e5bdc9b55..acf7a0eab 100644 --- a/benchmarks/swtbench/eval_infer.py +++ b/benchmarks/swtbench/eval_infer.py @@ -268,24 +268,7 @@ def run_swtbench_evaluation( shutil.copy2(predictions_file, swt_predictions_file) # Run SWT-Bench evaluation by running python directly from the swt-bench directory - # but using the uv environment's python executable which has all dependencies - benchmarks_dir = Path(__file__).parent.parent.parent - - # Get the python executable from the uv environment - python_executable = subprocess.run( - [ - "uv", - "run", - "--directory", - str(benchmarks_dir), - "python", - "-c", - "import sys; print(sys.executable)", - ], - capture_output=True, - text=True, - cwd=benchmarks_dir, - ).stdout.strip() + python_executable = sys.executable # Set up environment with PYTHONPATH to include swt-bench directory env = os.environ.copy()