From 185fdfa6f565e67ef435e7cd9427d83d4858ac98 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simonrosen10@gmail.com>
Date: Mon, 2 Mar 2026 17:24:05 -0300
Subject: [PATCH] Replace `uv run python` with `sys.executable` in eval scripts

Launching the evaluation subprocesses with `sys.executable` is simpler,
removes the hard dependency on uv being available at eval time, and uses
the correct Python interpreter in both uv and NeMo environments.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 benchmarks/multiswebench/eval_infer.py |  5 ++---
 benchmarks/swebench/eval_infer.py      |  6 +-----
 benchmarks/swtbench/eval_infer.py      | 19 +------------------
 3 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/benchmarks/multiswebench/eval_infer.py b/benchmarks/multiswebench/eval_infer.py
index 6bbf6953d..43fc61bc5 100644
--- a/benchmarks/multiswebench/eval_infer.py
+++ b/benchmarks/multiswebench/eval_infer.py
@@ -12,6 +12,7 @@
 import argparse
 import shutil
 import subprocess
+import sys
 from pathlib import Path
 
 from benchmarks.multiswebench.download_dataset import download_and_concat_dataset
@@ -74,9 +75,7 @@ def run_multi_swebench_evaluation(
         logger.info("Running Multi-SWE-Bench evaluation harness...")
 
         cmd = [
-            "uv",
-            "run",
-            "python",
+            sys.executable,
             "-m",
             "multi_swe_bench.harness.run_evaluation",
             "--config",
diff --git a/benchmarks/swebench/eval_infer.py b/benchmarks/swebench/eval_infer.py
index 85862687d..ab52b26ec 100644
--- a/benchmarks/swebench/eval_infer.py
+++ b/benchmarks/swebench/eval_infer.py
@@ -143,12 +143,8 @@ def run_swebench_evaluation(
         predictions_dir = predictions_path.parent
         predictions_filename = predictions_path.name
 
-        # Run SWE-Bench evaluation using global python (not UV environment)
-        # since swebench is installed globally
         cmd = [
-            "uv",
-            "run",
-            "python",
+            sys.executable,
             "-m",
             "swebench.harness.run_evaluation",
             "--dataset_name",
diff --git a/benchmarks/swtbench/eval_infer.py b/benchmarks/swtbench/eval_infer.py
index e5bdc9b55..acf7a0eab 100644
--- a/benchmarks/swtbench/eval_infer.py
+++ b/benchmarks/swtbench/eval_infer.py
@@ -268,24 +268,7 @@ def run_swtbench_evaluation(
         shutil.copy2(predictions_file, swt_predictions_file)
 
         # Run SWT-Bench evaluation by running python directly from the swt-bench directory
-        # but using the uv environment's python executable which has all dependencies
-        benchmarks_dir = Path(__file__).parent.parent.parent
-
-        # Get the python executable from the uv environment
-        python_executable = subprocess.run(
-            [
-                "uv",
-                "run",
-                "--directory",
-                str(benchmarks_dir),
-                "python",
-                "-c",
-                "import sys; print(sys.executable)",
-            ],
-            capture_output=True,
-            text=True,
-            cwd=benchmarks_dir,
-        ).stdout.strip()
+        python_executable = sys.executable
 
         # Set up environment with PYTHONPATH to include swt-bench directory
         env = os.environ.copy()