From fa65497c50eb38c065b578acbfe29e6971c2b023 Mon Sep 17 00:00:00 2001 From: Antoine Richard Date: Thu, 28 May 2026 17:32:01 +0200 Subject: [PATCH 1/2] Add public v1.0 benchmark schema and recorder peak fields Promote the JSON bundle schema produced by the standalone benchmark scripts under scripts/benchmarks/ into a real public-API module, isaaclab.benchmark.schema. Until now there was no single place in Isaac Lab that defined the shape of training.json / startup.json, even though three lab scripts emit them and downstream tooling (e.g. the in-tree Odin evaluation harness) is starting to consume them. The module ships frozen dataclasses for TrainingBundle, StartupBundle, and all their building blocks, plus a small write_bundle_file helper that serialises any dataclass tree as schema-v1 JSON. The package __init__ re-exports the public surface so callers can write `from isaaclab.benchmark import TrainingBundle`. This commit also extends GPUInfoRecorder and MemoryInfoRecorder to report per-device peak alongside the existing mean/std rows. The memory peak rows (GPU memory, RSS, VMS) are always emitted (initialised to 0.0) so dashboards see a consistent key set regardless of whether any sample was recorded; the utilisation and USS peak rows are emitted together with their mean/std rows. Existing rows are unchanged. The benchmark scripts themselves continue to use the legacy output format on develop today; a follow-up PR rewrites them to emit schema-v1 bundles directly via this module. 
--- .../antoiner-feat-benchmark-schema.rst | 25 ++ .../isaaclab/isaaclab/benchmark/__init__.py | 66 +++++ source/isaaclab/isaaclab/benchmark/schema.py | 258 ++++++++++++++++++ .../benchmark/recorders/record_gpu_info.py | 28 +- .../benchmark/recorders/record_memory_info.py | 27 ++ .../isaaclab/test/benchmark/test_recorders.py | 150 +++++++++- source/isaaclab/test/benchmark/test_schema.py | 175 ++++++++++++ 7 files changed, 721 insertions(+), 8 deletions(-) create mode 100644 source/isaaclab/changelog.d/antoiner-feat-benchmark-schema.rst create mode 100644 source/isaaclab/isaaclab/benchmark/__init__.py create mode 100644 source/isaaclab/isaaclab/benchmark/schema.py create mode 100644 source/isaaclab/test/benchmark/test_schema.py diff --git a/source/isaaclab/changelog.d/antoiner-feat-benchmark-schema.rst b/source/isaaclab/changelog.d/antoiner-feat-benchmark-schema.rst new file mode 100644 index 000000000000..77cb9ec5c44e --- /dev/null +++ b/source/isaaclab/changelog.d/antoiner-feat-benchmark-schema.rst @@ -0,0 +1,25 @@ +Added +^^^^^ + +* Added :mod:`isaaclab.benchmark.schema`, the public v1.0 JSON schema for + benchmark bundles produced by the standalone scripts under + ``scripts/benchmarks/``. Exposes :class:`~isaaclab.benchmark.schema.TrainingBundle` + and :class:`~isaaclab.benchmark.schema.StartupBundle` plus the supporting + :class:`~isaaclab.benchmark.schema.Versions`, :class:`~isaaclab.benchmark.schema.Hardware`, + :class:`~isaaclab.benchmark.schema.Runtime`, :class:`~isaaclab.benchmark.schema.Resources`, + and :class:`~isaaclab.benchmark.schema.Learning` records, along with + :func:`~isaaclab.benchmark.schema.write_bundle_file` for emitting + schema-compliant JSON. The package root re-exports the same surface so + ``from isaaclab.benchmark import TrainingBundle`` works. 
+ +Changed +^^^^^^^ + +* Extended :class:`~isaaclab.test.benchmark.recorders.GPUInfoRecorder` and the + system memory recorder to also report per-device **peak** memory and + utilisation alongside the existing mean/std rows. Existing rows are + unchanged; new rows are ``"Memory Used peak"``, ``"Utilization peak"``, + ``"System Memory RSS peak"``, ``"System Memory VMS peak"``, and + ``"System Memory USS peak"``. The memory peak rows (GPU, RSS, VMS) are always + emitted (initialised to ``0.0``) regardless of whether any sample was recorded; + the utilisation and USS peak rows are emitted together with their mean/std rows. diff --git a/source/isaaclab/isaaclab/benchmark/__init__.py b/source/isaaclab/isaaclab/benchmark/__init__.py new file mode 100644 index 000000000000..a1d4f4e1e5e4 --- /dev/null +++ b/source/isaaclab/isaaclab/benchmark/__init__.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Public benchmark-bundle schema for Isaac Lab. + +The standalone benchmark scripts under ``scripts/benchmarks/`` emit +self-contained JSON bundles described by the v1.0 schema in +:mod:`isaaclab.benchmark.schema`. Importing from the package root works for +the common types:: + + from isaaclab.benchmark import TrainingBundle, StartupBundle, write_bundle_file + +See :mod:`isaaclab.benchmark.schema` for the full set of dataclasses. 
+""" + +from .schema import ( + SCHEMA_VERSION, + Backend, + CProfileFunction, + Framework, + GpuDeviceInfo, + Hardware, + Learning, + LearningCurve, + MeanStd, + MeanStdPeak, + Resources, + RunIdentity, + RunStatus, + Runtime, + StartupBundle, + StartupConfig, + StartupPhase, + StartupPhaseTimes, + StartupRunIdentity, + TrainingBundle, + Versions, + write_bundle_file, +) + +__all__ = [ + "SCHEMA_VERSION", + "Backend", + "CProfileFunction", + "Framework", + "GpuDeviceInfo", + "Hardware", + "Learning", + "LearningCurve", + "MeanStd", + "MeanStdPeak", + "Resources", + "RunIdentity", + "RunStatus", + "Runtime", + "StartupBundle", + "StartupConfig", + "StartupPhase", + "StartupPhaseTimes", + "StartupRunIdentity", + "TrainingBundle", + "Versions", + "write_bundle_file", +] diff --git a/source/isaaclab/isaaclab/benchmark/schema.py b/source/isaaclab/isaaclab/benchmark/schema.py new file mode 100644 index 000000000000..c961edc644c9 --- /dev/null +++ b/source/isaaclab/isaaclab/benchmark/schema.py @@ -0,0 +1,258 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Public schema for Isaac Lab benchmark bundles (v1.0). + +Defines the on-disk JSON schema produced by the standalone benchmark scripts +under ``scripts/benchmarks/``: ``benchmark_startup.py``, ``benchmark_rsl_rl.py``, +and ``benchmark_skrl.py``. Producers populate a :class:`TrainingBundle` or +:class:`StartupBundle` and call :func:`write_bundle_file` to emit +schema-compliant JSON. Consumers (dashboards, regression-comparison tools, +the in-tree Odin evaluation harness under ``tools/odin/``) read the same file +and reconstruct the dataclasses. + +Each bundle is self-contained: every top-level bundle carries its own +:class:`Versions` and :class:`Hardware` metadata so a reader need not +cross-reference other files in the bundle directory. 
+ +Current version: 1.0 +""" + +from __future__ import annotations + +import dataclasses +import json +import os +from dataclasses import dataclass +from typing import Any, Literal + +SCHEMA_VERSION = "1.0" + +Framework = Literal["rsl_rl", "skrl"] +Backend = Literal["physx", "newton"] +RunStatus = Literal["completed", "interrupted", "crashed"] + + +@dataclass(frozen=True) +class MeanStd: + """Scalar with mean and standard deviation.""" + + mean: float + std: float + + +@dataclass(frozen=True) +class MeanStdPeak: + """Scalar with mean, standard deviation, and peak.""" + + mean: float + std: float + peak: float + + +@dataclass(frozen=True) +class GpuDeviceInfo: + """Information about a single GPU device.""" + + name: str + mem_gb: float + compute_cap: str + + +@dataclass(frozen=True) +class Hardware: + """Host hardware snapshot captured at run time.""" + + hostname: str + gpu_devices: list[GpuDeviceInfo] + cpu_name: str + cpu_count: int + ram_gb: float + + +@dataclass(frozen=True) +class Versions: + """Software versions captured at run time. + + Framework-specific fields (``rsl_rl``, ``skrl``) are ``None`` when the + corresponding framework is not used by the run. 
+ """ + + isaaclab: str + isaacsim: str | None + kit: str | None + newton: str | None + warp: str | None + mjwarp: str | None + torch: str + rsl_rl: str | None + skrl: str | None + git_commit: str | None + git_branch: str | None + git_dirty: bool + + +@dataclass(frozen=True) +class RunIdentity: + """Identity of a training run.""" + + run_id: str + framework: Framework + backend: Backend + task: str + seed: int + num_envs: int + max_iterations: int + start_time_utc: str + end_time_utc: str + duration_s: float + status: RunStatus + + +@dataclass(frozen=True) +class StartupPhaseTimes: + """Wall-clock duration of each startup phase [s].""" + + app_launch: float + env_creation: float + first_step: float + python_imports: float | None = None + task_config: float | None = None + + +@dataclass(frozen=True) +class Runtime: + """Aggregated runtime metrics for a training run.""" + + startup_phase_times_s: StartupPhaseTimes + iterations_completed: int + total_wall_time_s: float + steps_per_iteration: int + iteration_time_s: MeanStd + env_steps_per_s: MeanStd + iterations_per_s: MeanStd + + +@dataclass(frozen=True) +class Resources: + """Aggregated resource utilisation metrics for a training run.""" + + gpu_util_pct: MeanStd + gpu_mem_gb: MeanStdPeak + cpu_util_pct: MeanStd + ram_gb: MeanStdPeak + + +@dataclass(frozen=True) +class LearningCurve: + """One learning curve (reward or episode length).""" + + final_raw: float + final_ema: float + series_per_iter: list[float] | None + + +@dataclass(frozen=True) +class Learning: + """Learning curves for a training run, plus their EMA smoothing factor.""" + + ema_alpha: float + reward: LearningCurve + ep_length: LearningCurve + + +@dataclass(frozen=True) +class TrainingBundle: + """Top-level shape of ``training.json``.""" + + run: RunIdentity + versions: Versions + hardware: Hardware + runtime: Runtime + resources: Resources + learning: Learning + schema_version: str = SCHEMA_VERSION + + +@dataclass(frozen=True) +class CProfileFunction: 
+ """One entry from a cProfile top-N table.""" + + name: str + own_time_s: float + cum_time_s: float + calls: int + + +@dataclass(frozen=True) +class StartupPhase: + """Wall-clock total plus top cProfile functions for one startup phase.""" + + total_time_s: float + top_functions: list[CProfileFunction] + + +@dataclass(frozen=True) +class StartupConfig: + """CLI configuration captured in a :class:`StartupBundle`.""" + + top_n: int + whitelist: str | None + + +@dataclass(frozen=True) +class StartupRunIdentity: + """Startup runs omit ``num_envs`` / ``max_iterations`` (not meaningful).""" + + run_id: str + framework: Framework + backend: Backend + task: str + seed: int + start_time_utc: str + end_time_utc: str + duration_s: float + status: RunStatus + + +@dataclass(frozen=True) +class StartupBundle: + """Top-level shape of ``startup.json``.""" + + run: StartupRunIdentity + versions: Versions + hardware: Hardware + phases: dict[str, StartupPhase] + config: StartupConfig + schema_version: str = SCHEMA_VERSION + + +def _to_plain(obj: Any) -> Any: + """Recursively convert dataclass instances to plain dicts/lists.""" + if dataclasses.is_dataclass(obj) and not isinstance(obj, type): + return {f.name: _to_plain(getattr(obj, f.name)) for f in dataclasses.fields(obj)} + if isinstance(obj, list): + return [_to_plain(x) for x in obj] + if isinstance(obj, dict): + return {k: _to_plain(v) for k, v in obj.items()} + return obj + + +def write_bundle_file(bundle: Any, path: str) -> None: + """Write a bundle dataclass to disk as schema-v1 JSON. + + Creates the parent directory if missing. Uses ``indent=2`` for readability; + payloads are small (~10 KB training.json, ~50 KB startup.json). + + Args: + bundle: A dataclass instance to serialise. Typically + :class:`TrainingBundle` or :class:`StartupBundle`; any frozen + dataclass tree composed of primitives, lists, and dicts works. + path: Output file path. 
+ """ + os.makedirs(os.path.dirname(os.path.abspath(path)) or ".", exist_ok=True) + with open(path, "w") as f: + json.dump(_to_plain(bundle), f, indent=2, sort_keys=False) + f.write("\n") diff --git a/source/isaaclab/isaaclab/test/benchmark/recorders/record_gpu_info.py b/source/isaaclab/isaaclab/test/benchmark/recorders/record_gpu_info.py index 8ef734b53885..7e920cca42f6 100644 --- a/source/isaaclab/isaaclab/test/benchmark/recorders/record_gpu_info.py +++ b/source/isaaclab/isaaclab/test/benchmark/recorders/record_gpu_info.py @@ -37,6 +37,10 @@ def __init__(self): self._util_n = [] self._util_m2 = [] + # Per-device peak (running max) for memory (bytes) and utilization (%) + self._mem_peak = [] + self._util_peak = [] + # pynvml device handles (one per GPU) self._handles = [] self._nvml_available = False @@ -75,6 +79,9 @@ def _get_hardware_info(self) -> None: self._util_std.append(0) self._util_n.append(0) self._util_m2.append(0) + # Peak state (running max) + self._mem_peak.append(0.0) + self._util_peak.append(0.0) # CUDA version with contextlib.suppress(Exception): @@ -163,9 +170,11 @@ def _get_runtime_info(self) -> None: self._mem_m2[i] += delta * delta2 if self._mem_n[i] > 1: self._mem_std[i] = math.sqrt(self._mem_m2[i] / (self._mem_n[i] - 1)) + self._mem_peak[i] = max(self._mem_peak[i], float(memory_bytes)) self._gpu_runtime_info["devices"][i]["memory_used_mean_bytes"] = self._mem_mean[i] self._gpu_runtime_info["devices"][i]["memory_used_std_bytes"] = self._mem_std[i] + self._gpu_runtime_info["devices"][i]["memory_used_peak_bytes"] = self._mem_peak[i] self._gpu_runtime_info["devices"][i]["memory_n"] = self._mem_n[i] # GPU utilization from pynvml or nvidia-smi fallback @@ -189,9 +198,11 @@ def _get_runtime_info(self) -> None: self._util_m2[i] += delta * delta2 if self._util_n[i] > 1: self._util_std[i] = math.sqrt(self._util_m2[i] / (self._util_n[i] - 1)) + self._util_peak[i] = max(self._util_peak[i], float(gpu_util)) 
self._gpu_runtime_info["devices"][i]["utilization_mean_percent"] = self._util_mean[i] self._gpu_runtime_info["devices"][i]["utilization_std_percent"] = self._util_std[i] + self._gpu_runtime_info["devices"][i]["utilization_peak_percent"] = self._util_peak[i] self._gpu_runtime_info["devices"][i]["utilization_n"] = self._util_n[i] def update(self) -> None: @@ -252,7 +263,7 @@ def get_data(self) -> MeasurementData: runtime = device_runtime[i] prefix = f"GPU {i} " if self._device_count > 1 else "GPU " - # Memory used + # Memory used (mean/std/n only when updates have been recorded) if "memory_used_mean_bytes" in runtime: measurements.append( SingleMeasurement( @@ -275,6 +286,14 @@ def get_data(self) -> MeasurementData: unit="", ) ) + # Peak is always emitted (initialised to 0.0, rises on first update) + measurements.append( + SingleMeasurement( + name=f"{prefix}Memory Used peak", + value=self._bytes_to_gb(self._mem_peak[i]), + unit="GB", + ) + ) # GPU Utilization if "utilization_mean_percent" in runtime: @@ -292,6 +311,13 @@ def get_data(self) -> MeasurementData: unit="%", ) ) + measurements.append( + SingleMeasurement( + name=f"{prefix}Utilization peak", + value=round(runtime.get("utilization_peak_percent", 0), 2), + unit="%", + ) + ) measurements.append( SingleMeasurement( name=f"{prefix}Utilization n", diff --git a/source/isaaclab/isaaclab/test/benchmark/recorders/record_memory_info.py b/source/isaaclab/isaaclab/test/benchmark/recorders/record_memory_info.py index 8ad0f54304e3..8e0a786721ea 100644 --- a/source/isaaclab/isaaclab/test/benchmark/recorders/record_memory_info.py +++ b/source/isaaclab/isaaclab/test/benchmark/recorders/record_memory_info.py @@ -33,6 +33,12 @@ def __init__(self): self._uss_m2 = 0 self._uss_n = 0 + # Peak (running max) alongside the Welford mean/std. Initialised to + # 0.0 so emit-before-record returns a meaningful zero. 
+ self._rss_peak = 0.0 + self._vms_peak = 0.0 + self._uss_peak = 0.0 + # Process handle self._process = psutil.Process(os.getpid()) self._get_hardware_info() @@ -62,17 +68,21 @@ def _get_runtime_info(self) -> None: self._rss_mean, self._rss_m2, self._rss_n, rss_std = self._update_welford( mem_info.rss, self._rss_mean, self._rss_m2, self._rss_n ) + self._rss_peak = max(self._rss_peak, float(mem_info.rss)) self._memory_runtime_info["rss_mean"] = self._rss_mean self._memory_runtime_info["rss_std"] = rss_std self._memory_runtime_info["rss_n"] = self._rss_n + self._memory_runtime_info["rss_peak"] = self._rss_peak # VMS (Virtual Memory Size) - total virtual memory self._vms_mean, self._vms_m2, self._vms_n, vms_std = self._update_welford( mem_info.vms, self._vms_mean, self._vms_m2, self._vms_n ) + self._vms_peak = max(self._vms_peak, float(mem_info.vms)) self._memory_runtime_info["vms_mean"] = self._vms_mean self._memory_runtime_info["vms_std"] = vms_std self._memory_runtime_info["vms_n"] = self._vms_n + self._memory_runtime_info["vms_peak"] = self._vms_peak # USS (Unique Set Size) - memory unique to process (not shared) try: @@ -80,9 +90,11 @@ def _get_runtime_info(self) -> None: self._uss_mean, self._uss_m2, self._uss_n, uss_std = self._update_welford( uss, self._uss_mean, self._uss_m2, self._uss_n ) + self._uss_peak = max(self._uss_peak, float(uss)) self._memory_runtime_info["uss_mean"] = self._uss_mean self._memory_runtime_info["uss_std"] = uss_std self._memory_runtime_info["uss_n"] = self._uss_n + self._memory_runtime_info["uss_peak"] = self._uss_peak except (psutil.AccessDenied, AttributeError): # USS may not be available on all platforms pass @@ -117,6 +129,11 @@ def get_data(self) -> MeasurementData: value=self._bytes_to_gb(self._memory_runtime_info.get("rss_std", 0)), unit="GB", ), + SingleMeasurement( + name="System Memory RSS peak", + value=self._bytes_to_gb(self._memory_runtime_info.get("rss_peak", 0)), + unit="GB", + ), SingleMeasurement(name="System Memory 
RSS n", value=self._memory_runtime_info.get("rss_n", 0), unit=""), # VMS (Virtual Memory Size) SingleMeasurement( @@ -129,6 +146,11 @@ def get_data(self) -> MeasurementData: value=self._bytes_to_gb(self._memory_runtime_info.get("vms_std", 0)), unit="GB", ), + SingleMeasurement( + name="System Memory VMS peak", + value=self._bytes_to_gb(self._memory_runtime_info.get("vms_peak", 0)), + unit="GB", + ), SingleMeasurement(name="System Memory VMS n", value=self._memory_runtime_info.get("vms_n", 0), unit=""), ] @@ -146,6 +168,11 @@ def get_data(self) -> MeasurementData: value=self._bytes_to_gb(self._memory_runtime_info.get("uss_std", 0)), unit="GB", ), + SingleMeasurement( + name="System Memory USS peak", + value=self._bytes_to_gb(self._memory_runtime_info.get("uss_peak", 0)), + unit="GB", + ), SingleMeasurement( name="System Memory USS n", value=self._memory_runtime_info.get("uss_n", 0), unit="" ), diff --git a/source/isaaclab/test/benchmark/test_recorders.py b/source/isaaclab/test/benchmark/test_recorders.py index fc519bb051d8..0cdb5f556b4c 100644 --- a/source/isaaclab/test/benchmark/test_recorders.py +++ b/source/isaaclab/test/benchmark/test_recorders.py @@ -236,10 +236,12 @@ def test_get_data_returns_measurement_data(self, recorder): measurement_data = recorder.get_data() assert isinstance(measurement_data, MeasurementData) - # GPU data includes measurements (memory and utilization stats) - # 6 measurements per GPU: memory (mean, std, n) + utilization (mean, std, n) + # GPU data includes measurements per GPU: + # memory: mean, std, n + peak (always) = 4 + # utilization: mean, std, peak, n (when nvml/smi available) = 4 + # Total is 4 (memory-only) or 8 (memory + utilization) per GPU. 
num_gpus = data["gpu_metadata"]["device_count"] - assert len(measurement_data.measurements) == 6 * num_gpus + assert len(measurement_data.measurements) in (4 * num_gpus, 8 * num_gpus) # 4 metadata entries: device_count, current_device, cuda_version, gpu_devices dict assert len(measurement_data.metadata) == 4 @@ -287,6 +289,69 @@ def test_get_data_devices_dict_structure(self, recorder): assert "compute_capability" in device_0 assert "multi_processor_count" in device_0 + def test_mem_peak_is_zero_before_any_record(self, monkeypatch): + """Peak memory row for device 0 exists and is 0.0 before any update.""" + import torch + + from isaaclab.test.benchmark.recorders.record_gpu_info import GPUInfoRecorder + + monkeypatch.setattr(torch.cuda, "is_available", lambda: True) + monkeypatch.setattr(torch.cuda, "device_count", lambda: 1) + monkeypatch.setattr(torch.cuda, "current_device", lambda: 0) + + class _FakeProps: + name = "FakeGPU" + total_memory = 80 * 1024**3 + major = 9 + minor = 0 + multi_processor_count = 132 + + monkeypatch.setattr(torch.cuda, "get_device_properties", lambda i: _FakeProps()) + + rec = GPUInfoRecorder() + data = rec.get_data() + peaks = [m for m in data.measurements if "peak" in m.name.lower() and "GPU" in m.name] + # At minimum there should be a GPU memory peak row for device 0. 
+ mem_peak_rows = [m for m in peaks if "Memory" in m.name] + assert mem_peak_rows, f"expected a GPU memory peak row, got names: {[m.name for m in data.measurements]}" + assert mem_peak_rows[0].value == 0.0 + + def test_mem_peak_tracks_running_max(self, monkeypatch): + """Feed the recorder a scripted memory sequence; peak must match the max.""" + import torch + + from isaaclab.test.benchmark.recorders.record_gpu_info import GPUInfoRecorder + + monkeypatch.setattr(torch.cuda, "is_available", lambda: True) + monkeypatch.setattr(torch.cuda, "device_count", lambda: 1) + monkeypatch.setattr(torch.cuda, "current_device", lambda: 0) + + class _FakeProps: + name = "FakeGPU" + total_memory = 80 * 1024**3 + major = 9 + minor = 0 + multi_processor_count = 132 + + monkeypatch.setattr(torch.cuda, "get_device_properties", lambda i: _FakeProps()) + + rec = GPUInfoRecorder() + + # Bypass nvml / nvidia-smi entirely and drive memory_allocated. + scripted_mem = iter([10 * 1024**3, 50 * 1024**3, 30 * 1024**3]) # 10 GB, 50 GB, 30 GB + monkeypatch.setattr(torch.cuda, "memory_allocated", lambda i: next(scripted_mem)) + rec._nvml_available = False + rec._nvidia_smi_available = False + + for _ in range(3): + rec.update() + + data = rec.get_data() + mem_peak_rows = [m for m in data.measurements if "Memory" in m.name and "peak" in m.name.lower()] + assert mem_peak_rows, "expected a GPU memory peak row" + # 50 GB is the max. 
+ assert mem_peak_rows[0].value == 50.0, f"expected 50.0 GB peak, got {mem_peak_rows[0].value}" + # ============================================================================== # MemoryInfoRecorder Tests @@ -392,10 +457,10 @@ def test_get_data_returns_measurement_data(self, recorder): data = recorder.get_data() assert isinstance(data, MeasurementData) - # 6 measurements for RSS and VMS (mean, std, n for each) - # Plus potentially 3 more for USS if available (mean, std, n) - assert len(data.measurements) >= 6 - assert len(data.measurements) <= 9 + # 8 measurements for RSS and VMS (mean, std, peak, n for each) + # Plus potentially 4 more for USS if available (mean, std, peak, n) + assert len(data.measurements) >= 8 + assert len(data.measurements) <= 12 assert len(data.metadata) == 1 def test_get_data_measurement_names(self, recorder): @@ -423,6 +488,77 @@ def test_get_data_metadata_names(self, recorder): names = [m.name for m in data.metadata] assert "total_ram_gb" in names + def test_rss_peak_is_zero_before_any_record(self): + """Test that RSS peak is 0.0 before any update has been called.""" + from isaaclab.test.benchmark.recorders.record_memory_info import MemoryInfoRecorder + + rec = MemoryInfoRecorder() + data = rec.get_data() + peak_rows = [m for m in data.measurements if m.name == "System Memory RSS peak"] + assert peak_rows, "expected a 'System Memory RSS peak' SingleMeasurement" + assert peak_rows[0].value == 0.0 + + def test_rss_peak_tracks_running_max(self, monkeypatch): + """Test that RSS peak tracks the running maximum across updates.""" + import psutil + + from isaaclab.test.benchmark.recorders.record_memory_info import MemoryInfoRecorder + + # Scripted RSS sequence; peak must equal the max seen so far. 
+ scripted_values = [100 * 1024**3, 200 * 1024**3, 150 * 1024**3] # bytes + scripted_iter = iter(scripted_values) + + class _FakeMemInfo: + def __init__(self, rss): + self.rss = rss + self.vms = rss # mirror so VMS also moves + # USS is read via memory_full_info, not memory_info; leave alone. + + def _fake_memory_info(self): # noqa: ARG001 — bound method, self is the process + return _FakeMemInfo(next(scripted_iter)) + + monkeypatch.setattr(psutil.Process, "memory_info", _fake_memory_info) + + rec = MemoryInfoRecorder() + for _ in scripted_values: + rec.update() + + data = rec.get_data() + rss_peak = next(m for m in data.measurements if m.name == "System Memory RSS peak") + # The recorder emits GB; input was in bytes. 200 GiB -> 200.0 after rounding. + assert rss_peak.value == 200.0, f"expected peak=200.0 GB, got {rss_peak.value}" + + vms_peak = next(m for m in data.measurements if m.name == "System Memory VMS peak") + assert vms_peak.value == 200.0 + + def test_rss_peak_does_not_decrease(self, monkeypatch): + """Test that RSS peak does not decrease when memory usage drops.""" + import psutil + + from isaaclab.test.benchmark.recorders.record_memory_info import MemoryInfoRecorder + + # Decreasing sequence — peak is set by the first sample and then stays. 
+ scripted_values = [300 * 1024**3, 50 * 1024**3, 25 * 1024**3] + scripted_iter = iter(scripted_values) + + class _FakeMemInfo: + def __init__(self, rss): + self.rss = rss + self.vms = rss + + def _fake_memory_info(self): # noqa: ARG001 + return _FakeMemInfo(next(scripted_iter)) + + monkeypatch.setattr(psutil.Process, "memory_info", _fake_memory_info) + + rec = MemoryInfoRecorder() + for _ in scripted_values: + rec.update() + + data = rec.get_data() + rss_peak = next(m for m in data.measurements if m.name == "System Memory RSS peak") + assert rss_peak.value == 300.0 + # ============================================================================== # VersionInfoRecorder Tests diff --git a/source/isaaclab/test/benchmark/test_schema.py b/source/isaaclab/test/benchmark/test_schema.py new file mode 100644 index 000000000000..2d17975a44a7 --- /dev/null +++ b/source/isaaclab/test/benchmark/test_schema.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +"""Tests for the v1.0 Isaac Lab benchmark schema.""" + +import dataclasses +import json +import os + +import pytest + +from isaaclab.benchmark.schema import ( + SCHEMA_VERSION, + CProfileFunction, + GpuDeviceInfo, + Hardware, + Learning, + LearningCurve, + MeanStd, + MeanStdPeak, + Resources, + RunIdentity, + Runtime, + StartupBundle, + StartupConfig, + StartupPhase, + StartupPhaseTimes, + StartupRunIdentity, + TrainingBundle, + Versions, + write_bundle_file, +) + + +def _minimal_training_bundle() -> TrainingBundle: + """Construct a valid TrainingBundle with placeholder numeric values.""" + return TrainingBundle( + run=RunIdentity( + run_id="rsl-rl_physx_Isaac-Ant-Direct-v0_20260422-131500_seed42", + framework="rsl_rl", + backend="physx", + task="Isaac-Ant-Direct-v0", + seed=42, + num_envs=4096, + max_iterations=500, + start_time_utc="2026-04-22T13:15:00Z", + end_time_utc="2026-04-22T13:47:22Z", + duration_s=1942.1, + status="completed", + ), + versions=Versions( + isaaclab="4.6.8", + isaacsim="5.0.0", + kit="107.1.0", + newton="0.1.2", + warp="1.7.3", + mjwarp="0.0.4", + torch="2.5.1", + rsl_rl="2.3.0", + skrl=None, + git_commit="3d42b11d513", + git_branch="develop", + git_dirty=False, + ), + hardware=Hardware( + hostname="benchmark-host", + gpu_devices=[GpuDeviceInfo(name="NVIDIA H100 80GB", mem_gb=80.0, compute_cap="9.0")], + cpu_name="AMD EPYC 7763", + cpu_count=64, + ram_gb=512.0, + ), + runtime=Runtime( + startup_phase_times_s=StartupPhaseTimes(app_launch=18.4, env_creation=22.9, first_step=4.1), + iterations_completed=500, + total_wall_time_s=1946.0, + steps_per_iteration=24, + iteration_time_s=MeanStd(mean=3.82, std=0.04), + env_steps_per_s=MeanStd(mean=1_071_780.0, std=11_200.0), + iterations_per_s=MeanStd(mean=0.2618, std=0.0028), + ), + resources=Resources( + gpu_util_pct=MeanStd(mean=87.2, std=6.1), + gpu_mem_gb=MeanStdPeak(mean=18.4, std=0.3, peak=19.2), + cpu_util_pct=MeanStd(mean=31.5, std=4.8), + 
ram_gb=MeanStdPeak(mean=22.1, std=0.4, peak=24.8), + ), + learning=Learning( + ema_alpha=0.05, + reward=LearningCurve(final_raw=1823.4, final_ema=1796.1, series_per_iter=[12.3, 34.5, 58.1]), + ep_length=LearningCurve(final_raw=987.0, final_ema=962.3, series_per_iter=[4.1, 5.0, 7.2]), + ), + ) + + +def test_training_bundle_round_trip(tmp_path): + """Writing a TrainingBundle and reloading via json gives back identical data.""" + bundle = _minimal_training_bundle() + path = os.path.join(tmp_path, "training.json") + write_bundle_file(bundle, path) + + with open(path) as f: + data = json.load(f) + + assert data["schema_version"] == SCHEMA_VERSION + assert data["run"]["run_id"] == bundle.run.run_id + assert data["runtime"]["env_steps_per_s"]["mean"] == pytest.approx(1_071_780.0) + assert data["resources"]["ram_gb"]["peak"] == pytest.approx(24.8) + assert data["learning"]["reward"]["series_per_iter"] == [12.3, 34.5, 58.1] + assert data["versions"]["skrl"] is None + + +def test_training_bundle_without_series(tmp_path): + """With series_per_iter=None, the JSON contains an explicit null.""" + bundle = _minimal_training_bundle() + bundle_no_series = dataclasses.replace( + bundle, + learning=Learning( + ema_alpha=0.05, + reward=LearningCurve(final_raw=1.0, final_ema=1.0, series_per_iter=None), + ep_length=LearningCurve(final_raw=1.0, final_ema=1.0, series_per_iter=None), + ), + ) + path = os.path.join(tmp_path, "training.json") + write_bundle_file(bundle_no_series, path) + with open(path) as f: + data = json.load(f) + assert data["learning"]["reward"]["series_per_iter"] is None + assert data["learning"]["ep_length"]["series_per_iter"] is None + + +def test_startup_bundle_round_trip(tmp_path): + """StartupBundle round-trips with phase dict and top-function lists.""" + bundle = StartupBundle( + run=StartupRunIdentity( + run_id="rsl-rl_physx_Isaac-Ant-Direct-v0_20260422-131500_seed42", + framework="rsl_rl", + backend="physx", + task="Isaac-Ant-Direct-v0", + seed=42, + 
start_time_utc="2026-04-22T13:15:00Z", + end_time_utc="2026-04-22T13:15:48Z", + duration_s=48.7, + status="completed", + ), + versions=_minimal_training_bundle().versions, + hardware=_minimal_training_bundle().hardware, + phases={ + "app_launch": StartupPhase( + total_time_s=18.4, + top_functions=[CProfileFunction(name="isaaclab.x:y", own_time_s=1.8, cum_time_s=2.4, calls=4312)], + ), + "env_creation": StartupPhase(total_time_s=22.9, top_functions=[]), + "first_step": StartupPhase(total_time_s=4.1, top_functions=[]), + }, + config=StartupConfig(top_n=30, whitelist="startup_whitelist.yaml"), + ) + path = os.path.join(tmp_path, "startup.json") + write_bundle_file(bundle, path) + with open(path) as f: + data = json.load(f) + assert data["phases"]["app_launch"]["total_time_s"] == pytest.approx(18.4) + assert data["phases"]["app_launch"]["top_functions"][0]["calls"] == 4312 + + +def test_package_reexports_match_schema_module(): + """`from isaaclab.benchmark import ...` resolves to the same objects as + `from isaaclab.benchmark.schema import ...`. Keeps the convenience + namespace honest if someone forgets to update __all__.""" + import isaaclab.benchmark as pkg + from isaaclab.benchmark import schema + + for name in pkg.__all__: + assert getattr(pkg, name) is getattr(schema, name), name From d9d07a207b861676a342136383d0aab66746f490 Mon Sep 17 00:00:00 2001 From: Antoine Richard Date: Thu, 28 May 2026 17:49:17 +0200 Subject: [PATCH 2/2] Emit v1.0 schema bundles from benchmark scripts Wire the three standalone benchmark scripts under scripts/benchmarks/ to emit self-contained JSON bundles conforming to the v1.0 schema added in the previous commit (isaaclab.benchmark.schema): - benchmark_startup.py now optionally writes a StartupBundle to the path given by --schema_v1_output, with per-phase cProfile top-N data and total durations. 
- benchmark_rsl_rl.py now optionally writes a TrainingBundle with the run identity, captured versions/hardware, aggregated runtime and resource metrics, and EMA-smoothed reward / episode-length curves. The EMA factor is configurable via --ema_alpha; --no_series drops the full per-iteration curves and keeps only the scalars. - benchmark_skrl.py is new: a SKRL-framework counterpart that emits the same TrainingBundle with framework set to "skrl". Pairs with a small skrl_benchmark_trainer subclass that exposes per-iteration reward / episode-length values to the script without touching upstream skrl. The legacy per-backend output format remains the default when --schema_v1_output is omitted, so existing CI and ad-hoc invocations keep working unchanged. Shared helpers (_action_sampling.sample_random_actions to keep single-agent + multi-agent benchmark startup working, _schema_helpers to build Versions/Hardware from the recorder metadata and synthesise a fallback run_id) live alongside the scripts. utils.parse_cprofile_stats now returns ncalls as a fourth tuple element so the schema's CProfileFunction.calls field can be populated. Updated startup_whitelist.yaml to track the IsaacLab v3 configclass / cloner / scene-init call paths and explicitly fall through to top_n for python_imports and first_step (per file comments). Added scripts/benchmarks/tests/ covering the new helpers and CLI surfaces, plus source/isaaclab/test/benchmark/test_parse_cprofile_stats.py for the ncalls extension. Added docs/source/features/benchmarking.rst documenting the scripts and the schema. 
--- docs/index.rst | 1 + docs/source/features/benchmarking.rst | 110 ++++ scripts/benchmarks/_action_sampling.py | 59 ++ scripts/benchmarks/_schema_helpers.py | 83 +++ scripts/benchmarks/benchmark_rsl_rl.py | 350 ++++++++++- scripts/benchmarks/benchmark_skrl.py | 562 ++++++++++++++++++ scripts/benchmarks/benchmark_startup.py | 140 ++++- scripts/benchmarks/skrl_benchmark_trainer.py | 144 +++++ scripts/benchmarks/startup_whitelist.yaml | 47 +- scripts/benchmarks/tests/__init__.py | 4 + .../benchmarks/tests/test_action_sampling.py | 169 ++++++ .../tests/test_benchmark_rsl_rl_cli.py | 201 +++++++ .../tests/test_benchmark_skrl_cli.py | 234 ++++++++ .../tests/test_skrl_benchmark_trainer.py | 232 ++++++++ scripts/benchmarks/utils.py | 26 +- .../antoiner-feat-benchmark-scripts-v1.rst | 26 + .../benchmark/test_parse_cprofile_stats.py | 95 +++ 17 files changed, 2436 insertions(+), 47 deletions(-) create mode 100644 docs/source/features/benchmarking.rst create mode 100644 scripts/benchmarks/_action_sampling.py create mode 100644 scripts/benchmarks/_schema_helpers.py create mode 100644 scripts/benchmarks/benchmark_skrl.py create mode 100644 scripts/benchmarks/skrl_benchmark_trainer.py create mode 100644 scripts/benchmarks/tests/__init__.py create mode 100644 scripts/benchmarks/tests/test_action_sampling.py create mode 100644 scripts/benchmarks/tests/test_benchmark_rsl_rl_cli.py create mode 100644 scripts/benchmarks/tests/test_benchmark_skrl_cli.py create mode 100644 scripts/benchmarks/tests/test_skrl_benchmark_trainer.py create mode 100644 source/isaaclab/changelog.d/antoiner-feat-benchmark-scripts-v1.rst create mode 100644 source/isaaclab/test/benchmark/test_parse_cprofile_stats.py diff --git a/docs/index.rst b/docs/index.rst index 51da7b86d9ff..f5c41742a9dd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -134,6 +134,7 @@ Table of Contents Tiled Rendering source/features/ray source/features/reproducibility + source/features/benchmarking .. 
toctree:: diff --git a/docs/source/features/benchmarking.rst b/docs/source/features/benchmarking.rst new file mode 100644 index 000000000000..e914c88c6298 --- /dev/null +++ b/docs/source/features/benchmarking.rst @@ -0,0 +1,110 @@ +Benchmarking +============ + +Isaac Lab ships three standalone benchmark scripts that emit a common +``v1.0`` JSON schema for training-performance and startup-performance data. +The schema is defined in :mod:`isaaclab.benchmark.schema`, and the scripts +are fully usable standalone — any tool that can read JSON can consume the +output. + +.. contents:: + :local: + :depth: 2 + + +Scripts +------- + +``benchmark_startup.py`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +Profiles five IsaacLab startup phases with ``cProfile``: ``app_launch``, +``python_imports``, ``task_config``, ``env_creation``, and ``first_step``. For +each phase it records wall-clock time and the top N self-time functions. + +.. code-block:: bash + + ./isaaclab.sh -p scripts/benchmarks/benchmark_startup.py \ + --task Isaac-Ant-Direct-v0 --num_envs 4096 --headless \ + --schema_v1_output /tmp/startup.json + +``benchmark_rsl_rl.py`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Trains a task with the RSL-RL PPO agent and records runtime / resource / +learning metrics, including exponentially-smoothed reward and episode-length +curves. + +.. code-block:: bash + + ./isaaclab.sh -p scripts/benchmarks/benchmark_rsl_rl.py \ + --task Isaac-Ant-Direct-v0 --num_envs 4096 \ + --max_iterations 500 --headless \ + --schema_v1_output /tmp/training.json + +``benchmark_skrl.py`` +~~~~~~~~~~~~~~~~~~~~~ + +The SKRL-framework counterpart to ``benchmark_rsl_rl.py``. Emits the same +schema with ``framework: "skrl"``. + +.. code-block:: bash + + ./isaaclab.sh -p scripts/benchmarks/benchmark_skrl.py \ + --task Isaac-Ant-Direct-v0 --num_envs 4096 \ + --max_iterations 500 --headless \ + --schema_v1_output /tmp/training_skrl.json + + +v1.0 schema summary +------------------- + +Each script writes a single self-contained JSON file. 
The shape is defined by +dataclasses in :mod:`isaaclab.benchmark.schema` — refer to the module for +per-field units and descriptions. + +:class:`~isaaclab.benchmark.schema.TrainingBundle` (training scripts) +top-level keys: + +* ``run`` — run identity (``run_id``, ``framework``, ``backend``, ``task``, + ``seed``, ``num_envs``, ``max_iterations``, timestamps, ``status``). +* ``versions`` — software versions at run time (Isaac Lab, Isaac Sim, Kit, + Newton, Warp, Torch, RSL-RL / SKRL, git metadata). +* ``hardware`` — host snapshot (hostname, GPU devices, CPU, RAM). +* ``runtime`` — aggregated timings (``iterations_completed``, + ``iteration_time_s``, ``env_steps_per_s``, ``iterations_per_s``, + ``startup_phase_times_s``). +* ``resources`` — aggregated GPU/CPU/RAM utilisation (mean/std/peak). +* ``learning`` — final-value and EMA-smoothed reward / episode-length curves, + with full per-iteration series unless ``--no_series`` is passed. + +:class:`~isaaclab.benchmark.schema.StartupBundle` (``benchmark_startup.py``) +replaces ``runtime`` / ``resources`` / ``learning`` with: + +* ``phases`` — mapping from phase name to ``{total_time_s, top_functions}``. +* ``config`` — CLI configuration (``top_n``, ``whitelist``). + + +Common CLI flags +---------------- + +``--schema_v1_output <path>`` + Write the v1.0 JSON bundle to this path. If omitted, the script falls + back to the legacy per-backend output format. + +``--backend {physx, newton}`` + Physics backend tag recorded in the bundle. Defaults to ``physx`` if + omitted. + +``--run_id <run_id>`` + Explicit run-identity string. If omitted, a synthetic run_id of the + form ``<framework>_<backend>_<task>_<utc-timestamp>_seed<seed>`` is + generated. + +``--ema_alpha <float>`` (training scripts) + EMA smoothing factor for reward / episode-length curves (default + ``0.05``, roughly a 20-sample window). + +``--no_series`` (training scripts) + Omit per-iteration series from the bundle, leaving only the + ``final_raw`` + ``final_ema`` scalars. 
def sample_random_actions(env: Any) -> torch.Tensor | dict[str, torch.Tensor]:
    """Draw one random action per parallel env, shaped for ``env.step``.

    The env is treated as multi-agent when its unwrapped object has an
    ``action_spaces`` attribute; otherwise ``single_action_space`` is used.
    In both cases each space is sampled ``num_envs`` times and the samples
    are stacked along a new leading axis.

    Args:
        env: Object exposing ``unwrapped``. The unwrapped env must provide
            ``num_envs`` and ``device`` plus either ``action_spaces``
            (a mapping of agent id to space) or ``single_action_space``.

    Returns:
        A ``torch.float32`` tensor on the env's device for the single-agent
        case, or a dict mapping each agent id to such a tensor for the
        multi-agent case.
    """
    base_env = env.unwrapped

    def _draw_batch(space: Any) -> torch.Tensor:
        # One independent sample per env instance, stacked on axis 0.
        stacked = np.stack([space.sample() for _ in range(base_env.num_envs)])
        return torch.as_tensor(stacked, dtype=torch.float32, device=base_env.device)

    if not hasattr(base_env, "action_spaces"):
        return _draw_batch(base_env.single_action_space)

    return {agent_id: _draw_batch(agent_space) for agent_id, agent_space in base_env.action_spaces.items()}
def capture_hardware(bm: BaseIsaacLabBenchmark) -> Hardware:
    """Assemble a :class:`Hardware` record from the GPU, CPU and memory recorders.

    Reads the ``GPUInfo``, ``CPUInfo`` and ``MemoryInfo`` entries of
    ``bm._manual_recorders`` and flattens each recorder's metadata into a
    ``name -> data`` mapping. Missing or falsy values fall back to
    ``"unknown"`` for strings and zero for numbers. The hostname is taken
    from :func:`socket.gethostname`.

    Call this before :meth:`BaseIsaacLabBenchmark._finalize_impl`, which
    clears ``_manual_recorders``.

    Args:
        bm: Benchmark object holding the manual recorders.

    Returns:
        The populated :class:`Hardware` dataclass.
    """

    def _metadata_of(recorder_key: str) -> dict:
        # Flatten the recorder's metadata entries into a plain lookup table.
        entries = bm._manual_recorders[recorder_key].get_data().metadata
        return {entry.name: entry.data for entry in entries}

    gpu_info = _metadata_of("GPUInfo")
    cpu_info = _metadata_of("CPUInfo")
    ram_info = _metadata_of("MemoryInfo")

    gpu_devices = []
    # ``gpu_devices`` may be absent or None; either way treat it as empty.
    for device in (gpu_info.get("gpu_devices", {}) or {}).values():
        gpu_devices.append(
            GpuDeviceInfo(
                name=str(device.get("name", "unknown")),
                mem_gb=float(device.get("total_memory_gb", 0.0) or 0.0),
                compute_cap=str(device.get("compute_capability", "unknown")),
            )
        )

    host = socket.gethostname()
    cpu_label = str(cpu_info.get("cpu_name", "unknown"))
    core_count = int(cpu_info.get("physical_cores", 0) or 0)
    total_ram = float(ram_info.get("total_ram_gb", 0.0) or 0.0)
    return Hardware(
        hostname=host,
        gpu_devices=gpu_devices,
        cpu_name=cpu_label,
        cpu_count=core_count,
        ram_gb=total_ram,
    )
def _native_backend_matches(raw_cfg, requested: str) -> bool:
    """Return ``True`` iff ``raw_cfg.sim.physics`` matches the requested backend.

    Returns ``False`` when ``raw_cfg`` has no ``sim`` attribute, and for
    sim-level :class:`PresetCfg` wrappers: presets carry multiple backends and
    the preset system handles selection downstream. A ``physics`` value of
    ``None`` is treated as the PhysX default.

    Every backend config class is imported optionally. A backend package that
    is not installed cannot be the type of ``physics``, so it is skipped
    instead of aborting validation with an ``ImportError``.

    Args:
        raw_cfg: Raw environment config as loaded from the task registry.
        requested: Backend name to compare against (``"physx"``,
            ``"newton"`` or ``"ovphysx"``).

    Returns:
        Whether the config's native physics backend is the requested one.
    """
    sim = getattr(raw_cfg, "sim", None)
    if sim is None:
        return False
    from isaaclab_tasks.utils.hydra import PresetCfg

    if isinstance(sim, PresetCfg):
        return False  # preset system handles it; presets_available is the source of truth
    physics = getattr(sim, "physics", None)
    # SimulationCfg.physics defaults to None which means PhysxCfg().
    if physics is None:
        return requested == "physx"

    import importlib

    # Checked in this order so the first matching config class decides.
    backend_cfg_classes = (
        ("physx", "isaaclab_physx.physics", "PhysxCfg"),
        ("newton", "isaaclab_newton.physics", "NewtonCfg"),
        ("ovphysx", "isaaclab_ovphysx.physics", "OvPhysxCfg"),
    )
    for backend, module_name, class_name in backend_cfg_classes:
        try:
            cfg_cls = getattr(importlib.import_module(module_name), class_name)
        except (ImportError, AttributeError):
            # Backend package (or its cfg class) is unavailable on this install.
            continue
        if isinstance(physics, cfg_cls):
            return requested == backend
    return False
If omitted, a synthetic run_id is generated.", +) +parser.add_argument( + "--schema_v1_output", + type=str, + default=None, + help="If set, write a schema-v1 training.json to this path.", +) +parser.add_argument( + "--ema_alpha", + type=float, + default=0.05, + help="EMA smoothing factor for reward/ep_length (default 0.05, ~20-sample window).", +) +parser.add_argument( + "--no_series", + action="store_true", + default=False, + help="Omit per-iteration series from training.json (leaves final_raw + final_ema only).", +) add_success_cli_args(parser) # append RSL-RL cli arguments @@ -73,14 +152,61 @@ AppLauncher.add_app_launcher_args(parser) args_cli, hydra_args = setup_preset_cli(parser) hydra_args = fold_preset_tokens(hydra_args) -sys.argv = [sys.argv[0]] + hydra_args if args_cli.video: args_cli.enable_cameras = True +# Map --backend X to hydra presets=X so the physics preset is applied +# at config-resolve time. Validate the request first: if the task does +# not have an X preset, exit fast with a stable stderr prefix the +# Asgard worker classifier matches on. An explicit presets=... on +# the CLI bypasses validation (operator override). 
+if args_cli.backend is not None: + existing_presets = [a for a in hydra_args if a.startswith("presets=")] + if existing_presets: + print(f"[WARNING] --backend={args_cli.backend} ignored because {existing_presets[0]} was explicitly passed.") + else: + from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry + from isaaclab_tasks.utils.presets import has_physics_preset + + try: + _raw_cfg = load_cfg_from_registry(args_cli.task, "env_cfg_entry_point") + except Exception as exc: # noqa: BLE001 — fall through to original behaviour + print( + f"[WARNING] could not load raw cfg for {args_cli.task!r} " + f"to validate preset support ({type(exc).__name__}: {exc}); " + f"injecting presets={args_cli.backend} unchecked.", + file=sys.stderr, + ) + hydra_args = [f"presets={args_cli.backend}"] + hydra_args + else: + if has_physics_preset(_raw_cfg, args_cli.backend): + hydra_args = [f"presets={args_cli.backend}"] + hydra_args + elif _native_backend_matches(_raw_cfg, args_cli.backend): + print( + f"[INFO] task {args_cli.task!r} has no '{args_cli.backend}' " + f"preset; running on native {args_cli.backend} backend (no " + f"injection).", + file=sys.stderr, + ) + # No injection — hydra_args unchanged. + else: + sys.stderr.write( + f"[ERROR] preset_unsupported: task {args_cli.task!r} has no " + f"{args_cli.backend!r} preset. Inspect raw_cfg.sim.physics or " + f"re-enumerate {{physx,newton}}_envs.yaml.\n" + ) + sys.exit(2) + +# Re-set sys.argv so the --backend coercion above propagates to Hydra. 
def _compute_ema(series: list[float], alpha: float) -> float:
    """Return the final exponential moving average of a per-iteration series.

    The average is seeded with the first sample and then folded forward with
    ``ema = alpha * sample + (1 - alpha) * ema`` for every later sample.

    Args:
        series: Per-iteration scalar values (reward or episode length).
        alpha: Weight given to each new sample; ``1 - alpha`` is the weight
            kept from the running average.

    Returns:
        The running average after the last sample, or ``0.0`` when the
        series is empty.
    """
    if not series:
        return 0.0

    first, *remaining = series
    smoothed = float(first)
    carry = 1.0 - alpha  # weight kept from the previous running average
    for sample in remaining:
        smoothed = alpha * float(sample) + carry * smoothed
    return smoothed
app_launch_s: float, + env_creation_s: float, + first_step_s: float, +): + """Build a schema-v1 :class:`TrainingBundle` from tensorboard-parsed training data.""" + import numpy as np + + from isaaclab.benchmark.schema import ( + Learning, + LearningCurve, + MeanStd, + RunIdentity, + Runtime, + StartupPhaseTimes, + TrainingBundle, + ) + + reward_series = [float(x) for x in log_data.get("Train/mean_reward", [])] + ep_len_series = [float(x) for x in log_data.get("Train/mean_episode_length", [])] + + num_envs = env.unwrapped.num_envs + steps_per_iter = agent_cfg.num_steps_per_env + total_fps = list(log_data.get("Perf/total_fps", []) or []) + iter_times = [num_envs * steps_per_iter / fps if fps > 0 else 0.0 for fps in total_fps] + + def _ms(xs): + return MeanStd( + mean=float(np.mean(xs)) if xs else 0.0, + std=float(np.std(xs)) if xs else 0.0, + ) + + env_steps_per_s_series = [num_envs * steps_per_iter / t if t > 0 else 0.0 for t in iter_times] + iters_per_s_series = [1.0 / t if t > 0 else 0.0 for t in iter_times] + + backend = args.backend or "physx" + run_id = args.run_id or synth_run_id(framework, backend, args.task, args.seed) + + return TrainingBundle( + run=RunIdentity( + run_id=run_id, + framework=framework, + backend=backend, + task=args.task, + seed=args.seed, + num_envs=num_envs, + max_iterations=agent_cfg.max_iterations, + start_time_utc=run_start_dt.isoformat().replace("+00:00", "Z"), + end_time_utc=run_end_dt.isoformat().replace("+00:00", "Z"), + duration_s=(run_end_dt - run_start_dt).total_seconds(), + status=status, + ), + versions=versions, + hardware=hardware, + runtime=Runtime( + startup_phase_times_s=StartupPhaseTimes( + app_launch=app_launch_s, + env_creation=env_creation_s, + first_step=first_step_s, + ), + iterations_completed=len(iter_times), + total_wall_time_s=sum(iter_times), + steps_per_iteration=steps_per_iter, + iteration_time_s=_ms(iter_times), + env_steps_per_s=_ms(env_steps_per_s_series), + iterations_per_s=_ms(iters_per_s_series), + ), + 
resources=resources, + learning=Learning( + ema_alpha=args.ema_alpha, + reward=LearningCurve( + final_raw=reward_series[-1] if reward_series else 0.0, + final_ema=_compute_ema(reward_series, args.ema_alpha), + series_per_iter=None if args.no_series else reward_series, + ), + ep_length=LearningCurve( + final_raw=ep_len_series[-1] if ep_len_series else 0.0, + final_ema=_compute_ema(ep_len_series, args.ema_alpha), + series_per_iter=None if args.no_series else ep_len_series, + ), + ), + ) + + def main( env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg, @@ -191,15 +465,21 @@ def main( agent_cfg.seed = seed world_size = int(os.getenv("WORLD_SIZE", 1)) - # specify directory for logging experiments - log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) - log_root_path = os.path.abspath(log_root_path) - print(f"[INFO] Logging experiment in directory: {log_root_path}") - # specify directory for logging runs: {time-stamp}_{run_name} - log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - if agent_cfg.run_name: - log_dir += f"_{agent_cfg.run_name}" - log_dir = os.path.join(log_root_path, log_dir) + if args_cli.log_dir is not None: + # Explicit override: write straight into the given dir. 
+ log_dir = os.path.abspath(args_cli.log_dir) + log_root_path = os.path.dirname(log_dir) + os.makedirs(log_dir, exist_ok=True) + print(f"[INFO] Logging experiment in directory: {log_dir}") + else: + # Default: auto-generate logs//// + log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) + log_root_path = os.path.abspath(log_root_path) + print(f"[INFO] Logging experiment in directory: {log_root_path}") + log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + if agent_cfg.run_name: + log_dir += f"_{agent_cfg.run_name}" + log_dir = os.path.join(log_root_path, log_dir) # max iterations for training if args_cli.max_iterations: @@ -301,8 +581,46 @@ def main( tracker = get_success_tracker(args_cli, early_stop_ctx.tracker, log_data) log_success(benchmark, tracker, framework_iteration_count=early_stop_ctx.framework_iteration_count) + # Capture v1 state before _finalize_impl nulls out _manual_recorders. + versions_v1 = None + hardware_v1 = None + resources_v1 = None + if args_cli.schema_v1_output is not None: + versions_v1 = capture_versions(benchmark) + hardware_v1 = capture_hardware(benchmark) + resources_v1 = _capture_resources(benchmark) + benchmark._finalize_impl() + if args_cli.schema_v1_output is not None: + from isaaclab.benchmark.schema import write_bundle_file + + # Proxy for first-step time: the first iteration's collection+learning time. + # Pending a dedicated first-step timer in runner.learn(). 
def _native_backend_matches(raw_cfg, requested: str) -> bool:
    """Return ``True`` iff ``raw_cfg.sim.physics`` matches the requested backend.

    Returns ``False`` when ``raw_cfg`` has no ``sim`` attribute, and for
    sim-level :class:`PresetCfg` wrappers: presets carry multiple backends and
    the preset system handles selection downstream. A ``physics`` value of
    ``None`` is treated as the PhysX default.

    Every backend config class is imported optionally. A backend package that
    is not installed cannot be the type of ``physics``, so it is skipped
    instead of aborting validation with an ``ImportError``.

    Args:
        raw_cfg: Raw environment config as loaded from the task registry.
        requested: Backend name to compare against (``"physx"``,
            ``"newton"`` or ``"ovphysx"``).

    Returns:
        Whether the config's native physics backend is the requested one.
    """
    sim = getattr(raw_cfg, "sim", None)
    if sim is None:
        return False
    from isaaclab_tasks.utils.hydra import PresetCfg

    if isinstance(sim, PresetCfg):
        return False  # preset system handles it; presets_available is the source of truth
    physics = getattr(sim, "physics", None)
    # SimulationCfg.physics defaults to None which means PhysxCfg().
    if physics is None:
        return requested == "physx"

    import importlib

    # Checked in this order so the first matching config class decides.
    backend_cfg_classes = (
        ("physx", "isaaclab_physx.physics", "PhysxCfg"),
        ("newton", "isaaclab_newton.physics", "NewtonCfg"),
        ("ovphysx", "isaaclab_ovphysx.physics", "OvPhysxCfg"),
    )
    for backend, module_name, class_name in backend_cfg_classes:
        try:
            cfg_cls = getattr(importlib.import_module(module_name), class_name)
        except (ImportError, AttributeError):
            # Backend package (or its cfg class) is unavailable on this install.
            continue
        if isinstance(physics, cfg_cls):
            return requested == backend
    return False
backend options, defaults omniperf", +) +parser.add_argument("--output_path", type=str, default=".", help="Path to output benchmark results.") +parser.add_argument( + "--backend", + choices=["physx", "newton"], + default=None, + help=( + "Physics backend to run with. Drives both the bundle tag and " + "hydra `presets=`. Pass an explicit `presets=...` on " + "the CLI to override." + ), +) +parser.add_argument( + "--log_dir", + type=str, + default=None, + help=( + "Absolute path where the training framework writes its outputs " + "(TB events, checkpoints, params). When unset, falls back to " + "the default logs//// path. " + "Useful for downstream tooling that wants to collect outputs " + "into a pre-allocated directory." + ), +) +parser.add_argument( + "--run_id", + type=str, + default=None, + help="Run identity string to embed in the bundle. If omitted, a synthetic run_id is generated.", +) +parser.add_argument( + "--schema_v1_output", + type=str, + default=None, + help="If set, write a schema-v1 training.json to this path.", +) +parser.add_argument( + "--ema_alpha", + type=float, + default=0.05, + help="EMA smoothing factor for reward/ep_length (default 0.05, ~20-sample window).", +) +parser.add_argument( + "--no_series", + action="store_true", + default=False, + help="Omit per-iteration series from training.json (leaves final_raw + final_ema only).", +) + +AppLauncher.add_app_launcher_args(parser) +args_cli, hydra_args = parser.parse_known_args() + +# Map --backend X to hydra presets=X so the physics preset is applied +# at config-resolve time. Validate the request first: if the task does +# not have an X preset, exit fast with a stable stderr prefix the +# Asgard worker classifier matches on. An explicit presets=... on +# the CLI bypasses validation (operator override). 
+if args_cli.backend is not None: + existing_presets = [a for a in hydra_args if a.startswith("presets=")] + if existing_presets: + print(f"[WARNING] --backend={args_cli.backend} ignored because {existing_presets[0]} was explicitly passed.") + else: + from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry + from isaaclab_tasks.utils.presets import has_physics_preset + + try: + _raw_cfg = load_cfg_from_registry(args_cli.task, "env_cfg_entry_point") + except Exception as exc: # noqa: BLE001 — fall through to original behaviour + print( + f"[WARNING] could not load raw cfg for {args_cli.task!r} " + f"to validate preset support ({type(exc).__name__}: {exc}); " + f"injecting presets={args_cli.backend} unchecked.", + file=sys.stderr, + ) + hydra_args = [f"presets={args_cli.backend}"] + hydra_args + else: + if has_physics_preset(_raw_cfg, args_cli.backend): + hydra_args = [f"presets={args_cli.backend}"] + hydra_args + elif _native_backend_matches(_raw_cfg, args_cli.backend): + print( + f"[INFO] task {args_cli.task!r} has no '{args_cli.backend}' " + f"preset; running on native {args_cli.backend} backend (no " + f"injection).", + file=sys.stderr, + ) + # No injection — hydra_args unchanged. + else: + sys.stderr.write( + f"[ERROR] preset_unsupported: task {args_cli.task!r} has no " + f"{args_cli.backend!r} preset. 
Inspect raw_cfg.sim.physics or " + f"re-enumerate {{physx,newton}}_envs.yaml.\n" + ) + sys.exit(2) + +# clear out sys.argv for Hydra +sys.argv = [sys.argv[0]] + hydra_args + +imports_time_begin = time.perf_counter_ns() + +from datetime import datetime, timezone + +_SCRIPT_START_DT = datetime.now(timezone.utc) + +import gymnasium as gym +import numpy as np +import torch + +from isaaclab.envs import DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg +from isaaclab.utils.io import dump_yaml + +from isaaclab_rl.skrl import SkrlVecEnvWrapper + +import isaaclab_tasks # noqa: F401 +from isaaclab_tasks.utils import launch_simulation, resolve_task_config + +imports_time_end = time.perf_counter_ns() + +from isaaclab.test.benchmark import BaseIsaacLabBenchmark, BenchmarkMonitor +from isaaclab.utils.timer import Timer + +from scripts.benchmarks._schema_helpers import capture_hardware, capture_versions, synth_run_id +from scripts.benchmarks.utils import ( + get_backend_type, + get_preset_string, + log_app_start_time, + log_python_imports_time, + log_rl_policy_episode_lengths, + log_rl_policy_rewards, + log_runtime_step_times, + log_scene_creation_time, + log_simulation_start_time, + log_task_start_time, + log_total_start_time, +) + +torch.backends.cuda.matmul.allow_tf32 = True +torch.backends.cudnn.allow_tf32 = True +torch.backends.cudnn.deterministic = False +torch.backends.cudnn.benchmark = False + +# Resolve SKRL agent entry point (matches scripts/reinforcement_learning/skrl/train.py). +# For multi-agent (DirectMARLEnv) tasks, plain PPO can't be used — its observation +# preprocessor and policy assume a single Tensor obs, but DirectMARLEnv emits a +# per-agent dict. Auto-promote the default ``ppo`` algorithm to ``ippo`` so the +# task gets the right multi-agent variant; explicit ``--algorithm`` overrides +# (e.g. user passes ``mappo``) are honoured as-is. 
+_algorithm = args_cli.algorithm.lower() +if _algorithm == "ppo": + try: + from isaaclab.envs import DirectMARLEnvCfg as _DirectMARLEnvCfg + + from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry as _peek_cfg + + _peek = _peek_cfg(args_cli.task, "env_cfg_entry_point") + if isinstance(_peek, type) and issubclass(_peek, _DirectMARLEnvCfg): + _is_marl = True + else: + _is_marl = isinstance(_peek, _DirectMARLEnvCfg) + except Exception: # noqa: BLE001 — best-effort detection; fall through to PPO if peek fails + _is_marl = False + if _is_marl: + print(f"[INFO] {args_cli.task!r} is a multi-agent task; promoting --algorithm ppo -> ippo.", file=sys.stderr) + _algorithm = "ippo" +_agent_cfg_entry_point = "skrl_cfg_entry_point" if _algorithm == "ppo" else f"skrl_{_algorithm}_cfg_entry_point" + +backend_type = get_backend_type(args_cli.benchmark_backend) +benchmark = BaseIsaacLabBenchmark( + benchmark_name="benchmark_skrl_train", + backend_type=backend_type, + output_path=args_cli.output_path, + use_recorders=True, + frametime_recorders=backend_type in ("summary", "omniperf"), + output_prefix=f"benchmark_skrl_train_{args_cli.task}", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "max_iterations", "data": args_cli.max_iterations}, + {"name": "algorithm", "data": args_cli.algorithm}, + {"name": "presets", "data": get_preset_string(hydra_args)}, + ] + }, +) + + +def _compute_ema(series: list[float], alpha: float) -> float: + """Exponentially weighted moving average over a per-iteration series. + + Args: + series: Per-iteration scalar values. + alpha: Smoothing factor in [0, 1]; smaller values give more smoothing. + + Returns: + Final EMA value; 0.0 for an empty series. 
+ """ + if not series: + return 0.0 + ema = float(series[0]) + for y in series[1:]: + ema = alpha * float(y) + (1.0 - alpha) * ema + return ema + + +def _find_measurement(measurements, name: str) -> float | None: + """Return the value of the first SingleMeasurement with matching ``name``.""" + for meas in measurements: + if meas.name == name: + return float(meas.value) + return None + + +def _capture_resources(bm: BaseIsaacLabBenchmark): + """Build a schema-v1 :class:`Resources` from GPU/CPU/Memory recorders.""" + from isaaclab.benchmark.schema import MeanStd, MeanStdPeak, Resources + + gpu_m = bm._manual_recorders["GPUInfo"].get_data().measurements + cpu_m = bm._manual_recorders["CPUInfo"].get_data().measurements + mem_m = bm._manual_recorders["MemoryInfo"].get_data().measurements + + gpu_util_mean = _find_measurement(gpu_m, "GPU Utilization") or 0.0 + gpu_util_std = _find_measurement(gpu_m, "GPU Utilization std") or 0.0 + gpu_mem_mean = _find_measurement(gpu_m, "GPU Memory Used") or 0.0 + gpu_mem_std = _find_measurement(gpu_m, "GPU Memory Used std") or 0.0 + gpu_mem_peak = _find_measurement(gpu_m, "GPU Memory Used peak") or 0.0 + cpu_util_mean = _find_measurement(cpu_m, "CPU Utilization") or 0.0 + cpu_util_std = _find_measurement(cpu_m, "CPU Utilization std") or 0.0 + ram_mean = _find_measurement(mem_m, "System Memory RSS") or 0.0 + ram_std = _find_measurement(mem_m, "System Memory RSS std") or 0.0 + ram_peak = _find_measurement(mem_m, "System Memory RSS peak") or 0.0 + + return Resources( + gpu_util_pct=MeanStd(mean=gpu_util_mean, std=gpu_util_std), + gpu_mem_gb=MeanStdPeak(mean=gpu_mem_mean, std=gpu_mem_std, peak=gpu_mem_peak), + cpu_util_pct=MeanStd(mean=cpu_util_mean, std=cpu_util_std), + ram_gb=MeanStdPeak(mean=ram_mean, std=ram_std, peak=ram_peak), + ) + + +def _build_training_bundle( + reward_series: list[float], + ep_len_series: list[float], + iter_times_s: list[float], + num_envs: int, + steps_per_iter: int, + args, + versions, + hardware, + resources, + 
run_start_dt: datetime, + run_end_dt: datetime, + status: str, + app_launch_s: float, + env_creation_s: float, + first_step_s: float, +): + """Build a schema-v1 :class:`TrainingBundle` for an SKRL run.""" + from isaaclab.benchmark.schema import ( + Learning, + LearningCurve, + MeanStd, + RunIdentity, + Runtime, + StartupPhaseTimes, + TrainingBundle, + ) + + def _ms(xs): + return MeanStd( + mean=float(np.mean(xs)) if xs else 0.0, + std=float(np.std(xs)) if xs else 0.0, + ) + + env_steps_per_s_series = [num_envs * steps_per_iter / t if t > 0 else 0.0 for t in iter_times_s] + iters_per_s_series = [1.0 / t if t > 0 else 0.0 for t in iter_times_s] + + backend = args.backend or "physx" + run_id = args.run_id or synth_run_id("skrl", backend, args.task, args.seed) + + return TrainingBundle( + run=RunIdentity( + run_id=run_id, + framework="skrl", + backend=backend, + task=args.task, + seed=args.seed, + num_envs=num_envs, + max_iterations=args.max_iterations, + start_time_utc=run_start_dt.isoformat().replace("+00:00", "Z"), + end_time_utc=run_end_dt.isoformat().replace("+00:00", "Z"), + duration_s=(run_end_dt - run_start_dt).total_seconds(), + status=status, + ), + versions=versions, + hardware=hardware, + runtime=Runtime( + startup_phase_times_s=StartupPhaseTimes( + app_launch=app_launch_s, + env_creation=env_creation_s, + first_step=first_step_s, + ), + iterations_completed=len(iter_times_s), + total_wall_time_s=sum(iter_times_s), + steps_per_iteration=steps_per_iter, + iteration_time_s=_ms(iter_times_s), + env_steps_per_s=_ms(env_steps_per_s_series), + iterations_per_s=_ms(iters_per_s_series), + ), + resources=resources, + learning=Learning( + ema_alpha=args.ema_alpha, + reward=LearningCurve( + final_raw=reward_series[-1] if reward_series else 0.0, + final_ema=_compute_ema(reward_series, args.ema_alpha), + series_per_iter=None if args.no_series else reward_series, + ), + ep_length=LearningCurve( + final_raw=ep_len_series[-1] if ep_len_series else 0.0, + 
final_ema=_compute_ema(ep_len_series, args.ema_alpha), + series_per_iter=None if args.no_series else ep_len_series, + ), + ), + ) + + +def main( + env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, + agent_cfg: dict, + app_start_time_begin: int, + app_start_time_end: int, +): + """Train an SKRL agent and emit a v1 schema bundle on success.""" + from skrl.utils.runner.torch import Runner + + # Override configuration with non-hydra CLI arguments. + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs + env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + agent_cfg["trainer"]["close_environment_at_exit"] = False + + # Derive total timesteps from max_iterations (same formula as train.py). + rollouts = int(agent_cfg["agent"]["rollouts"]) + agent_cfg["trainer"]["timesteps"] = args_cli.max_iterations * rollouts + + agent_cfg["seed"] = args_cli.seed + env_cfg.seed = args_cli.seed + + if args_cli.log_dir is not None: + # Decompose so both `directory` and `experiment_name` are non-empty — + # SKRL's BaseAgent synthesizes a timestamp+classname subdir when + # `experiment_name` is falsy. Splitting into dirname/basename + # makes ``os.path.join(directory, experiment_name)`` recompose to + # ``log_dir`` exactly. + log_dir = os.path.abspath(args_cli.log_dir) + agent_cfg["agent"]["experiment"]["directory"] = os.path.dirname(log_dir) or "."
+ agent_cfg["agent"]["experiment"]["experiment_name"] = os.path.basename(log_dir) + os.makedirs(log_dir, exist_ok=True) + else: + log_root_path = os.path.join("logs", "skrl", agent_cfg["agent"]["experiment"]["directory"]) + log_root_path = os.path.abspath(log_root_path) + log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + f"_{_algorithm}_{args_cli.ml_framework}" + if agent_cfg["agent"]["experiment"]["experiment_name"]: + log_dir += f"_{agent_cfg['agent']['experiment']['experiment_name']}" + agent_cfg["agent"]["experiment"]["directory"] = log_root_path + agent_cfg["agent"]["experiment"]["experiment_name"] = log_dir + log_dir = os.path.join(log_root_path, log_dir) + if isinstance(env_cfg, ManagerBasedRLEnvCfg): + env_cfg.log_dir = log_dir + + dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) + dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) + + task_startup_time_begin = time.perf_counter_ns() + env = gym.make(args_cli.task, cfg=env_cfg) + env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) + task_startup_time_end = time.perf_counter_ns() + + from scripts.benchmarks.skrl_benchmark_trainer import BenchmarkTrainer + + class _BenchmarkRunner(Runner): + """Runner variant that builds a BenchmarkTrainer instead of a stock SequentialTrainer. + + Using a Runner subclass (rather than swapping ``Runner._trainer`` after + construction) ensures SKRL's ``agent.init()`` — which creates a + ``SummaryWriter`` — fires exactly once. Swapping after-the-fact would + call ``agent.init()`` twice and leave an orphaned TB events file in + the log dir. + """ + + def _generate_trainer(self, env, cfg, agent): + # Mirror stock Runner._generate_trainer: pop 'class', pass cfg["trainer"]. 
+ cfg["trainer"].pop("class", None) + return BenchmarkTrainer(env=env, agents=agent, cfg=cfg["trainer"]) + + runner = _BenchmarkRunner(env, agent_cfg) + benchmark_trainer = runner._trainer + + with BenchmarkMonitor(benchmark, interval=1.0): + runner.run() + + # Final recorder update after training completes. + benchmark.update_manual_recorders() + + iter_times_s = benchmark_trainer.iter_times_s + reward_series = benchmark_trainer.iter_rewards + ep_len_series = benchmark_trainer.iter_ep_lengths + per_iter_s = (sum(iter_times_s) / len(iter_times_s)) if iter_times_s else 0.0 + + rl_training_times = { + "Collection Time": iter_times_s, + "Learning Time": [0.0] * len(iter_times_s), + "Total FPS": [(args_cli.num_envs * rollouts / t) if t > 0 else 0.0 for t in iter_times_s], + } + + log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6) + log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6) + log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6) + log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000) + log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000) + log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6) + if iter_times_s: + log_runtime_step_times(benchmark, rl_training_times, compute_stats=True) + if reward_series: + log_rl_policy_rewards(benchmark, reward_series) + if ep_len_series: + log_rl_policy_episode_lengths(benchmark, ep_len_series) + + # Capture v1 state before _finalize_impl clears the recorders. 
+ versions_v1 = None + hardware_v1 = None + resources_v1 = None + if args_cli.schema_v1_output is not None: + versions_v1 = capture_versions(benchmark) + hardware_v1 = capture_hardware(benchmark) + resources_v1 = _capture_resources(benchmark) + + benchmark._finalize_impl() + + if args_cli.schema_v1_output is not None: + from isaaclab.benchmark.schema import write_bundle_file + + bundle = _build_training_bundle( + reward_series=reward_series, + ep_len_series=ep_len_series, + iter_times_s=iter_times_s, + num_envs=env.unwrapped.num_envs, + steps_per_iter=rollouts, + args=args_cli, + versions=versions_v1, + hardware=hardware_v1, + resources=resources_v1, + run_start_dt=_SCRIPT_START_DT, + run_end_dt=datetime.now(timezone.utc), + status="completed", + app_launch_s=(app_start_time_end - app_start_time_begin) / 1e9, + env_creation_s=(task_startup_time_end - task_startup_time_begin) / 1e9, + first_step_s=per_iter_s, + ) + write_bundle_file(bundle, args_cli.schema_v1_output) + + env.close() + + +if __name__ == "__main__": + env_cfg, agent_cfg = resolve_task_config(args_cli.task, _agent_cfg_entry_point) + + app_start_time_begin = time.perf_counter_ns() + with launch_simulation(env_cfg, args_cli): + app_start_time_end = time.perf_counter_ns() + main(env_cfg, agent_cfg, app_start_time_begin, app_start_time_end) diff --git a/scripts/benchmarks/benchmark_startup.py b/scripts/benchmarks/benchmark_startup.py index 93d92257ca11..b0742fcff6e5 100644 --- a/scripts/benchmarks/benchmark_startup.py +++ b/scripts/benchmarks/benchmark_startup.py @@ -16,11 +16,16 @@ import os import sys import time +from datetime import datetime, timezone from isaaclab.app import AppLauncher from isaaclab_tasks.utils import fold_preset_tokens, setup_preset_cli +# Wall-clock start of the entire script, captured as early as possible so the +# startup bundle can report a total duration that covers all phases. 
+_SCRIPT_START_DT = datetime.now(timezone.utc) + # -- CLI arguments ----------------------------------------------------------- parser = argparse.ArgumentParser(description="Profile IsaacLab startup phases.") @@ -56,6 +61,24 @@ default=None, help="Path to YAML file with per-phase function whitelist patterns. Overrides --top_n for listed phases.", ) +parser.add_argument( + "--schema_v1_output", + type=str, + default=None, + help="If set, write a schema-v1 startup.json to this path.", +) +parser.add_argument( + "--backend", + choices=["physx", "newton"], + default=None, + help="Physics backend tag recorded in the bundle. Defaults to 'physx' if omitted.", +) +parser.add_argument( + "--run_id", + type=str, + default=None, + help="Run identity string to embed in the bundle. If omitted, a synthetic run_id is generated.", +) # append AppLauncher cli args (provides --device, --headless, etc.) AppLauncher.add_app_launcher_args(parser) @@ -68,6 +91,7 @@ from isaaclab.test.benchmark import BaseIsaacLabBenchmark, SingleMeasurement from isaaclab.utils.timer import Timer, TimerError +from scripts.benchmarks._schema_helpers import capture_hardware, capture_versions, synth_run_id from scripts.benchmarks.utils import ( get_backend_type, get_preset_string, @@ -81,13 +105,14 @@ imports_profile.enable() import gymnasium as gym # noqa: E402 -import numpy as np # noqa: E402 import torch # noqa: E402 from isaaclab.envs import DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg # noqa: E402 from isaaclab_tasks.utils import launch_simulation, resolve_task_config # noqa: E402 +from scripts.benchmarks._action_sampling import sample_random_actions # noqa: E402 + imports_profile.disable() if torch.cuda.is_available() and torch.cuda.is_initialized(): @@ -185,6 +210,86 @@ ) +# -- Schema v1 helpers ------------------------------------------------------ + + +def _build_startup_bundle( + phases_data: dict, + run_start_dt: datetime, + run_end_dt: datetime, + status: str, + versions, + hardware, 
+): + """Build a schema-v1 StartupBundle from the collected phase data. + + Args: + phases_data: The same ``phases`` dict ``main()`` builds for legacy logging. + run_start_dt: UTC timestamp when the whole script started. + run_end_dt: UTC timestamp when the whole script finished. + status: Completion status of the run (``"completed"`` or ``"crashed"``). + versions: Pre-captured :class:`Versions` (must be captured before + ``benchmark._finalize_impl()`` which clears the recorders). + hardware: Pre-captured :class:`Hardware`. + + Returns: + A :class:`StartupBundle` ready to be passed to :func:`write_bundle_file`. + """ + from isaaclab.benchmark.schema import ( + CProfileFunction, + StartupBundle, + StartupConfig, + StartupPhase, + StartupRunIdentity, + ) + + # Startup profiling is framework-agnostic; callers that wrap multiple + # framework runs pass the real framework via --run_id. We record "rsl_rl" + # as a schema placeholder when invoked standalone (the field is required). + framework = "rsl_rl" + backend = args_cli.backend or "physx" + + phases_out: dict[str, StartupPhase] = {} + for name, data in phases_data.items(): + top_funcs: list[CProfileFunction] = [] + for label, tottime_ms, cumtime_ms, ncalls in parse_cprofile_stats( + data["profile"], _ISAACLAB_PREFIXES, top_n=args_cli.top_n, whitelist=_WHITELIST.get(name) + ): + top_funcs.append( + CProfileFunction( + name=label, + own_time_s=tottime_ms / 1000.0, + cum_time_s=cumtime_ms / 1000.0, + calls=ncalls, + ) + ) + phases_out[name] = StartupPhase( + total_time_s=data["wall_clock_ms"] / 1000.0, + top_functions=top_funcs, + ) + + seed = args_cli.seed if args_cli.seed is not None else 0 + run_id = args_cli.run_id or synth_run_id(framework, backend, args_cli.task, seed) + + return StartupBundle( + run=StartupRunIdentity( + run_id=run_id, + framework=framework, + backend=backend, + task=args_cli.task, + seed=seed, + start_time_utc=run_start_dt.isoformat().replace("+00:00", "Z"), + 
end_time_utc=run_end_dt.isoformat().replace("+00:00", "Z"), + duration_s=(run_end_dt - run_start_dt).total_seconds(), + status=status, + ), + versions=versions, + hardware=hardware, + phases=phases_out, + config=StartupConfig(top_n=args_cli.top_n, whitelist=args_cli.whitelist_config), + ) + + # -- Main profiling logic --------------------------------------------------- @@ -224,10 +329,10 @@ def main( env_creation_time_end = time.perf_counter_ns() # -- First step profiled ------------------------------------------------ - # Sample random actions from the action space directly to support - # Box, Discrete, MultiDiscrete, and Dict spaces. - np_actions = np.stack([env.unwrapped.single_action_space.sample() for _ in range(env.unwrapped.num_envs)]) - actions = torch.as_tensor(np_actions, dtype=torch.float32, device=env.unwrapped.device) + # Sample random actions from the action space(s). Returns a tensor for + # single-agent envs and a per-agent dict for multi-agent (DirectMARLEnv) + # envs — env.step accepts the matching shape. + actions = sample_random_actions(env) first_step_profile = cProfile.Profile() first_step_time_begin = time.perf_counter_ns() @@ -317,7 +422,7 @@ def main( ) # Log per-function measurements (tottime + cumtime) - for label, tottime_ms, cumtime_ms in functions: + for label, tottime_ms, cumtime_ms, _ncalls in functions: benchmark.add_measurement( phase_name, measurement=SingleMeasurement(name=label, value=round(tottime_ms, 2), unit="ms") ) @@ -326,9 +431,30 @@ def main( measurement=SingleMeasurement(name=f"{label} (cumtime)", value=round(cumtime_ms, 2), unit="ms"), ) - # Finalize benchmark output + # Capture versions/hardware BEFORE finalize, which clears the recorders. + versions_v1 = None + hardware_v1 = None + if args_cli.schema_v1_output is not None: + benchmark.update_manual_recorders() + versions_v1 = capture_versions(benchmark) + hardware_v1 = capture_hardware(benchmark) + + # Finalize benchmark output (nulls out _manual_recorders). 
benchmark.update_manual_recorders() benchmark._finalize_impl() + + if args_cli.schema_v1_output is not None: + from isaaclab.benchmark.schema import write_bundle_file + + bundle = _build_startup_bundle( + phases, + _SCRIPT_START_DT, + datetime.now(timezone.utc), + status="completed", + versions=versions_v1, + hardware=hardware_v1, + ) + write_bundle_file(bundle, args_cli.schema_v1_output) finally: if env is not None: env.close() diff --git a/scripts/benchmarks/skrl_benchmark_trainer.py b/scripts/benchmarks/skrl_benchmark_trainer.py new file mode 100644 index 000000000000..73d58b8ed92b --- /dev/null +++ b/scripts/benchmarks/skrl_benchmark_trainer.py @@ -0,0 +1,144 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""BenchmarkTrainer — SKRL trainer subclass that captures per-iteration metrics. + +Mirrors :class:`skrl.trainers.torch.SequentialTrainer`'s training loop and +records, once per rollout-buffer fill (= one iteration): + +* ``iter_times_s`` - wall-clock seconds from the first env step of the + rollout to just after ``agent.post_interaction`` of the rollout's final + step (i.e. after the PPO update). +* ``iter_rewards`` - mean reward across all env steps and all parallel + envs during the rollout. +* ``iter_ep_lengths`` - last value of + ``agent.tracking_data["Episode / Total timesteps (mean)"]`` observed at + iteration end, or ``0.0`` when no episode terminated yet. + +These attributes are populated after :meth:`train` returns and are read +directly by ``benchmark_skrl.py``'s v1 bundle builder — no TB round trip. +""" + +from __future__ import annotations + +import inspect +import time + +import torch +import tqdm +from skrl.trainers.torch import SequentialTrainer + +# skrl >= ~2.x removed the ``agents_scope`` keyword from +# ``SequentialTrainer.__init__``. 
Detect once at import time so the wrapper +# stays compatible with both old and new versions without try/except per +# call site. +_SUPER_INIT_PARAMS = inspect.signature(SequentialTrainer.__init__).parameters +_SUPPORTS_AGENTS_SCOPE = "agents_scope" in _SUPER_INIT_PARAMS + + +class BenchmarkTrainer(SequentialTrainer): + """SequentialTrainer that records per-iteration timing + reward + ep length.""" + + def __init__(self, env, agents, agents_scope=None, cfg=None) -> None: + if _SUPPORTS_AGENTS_SCOPE: + super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=cfg) + else: + super().__init__(env=env, agents=agents, cfg=cfg) + self.iter_times_s: list[float] = [] + self.iter_rewards: list[float] = [] + self.iter_ep_lengths: list[float] = [] + + def train(self) -> None: + # Exactly one non-simultaneous single-agent training path — mirrors + # the parent SequentialTrainer for that case. If the user is running + # multi-agent or simultaneous agents, defer to the stock loop (those + # paths don't populate the per-iteration benchmark attributes). + if self.num_simultaneous_agents > 1 or self.env.num_agents > 1: + super().train() + return + + rollouts_attr = getattr(self.agents, "_rollouts", None) + if not rollouts_attr: + # Agent has no rollout boundary (e.g. off-policy SAC/DDPG). + # Defer to the stock training loop — the per-iter attributes + # stay empty, and benchmark_skrl.py will treat that as "no + # per-iter data available" rather than wall-time garbage. 
+ super().train() + return + rollouts = int(rollouts_attr) + max_iters = self.timesteps // rollouts + + self.agents.set_running_mode("train") + states, infos = self.env.reset() + + iter_start_ns = time.perf_counter_ns() + rollout_reward_sum = 0.0 + rollout_reward_count = 0 + + for timestep in tqdm.tqdm( + range(self.initial_timestep, self.timesteps), + disable=self.disable_progressbar, + ): + self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps) + + with torch.no_grad(): + actions = self.agents.act(states, timestep=timestep, timesteps=self.timesteps)[0] + next_states, rewards, terminated, truncated, infos = self.env.step(actions) + + if not self.headless: + self.env.render() + + self.agents.record_transition( + states=states, + actions=actions, + rewards=rewards, + next_states=next_states, + terminated=terminated, + truncated=truncated, + infos=infos, + timestep=timestep, + timesteps=self.timesteps, + ) + + if self.environment_info in infos: + for k, v in infos[self.environment_info].items(): + if isinstance(v, torch.Tensor) and v.numel() == 1: + self.agents.track_data(f"Info / {k}", v.item()) + + rollout_reward_sum += float(rewards.mean().item()) + rollout_reward_count += 1 + + self.agents.post_interaction(timestep=timestep, timesteps=self.timesteps) + + # Reset envs only when running a single env; multi-env VecEnvs + # handle per-env resets themselves. Mirrors + # skrl.trainers.torch.base.Trainer.single_agent_train. + if self.env.num_envs > 1: + states = next_states + else: + if terminated.any() or truncated.any(): + with torch.no_grad(): + states, infos = self.env.reset() + else: + states = next_states + + # One iteration = one rollout-buffer fill. 
+ if (timestep + 1) % rollouts == 0: + iter_end_ns = time.perf_counter_ns() + self.iter_times_s.append((iter_end_ns - iter_start_ns) / 1e9) + mean_reward = rollout_reward_sum / max(rollout_reward_count, 1) + self.iter_rewards.append(mean_reward) + ep_len_samples = self.agents.tracking_data.get("Episode / Total timesteps (mean)", []) + self.iter_ep_lengths.append(float(ep_len_samples[-1]) if ep_len_samples else 0.0) + # Reset per-iter accumulators + timer for the next rollout. + iter_start_ns = time.perf_counter_ns() + rollout_reward_sum = 0.0 + rollout_reward_count = 0 + + # Cap any series to max_iters (guards against off-by-one if timesteps + # isn't a clean multiple of rollouts). + self.iter_times_s = self.iter_times_s[:max_iters] + self.iter_rewards = self.iter_rewards[:max_iters] + self.iter_ep_lengths = self.iter_ep_lengths[:max_iters] diff --git a/scripts/benchmarks/startup_whitelist.yaml b/scripts/benchmarks/startup_whitelist.yaml index 121718d36b40..00163c27d328 100644 --- a/scripts/benchmarks/startup_whitelist.yaml +++ b/scripts/benchmarks/startup_whitelist.yaml @@ -3,23 +3,44 @@ # # SPDX-License-Identifier: BSD-3-Clause +# Per-phase function whitelist for benchmark_startup.py. Patterns are +# fnmatch-style; patterns matching no function emit a placeholder row +# (tottime=0, cumtime=0, ncalls=0) so downstream dashboards always receive +# consistent keys. +# +# A phase MAY be absent from this file — in that case benchmark_startup +# falls back to top_n selection (default: 30). Phases documented below +# that intentionally fall through say so explicitly in a comment rather +# than listing patterns. +# +# python_imports: intentional top_n fallback — all top-30 functions have +# own_time < 7.1 ms; dominant entries are import-machinery internals +# (importlib._bootstrap:_find_and_load) and generic builtins, +# not stable IsaacLab symbols worth tracking on a dashboard.
+# +# first_step: intentional top_n fallback — total wall-time is only 0.19 s; +# the single function above 10 ms is task-specific +# (locomotion_env:_get_rewards at 15 ms) and will differ across tasks, +# making a fixed whitelist fragile. + app_launch: - "isaaclab.utils.configclass:_wrap_resolvable_strings" - "isaaclab.utils.configclass:_custom_post_init" - "isaaclab.utils.configclass:_field_module_dir" + - "lib.python3.12.copy:deepcopy" + +task_config: + - "isaaclab.utils.configclass:configclass" + - "isaaclab.utils.configclass:_custom_post_init" + - "isaaclab.utils.configclass:_wrap_resolvable_strings" + - "isaaclab.utils.configclass:_process_mutable_types" + - "lib.python3.12.copy:deepcopy" env_creation: - - "isaaclab.cloner.*:usd_replicate" - - "isaaclab.cloner.*:filter_collisions" - - "isaaclab_physx.cloner.*:attach_end_fn" - - "isaaclab.scene.*:_init_scene" - - "isaaclab.envs.mdp.observations:*" + - "isaaclab.sim.utils.prims:wrapper" + - "isaaclab.cloner.cloner_utils:usd_replicate" + - "isaaclab_physx.cloner.physx_replicate:attach_end_fn" + - "isaaclab_physx.cloner.physx_replicate:physx_replicate" + - "isaaclab.cloner.cloner_utils:grid_transforms" + - "isaaclab.sim.utils.queries:find_matching_prims" - "isaaclab.utils.assets:_find_usd_dependencies" - -first_step: - - "isaaclab.envs.mdp.rewards:*" - - "isaaclab.envs.mdp.terminations:*" - - "isaaclab.envs.mdp.observations:*" - - "isaaclab.actuators.*:compute" - - "warp.*:launch" - - "warp.*:to_torch" diff --git a/scripts/benchmarks/tests/__init__.py b/scripts/benchmarks/tests/__init__.py new file mode 100644 index 000000000000..460a30569089 --- /dev/null +++ b/scripts/benchmarks/tests/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/scripts/benchmarks/tests/test_action_sampling.py b/scripts/benchmarks/tests/test_action_sampling.py new file mode 100644 index 000000000000..3910ae4c2f89 --- /dev/null +++ b/scripts/benchmarks/tests/test_action_sampling.py @@ -0,0 +1,169 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Unit tests for :func:`scripts.benchmarks._action_sampling.sample_random_actions`.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +import numpy as np +import torch + +from scripts.benchmarks._action_sampling import sample_random_actions + + +@dataclass +class _BoxSpace: + """Minimal stand-in for ``gym.spaces.Box``. Only needs ``.sample()`` for + these tests; we duck-type to avoid pulling gymnasium into the unit-test + path (gymnasium is installed inside Isaac Sim's python, not the system + one used by ``python3 -m pytest``).""" + + low: float + high: float + shape: tuple + + def sample(self) -> np.ndarray: + return np.random.uniform(low=self.low, high=self.high, size=self.shape).astype(np.float32) + + +def _box(low: float = -1.0, high: float = 1.0, shape: tuple = (3,)) -> _BoxSpace: + return _BoxSpace(low=low, high=high, shape=shape) + + +@dataclass +class _FakeSingleAgentEnv: + """Mimic the unwrapped surface of a DirectRLEnv / ManagerBasedRLEnv.""" + + num_envs: int = 4 + device: str = "cpu" + single_action_space: _BoxSpace = field(default_factory=_box) + + @property + def unwrapped(self): + return self + + +@dataclass +class _FakeMARLEnv: + """Mimic the unwrapped surface of a DirectMARLEnv.""" + + num_envs: int = 4 + device: str = "cpu" + action_spaces: dict = field( + default_factory=lambda: { + "cart": _box(shape=(1,)), + "pendulum": _box(shape=(1,)), + } + ) + + @property + def unwrapped(self): + return self + + +def 
test_sample_random_actions_single_agent_returns_stacked_tensor(): + """Single-agent envs must get one tensor of shape ``(num_envs, action_dim)`` — + ``env.step`` of a DirectRLEnv expects a single tensor, not a dict.""" + env = _FakeSingleAgentEnv(num_envs=8) + actions = sample_random_actions(env) + assert isinstance(actions, torch.Tensor) + assert actions.shape == (8, 3) + assert actions.dtype == torch.float32 + + +def test_sample_random_actions_multi_agent_returns_dict(): + """Multi-agent envs must get a dict ``{agent_id: tensor}`` — that's the + shape ``DirectMARLEnv.step`` accepts. The previous code path called + ``unwrapped.single_action_space.sample()`` and crashed with + ``AttributeError: 'CartDoublePendulumEnv' object has no attribute + 'single_action_space'`` on every multi-agent benchmark run.""" + env = _FakeMARLEnv(num_envs=4) + actions = sample_random_actions(env) + assert isinstance(actions, dict) + assert set(actions) == {"cart", "pendulum"} + for agent, tensor in actions.items(): + assert isinstance(tensor, torch.Tensor) + assert tensor.shape == (4, 1) + assert tensor.dtype == torch.float32 + + +def test_sample_random_actions_multi_agent_handles_heterogeneous_action_dims(): + """Per-agent action spaces can have different shapes — the helper must + sample each space at its own dimensionality, not assume a uniform + shape across agents.""" + env = _FakeMARLEnv( + num_envs=2, + action_spaces={ + "small": _box(shape=(1,)), + "large": _box(shape=(7,)), + }, + ) + actions = sample_random_actions(env) + assert actions["small"].shape == (2, 1) + assert actions["large"].shape == (2, 7) + + +def test_sample_random_actions_multi_agent_samples_within_space_bounds(): + """Sanity-check that the sampled values come from the declared Box — + catches a regression where someone replaces ``space.sample()`` with + e.g. 
zeros.""" + env = _FakeMARLEnv( + num_envs=16, + action_spaces={ + "agent": _box(low=-2.0, high=2.0, shape=(1,)), + }, + ) + actions = sample_random_actions(env) + a = actions["agent"] + assert (a >= -2.0).all() and (a <= 2.0).all() + + +def test_sample_random_actions_uses_env_device_for_returned_tensors(): + """Per the original code, the returned tensors live on + ``env.device``; otherwise ``env.step(actions)`` will copy from CPU + to GPU on every benchmark run and skew the timing.""" + env = _FakeSingleAgentEnv(device="cpu") # MPS/CUDA not assumed in tests + actions = sample_random_actions(env) + assert str(actions.device) == "cpu" + + +def test_sample_random_actions_passes_through_gym_wrappers(): + """The benchmark runs against a ``gym.make()``-wrapped env; the + action-space discriminator must read off ``env.unwrapped`` rather + than the wrapper, so that a single-agent gym.Wrapper exposing a + legacy ``action_spaces`` attribute (Wrapper has none, but + defensive) can't trick us into the MARL branch.""" + + @dataclass + class _Wrapper: + inner: object + + @property + def unwrapped(self): + return self.inner + + env = _Wrapper(inner=_FakeSingleAgentEnv()) + actions = sample_random_actions(env) + assert isinstance(actions, torch.Tensor) + + +def test_sample_random_actions_marl_per_env_independence(): + """Each row in the per-agent action tensor must be an independent + sample — i.e., sampling N times produces N (likely) distinct rows. + A regression where the loop replaced ``range(num_envs)`` with a + single sample broadcasted across rows would slip past the shape + check but produce trivially correlated actions across envs.""" + env = _FakeMARLEnv( + num_envs=64, + action_spaces={"a": _box(shape=(2,))}, + ) + actions = sample_random_actions(env) + a = actions["a"].numpy() + # With 64 i.i.d. samples from a continuous Box, np.unique row count + # is overwhelmingly likely to be 64. Allow some slack just in case + # of pathological RNG state. 
+ assert len({tuple(r) for r in a}) >= 60 diff --git a/scripts/benchmarks/tests/test_benchmark_rsl_rl_cli.py b/scripts/benchmarks/tests/test_benchmark_rsl_rl_cli.py new file mode 100644 index 000000000000..66532631d14a --- /dev/null +++ b/scripts/benchmarks/tests/test_benchmark_rsl_rl_cli.py @@ -0,0 +1,201 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""CLI-level tests for benchmark_rsl_rl.py. + +These tests exercise only the argparse layer — they do not import the +whole script (which launches Isaac Sim at import time). A minimal reimport +of the argparse setup is shared via ``_build_parser``. +""" + +from __future__ import annotations + +import argparse + + +def _build_parser() -> argparse.ArgumentParser: + """Mirror of the parser setup in benchmark_rsl_rl.py. + + Kept in lockstep with the script; when a new flag is added there, + add it here too. + """ + parser = argparse.ArgumentParser() + parser.add_argument("--task", type=str) + parser.add_argument("--num_envs", type=int) + parser.add_argument("--seed", type=int) + parser.add_argument("--max_iterations", type=int) + parser.add_argument("--backend", choices=["physx", "newton"], default=None) + parser.add_argument("--run_id", type=str, default=None) + parser.add_argument("--schema_v1_output", type=str, default=None) + parser.add_argument("--log_dir", type=str, default=None) + return parser + + +def test_log_dir_flag_defaults_none(): + args = _build_parser().parse_args([]) + assert args.log_dir is None + + +def test_log_dir_flag_captured(): + args = _build_parser().parse_args(["--log_dir", "/tmp/bundle/training_data"]) + assert args.log_dir == "/tmp/bundle/training_data" + + +def _inject_preset(args_cli, hydra_args: list[str]) -> list[str]: + """Mirror of the inject_preset logic in benchmark_rsl_rl.py. 
+ + Invariant: when --backend X is set AND hydra_args does NOT already + contain a ``presets=...`` entry, prepend ``presets=X``. + """ + if args_cli.backend is None: + return hydra_args + existing = [a for a in hydra_args if a.startswith("presets=")] + if existing: + print(f"[WARNING] --backend={args_cli.backend} ignored; explicit {existing[0]} wins.") + return hydra_args + return [f"presets={args_cli.backend}"] + hydra_args + + +def test_backend_injects_preset_when_none_given(): + args = _build_parser().parse_args(["--backend", "newton"]) + out = _inject_preset(args, ["env.decimation=4"]) + assert out == ["presets=newton", "env.decimation=4"] + + +def test_backend_does_not_inject_when_preset_already_present(capsys): + args = _build_parser().parse_args(["--backend", "newton"]) + out = _inject_preset(args, ["presets=custom", "env.decimation=4"]) + assert out == ["presets=custom", "env.decimation=4"] + assert "ignored" in capsys.readouterr().out + + +def test_backend_unset_is_noop(): + args = _build_parser().parse_args([]) + out = _inject_preset(args, ["env.decimation=4"]) + assert out == ["env.decimation=4"] + + +def _inject_preset_with_validation(args_cli, hydra_args: list[str], has_physics_preset_fn) -> list[str]: + """Mirror of the new gated injection in benchmark_rsl_rl.py. + + has_physics_preset_fn is the only injection point — the test passes + a stub returning True / False; the production caller passes the real + has_physics_preset(raw_cfg, name) closure. + """ + import sys + + if args_cli.backend is None: + return hydra_args + existing = [a for a in hydra_args if a.startswith("presets=")] + if existing: + print(f"[WARNING] --backend={args_cli.backend} ignored; explicit {existing[0]} wins.") + return hydra_args + if not has_physics_preset_fn(args_cli.backend): + sys.stderr.write( + f"[ERROR] preset_unsupported: task {args_cli.task!r} has no " + f"{args_cli.backend!r} preset. 
Inspect raw_cfg.sim.physics or " + f"re-enumerate {{physx,newton}}_envs.yaml.\n" + ) + sys.exit(2) + return [f"presets={args_cli.backend}"] + hydra_args + + +def test_validation_blocks_unsupported_preset(capsys): + args = _build_parser().parse_args(["--task", "Isaac-Foo-v0", "--backend", "physx"]) + import pytest + + with pytest.raises(SystemExit) as exc_info: + _inject_preset_with_validation(args, ["env.x=1"], has_physics_preset_fn=lambda name: False) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "preset_unsupported:" in captured.err + assert "Isaac-Foo-v0" in captured.err + + +def test_validation_passes_when_supported(): + args = _build_parser().parse_args(["--task", "Isaac-Bar-v0", "--backend", "newton"]) + out = _inject_preset_with_validation(args, ["env.x=1"], has_physics_preset_fn=lambda name: True) + assert out == ["presets=newton", "env.x=1"] + + +def test_validation_skipped_when_explicit_preset_present(capsys): + """Explicit presets= in hydra_args bypasses validation (operator override).""" + args = _build_parser().parse_args(["--task", "Isaac-Foo-v0", "--backend", "physx"]) + + def _bomb(name: str) -> bool: + raise AssertionError("validator must not run when explicit preset is present") + + out = _inject_preset_with_validation(args, ["presets=custom", "env.x=1"], has_physics_preset_fn=_bomb) + assert out == ["presets=custom", "env.x=1"] + assert "ignored" in capsys.readouterr().out + + +def _inject_preset_with_validation_v2( + args_cli, + hydra_args: list[str], + has_physics_preset_fn, + native_backend_matches_fn, +) -> list[str]: + """Mirror of the new gated injection in benchmark_rsl_rl.py (post native-backend fix). 
+ + Two stub injection points: + - has_physics_preset_fn(name) -> bool (existing) + - native_backend_matches_fn(name) -> bool (new) + """ + import sys + + if args_cli.backend is None: + return hydra_args + existing = [a for a in hydra_args if a.startswith("presets=")] + if existing: + print(f"[WARNING] --backend={args_cli.backend} ignored; explicit {existing[0]} wins.") + return hydra_args + if has_physics_preset_fn(args_cli.backend): + return [f"presets={args_cli.backend}"] + hydra_args + if native_backend_matches_fn(args_cli.backend): + print( + f"[INFO] task {args_cli.task!r} has no '{args_cli.backend}' preset; " + f"running on native {args_cli.backend} backend (no injection).", + file=sys.stderr, + ) + return hydra_args + sys.stderr.write( + f"[ERROR] preset_unsupported: task {args_cli.task!r} has no " + f"{args_cli.backend!r} preset. Inspect raw_cfg.sim.physics or " + f"re-enumerate {{physx,newton}}_envs.yaml.\n" + ) + raise SystemExit(2) + + +def test_validation_skips_injection_when_native_matches(capsys): + """No preset, but cfg type matches request → run with no injection + [INFO] log.""" + args = _build_parser().parse_args(["--task", "Isaac-Quadcopter-Direct-v0", "--backend", "physx"]) + out = _inject_preset_with_validation_v2( + args, + ["env.x=1"], + has_physics_preset_fn=lambda name: False, + native_backend_matches_fn=lambda name: True, + ) + assert out == ["env.x=1"] + captured = capsys.readouterr() + assert "running on native physx" in captured.err + assert "no injection" in captured.err + + +def test_validation_still_blocks_when_native_mismatches(capsys): + """No preset AND cfg type doesn't match → existing exit-2 + preset_unsupported: stderr (regression).""" + args = _build_parser().parse_args(["--task", "Isaac-NewtonOnly-v0", "--backend", "physx"]) + import pytest + + with pytest.raises(SystemExit) as exc_info: + _inject_preset_with_validation_v2( + args, + ["env.x=1"], + has_physics_preset_fn=lambda name: False, + native_backend_matches_fn=lambda 
name: False, + ) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "preset_unsupported:" in captured.err diff --git a/scripts/benchmarks/tests/test_benchmark_skrl_cli.py b/scripts/benchmarks/tests/test_benchmark_skrl_cli.py new file mode 100644 index 000000000000..7ca5fdb1a2e8 --- /dev/null +++ b/scripts/benchmarks/tests/test_benchmark_skrl_cli.py @@ -0,0 +1,234 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""CLI-level tests for benchmark_skrl.py — argparse-only, no Isaac Sim.""" + +from __future__ import annotations + +import argparse + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument("--task", type=str) + parser.add_argument("--num_envs", type=int) + parser.add_argument("--seed", type=int) + parser.add_argument("--max_iterations", type=int) + parser.add_argument("--backend", choices=["physx", "newton"], default=None) + parser.add_argument("--run_id", type=str, default=None) + parser.add_argument("--schema_v1_output", type=str, default=None) + parser.add_argument("--log_dir", type=str, default=None) + parser.add_argument("--ml_framework", type=str, default="torch") + return parser + + +def _inject_preset(args_cli, hydra_args: list[str]) -> list[str]: + if args_cli.backend is None: + return hydra_args + existing = [a for a in hydra_args if a.startswith("presets=")] + if existing: + print(f"[WARNING] --backend={args_cli.backend} ignored; explicit {existing[0]} wins.") + return hydra_args + return [f"presets={args_cli.backend}"] + hydra_args + + +def test_log_dir_flag_defaults_none(): + assert _build_parser().parse_args([]).log_dir is None + + +def test_log_dir_flag_captured(): + args = _build_parser().parse_args(["--log_dir", "/tmp/bundle/training_data"]) + assert args.log_dir == "/tmp/bundle/training_data" + + +def 
test_backend_injects_preset_when_none_given(): + args = _build_parser().parse_args(["--backend", "newton"]) + assert _inject_preset(args, ["env.decimation=4"]) == ["presets=newton", "env.decimation=4"] + + +def test_backend_does_not_inject_when_preset_already_present(capsys): + args = _build_parser().parse_args(["--backend", "newton"]) + out = _inject_preset(args, ["presets=custom", "env.decimation=4"]) + assert out == ["presets=custom", "env.decimation=4"] + assert "ignored" in capsys.readouterr().out + + +def test_backend_unset_is_noop(): + args = _build_parser().parse_args([]) + assert _inject_preset(args, ["env.decimation=4"]) == ["env.decimation=4"] + + +def _inject_preset_with_validation(args_cli, hydra_args: list[str], has_physics_preset_fn) -> list[str]: + """Mirror of the new gated injection in benchmark_skrl.py. + + has_physics_preset_fn is the only injection point — the test passes + a stub returning True / False; the production caller passes the real + has_physics_preset(raw_cfg, name) closure. + """ + import sys + + if args_cli.backend is None: + return hydra_args + existing = [a for a in hydra_args if a.startswith("presets=")] + if existing: + print(f"[WARNING] --backend={args_cli.backend} ignored; explicit {existing[0]} wins.") + return hydra_args + if not has_physics_preset_fn(args_cli.backend): + sys.stderr.write( + f"[ERROR] preset_unsupported: task {args_cli.task!r} has no " + f"{args_cli.backend!r} preset. 
Inspect raw_cfg.sim.physics or " + f"re-enumerate {{physx,newton}}_envs.yaml.\n" + ) + sys.exit(2) + return [f"presets={args_cli.backend}"] + hydra_args + + +def test_validation_blocks_unsupported_preset(capsys): + args = _build_parser().parse_args(["--task", "Isaac-Foo-v0", "--backend", "physx"]) + import pytest + + with pytest.raises(SystemExit) as exc_info: + _inject_preset_with_validation(args, ["env.x=1"], has_physics_preset_fn=lambda name: False) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "preset_unsupported:" in captured.err + assert "Isaac-Foo-v0" in captured.err + + +def test_validation_passes_when_supported(): + args = _build_parser().parse_args(["--task", "Isaac-Bar-v0", "--backend", "newton"]) + out = _inject_preset_with_validation(args, ["env.x=1"], has_physics_preset_fn=lambda name: True) + assert out == ["presets=newton", "env.x=1"] + + +def test_validation_skipped_when_explicit_preset_present(capsys): + """Explicit presets= in hydra_args bypasses validation (operator override).""" + args = _build_parser().parse_args(["--task", "Isaac-Foo-v0", "--backend", "physx"]) + + def _bomb(name: str) -> bool: + raise AssertionError("validator must not run when explicit preset is present") + + out = _inject_preset_with_validation(args, ["presets=custom", "env.x=1"], has_physics_preset_fn=_bomb) + assert out == ["presets=custom", "env.x=1"] + assert "ignored" in capsys.readouterr().out + + +def _inject_preset_with_validation_v2( + args_cli, + hydra_args: list[str], + has_physics_preset_fn, + native_backend_matches_fn, +) -> list[str]: + """Mirror of the new gated injection in benchmark_skrl.py (post native-backend fix). 
+ + Two stub injection points: + - has_physics_preset_fn(name) -> bool (existing) + - native_backend_matches_fn(name) -> bool (new) + """ + import sys + + if args_cli.backend is None: + return hydra_args + existing = [a for a in hydra_args if a.startswith("presets=")] + if existing: + print(f"[WARNING] --backend={args_cli.backend} ignored; explicit {existing[0]} wins.") + return hydra_args + if has_physics_preset_fn(args_cli.backend): + return [f"presets={args_cli.backend}"] + hydra_args + if native_backend_matches_fn(args_cli.backend): + print( + f"[INFO] task {args_cli.task!r} has no '{args_cli.backend}' preset; " + f"running on native {args_cli.backend} backend (no injection).", + file=sys.stderr, + ) + return hydra_args + sys.stderr.write( + f"[ERROR] preset_unsupported: task {args_cli.task!r} has no " + f"{args_cli.backend!r} preset. Inspect raw_cfg.sim.physics or " + f"re-enumerate {{physx,newton}}_envs.yaml.\n" + ) + raise SystemExit(2) + + +def test_validation_skips_injection_when_native_matches(capsys): + """No preset, but cfg type matches request → run with no injection + [INFO] log.""" + args = _build_parser().parse_args(["--task", "Isaac-Quadcopter-Direct-v0", "--backend", "physx"]) + out = _inject_preset_with_validation_v2( + args, + ["env.x=1"], + has_physics_preset_fn=lambda name: False, + native_backend_matches_fn=lambda name: True, + ) + assert out == ["env.x=1"] + captured = capsys.readouterr() + assert "running on native physx" in captured.err + assert "no injection" in captured.err + + +def test_validation_still_blocks_when_native_mismatches(capsys): + """No preset AND cfg type doesn't match → existing exit-2 + preset_unsupported: stderr (regression).""" + args = _build_parser().parse_args(["--task", "Isaac-NewtonOnly-v0", "--backend", "physx"]) + import pytest + + with pytest.raises(SystemExit) as exc_info: + _inject_preset_with_validation_v2( + args, + ["env.x=1"], + has_physics_preset_fn=lambda name: False, + native_backend_matches_fn=lambda 
name: False, + ) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "preset_unsupported:" in captured.err + + +def _compose_experiment_dir(directory: str, experiment_name: str, agent_classname: str = "PPO") -> str: + """Mirror of SKRL BaseAgent.__init__'s experiment-dir composition. + + Replicates the falsy-string fallback so tests can assert the final + ``experiment_dir`` a real SKRL agent would pick. + """ + import datetime + import os + + if not directory: + directory = os.path.join(os.getcwd(), "runs") + if not experiment_name: + experiment_name = "{}_{}".format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f"), agent_classname) + return os.path.join(directory, experiment_name) + + +def _apply_log_dir_override(log_dir_arg: str) -> dict: + """Mirror of the agent_cfg mutation in benchmark_skrl.py's log_dir branch.""" + import os + + log_dir = os.path.abspath(log_dir_arg) + return { + "directory": os.path.dirname(log_dir) or ".", + "experiment_name": os.path.basename(log_dir), + } + + +def test_log_dir_override_recomposes_to_exact_path(): + """The override must make experiment_dir equal the absolute log_dir.""" + import os + + log_dir = "/tmp/bundle_xyz/training_data" + override = _apply_log_dir_override(log_dir) + composed = _compose_experiment_dir(override["directory"], override["experiment_name"]) + assert composed == os.path.abspath(log_dir), ( + f"experiment_dir {composed!r} != {log_dir!r}; SKRL will silently " + f"interpose a timestamp subdir when experiment_name is empty." 
+ ) + + +def test_log_dir_override_handles_trailing_slash(): + """Trailing slash on --log_dir should not corrupt the basename split.""" + import os + + log_dir = "/tmp/bundle_abc/training_data/" + override = _apply_log_dir_override(log_dir) + composed = _compose_experiment_dir(override["directory"], override["experiment_name"]) + assert composed == os.path.abspath(log_dir) diff --git a/scripts/benchmarks/tests/test_skrl_benchmark_trainer.py b/scripts/benchmarks/tests/test_skrl_benchmark_trainer.py new file mode 100644 index 000000000000..7fb6672d6327 --- /dev/null +++ b/scripts/benchmarks/tests/test_skrl_benchmark_trainer.py @@ -0,0 +1,232 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Unit tests for BenchmarkTrainer — run with a fake env and fake agent. + +These tests do NOT spin up Isaac Sim. They verify the trainer's +per-iteration capture logic in isolation. 
+""" + +from __future__ import annotations + +import time + +import pytest +import torch + +from scripts.benchmarks.skrl_benchmark_trainer import BenchmarkTrainer + + +class _FakeEnv: + """Minimal env compatible with SKRL's SequentialTrainer expectations.""" + + num_agents = 1 + num_envs = 4 + state_space = None + observation_space = type("O", (), {"shape": (2,)})() + action_space = type("A", (), {"shape": (1,)})() + device = torch.device("cpu") + + def __init__(self, reward_schedule): + self._rewards = reward_schedule # list[float] — one per step + self._i = 0 + + def reset(self): + return torch.zeros(self.num_envs, 2), {} + + def step(self, actions): + r = self._rewards[self._i % len(self._rewards)] + self._i += 1 + rewards = torch.full((self.num_envs,), float(r)) + terminated = torch.zeros(self.num_envs, dtype=torch.bool) + truncated = torch.zeros(self.num_envs, dtype=torch.bool) + next_states = torch.zeros(self.num_envs, 2) + return next_states, rewards, terminated, truncated, {} + + def render(self): + pass + + def close(self): + pass + + +class _FakeAgent: + """Minimal agent that exposes `_rollouts`, pre/post_interaction, track_data.""" + + def __init__(self, rollouts: int = 4): + self._rollouts = rollouts + self.tracking_data: dict[str, list[float]] = {} + self._init_called = False + self._running_mode = None + + def init(self, trainer_cfg): + self._init_called = True + + def set_running_mode(self, mode): + self._running_mode = mode + + def pre_interaction(self, timestep, timesteps): + pass + + def act(self, states, timestep, timesteps): + return torch.zeros(states.shape[0], 1), None, None + + def record_transition(self, **kwargs): + pass + + def post_interaction(self, timestep, timesteps): + pass + + def track_data(self, tag, value): + self.tracking_data.setdefault(tag, []).append(value) + + +def test_iter_times_s_length_matches_iterations(): + rollouts = 4 + max_iters = 3 + env = _FakeEnv(reward_schedule=[1.0] * 100) + agent = _FakeAgent(rollouts=rollouts) 
+ trainer_cfg = {"timesteps": rollouts * max_iters, "headless": True} + + trainer = BenchmarkTrainer(env=env, agents=agent, cfg=trainer_cfg) + trainer.train() + + assert len(trainer.iter_times_s) == max_iters + assert all(t > 0.0 for t in trainer.iter_times_s) + + +def test_iter_rewards_reflects_synthetic_schedule(): + rollouts = 4 + max_iters = 3 + # Give each rollout a distinguishable reward value. + schedule = [1.0] * rollouts + [2.0] * rollouts + [3.0] * rollouts + env = _FakeEnv(reward_schedule=schedule) + agent = _FakeAgent(rollouts=rollouts) + trainer_cfg = {"timesteps": rollouts * max_iters, "headless": True} + + trainer = BenchmarkTrainer(env=env, agents=agent, cfg=trainer_cfg) + trainer.train() + + # Each iteration's mean reward = mean over rollouts*num_envs rewards. + # For constant-per-rollout schedules: iter k ≈ schedule[k*rollouts]. + assert trainer.iter_rewards == pytest.approx([1.0, 2.0, 3.0]) + + +def test_iter_ep_lengths_defaults_to_zero_when_no_termination(): + rollouts = 4 + max_iters = 2 + env = _FakeEnv(reward_schedule=[0.0] * 100) + agent = _FakeAgent(rollouts=rollouts) + trainer_cfg = {"timesteps": rollouts * max_iters, "headless": True} + + trainer = BenchmarkTrainer(env=env, agents=agent, cfg=trainer_cfg) + trainer.train() + + # Fake env never terminates → ep_lengths fall back to 0.0 each iter. 
+ assert trainer.iter_ep_lengths == [0.0, 0.0] + + +def test_iter_times_s_shows_variance_with_sleep(): + """Real per-iter timing must vary when iterations take different wall times.""" + rollouts = 2 + max_iters = 2 + + class _SlowEnv(_FakeEnv): + def step(self, actions): + if self._i == 0 or self._i == 1: + time.sleep(0.02) + return super().step(actions) + + env = _SlowEnv(reward_schedule=[0.0] * 100) + agent = _FakeAgent(rollouts=rollouts) + trainer_cfg = {"timesteps": rollouts * max_iters, "headless": True} + + trainer = BenchmarkTrainer(env=env, agents=agent, cfg=trainer_cfg) + trainer.train() + + assert len(trainer.iter_times_s) == max_iters + # First iter had two sleep(0.02) calls (steps 0 and 1); second iter didn't. + # Accept any positive separation; this is about existence of variance, not magnitude. + assert trainer.iter_times_s[0] > trainer.iter_times_s[1] + + +def test_multi_env_does_not_call_env_reset_on_termination(): + """Regression: Task 4's initial fix unconditionally reset on any termination, + which corrupts multi-env VecEnv training (parent's single_agent_train guards + this on num_envs > 1).""" + rollouts = 4 + max_iters = 2 + + class _CountingMultiEnv(_FakeEnv): + num_envs = 8 # multi-env — parent must NOT mid-train reset + + def __init__(self, reward_schedule): + super().__init__(reward_schedule=reward_schedule) + self.reset_calls = 0 + + def reset(self): + self.reset_calls += 1 + return torch.zeros(self.num_envs, 2), {} + + def step(self, actions): + r = self._rewards[self._i % len(self._rewards)] + self._i += 1 + rewards = torch.full((self.num_envs,), float(r)) + # Terminate env 0 on every step — should NOT trigger env.reset() + terminated = torch.zeros(self.num_envs, dtype=torch.bool) + terminated[0] = True + truncated = torch.zeros(self.num_envs, dtype=torch.bool) + next_states = torch.zeros(self.num_envs, 2) + return next_states, rewards, terminated, truncated, {} + + env = _CountingMultiEnv(reward_schedule=[1.0] * 100) + agent = 
_FakeAgent(rollouts=rollouts) + trainer_cfg = {"timesteps": rollouts * max_iters, "headless": True} + trainer = BenchmarkTrainer(env=env, agents=agent, cfg=trainer_cfg) + trainer.train() + + # Exactly one reset — the initial one at loop start. + assert env.reset_calls == 1, ( + f"BenchmarkTrainer called env.reset() {env.reset_calls} times on a " + f"multi-env VecEnv. Parent single_agent_train only resets at start " + f"when num_envs > 1 — VecEnv handles per-env auto-reset internally." + ) + + +def test_single_env_resets_when_episode_ends(): + """Sanity: the single-env branch still resets on termination.""" + rollouts = 2 + max_iters = 1 + + class _CountingSingleEnv(_FakeEnv): + num_envs = 1 + + def __init__(self, reward_schedule): + super().__init__(reward_schedule=reward_schedule) + self.reset_calls = 0 + + def reset(self): + self.reset_calls += 1 + return torch.zeros(self.num_envs, 2), {} + + def step(self, actions): + r = self._rewards[self._i % len(self._rewards)] + self._i += 1 + rewards = torch.full((self.num_envs,), float(r)) + terminated = torch.zeros(self.num_envs, dtype=torch.bool) + terminated[0] = True # terminate every step on num_envs=1 + truncated = torch.zeros(self.num_envs, dtype=torch.bool) + next_states = torch.zeros(self.num_envs, 2) + return next_states, rewards, terminated, truncated, {} + + env = _CountingSingleEnv(reward_schedule=[0.0] * 100) + agent = _FakeAgent(rollouts=rollouts) + trainer_cfg = {"timesteps": rollouts * max_iters, "headless": True} + trainer = BenchmarkTrainer(env=env, agents=agent, cfg=trainer_cfg) + trainer.train() + + # Initial reset (1) + per-step reset on each termination (rollouts=2) + # = 3 total. 
+ assert env.reset_calls >= 2, f"Expected ≥2 resets on single-env terminations, got {env.reset_calls}" diff --git a/scripts/benchmarks/utils.py b/scripts/benchmarks/utils.py index 05effa524172..fd1b82cf981f 100644 --- a/scripts/benchmarks/utils.py +++ b/scripts/benchmarks/utils.py @@ -346,7 +346,7 @@ def parse_cprofile_stats( isaaclab_prefixes: list[str], top_n: int = 30, whitelist: list[str] | None = None, -) -> list[tuple[str, float, float]]: +) -> list[tuple[str, float, float, int]]: """Parse cProfile stats, filtering to IsaacLab + first-level external calls. Walks the pstats data and keeps functions that are either (a) inside an @@ -368,8 +368,10 @@ def parse_cprofile_stats( functions (e.g. ``["isaaclab.cloner.*:usd_replicate"]``). Returns: - List of (function_label, tottime_ms, cumtime_ms) tuples sorted by - tottime descending. + List of ``(function_label, tottime_ms, cumtime_ms, ncalls)`` tuples + sorted by tottime descending. ``ncalls`` is the total call count + (recursive calls included) reported by ``pstats.Stats.stats``. Whitelist placeholder + rows carry ``ncalls=0``. 
""" import fnmatch import io @@ -409,18 +411,18 @@ def _make_label(filename: str, funcname: str) -> str: # stats.stats: dict[(filename, lineno, funcname)] -> (pcalls, ncalls, tottime, cumtime, callers) # callers: dict[(filename, lineno, funcname)] -> (pcalls, ncalls, tottime, cumtime) results = [] - for func_key, (_, _, tottime, cumtime, callers) in stats.stats.items(): + for func_key, (_, ncalls, tottime, cumtime, callers) in stats.stats.items(): filename, _, funcname = func_key if _is_isaaclab(filename): label = _make_label(filename, funcname) - results.append((label, tottime * 1000.0, cumtime * 1000.0)) + results.append((label, tottime * 1000.0, cumtime * 1000.0, ncalls)) else: # Check if any direct caller is an IsaacLab function for caller_key in callers: caller_filename = caller_key[0] if _is_isaaclab(caller_filename): label = _make_label(filename, funcname) - results.append((label, tottime * 1000.0, cumtime * 1000.0)) + results.append((label, tottime * 1000.0, cumtime * 1000.0, ncalls)) break # Sort by tottime (own-time) descending @@ -430,23 +432,25 @@ def _make_label(filename: str, funcname: str) -> str: return results[:top_n] # Whitelist mode: filter by fnmatch patterns, emit placeholders for unmatched patterns - matched: dict[str, tuple[str, float, float]] = {} + matched: dict[str, tuple[str, float, float, int]] = {} matched_patterns: set[str] = set() - for label, tottime, cumtime in results: + for label, tottime, cumtime, ncalls in results: for pattern in whitelist: if fnmatch.fnmatch(label, pattern): if label not in matched: - matched[label] = (label, tottime, cumtime) + matched[label] = (label, tottime, cumtime, ncalls) matched_patterns.add(pattern) - # Add 0.0 placeholders for patterns that matched nothing + # Add 0.0 placeholders for patterns that matched nothing. Placeholder rows + # keep the schema shape (still a 4-tuple) and carry ncalls=0 — semantically + # "this pattern matched nothing, so no call count is meaningful." 
for pattern in whitelist: if pattern not in matched_patterns: print( f"[WARNING] Whitelist pattern '{pattern}' matched no profiled functions. " "Check for typos or verify the function ran during this phase." ) - matched[pattern] = (pattern, 0.0, 0.0) + matched[pattern] = (pattern, 0.0, 0.0, 0) filtered = list(matched.values()) filtered.sort(key=lambda x: x[1], reverse=True) diff --git a/source/isaaclab/changelog.d/antoiner-feat-benchmark-scripts-v1.rst b/source/isaaclab/changelog.d/antoiner-feat-benchmark-scripts-v1.rst new file mode 100644 index 000000000000..c4d851ea95a6 --- /dev/null +++ b/source/isaaclab/changelog.d/antoiner-feat-benchmark-scripts-v1.rst @@ -0,0 +1,26 @@ +Added +^^^^^ + +* Added an opt-in ``--schema_v1_output`` flag to ``benchmark_startup.py``, + ``benchmark_rsl_rl.py``, and ``benchmark_skrl.py``. When set, each script + emits a self-contained ``training.json`` / ``startup.json`` JSON file + conforming to :mod:`isaaclab.benchmark.schema` (v1.0) — run identity, + software versions, host hardware, aggregated runtime + resource metrics, + and EMA-smoothed reward / episode-length curves. The legacy per-backend + output format remains the default when the flag is omitted. +* Added ``benchmark_skrl.py``: the SKRL-framework counterpart to + ``benchmark_rsl_rl.py``. Emits an identical v1.0 ``TrainingBundle`` with + ``framework: "skrl"``. +* Added :doc:`/source/features/benchmarking` documenting the three scripts + and the v1.0 bundle schema. + +Changed +^^^^^^^ + +* Extended :func:`scripts.benchmarks.utils.parse_cprofile_stats` to return a + 4-tuple ``(function_label, tottime_ms, cumtime_ms, ncalls)`` instead of a + 3-tuple, exposing the total call count (recursive calls included) from ``pstats`` for downstream + consumers. Existing tuple-unpacking call sites updated. +* Reworked ``scripts/benchmarks/startup_whitelist.yaml`` to track the + IsaacLab v3 configclass / cloner / scene-init call paths and added an + explicit ``task_config`` phase entry. 
diff --git a/source/isaaclab/test/benchmark/test_parse_cprofile_stats.py b/source/isaaclab/test/benchmark/test_parse_cprofile_stats.py
new file mode 100644
index 000000000000..48e7c4f956f7
--- /dev/null
+++ b/source/isaaclab/test/benchmark/test_parse_cprofile_stats.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Unit tests for :func:`scripts.benchmarks.utils.parse_cprofile_stats`.
+
+:func:`parse_cprofile_stats` is expected to return 4-tuples
+``(label, tottime_ms, cumtime_ms, ncalls)`` after the T2.2 reliability fix, where
+``ncalls`` is the per-function call count. Before the fix, the function returned
+3-tuples and ``CProfileFunction.calls`` was always 0 in the downstream startup bundle.
+"""
+
+from __future__ import annotations
+
+import cProfile
+import os
+import sys
+
+# scripts/benchmarks/utils.py is not an installable package; put the repo root (four
+# levels above this file's directory) on sys.path so ``scripts.benchmarks`` is importable.
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../.."))
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+from scripts.benchmarks.utils import parse_cprofile_stats  # noqa: E402
+
+
+def _profiled_call(n_outer: int, n_inner: int) -> cProfile.Profile:
+    """Profile ``n_outer`` calls of ``outer()``, each making ``n_inner`` calls of ``inner()``; return the profiler."""
+
+    def inner():
+        return sum(range(10))
+
+    def outer():
+        for _ in range(n_inner):
+            inner()
+
+    prof = cProfile.Profile()
+    prof.enable()
+    for _ in range(n_outer):
+        outer()
+    prof.disable()
+    return prof
+
+
+def test_top_n_returns_ncalls():
+    # The synthetic inner()/outer() helpers live in THIS test file, so _is_isaaclab would
+    # not match them on its own. Per the parser's "first-level external call from an
+    # IsaacLab caller" path, they are reported only if this file's directory is passed as
+    # an isaaclab prefix, so pass it. (NOTE(review): confirm against parse_cprofile_stats.)
+    test_dir = os.path.abspath(os.path.dirname(__file__))
+    prof = _profiled_call(n_outer=3, n_inner=5)
+
+    results = parse_cprofile_stats(prof, isaaclab_prefixes=[test_dir], top_n=30)
+
+    # Every row must be a (label, tottime_ms, cumtime_ms, ncalls) 4-tuple of (str, float, float, int).
+    assert results, "parse_cprofile_stats should return at least one row"
+    for row in results:
+        assert len(row) == 4, f"expected (label, tot, cum, ncalls) 4-tuple, got {row!r}"
+        label, tot, cum, ncalls = row
+        assert isinstance(label, str)
+        assert isinstance(tot, float)
+        assert isinstance(cum, float)
+        assert isinstance(ncalls, int)
+        assert ncalls >= 0
+
+    # Find the helpers by label suffix: outer ran n_outer times, inner ran n_outer * n_inner times.
+    outer_rows = [r for r in results if r[0].endswith(":outer")]
+    inner_rows = [r for r in results if r[0].endswith(":inner")]
+    assert outer_rows, f"outer() should be in results, got labels: {[r[0] for r in results]}"
+    assert inner_rows, f"inner() should be in results, got labels: {[r[0] for r in results]}"
+    assert outer_rows[0][3] == 3, f"outer ncalls should be 3, got {outer_rows[0][3]}"
+    assert inner_rows[0][3] == 15, f"inner ncalls should be 3*5=15, got {inner_rows[0][3]}"
+
+
+def test_whitelist_path_returns_ncalls():
+    test_dir = os.path.abspath(os.path.dirname(__file__))
+    prof = _profiled_call(n_outer=2, n_inner=4)
+
+    results = parse_cprofile_stats(
+        prof,
+        isaaclab_prefixes=[test_dir],
+        whitelist=["*:inner", "*:definitely_not_a_real_function"],
+    )
+
+    # The matched row carries the real ncalls (n_outer * n_inner); the unmatched pattern's placeholder row carries 0.
+    labels = {r[0]: r for r in results}
+    inner_label = next((lbl for lbl in labels if lbl.endswith(":inner")), None)
+    assert inner_label is not None, f"inner() should match wildcard whitelist, labels: {list(labels)}"
+    assert labels[inner_label][3] == 8, f"inner ncalls should be 2*4=8, got {labels[inner_label][3]}"
+
+    placeholder = labels.get("*:definitely_not_a_real_function")
+    assert placeholder is not None, "placeholder row should be emitted for unmatched pattern"
+    assert placeholder == ("*:definitely_not_a_real_function", 0.0, 0.0, 0)