From 0b7f643d857c6ceca47ee5da9e5d72f5798b3ef7 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 03:51:17 -0800 Subject: [PATCH 1/7] Use -fno-sycl-id-queries-fit-in-int for indexing extension --- dpnp/backend/extensions/indexing/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 78f6713bdf2..a6691f31f55 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -84,6 +84,7 @@ else() ) endif() +target_compile_options(${python_module_name} PUBLIC -fno-sycl-id-queries-fit-in-int) target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) if(DPNP_GENERATE_COVERAGE) From 335c43418bc4ba73db3279632f8b161b4340faa3 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 03:52:35 -0800 Subject: [PATCH 2/7] Update third party tests --- .../core_tests/test_ndarray_adv_indexing.py | 67 ++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_adv_indexing.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_adv_indexing.py index 6a635ee53b2..f4917393cd2 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_adv_indexing.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_adv_indexing.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import itertools import numpy @@ -435,6 +437,15 @@ def test_invalid_adv_getitem(self): a[self.indexes] +class TestArrayBadDTypeIndexAdvGetitem: + @pytest.mark.parametrize("dtype", [object, "i,i", "float32", "str"]) + def test_bad_dtype_adv_getitem(self, dtype): + # Test various bad dtypes, supported by CuPy or not. + a = cupy.arange(10) + with pytest.raises(IndexError, match="arrays used as indices"): + a[numpy.array([1, 2], dtype=dtype)] + + @testing.parameterize( {"shape": (0,), "indexes": ([False],)}, { @@ -950,6 +961,60 @@ class TestArrayAdvancedIndexingSetitemTranspose: def test_adv_setitem_transp(self, xp): shape = (2, 3, 4) a = xp.zeros(shape).transpose(0, 2, 1) - slices = (xp.array([1, 0]), slice(None), xp.array([2, 1])) + slices = (numpy.array([1, 0]), slice(None), numpy.array([2, 1])) a[slices] = 1 return a + + +class TestHugeArrays: + # These tests require a lot of memory + @testing.slow + def test_advanced(self): + try: + arr = cupy.ones((1, 2**30), dtype=cupy.int8) + idx = cupy.zeros(3, dtype=cupy.int32) + res = arr[idx, :] + # sanity check, we mostly care about it not crashing. + assert res.sum() == 3 * 2**30 + del res + + arr[idx, :] = cupy.array([[3], [3], [3]], dtype=cupy.int8) + # Check 3 got written (order may not be strictly guaranteed) + assert arr.sum() == 2**30 * 3 + except MemoryError: + pytest.skip("out of memory in test.") + + @testing.slow + def test_take_array(self): + try: + arr = cupy.ones((1, 2**32), dtype=cupy.int8) + arr[0, 2**30] = 0 # We should see each of these once + arr[0, -1] = 0 + res = arr.take(cupy.array([0, 0]), axis=0) + # sanity check, we mostly care about it not crashing. + assert res.sum() == 2 * (2**32 - 2) + except MemoryError: + pytest.skip("out of memory in test.") + + @testing.slow + def test_take_scalar(self): + try: + arr = cupy.ones((1, 2**32), dtype=cupy.int8) + arr[0, 2**30] = 0 # We should see each of these once + arr[0, -1] = 0 + res = arr.take(0, axis=0) + # sanity check, we mostly care about it not crashing. + assert res.sum() == 2**32 - 2 + except MemoryError: + pytest.skip("out of memory in test.") + + @testing.slow + def test_choose(self): + try: + choices = cupy.zeros((2, 2**31), dtype=cupy.int8) + choices[1, :] = 1 + res = choices[1, :].choose(choices) + # sanity check, we mostly care about it not crashing. + assert res.sum() == 2**31 + except MemoryError: + pytest.skip("out of memory in test.") From ec774a945fe3a88404c039605c8cd8edb45cf556 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 04:07:58 -0800 Subject: [PATCH 3/7] Update third_party/cupy/core_tests to keep in sync with the remote repo. The tests were updated there to support run with pytest-run-parallel --- .../cupy/core_tests/test_carray.py | 41 +++++++------ .../third_party/cupy/core_tests/test_core.py | 42 +++++++++---- .../cupy/core_tests/test_cub_reduction.py | 7 ++- .../cupy/core_tests/test_dlpack.py | 23 ++++--- .../cupy/core_tests/test_ndarray.py | 20 ++----- .../cupy/core_tests/test_ndarray_reduction.py | 53 ++++++++-------- .../third_party/cupy/core_tests/test_raw.py | 60 ++++++++++++------- .../cupy/core_tests/test_userkernel.py | 16 ++--- 8 files changed, 148 insertions(+), 114 deletions(-) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_carray.py b/dpnp/tests/third_party/cupy/core_tests/test_carray.py index b161ef49e6b..7146460fc40 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_carray.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_carray.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import pytest @@ -8,7 +8,7 @@ pytest.skip("CArray is not supported", allow_module_level=True) -class TestCArray(unittest.TestCase): +class TestCArray: def test_size(self): x = cupy.arange(3).astype("i") @@ -63,39 +63,38 @@ def test_getitem_idx(self): testing.assert_array_equal(y, x) -@testing.parameterize( - {"size": 2**31 - 1024}, - {"size": 2**31}, - {"size": 2**31 + 1024}, - {"size": 2**32 - 1024}, - {"size": 2**32}, - {"size": 2**32 + 1024}, +@pytest.mark.parametrize( + "size", + [2**31 - 1024, 2**31, 2**31 + 1024, 2**32 - 1024, 2**32, 2**32 + 1024], ) -@testing.slow -class TestCArray32BitBoundary(unittest.TestCase): +@pytest.mark.slow +@pytest.mark.thread_unsafe(reason="too large allocations") +class TestCArray32BitBoundary: # This test case is intended to confirm CArray indexing work correctly # with input/output arrays whose size is so large that it crosses the # 32-bit boundary (in terms of both number of elements and size in bytes). # This test requires approx. 8 GiB GPU memory to run. # See https://github.com/cupy/cupy/pull/882 for detailed discussions. - - def tearDown(self): - # Free huge memory for slow test + def teardown_method(self): cupy.get_default_memory_pool().free_all_blocks() # HIP is known to fail with sizes > 2**32-1024 - @unittest.skipIf(cupy.cuda.runtime.is_hip, "HIP does not support this") - def test(self): + @pytest.mark.skipif( + cupy.cuda.runtime.is_hip, reason="HIP does not support this" + ) + def test(self, size): # Elementwise - a = cupy.full((1, self.size), 7, dtype=cupy.int8) + a = cupy.full((1, size), 7, dtype=cupy.int8) # Reduction result = a.sum(axis=0, dtype=cupy.int8) # Explicitly specify the dtype to absorb Linux/Windows difference. - assert result.sum(dtype=cupy.int64) == self.size * 7 + assert result.sum(dtype=cupy.int64) == size * 7 # HIP is known to fail with sizes > 2**32-1024 - @unittest.skipIf(cupy.cuda.runtime.is_hip, "HIP does not support this") - def test_assign(self): - a = cupy.zeros(self.size, dtype=cupy.int8) + @pytest.mark.skipif( + cupy.cuda.runtime.is_hip, reason="HIP does not support this" + ) + def test_assign(self, size): + a = cupy.zeros(size, dtype=cupy.int8) a[-1] = 1.0 assert a.sum() == 1 diff --git a/dpnp/tests/third_party/cupy/core_tests/test_core.py b/dpnp/tests/third_party/cupy/core_tests/test_core.py index 9bcf5ae721c..c959b4f2495 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_core.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_core.py @@ -8,6 +8,10 @@ import dpnp as cupy from dpnp.tests.third_party.cupy import testing +from dpnp.tests.third_party.cupy.testing._protocol_helpers import ( + DummyObjectWithCudaArrayInterface, + DummyObjectWithCuPyGetNDArray, +) class TestSize(unittest.TestCase): @@ -37,6 +41,7 @@ def test_size_axis_error(self, dtype): @testing.numpy_cupy_equal() @testing.slow + # @pytest.mark.thread_unsafe(reason="Allocation too large.") def test_size_huge(self, xp): a = xp.ndarray(2**32, "b") # 4 GiB return xp.size(a) @@ -95,33 +100,44 @@ def test_cupy_ndarray(self, dtype): for v in (arr, (arr, arr)): assert cupy.min_scalar_type(v) is arr.dtype - -@testing.parameterize( - *testing.product( - { - "cxx": (None, "--std=c++14"), - } + @pytest.mark.parametrize( + "cupy_like", + [ + DummyObjectWithCuPyGetNDArray, + DummyObjectWithCudaArrayInterface, + ], ) -) -@pytest.mark.skip("compiling cupy headers are not supported") -class TestCuPyHeaders(unittest.TestCase): + def test_cupy_likes_and_nested(self, cupy_like): + arr = cupy.array([[-1, 1]], dtype="int8") - def setUp(self): + obj = cupy_like(arr) + assert cupy.min_scalar_type(obj) is arr.dtype + if cupy_like is DummyObjectWithCuPyGetNDArray: + # __cupy_get_ndarray__ path currently assumes .shape and .dtype + obj.shape = arr.shape + obj.dtype = arr.dtype + assert cupy.min_scalar_type([obj, obj]) is arr.dtype + + +@pytest.mark.skip("compiling cupy headers are not supported") +class TestCuPyHeaders: + def setup_method(self): self.temporary_cache_dir_context = test_raw.use_temporary_cache_dir() self.cache_dir = self.temporary_cache_dir_context.__enter__() self.header = "\n".join( ["#include <" + h + ">" for h in core._cupy_header_list] ) - def tearDown(self): + def teardown_method(self): self.temporary_cache_dir_context.__exit__(*sys.exc_info()) - def test_compiling_core_header(self): + @pytest.mark.parametrize("cxx", (None, "--std=c++17")) + def test_compiling_core_header(self, cxx): code = r""" extern "C" __global__ void _test_ker_() { } """ code = self.header + code - options = () if self.cxx is None else (self.cxx,) + options = () if cxx is None else (cxx,) ker = cupy.RawKernel( code, "_test_ker_", options=options, backend="nvrtc" ) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py b/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py index 2983c9a152f..2adcbfe6ed3 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_cub_reduction.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys import unittest from itertools import combinations @@ -19,7 +21,6 @@ # This test class and its children below only test if CUB backend can be used # or not; they don't verify its correctness as it's already extensively covered # by existing tests -@unittest.skipIf(_environment.get_cub_path() is None, "CUB not found") class CubReductionTestBase(unittest.TestCase): """ Note: call self.can_use() when arrays are already allocated, otherwise @@ -27,9 +28,11 @@ class CubReductionTestBase(unittest.TestCase): """ def setUp(self): + if _environment.get_cub_path() is None: + pytest.skip("CUB not found") if cupy.cuda.runtime.is_hip: if _environment.get_hipcc_path() is None: - self.skipTest("hipcc is not found") + pytest.skip("hipcc is not found") self.can_use = cupy._core._cub_reduction._can_use_cub_block_reduction diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py index 80715257f9b..2df8cbfc5f7 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import dpctl import dpctl.tensor._dlpack as dlp import numpy @@ -60,6 +62,8 @@ class TestNewDLPackConversion: def pool(self, request): self.memory = request.param if self.memory == "managed": + # if cuda.runtime.is_hip: + # pytest.skip("HIP does not support managed memory") old_pool = cupy.get_default_memory_pool() new_pool = cuda.MemoryPool(cuda.malloc_managed) cuda.set_allocator(new_pool.malloc) @@ -201,6 +205,8 @@ def test_conversion_device_to_cpu(self): @pytest.mark.skip("due to dpctl-2213") def test_stream(self): allowed_streams = ["null", True] + # if not cuda.runtime.is_hip: + # allowed_streams.append("ptds") # stream order is automatically established via DLPack protocol for src_s in [self._get_stream(s) for s in allowed_streams]: @@ -226,18 +232,18 @@ class TestDLTensorMemory: @pytest.fixture def pool(self): - pass + # old_pool = cupy.get_default_memory_pool() + # pool = cupy.cuda.MemoryPool() + # cupy.cuda.set_allocator(pool.malloc) - # old_pool = cupy.get_default_memory_pool() - # pool = cupy.cuda.MemoryPool() - # cupy.cuda.set_allocator(pool.malloc) + # yield pool - # yield pool - - # pool.free_all_blocks() - # cupy.cuda.set_allocator(old_pool.malloc) + # pool.free_all_blocks() + # cupy.cuda.set_allocator(old_pool.malloc) + pass @pytest.mark.parametrize("max_version", [None, (1, 0)]) + # @pytest.mark.thread_unsafe(reason="modifies pool and tracks allocations") def test_deleter(self, pool, max_version): # memory is freed when tensor is deleted, as it's not consumed array = cupy.empty(10) @@ -252,6 +258,7 @@ def test_deleter(self, pool, max_version): # assert pool.n_free_blocks() == 1 @pytest.mark.parametrize("max_version", [None, (1, 0)]) + # @pytest.mark.thread_unsafe(reason="modifies pool and tracks allocations") def test_deleter2(self, pool, max_version): # memory is freed when array2 is deleted, as tensor is consumed array = cupy.empty(10) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py index 200a29d2926..d782eb9f41e 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py @@ -246,6 +246,11 @@ def test_copy_multi_device_non_contiguous_K(self): # See cupy/cupy#5004 @pytest.mark.skip("RawKernel() is not supported") @testing.multi_gpu(2) + # @pytest.mark.xfail( + # runtime.is_hip, + # reason='ROCm may work differently in async D2D copy with streams') + # @pytest.mark.thread_unsafe( + # reason="order is unclear multithread. Also, hard crash in threaded!") def test_copy_multi_device_with_stream(self): # Kernel that takes long enough then finally writes values. src = _test_copy_multi_device_with_stream_src @@ -430,21 +435,6 @@ def test_cuda_array_interface_stream(self): assert iface["stream"] == stream.ptr -@pytest.mark.skip("CUDA interface is not supported") -class TestNdarrayCudaInterfaceNoneCUDA(unittest.TestCase): - - def setUp(self): - self.arr = cupy.zeros(shape=(2, 3), dtype=cupy.float64) - - def test_cuda_array_interface_hasattr(self): - assert not hasattr(self.arr, "__cuda_array_interface__") - - def test_cuda_array_interface_getattr(self): - with pytest.raises(AttributeError) as e: - getattr(self.arr, "__cuda_array_interface__") - assert "HIP" in str(e.value) - - @testing.parameterize( *testing.product( { diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py index 80b3f92fefa..0c652759f2a 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_reduction.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy import pytest @@ -16,32 +18,31 @@ class TestArrayReduction: @pytest.fixture(scope="class") def exclude_cutensor(self): + # cuTENSOR seems to have issues in handling inf/nan in reduction-based + # routines, so we use this fixture to skip testing it + # self.old_routine_accelerators = _acc.get_routine_accelerators() + # self.old_reduction_accelerators = _acc.get_reduction_accelerators() + + # rot_acc = self.old_routine_accelerators.copy() + # try: + # rot_acc.remove(_acc.ACCELERATOR_CUTENSOR) + # except ValueError: + # pass + # _acc.set_routine_accelerators(rot_acc) + + # red_acc = self.old_reduction_accelerators.copy() + # try: + # red_acc.remove(_acc.ACCELERATOR_CUTENSOR) + # except ValueError: + # pass + # _acc.set_reduction_accelerators(red_acc) + + # yield + + # _acc.set_routine_accelerators(self.old_routine_accelerators) + # _acc.set_reduction_accelerators(self.old_reduction_accelerators) pass - # # cuTENSOR seems to have issues in handling inf/nan in reduction-based - # # routines, so we use this fixture to skip testing it - # self.old_routine_accelerators = _acc.get_routine_accelerators() - # self.old_reduction_accelerators = _acc.get_reduction_accelerators() - - # rot_acc = self.old_routine_accelerators.copy() - # try: - # rot_acc.remove(_acc.ACCELERATOR_CUTENSOR) - # except ValueError: - # pass - # _acc.set_routine_accelerators(rot_acc) - - # red_acc = self.old_reduction_accelerators.copy() - # try: - # red_acc.remove(_acc.ACCELERATOR_CUTENSOR) - # except ValueError: - # pass - # _acc.set_reduction_accelerators(red_acc) - - # yield - - # _acc.set_routine_accelerators(self.old_routine_accelerators) - # _acc.set_reduction_accelerators(self.old_reduction_accelerators) - @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) def test_max_all(self, xp, dtype): @@ -376,6 +377,10 @@ def test_zero_size(self, xp): ) ) @pytest.mark.skip("CUB reduction is not supported") +# @pytest.mark.skipif( +# not cupy.cuda.cub.available, reason="The CUB routine is not enabled" +# ) +# @pytest.mark.thread_unsafe(reason="unsafe setUp and AssertFunctionIsCalled.") class TestCubReduction: @pytest.fixture(autouse=True) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_raw.py b/dpnp/tests/third_party/cupy/core_tests/test_raw.py index 480f6de6ae4..59581674a85 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_raw.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_raw.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import contextlib import io import os @@ -352,6 +354,8 @@ @contextlib.contextmanager def use_temporary_cache_dir(): + # Note uses mock, so not thread-safe (except at class/method level) + # tempdir fixture could be used instead. target1 = "cupy.cuda.compiler.get_cache_dir" target2 = "cupy.cuda.compiler._empty_file_preprocess_cache" temp_cache = {} @@ -386,9 +390,8 @@ def find_nvcc_ver(): nvcc_ver_pattern = r"release (\d+\.\d+)" cmd = cupy.cuda.get_nvcc_path().split() cmd += ["--version"] - cache_ctx = use_temporary_cache_dir() - with cache_ctx as cache_path: - output = compiler._run_cc(cmd, cache_path, "nvcc") + + output = compiler._run_cc(cmd, cupy.cuda.compiler.get_cache_dir(), "nvcc") match = re.search(nvcc_ver_pattern, output) assert match @@ -404,19 +407,19 @@ class _TestRawBase: _nvrtc_ver = None def setUp(self): - if hasattr(self, "clean_up"): + if getattr(self, "clean_up", False): if cupy.cuda.runtime.is_hip: # Clearing memo triggers recompiling kernels using name # expressions in other tests, e.g. dot and matmul, which # hits a nvrtc bug. See #5843, #5945 and #6725. - self.skipTest("Clearing memo hits a nvrtc bug in other tests") + pytest.skip("Clearing memo hits a nvrtc bug in other tests") _util.clear_memo() self.dev = cupy.cuda.runtime.getDevice() assert self.dev != 1 - if not hasattr(self, "jitify"): - self.jitify = False + + self.jitify = getattr(self, "jitify", False) if cupy.cuda.runtime.is_hip and self.jitify: - self.skipTest("Jitify does not support ROCm/HIP") + pytest.skip("Jitify does not support ROCm/HIP") self.temporary_cache_dir_context = use_temporary_cache_dir() self.in_memory_context = compile_in_memory(self.in_memory) @@ -858,6 +861,7 @@ def test_cuDoubleComplex(self): ker((grid,), (block,), (a, b, out)) assert (out == a + b).all() + @pytest.mark.thread_unsafe(reason="mutates global in RawModule") def test_const_memory(self): mod = cupy.RawModule( code=test_const_mem, backend=self.backend, jitify=self.jitify @@ -1126,6 +1130,7 @@ def test_context_switch_RawModule6(self): @unittest.skipUnless( not cupy.cuda.runtime.is_hip, "only CUDA raises warning" ) + @pytest.mark.thread_unsafe(reason="mutates global cache directory") def test_compile_kernel(self): kern = cupy.RawKernel( _test_compile_src, @@ -1144,6 +1149,7 @@ def test_compile_kernel(self): @unittest.skipUnless( not cupy.cuda.runtime.is_hip, "only CUDA raises warning" ) + @pytest.mark.thread_unsafe(reason="mutates global cache directory") def test_compile_module(self): module = cupy.RawModule( code=_test_compile_src, @@ -1187,6 +1193,9 @@ class TestRaw(_TestRawBase, unittest.TestCase): {"backend": "nvrtc", "in_memory": True, "clean_up": True, "jitify": True}, ) @testing.slow +@pytest.mark.thread_unsafe( + reason="Jitify seems to have problems, skip as largely unmaintained." +) class TestRawWithJitify(_TestRawBase, unittest.TestCase): pass @@ -1220,7 +1229,8 @@ class TestRawWithJitify(_TestRawBase, unittest.TestCase): ) ) @unittest.skipIf( - find_nvcc_ver() >= 12020, "fp16 header compatibility issue, see cupy#8412" + cupy.cuda.runtime.is_hip or find_nvcc_ver() >= 12020, + "fp16 header compatibility issue, see cupy#8412 (Skip on HIP)", ) @unittest.skipUnless( 9000 <= cupy.cuda.runtime.runtimeGetVersion(), "Requires CUDA 9.x or later" @@ -1229,9 +1239,8 @@ class TestRawWithJitify(_TestRawBase, unittest.TestCase): 60 <= int(cupy.cuda.device.get_compute_capability()), "Requires compute capability 6.0 or later", ) -@unittest.skipIf(cupy.cuda.runtime.is_hip, "Skip on HIP") class TestRawGridSync(unittest.TestCase): - + @pytest.mark.thread_unsafe(reason="mutates global cache directory") def test_grid_sync_rawkernel(self): n = self.n with use_temporary_cache_dir(): @@ -1249,6 +1258,7 @@ def test_grid_sync_rawkernel(self): kern_grid_sync((grid,), (block,), (x1, x2, y, n**2)) assert cupy.allclose(y, x1 + x2) + @pytest.mark.thread_unsafe(reason="mutates global cache directory") def test_grid_sync_rawmodule(self): n = self.n with use_temporary_cache_dir(): @@ -1309,7 +1319,6 @@ def test_grid_sync_rawmodule(self): cupy.cuda.runtime.is_hip, "HIP does not support enable_cooperative_groups" ) class TestRawPicklable(unittest.TestCase): - def setUp(self): self.temporary_dir_context = use_temporary_cache_dir() self.temp_dir = self.temporary_dir_context.__enter__() @@ -1392,17 +1401,7 @@ def test_raw_picklable(self): # Recent CCCL has made Jitify cold-launch very slow, see the discussion # starting https://github.com/cupy/cupy/pull/8899#issuecomment-2613022424. # TODO(leofang): Further refactor the test suite? -@testing.parameterize( - *testing.product( - { - "jitify": (False, True), - } - ) -) -@unittest.skipIf(cupy.cuda.runtime.is_hip, "Jitify does not support ROCm/HIP") -@testing.slow -class TestRawJitify(unittest.TestCase): - +class _TestRawJitify: def setUp(self): self.temporary_dir_context = use_temporary_cache_dir() self.temp_dir = self.temporary_dir_context.__enter__() @@ -1509,3 +1508,18 @@ def test_jitify5(self): with pytest.raises(cupy.cuda.compiler.CompileException) as ex: self._helper(hdr, options=("-I" + self.temp_dir,)) assert "cannot open source file" in str(ex.value) + + +@unittest.skipIf(cupy.cuda.runtime.is_hip, "Jitify does not support ROCm/HIP") +@testing.slow +class TestRawJitifyNoJitify(_TestRawJitify, unittest.TestCase): + jitify = False + + +@unittest.skipIf(cupy.cuda.runtime.is_hip, "Jitify does not support ROCm/HIP") +@testing.slow +@pytest.mark.thread_unsafe( + reason="Jitify seems to have problems, skip as largely unmaintained." +) +class TestRawJitifyJitify(_TestRawJitify, unittest.TestCase): + jitify = True diff --git a/dpnp/tests/third_party/cupy/core_tests/test_userkernel.py b/dpnp/tests/third_party/cupy/core_tests/test_userkernel.py index ab184c4939f..aa839f61e9b 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_userkernel.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_userkernel.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import unittest import numpy @@ -252,12 +254,12 @@ class TestUserkernelScalar(unittest.TestCase): @testing.numpy_cupy_array_equal() def test_scalar(self, xp, dtype): x = testing.shaped_arange((2, 3, 4), xp, dtype) + y = numpy.array(self.value).astype(dtype)[()] if xp is numpy: - y = numpy.array(self.value).astype(dtype) return x + y else: kernel = cupy.ElementwiseKernel("T x, T y", "T z", "z = x + y") - return kernel(x, self.value) + return kernel(x, y) class TestUserkernelManualBlockSize(unittest.TestCase): @@ -297,7 +299,6 @@ def _prep_texture(self): if dim == 3 else (height, width) if dim == 2 else (width,) ) - self.shape = shape # prepare input, output, and texture memory # self.data holds the data stored in the texture memory @@ -307,7 +308,6 @@ def _prep_texture(self): ) arr = CUDAarray(ch, width, height, depth) arr.copy_from(tex_data) - self.data = tex_data # create resource and texture descriptors res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr) @@ -322,7 +322,7 @@ def _prep_texture(self): ) # create a texture object - return TextureObject(res, tex) + return TextureObject(res, tex), tex_data, shape def _prep_kernel1D(self): return cupy.ElementwiseKernel( @@ -370,7 +370,7 @@ def test_texture_input(self): width, height, depth = self.dimensions dim = 3 if depth != 0 else 2 if height != 0 else 1 - texobj = self._prep_texture() + texobj, data, shape = self._prep_texture() ker = getattr(self, f"_prep_kernel{dim}D")() # prepare input @@ -383,10 +383,10 @@ def test_texture_input(self): size *= depth args.append(height) in_arr = cupy.arange(size, dtype=cupy.float32) - in_arr = in_arr.reshape(self.shape) + in_arr = in_arr.reshape(shape) args[0] = in_arr # compute and validate output out_arr = ker(*args) - expected = in_arr + self.data + expected = in_arr + data testing.assert_allclose(out_arr, expected) From 6a547b94193a931ca1f28b8414c070b074b0ea7a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 04:36:09 -0800 Subject: [PATCH 4/7] Enable supported test for error class in third_party/cupy/test_init.py scope --- dpnp/tests/third_party/cupy/test_init.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dpnp/tests/third_party/cupy/test_init.py b/dpnp/tests/third_party/cupy/test_init.py index dbda6010e12..0a841ba28b2 100644 --- a/dpnp/tests/third_party/cupy/test_init.py +++ b/dpnp/tests/third_party/cupy/test_init.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import operator import os import shutil @@ -73,6 +75,7 @@ def test_available(self): assert available +# @pytest.mark.thread_unsafe(reason="modifies environment variables") @pytest.mark.skip("dpnp.is_available() is not implemented") class TestNotAvailable(unittest.TestCase): @@ -145,18 +148,17 @@ def test_bitwise_not_is_invert(self): assert xp.bitwise_not is xp.invert -@pytest.mark.skip("dpnp.exceptions is not implemented") @testing.with_requires("numpy>=2.0") @pytest.mark.parametrize( "name", [ "exceptions.AxisError", - "exceptions.ComplexWarning", - "exceptions.ModuleDeprecationWarning", - "exceptions.RankWarning", - "exceptions.TooHardError", - "exceptions.VisibleDeprecationWarning", - "linalg.LinAlgError", + # "exceptions.ComplexWarning", + # "exceptions.ModuleDeprecationWarning", + # "exceptions.RankWarning", + # "exceptions.TooHardError", + # "exceptions.VisibleDeprecationWarning", + # "linalg.LinAlgError", ], ) def test_error_classes(name): From d31c2deda4eb809b4412d03cd481a39aa6f468c8 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 04:46:07 -0800 Subject: [PATCH 5/7] Update third_party/cupy/testing/_helper.py --- .../tests/third_party/cupy/testing/_helper.py | 101 +++++++++++------- 1 file changed, 65 insertions(+), 36 deletions(-) diff --git a/dpnp/tests/third_party/cupy/testing/_helper.py b/dpnp/tests/third_party/cupy/testing/_helper.py index 5414c7af2ca..eaec6683ca5 100644 --- a/dpnp/tests/third_party/cupy/testing/_helper.py +++ b/dpnp/tests/third_party/cupy/testing/_helper.py @@ -1,21 +1,20 @@ +from __future__ import annotations + import contextlib import importlib.metadata import inspect import unittest import warnings +from collections.abc import Callable from importlib.metadata import PackageNotFoundError -from typing import Callable from unittest import mock import numpy import dpnp as cupy -from dpnp.tests.third_party.cupy.testing._pytest_impl import is_available # from cupy._core import internal -# import cupyx -# import cupyx.scipy.sparse - +from dpnp.tests.third_party.cupy.testing._pytest_impl import is_available if is_available(): import pytest @@ -25,7 +24,7 @@ _skipif = unittest.skipIf -def with_requires(*requirements): +def with_requires(*requirements: str) -> Callable[[Callable], Callable]: """Run a test case only when given requirements are satisfied. .. admonition:: Example @@ -49,7 +48,7 @@ def with_requires(*requirements): return _skipif(not installed(*requirements), reason=msg) -def installed(*specifiers): +def installed(*specifiers: str) -> bool: """Returns True if the current environment satisfies the specified package requirement. @@ -72,13 +71,13 @@ def installed(*specifiers): return True -def numpy_satisfies(version_range): +def numpy_satisfies(version_range: str) -> bool: """Returns True if numpy version satisfies the specified criteria. Args: version_range: A version specifier (e.g., `>=1.13.0`). """ - return installed("numpy{}".format(version_range)) + return installed(f"numpy{version_range}") def shaped_arange(shape, xp=cupy, dtype=numpy.float32, order="C", device=None): @@ -162,45 +161,72 @@ def shaped_random( from uniform distribution over :math:`[0, scale)` with specified dtype. """ - numpy.random.seed(seed) + rng = numpy.random.RandomState(seed) dtype = numpy.dtype(dtype) if dtype == "?": - a = numpy.random.randint(2, size=shape) + a = rng.randint(2, size=shape) elif dtype.kind == "c": - a = numpy.random.rand(*shape) + 1j * numpy.random.rand(*shape) + a = rng.rand(*shape) + 1j * rng.rand(*shape) a *= scale else: - a = numpy.random.rand(*shape) * scale + a = rng.rand(*shape) * scale return xp.asarray(a, dtype=dtype, order=order) -# def shaped_sparse_random( -# shape, sp=cupyx.scipy.sparse, dtype=numpy.float32, -# density=0.01, format='coo', seed=0): -# """Returns an array filled with random values. +def shaped_sparse_random( + shape, sp=None, dtype=numpy.float32, density=0.01, format="", seed=0 +): + """Returns an array filled with random values. + + Args: + shape (tuple): Shape of returned sparse matrix. + sp (scipy.sparse or cupyx.scipy.sparse): Sparse matrix module to use. + dtype (dtype): Dtype of returned sparse matrix. + density (float): Density of returned sparse matrix. + format (str): Format of returned sparse matrix. + seed (int): Random seed. + + Returns: + The sparse matrix with given shape, array module, + """ + import cupyx.scipy.sparse + import scipy.sparse + + if sp is None: + sp = cupyx.scipy.sparse + n_rows, n_cols = shape + a = scipy.sparse.random(n_rows, n_cols, density, random_state=seed).astype( + dtype + ) + + try: + return sp.coo_matrix(a).asformat(format) + except AttributeError: + raise ValueError(f"Module {sp} does not have the expected sparse APIs") -# Args: -# shape (tuple): Shape of returned sparse matrix. -# sp (scipy.sparse or cupyx.scipy.sparse): Sparse matrix module to use. -# dtype (dtype): Dtype of returned sparse matrix. -# density (float): Density of returned sparse matrix. -# format (str): Format of returned sparse matrix. -# seed (int): Random seed. -# Returns: -# The sparse matrix with given shape, array module, -# """ -# import scipy.sparse -# n_rows, n_cols = shape -# numpy.random.seed(seed) -# a = scipy.sparse.random(n_rows, n_cols, density).astype(dtype) +def shaped_linspace(start, stop, shape, xp=cupy, dtype=numpy.float32): + """Returns an array with given shape, array module, and dtype. -# if sp is cupyx.scipy.sparse: -# a = cupyx.scipy.sparse.coo_matrix(a) -# elif sp is not scipy.sparse: -# raise ValueError('Unknown module: {}'.format(sp)) + Args: + start (int): The starting value. + stop (int): The end value. + shape (tuple of int): Shape of returned ndarray. + xp (numpy or cupy): Array module to use. + dtype (dtype): Dtype of returned ndarray. -# return a.asformat(format) + Returns: + numpy.ndarray or cupy.ndarray: + """ + dtype = numpy.dtype(dtype) + size = numpy.prod(shape) + if dtype == "?": + start = max(start, 0) + stop = min(stop, 1) + elif dtype.kind == "u": + start = max(start, 0) + a = numpy.linspace(start, stop, size) + return xp.array(a.astype(dtype).reshape(shape)) def generate_matrix( @@ -276,6 +302,7 @@ def assert_warns(expected): class NumpyAliasTestBase(unittest.TestCase): + @property def func(self): raise NotImplementedError() @@ -290,6 +317,7 @@ def numpy_func(self): class NumpyAliasBasicTestBase(NumpyAliasTestBase): + def test_argspec(self): f = inspect.signature assert f(self.cupy_func) == f(self.numpy_func) @@ -304,6 +332,7 @@ def test_docstring(self): class NumpyAliasValuesTestBase(NumpyAliasTestBase): + def test_values(self): assert self.cupy_func(*self.args) == self.numpy_func(*self.args) From 3f7410f00867b515532989244ad3d4a50fd603fa Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 04:50:49 -0800 Subject: [PATCH 6/7] Extend third_party/cupy/statistics_tests/test_order.py with testing of new methods --- .../cupy/statistics_tests/test_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py index b990c1eeb2c..58eb7999acc 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py @@ -1,9 +1,14 @@ +from __future__ import annotations + import warnings import numpy import pytest import dpnp as cupy + +# import cupy._core._accelerator as _acc +# from cupy import cuda from dpnp.tests.third_party.cupy import testing _all_methods = ( @@ -11,11 +16,11 @@ # 'averaged_inverted_cdf', # TODO(takagi) Not implemented # 'closest_observation', # TODO(takagi) Not implemented # 'interpolated_inverted_cdf', # TODO(takagi) Not implemented - # 'hazen', # TODO(takagi) Not implemented - # 'weibull', # TODO(takagi) Not implemented + "hazen", + "weibull", "linear", - # 'median_unbiased', # TODO(takagi) Not implemented - # 'normal_unbiased', # TODO(takagi) Not implemented + "median_unbiased", + "normal_unbiased", "lower", "higher", "midpoint", @@ -166,9 +171,8 @@ def test_percentile_out_of_range_q(self, dtype, method): with pytest.raises(ValueError): xp.percentile(a, q, axis=-1, method=method) - @testing.for_all_dtypes() @testing.for_all_dtypes(no_float16=True, no_bool=True, no_complex=True) - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(rtol=1e-6) def test_quantile_defaults(self, xp, dtype, method): a = testing.shaped_random((2, 3, 8), xp, dtype) q = testing.shaped_random((3,), xp, scale=1) @@ -391,12 +395,16 @@ def test_ptp_axis2(self, xp, dtype): @testing.for_float_dtypes() @testing.numpy_cupy_allclose() def test_ptp_nan(self, xp, dtype): + # if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators(): + # pytest.skip() a = xp.array([float("nan"), 1, -1], dtype) return xp.ptp(a) @testing.for_float_dtypes() @testing.numpy_cupy_allclose() def test_ptp_all_nan(self, xp, dtype): + # if _acc.ACCELERATOR_CUTENSOR in _acc.get_routine_accelerators(): + # pytest.skip() a = xp.array([float("nan"), float("nan")], dtype) return xp.ptp(a) From 5e2e332965ec280c4ada993883f9466252baf3aa Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 14 Jan 2026 05:30:12 -0800 Subject: [PATCH 7/7] Add PR to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa531908591..8418736671e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Unified public API definitions in `dpnp.linalg` and `dpnp.scipy` submodules [#2663](https://github.com/IntelPython/dpnp/pull/2663) * Aligned the signature of `dpnp.reshape` function with Python array API by making `shape` a required argument [#2673](https://github.com/IntelPython/dpnp/pull/2673) * Unified `dpnp` public API exports by consolidating function exports in `__init__.py` and removing wildcard imports [#2665](https://github.com/IntelPython/dpnp/pull/2665) [#2666](https://github.com/IntelPython/dpnp/pull/2666) +* Compile indexing extension with `-fno-sycl-id-queries-fit-in-int` to support huge arrays [#2721](https://github.com/IntelPython/dpnp/pull/2721) ### Deprecated