From f56959620f034ca1075b527cc6fa77c214fe7b4f Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Jun 2026 15:20:03 +0200 Subject: [PATCH 01/15] Add dense GPU sparse conversion --- lib/JLArrays/src/JLArrays.jl | 15 +++++++++++ src/host/sparse.jl | 52 ++++++++++++++++++++++++++++++++++++ test/testsuite/sparse.jl | 13 +++++++++ 3 files changed, 80 insertions(+) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 0efc5a5e7..b55f13d96 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -224,10 +224,14 @@ GPUArrays.dense_vector_type(a::JLArray{T, N}) where {T, N} = JLArray{T, 1} GPUArrays.dense_vector_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, 1} GPUArrays.sparse_array_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSC +GPUArrays.sparse_array_type(sa::JLArray{<:Any,2}) = JLSparseMatrixCSC +GPUArrays.sparse_array_type(::Type{<:JLArray{<:Any,2}}) = JLSparseMatrixCSC GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC GPUArrays.sparse_array_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSR GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR GPUArrays.sparse_array_type(sa::JLSparseVector) = JLSparseVector +GPUArrays.sparse_array_type(sa::JLArray{<:Any,1}) = JLSparseVector +GPUArrays.sparse_array_type(::Type{<:JLArray{<:Any,1}}) = JLSparseVector GPUArrays.sparse_array_type(::Type{<:JLSparseVector}) = JLSparseVector GPUArrays.dense_array_type(sa::JLSparseVector) = JLArray @@ -238,7 +242,9 @@ GPUArrays.dense_array_type(sa::JLSparseMatrixCSR) = JLArray GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray GPUArrays.csc_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSC +GPUArrays.csc_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSC GPUArrays.csr_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSR +GPUArrays.csr_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSR Base.similar(Mat::JLSparseMatrixCSR) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) Base.similar(Mat::JLSparseMatrixCSR, T::Type) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), size(Mat)) @@ -383,6 +389,7 @@ function JLSparseMatrixCSC(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, (xs.m, xs.n)) end JLSparseMatrixCSC(xs::SparseVector) = JLSparseMatrixCSC(SparseMatrixCSC(xs)) +JLSparseMatrixCSC(xs::JLArray{<:Any,2}) = JLSparseMatrixCSC(SparseMatrixCSC(xs)) Base.length(x::JLSparseMatrixCSC) = prod(x.dims) Base.size(x::JLSparseMatrixCSC) = x.dims @@ -397,6 +404,7 @@ function JLSparseMatrixCSR(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, (xs.m, xs.n)) end JLSparseMatrixCSR(xs::SparseVector{Tv, Ti}) where {Ti, Tv} = JLSparseMatrixCSR(SparseMatrixCSC(xs)) +JLSparseMatrixCSR(xs::JLArray{<:Any,2}) = JLSparseMatrixCSR(SparseMatrixCSC(xs)) function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR(SparseMatrixCSC(xs)) end @@ -456,6 +464,13 @@ Adapt.adapt_storage(::Type{Array}, xs::JLArray) = convert(Array, xs) Base.convert(::Type{T}, x::T) where T <: JLArray = x +## indexing + +Base.findall(bools::JLArray{Bool}) = JLArray(findall(Array(bools))) +Base.findall(f::Function, A::JLArray) = JLArray(findall(f, Array(A))) +Base.findall(f::Base.Fix2{typeof(in)}, A::JLArray) = JLArray(findall(f, Array(A))) + + ## broadcast import Base.Broadcast: BroadcastStyle, Broadcasted diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 5c84ee77c..60d469af6 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -44,6 +44,58 @@ Base.iszero(A::AbstractGPUSparseArray) = SparseArrays.nnz(A) == 0 || all(iszero, SparseArrays.SparseVector(x::AbstractGPUSparseVector) = SparseVector(length(x), Array(SparseArrays.nonzeroinds(x)), Array(SparseArrays.nonzeros(x))) SparseArrays.SparseMatrixCSC(x::AbstractGPUSparseMatrixCSC) = SparseMatrixCSC(size(x)..., Array(SparseArrays.getcolptr(x)), Array(SparseArrays.rowvals(x)), Array(SparseArrays.nonzeros(x))) +@kernel function dense_sparse_values_kernel!(values, A, indices) + i = @index(Global, Linear) + if i <= length(indices) + @inbounds values[i] = A[indices[i]] + end +end + +function dense_sparse_values(A::AnyGPUArray{Tv}, indices) where {Tv} + values = similar(A, Tv, length(indices)) + if !isempty(values) + dense_sparse_values_kernel!(get_backend(A))(values, A, indices; ndrange=length(indices)) + end + return values +end + +function SparseArrays.SparseVector(x::AnyGPUVector) + inds = findall(!iszero, x) + vals = dense_sparse_values(x, inds) + host_inds = Array(inds) + host_vals = Array(vals) + unsafe_free!(inds) + unsafe_free!(vals) + return SparseVector(length(x), host_inds, host_vals) +end + +function SparseArrays.SparseMatrixCSC(x::AnyGPUMatrix) + inds = findall(!iszero, x) + vals = dense_sparse_values(x, inds) + host_inds = Array(inds) + host_vals = Array(vals) + unsafe_free!(inds) + unsafe_free!(vals) + + rows = getindex.(host_inds, 1) + cols = getindex.(host_inds, 2) + return sparse(rows, cols, host_vals, size(x)...) +end + +function SparseArrays.sparse(A::AnyGPUVector) + sparse_array_type(A)(SparseVector(A)) +end + +function SparseArrays.sparse(A::AnyGPUMatrix; fmt=:csc) + if fmt == :csc + return sparse_array_type(A)(SparseMatrixCSC(A)) + elseif fmt == :csr + return csr_type(sparse_array_type(A))(SparseMatrixCSC(A)) + else + throw(ArgumentError("format :$fmt not available, use :csc or :csr")) + end +end + # similar Base.similar(Vec::V) where {V<:AbstractGPUSparseVector} = V(copy(SparseArrays.nonzeroinds(Vec)), similar(SparseArrays.nonzeros(Vec)), length(Vec)) Base.similar(Mat::M) where {M<:AbstractGPUSparseMatrixCSC} = M(copy(SparseArrays.getcolptr(Mat)), copy(SparseArrays.rowvals(Mat)), similar(SparseArrays.nonzeros(Mat)), size(Mat)) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 3e0d92fe3..763e244ca 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -71,6 +71,11 @@ function vector_construction(AT, eltypes) @test collect(d_x) == collect(x) @test similar(d_x) isa AT{ET} @test similar(d_x, Float32) isa AT{Float32} + + dense_x = dense_AT(collect(x)) + @test SparseVector(dense_x) == x + @test sparse(dense_x) isa AT{ET} + @test SparseVector(sparse(dense_x)) == x end end @@ -148,6 +153,14 @@ function matrix_construction(AT, eltypes) @test similar(d_x, Float32, (n, m)) isa AT{Float32} @test similar(d_x, (3, 4)) isa AT{ET} @test size(similar(d_x, (3, 4))) == (3, 4) + + dense_x = dense_AT(collect(x)) + @test SparseMatrixCSC(dense_x) == x + @test sparse(dense_x) isa GPUArrays.sparse_array_type(dense_x){ET} + @test SparseMatrixCSC(sparse(dense_x)) == x + if dense_x isa GPUArrays.AnyGPUArray + @test SparseMatrixCSC(sparse(dense_x; fmt=:csr)) == x + end end end From 6563c12243ceb8b076d70535e6ecf2e18449a815 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Jun 2026 16:04:49 +0200 Subject: [PATCH 02/15] Add generic COO sparse matmul --- Project.toml | 2 + lib/JLArrays/src/JLArrays.jl | 81 +++++++++- src/GPUArrays.jl | 1 + src/host/sparse.jl | 306 +++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + test/testsuite.jl | 1 + test/testsuite/sparse.jl | 83 ++++++++++ 7 files changed, 473 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 707ee08d5..d0e72995a 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ projects = ["lib/GPUArraysCore", "lib/JLArrays", "test", "docs"] [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" @@ -27,6 +28,7 @@ JLD2Ext = "JLD2" [compat] Adapt = "4.6.1" +Atomix = "1.1" GPUArraysCore = "= 0.2.0" JLD2 = "0.4, 0.5, 0.6" KernelAbstractions = "0.9.28, 0.10" diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index b55f13d96..1c9123e9d 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -6,7 +6,8 @@ module JLArrays -export JLArray, JLVector, JLMatrix, jl, JLBackend, JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR +export JLArray, JLVector, JLMatrix, jl, JLBackend, JLSparseVector, + JLSparseMatrixCSC, JLSparseMatrixCSR, JLSparseMatrixCOO using GPUArrays @@ -74,6 +75,11 @@ Adapt.adapt_structure(to::Adaptor, r::Base.RefValue) = JlRefValue(adapt(to, r[]) @inline Base.getindex(A::JLDeviceArray, index::Integer) = getindex(typed_data(A), index) @inline Base.setindex!(A::JLDeviceArray, x, index::Integer) = setindex!(typed_data(A), x, index) + Base.pointer(x::JLDeviceArray{T}) where {T} = + convert(Ptr{T}, pointer(x.data)) + x.offset + @inline function Base.pointer(x::JLDeviceArray{T}, i::Integer) where T + pointer(x) + (i - 1) * sizeof(T) + end end # @@ -188,6 +194,46 @@ end JLSparseMatrixCSC(Mat::Union{Transpose{Tv, <:SparseMatrixCSC}, Adjoint{Tv, <:SparseMatrixCSC}}) where {Tv} = JLSparseMatrixCSC(JLSparseMatrixCSR(Mat)) +mutable struct JLSparseMatrixCOO{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCOO{Tv, Ti} + rowInd::JLArray{Ti, 1} + colInd::JLArray{Ti, 1} + nzVal::JLArray{Tv, 1} + dims::NTuple{2,Int} + nnz::Ti + + function JLSparseMatrixCOO{Tv, Ti}(rowInd::JLArray{Ti, 1}, colInd::JLArray{Ti, 1}, + nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}, + nnz::Integer=length(nzVal)) where {Tv, Ti <: Integer} + length(rowInd) == length(colInd) == length(nzVal) || + throw(DimensionMismatch("COO row, column, and value arrays have different lengths")) + new{Tv, Ti}(rowInd, colInd, nzVal, (Int(dims[1]), Int(dims[2])), Ti(nnz)) + end +end +JLSparseMatrixCOO(rowInd::JLArray{Ti, 1}, colInd::JLArray{Ti, 1}, + nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}, + nnz::Integer=length(nzVal)) where {Tv, Ti <: Integer} = + JLSparseMatrixCOO{Tv, Ti}(rowInd, colInd, nzVal, dims, nnz) + +Base.length(x::JLSparseMatrixCOO) = prod(x.dims) +Base.size(x::JLSparseMatrixCOO) = x.dims +SparseArrays.nnz(x::JLSparseMatrixCOO) = x.nnz +SparseArrays.nonzeros(x::JLSparseMatrixCOO) = x.nzVal + +function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCOO) + return sparse(Array(x.rowInd), Array(x.colInd), Array(x.nzVal), size(x)...) +end + +function Base.getindex(A::JLSparseMatrixCOO{Tv}, i::Integer, j::Integer) where {Tv} + @boundscheck checkbounds(A, i, j) + acc = zero(Tv) + for k in 1:Int(nnz(A)) + @inbounds if A.rowInd[k] == i && A.colInd[k] == j + acc += A.nzVal[k] + end + end + return acc +end + function Base.size(g::JLSparseMatrixCSR, d::Integer) if 1 <= d <= 2 return g.dims[d] @@ -229,6 +275,8 @@ GPUArrays.sparse_array_type(::Type{<:JLArray{<:Any,2}}) = JLSparseMatrixCSC GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC GPUArrays.sparse_array_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSR GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR +GPUArrays.sparse_array_type(sa::JLSparseMatrixCOO) = JLSparseMatrixCOO +GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCOO}) = JLSparseMatrixCOO GPUArrays.sparse_array_type(sa::JLSparseVector) = JLSparseVector GPUArrays.sparse_array_type(sa::JLArray{<:Any,1}) = JLSparseVector GPUArrays.sparse_array_type(::Type{<:JLArray{<:Any,1}}) = JLSparseVector @@ -240,14 +288,24 @@ GPUArrays.dense_array_type(sa::JLSparseMatrixCSC) = JLArray GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSC}) = JLArray GPUArrays.dense_array_type(sa::JLSparseMatrixCSR) = JLArray GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray +GPUArrays.dense_array_type(sa::JLSparseMatrixCOO) = JLArray +GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCOO}) = JLArray GPUArrays.csc_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSC +GPUArrays.csc_type(sa::JLSparseMatrixCOO) = JLSparseMatrixCSC GPUArrays.csc_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSC +GPUArrays.csc_type(::Type{<:JLSparseMatrixCOO}) = JLSparseMatrixCSC GPUArrays.csr_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSR +GPUArrays.csr_type(sa::JLSparseMatrixCOO) = JLSparseMatrixCSR GPUArrays.csr_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSR +GPUArrays.csr_type(::Type{<:JLSparseMatrixCOO}) = JLSparseMatrixCSR +GPUArrays.coo_type(sa::Union{JLSparseMatrixCSC,JLSparseMatrixCSR,JLSparseMatrixCOO}) = JLSparseMatrixCOO +GPUArrays.coo_type(::Type{<:Union{JLSparseMatrixCSC,JLSparseMatrixCSR,JLSparseMatrixCOO}}) = JLSparseMatrixCOO Base.similar(Mat::JLSparseMatrixCSR) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) Base.similar(Mat::JLSparseMatrixCSR, T::Type) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), size(Mat)) +Base.similar(Mat::JLSparseMatrixCOO) = JLSparseMatrixCOO(copy(Mat.rowInd), copy(Mat.colInd), similar(nonzeros(Mat)), size(Mat), nnz(Mat)) +Base.similar(Mat::JLSparseMatrixCOO, T::Type) = JLSparseMatrixCOO(copy(Mat.rowInd), copy(Mat.colInd), similar(nonzeros(Mat), T), size(Mat), nnz(Mat)) Base.similar(Mat::JLSparseMatrixCSC, T::Type, N::Int, M::Int) = JLSparseMatrixCSC(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M)) Base.similar(Mat::JLSparseMatrixCSR, T::Type, N::Int, M::Int) = JLSparseMatrixCSR(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M)) @@ -264,6 +322,7 @@ Base.similar(Mat::JLSparseMatrixCSR, dims::Tuple{Int, Int}) = similar(Mat, dims. JLArray(x::JLSparseVector) = JLArray(collect(SparseVector(x))) JLArray(x::JLSparseMatrixCSC) = JLArray(collect(SparseMatrixCSC(x))) JLArray(x::JLSparseMatrixCSR) = JLArray(collect(SparseMatrixCSC(x))) +JLArray(x::JLSparseMatrixCOO) = JLArray(collect(SparseMatrixCSC(x))) # conversion of untyped data to a typed Array function typed_data(x::JLArray{T}) where {T} @@ -405,12 +464,28 @@ function JLSparseMatrixCSR(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} end JLSparseMatrixCSR(xs::SparseVector{Tv, Ti}) where {Ti, Tv} = JLSparseMatrixCSR(SparseMatrixCSC(xs)) JLSparseMatrixCSR(xs::JLArray{<:Any,2}) = JLSparseMatrixCSR(SparseMatrixCSC(xs)) +function JLSparseMatrixCOO(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} + rowInd, colInd, nzVal = findnz(xs) + return JLSparseMatrixCOO{Tv, Ti}(JLVector{Ti}(rowInd), JLVector{Ti}(colInd), + JLVector{Tv}(nzVal), (xs.m, xs.n)) +end +JLSparseMatrixCOO(xs::SparseVector) = JLSparseMatrixCOO(SparseMatrixCSC(xs)) +JLSparseMatrixCOO(xs::JLArray{<:Any,2}) = JLSparseMatrixCOO(SparseMatrixCSC(xs)) function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR(SparseMatrixCSC(xs)) end +function JLSparseMatrixCOO(xs::Union{JLSparseMatrixCSC{Tv, Ti},JLSparseMatrixCSR{Tv, Ti}}) where {Tv, Ti} + return JLSparseMatrixCOO(SparseMatrixCSC(xs)) +end function JLSparseMatrixCSC(xs::JLSparseMatrixCSR{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSC(SparseMatrixCSC(xs)) end +function JLSparseMatrixCSC(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} + return JLSparseMatrixCSC(SparseMatrixCSC(xs)) +end +function JLSparseMatrixCSR(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} + return JLSparseMatrixCSR(SparseMatrixCSC(xs)) +end function Base.copyto!(dst::JLSparseMatrixCSR, src::JLSparseMatrixCSR) if size(dst) != size(src) throw(ArgumentError("Inconsistent Sparse Matrix size")) @@ -583,13 +658,15 @@ Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = GPUSparseDeviceMatrixCSC{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.colPtr), adapt(to, x.rowVal), adapt(to, x.nzVal), x.dims, x.nnz) Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCSR{Tv,Ti}) where {Tv,Ti} = GPUSparseDeviceMatrixCSR{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.rowPtr), adapt(to, x.colVal), adapt(to, x.nzVal), x.dims, x.nnz) +Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCOO{Tv,Ti}) where {Tv,Ti} = +GPUSparseDeviceMatrixCOO{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.rowInd), adapt(to, x.colInd), adapt(to, x.nzVal), x.dims, x.nnz) Adapt.adapt_structure(to::Adaptor, x::JLSparseVector{Tv,Ti}) where {Tv,Ti} = GPUSparseDeviceVector{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.iPtr), adapt(to, x.nzVal), x.len, x.nnz) ## KernelAbstractions interface KernelAbstractions.get_backend(a::JLA) where JLA <: JLArray = JLBackend() -KernelAbstractions.get_backend(a::JLA) where JLA <: Union{JLSparseMatrixCSC, JLSparseMatrixCSR, JLSparseVector} = JLBackend() +KernelAbstractions.get_backend(a::JLA) where JLA <: Union{JLSparseMatrixCSC, JLSparseMatrixCSR, JLSparseMatrixCOO, JLSparseVector} = JLBackend() function KernelAbstractions.mkcontext(kernel::Kernel{JLBackend}, I, _ndrange, iterspace, ::Dynamic) where Dynamic return KernelAbstractions.CompilerMetadata{KernelAbstractions.ndrange(kernel), Dynamic}(I, _ndrange, iterspace) diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index 17a4d8975..d50dcee0e 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -9,6 +9,7 @@ using Printf using LinearAlgebra.BLAS using Base.Cartesian +using Atomix using Adapt using LLVM.Interop diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 60d469af6..266e659ce 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -119,6 +119,312 @@ coo_type(sa) = coo_type(typeof(sa)) function _spadjoint end function _sptranspose end +const SparseMatmulEltypes = Union{Float16,Float32,ComplexF16,ComplexF32} + +sparse_matmul_accumulator(::Type{T}) where {T} = T +sparse_matmul_accumulator(::Type{Float16}) = Float32 +sparse_matmul_accumulator(::Type{ComplexF16}) = ComplexF32 + +const GPUSparseMatrixOrWrapper = Union{ + AbstractGPUSparseMatrix, + Transpose{<:Any,<:AbstractGPUSparseMatrix}, + Adjoint{<:Any,<:AbstractGPUSparseMatrix}, +} + +function sparse_op(t::AbstractChar) + t in ('N', 'n', 'S', 's', 'H', 'h') && return false, false + t in ('T', 't') && return true, false + t in ('C', 'c') && return true, true + throw(ArgumentError("unsupported sparse matrix operation '$t'")) +end + +sparse_op_size(A, trans::Bool) = trans ? reverse(size(A)) : size(A) + +sparse_coo(A::AbstractGPUSparseMatrixCOO) = A +sparse_coo(A::AbstractGPUSparseMatrix) = coo_type(A)(A) + +@inline function sparse_atomic_add!(A::AbstractArray{<:Real}, parts, I::Integer, x) + @inbounds Atomix.@atomic A[Int(I)] += x + return +end + +@inline function sparse_atomic_add!(A::AbstractArray{<:Complex}, parts, I::Integer, x) + j = 2 * Int(I) + @inbounds begin + Atomix.@atomic parts[j - 1] += real(x) + Atomix.@atomic parts[j] += imag(x) + end + return +end + +sparse_atomic_parts(A::AbstractArray{<:Real}) = A +sparse_atomic_parts(A::AbstractArray{<:Complex{T}}) where {T} = reinterpret(T, A) + +@kernel function sparse_scale_kernel!(Y, beta) + i = @index(Global, Linear) + if i <= length(Y) + T = eltype(Y) + @inbounds Y[i] = iszero(beta) ? zero(T) : T(beta) * Y[i] + end +end + +@kernel function sparse_combine_kernel!(Y, acc, alpha, beta, ::Val{Tacc}) where {Tacc} + i = @index(Global, Linear) + if i <= length(Y) + Ty = eltype(Y) + @inbounds Y[i] = iszero(beta) ? + Ty(Tacc(alpha) * Tacc(acc[i])) : + Ty(Tacc(alpha) * Tacc(acc[i]) + Tacc(beta) * Tacc(Y[i])) + end +end + +function sparse_scale!(Y, beta) + isempty(Y) && return Y + sparse_scale_kernel!(get_backend(Y))(Y, beta; ndrange=length(Y)) + return Y +end + +function sparse_combine!(Y::AnyGPUArray{Ty}, acc, alpha, beta) where {Ty} + isempty(Y) && return Y + Tacc = sparse_matmul_accumulator(Ty) + sparse_combine_kernel!(get_backend(Y))(Y, acc, Tacc(alpha), Tacc(beta), Val(Tacc); + ndrange=length(Y)) + return Y +end + +@kernel function spmv_coo_kernel!(y, yparts, alpha, rowInd, colInd, nzVal, x, + trans::Bool, conjval::Bool, ::Val{Tacc}) where {Tacc} + k = @index(Global, Linear) + if k <= length(nzVal) + @inbounds begin + out = trans ? colInd[k] : rowInd[k] + src = trans ? rowInd[k] : colInd[k] + v = Tacc(nzVal[k]) + v = conjval ? conj(v) : v + sparse_atomic_add!(y, yparts, out, Tacc(alpha) * v * Tacc(x[src])) + end + end +end + +@kernel function spmm_coo_kernel!(C, Cparts, alpha, rowInd, colInd, nzVal, B, + trans::Bool, conjval::Bool, ::Val{Tacc}) where {Tacc} + k, j = @index(Global, NTuple) + if k <= length(nzVal) && j <= size(B, 2) + @inbounds begin + outrow = trans ? colInd[k] : rowInd[k] + srcrow = trans ? rowInd[k] : colInd[k] + v = Tacc(nzVal[k]) + v = conjval ? conj(v) : v + lin = Int(outrow) + (Int(j) - 1) * size(C, 1) + sparse_atomic_add!(C, Cparts, lin, Tacc(alpha) * v * Tacc(B[srcrow, j])) + end + end +end + +@kernel function dense_spmm_coo_kernel!(C, Cparts, alpha, A, rowInd, colInd, nzVal, + trans::Bool, conjval::Bool, ::Val{Tacc}) where {Tacc} + i, k = @index(Global, NTuple) + if i <= size(C, 1) && k <= length(nzVal) + @inbounds begin + outcol = trans ? rowInd[k] : colInd[k] + srccol = trans ? colInd[k] : rowInd[k] + v = Tacc(nzVal[k]) + v = conjval ? conj(v) : v + lin = Int(i) + (Int(outcol) - 1) * size(C, 1) + sparse_atomic_add!(C, Cparts, lin, Tacc(alpha) * Tacc(A[i, srccol]) * v) + end + end +end + +function spmv_coo!(y::AnyGPUVector{Ty}, A::AbstractGPUSparseMatrixCOO, x, + alpha::Number, beta::Number, trans::Bool, conjval::Bool) where {Ty} + Tacc = sparse_matmul_accumulator(Ty) + if Tacc === Ty + sparse_scale!(y, beta) + target = y + scatter_alpha = Tacc(alpha) + else + target = similar(y, Tacc, size(y)) + fill!(target, zero(Tacc)) + scatter_alpha = one(Tacc) + end + + if nnz(A) > 0 + parts = sparse_atomic_parts(target) + spmv_coo_kernel!(get_backend(y))(target, parts, scatter_alpha, A.rowInd, A.colInd, + nonzeros(A), x, trans, conjval, Val(Tacc); + ndrange=nnz(A)) + end + + if !(Tacc === Ty) + sparse_combine!(y, target, alpha, beta) + unsafe_free!(target) + end + return y +end + +function spmm_coo!(C::AnyGPUMatrix{Tc}, A::AbstractGPUSparseMatrixCOO, B, + alpha::Number, beta::Number, trans::Bool, conjval::Bool) where {Tc} + Tacc = sparse_matmul_accumulator(Tc) + if Tacc === Tc + sparse_scale!(C, beta) + target = C + scatter_alpha = Tacc(alpha) + else + target = similar(C, Tacc, size(C)) + fill!(target, zero(Tacc)) + scatter_alpha = one(Tacc) + end + + if nnz(A) > 0 && size(C, 2) > 0 + parts = sparse_atomic_parts(target) + spmm_coo_kernel!(get_backend(C))(target, parts, scatter_alpha, A.rowInd, A.colInd, + nonzeros(A), B, trans, conjval, Val(Tacc); + ndrange=(nnz(A), size(C, 2))) + end + + if !(Tacc === Tc) + sparse_combine!(C, target, alpha, beta) + unsafe_free!(target) + end + return C +end + +function dense_spmm_coo!(C::AnyGPUMatrix{Tc}, A, B::AbstractGPUSparseMatrixCOO, + alpha::Number, beta::Number, trans::Bool, conjval::Bool) where {Tc} + Tacc = sparse_matmul_accumulator(Tc) + if Tacc === Tc + sparse_scale!(C, beta) + target = C + scatter_alpha = Tacc(alpha) + else + target = similar(C, Tacc, size(C)) + fill!(target, zero(Tacc)) + scatter_alpha = one(Tacc) + end + + if nnz(B) > 0 && size(C, 1) > 0 + parts = sparse_atomic_parts(target) + dense_spmm_coo_kernel!(get_backend(C))(target, parts, scatter_alpha, A, B.rowInd, + B.colInd, nonzeros(B), trans, conjval, + Val(Tacc); + ndrange=(size(C, 1), nnz(B))) + end + + if !(Tacc === Tc) + sparse_combine!(C, target, alpha, beta) + unsafe_free!(target) + end + return C +end + +function Base.:(*)(A::GPUSparseMatrixOrWrapper, x::AnyGPUVector) + size(A, 2) == length(x) || throw(DimensionMismatch()) + y = similar(x, promote_type(eltype(A), eltype(x)), size(A, 1)) + return LinearAlgebra.mul!(y, A, x) +end + +function Base.:(*)(A::GPUSparseMatrixOrWrapper, B::AnyGPUMatrix) + size(A, 2) == size(B, 1) || throw(DimensionMismatch()) + C = similar(B, promote_type(eltype(A), eltype(B)), (size(A, 1), size(B, 2))) + return LinearAlgebra.mul!(C, A, B) +end + +function Base.:(*)(A::AnyGPUMatrix, B::GPUSparseMatrixOrWrapper) + size(A, 2) == size(B, 1) || throw(DimensionMismatch()) + C = similar(A, promote_type(eltype(A), eltype(B)), (size(A, 1), size(B, 2))) + return LinearAlgebra.mul!(C, A, B) +end + +LinearAlgebra.generic_matvecmul!(y::AnyGPUVector{Ty}, tA::AbstractChar, + A::AbstractGPUSparseMatrix{Ta}, x::AnyGPUVector{Tx}, + muladd::MulAddMul) where { + Ty<:SparseMatmulEltypes, + Ta<:SparseMatmulEltypes, + Tx<:SparseMatmulEltypes, + } = + LinearAlgebra.generic_matvecmul!(y, tA, A, x, muladd.alpha, muladd.beta) + +function LinearAlgebra.generic_matvecmul!(y::AnyGPUVector{Ty}, tA::AbstractChar, + A::AbstractGPUSparseMatrix{Ta}, + x::AnyGPUVector{Tx}, + alpha::Number, beta::Number) where { + Ty<:SparseMatmulEltypes, + Ta<:SparseMatmulEltypes, + Tx<:SparseMatmulEltypes, + } + trans, conjval = sparse_op(tA) + Asize = sparse_op_size(A, trans) + Asize[2] == length(x) || + throw(DimensionMismatch("A has dimensions $Asize but x has length $(length(x))")) + Asize[1] == length(y) || + throw(DimensionMismatch("A has dimensions $Asize but y has length $(length(y))")) + y === x && throw(ArgumentError("output vector must not be aliased with input vector")) + isempty(y) && return y + + return spmv_coo!(y, sparse_coo(A), x, alpha, beta, trans, conjval) +end + +LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, + A::AbstractGPUSparseMatrix{Ta}, B::AnyGPUMatrix{Tb}, + muladd::MulAddMul) where { + Tc<:SparseMatmulEltypes, + Ta<:SparseMatmulEltypes, + Tb<:SparseMatmulEltypes, + } = + LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, muladd.alpha, muladd.beta) + +function LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, + A::AbstractGPUSparseMatrix{Ta}, + B::AnyGPUMatrix{Tb}, + alpha::Number, beta::Number) where { + Tc<:SparseMatmulEltypes, + Ta<:SparseMatmulEltypes, + Tb<:SparseMatmulEltypes, + } + trans, conjval = sparse_op(tA) + Bop = LinearAlgebra.wrap(B, tB) + Asize = sparse_op_size(A, trans) + Asize[2] == size(Bop, 1) || + throw(DimensionMismatch("A has dimensions $Asize but B has dimensions $(size(Bop))")) + size(C) == (Asize[1], size(Bop, 2)) || + throw(DimensionMismatch("C has dimensions $(size(C)), should have $((Asize[1], size(Bop, 2)))")) + C === B && throw(ArgumentError("output matrix must not be aliased with input matrix")) + isempty(C) && return C + + return spmm_coo!(C, sparse_coo(A), Bop, alpha, beta, trans, conjval) +end + +LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, + A::AnyGPUMatrix{Ta}, B::AbstractGPUSparseMatrix{Tb}, + muladd::MulAddMul) where { + Tc<:SparseMatmulEltypes, + Ta<:SparseMatmulEltypes, + Tb<:SparseMatmulEltypes, + } = + LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, muladd.alpha, muladd.beta) + +function LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, + A::AnyGPUMatrix{Ta}, + B::AbstractGPUSparseMatrix{Tb}, + alpha::Number, beta::Number) where { + Tc<:SparseMatmulEltypes, + Ta<:SparseMatmulEltypes, + Tb<:SparseMatmulEltypes, + } + Aop = LinearAlgebra.wrap(A, tA) + trans, conjval = sparse_op(tB) + Bsize = sparse_op_size(B, trans) + size(Aop, 2) == Bsize[1] || + throw(DimensionMismatch("A has dimensions $(size(Aop)) but B has dimensions $Bsize")) + size(C) == (size(Aop, 1), Bsize[2]) || + throw(DimensionMismatch("C has dimensions $(size(C)), should have $((size(Aop, 1), Bsize[2]))")) + C === A && throw(ArgumentError("output matrix must not be aliased with input matrix")) + isempty(C) && return C + + return dense_spmm_coo!(C, Aop, sparse_coo(B), alpha, beta, trans, conjval) +end + function LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSR, p::Real=2) if p == Inf return maximum(sum(abs, A; dims=2)) diff --git a/test/runtests.jl b/test/runtests.jl index a91715b9a..5d13993ad 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,6 +9,7 @@ const init_worker_code = quote include("testsuite.jl") TestSuite.sparse_types(::Type{<:JLArray}) = (JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR) + TestSuite.sparse_coo_type(::Type{<:JLArray}) = JLSparseMatrixCOO TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC) # Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved diff --git a/test/testsuite.jl b/test/testsuite.jl index e4aea4f72..f95c7366e 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -76,6 +76,7 @@ supported_eltypes() = (Int16, Int32, Int64, # derived sparse types that are supported by the array type sparse_types(::Type{AT}) where {AT} = () +sparse_coo_type(::Type{AT}) where {AT} = nothing # some convenience predicates for filtering test eltypes isrealtype(T) = T <: Real diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 763e244ca..e120e450c 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -15,11 +15,94 @@ iszero_matrix(sparse_AT, eltypes) end end + coo_AT = sparse_coo_type(AT) + coo_AT === nothing || coo_matmul(coo_AT, AT, eltypes) end using SparseArrays using SparseArrays: nonzeroinds, nonzeros, rowvals +function coo_values(::Type{T}) where {T<:Real} + return T[1, 2, 3, 4] +end + +function coo_values(::Type{T}) where {T<:Complex} + return T[1 + im, 2 - 3im, 3 + 2im, 4 - im] +end + +function coo_vector(::Type{T}) where {T<:Real} + return T[2, 3, 5] +end + +function coo_vector(::Type{T}) where {T<:Complex} + return T[2 - im, 3 + 4im, 5 - 2im] +end + +function coo_rhs(::Type{T}) where {T<:Real} + return T[ + 1 2; + 3 4; + 5 6 + ] +end + +function coo_rhs(::Type{T}) where {T<:Complex} + return T[ + 1 + im 2 - im; + 3 + 2im 4 - 3im; + 5 - im 6 + im + ] +end + +function coo_lhs(::Type{T}) where {T<:Real} + return T[ + 1 2 3; + 4 5 6 + ] +end + +function coo_lhs(::Type{T}) where {T<:Complex} + return T[ + 1 - im 2 + im 3 - 2im; + 4 + 3im 5 - im 6 + im + ] +end + +function coo_matmul(AT, dense_AT, eltypes) + for ET in (Float16, Float32, ComplexF16, ComplexF32) + ET in eltypes || continue + @testset "COO repeated-index matmul($ET)" begin + I = Int32[1, 1, 2, 3] + J = Int32[1, 1, 2, 1] + V = coo_values(ET) + A = sparse(Int.(I), Int.(J), V, 3, 3) + dA = AT(dense_AT(I), dense_AT(J), dense_AT(V), (3, 3), length(V)) + + x = coo_vector(ET) + dx = dense_AT(x) + @test Array(dA * dx) ≈ Array(A * x) + @test Array(transpose(dA) * dx) ≈ Array(transpose(A) * x) + @test Array(adjoint(dA) * dx) ≈ Array(adjoint(A) * x) + + old_y = dense_AT(fill(ET(1), 3)) + mul!(old_y, dA, dx, ET(2), ET(3)) + @test Array(old_y) ≈ Array(ET(2) .* (A * x) .+ ET(3)) + + B = coo_rhs(ET) + dB = dense_AT(B) + @test Array(dA * dB) ≈ Array(A * B) + @test Array(transpose(dA) * dB) ≈ Array(transpose(A) * B) + @test Array(adjoint(dA) * dB) ≈ Array(adjoint(A) * B) + + L = coo_lhs(ET) + dL = dense_AT(L) + @test Array(dL * dA) ≈ Array(L * A) + @test Array(dL * transpose(dA)) ≈ Array(L * transpose(A)) + @test Array(dL * adjoint(dA)) ≈ Array(L * adjoint(A)) + end + end +end + function vector(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes From 13ae5b99de9f9748a046af30acf9ac14568e6c7c Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Jun 2026 19:34:14 +0200 Subject: [PATCH 03/15] Add typed dense sparse conversion --- lib/JLArrays/src/JLArrays.jl | 38 ++++++++++++++-- src/host/sparse.jl | 86 ++++++++++++++++++++---------------- test/testsuite/sparse.jl | 10 +++++ 3 files changed, 94 insertions(+), 40 deletions(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 1c9123e9d..9f33a29bc 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -435,6 +435,9 @@ function JLSparseVector(xs::SparseVector{Tv, Ti}) where {Ti, Tv} copyto!(nzVal, convert(Vector{Tv}, xs.nzval)) return JLSparseVector{Tv, Ti}(iPtr, nzVal, length(xs),) end +SparseArrays.SparseVector(xs::JLArray{<:Any,1}) = SparseVector(JLSparseVector(xs)) +JLSparseVector(xs::JLArray{T,1}) where {T} = + GPUArrays.sparse_from_dense(JLSparseVector{T,Int}, xs) Base.length(x::JLSparseVector) = x.len Base.size(x::JLSparseVector) = (x.len,) @@ -447,8 +450,10 @@ function JLSparseMatrixCSC(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} copyto!(nzVal, convert(Vector{Tv}, xs.nzval)) return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, (xs.m, xs.n)) end +SparseArrays.SparseMatrixCSC(xs::JLArray{<:Any,2}) = SparseMatrixCSC(JLSparseMatrixCSC(xs)) JLSparseMatrixCSC(xs::SparseVector) = JLSparseMatrixCSC(SparseMatrixCSC(xs)) -JLSparseMatrixCSC(xs::JLArray{<:Any,2}) = JLSparseMatrixCSC(SparseMatrixCSC(xs)) +JLSparseMatrixCSC(xs::JLArray{T,2}) where {T} = + GPUArrays.sparse_from_dense(JLSparseMatrixCSC{T,Int}, xs) Base.length(x::JLSparseMatrixCSC) = prod(x.dims) Base.size(x::JLSparseMatrixCSC) = x.dims @@ -463,29 +468,56 @@ function JLSparseMatrixCSR(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, (xs.m, xs.n)) end JLSparseMatrixCSR(xs::SparseVector{Tv, Ti}) where {Ti, Tv} = JLSparseMatrixCSR(SparseMatrixCSC(xs)) -JLSparseMatrixCSR(xs::JLArray{<:Any,2}) = JLSparseMatrixCSR(SparseMatrixCSC(xs)) +JLSparseMatrixCSR(xs::JLArray{T,2}) where {T} = + GPUArrays.sparse_from_dense(JLSparseMatrixCSR{T,Int}, xs) function JLSparseMatrixCOO(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} rowInd, colInd, nzVal = findnz(xs) return JLSparseMatrixCOO{Tv, Ti}(JLVector{Ti}(rowInd), JLVector{Ti}(colInd), JLVector{Tv}(nzVal), (xs.m, xs.n)) end JLSparseMatrixCOO(xs::SparseVector) = JLSparseMatrixCOO(SparseMatrixCSC(xs)) -JLSparseMatrixCOO(xs::JLArray{<:Any,2}) = JLSparseMatrixCOO(SparseMatrixCSC(xs)) +JLSparseMatrixCOO(xs::JLArray{T,2}) where {T} = + GPUArrays.sparse_from_dense(JLSparseMatrixCOO{T,Int}, xs) function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR(SparseMatrixCSC(xs)) end function JLSparseMatrixCOO(xs::Union{JLSparseMatrixCSC{Tv, Ti},JLSparseMatrixCSR{Tv, Ti}}) where {Tv, Ti} return JLSparseMatrixCOO(SparseMatrixCSC(xs)) end +function JLSparseMatrixCOO{Tv,Ti}(xs::Union{JLSparseMatrixCSC{Tv, Ti},JLSparseMatrixCSR{Tv, Ti}}) where {Tv, Ti} + return JLSparseMatrixCOO(SparseMatrixCSC(xs)) +end function JLSparseMatrixCSC(xs::JLSparseMatrixCSR{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSC(SparseMatrixCSC(xs)) end function JLSparseMatrixCSC(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSC(SparseMatrixCSC(xs)) end +function JLSparseMatrixCSC{Tv,Ti}(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} + return JLSparseMatrixCSC(SparseMatrixCSC(xs)) +end function JLSparseMatrixCSR(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR(SparseMatrixCSC(xs)) end +function JLSparseMatrixCSR{Tv,Ti}(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} + return JLSparseMatrixCSR(SparseMatrixCSC(xs)) +end + +SparseArrays.sparse(xs::JLArray{T,1}) where {T} = + GPUArrays.sparse_from_dense(JLSparseVector{T,Int}, xs) + +function SparseArrays.sparse(xs::JLArray{T,2}; fmt=:csc) where {T} + if fmt == :csc + return GPUArrays.sparse_from_dense(JLSparseMatrixCSC{T,Int}, xs) + elseif fmt == :csr + return GPUArrays.sparse_from_dense(JLSparseMatrixCSR{T,Int}, xs) + elseif fmt == :coo + return GPUArrays.sparse_from_dense(JLSparseMatrixCOO{T,Int}, xs) + else + throw(ArgumentError("format :$fmt not available, use :csc, :csr, or :coo")) + end +end + function Base.copyto!(dst::JLSparseMatrixCSR, src::JLSparseMatrixCSR) if size(dst) != size(src) throw(ArgumentError("Inconsistent Sparse Matrix size")) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 266e659ce..6d9c7b0a3 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -44,55 +44,67 @@ Base.iszero(A::AbstractGPUSparseArray) = SparseArrays.nnz(A) == 0 || all(iszero, SparseArrays.SparseVector(x::AbstractGPUSparseVector) = SparseVector(length(x), Array(SparseArrays.nonzeroinds(x)), Array(SparseArrays.nonzeros(x))) SparseArrays.SparseMatrixCSC(x::AbstractGPUSparseMatrixCSC) = SparseMatrixCSC(size(x)..., Array(SparseArrays.getcolptr(x)), Array(SparseArrays.rowvals(x)), Array(SparseArrays.nonzeros(x))) -@kernel function dense_sparse_values_kernel!(values, A, indices) +function check_sparse_target(::Type{ST}, ::Type{Tv}, ::Type{Ti}) where {ST,Tv,Ti} + Tv isa Type && Ti isa Type || + throw(ArgumentError("sparse target type $ST must specify value and index eltypes")) + return +end + +@kernel function dense_sparse_vector_kernel!(iPtr, nzVal, A, indices) i = @index(Global, Linear) if i <= length(indices) - @inbounds values[i] = A[indices[i]] + @inbounds begin + idx = indices[i] + iPtr[i] = idx + nzVal[i] = A[idx] + end end end -function dense_sparse_values(A::AnyGPUArray{Tv}, indices) where {Tv} - values = similar(A, Tv, length(indices)) - if !isempty(values) - dense_sparse_values_kernel!(get_backend(A))(values, A, indices; ndrange=length(indices)) +@kernel function dense_sparse_matrix_kernel!(rowInd, colInd, nzVal, A, indices) + i = @index(Global, Linear) + if i <= length(indices) + @inbounds begin + idx = indices[i] + rowInd[i] = idx[1] + colInd[i] = idx[2] + nzVal[i] = A[idx] + end end - return values -end - -function SparseArrays.SparseVector(x::AnyGPUVector) - inds = findall(!iszero, x) - vals = dense_sparse_values(x, inds) - host_inds = Array(inds) - host_vals = Array(vals) - unsafe_free!(inds) - unsafe_free!(vals) - return SparseVector(length(x), host_inds, host_vals) end -function SparseArrays.SparseMatrixCSC(x::AnyGPUMatrix) - inds = findall(!iszero, x) - vals = dense_sparse_values(x, inds) - host_inds = Array(inds) - host_vals = Array(vals) - unsafe_free!(inds) - unsafe_free!(vals) - - rows = getindex.(host_inds, 1) - cols = getindex.(host_inds, 2) - return sparse(rows, cols, host_vals, size(x)...) +function sparse_from_dense(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:AbstractGPUSparseVector{Tv,Ti}} + check_sparse_target(ST, Tv, Ti) + indices = findall(!iszero, A) + nstored = length(indices) + iPtr = similar(A, Ti, nstored) + nzVal = similar(A, Tv, nstored) + if nstored > 0 + dense_sparse_vector_kernel!(get_backend(A))(iPtr, nzVal, A, indices; + ndrange=nstored) + end + unsafe_free!(indices) + return ST(iPtr, nzVal, length(A)) end -function SparseArrays.sparse(A::AnyGPUVector) - sparse_array_type(A)(SparseVector(A)) -end +function sparse_from_dense(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:AbstractGPUSparseMatrix{Tv,Ti}} + check_sparse_target(ST, Tv, Ti) + indices = findall(!iszero, A) + nstored = length(indices) + rowInd = similar(A, Ti, nstored) + colInd = similar(A, Ti, nstored) + nzVal = similar(A, Tv, nstored) + if nstored > 0 + dense_sparse_matrix_kernel!(get_backend(A))(rowInd, colInd, nzVal, A, indices; + ndrange=nstored) + end + unsafe_free!(indices) -function SparseArrays.sparse(A::AnyGPUMatrix; fmt=:csc) - if fmt == :csc - return sparse_array_type(A)(SparseMatrixCSC(A)) - elseif fmt == :csr - return csr_type(sparse_array_type(A))(SparseMatrixCSC(A)) + if ST <: AbstractGPUSparseMatrixCOO + return ST(rowInd, colInd, nzVal, size(A), nstored) else - throw(ArgumentError("format :$fmt not available, use :csc or :csr")) + COO = coo_type(ST) + return ST(COO{Tv,Ti}(rowInd, colInd, nzVal, size(A), nstored)) end end diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index e120e450c..431a5cd3c 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -157,6 +157,11 @@ function vector_construction(AT, eltypes) dense_x = dense_AT(collect(x)) @test SparseVector(dense_x) == x + if dense_x isa GPUArrays.AnyGPUArray + typed_d_x = GPUArrays.sparse_from_dense(typeof(d_x), dense_x) + @test typed_d_x isa typeof(d_x) + @test SparseVector(typed_d_x) == x + end @test sparse(dense_x) isa AT{ET} @test SparseVector(sparse(dense_x)) == x end @@ -239,6 +244,11 @@ function matrix_construction(AT, eltypes) dense_x = dense_AT(collect(x)) @test SparseMatrixCSC(dense_x) == x + if dense_x isa GPUArrays.AnyGPUArray + typed_d_x = GPUArrays.sparse_from_dense(typeof(d_x), dense_x) + @test typed_d_x isa typeof(d_x) + @test SparseMatrixCSC(typed_d_x) == x + end @test sparse(dense_x) isa GPUArrays.sparse_array_type(dense_x){ET} @test SparseMatrixCSC(sparse(dense_x)) == x if dense_x isa GPUArrays.AnyGPUArray From 5224d4286082d42c5f2edc939f63732c968edd77 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Jun 2026 23:12:20 +0200 Subject: [PATCH 04/15] Rework sparse back-end interface around value verbs and `similar` The sparse support required back-ends to implement a cluster of type-returning functions -- `dense_array_type`, `dense_vector_type`, `sparse_array_type`, `csc_type`, `csr_type`, `coo_type` -- that generic code called to name a related type (the dense counterpart, the unparameterized sparse wrapper, a sibling format). That is unidiomatic: Julia expresses these with value-level verbs (`float`, `complex`, `sparse`) and `similar`, not type-level mapping functions. Replace that machinery: * Format conversion is now the value verbs `sparse_csc`/`sparse_csr`/ `sparse_coo`. Converting to the format already held is the generic identity; back-ends implement only the cross-format cases (which they already had as constructors). `csc_type`/`csr_type`/`coo_type` are gone. * Broadcast and `similar` outputs are built with `similar` dispatched on a representative sparse value (plus an internal `_stored_inds` field accessor for filling), mirroring `SparseArrays`' `_allocres`. No more reconstructing a sparse type from a type, so `sparse_array_type` is gone. * `dense_array_type`/`dense_vector_type` had no consumer in the generic algorithms (a dense result comes from `similar(nonzeros(A), ...)`); they are removed, and the test suite threads the dense array type explicitly. * `sparse_from_dense` now yields COO directly (the natural format when scanning a dense array); CSR/CSC are derived with the conversion verbs, removing the last type-level `coo_type(ST)` use. A back-end's obligation shrinks to: the storage structs, their constructors, `similar`, and the three conversion verbs. JLArrays is updated as the reference; the GPUArrays test suite passes for both JLArray and Array. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/JLArrays/src/JLArrays.jl | 73 +++++++------------ src/host/sparse.jl | 137 ++++++++++++++++------------------- test/testsuite/sparse.jl | 52 +++++++------ 3 files changed, 114 insertions(+), 148 deletions(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 9f33a29bc..bb1011a0d 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -14,8 +14,6 @@ using GPUArrays using Adapt using SparseArrays, LinearAlgebra -import GPUArrays: dense_array_type - import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config @@ -264,51 +262,32 @@ function Base.getindex(A::JLSparseMatrixCSR{Tv, Ti}, i0::Integer, i1::Integer) w end GPUArrays.storage(a::JLArray) = a.data -GPUArrays.dense_array_type(a::JLArray{T, N}) where {T, N} = JLArray{T, N} -GPUArrays.dense_array_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, N} -GPUArrays.dense_vector_type(a::JLArray{T, N}) where {T, N} = JLArray{T, 1} -GPUArrays.dense_vector_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, 1} - -GPUArrays.sparse_array_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSC -GPUArrays.sparse_array_type(sa::JLArray{<:Any,2}) = JLSparseMatrixCSC -GPUArrays.sparse_array_type(::Type{<:JLArray{<:Any,2}}) = JLSparseMatrixCSC -GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC -GPUArrays.sparse_array_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSR -GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR -GPUArrays.sparse_array_type(sa::JLSparseMatrixCOO) = JLSparseMatrixCOO -GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCOO}) = JLSparseMatrixCOO -GPUArrays.sparse_array_type(sa::JLSparseVector) = JLSparseVector -GPUArrays.sparse_array_type(sa::JLArray{<:Any,1}) = JLSparseVector -GPUArrays.sparse_array_type(::Type{<:JLArray{<:Any,1}}) = JLSparseVector -GPUArrays.sparse_array_type(::Type{<:JLSparseVector}) = JLSparseVector - -GPUArrays.dense_array_type(sa::JLSparseVector) = JLArray -GPUArrays.dense_array_type(::Type{<:JLSparseVector}) = JLArray -GPUArrays.dense_array_type(sa::JLSparseMatrixCSC) = JLArray -GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSC}) = JLArray -GPUArrays.dense_array_type(sa::JLSparseMatrixCSR) = JLArray -GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray -GPUArrays.dense_array_type(sa::JLSparseMatrixCOO) = JLArray -GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCOO}) = JLArray - -GPUArrays.csc_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSC -GPUArrays.csc_type(sa::JLSparseMatrixCOO) = JLSparseMatrixCSC -GPUArrays.csc_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSC -GPUArrays.csc_type(::Type{<:JLSparseMatrixCOO}) = JLSparseMatrixCSC -GPUArrays.csr_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSR -GPUArrays.csr_type(sa::JLSparseMatrixCOO) = JLSparseMatrixCSR -GPUArrays.csr_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSR -GPUArrays.csr_type(::Type{<:JLSparseMatrixCOO}) = JLSparseMatrixCSR -GPUArrays.coo_type(sa::Union{JLSparseMatrixCSC,JLSparseMatrixCSR,JLSparseMatrixCOO}) = JLSparseMatrixCOO -GPUArrays.coo_type(::Type{<:Union{JLSparseMatrixCSC,JLSparseMatrixCSR,JLSparseMatrixCOO}}) = JLSparseMatrixCOO +# format-conversion verbs (cross-format only; the identity cases are handled generically +# by GPUArrays). these reuse the format constructors defined below. +GPUArrays.sparse_csc(A::Union{JLSparseMatrixCSR,JLSparseMatrixCOO}) = JLSparseMatrixCSC(A) +GPUArrays.sparse_csr(A::Union{JLSparseMatrixCSC,JLSparseMatrixCOO}) = JLSparseMatrixCSR(A) +GPUArrays.sparse_coo(A::Union{JLSparseMatrixCSC,JLSparseMatrixCSR}) = JLSparseMatrixCOO(A) Base.similar(Mat::JLSparseMatrixCSR) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) Base.similar(Mat::JLSparseMatrixCSR, T::Type) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), size(Mat)) Base.similar(Mat::JLSparseMatrixCOO) = JLSparseMatrixCOO(copy(Mat.rowInd), copy(Mat.colInd), similar(nonzeros(Mat)), size(Mat), nnz(Mat)) Base.similar(Mat::JLSparseMatrixCOO, T::Type) = JLSparseMatrixCOO(copy(Mat.rowInd), copy(Mat.colInd), similar(nonzeros(Mat), T), size(Mat), nnz(Mat)) -Base.similar(Mat::JLSparseMatrixCSC, T::Type, N::Int, M::Int) = JLSparseMatrixCSC(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M)) -Base.similar(Mat::JLSparseMatrixCSR, T::Type, N::Int, M::Int) = JLSparseMatrixCSR(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M)) +# CSC and vector structure-preserving `similar` (mirror the CSR/COO methods above; these +# used to come for free from GPUArrays via a type-reconstruction helper). +Base.similar(Mat::JLSparseMatrixCSC{<:Any, Ti}, ::Type{T}) where {T, Ti} = + JLSparseMatrixCSC{T, Ti}(copy(Mat.colPtr), copy(Mat.rowVal), similar(nonzeros(Mat), T), size(Mat)) +Base.similar(Vec::JLSparseVector{<:Any, Ti}, ::Type{T}) where {T, Ti} = + JLSparseVector{T, Ti}(copy(Vec.iPtr), similar(nonzeros(Vec), T), length(Vec)) + +# empty sparse arrays of a given shape, used by the generic broadcast machinery as the +# output container before its storage is sized and filled. the index type is preserved. +Base.similar(Mat::JLSparseMatrixCSC{<:Any, Ti}, ::Type{T}, N::Int, M::Int) where {T, Ti} = + JLSparseMatrixCSC{T, Ti}(JLVector(ones(Ti, M + 1)), JLVector{Ti}(undef, 0), JLVector{T}(undef, 0), (N, M)) +Base.similar(Mat::JLSparseMatrixCSR{<:Any, Ti}, ::Type{T}, N::Int, M::Int) where {T, Ti} = + JLSparseMatrixCSR{T, Ti}(JLVector(ones(Ti, N + 1)), JLVector{Ti}(undef, 0), JLVector{T}(undef, 0), (N, M)) +Base.similar(Vec::JLSparseVector{<:Any, Ti}, ::Type{T}, dims::Dims{1}) where {T, Ti} = + JLSparseVector{T, Ti}(JLVector{Ti}(undef, 0), JLVector{T}(undef, 0), dims[1]) Base.similar(Mat::JLSparseMatrixCSC{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M) Base.similar(Mat::JLSparseMatrixCSR{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M) @@ -452,8 +431,7 @@ function JLSparseMatrixCSC(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} end SparseArrays.SparseMatrixCSC(xs::JLArray{<:Any,2}) = SparseMatrixCSC(JLSparseMatrixCSC(xs)) JLSparseMatrixCSC(xs::SparseVector) = JLSparseMatrixCSC(SparseMatrixCSC(xs)) -JLSparseMatrixCSC(xs::JLArray{T,2}) where {T} = - GPUArrays.sparse_from_dense(JLSparseMatrixCSC{T,Int}, xs) +JLSparseMatrixCSC(xs::JLArray{T,2}) where {T} = JLSparseMatrixCSC(JLSparseMatrixCOO(xs)) Base.length(x::JLSparseMatrixCSC) = prod(x.dims) Base.size(x::JLSparseMatrixCSC) = x.dims @@ -468,8 +446,7 @@ function JLSparseMatrixCSR(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, (xs.m, xs.n)) end JLSparseMatrixCSR(xs::SparseVector{Tv, Ti}) where {Ti, Tv} = JLSparseMatrixCSR(SparseMatrixCSC(xs)) -JLSparseMatrixCSR(xs::JLArray{T,2}) where {T} = - GPUArrays.sparse_from_dense(JLSparseMatrixCSR{T,Int}, xs) +JLSparseMatrixCSR(xs::JLArray{T,2}) where {T} = JLSparseMatrixCSR(JLSparseMatrixCOO(xs)) function JLSparseMatrixCOO(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} rowInd, colInd, nzVal = findnz(xs) return JLSparseMatrixCOO{Tv, Ti}(JLVector{Ti}(rowInd), JLVector{Ti}(colInd), @@ -508,11 +485,11 @@ SparseArrays.sparse(xs::JLArray{T,1}) where {T} = function SparseArrays.sparse(xs::JLArray{T,2}; fmt=:csc) where {T} if fmt == :csc - return GPUArrays.sparse_from_dense(JLSparseMatrixCSC{T,Int}, xs) + return JLSparseMatrixCSC(xs) elseif fmt == :csr - return GPUArrays.sparse_from_dense(JLSparseMatrixCSR{T,Int}, xs) + return JLSparseMatrixCSR(xs) elseif fmt == :coo - return GPUArrays.sparse_from_dense(JLSparseMatrixCOO{T,Int}, xs) + return JLSparseMatrixCOO(xs) else throw(ArgumentError("format :$fmt not available, use :csc, :csr, or :coo")) end diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 6d9c7b0a3..77c1544bb 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -87,7 +87,11 @@ function sparse_from_dense(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:Abstrac return ST(iPtr, nzVal, length(A)) end -function sparse_from_dense(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:AbstractGPUSparseMatrix{Tv,Ti}} +# Densifying a dense array naturally produces coordinate (COO) format, so this builds a +# COO directly. Other formats are obtained by converting the COO with `sparse_csr`/ +# `sparse_csc` (which back-ends already provide as constructors), avoiding any need to map +# a target type to its COO sibling. +function sparse_from_dense(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:AbstractGPUSparseMatrixCOO{Tv,Ti}} check_sparse_target(ST, Tv, Ti) indices = findall(!iszero, A) nstored = length(indices) @@ -99,34 +103,23 @@ function sparse_from_dense(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:Abstrac ndrange=nstored) end unsafe_free!(indices) - - if ST <: AbstractGPUSparseMatrixCOO - return ST(rowInd, colInd, nzVal, size(A), nstored) - else - COO = coo_type(ST) - return ST(COO{Tv,Ti}(rowInd, colInd, nzVal, size(A), nstored)) - end + return ST(rowInd, colInd, nzVal, size(A), nstored) end # similar Base.similar(Vec::V) where {V<:AbstractGPUSparseVector} = V(copy(SparseArrays.nonzeroinds(Vec)), similar(SparseArrays.nonzeros(Vec)), length(Vec)) Base.similar(Mat::M) where {M<:AbstractGPUSparseMatrixCSC} = M(copy(SparseArrays.getcolptr(Mat)), copy(SparseArrays.rowvals(Mat)), similar(SparseArrays.nonzeros(Mat)), size(Mat)) -Base.similar(Vec::V, T::Type) where {Tv, Ti, V<:AbstractGPUSparseVector{Tv, Ti}} = sparse_array_type(V){T, Ti}(copy(SparseArrays.nonzeroinds(Vec)), similar(SparseArrays.nonzeros(Vec), T), length(Vec)) -Base.similar(Mat::M, T::Type) where {M<:AbstractGPUSparseMatrixCSC} = sparse_array_type(M)(copy(SparseArrays.getcolptr(Mat)), copy(SparseArrays.rowvals(Mat)), similar(SparseArrays.nonzeros(Mat), T), size(Mat)) - -dense_array_type(a::AbstractArray) = dense_array_type(typeof(a)) -sparse_array_type(a::AbstractArray) = sparse_array_type(typeof(a)) -dense_vector_type(a::AbstractArray) = dense_vector_type(typeof(a)) +# `similar(A, ::Type)` (structure-preserving, new element type) is a back-end method, just +# like the dense `similar`. The generic algorithms below always build their outputs with +# `similar` (dispatched on a representative value), so there is no need for backends to +# provide `*_array_type` mapping functions to reconstruct a sparse type from a type. -dense_array_type(::Type{<:SparseVector}) = SparseVector -sparse_array_type(::Type{<:SparseVector}) = SparseVector -dense_vector_type(::Type{<:AbstractSparseArray}) = Vector -dense_vector_type(::Type{<:AbstractArray}) = Vector -dense_array_type(::Type{<:SparseMatrixCSC}) = SparseMatrixCSC -sparse_array_type(::Type{<:SparseMatrixCSC}) = SparseMatrixCSC - -coo_type(sa) = coo_type(typeof(sa)) +# Stored (minor-axis) index buffer, accessed generically through the conventional field; +# the analogue of `SparseArrays.storedinds`. Used by the broadcast/construction code. +_stored_inds(A::AbstractGPUSparseMatrixCSR) = A.colVal +_stored_inds(A::AbstractGPUSparseMatrixCSC) = A.rowVal +_stored_inds(A::AbstractGPUSparseVector) = A.iPtr function _spadjoint end function _sptranspose end @@ -152,8 +145,11 @@ end sparse_op_size(A, trans::Bool) = trans ? reverse(size(A)) : size(A) +# Format-conversion verbs: converting to the format already held is the identity; +# back-ends implement the cross-format conversions (as constructors). +sparse_csc(A::AbstractGPUSparseMatrixCSC) = A +sparse_csr(A::AbstractGPUSparseMatrixCSR) = A sparse_coo(A::AbstractGPUSparseMatrixCOO) = A -sparse_coo(A::AbstractGPUSparseMatrix) = coo_type(A)(A) @inline function sparse_atomic_add!(A::AbstractArray{<:Real}, parts, I::Integer, x) @inbounds Atomix.@atomic A[Int(I)] += x @@ -447,7 +443,7 @@ function LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSR, p::Real=2) end end -LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSC, p::Real=2) = opnorm(csr_type(A)(A), p) +LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSC, p::Real=2) = opnorm(sparse_csr(A), p) function LinearAlgebra.norm(A::AbstractGPUSparseMatrix{T}, p::Real=2) where T if p == Inf @@ -462,7 +458,7 @@ function LinearAlgebra.norm(A::AbstractGPUSparseMatrix{T}, p::Real=2) where T end function SparseArrays.findnz(S::MT) where {MT <: AbstractGPUSparseMatrix} - S2 = coo_type(MT)(S) + S2 = sparse_coo(S) I = S2.rowInd J = S2.colInd V = S2.nzVal @@ -502,61 +498,61 @@ end for SparseMatrixType in [:AbstractGPUSparseMatrixCSC, :AbstractGPUSparseMatrixCSR] @eval begin LinearAlgebra.triu(A::ST, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(coo_type(A)(A), k) ) + ST( triu(sparse_coo(A), k) ) LinearAlgebra.triu(A::Transpose{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(coo_type(A)(_sptranspose(parent(A))), k) ) + ST( triu(sparse_coo(_sptranspose(parent(A))), k) ) LinearAlgebra.triu(A::Adjoint{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(coo_type(A)(_spadjoint(parent(A))), k) ) + ST( triu(sparse_coo(_spadjoint(parent(A))), k) ) LinearAlgebra.tril(A::ST, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(coo_type(A)(A), k) ) + ST( tril(sparse_coo(A), k) ) LinearAlgebra.tril(A::Transpose{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(coo_type(A)(_sptranspose(parent(A))), k) ) + ST( tril(sparse_coo(_sptranspose(parent(A))), k) ) LinearAlgebra.tril(A::Adjoint{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(coo_type(A)(_spadjoint(parent(A))), k) ) + ST( tril(sparse_coo(_spadjoint(parent(A))), k) ) LinearAlgebra.triu(A::Union{ST, Transpose{T,<:ST}, Adjoint{T,<:ST}}) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(coo_type(A)(A), 0) ) + ST( triu(sparse_coo(A), 0) ) LinearAlgebra.tril(A::Union{ST,Transpose{T,<:ST}, Adjoint{T,<:ST}}) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(coo_type(A)(A), 0) ) + ST( tril(sparse_coo(A), 0) ) LinearAlgebra.kron(A::ST, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(A), coo_type(B)(B)) ) + ST( kron(sparse_coo(A), sparse_coo(B)) ) LinearAlgebra.kron(A::ST, B::Diagonal) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(A), B) ) + ST( kron(sparse_coo(A), B) ) LinearAlgebra.kron(A::Diagonal, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(A, coo_type(B)(B)) ) + ST( kron(A, sparse_coo(B)) ) LinearAlgebra.kron(A::Transpose{T,<:ST}, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(_sptranspose(parent(A))), coo_type(B)(B)) ) + ST( kron(sparse_coo(_sptranspose(parent(A))), sparse_coo(B)) ) LinearAlgebra.kron(A::ST, B::Transpose{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(A), coo_type(parent(B))(_sptranspose(parent(B)))) ) + ST( kron(sparse_coo(A), sparse_coo(_sptranspose(parent(B)))) ) LinearAlgebra.kron(A::Transpose{T,<:ST}, B::Transpose{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(_sptranspose(parent(A))), coo_type(parent(B))(_sptranspose(parent(B)))) ) + ST( kron(sparse_coo(_sptranspose(parent(A))), sparse_coo(_sptranspose(parent(B)))) ) LinearAlgebra.kron(A::Transpose{T,<:ST}, B::Diagonal) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(_sptranspose(parent(A))), B) ) + ST( kron(sparse_coo(_sptranspose(parent(A))), B) ) LinearAlgebra.kron(A::Diagonal, B::Transpose{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(A, coo_type(B)(_sptranspose(parent(B)))) ) + ST( kron(A, sparse_coo(_sptranspose(parent(B)))) ) LinearAlgebra.kron(A::Adjoint{T,<:ST}, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(_spadjoint(parent(A))), coo_type(B)(B)) ) + ST( kron(sparse_coo(_spadjoint(parent(A))), sparse_coo(B)) ) LinearAlgebra.kron(A::ST, B::Adjoint{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(A)(A), coo_type(parent(B))(_spadjoint(parent(B)))) ) + ST( kron(sparse_coo(A), sparse_coo(_spadjoint(parent(B)))) ) LinearAlgebra.kron(A::Adjoint{T,<:ST}, B::Adjoint{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(parent(A))(_spadjoint(parent(A))), coo_type(parent(B))(_spadjoint(parent(B)))) ) + ST( kron(sparse_coo(_spadjoint(parent(A))), sparse_coo(_spadjoint(parent(B)))) ) LinearAlgebra.kron(A::Adjoint{T,<:ST}, B::Diagonal) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(coo_type(parent(A))(_spadjoint(parent(A))), B) ) + ST( kron(sparse_coo(_spadjoint(parent(A))), B) ) LinearAlgebra.kron(A::Diagonal, B::Adjoint{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(A, coo_type(parent(B))(_spadjoint(parent(B)))) ) + ST( kron(A, sparse_coo(_spadjoint(parent(B)))) ) function Base.reshape(A::ST, dims::Dims) where {ST<:$SparseMatrixType} - B = coo_type(A)(A) + B = sparse_coo(A) ST(reshape(B, dims)) end function SparseArrays.droptol!(A::ST, tol::Real) where {ST<:$SparseMatrixType} - B = coo_type(A)(A) + B = sparse_coo(A) droptol!(B, tol) copyto!(A, ST(B)) end @@ -1184,17 +1180,10 @@ function Broadcast.copy(bc::Broadcasted{<:Union{GPUSparseVecStyle,GPUSparseMatSt elseif length(sparse_args) == 1 && sparse_typ <: Union{AbstractGPUSparseMatrixCSR, AbstractGPUSparseMatrixCSC} # we only have a single sparse input, so we can reuse its structure for the output. # this avoids a kernel launch and costly synchronization. - if sparse_typ <: AbstractGPUSparseMatrixCSR - offsets = rowPtr = sparse_arg.rowPtr - colVal = similar(sparse_arg.colVal) - nzVal = similar(sparse_arg.nzVal, Tv) - output = sparse_array_type(sparse_typ)(rowPtr, colVal, nzVal, size(bc)) - elseif sparse_typ <: AbstractGPUSparseMatrixCSC - offsets = colPtr = sparse_arg.colPtr - rowVal = similar(sparse_arg.rowVal) - nzVal = similar(sparse_arg.nzVal, Tv) - output = sparse_array_type(sparse_typ)(colPtr, rowVal, nzVal, size(bc)) - end + # structure is preserved, so a structure-preserving `similar` gives the output + # container directly; the kernel rewrites the (identical) pointer array from `offsets`. + output = similar(sparse_arg, Tv) + offsets = sparse_typ <: AbstractGPUSparseMatrixCSR ? sparse_arg.rowPtr : sparse_arg.colPtr else # determine the number of non-zero elements per row so that we can create an # appropriately-structured output container @@ -1240,17 +1229,22 @@ function Broadcast.copy(bc::Broadcasted{<:Union{GPUSparseVecStyle,GPUSparseMatSt total_nnz = mapreduce(x->first(x) != typemax(first(x)), +, offsets) end output = if sparse_typ <: Union{AbstractGPUSparseMatrixCSR,AbstractGPUSparseMatrixCSC} - ixVal = similar(offsets, Ti, total_nnz) - nzVal = similar(offsets, Tv, total_nnz) - output_sparse_typ = sparse_array_type(sparse_typ) - output_sparse_typ(offsets, ixVal, nzVal, size(bc)) + # build an empty sparse output of the same kind, then size its index/value + # storage to the computed nnz. The kernel fills the pointer array (from + # `offsets`), the stored indices, and the values. + out = similar(sparse_arg, Tv, size(bc)) + resize!(_stored_inds(out), total_nnz) + resize!(nonzeros(out), total_nnz) + out.nnz = total_nnz + out elseif sparse_typ <: AbstractGPUSparseVector && !fpreszeros - val_array = bc.args[first(sparse_args)].nzVal - similar(val_array, Tv, size(bc)) + similar(nonzeros(sparse_arg), Tv, size(bc)) elseif sparse_typ <: AbstractGPUSparseVector && fpreszeros - iPtr = similar(offsets, Ti, total_nnz) - nzVal = similar(offsets, Tv, total_nnz) - sparse_array_type(sparse_arg){Tv, Ti}(iPtr, nzVal, rows) + out = similar(sparse_arg, Tv, (rows,)) + resize!(_stored_inds(out), total_nnz) + resize!(nonzeros(out), total_nnz) + out.nnz = total_nnz + out end if sparse_typ <: AbstractGPUSparseVector && !fpreszeros nonsparse_args = map(bc.args) do arg @@ -1353,9 +1347,6 @@ end end ## COV_EXCL_STOP -function csc_type end -function csr_type end - # TODO: implement mapreducedim! function Base.mapreduce(f, op, A::AbstractGPUSparseMatrix; dims=:, init=nothing) # figure out the destination container type by looking at the initializer element, @@ -1377,9 +1368,9 @@ function Base.mapreduce(f, op, A::AbstractGPUSparseMatrix; dims=:, init=nothing) in(dims, [Colon(), 1, 2]) || error("only dims=:, dims=1 or dims=2 is supported") if A isa AbstractGPUSparseMatrixCSR && dims == 1 - A = csc_type(A)(A) + A = sparse_csc(A) elseif A isa AbstractGPUSparseMatrixCSC && dims == 2 - A = csr_type(A)(A) + A = sparse_csr(A) end m, n = size(A) val_array = nonzeros(A) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 431a5cd3c..f9c91d0f1 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -2,14 +2,14 @@ sparse_ATs = sparse_types(AT) for sparse_AT in sparse_ATs if sparse_AT <: AbstractSparseVector - vector(sparse_AT, eltypes) - vector_construction(sparse_AT, eltypes) - broadcasting_vector(sparse_AT, eltypes) + vector(sparse_AT, AT, eltypes) + vector_construction(sparse_AT, AT, eltypes) + broadcasting_vector(sparse_AT, AT, eltypes) iszero_vector(sparse_AT, eltypes) elseif sparse_AT <: AbstractSparseMatrix matrix(sparse_AT, eltypes) - matrix_construction(sparse_AT, eltypes) - broadcasting_matrix(sparse_AT, eltypes) + matrix_construction(sparse_AT, AT, eltypes) + broadcasting_matrix(sparse_AT, AT, eltypes) mapreduce_matrix(sparse_AT, eltypes) linalg(sparse_AT, eltypes) iszero_matrix(sparse_AT, eltypes) @@ -68,6 +68,12 @@ function coo_lhs(::Type{T}) where {T<:Complex} ] end +# A non-zero-preserving broadcast densifies its result: GPU back-ends return the dense +# array type `dense_AT`, whereas CPU `SparseArrays` returns its sparse type with every +# entry explicitly stored. Accept either so the suite is meaningful for both. +is_densified(x, dense_AT, ::Type{ET}) where {ET} = + dense_AT <: GPUArrays.AnyGPUArray ? x isa dense_AT{ET} : (issparse(x) && eltype(x) === ET) + function coo_matmul(AT, dense_AT, eltypes) for ET in (Float16, Float32, ComplexF16, ComplexF32) ET in eltypes || continue @@ -103,8 +109,7 @@ function coo_matmul(AT, dense_AT, eltypes) end end -function vector(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) +function vector(AT, dense_AT, eltypes) for ET in eltypes @testset "Sparse vector properties($ET)" begin m = 25 @@ -141,8 +146,7 @@ function vector(AT, eltypes) end end -function vector_construction(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) +function vector_construction(AT, dense_AT, eltypes) for ET in eltypes m = 25 n = 35 @@ -158,7 +162,7 @@ function vector_construction(AT, eltypes) dense_x = dense_AT(collect(x)) @test SparseVector(dense_x) == x if dense_x isa GPUArrays.AnyGPUArray - typed_d_x = GPUArrays.sparse_from_dense(typeof(d_x), dense_x) + typed_d_x = AT(dense_x) @test typed_d_x isa typeof(d_x) @test SparseVector(typed_d_x) == x end @@ -168,7 +172,6 @@ function vector_construction(AT, eltypes) end function matrix(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes @testset "Sparse matrix properties($ET)" begin m = 25 @@ -224,8 +227,7 @@ function matrix(AT, eltypes) end end -function matrix_construction(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) +function matrix_construction(AT, dense_AT, eltypes) for ET in eltypes m = 25 n = 35 @@ -245,11 +247,11 @@ function matrix_construction(AT, eltypes) dense_x = dense_AT(collect(x)) @test SparseMatrixCSC(dense_x) == x if dense_x isa GPUArrays.AnyGPUArray - typed_d_x = GPUArrays.sparse_from_dense(typeof(d_x), dense_x) + typed_d_x = AT(dense_x) @test typed_d_x isa typeof(d_x) @test SparseMatrixCSC(typed_d_x) == x end - @test sparse(dense_x) isa GPUArrays.sparse_array_type(dense_x){ET} + @test issparse(sparse(dense_x)) @test SparseMatrixCSC(sparse(dense_x)) == x if dense_x isa GPUArrays.AnyGPUArray @test SparseMatrixCSC(sparse(dense_x; fmt=:csr)) == x @@ -257,8 +259,7 @@ function matrix_construction(AT, eltypes) end end -function broadcasting_vector(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) +function broadcasting_vector(AT, dense_AT, eltypes) for ET in eltypes @testset "SparseVector($ET)" begin m = 64 @@ -278,14 +279,14 @@ function broadcasting_vector(AT, eltypes) # not zero-preserving y = x .+ ET(1) dy = dx .+ ET(1) - @test dy isa dense_AT{ET} + @test is_densified(dy, dense_AT, ET) hy = Array(dy) @test Array(y) == hy # involving something dense y = x .+ ones(ET, m) dy = dx .+ dense_AT(ones(ET, m)) - @test dy isa dense_AT{ET} + @test is_densified(dy, dense_AT, ET) @test Array(y) == Array(dy) # sparse to sparse @@ -316,7 +317,7 @@ function broadcasting_vector(AT, eltypes) dw = AT(w) z = @. x * y * w * dense_arr dz = @. dx * dy * dw * d_dense_arr - @test dz isa dense_AT{ET} + @test is_densified(dz, dense_AT, ET) @test Array(z) == Array(dz) y = sprand(ET, m, p) @@ -332,7 +333,7 @@ function broadcasting_vector(AT, eltypes) dx = AT(x) dy = dx .+ 1 y = x .+ 1 - @test dy isa dense_AT{promote_type(ET, Int)} + @test is_densified(dy, dense_AT, promote_type(ET, Int)) @test Array(y) == Array(dy) ## zero-preserving dy = dx .* 1 @@ -346,8 +347,7 @@ function broadcasting_vector(AT, eltypes) end end -function broadcasting_matrix(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) +function broadcasting_matrix(AT, dense_AT, eltypes) for ET in eltypes @testset "SparseMatrix($ET)" begin m, n = 5, 6 @@ -363,7 +363,7 @@ function broadcasting_matrix(AT, eltypes) # not zero-preserving y = x .+ ET(1) dy = dx .+ ET(1) - @test dy isa dense_AT{ET} + @test is_densified(dy, dense_AT, ET) hy = Array(dy) dense_y = Array(y) @test Array(y) == Array(dy) @@ -371,7 +371,7 @@ function broadcasting_matrix(AT, eltypes) # involving something dense y = x .* ones(ET, m, n) dy = dx .* dense_AT(ones(ET, m, n)) - @test dy isa dense_AT{ET} + @test is_densified(dy, dense_AT, ET) @test Array(y) == Array(dy) # multiple inputs @@ -407,7 +407,6 @@ function broadcasting_matrix(AT, eltypes) end function mapreduce_matrix(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes @testset "SparseMatrix($ET)" begin m,n = 5,6 @@ -460,7 +459,6 @@ function mapreduce_matrix(AT, eltypes) end function linalg(AT, eltypes) - dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes # sprandn doesn't work nicely with these... if !(ET <: Union{Int16, Int32, Int64, Complex{Int16}, Complex{Int32}, Complex{Int64}}) From 7e149d08534fdafee7feada6e24b3e491bfcdfd9 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Jun 2026 23:13:23 +0200 Subject: [PATCH 05/15] testsuite: only test sparse `abs`-reductions when the result type is supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `mapreduce(abs, …)` over a `Complex{<:Integer}` sparse matrix has result element type `Float64` (since `abs(::Complex{<:Integer})::Float64`). Back-ends that can't allocate that type (e.g. Metal, which has no `Float64`) errored on these cases. Gate the `abs`-reductions on the result type being among the back-end's supported eltypes; `sum`, which preserves the input eltype, stays unconditional. Co-Authored-By: Claude Opus 4.8 (1M context) --- test/testsuite/sparse.jl | 47 ++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index f9c91d0f1..7a15dd09e 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -414,32 +414,45 @@ function mapreduce_matrix(AT, eltypes) x = sprand(ET, m, n, p) dx = AT(x) + # The `abs`-reductions below produce a result of element type `RT`; for some + # input eltypes that is a type the back-end can't allocate (e.g. `abs` of a + # `Complex{<:Integer}` is `Float64`, unsupported on Metal). Only exercise them + # when the back-end supports the result type; `sum` keeps the input eltype and + # is always tested. + RT = typeof(abs(zero(ET))) + # dim=: y = sum(x) dy = sum(dx) @test y ≈ dy - y = mapreduce(abs, +, x) - dy = mapreduce(abs, +, dx) - @test y ≈ dy + if RT in eltypes + y = mapreduce(abs, +, x) + dy = mapreduce(abs, +, dx) + @test y ≈ dy + end # dim=1 y = sum(x, dims=1) dy = sum(dx, dims=1) @test y ≈ Array(dy) - y = mapreduce(abs, +, x, dims=1) - dy = mapreduce(abs, +, dx, dims=1) - @test y ≈ Array(dy) + if RT in eltypes + y = mapreduce(abs, +, x, dims=1) + dy = mapreduce(abs, +, dx, dims=1) + @test y ≈ Array(dy) + end # dim=2 y = sum(x, dims=2) dy = sum(dx, dims=2) @test y ≈ Array(dy) - y = mapreduce(abs, +, x, dims=2) - dy = mapreduce(abs, +, dx, dims=2) - @test y ≈ Array(dy) + if RT in eltypes + y = mapreduce(abs, +, x, dims=2) + dy = mapreduce(abs, +, dx, dims=2) + @test y ≈ Array(dy) + end if ET in (Float32, Float64) dy = mapreduce(abs, +, dx; init=zero(ET)) y = mapreduce(abs, +, x; init=zero(ET)) @@ -447,13 +460,15 @@ function mapreduce_matrix(AT, eltypes) end # test with a matrix with fully empty rows - x = zeros(ET, m, n) - x[2, :] .= -one(ET) - x[2, end] = -ET(16) - dx = AT(sparse(x)) - y = mapreduce(abs, max, x) - dy = mapreduce(abs, max, dx) - @test y ≈ dy + if RT in eltypes + x = zeros(ET, m, n) + x[2, :] .= -one(ET) + x[2, end] = -ET(16) + dx = AT(sparse(x)) + y = mapreduce(abs, max, x) + dy = mapreduce(abs, max, dx) + @test y ≈ dy + end end end end From 92bde191ad055ed95ec8bbe1e4f811512af15e7b Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 22 Jun 2026 23:14:11 +0200 Subject: [PATCH 06/15] docs: document the sparse array back-end interface Spell out the parallel sparse hierarchy, the storage structs and their conventional field names, and the methods a back-end implements (constructors, `similar`, and the `sparse_csc`/`sparse_csr`/`sparse_coo` conversion verbs), plus the generic functionality that comes for free. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/src/interface.md | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/docs/src/interface.md b/docs/src/interface.md index a9c57c539..1e86b8245 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -31,6 +31,52 @@ KernelAbstractions.get_backend(a::CA) where CA <: CustomArray = CustomBackend() There are numerous examples of potential interfaces for GPUArrays, such as with [JLArrays](https://github.com/JuliaGPU/GPUArrays.jl/blob/main/lib/JLArrays/src/JLArrays.jl), [CuArrays](https://github.com/JuliaGPU/CUDA.jl/blob/main/src/gpuarrays.jl), and [ROCArrays](https://github.com/JuliaGPU/AMDGPU.jl/blob/main/src/gpuarrays.jl). +## Sparse arrays + +Sparse arrays cannot share the `AbstractGPUArray` supertype: that is a +`DenseArray`, whereas a sparse array must be an `AbstractSparseArray`. GPUArrays +therefore provides a parallel hierarchy, and generic sparse functionality +(broadcast, `mapreduce`, sparse/dense matrix multiplication, `norm`/`opnorm`, +`findnz`, `triu`/`tril`/`kron`, format conversion, indexing) is defined on it. + +A back-end provides one mutable struct per supported format, subtyping the +matching abstract type and using the conventional field names (the generic code +reads them directly): + +| supertype | fields | +|:--|:--| +| `AbstractGPUSparseVector{Tv,Ti}` | `iPtr`, `nzVal`, `len`, `nnz` | +| `AbstractGPUSparseMatrixCSC{Tv,Ti}` | `colPtr`, `rowVal`, `nzVal`, `dims`, `nnz` | +| `AbstractGPUSparseMatrixCSR{Tv,Ti}` | `rowPtr`, `colVal`, `nzVal`, `dims`, `nnz` | +| `AbstractGPUSparseMatrixCOO{Tv,Ti}` | `rowInd`, `colInd`, `nzVal`, `dims`, `nnz` | + +where the index/pointer/value arrays are the back-end's own dense vector type +(so `nonzeros(A)`, `SparseArrays.nonzeroinds`, `rowvals`, `getcolptr` work, and +`get_backend(nonzeros(A))` identifies the compute backend). + +On top of the structs, a back-end implements: + + * constructors from the component arrays (e.g. + `MyCSR(rowPtr, colVal, nzVal, dims)`) and the inter-format and + dense↔sparse conversions (`MyCSR(::MyCOO)`, `MyCSC(::SparseMatrixCSC)`, + `SparseMatrixCSC(::MyCSC)`, …); + + * `Base.similar` — structure-preserving (`similar(A)`, `similar(A, ::Type)`) + and empty-of-a-shape (`similar(A, ::Type, dims)`), exactly as for dense + arrays. The generic algorithms allocate their outputs through `similar` + rather than by naming a type; + + * the format-conversion verbs `GPUArrays.sparse_csc`, `GPUArrays.sparse_csr` + and `GPUArrays.sparse_coo`, for the formats other than the one each + produces (converting to the format already held is the generic identity). + These are the value-level equivalent of `SparseArrays.sparse` and back the + generic code that needs a particular layout (e.g. matrix multiplication + funnels through COO). + +`sparse_from_dense(::Type{<:AbstractGPUSparseMatrixCOO}, A)` is provided +generically to densify-scan into COO; a back-end's CSR/CSC-from-dense +constructors can be defined as `sparse_csr`/`sparse_csc` of that COO. + ## Caching Allocator ```@docs From 38b5dc52ef1f01b14e08ecb98528fd0dec363281 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 06:58:07 +0200 Subject: [PATCH 07/15] =?UTF-8?q?Add=20on-device=20`to=5Fdense`;=20rename?= =?UTF-8?q?=20`sparse=5Ffrom=5Fdense`=20=E2=86=92=20`to=5Fsparse`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Densifying a sparse array had no generic on-device path: back-ends did it with a host round-trip (`Array(collect(SparseMatrixCSC(x)))`). Add `to_dense(A)`, which scatters the stored entries into a dense array of the same back-end with a kernel (allocated via `similar(nonzeros(A), …)`), no host transfer. Coordinates are taken to be unique -- true for CSC/CSR and for COO produced by conversion. Rename the existing `sparse_from_dense` to `to_sparse` so the two on-device conversions form a symmetric, discoverable pair (`to_sparse` needs a target format; `to_dense` does not). JLArrays uses `to_dense` for `JLArray(::JLSparse…)`, and the interface docs are updated. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/src/interface.md | 9 ++++--- lib/JLArrays/src/JLArrays.jl | 14 +++++------ src/host/sparse.jl | 49 ++++++++++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/docs/src/interface.md b/docs/src/interface.md index 1e86b8245..4bd21efa0 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -73,9 +73,12 @@ On top of the structs, a back-end implements: generic code that needs a particular layout (e.g. matrix multiplication funnels through COO). -`sparse_from_dense(::Type{<:AbstractGPUSparseMatrixCOO}, A)` is provided -generically to densify-scan into COO; a back-end's CSR/CSC-from-dense -constructors can be defined as `sparse_csr`/`sparse_csc` of that COO. +Dense↔sparse conversion is provided generically and on-device: `to_sparse(::Type{ST}, +A)` scans a dense array into a sparse one (`ST` is an `AbstractGPUSparseVector` or +`AbstractGPUSparseMatrixCOO` -- CSR/CSC follow via the conversion verbs), and `to_dense(A)` +is the inverse, scattering a sparse array into a dense one of the same back-end. A +back-end's dense↔sparse constructors (`MyArray(::MySparse…)`, `MyCSR(::MyDense)`) can be +defined in terms of these. ## Caching Allocator diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index bb1011a0d..aa935950c 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -298,10 +298,10 @@ Base.similar(Mat::JLSparseMatrixCSR, T::Type, dims::Tuple{Int, Int}) = similar(M Base.similar(Mat::JLSparseMatrixCSC, dims::Tuple{Int, Int}) = similar(Mat, dims...) Base.similar(Mat::JLSparseMatrixCSR, dims::Tuple{Int, Int}) = similar(Mat, dims...) -JLArray(x::JLSparseVector) = JLArray(collect(SparseVector(x))) -JLArray(x::JLSparseMatrixCSC) = JLArray(collect(SparseMatrixCSC(x))) -JLArray(x::JLSparseMatrixCSR) = JLArray(collect(SparseMatrixCSC(x))) -JLArray(x::JLSparseMatrixCOO) = JLArray(collect(SparseMatrixCSC(x))) +JLArray(x::JLSparseVector) = GPUArrays.to_dense(x) +JLArray(x::JLSparseMatrixCSC) = GPUArrays.to_dense(x) +JLArray(x::JLSparseMatrixCSR) = GPUArrays.to_dense(x) +JLArray(x::JLSparseMatrixCOO) = GPUArrays.to_dense(x) # conversion of untyped data to a typed Array function typed_data(x::JLArray{T}) where {T} @@ -416,7 +416,7 @@ function JLSparseVector(xs::SparseVector{Tv, Ti}) where {Ti, Tv} end SparseArrays.SparseVector(xs::JLArray{<:Any,1}) = SparseVector(JLSparseVector(xs)) JLSparseVector(xs::JLArray{T,1}) where {T} = - GPUArrays.sparse_from_dense(JLSparseVector{T,Int}, xs) + GPUArrays.to_sparse(JLSparseVector{T,Int}, xs) Base.length(x::JLSparseVector) = x.len Base.size(x::JLSparseVector) = (x.len,) @@ -454,7 +454,7 @@ function JLSparseMatrixCOO(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv} end JLSparseMatrixCOO(xs::SparseVector) = JLSparseMatrixCOO(SparseMatrixCSC(xs)) JLSparseMatrixCOO(xs::JLArray{T,2}) where {T} = - GPUArrays.sparse_from_dense(JLSparseMatrixCOO{T,Int}, xs) + GPUArrays.to_sparse(JLSparseMatrixCOO{T,Int}, xs) function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv} return JLSparseMatrixCSR(SparseMatrixCSC(xs)) end @@ -481,7 +481,7 @@ function JLSparseMatrixCSR{Tv,Ti}(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} end SparseArrays.sparse(xs::JLArray{T,1}) where {T} = - GPUArrays.sparse_from_dense(JLSparseVector{T,Int}, xs) + GPUArrays.to_sparse(JLSparseVector{T,Int}, xs) function SparseArrays.sparse(xs::JLArray{T,2}; fmt=:csc) where {T} if fmt == :csc diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 77c1544bb..406e1862a 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -73,7 +73,11 @@ end end end -function sparse_from_dense(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:AbstractGPUSparseVector{Tv,Ti}} +# `to_sparse`/`to_dense` are the on-device conversions between a dense GPU array and a +# sparse one. `to_sparse` needs a target format (the result layout is ambiguous); +# `to_dense` does not. + +function to_sparse(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:AbstractGPUSparseVector{Tv,Ti}} check_sparse_target(ST, Tv, Ti) indices = findall(!iszero, A) nstored = length(indices) @@ -91,7 +95,7 @@ end # COO directly. Other formats are obtained by converting the COO with `sparse_csr`/ # `sparse_csc` (which back-ends already provide as constructors), avoiding any need to map # a target type to its COO sibling. -function sparse_from_dense(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:AbstractGPUSparseMatrixCOO{Tv,Ti}} +function to_sparse(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:AbstractGPUSparseMatrixCOO{Tv,Ti}} check_sparse_target(ST, Tv, Ti) indices = findall(!iszero, A) nstored = length(indices) @@ -106,6 +110,47 @@ function sparse_from_dense(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:Abstrac return ST(rowInd, colInd, nzVal, size(A), nstored) end +@kernel function densify_vector_kernel!(out, iPtr, nzVal) + k = @index(Global, Linear) + if k <= length(nzVal) + @inbounds out[iPtr[k]] = nzVal[k] + end +end + +@kernel function densify_matrix_kernel!(out, rowInd, colInd, nzVal) + k = @index(Global, Linear) + if k <= length(nzVal) + @inbounds out[rowInd[k], colInd[k]] = nzVal[k] + end +end + +""" + to_dense(A) + +Materialize the sparse GPU array `A` into a dense array of the same back-end, on the +device (no host round-trip); the inverse of [`to_sparse`](@ref). Stored coordinates are +assumed unique (true for CSC/CSR and for COO produced by conversion); duplicate COO +coordinates resolve to the last value written. +""" +function to_dense(x::AbstractGPUSparseVector{Tv}) where {Tv} + out = fill!(similar(nonzeros(x), Tv, length(x)), zero(Tv)) + if nnz(x) > 0 + densify_vector_kernel!(get_backend(out))(out, SparseArrays.nonzeroinds(x), nonzeros(x); + ndrange=nnz(x)) + end + return out +end + +function to_dense(A::AbstractGPUSparseMatrix{Tv}) where {Tv} + coo = sparse_coo(A) + out = fill!(similar(nonzeros(A), Tv, size(A)), zero(Tv)) + if nnz(coo) > 0 + densify_matrix_kernel!(get_backend(out))(out, coo.rowInd, coo.colInd, nonzeros(coo); + ndrange=nnz(coo)) + end + return out +end + # similar Base.similar(Vec::V) where {V<:AbstractGPUSparseVector} = V(copy(SparseArrays.nonzeroinds(Vec)), similar(SparseArrays.nonzeros(Vec)), length(Vec)) Base.similar(Mat::M) where {M<:AbstractGPUSparseMatrixCSC} = M(copy(SparseArrays.getcolptr(Mat)), copy(SparseArrays.rowvals(Mat)), similar(SparseArrays.nonzeros(Mat)), size(Mat)) From 637acb0658b48f4c0c38adf15551efbf2eaa52d5 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 07:14:18 +0200 Subject: [PATCH 08/15] docs: restructure the sparse interface; document `to_sparse`/`to_dense` Reorganize the sparse section of the interface docs into three parts -- the storage types a back-end provides, the methods it implements to integrate them, and the functionality it gets for free -- and round it out (device-struct `Adapt` rule, `get_backend`, `_sptranspose`/`_spadjoint`, the `to_sparse`/ `to_dense` conversions, and the full list of generic operations). Also give `to_sparse` a docstring to match `to_dense`. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/src/interface.md | 82 ++++++++++++++++++++++++------------------- src/host/sparse.jl | 11 ++++-- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/docs/src/interface.md b/docs/src/interface.md index 4bd21efa0..0aeb49636 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -33,15 +33,16 @@ There are numerous examples of potential interfaces for GPUArrays, such as with ## Sparse arrays -Sparse arrays cannot share the `AbstractGPUArray` supertype: that is a -`DenseArray`, whereas a sparse array must be an `AbstractSparseArray`. GPUArrays -therefore provides a parallel hierarchy, and generic sparse functionality -(broadcast, `mapreduce`, sparse/dense matrix multiplication, `norm`/`opnorm`, -`findnz`, `triu`/`tril`/`kron`, format conversion, indexing) is defined on it. +A sparse array can't share the `AbstractGPUArray` supertype — that is a `DenseArray`, +whereas a sparse array must be an `AbstractSparseArray` — so GPUArrays keeps a parallel +sparse hierarchy with its own generic functionality. Integrating a back-end has three +parts: the storage types it provides, the methods it implements to plug them in, and the +functionality it then gets for free. -A back-end provides one mutable struct per supported format, subtyping the -matching abstract type and using the conventional field names (the generic code -reads them directly): +### Storage types to provide + +One mutable struct per supported format, subtyping the matching abstract type and using +the conventional field names (generic code reads them directly): | supertype | fields | |:--|:--| @@ -50,35 +51,42 @@ reads them directly): | `AbstractGPUSparseMatrixCSR{Tv,Ti}` | `rowPtr`, `colVal`, `nzVal`, `dims`, `nnz` | | `AbstractGPUSparseMatrixCOO{Tv,Ti}` | `rowInd`, `colInd`, `nzVal`, `dims`, `nnz` | -where the index/pointer/value arrays are the back-end's own dense vector type -(so `nonzeros(A)`, `SparseArrays.nonzeroinds`, `rowvals`, `getcolptr` work, and -`get_backend(nonzeros(A))` identifies the compute backend). - -On top of the structs, a back-end implements: - - * constructors from the component arrays (e.g. - `MyCSR(rowPtr, colVal, nzVal, dims)`) and the inter-format and - dense↔sparse conversions (`MyCSR(::MyCOO)`, `MyCSC(::SparseMatrixCSC)`, - `SparseMatrixCSC(::MyCSC)`, …); - - * `Base.similar` — structure-preserving (`similar(A)`, `similar(A, ::Type)`) - and empty-of-a-shape (`similar(A, ::Type, dims)`), exactly as for dense - arrays. The generic algorithms allocate their outputs through `similar` - rather than by naming a type; - - * the format-conversion verbs `GPUArrays.sparse_csc`, `GPUArrays.sparse_csr` - and `GPUArrays.sparse_coo`, for the formats other than the one each - produces (converting to the format already held is the generic identity). - These are the value-level equivalent of `SparseArrays.sparse` and back the - generic code that needs a particular layout (e.g. matrix multiplication - funnels through COO). - -Dense↔sparse conversion is provided generically and on-device: `to_sparse(::Type{ST}, -A)` scans a dense array into a sparse one (`ST` is an `AbstractGPUSparseVector` or -`AbstractGPUSparseMatrixCOO` -- CSR/CSC follow via the conversion verbs), and `to_dense(A)` -is the inverse, scattering a sparse array into a dense one of the same back-end. A -back-end's dense↔sparse constructors (`MyArray(::MySparse…)`, `MyCSR(::MyDense)`) can be -defined in terms of these. +The pointer/index/value arrays are the back-end's own dense vector type. Provide only the +formats you need, but note that several generic operations route through COO. + +### Interface to implement + + * **Constructors** — from component arrays (`MyCSR(rowPtr, colVal, nzVal, dims)`), + between formats (`MyCSR(::MyCOO)`, …), and to/from host `SparseArrays` + (`MyCSC(::SparseMatrixCSC)`, `SparseMatrixCSC(::MyCSC)`). + * **`Base.similar`** — structure-preserving (`similar(A)`, `similar(A, ::Type)`) and + empty-of-a-shape (`similar(A, ::Type, dims)`), as for dense arrays; generic code + allocates its outputs through `similar`, never by naming a type. + * **Conversion verbs** `GPUArrays.sparse_csc`/`sparse_csr`/`sparse_coo`, for the formats + other than the one each produces (the identity case is generic) — the value-level + analogue of `SparseArrays.sparse`. + * **`KernelAbstractions.get_backend`** for the sparse types (usually + `get_backend(nonzeros(A))`). + * **`Adapt.adapt_structure`** converting each host struct to its device counterpart + (`GPUArrays.GPUSparseDeviceVector`, `GPUSparseDeviceMatrixCSC`/`CSR`/`COO`), so the + generic kernels can consume it inside `@kernel`s. + * **`GPUArrays._sptranspose`/`_spadjoint`** — materialize a (conjugate) transpose; used + by `kron`/`triu`/`tril` on lazily wrapped operands. + +`SparseArrays`' accessors (`nnz`, `nonzeros`, `nonzeroinds`, `rowvals`, `getcolptr`) come +for free from the field names. Dense↔sparse conversion is generic and on-device: +`to_sparse(::Type{ST}, dense)` scans into a sparse array (`ST` a vector or COO type; +CSR/CSC follow via the verbs) and `to_dense(A)` scatters back to a dense array of the +back-end — so a back-end's `MyArray(::MySparse…)` and dense→sparse constructors can simply +call them. + +### Functionality you get + +Broadcasting; `mapreduce` and reductions (`sum`, `norm`, `opnorm`); sparse–dense and +sparse–vector multiplication (`*`, `mul!`, including transposed/adjoint operands); +`findnz`, `triu`/`tril`/`kron`/`reshape`/`droptol!`; `iszero`/`issymmetric`/`ishermitian`; +scalar and slice indexing; `copy`/`copyto!`/`collect`/`Array`; and conversion between +formats and to/from dense. ## Caching Allocator diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 406e1862a..f057016c0 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -73,10 +73,15 @@ end end end -# `to_sparse`/`to_dense` are the on-device conversions between a dense GPU array and a -# sparse one. `to_sparse` needs a target format (the result layout is ambiguous); -# `to_dense` does not. +""" + to_sparse(ST, A) +Build a sparse array of type `ST` from the dense GPU array `A`, on the device (no host +round-trip); the inverse of [`to_dense`](@ref). `ST` must be a concrete +`AbstractGPUSparseVector` or `AbstractGPUSparseMatrixCOO` (other matrix formats follow by +converting the resulting COO with `sparse_csr`/`sparse_csc`). The target format is required +because the result layout would otherwise be ambiguous; `to_dense` needs no such argument. +""" function to_sparse(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:AbstractGPUSparseVector{Tv,Ti}} check_sparse_target(ST, Tv, Ti) indices = findall(!iszero, A) From 22313e19f088a4e256655b7e74f283059c3df6b1 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 14:54:50 +0200 Subject: [PATCH 09/15] Add on-device sparse format conversions via `convert` Co-Authored-By: Claude Opus 4.8 (1M context) --- src/host/sparse.jl | 171 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 2 deletions(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index f057016c0..7c381b0d8 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -195,12 +195,179 @@ end sparse_op_size(A, trans::Bool) = trans ? reverse(size(A)) : size(A) -# Format-conversion verbs: converting to the format already held is the identity; -# back-ends implement the cross-format conversions (as constructors). +# Format-conversion verbs: converting to the format already held is the identity. The +# cross-format conversions are `convert` (below). A back-end supplies the 3 verb methods +# naming its concrete sibling types — `sparse_coo(A::MyCSC) = convert(MyCOO, A)` — which is +# also where its `MyCOO(::MyCSC)` constructors route. sparse_csc(A::AbstractGPUSparseMatrixCSC) = A sparse_csr(A::AbstractGPUSparseMatrixCSR) = A sparse_coo(A::AbstractGPUSparseMatrixCOO) = A +## Generic format conversions (CSC↔CSR↔COO), on-device. +# +# Conversion is owned by `convert` (dispatched on GPUArrays' own abstract types, so not type +# piracy); construction is owned by the back-end through its conventional field constructor, +# which `convert` calls — the same division `SparseArrays` uses (`convert`/constructors for +# conversion, `SparseMatrixCSC(m, n, colptr, rowval, nzval)` for construction). Conversions +# preserve the source's value/index eltypes; every buffer is allocated from a source array +# via `similar`, so the storage mode and back-end are inherited. `sortperm` on a back-end +# vector is required (the COO→CSR/CSC path; already implied by `findnz`). + +@inline function sparse_lower_bound(xs, lo::Ti, hi::Ti, key::Ti) where {Ti} + l = lo + r = hi + while l < r + m = (l + r) >>> 1 + @inbounds v = xs[m] + if v < key + l = m + one(Ti) + else + r = m + end + end + return l +end + +# Expand a compressed pointer array (CSC `colPtr` / CSR `rowPtr`, length `dim+1`) into one +# explicit lead index per stored entry: thread `lead` writes its index into the slots +# `ptr[lead]:ptr[lead+1]-1` of `out`. +@kernel function expand_ptr_kernel!(out, ptr) + lead = @index(Global, Linear) + if lead <= length(ptr) - 1 + T = eltype(out) + @inbounds begin + k = ptr[lead] + stop = ptr[lead + 1] + v = T(lead) + while k < stop + out[k] = v + k += oneunit(k) + end + end + end +end + +# Build a lead-major linear sort key per stored COO entry (lead-major: sorting by the key +# orders entries by `(lead, sub)`); `subdim` is the size of the sub axis. +@kernel function coo_sort_keys_kernel!(keys, leadInd, subInd, subdim) + i = @index(Global, Linear) + if i <= length(keys) + @inbounds keys[i] = (Int64(leadInd[i]) - Int64(1)) * Int64(subdim) + Int64(subInd[i]) + end +end + +# Gather the three COO arrays through a permutation (e.g. from `sortperm`). +@kernel function coo_gather_kernel!(leadOut, subOut, nzOut, leadIn, subIn, nzIn, perm) + i = @index(Global, Linear) + if i <= length(perm) + @inbounds begin + src = perm[i] + leadOut[i] = leadIn[src] + subOut[i] = subIn[src] + nzOut[i] = nzIn[src] + end + end +end + +# Build a compressed pointer array from sorted lead indices: `ptr[i]` is the first 1-based +# position of a stored entry whose lead index is ≥ `i`, and `ptr[end] = nnz+1`. +@kernel function compressed_ptr_from_sorted_kernel!(ptr, sortedLead, nnz) + i = @index(Global, Linear) + Ti = eltype(ptr) + if i <= length(ptr) + @inbounds ptr[i] = if i == length(ptr) + Ti(nnz) + one(Ti) + else + sparse_lower_bound(sortedLead, one(Ti), Ti(nnz) + one(Ti), Ti(i)) + end + end +end + +# Order a COO matrix's stored entries lead-major, returning fresh (lead, sub, nz) arrays at +# the source eltypes. Duplicate coordinates are preserved (no summing). +function _sorted_coo(A::AbstractGPUSparseMatrixCOO{Tv,Ti}, ::Val{lead}) where {Tv,Ti,lead} + lead === :row || lead === :col || + throw(ArgumentError("COO sort lead must be :row or :col")) + total = Int(nnz(A)) + leadIn = lead === :row ? A.rowInd : A.colInd + subIn = lead === :row ? A.colInd : A.rowInd + subdim = lead === :row ? size(A, 2) : size(A, 1) + + leadOut = similar(leadIn, Ti, total) + subOut = similar(subIn, Ti, total) + nzOut = similar(nonzeros(A), Tv, total) + total == 0 && return leadOut, subOut, nzOut + + backend = get_backend(leadOut) + keys = similar(leadIn, Int64, total) + coo_sort_keys_kernel!(backend)(keys, leadIn, subIn, subdim; ndrange=total) + perm = sortperm(keys) + coo_gather_kernel!(backend)(leadOut, subOut, nzOut, leadIn, subIn, nonzeros(A), perm; + ndrange=total) + unsafe_free!(keys) + unsafe_free!(perm) + return leadOut, subOut, nzOut +end + +# compressed pointer array of length `dim+1` from (possibly empty) sorted lead indices, +# inheriting the lead array's index eltype and storage. +function _compressed_ptr(sortedLead, dim::Integer, n::Integer) + ptr = similar(sortedLead, dim + 1) + if n > 0 + compressed_ptr_from_sorted_kernel!(get_backend(ptr))(ptr, sortedLead, n; + ndrange=dim + 1) + else + fill!(ptr, one(eltype(ptr))) + end + return ptr +end + +# Cross-format conversions are ordinary `convert` methods, dispatched on GPUArrays' own +# abstract types (so this is not type piracy and avoids the ambiguity a generic constructor +# would have with a back-end's host-array constructor). The result is built with the +# back-end's conventional field constructor `ST(components..., dims[, nnz])` — the write-side +# analogue of the `colPtr`/`rowVal`/`nzVal` read contract, and how `SparseArrays` itself +# builds (`SparseMatrixCSC(m, n, colptr, rowval, nzval)`). A back-end with unconventional +# construction overrides the relevant `convert` method. + +# CSR → COO: expand `rowPtr` into explicit row indices; columns and values are shared. +function Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCSR) where {ST<:AbstractGPUSparseMatrixCOO} + n = Int(nnz(A)) + rowInd = similar(A.colVal, n) + n > 0 && expand_ptr_kernel!(get_backend(rowInd))(rowInd, A.rowPtr; ndrange=size(A, 1)) + return ST(rowInd, A.colVal, nonzeros(A), size(A), n) +end + +# CSC → COO: expand `colPtr` into explicit column indices; rows and values are shared. +function Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCSC) where {ST<:AbstractGPUSparseMatrixCOO} + n = Int(nnz(A)) + colInd = similar(A.rowVal, n) + n > 0 && expand_ptr_kernel!(get_backend(colInd))(colInd, A.colPtr; ndrange=size(A, 2)) + return ST(A.rowVal, colInd, nonzeros(A), size(A), n) +end + +# COO → CSR: sort row-major, then compress the row indices into a `rowPtr`. +function Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCOO) where {ST<:AbstractGPUSparseMatrixCSR} + rowInd, colVal, nzVal = _sorted_coo(A, Val(:row)) + rowPtr = _compressed_ptr(rowInd, size(A, 1), Int(nnz(A))) + unsafe_free!(rowInd) + return ST(rowPtr, colVal, nzVal, size(A)) +end + +# COO → CSC: sort column-major, then compress the column indices into a `colPtr`. +function Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCOO) where {ST<:AbstractGPUSparseMatrixCSC} + colInd, rowVal, nzVal = _sorted_coo(A, Val(:col)) + colPtr = _compressed_ptr(colInd, size(A, 2), Int(nnz(A))) + unsafe_free!(colInd) + return ST(colPtr, rowVal, nzVal, size(A)) +end + +# CSC ↔ CSR route through COO (the back-end's verb yields its concrete COO type). +Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCSC) where {ST<:AbstractGPUSparseMatrixCSR} = + convert(ST, sparse_coo(A)) +Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCSR) where {ST<:AbstractGPUSparseMatrixCSC} = + convert(ST, sparse_coo(A)) + @inline function sparse_atomic_add!(A::AbstractArray{<:Real}, parts, I::Integer, x) @inbounds Atomix.@atomic A[Int(I)] += x return From 780e855bcc098e8056a6f528c4d0e0e083e8e975 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 14:54:50 +0200 Subject: [PATCH 10/15] Adopt generic sparse conversions in JLArrays Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/JLArrays/src/JLArrays.jl | 40 +++++++++++++++--------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index aa935950c..882f7ba1d 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -455,30 +455,22 @@ end JLSparseMatrixCOO(xs::SparseVector) = JLSparseMatrixCOO(SparseMatrixCSC(xs)) JLSparseMatrixCOO(xs::JLArray{T,2}) where {T} = GPUArrays.to_sparse(JLSparseMatrixCOO{T,Int}, xs) -function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv} - return JLSparseMatrixCSR(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCOO(xs::Union{JLSparseMatrixCSC{Tv, Ti},JLSparseMatrixCSR{Tv, Ti}}) where {Tv, Ti} - return JLSparseMatrixCOO(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCOO{Tv,Ti}(xs::Union{JLSparseMatrixCSC{Tv, Ti},JLSparseMatrixCSR{Tv, Ti}}) where {Tv, Ti} - return JLSparseMatrixCOO(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCSC(xs::JLSparseMatrixCSR{Tv, Ti}) where {Ti, Tv} - return JLSparseMatrixCSC(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCSC(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} - return JLSparseMatrixCSC(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCSC{Tv,Ti}(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} - return JLSparseMatrixCSC(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCSR(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} - return JLSparseMatrixCSR(SparseMatrixCSC(xs)) -end -function JLSparseMatrixCSR{Tv,Ti}(xs::JLSparseMatrixCOO{Tv, Ti}) where {Ti, Tv} - return JLSparseMatrixCSR(SparseMatrixCSC(xs)) -end +# Cross-format device→device conversions (CSC↔CSR↔COO): the back-end owns the constructors; +# the on-device conversion algorithm is GPUArrays' generic `convert` (replacing the former +# host round-trips through `SparseMatrixCSC`). The `sparse_csc/csr/coo` verbs above route to +# these. The COO→CSR/CSC path needs `sortperm`, provided below. +JLSparseMatrixCOO(A::Union{JLSparseMatrixCSC,JLSparseMatrixCSR}) = + convert(JLSparseMatrixCOO, A) +JLSparseMatrixCSR(A::Union{JLSparseMatrixCSC,JLSparseMatrixCOO}) = + convert(JLSparseMatrixCSR, A) +JLSparseMatrixCSC(A::Union{JLSparseMatrixCSR,JLSparseMatrixCOO}) = + convert(JLSparseMatrixCSC, A) + +# `sortperm` on a JLArray vector, required by the generic COO→CSR/CSC conversions (and by +# `findnz`). Computed on the host and wrapped back into a device vector so that the gather +# kernel can index the permutation on-device (mirrors Metal's `sortperm`, which also returns +# a device `Int` vector). +Base.sortperm(x::JLVector; kwargs...) = JLArray(sortperm(Array(x); kwargs...)) SparseArrays.sparse(xs::JLArray{T,1}) where {T} = GPUArrays.to_sparse(JLSparseVector{T,Int}, xs) From f0be29ffb004a4dd8bfe7c9cd6aeaa6792912df5 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 14:54:50 +0200 Subject: [PATCH 11/15] Drop `sparse_coo_type`; test COO via `sparse_types` Co-Authored-By: Claude Opus 4.8 (1M context) --- test/runtests.jl | 3 +-- test/testsuite.jl | 1 - test/testsuite/sparse.jl | 26 ++++++++++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 5d13993ad..f60c43e60 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,8 +8,7 @@ const init_worker_code = quote include("testsuite.jl") - TestSuite.sparse_types(::Type{<:JLArray}) = (JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR) - TestSuite.sparse_coo_type(::Type{<:JLArray}) = JLSparseMatrixCOO + TestSuite.sparse_types(::Type{<:JLArray}) = (JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR, JLSparseMatrixCOO) TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC) # Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved diff --git a/test/testsuite.jl b/test/testsuite.jl index f95c7366e..e4aea4f72 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -76,7 +76,6 @@ supported_eltypes() = (Int16, Int32, Int64, # derived sparse types that are supported by the array type sparse_types(::Type{AT}) where {AT} = () -sparse_coo_type(::Type{AT}) where {AT} = nothing # some convenience predicates for filtering test eltypes isrealtype(T) = T <: Real diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 7a15dd09e..51d93e678 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -7,16 +7,26 @@ broadcasting_vector(sparse_AT, AT, eltypes) iszero_vector(sparse_AT, eltypes) elseif sparse_AT <: AbstractSparseMatrix - matrix(sparse_AT, eltypes) - matrix_construction(sparse_AT, AT, eltypes) - broadcasting_matrix(sparse_AT, AT, eltypes) - mapreduce_matrix(sparse_AT, eltypes) - linalg(sparse_AT, eltypes) - iszero_matrix(sparse_AT, eltypes) + # The matrix battery is only run for CSC/CSR. COO lacks the generic operations + # these exercise: broadcast (which `iszero_matrix` and several construction + # paths use), dims-reductions, `opnorm`, and `issymmetric`/`similar`-with-shape. + # COO is still registered in `sparse_types` so the repeated-index `coo_matmul` + # test below is discovered; the loop itself doesn't otherwise exercise it. + if sparse_AT <: Union{GPUArrays.AbstractGPUSparseMatrixCSC, + GPUArrays.AbstractGPUSparseMatrixCSR} + matrix(sparse_AT, eltypes) + matrix_construction(sparse_AT, AT, eltypes) + broadcasting_matrix(sparse_AT, AT, eltypes) + mapreduce_matrix(sparse_AT, eltypes) + linalg(sparse_AT, eltypes) + iszero_matrix(sparse_AT, eltypes) + end end end - coo_AT = sparse_coo_type(AT) - coo_AT === nothing || coo_matmul(coo_AT, AT, eltypes) + # the repeated-index COO matmul test runs against whichever COO type the back-end + # registers in `sparse_types` (no separate `sparse_coo_type` hook). + i = findfirst(T -> T <: GPUArrays.AbstractGPUSparseMatrixCOO, sparse_ATs) + i === nothing || coo_matmul(sparse_ATs[i], AT, eltypes) end using SparseArrays From 21b3dd7e18e03228b67684cc17e1757a4fb35070 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 16:10:05 +0200 Subject: [PATCH 12/15] Test sparse matmul and conversions generically Also add the missing JLArrays COO identity constructor. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/JLArrays/src/JLArrays.jl | 2 + test/testsuite/sparse.jl | 147 +++++++++++++++++++++++++++++++++-- 2 files changed, 144 insertions(+), 5 deletions(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 882f7ba1d..e18b2eabc 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -212,6 +212,8 @@ JLSparseMatrixCOO(rowInd::JLArray{Ti, 1}, colInd::JLArray{Ti, 1}, nnz::Integer=length(nzVal)) where {Tv, Ti <: Integer} = JLSparseMatrixCOO{Tv, Ti}(rowInd, colInd, nzVal, dims, nnz) +JLSparseMatrixCOO(A::JLSparseMatrixCOO) = A + Base.length(x::JLSparseMatrixCOO) = prod(x.dims) Base.size(x::JLSparseMatrixCOO) = x.dims SparseArrays.nnz(x::JLSparseMatrixCOO) = x.nnz diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 51d93e678..73134c68d 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -1,5 +1,6 @@ @testsuite "sparse" (AT, eltypes)->begin sparse_ATs = sparse_types(AT) + matrix_ATs = filter(T -> T <: AbstractSparseMatrix, sparse_ATs) for sparse_AT in sparse_ATs if sparse_AT <: AbstractSparseVector vector(sparse_AT, AT, eltypes) @@ -7,11 +8,13 @@ broadcasting_vector(sparse_AT, AT, eltypes) iszero_vector(sparse_AT, eltypes) elseif sparse_AT <: AbstractSparseMatrix - # The matrix battery is only run for CSC/CSR. COO lacks the generic operations - # these exercise: broadcast (which `iszero_matrix` and several construction - # paths use), dims-reductions, `opnorm`, and `issymmetric`/`similar`-with-shape. - # COO is still registered in `sparse_types` so the repeated-index `coo_matmul` - # test below is discovered; the loop itself doesn't otherwise exercise it. + # `matmul`/`matmul_accumulation` run for every matrix format (incl. COO, which + # natively supports matmul). The rest of the matrix battery is CSC/CSR-only: + # COO lacks the generic operations they exercise — broadcast (used by + # `iszero_matrix` and several construction paths), dims-reductions, `opnorm`, + # and `issymmetric`/`similar`-with-shape. + matmul(sparse_AT, AT, eltypes) + matmul_accumulation(sparse_AT, AT, eltypes) if sparse_AT <: Union{GPUArrays.AbstractGPUSparseMatrixCSC, GPUArrays.AbstractGPUSparseMatrixCSR} matrix(sparse_AT, eltypes) @@ -23,6 +26,8 @@ end end end + # cross-format conversions need ≥2 registered matrix formats to be meaningful. + length(matrix_ATs) > 1 && conversions(matrix_ATs, AT, eltypes) # the repeated-index COO matmul test runs against whichever COO type the back-end # registers in `sparse_types` (no separate `sparse_coo_type` hook). i = findfirst(T -> T <: GPUArrays.AbstractGPUSparseMatrixCOO, sparse_ATs) @@ -119,6 +124,138 @@ function coo_matmul(AT, dense_AT, eltypes) end end +# Narrow complex test data to the matmul eltype: complex eltypes keep both parts (so +# `transpose` and `adjoint` differ); real eltypes take the real part, since a complex value +# with a nonzero imaginary part can't be converted to a real type directly. Projecting onto +# the real part reproduces the proven real test data exactly (e.g. `1 + 2im` → `1`). +matmul_cast(x::AbstractArray, ::Type{ET}) where {ET<:Complex} = ET.(x) +matmul_cast(x::AbstractArray, ::Type{ET}) where {ET<:Real} = ET.(real.(x)) +matmul_cast(x::Number, ::Type{ET}) where {ET<:Complex} = ET(x) +matmul_cast(x::Number, ::Type{ET}) where {ET<:Real} = ET(real(x)) + +# Per matrix format, general (duplicate-free) matmul: SpMV, SpMM, and dense·sparse, each +# with transpose/adjoint and the 5-arg `mul!`. The repeated-coordinate COO case is covered +# separately by `coo_matmul`. +function matmul(AT, dense_AT, eltypes) + for ET in (Float16, Float32, ComplexF16, ComplexF32) + ET in eltypes || continue + @testset "$(nameof(AT)) matmul($ET)" begin + V = matmul_cast(ComplexF32[1 + 2im, 2 - im, 3 + im, 4 - 3im, 5 + 2im], ET) + A = sparse([1, 1, 2, 4, 4], [1, 3, 2, 1, 4], V, 4, 4) + dA = AT(A) + + α = matmul_cast(2 + im, ET) + β = matmul_cast(3 - im, ET) + + # SpMV, transpose/adjoint, and the 5-arg and bool `mul!` + x = matmul_cast(ComplexF32[2 - im, 3 + 2im, 5 - 4im, 7 + im], ET) + dx = dense_AT(x) + @test Array(dA * dx) ≈ Array(A * x) + @test Array(transpose(dA) * dx) ≈ Array(transpose(A) * x) + @test Array(adjoint(dA) * dx) ≈ Array(adjoint(A) * x) + + y = matmul_cast(ComplexF32[1 + im, 2 - im, 3 + 2im, 4 - im], ET) + dy = dense_AT(copy(y)) + mul!(dy, dA, dx, α, β) + @test Array(dy) ≈ α .* Array(A * x) .+ β .* y + + dz = dense_AT(zeros(ET, size(A, 1))) + mul!(dz, dA, dx, true, false) + @test Array(dz) ≈ Array(A * x) + + # SpMM, transpose/adjoint, and the 5-arg `mul!` + B = matmul_cast(ComplexF32[ + 1 + im 2 - im 3 + 2im; + 4 - im 5 + im 6 - 2im; + 7 + im 8 - im 9 + 3im; + 2 + 4im 3 - 5im 4 + im + ], ET) + dB = dense_AT(B) + @test Array(dA * dB) ≈ Array(A * B) + @test Array(transpose(dA) * dB) ≈ Array(transpose(A) * B) + @test Array(adjoint(dA) * dB) ≈ Array(adjoint(A) * B) + + C = matmul_cast(ComplexF32[ + 1 - im 2 + im 3 - 2im; + 4 + im 5 - im 6 + 2im; + 7 - im 8 + im 9 - 3im; + 2 - 4im 3 + 5im 4 - im + ], ET) + dC = dense_AT(copy(C)) + mul!(dC, dA, dB, α, β) + @test Array(dC) ≈ α .* Array(A * B) .+ β .* C + + # dense·sparse, transpose/adjoint, and the 5-arg `mul!` + L = matmul_cast(ComplexF32[ + 1 - im 2 + im 3 - 2im 4 + im; + 5 + 2im 6 - im 7 + im 8 - 3im + ], ET) + dL = dense_AT(L) + @test Array(dL * dA) ≈ Array(L * A) + @test Array(dL * transpose(dA)) ≈ Array(L * transpose(A)) + @test Array(dL * adjoint(dA)) ≈ Array(L * adjoint(A)) + + D = matmul_cast(ComplexF32[ + 1 + im 2 - im 3 + 2im 4 - im; + 2 + 3im 3 - im 5 + im 7 - 2im + ], ET) + dD = dense_AT(copy(D)) + mul!(dD, dL, dA, α, β) + @test Array(dD) ≈ α .* Array(L * A) .+ β .* D + end + end +end + +# Per matrix format, the Float16/ComplexF16 accumulation contract: the back-end must +# accumulate in Float32/ComplexF32 and round once at the end. Every product feeding a given +# output entry is identical, so the n-term sum is independent of accumulation order, and the +# result must match a host reference accumulated in `Tacc` *exactly* (`==`, not `≈`); a +# narrow Float16 accumulation would lose precision and the equality would fail. +function matmul_accumulation(AT, dense_AT, eltypes) + for ET in (Float16, ComplexF16) + ET in eltypes || continue + Tacc = ET <: Complex ? ComplexF32 : Float32 + @testset "$(nameof(AT)) matmul accumulation($ET)" begin + n = 128 + av = matmul_cast(0.1 + 0.2im, ET) + xv = matmul_cast(0.1 - 0.3im, ET) + + # SpMV: a 1×n row times a length-n vector + A = sparse(fill(1, n), 1:n, fill(av, n), 1, n) + x = fill(xv, n) + dA = AT(A) + dx = dense_AT(x) + @test Array(dA * dx) == ET.(SparseMatrixCSC{Tacc,Int}(A) * Tacc.(x)) + + # SpMM: the same row times an n×3 dense matrix + B = fill(xv, n, 3) + dB = dense_AT(B) + @test Array(dA * dB) == ET.(SparseMatrixCSC{Tacc,Int}(A) * Tacc.(B)) + + # dense·sparse: a 3×n dense matrix times an n×1 column + S = sparse(1:n, fill(1, n), fill(av, n), n, 1) + dS = AT(S) + L = fill(xv, 3, n) + dL = dense_AT(L) + @test Array(dL * dS) == ET.(Tacc.(L) * SparseMatrixCSC{Tacc,Int}(S)) + end + end +end + +# Round-trip a duplicate-free matrix (and an empty one) through every ordered pair of +# registered matrix formats: `src == dst` exercises the identity path and `src != dst` the +# cross-format `convert`. Conversions preserve coordinates, so the round-trip is exact. +function conversions(matrix_ATs, dense_AT, eltypes) + @testset "sparse format conversions" begin + A = sparse([1, 4, 2, 4, 1], [3, 1, 2, 5, 1], Float32[3, 4, 5, 6, 7], 4, 5) + Z = spzeros(Float32, 3, 4) + for src in matrix_ATs, dst in matrix_ATs + @test SparseMatrixCSC(dst(src(A))) == A + @test SparseMatrixCSC(dst(src(Z))) == Z + end + end +end + function vector(AT, dense_AT, eltypes) for ET in eltypes @testset "Sparse vector properties($ET)" begin From d8a8c60e699931c9734641cf85a383883519ea38 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 18:29:13 +0200 Subject: [PATCH 13/15] Allocate empty sparse arrays via spzeros/undef Mandate the undef constructor as the empty-of-shape primitive; similar delegates to it. JLArrays now provides spzeros + undef constructors. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/src/interface.md | 14 ++++++++++++- lib/JLArrays/src/JLArrays.jl | 40 +++++++++++++++++++++++++++++++++--- test/testsuite/sparse.jl | 34 ++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 4 deletions(-) diff --git a/docs/src/interface.md b/docs/src/interface.md index 0aeb49636..245a2bfa5 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -59,9 +59,21 @@ formats you need, but note that several generic operations route through COO. * **Constructors** — from component arrays (`MyCSR(rowPtr, colVal, nzVal, dims)`), between formats (`MyCSR(::MyCOO)`, …), and to/from host `SparseArrays` (`MyCSC(::SparseMatrixCSC)`, `SparseMatrixCSC(::MyCSC)`). + * **`undef` constructors** — `MyCSC{Tv,Ti}(undef, dims)` / `MyVec{Tv,Ti}(undef, n)`, + building a *structurally-empty* array (no stored entries), mirroring dense + `Array{T}(undef, dims)` and `SparseArrays`' `SparseMatrixCSC{Tv,Ti}(undef, m, n)`. This + is the empty-of-a-shape allocation primitive. Note there is no uninitialized-structure + analogue: for a sparse array `undef` means empty, exactly as in `SparseArrays`. + Implementing these through a `spzeros(Tv, Ti, dims…; fmt=…)` helper (the value-level + analogue of `SparseArrays.spzeros`, with a format selector) is recommended — it also + serves as a convenient public, format-polymorphic entry point — but `spzeros` itself is + not mandated, since its signature is back-end-flavored (format symbols, storage modes) + whereas the `undef` constructor is uniform. * **`Base.similar`** — structure-preserving (`similar(A)`, `similar(A, ::Type)`) and empty-of-a-shape (`similar(A, ::Type, dims)`), as for dense arrays; generic code - allocates its outputs through `similar`, never by naming a type. + allocates its outputs through `similar`, never by naming a type. The empty-of-a-shape + form just delegates to the `undef` constructor (threading the source's storage mode), + so the constructor is the real primitive. * **Conversion verbs** `GPUArrays.sparse_csc`/`sparse_csr`/`sparse_coo`, for the formats other than the one each produces (the identity case is generic) — the value-level analogue of `SparseArrays.sparse`. diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index e18b2eabc..5331c1d75 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -282,14 +282,48 @@ Base.similar(Mat::JLSparseMatrixCSC{<:Any, Ti}, ::Type{T}) where {T, Ti} = Base.similar(Vec::JLSparseVector{<:Any, Ti}, ::Type{T}) where {T, Ti} = JLSparseVector{T, Ti}(copy(Vec.iPtr), similar(nonzeros(Vec), T), length(Vec)) +# Empty (structurally-zero) sparse allocation — the value-level analogue of +# `SparseArrays.spzeros` and the shared implementation behind the `undef` constructors and +# the empty-of-shape `similar` below. The matrix method selects the format via `fmt` +# (mirroring `sparse(A; fmt=…)`); the vector method takes a length. Unexported (so it never +# clashes with `SparseArrays.spzeros`); call it as `JLArrays.spzeros`. +function spzeros(::Type{Tv}, ::Type{Ti}, dims::Dims{2}; fmt=:csc) where {Tv, Ti} + if fmt === :csc + JLSparseMatrixCSC{Tv, Ti}(JLVector(ones(Ti, dims[2] + 1)), JLVector{Ti}(undef, 0), JLVector{Tv}(undef, 0), dims) + elseif fmt === :csr + JLSparseMatrixCSR{Tv, Ti}(JLVector(ones(Ti, dims[1] + 1)), JLVector{Ti}(undef, 0), JLVector{Tv}(undef, 0), dims) + elseif fmt === :coo + JLSparseMatrixCOO{Tv, Ti}(JLVector{Ti}(undef, 0), JLVector{Ti}(undef, 0), JLVector{Tv}(undef, 0), dims) + else + throw(ArgumentError("format :$fmt not available, use :csc, :csr, or :coo")) + end +end +spzeros(::Type{Tv}, ::Type{Ti}, dims::Dims{1}) where {Tv, Ti} = + JLSparseVector{Tv, Ti}(JLVector{Ti}(undef, 0), JLVector{Tv}(undef, 0), dims[1]) +spzeros(::Type{Tv}, ::Type{Ti}, dims::Integer...; kw...) where {Tv, Ti} = + spzeros(Tv, Ti, map(Int, dims); kw...) +spzeros(::Type{Tv}, dims::Union{Integer,Dims}...; kw...) where {Tv} = + spzeros(Tv, Int, dims...; kw...) + +# `undef` constructors — the mandated empty-of-shape allocation primitive, mirroring the +# dense `JLArray{T,N}(undef, dims)` and `SparseArrays`' `SparseMatrixCSC{Tv,Ti}(undef, m, n)`. +JLSparseVector{Tv, Ti}(::UndefInitializer, len::Integer) where {Tv, Ti} = + spzeros(Tv, Ti, (Int(len),)) +JLSparseMatrixCSC{Tv, Ti}(::UndefInitializer, dims::Dims{2}) where {Tv, Ti} = spzeros(Tv, Ti, dims; fmt=:csc) +JLSparseMatrixCSR{Tv, Ti}(::UndefInitializer, dims::Dims{2}) where {Tv, Ti} = spzeros(Tv, Ti, dims; fmt=:csr) +JLSparseMatrixCOO{Tv, Ti}(::UndefInitializer, dims::Dims{2}) where {Tv, Ti} = spzeros(Tv, Ti, dims; fmt=:coo) +JLSparseMatrixCSC{Tv, Ti}(u::UndefInitializer, m::Integer, n::Integer) where {Tv, Ti} = JLSparseMatrixCSC{Tv, Ti}(u, (Int(m), Int(n))) +JLSparseMatrixCSR{Tv, Ti}(u::UndefInitializer, m::Integer, n::Integer) where {Tv, Ti} = JLSparseMatrixCSR{Tv, Ti}(u, (Int(m), Int(n))) +JLSparseMatrixCOO{Tv, Ti}(u::UndefInitializer, m::Integer, n::Integer) where {Tv, Ti} = JLSparseMatrixCOO{Tv, Ti}(u, (Int(m), Int(n))) + # empty sparse arrays of a given shape, used by the generic broadcast machinery as the # output container before its storage is sized and filled. the index type is preserved. Base.similar(Mat::JLSparseMatrixCSC{<:Any, Ti}, ::Type{T}, N::Int, M::Int) where {T, Ti} = - JLSparseMatrixCSC{T, Ti}(JLVector(ones(Ti, M + 1)), JLVector{Ti}(undef, 0), JLVector{T}(undef, 0), (N, M)) + JLSparseMatrixCSC{T, Ti}(undef, (N, M)) Base.similar(Mat::JLSparseMatrixCSR{<:Any, Ti}, ::Type{T}, N::Int, M::Int) where {T, Ti} = - JLSparseMatrixCSR{T, Ti}(JLVector(ones(Ti, N + 1)), JLVector{Ti}(undef, 0), JLVector{T}(undef, 0), (N, M)) + JLSparseMatrixCSR{T, Ti}(undef, (N, M)) Base.similar(Vec::JLSparseVector{<:Any, Ti}, ::Type{T}, dims::Dims{1}) where {T, Ti} = - JLSparseVector{T, Ti}(JLVector{Ti}(undef, 0), JLVector{T}(undef, 0), dims[1]) + JLSparseVector{T, Ti}(undef, dims[1]) Base.similar(Mat::JLSparseMatrixCSC{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M) Base.similar(Mat::JLSparseMatrixCSR{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 73134c68d..823e4e925 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -26,6 +26,7 @@ end end end + undef_construction(sparse_ATs, eltypes) # cross-format conversions need ≥2 registered matrix formats to be meaningful. length(matrix_ATs) > 1 && conversions(matrix_ATs, AT, eltypes) # the repeated-index COO matmul test runs against whichever COO type the back-end @@ -256,6 +257,39 @@ function conversions(matrix_ATs, dense_AT, eltypes) end end +# The mandated empty-of-shape primitive: `ST{Tv,Ti}(undef, dims)` builds a structurally +# empty array (no stored entries), mirroring dense `undef` and `SparseArrays`' undef +# constructors. Exercised per registered format; the back-end's index type is read off a +# reference instance so the test stays back-end agnostic. +function undef_construction(sparse_ATs, eltypes) + @testset "undef construction" begin + for ST in sparse_ATs + if ST <: AbstractSparseVector + Ti = typeof(ST(spzeros(Float32, 3))).parameters[2] + for Tv in (Float32, ComplexF32) + Tv in eltypes || continue + v = ST{Tv,Ti}(undef, 5) + @test v isa ST{Tv,Ti} + @test size(v) == (5,) + @test nnz(v) == 0 + @test SparseVector(v) == spzeros(Tv, 5) + end + else + Ti = typeof(ST(spzeros(Float32, 3, 4))).parameters[2] + for Tv in (Float32, ComplexF32) + Tv in eltypes || continue + A = ST{Tv,Ti}(undef, (3, 4)) + @test A isa ST{Tv,Ti} + @test size(A) == (3, 4) + @test nnz(A) == 0 + @test SparseMatrixCSC(A) == spzeros(Tv, 3, 4) + @test size(ST{Tv,Ti}(undef, 3, 4)) == (3, 4) # varargs form + end + end + end + end +end + function vector(AT, dense_AT, eltypes) for ET in eltypes @testset "Sparse vector properties($ET)" begin From ae6812fa6ef3abd4f1ad0fabfb40cbbd69099b82 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 21:41:49 +0200 Subject: [PATCH 14/15] Convert sparse formats via coo_type/csr_type/csc_type hooks Replaces the sparse branch's sparse_csc/csr/coo value-verbs with main's type-level format hooks, used as coo_type(A)(A); keeps the generic on-device convert algorithm as the engine. Re-aligns the conversion API with main. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/src/interface.md | 12 +++-- lib/JLArrays/src/JLArrays.jl | 16 ++++--- src/host/sparse.jl | 92 +++++++++++++++++++----------------- 3 files changed, 66 insertions(+), 54 deletions(-) diff --git a/docs/src/interface.md b/docs/src/interface.md index 245a2bfa5..7be109009 100644 --- a/docs/src/interface.md +++ b/docs/src/interface.md @@ -74,9 +74,15 @@ formats you need, but note that several generic operations route through COO. allocates its outputs through `similar`, never by naming a type. The empty-of-a-shape form just delegates to the `undef` constructor (threading the source's storage mode), so the constructor is the real primitive. - * **Conversion verbs** `GPUArrays.sparse_csc`/`sparse_csr`/`sparse_coo`, for the formats - other than the one each produces (the identity case is generic) — the value-level - analogue of `SparseArrays.sparse`. + * **Format-conversion hooks** `GPUArrays.coo_type`/`csr_type`/`csc_type` — map any of your + sparse-matrix types to the *type* of the named sibling format + (`coo_type(::Type{<:MyCSC}) = MyCOO`); generic code converts with `coo_type(A)(A)`. These + are type-level hooks rather than plain `convert(Dest, A)` because a format is the + wrapper's identity (distinct structs), not a type parameter — so, unlike an eltype change, + there is no generic wrapper→sibling-wrapper operation, and only the back-end knows its + sibling types. The cross-format `convert` methods above are the engine the resulting + constructors route through; the identity case (`coo_type(coo)(coo)`) is your identity + constructor. * **`KernelAbstractions.get_backend`** for the sparse types (usually `get_backend(nonzeros(A))`). * **`Adapt.adapt_structure`** converting each host struct to its device counterpart diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index 5331c1d75..5ebfa26b6 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -264,11 +264,13 @@ function Base.getindex(A::JLSparseMatrixCSR{Tv, Ti}, i0::Integer, i1::Integer) w end GPUArrays.storage(a::JLArray) = a.data -# format-conversion verbs (cross-format only; the identity cases are handled generically -# by GPUArrays). these reuse the format constructors defined below. -GPUArrays.sparse_csc(A::Union{JLSparseMatrixCSR,JLSparseMatrixCOO}) = JLSparseMatrixCSC(A) -GPUArrays.sparse_csr(A::Union{JLSparseMatrixCSC,JLSparseMatrixCOO}) = JLSparseMatrixCSR(A) -GPUArrays.sparse_coo(A::Union{JLSparseMatrixCSC,JLSparseMatrixCSR}) = JLSparseMatrixCOO(A) +const JLSparseMatrix{Tv,Ti} = Union{JLSparseMatrixCSC{Tv,Ti},JLSparseMatrixCSR{Tv,Ti},JLSparseMatrixCOO{Tv,Ti}} +# format-conversion hooks: map any JLArrays sparse matrix to the *type* of its sibling +# format. Generic GPUArrays code converts with `coo_type(A)(A)` etc., routing through the +# format constructors defined below. +GPUArrays.csc_type(::Type{<:JLSparseMatrix}) = JLSparseMatrixCSC +GPUArrays.csr_type(::Type{<:JLSparseMatrix}) = JLSparseMatrixCSR +GPUArrays.coo_type(::Type{<:JLSparseMatrix}) = JLSparseMatrixCOO Base.similar(Mat::JLSparseMatrixCSR) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) Base.similar(Mat::JLSparseMatrixCSR, T::Type) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), size(Mat)) @@ -493,8 +495,8 @@ JLSparseMatrixCOO(xs::JLArray{T,2}) where {T} = GPUArrays.to_sparse(JLSparseMatrixCOO{T,Int}, xs) # Cross-format device→device conversions (CSC↔CSR↔COO): the back-end owns the constructors; # the on-device conversion algorithm is GPUArrays' generic `convert` (replacing the former -# host round-trips through `SparseMatrixCSC`). The `sparse_csc/csr/coo` verbs above route to -# these. The COO→CSR/CSC path needs `sortperm`, provided below. +# host round-trips through `SparseMatrixCSC`). The `csc_type/csr_type/coo_type` hooks above +# route to these. The COO→CSR/CSC path needs `sortperm`, provided below. JLSparseMatrixCOO(A::Union{JLSparseMatrixCSC,JLSparseMatrixCSR}) = convert(JLSparseMatrixCOO, A) JLSparseMatrixCSR(A::Union{JLSparseMatrixCSC,JLSparseMatrixCOO}) = diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 7c381b0d8..c3bf5d4b6 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -79,7 +79,7 @@ end Build a sparse array of type `ST` from the dense GPU array `A`, on the device (no host round-trip); the inverse of [`to_dense`](@ref). `ST` must be a concrete `AbstractGPUSparseVector` or `AbstractGPUSparseMatrixCOO` (other matrix formats follow by -converting the resulting COO with `sparse_csr`/`sparse_csc`). The target format is required +converting the resulting COO with `csr_type`/`csc_type`). The target format is required because the result layout would otherwise be ambiguous; `to_dense` needs no such argument. """ function to_sparse(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:AbstractGPUSparseVector{Tv,Ti}} @@ -97,8 +97,8 @@ function to_sparse(::Type{ST}, A::AnyGPUVector) where {Tv,Ti,ST<:AbstractGPUSpar end # Densifying a dense array naturally produces coordinate (COO) format, so this builds a -# COO directly. Other formats are obtained by converting the COO with `sparse_csr`/ -# `sparse_csc` (which back-ends already provide as constructors), avoiding any need to map +# COO directly. Other formats are obtained by converting the COO with `csr_type`/ +# `csc_type` (which back-ends already provide as constructors), avoiding any need to map # a target type to its COO sibling. function to_sparse(::Type{ST}, A::AnyGPUMatrix) where {Tv,Ti,ST<:AbstractGPUSparseMatrixCOO{Tv,Ti}} check_sparse_target(ST, Tv, Ti) @@ -147,7 +147,7 @@ function to_dense(x::AbstractGPUSparseVector{Tv}) where {Tv} end function to_dense(A::AbstractGPUSparseMatrix{Tv}) where {Tv} - coo = sparse_coo(A) + coo = coo_type(A)(A) out = fill!(similar(nonzeros(A), Tv, size(A)), zero(Tv)) if nnz(coo) > 0 densify_matrix_kernel!(get_backend(out))(out, coo.rowInd, coo.colInd, nonzeros(coo); @@ -195,13 +195,17 @@ end sparse_op_size(A, trans::Bool) = trans ? reverse(size(A)) : size(A) -# Format-conversion verbs: converting to the format already held is the identity. The -# cross-format conversions are `convert` (below). A back-end supplies the 3 verb methods -# naming its concrete sibling types — `sparse_coo(A::MyCSC) = convert(MyCOO, A)` — which is -# also where its `MyCOO(::MyCSC)` constructors route. -sparse_csc(A::AbstractGPUSparseMatrixCSC) = A -sparse_csr(A::AbstractGPUSparseMatrixCSR) = A -sparse_coo(A::AbstractGPUSparseMatrixCOO) = A +# Format-conversion hooks: a back-end maps any of its sparse-matrix types to the *type* of +# its sibling format (`coo_type(::Type{<:MyCSC}) = MyCOO`); generic code then converts with +# `coo_type(A)(A)`. This is a type-level hook rather than plain `convert(Dest, A)` because the +# format is the wrapper's identity (distinct structs/fields), not a type parameter — so unlike +# an eltype change there is no generic wrapper→sibling-wrapper operation, and only the back-end +# knows its sibling types. The cross-format `convert` algorithm below is the engine those +# constructors route through; the identity case (`coo_type(coo)(coo)`) is the back-end's +# identity constructor. +coo_type(a) = coo_type(typeof(a)) +csr_type(a) = csr_type(typeof(a)) +csc_type(a) = csc_type(typeof(a)) ## Generic format conversions (CSC↔CSR↔COO), on-device. # @@ -362,11 +366,11 @@ function Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCOO) where {ST<:Abst return ST(colPtr, rowVal, nzVal, size(A)) end -# CSC ↔ CSR route through COO (the back-end's verb yields its concrete COO type). +# CSC ↔ CSR route through COO (the back-end's hook yields its concrete COO type). Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCSC) where {ST<:AbstractGPUSparseMatrixCSR} = - convert(ST, sparse_coo(A)) + convert(ST, coo_type(A)(A)) Base.convert(::Type{ST}, A::AbstractGPUSparseMatrixCSR) where {ST<:AbstractGPUSparseMatrixCSC} = - convert(ST, sparse_coo(A)) + convert(ST, coo_type(A)(A)) @inline function sparse_atomic_add!(A::AbstractArray{<:Real}, parts, I::Integer, x) @inbounds Atomix.@atomic A[Int(I)] += x @@ -587,7 +591,7 @@ function LinearAlgebra.generic_matvecmul!(y::AnyGPUVector{Ty}, tA::AbstractChar, y === x && throw(ArgumentError("output vector must not be aliased with input vector")) isempty(y) && return y - return spmv_coo!(y, sparse_coo(A), x, alpha, beta, trans, conjval) + return spmv_coo!(y, coo_type(A)(A), x, alpha, beta, trans, conjval) end LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, @@ -617,7 +621,7 @@ function LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, C === B && throw(ArgumentError("output matrix must not be aliased with input matrix")) isempty(C) && return C - return spmm_coo!(C, sparse_coo(A), Bop, alpha, beta, trans, conjval) + return spmm_coo!(C, coo_type(A)(A), Bop, alpha, beta, trans, conjval) end LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, @@ -647,7 +651,7 @@ function LinearAlgebra.generic_matmatmul!(C::AnyGPUMatrix{Tc}, tA, tB, C === A && throw(ArgumentError("output matrix must not be aliased with input matrix")) isempty(C) && return C - return dense_spmm_coo!(C, Aop, sparse_coo(B), alpha, beta, trans, conjval) + return dense_spmm_coo!(C, Aop, coo_type(B)(B), alpha, beta, trans, conjval) end function LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSR, p::Real=2) @@ -660,7 +664,7 @@ function LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSR, p::Real=2) end end -LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSC, p::Real=2) = opnorm(sparse_csr(A), p) +LinearAlgebra.opnorm(A::AbstractGPUSparseMatrixCSC, p::Real=2) = opnorm(csr_type(A)(A), p) function LinearAlgebra.norm(A::AbstractGPUSparseMatrix{T}, p::Real=2) where T if p == Inf @@ -675,7 +679,7 @@ function LinearAlgebra.norm(A::AbstractGPUSparseMatrix{T}, p::Real=2) where T end function SparseArrays.findnz(S::MT) where {MT <: AbstractGPUSparseMatrix} - S2 = sparse_coo(S) + S2 = coo_type(S)(S) I = S2.rowInd J = S2.colInd V = S2.nzVal @@ -715,61 +719,61 @@ end for SparseMatrixType in [:AbstractGPUSparseMatrixCSC, :AbstractGPUSparseMatrixCSR] @eval begin LinearAlgebra.triu(A::ST, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(sparse_coo(A), k) ) + ST( triu(coo_type(A)(A), k) ) LinearAlgebra.triu(A::Transpose{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(sparse_coo(_sptranspose(parent(A))), k) ) + ST( triu(coo_type(parent(A))(_sptranspose(parent(A))), k) ) LinearAlgebra.triu(A::Adjoint{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(sparse_coo(_spadjoint(parent(A))), k) ) + ST( triu(coo_type(parent(A))(_spadjoint(parent(A))), k) ) LinearAlgebra.tril(A::ST, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(sparse_coo(A), k) ) + ST( tril(coo_type(A)(A), k) ) LinearAlgebra.tril(A::Transpose{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(sparse_coo(_sptranspose(parent(A))), k) ) + ST( tril(coo_type(parent(A))(_sptranspose(parent(A))), k) ) LinearAlgebra.tril(A::Adjoint{T,<:ST}, k::Integer) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(sparse_coo(_spadjoint(parent(A))), k) ) + ST( tril(coo_type(parent(A))(_spadjoint(parent(A))), k) ) LinearAlgebra.triu(A::Union{ST, Transpose{T,<:ST}, Adjoint{T,<:ST}}) where {T, ST<:$SparseMatrixType{T}} = - ST( triu(sparse_coo(A), 0) ) + ST( triu(coo_type(A)(A), 0) ) LinearAlgebra.tril(A::Union{ST,Transpose{T,<:ST}, Adjoint{T,<:ST}}) where {T, ST<:$SparseMatrixType{T}} = - ST( tril(sparse_coo(A), 0) ) + ST( tril(coo_type(A)(A), 0) ) LinearAlgebra.kron(A::ST, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(A), sparse_coo(B)) ) + ST( kron(coo_type(A)(A), coo_type(B)(B)) ) LinearAlgebra.kron(A::ST, B::Diagonal) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(A), B) ) + ST( kron(coo_type(A)(A), B) ) LinearAlgebra.kron(A::Diagonal, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(A, sparse_coo(B)) ) + ST( kron(A, coo_type(B)(B)) ) LinearAlgebra.kron(A::Transpose{T,<:ST}, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(_sptranspose(parent(A))), sparse_coo(B)) ) + ST( kron(coo_type(parent(A))(_sptranspose(parent(A))), coo_type(B)(B)) ) LinearAlgebra.kron(A::ST, B::Transpose{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(A), sparse_coo(_sptranspose(parent(B)))) ) + ST( kron(coo_type(A)(A), coo_type(parent(B))(_sptranspose(parent(B)))) ) LinearAlgebra.kron(A::Transpose{T,<:ST}, B::Transpose{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(_sptranspose(parent(A))), sparse_coo(_sptranspose(parent(B)))) ) + ST( kron(coo_type(parent(A))(_sptranspose(parent(A))), coo_type(parent(B))(_sptranspose(parent(B)))) ) LinearAlgebra.kron(A::Transpose{T,<:ST}, B::Diagonal) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(_sptranspose(parent(A))), B) ) + ST( kron(coo_type(parent(A))(_sptranspose(parent(A))), B) ) LinearAlgebra.kron(A::Diagonal, B::Transpose{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(A, sparse_coo(_sptranspose(parent(B)))) ) + ST( kron(A, coo_type(parent(B))(_sptranspose(parent(B)))) ) LinearAlgebra.kron(A::Adjoint{T,<:ST}, B::ST) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(_spadjoint(parent(A))), sparse_coo(B)) ) + ST( kron(coo_type(parent(A))(_spadjoint(parent(A))), coo_type(B)(B)) ) LinearAlgebra.kron(A::ST, B::Adjoint{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(A), sparse_coo(_spadjoint(parent(B)))) ) + ST( kron(coo_type(A)(A), coo_type(parent(B))(_spadjoint(parent(B)))) ) LinearAlgebra.kron(A::Adjoint{T,<:ST}, B::Adjoint{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(_spadjoint(parent(A))), sparse_coo(_spadjoint(parent(B)))) ) + ST( kron(coo_type(parent(A))(_spadjoint(parent(A))), coo_type(parent(B))(_spadjoint(parent(B)))) ) LinearAlgebra.kron(A::Adjoint{T,<:ST}, B::Diagonal) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(sparse_coo(_spadjoint(parent(A))), B) ) + ST( kron(coo_type(parent(A))(_spadjoint(parent(A))), B) ) LinearAlgebra.kron(A::Diagonal, B::Adjoint{T,<:ST}) where {T, ST<:$SparseMatrixType{T}} = - ST( kron(A, sparse_coo(_spadjoint(parent(B)))) ) + ST( kron(A, coo_type(parent(B))(_spadjoint(parent(B)))) ) function Base.reshape(A::ST, dims::Dims) where {ST<:$SparseMatrixType} - B = sparse_coo(A) + B = coo_type(A)(A) ST(reshape(B, dims)) end function SparseArrays.droptol!(A::ST, tol::Real) where {ST<:$SparseMatrixType} - B = sparse_coo(A) + B = coo_type(A)(A) droptol!(B, tol) copyto!(A, ST(B)) end @@ -1585,9 +1589,9 @@ function Base.mapreduce(f, op, A::AbstractGPUSparseMatrix; dims=:, init=nothing) in(dims, [Colon(), 1, 2]) || error("only dims=:, dims=1 or dims=2 is supported") if A isa AbstractGPUSparseMatrixCSR && dims == 1 - A = sparse_csc(A) + A = csc_type(A)(A) elseif A isa AbstractGPUSparseMatrixCSC && dims == 2 - A = sparse_csr(A) + A = csr_type(A)(A) end m, n = size(A) val_array = nonzeros(A) From 5115577d7e5c59b590596472e4f708f3ce56dba3 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 23 Jun 2026 23:22:48 +0200 Subject: [PATCH 15/15] Only run the matmul accumulation test on GPU sparse formats Co-Authored-By: Claude Opus 4.8 (1M context) --- test/testsuite/sparse.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 823e4e925..f54ffbf85 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -8,13 +8,13 @@ broadcasting_vector(sparse_AT, AT, eltypes) iszero_vector(sparse_AT, eltypes) elseif sparse_AT <: AbstractSparseMatrix - # `matmul`/`matmul_accumulation` run for every matrix format (incl. COO, which - # natively supports matmul). The rest of the matrix battery is CSC/CSR-only: - # COO lacks the generic operations they exercise — broadcast (used by - # `iszero_matrix` and several construction paths), dims-reductions, `opnorm`, - # and `issymmetric`/`similar`-with-shape. + # `matmul` runs for every format (incl. COO). The matrix battery below is + # CSC/CSR-only: COO lacks the broadcast/reductions/`opnorm`/`issymmetric` it uses. matmul(sparse_AT, AT, eltypes) - matmul_accumulation(sparse_AT, AT, eltypes) + # Float32 accumulation is a GPU-kernel contract; stdlib `Array` accumulates + # narrowly, so assert it only for GPU formats. + sparse_AT <: GPUArrays.AbstractGPUSparseMatrix && + matmul_accumulation(sparse_AT, AT, eltypes) if sparse_AT <: Union{GPUArrays.AbstractGPUSparseMatrixCSC, GPUArrays.AbstractGPUSparseMatrixCSR} matrix(sparse_AT, eltypes)