diff --git a/src/device/intrinsics/synchronization.jl b/src/device/intrinsics/synchronization.jl
index 25dc6c925..c7489a5b9 100644
--- a/src/device/intrinsics/synchronization.jl
+++ b/src/device/intrinsics/synchronization.jl
@@ -33,22 +33,24 @@ Possible values:
 end
 
 
-"""
+@device_function @inline threadgroup_barrier(flag=MemoryFlagNone) =
+    ccall("extern air.wg.barrier", llvmcall, Cvoid, (Cuint, Cuint, ), flag, UInt32(1))
+
+@device_function @inline simdgroup_barrier(flag=MemoryFlagNone) =
+    ccall("extern air.simdgroup.barrier", llvmcall, Cvoid, (Cuint, Cuint, ), flag, UInt32(1))
+
+@doc """
     threadgroup_barrier(flag=MemoryFlagNone)
 
 Synchronize all threads in a threadgroup.
 
 Possible flags that affect the memory synchronization behavior are found in [`MemoryFlags`](@ref)
-"""
-@inline threadgroup_barrier(flag=MemoryFlagNone) =
-    ccall("extern air.wg.barrier", llvmcall, Cvoid, (Cuint, Cuint, ), flag, UInt32(1))
+""" threadgroup_barrier
 
-"""
+@doc """
     simdgroup_barrier(flag=MemoryFlagNone)
 
 Synchronize all threads in a SIMD-group.
 
 Possible flags that affect the memory synchronization behavior are found in [`MemoryFlags`](@ref)
-"""
-@inline simdgroup_barrier(flag=MemoryFlagNone) =
-    ccall("extern air.simdgroup.barrier", llvmcall, Cvoid, (Cuint, Cuint, ), flag, UInt32(1))
+""" simdgroup_barrier
diff --git a/src/device/intrinsics/version.jl b/src/device/intrinsics/version.jl
index b2b9c0126..3a6154c7e 100644
--- a/src/device/intrinsics/version.jl
+++ b/src/device/intrinsics/version.jl
@@ -3,7 +3,7 @@ export metal_version, air_version, @sv_str
 
 
 for var in ["metal_major", "metal_minor", "air_major", "air_minor"]
-    @eval @inline $(Symbol(var))() =
+    @eval @device_function @inline $(Symbol(var))() =
         Base.llvmcall(
             $("""@$var = external global i32
                  define i32 @entry() #0 {