diff --git a/reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp b/reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp new file mode 100644 index 000000000..2a513c35a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp @@ -0,0 +1,19 @@ +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +struct Registers +{ + ulong addr; + ulong addr2; +}; + +kernel void main0(constant Registers& registers [[buffer(0)]]) +{ + device int* _21 = reinterpret_cast<device int*>(registers.addr2); + int _22 = *(reinterpret_cast<device int*>(registers.addr)); + *_21 = _22; + *(reinterpret_cast<device int*>(reinterpret_cast<ulong>(_21) + 4ul)) = _22; +} + diff --git a/reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk new file mode 100644 index 000000000..ceb86a172 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk @@ -0,0 +1,30 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. 
+#endif +#extension GL_EXT_buffer_reference2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer intPointer; + +layout(buffer_reference, buffer_reference_align = 4) buffer intPointer +{ + int value; +}; + +layout(push_constant, std430) uniform Registers +{ + uint64_t addr; + uint64_t addr2; +} registers; + +void main() +{ + intPointer _21 = intPointer(registers.addr2); + int _22 = intPointer(registers.addr).value; + _21.value = _22; + intPointer(uint64_t(_21) + 4ul).value = _22; +} + diff --git a/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp b/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp new file mode 100644 index 000000000..7770ef68d --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "addr" + OpMemberName %Registers 1 "addr2" + OpName %registers "registers" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ulong = OpTypeInt 64 0 + %Registers = OpTypeStruct %ulong %ulong +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %ulong_4 = OpConstant %ulong 4 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong 
+%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int + %main = OpFunction %void None %3 + %5 = OpLabel + + %pc0 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %addr0 = OpLoad %ulong %pc0 + %src_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr0 + + %pc1 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_1 + %addr1 = OpLoad %ulong %pc1 + %dst_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr1 + + ; Volatile load from src — must NOT be forwarded + %ld = OpLoad %int %src_p Volatile|Aligned 4 + + ; Store the loaded value into dst (first use) + OpStore %dst_p %ld Aligned 4 + + ; Compute dst + 1 element via integer arithmetic + %dst_u64 = OpConvertPtrToU %ulong %dst_p + %dst_plus4 = OpIAdd %ulong %dst_u64 %ulong_4 + %dst_p2 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %dst_plus4 + + ; Store the same loaded value again (second use) + OpStore %dst_p2 %ld Aligned 4 + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp b/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp new file mode 100644 index 000000000..7770ef68d --- /dev/null +++ b/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "addr" + OpMemberName %Registers 1 "addr2" + OpName %registers "registers" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + 
%3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ulong = OpTypeInt 64 0 + %Registers = OpTypeStruct %ulong %ulong +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %ulong_4 = OpConstant %ulong 4 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong +%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int + %main = OpFunction %void None %3 + %5 = OpLabel + + %pc0 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %addr0 = OpLoad %ulong %pc0 + %src_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr0 + + %pc1 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_1 + %addr1 = OpLoad %ulong %pc1 + %dst_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr1 + + ; Volatile load from src — must NOT be forwarded + %ld = OpLoad %int %src_p Volatile|Aligned 4 + + ; Store the loaded value into dst (first use) + OpStore %dst_p %ld Aligned 4 + + ; Compute dst + 1 element via integer arithmetic + %dst_u64 = OpConvertPtrToU %ulong %dst_p + %dst_plus4 = OpIAdd %ulong %dst_u64 %ulong_4 + %dst_p2 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %dst_plus4 + + ; Store the same loaded value again (second use) + OpStore %dst_p2 %ld Aligned 4 + + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 8a3c21da6..aa8bb4ebc 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -12429,6 +12429,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // If an expression is mutable and forwardable, we speculate that it is immutable. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + // Volatile memory access requires the value be read exactly once from + // memory. Do not forward the expression so that re-evaluation at each + // use site cannot re-read potentially modified memory. 
+ // FIXME: To force implementations to actually respect the volatile nature of the load, + // the block itself must be marked volatile, or VulkanMM is used to do an explicit volatile load. + if (forward && length >= 4 && (ops[3] & MemoryAccessVolatileMask) != 0) + forward = false; + // If loading a non-native row-major matrix, mark the expression as need_transpose. bool need_transpose = false; bool old_need_transpose = false;