pygfx · almarklein · Jun 23, 2026 · Apr 9, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,21 @@ Possible sections in each release:
 * Security: in case of vulnerabilities.
 
 
+## [unreleased]
+
+### Changed:
+* update wgpu-native to [29.0.0.0](https://github.com/gfx-rs/wgpu-native/blob/v29.0.0.0/CHANGELOG.md#changed) by @Vipitis in https://github.com/pygfx/wgpu-py/pull/805
+  * updates the `webgpu.h` header to a more recent version
+  * Updated features to be in spec: `shader-primitive-index` -> `primitive-index`
+  * push-constants are now closer to spec and called `immediates`:
+    * required feature `push-constants` -> `immediates`
+    * required limit `max-push-constant-size` -> `max-immediate-size`
+    * no more `push_constant_layouts` in create_pipeline_layout, just `immediate_size`
+    * `set_push_constants` -> `set_immediates` (removed stages parameter)
+    * in wgsl `var<push_constant>` -> `var<immediate>`
+
+
+
 ## [v0.31.0] - 02-03-2026
 
 * Update to wgpu-native v27.0.4.0 by @almarklein in https://github.com/pygfx/wgpu-py/pull/787

diff --git a/codegen/hparser.py b/codegen/hparser.py
@@ -1,39 +1,88 @@
+import os
 from cffi import FFI
+from cffi.cparser import _preprocess as cffi_preprocess
 
 from codegen.utils import print, remove_c_comments
-from codegen.files import read_file
 
+from wgpu._coreutils import get_header_filename
 
 _parser = None
 
 
-def _get_wgpu_header():
+def _get_wgpu_header(*filenames):
     """Func written so we can use this in both wgpu_native/_ffi.py and codegen/hparser.py"""
-    # Read files
-    lines1 = []
-    lines1.extend(read_file("resources", "webgpu.h").splitlines())
-    lines1.extend(read_file("resources", "wgpu.h").splitlines())
-    # Deal with pre-processor commands, because cffi cannot handle them.
-    # Just removing them, plus a few extra lines, seems to do the trick.
-    lines2 = []
-    for line in lines1:
-        if line.startswith("#define ") and len(line.split()) > 2 and "0x" in line:
-            line = line.replace("(", "").replace(")", "")
-        elif line.startswith("#"):
-            continue
-        elif 'extern "C"' in line:
-            continue
-        for define_to_drop in [
-            "WGPU_EXPORT ",
-            "WGPU_NULLABLE ",
-            " WGPU_OBJECT_ATTRIBUTE",
-            " WGPU_ENUM_ATTRIBUTE",
-            " WGPU_FUNCTION_ATTRIBUTE",
-            " WGPU_STRUCTURE_ATTRIBUTE",
-        ]:
-            line = line.replace(define_to_drop, "")
-        lines2.append(line)
-    return "\n".join(lines2)
+
+    cleaned_contents = []  # one for each filename
+
+    for filename in filenames:
+        # Read the code
+        with open(filename, "rb") as f:
+            source = f.read().decode().replace("\r\n", "\n").replace("\\\n", "")
+
+        # Use a cffi preprocessor to e.g. remove comments, then remove empty lines.
+        # Yeah, that's a private func from cffi, but this is not runtime code so we can fix it when it changes.
+        lines1 = []
+        cleaned_source, macros = cffi_preprocess(source)
+        lines1 += [f"#define {k} {v}" for k, v in macros.items()]  # restore macros
+        lines1 += [line.strip() for line in cleaned_source.splitlines() if line.strip()]
+
+        # Deal with pre-processor commands, because cffi cannot handle them.
+        # Just removing them, plus a few extra lines, seems to do the trick.
+        lines2 = [f"// Cleaned version of {os.path.basename(filename)} ".ljust(79, "-")]
+        in_ifdef = False
+        for line in lines1:
+            # skip #ifdef blocks, which cffi doesn't support. In both headers they are used for `#ifdef __cplusplus` which we were skipping anyway.
+            if line.startswith("#ifdef "):
+                in_ifdef = True
+                continue
+            if in_ifdef and line.startswith("#endif"):
+                in_ifdef = False
+                continue
+            if in_ifdef:
+                continue
+            if (
+                line.startswith("#define ")
+                and len(line.split()) > 2
+                and ("0x" in line or "_MAX" in line)
+            ):
+                # pattern to find: #define WGPU_CONSTANT (0x1234)
+                # we use ffi.sizeof() to hopefully get the correct max sizes per platform
+                # we don't have ffi in this namespace, so I just put the hardcoded values for now, we could use ctypes.sizeof(ctypes.c_size_t)
+                max_size = hex((1 << (8 * 8)) - 1)  # sizeof(size_t)
+                max_32 = hex((1 << (4 * 8)) - 1)  # sizeof(uint32_t)
+                max_64 = hex((1 << (8 * 8)) - 1)  # sizeof(uint64_t)
+                line = (
+                    line.replace("SIZE_MAX", max_size)
+                    .replace("UINT32_MAX", max_32)
+                    .replace("UINT64_MAX", max_64)
+                )
+                # cffi seems to struggle with these macros, so we can just skip them I hope, the idl spec already contains defaults.
+                if line.startswith("#define") and "_INIT" in line:
+                    continue
+                line = line.replace("(", "").replace(")", "")
+            elif line.startswith("#"):
+                continue
+            for define_to_drop in [
+                "WGPU_EXPORT ",
+                "WGPU_NULLABLE ",
+                " WGPU_OBJECT_ATTRIBUTE",
+                " WGPU_ENUM_ATTRIBUTE",
+                " WGPU_FUNCTION_ATTRIBUTE",
+                " WGPU_STRUCTURE_ATTRIBUTE",
+            ]:
+                line = line.replace(define_to_drop, "")
+            lines2.append(line)
+
+        lines2.append("")
+        cleaned_contents.append("\n".join(lines2))
+
+    # Write the combined source, which is what will be loaded at wgpu import time
+    combined_source = "\n\n".join(cleaned_contents)
+    combined_header_file = get_header_filename("combined_header.h")
+    with open(combined_header_file, "wb") as f:
+        f.write(combined_source.encode())
+
+    return combined_source
 
 
 def get_h_parser(*, allow_cache=True):
@@ -44,7 +93,9 @@ def get_h_parser(*, allow_cache=True):
     if _parser and allow_cache:
         return _parser
 
-    source = _get_wgpu_header()
+    source = _get_wgpu_header(
+        get_header_filename("webgpu.h"), get_header_filename("wgpu.h")
+    )
 
     # Create parser
     hp = HParser(source)
@@ -74,6 +125,7 @@ def parse(self, verbose=True):
             stats = ", ".join(f"{len(getattr(self, key))} {key}" for key in keys)
             print("webgpu.h/wgpu.h define " + stats)
 
+    # NOTE: we could use pycparser as it's used by cffi anyway and we have that.
     def _parse_from_h(self):
         code = self.source
 
@@ -91,7 +143,7 @@ def _parse_from_h(self):
             # Decompose "typedef enum XX {...} XX;"
             name1 = code[i1 + 13 : i2].strip()
             name2 = code[i3 + 1 : i4].strip()
-            assert name1 == name2
+            assert name1 == name2, f"mismatch in enum name: {name1} vs {name2}"
             assert name1.startswith("WGPU")
             name = name1[4:]
             self.enums[name] = enum = {}

diff --git a/codegen/wgpu_native_patcher.py b/codegen/wgpu_native_patcher.py
@@ -79,20 +79,32 @@ def write_mappings():
 
     # Create enummap, which allows the wgpu-native backend to resolve enum field names
     # to the corresponding integer value.
+    # We only include enums and enum fields that are defined in webgpu.h, so any
+    # code written against wgpu-py is maximally compatible.
     enummap = {}
     for name in idl.enums:
         hname = name_map.get(name, name)
+        native_name = "Native" + hname
         if hname not in hp.enums:
-            print(f"Enum {hname} missing in webgpu.h/wgpu.h")
+            print(f"Enum {hname} missing in webgpu.h")
             continue
         hp_enum = {key.lower(): val for key, val in hp.enums[hname].items()}
+        # When a value is present *also* in wgpu.h, it overrides the value, whereby we
+        # assume that's the value wgpu-native will most likely accept. This was introduced
+        # to cover 16-norm TextureFormats, ref https://github.com/pygfx/wgpu-py/pull/805.
+        if native_name in hp.enums:
+            for key, val in hp.enums[native_name].items():
+                key = key.lower()
+                if key in hp_enum:
+                    print(f"Enum field {name}.{key} overridden by wgpu.h")
+                    hp_enum[key] = val
         for ikey in idl.enums[name].values():
             hkey = ikey.lower().replace("-", "")
             hkey = name_map.get(f"{name}.{hkey}") or hkey
             if hkey in hp_enum:
                 enummap[name + "." + ikey] = hp_enum[hkey]
             else:
-                print(f"Enum field {name}.{ikey} missing in webgpu.h/wgpu.h")
+                print(f"Enum field {name}.{ikey} missing in webgpu.h")
 
     # Write enummap
     pylines.append(f"# There are {len(enummap)} enum mappings\n")

diff --git a/docs/backends.rst b/docs/backends.rst
@@ -61,100 +61,73 @@ The wgpu_native backend provides a few extra functionalities:
     :return: Device
     :rtype: wgpu.GPUDevice
 
-The wgpu_native backend provides support for push constants.
-Since WebGPU does not support this feature, documentation on its use is hard to find.
-A full explanation of push constants and its use in Vulkan can be found
-`here <https://vkguide.dev/docs/chapter-3/push_constants/>`_.
-Using push constants in WGPU closely follows the Vulkan model.
+The wgpu_native backend provides support for immediates.
+Immediates are not yet part of the WebGPU spec, but the native WebGPU headers have officially converged on this API.
 
-The advantage of push constants is that they are typically faster to update than uniform buffers.
-Modifications to push constants are included in the command encoder; updating a uniform
-buffer involves sending a separate command to the GPU.
-The disadvantage of push constants is that their size limit is much smaller. The limit
-is guaranteed to be at least 128 bytes, and 256 bytes is typical.
+Immediates offer a way to send a small amount of data to the GPU directly in the command encoder, without the need for uniform buffer uploads.
+They are restricted to rather small sizes, usually 128 or 256 bytes.
 
-Given an adapter, first determine if it supports push constants::
+Given an adapter, first determine if it supports immediates::
 
-    >> "push-constants" in adapter.features
+    >> "immediates" in adapter.features
     True
 
-If push constants are supported, determine the maximum number of bytes that can
-be allocated for push constants::
+If immediates are supported, determine the maximum number of bytes that can
+be allocated for immediates::
 
-    >> adapter.limits["max-push-constant-size"]
+    >> adapter.limits["max-immediate-size"]
     256
 
-You must tell the adapter to create a device that supports push constants,
-and you must tell it the number of bytes of push constants that you are using.
+You must tell the adapter to create a device that supports immediates,
+and you must tell it the number of bytes of immediates that you are using.
 Overestimating is okay::
 
     device = adapter.request_device_sync(
-        required_features=["push-constants"],
-        required_limits={"max-push-constant-size": 256},
+        required_features=["immediates"],
+        required_limits={"max-immediate-size": 256},
     )
 
-Creating a push constant in your shader code is similar to the way you would create
+Creating an immediate data struct in your shader code is similar to the way you would create
 a uniform buffer.
-The fields that are only used in the ``@vertex`` shader should be separated from the fields
-that are only used in the ``@fragment`` shader which should be separated from the fields
-used in both shaders::
+The same data can be accessed across all shader stages: vertex, fragment and compute::
 
-    struct PushConstants {
-        // vertex shader
+    struct Immediates {
         vertex_transform: vec4x4f,
-        // fragment shader
-        fragment_transform: vec4x4f,
-        // used in both
-        generic_transform: vec4x4f,
+        fragment_color: vec4f,
+        pick_position: vec2f,
+        frame_counter: u32,
     }
-    var<push_constant> push_constants: PushConstants;
+    var<immediate> immediate_data: Immediates;
 
 To the pipeline layout for this shader, use
 ``wgpu.backends.wpgu_native.create_pipeline_layout`` instead of
-``device.create_pipelinelayout``.  It takes an additional argument,
-``push_constant_layouts``, describing
-the layout of the push constants.  For example, in the above example::
+``device.create_pipeline_layout``.  It takes an additional argument,
+``immediate_size``, which is simply the number of bytes of immediate data you are using.
 
-    push_constant_layouts = [
-        {"visibility": ShaderState.VERTEX, "start": 0, "end": 64},
-        {"visibility": ShaderStage.FRAGMENT, "start": 64, "end": 128},
-        {"visibility": ShaderState.VERTEX + ShaderStage.FRAGMENT , "start": 128, "end": 192},
-    ],
+Finally, you set the value of the immediates by using
+``wgpu.backends.wgpu_native.set_immediates``::
 
-Finally, you set the value of the push constant by using
-``wgpu.backends.wpgu_native.set_push_constants``::
+    set_immediates(pass_encoder, offset=0, size_in_bytes=64, data=<64 bytes>, data_offset=0)
 
-    set_push_constants(this_pass, ShaderStage.VERTEX, 0, 64, <64 bytes>)
-    set_push_constants(this_pass, ShaderStage.FRAGMENT, 64, 128, <64 bytes>)
-    set_push_constants(this_pass, ShaderStage.VERTEX + ShaderStage.FRAGMENT, 128, 192, <64 bytes>)
-
-Bytes must be set separately for each of the three shader stages.  If the push constant has
-already been set, on the next use you only need to call ``set_push_constants`` on those
-bytes you wish to change.
-
-.. py:function:: wgpu.backends.wpgu_native.create_pipeline_layout(device, *, label="", bind_group_layouts, push_constant_layouts=[])
+.. py:function:: wgpu.backends.wgpu_native.create_pipeline_layout(device, *, label="", bind_group_layouts, immediate_size=0)
 
    This method provides the same functionality as :func:`wgpu.GPUDevice.create_pipeline_layout`,
-   but provides an extra `push_constant_layouts` argument.
-   When using push constants, this argument is a list of dictionaries, where each item
-   in the dictionary has three fields: `visibility`, `start`, and `end`.
+   but provides an extra `immediate_size` argument.
+   When using immediates, this argument is the number of bytes of immediate data you are using.
 
     :param device: The device on which we are creating the pipeline layout
     :param label: An optional label
-    :param bind_group_layouts:
-    :param push_constant_layouts: Described above.
+    :param bind_group_layouts: 
+    :param immediate_size: number of bytes for immediates data.
 
-.. py:function:: wgpu.backends.wgpu_native.set_push_constants(render_pass_encoder, visibility, offset, size_in_bytes, data, data_offset=0)
+.. py:function:: wgpu.backends.wgpu_native.set_immediates(render_pass_encoder, offset, size_in_bytes, data, data_offset=0)
 
-    This function requires that the underlying GPU implement `push_constants`.
-    These push constants are a buffer of bytes available to the `fragment` and `vertex`
-    shaders. They are similar to a bound buffer, but the buffer is set using this
-    function call.
+    This function requires that the underlying GPU implement `immediates`.
+    These immediates are a buffer of bytes available to all shader stages.
 
-    :param render_pass_encoder: The render pass encoder to which we are pushing constants.
-    :param visibility: The stages (vertex, fragment, or both) to which these constants are visible
-    :param offset: The offset into the push constants at which the bytes are to be written
-    :param size_in_bytes: The number of bytes to copy from the ata
+    :param render_pass_encoder: The render pass encoder to which we are providing immediates.
+    :param offset: The offset into the immediate data at which the bytes are to be written
+    :param size_in_bytes: The number of bytes to copy from the data
     :param data: The data to copy to the buffer
     :param data_offset: The starting offset in the data at which to begin copying.
 
@@ -349,6 +322,7 @@ Use like the following before the instance is created, which happens during requ
         print(a.summary)
 
 For additional usage examples look at `extras_dxc.py` and `extras_debug.py` in the examples directory.
+Limited documentation on instance extras can be found in `wgpu.h`.
 
 The js_webgpu backend
 ---------------------