From 8ee9ccf7cf27d04dc3ddf9b66aa9f36973fabd7f Mon Sep 17 00:00:00 2001 From: JianHong Zhao Date: Thu, 5 Mar 2026 13:21:32 +0800 Subject: [PATCH 1/6] fix(kompile): map linked NormalSym functions to monoItemFn(noBody) (#953) ## Summary - Generate `MonoItemKind::MonoItemFn(symbol(...), defId(...), noBody)` for linked `NormalSym` entries that are present in `function_symbols` but missing from `items`. - Keep function resolution on concrete linked symbols instead of falling back to `"** UNKNOWN FUNCTION **"` for bodyless linked functions. - Refresh affected integration/show fixtures and normalize panic symbol hash suffixes in fixture comparison to keep snapshot output stable. - Preserve readable `kmir show --statistics --leaves` annotations for `noBody` callees (`>> function`, `>> call span`, `>> message`) even after replacing `"** UNKNOWN FUNCTION **"` with concrete symbols. ## Context In multi-crate SMIR, linked external functions can exist in `function_symbols` but not in local `items` (no local body). Before this change, that gap could leave `lookupFunction(Ty)` without a generated `monoItemFn` equation. ### Red vs Green **RED (actual stuck shape in show fixtures):** ```k #setUpCalleeData( monoItemFn(... name: symbol("** UNKNOWN FUNCTION **"), id: defId(38), body: noBody), ... ) ``` This is the unresolved-function path. **GREEN (after fix):** ```k #setUpCalleeData( monoItemFn(... name: symbol("_ZN4core9panicking5panic17hE"), id: defId(38), body: noBody), ... ) ``` The same path now resolves to a concrete linked symbol instead of `"** UNKNOWN FUNCTION **"`. --------- Co-authored-by: automergerpr-permission-manager[bot] <190534181+automergerpr-permission-manager[bot]@users.noreply.github.com> --- kmir/src/kmir/kompile.py | 16 +- kmir/src/kmir/testing/fixtures.py | 18 +- kmir/src/kmir/utils.py | 12 +- ...t_lib::testing::test_add_in_range.expected | 2 +- .../blackbox_function_symbols.expected.json | 40 +-- ...box_function_symbols_reverse.expected.json | 40 +-- .../blackbox_functions.expected.json | 315 ++++++++++++++++++ ...nflict-fail.check_assume_conflict.expected | 2 +- .../show/interior-mut-fail.main.expected | 2 +- ...-length-test-fail.array_cast_test.expected | 2 +- ...c-args-fail.main.cli-stats-leaves.expected | 6 +- .../show/symbolic-args-fail.main.expected | 2 +- ...lic-structs-fail.eats_struct_args.expected | 2 +- .../test_offset_from-fail.testing.expected | 2 +- kmir/src/tests/unit/test_fixtures.py | 27 ++ 15 files changed, 427 insertions(+), 61 deletions(-) create mode 100644 kmir/src/tests/unit/test_fixtures.py diff --git a/kmir/src/kmir/kompile.py b/kmir/src/kmir/kompile.py index 67878e05e..c48fef906 100644 --- a/kmir/src/kmir/kompile.py +++ b/kmir/src/kmir/kompile.py @@ -521,13 +521,25 @@ def _functions(kmir: KMIR, smir_info: SMIRInfo) -> dict[int, KInner]: for ty in smir_info.function_symbols_reverse[item_name]: functions[ty] = parsed_item_kinner.args[1] - # Add intrinsic functions + # Add intrinsic functions and linked normal symbols that have no local body in `items`. + # Normal symbols must still map to `monoItemFn(..., noBody)` instead of falling back to UNKNOWN FUNCTION. for ty, sym in smir_info.function_symbols.items(): - if 'IntrinsicSym' in sym and ty not in functions: + if ty in functions: + continue + if 'IntrinsicSym' in sym: functions[ty] = KApply( 'IntrinsicFunction', [KApply('symbol(_)_LIB_Symbol_String', [stringToken(sym['IntrinsicSym'])])], ) + elif isinstance(sym.get('NormalSym'), str): + functions[ty] = KApply( + 'MonoItemKind::MonoItemFn', + ( + KApply('symbol(_)_LIB_Symbol_String', (stringToken(sym['NormalSym']),)), + KApply('defId(_)_BODY_DefId_Int', (intToken(ty),)), + KApply('noBody_BODY_MaybeBody', ()), + ), + ) return functions diff --git a/kmir/src/kmir/testing/fixtures.py b/kmir/src/kmir/testing/fixtures.py index 4ade1e06d..d42bf606d 100644 --- a/kmir/src/kmir/testing/fixtures.py +++ b/kmir/src/kmir/testing/fixtures.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re +import sys from difflib import unified_diff from typing import TYPE_CHECKING @@ -13,24 +15,36 @@ from pytest import FixtureRequest, Parser -import sys - def pytest_configure(config) -> None: sys.setrecursionlimit(1000000) +def _normalize_symbol_hashes(text: str) -> str: + """Normalize rustc symbol hash suffixes that drift across builds/environments.""" + # Normalize mangled symbol hashes, including generic names with `$` and `.`. + # Keep trailing `E` when present; truncated variants may omit it. + text = re.sub(r'(_ZN[0-9A-Za-z_$.]+17h)[0-9a-fA-F]+E', r'\1E', text) + text = re.sub(r'(_ZN[0-9A-Za-z_$.]+17h)[0-9a-fA-F]+', r'\1', text) + # Normalize demangled hash suffixes (`...::h`). + text = re.sub(r'(::h)[0-9a-fA-F]{8,}', r'\1', text) + return text + + def assert_or_update_show_output( actual_text: str, expected_file: Path, *, update: bool, path_replacements: dict[str, str] | None = None ) -> None: if path_replacements: for old, new in path_replacements.items(): actual_text = actual_text.replace(old, new) + # Normalize rustc symbol hash suffixes that can drift across builds/environments. + actual_text = _normalize_symbol_hashes(actual_text) if update: expected_file.write_text(actual_text) else: assert expected_file.is_file() expected_text = expected_file.read_text() + expected_text = _normalize_symbol_hashes(expected_text) if actual_text != expected_text: diff = '\n'.join( unified_diff( diff --git a/kmir/src/kmir/utils.py b/kmir/src/kmir/utils.py index 935d37656..8feb00ca5 100644 --- a/kmir/src/kmir/utils.py +++ b/kmir/src/kmir/utils.py @@ -288,8 +288,8 @@ def _extract_alloc_id(operands: KInner) -> int | None: return None -def _annotate_unknown_function(k_cell: KInner, smir_info: SMIRInfo) -> list[str]: - """If the k cell is `#setUpCalleeData` for `** UNKNOWN FUNCTION **`, return annotation lines with decoded info.""" +def _annotate_nobody_function(k_cell: KInner, smir_info: SMIRInfo) -> list[str]: + """If the k cell is `#setUpCalleeData` for a `noBody` callee, return annotation lines with decoded info.""" from .alloc import Allocation, AllocId, AllocInfo, Memory from .linker import _demangle @@ -307,16 +307,14 @@ def _annotate_unknown_function(k_cell: KInner, smir_info: SMIRInfo) -> list[str] KApply( label=KLabel(name='MonoItemKind::MonoItemFn'), args=[ - KApply(args=[KToken(token=symbol_name)]), + KApply(args=[KToken()]), KApply(label=KLabel(name='defId(_)_BODY_DefId_Int'), args=[KToken(token=def_id_str)]), _, ], ), operands, KApply(label=KLabel(name='span'), args=[KToken(token=span_str)]), - ] if ( - symbol_name == '\"** UNKNOWN FUNCTION **\"' - ): + ]: def_id = int(def_id_str) span = int(span_str) case _: @@ -379,7 +377,7 @@ def render_leaf_k_cells(proof: APRProof, cterm_show: CTermShow, smir_info: SMIRI lines.extend(f' {k_line}' for k_line in k_lines) if smir_info is not None and k_cell is not None: - annotations = _annotate_unknown_function(k_cell, smir_info) + annotations = _annotate_nobody_function(k_cell, smir_info) lines.extend(annotations) if idx != len(leaves) - 1: diff --git a/kmir/src/tests/integration/data/crate-tests/single-lib/small_test_lib::testing::test_add_in_range.expected b/kmir/src/tests/integration/data/crate-tests/single-lib/small_test_lib::testing::test_add_in_range.expected index 36e7fa6a7..3cb67eec2 100644 --- a/kmir/src/tests/integration/data/crate-tests/single-lib/small_test_lib::testing::test_add_in_range.expected +++ b/kmir/src/tests/integration/data/crate-tests/single-lib/small_test_lib::testing::test_add_in_range.expected @@ -16,7 +16,7 @@ ┃ │ ┃ │ (6 steps) ┃ └─ 6 (stuck, leaf) -┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , +┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN3std7process4exit17h ┃ ┗━━┓ subst: .Subst ┃ constraint: diff --git a/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols.expected.json b/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols.expected.json index 88db28bbd..3058a6679 100644 --- a/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols.expected.json +++ b/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols.expected.json @@ -1,69 +1,69 @@ { "-6": { - "NormalSym": "_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hb524060ed0e83bbbE" + "NormalSym": "_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hE" }, "-5": { - "NormalSym": "_ZN4core3ptr28drop_in_place$LT$$RF$u32$GT$17hb92e31f0aaa3d322E" + "NormalSym": "_ZN4core3ptr28drop_in_place$LT$$RF$u32$GT$17hE" }, "-4": { - "NormalSym": "_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h526a5c5d4d9d3202E" + "NormalSym": "_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hE" }, "-3": { - "NormalSym": "_ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17h3a8781eea2f9ddb7E" + "NormalSym": "_ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17hE" }, "-2": { - "NormalSym": "_ZN3std2rt10lang_start17h17250425291430deE" + "NormalSym": "_ZN3std2rt10lang_start17hE" }, "-1": { - "NormalSym": "_ZN8blackbox4main17h56268fefa1135d9eE" + "NormalSym": "_ZN8blackbox4main17hE" }, "0": { - "NormalSym": "_ZN3std2rt19lang_start_internal17h018b8394ba015d86E" + "NormalSym": "_ZN3std2rt19lang_start_internal17hE" }, "13": { - "NormalSym": "_ZN3std3sys9backtrace28__rust_begin_short_backtrace17h3ac302c9481e885fE" + "NormalSym": "_ZN3std3sys9backtrace28__rust_begin_short_backtrace17hE" }, "14": { - "NormalSym": "_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h8eaae5c69aab66e9E" + "NormalSym": "_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hE" }, "19": { - "NormalSym": "_ZN4core3ops8function6FnOnce9call_once17haae505bd39892fd2E" + "NormalSym": "_ZN4core3ops8function6FnOnce9call_once17hE" }, "20": { "IntrinsicSym": "black_box" }, "21": { - "NormalSym": "_ZN4core3fmt3num50_$LT$impl$u20$core..fmt..Debug$u20$for$u20$u32$GT$3fmt17h99c61f0bde9a2afdE" + "NormalSym": "_ZN4core3fmt3num50_$LT$impl$u20$core..fmt..Debug$u20$for$u20$u32$GT$3fmt17hE" }, "27": { - "NormalSym": "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hb987357f13dc6cc8E" + "NormalSym": "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hE" }, "28": { - "NormalSym": "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..UpperHex$u20$for$u20$u32$GT$3fmt17h7baa47f3e5cbe44cE" + "NormalSym": "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..UpperHex$u20$for$u20$u32$GT$3fmt17hE" }, "29": { - "NormalSym": "_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$u32$GT$3fmt17hec74c53b91325b16E" + "NormalSym": "_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$u32$GT$3fmt17hE" }, "30": { - "NormalSym": "_ZN4core3ops8function6FnOnce9call_once17h2d8e0aae13049ae8E" + "NormalSym": "_ZN4core3ops8function6FnOnce9call_once17hE" }, "32": { - "NormalSym": "_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h5c121846c1652782E" + "NormalSym": "_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hE" }, "35": { "IntrinsicSym": "black_box" }, "36": { - "NormalSym": "_ZN4core9panicking19assert_failed_inner17h1d286061ca0adfe7E" + "NormalSym": "_ZN4core9panicking19assert_failed_inner17hE" }, "43": { - "NormalSym": "_ZN8blackbox7add_one17h19d5f3b41fdf13daE" + "NormalSym": "_ZN8blackbox7add_one17hE" }, "44": { - "NormalSym": "_ZN4core4hint9black_box17h0bfc654765aa2ddfE" + "NormalSym": "_ZN4core4hint9black_box17hE" }, "45": { - "NormalSym": "_ZN4core9panicking13assert_failed17h9acd0d94a91ca0eaE" + "NormalSym": "_ZN4core9panicking13assert_failed17hE" }, "48": { "NoOpSym": "" diff --git a/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols_reverse.expected.json b/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols_reverse.expected.json index 4fcfd0647..05755f54f 100644 --- a/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols_reverse.expected.json +++ b/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_function_symbols_reverse.expected.json @@ -1,62 +1,62 @@ { - "_ZN3std2rt10lang_start17h17250425291430deE": [ + "_ZN3std2rt10lang_start17hE": [ -2 ], - "_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h5c121846c1652782E": [ + "_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17hE": [ 32 ], - "_ZN3std2rt19lang_start_internal17h018b8394ba015d86E": [ + "_ZN3std2rt19lang_start_internal17hE": [ 0 ], - "_ZN3std3sys9backtrace28__rust_begin_short_backtrace17h3ac302c9481e885fE": [ + "_ZN3std3sys9backtrace28__rust_begin_short_backtrace17hE": [ 13 ], - "_ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17h3a8781eea2f9ddb7E": [ + "_ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17hE": [ -3 ], - "_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$u32$GT$3fmt17hec74c53b91325b16E": [ + "_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$u32$GT$3fmt17hE": [ 29 ], - "_ZN4core3fmt3num50_$LT$impl$u20$core..fmt..Debug$u20$for$u20$u32$GT$3fmt17h99c61f0bde9a2afdE": [ + "_ZN4core3fmt3num50_$LT$impl$u20$core..fmt..Debug$u20$for$u20$u32$GT$3fmt17hE": [ 21 ], - "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hb987357f13dc6cc8E": [ + "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hE": [ 27 ], - "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..UpperHex$u20$for$u20$u32$GT$3fmt17h7baa47f3e5cbe44cE": [ + "_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..UpperHex$u20$for$u20$u32$GT$3fmt17hE": [ 28 ], - "_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17h526a5c5d4d9d3202E": [ + "_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hE": [ -4 ], - "_ZN4core3ops8function6FnOnce9call_once17h2d8e0aae13049ae8E": [ + "_ZN4core3ops8function6FnOnce9call_once17hE": [ 30 ], - "_ZN4core3ops8function6FnOnce9call_once17haae505bd39892fd2E": [ + "_ZN4core3ops8function6FnOnce9call_once17hE": [ 19 ], - "_ZN4core3ptr28drop_in_place$LT$$RF$u32$GT$17hb92e31f0aaa3d322E": [ + "_ZN4core3ptr28drop_in_place$LT$$RF$u32$GT$17hE": [ -5 ], - "_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hb524060ed0e83bbbE": [ + "_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17hE": [ -6 ], - "_ZN4core4hint9black_box17h0bfc654765aa2ddfE": [ + "_ZN4core4hint9black_box17hE": [ 44 ], - "_ZN4core9panicking13assert_failed17h9acd0d94a91ca0eaE": [ + "_ZN4core9panicking13assert_failed17hE": [ 45 ], - "_ZN4core9panicking19assert_failed_inner17h1d286061ca0adfe7E": [ + "_ZN4core9panicking19assert_failed_inner17hE": [ 36 ], - "_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17h8eaae5c69aab66e9E": [ + "_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hE": [ 14 ], - "_ZN8blackbox4main17h56268fefa1135d9eE": [ + "_ZN8blackbox4main17hE": [ -1 ], - "_ZN8blackbox7add_one17h19d5f3b41fdf13daE": [ + "_ZN8blackbox7add_one17hE": [ 43 ], "black_box": [ diff --git a/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_functions.expected.json b/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_functions.expected.json index 8c767e34e..029619a88 100644 --- a/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_functions.expected.json +++ b/kmir/src/tests/integration/data/exec-smir/intrinsic/blackbox_functions.expected.json @@ -13569,6 +13569,69 @@ "node": "KApply", "variable": false }, + "0": { + "args": [ + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "String", + "node": "KSort" + }, + "token": "\"_ZN3std2rt19lang_start_internal17hE\"" + } + ], + "arity": 1, + "label": { + "name": "symbol(_)_LIB_Symbol_String", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "Int", + "node": "KSort" + }, + "token": "0" + } + ], + "arity": 1, + "label": { + "name": "defId(_)_BODY_DefId_Int", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [], + "arity": 0, + "label": { + "name": "noBody_BODY_MaybeBody", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + } + ], + "arity": 3, + "label": { + "name": "MonoItemKind::MonoItemFn", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, "13": { "args": [ { @@ -22521,6 +22584,195 @@ "node": "KApply", "variable": false }, + "27": { + "args": [ + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "String", + "node": "KSort" + }, + "token": "\"_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hE\"" + } + ], + "arity": 1, + "label": { + "name": "symbol(_)_LIB_Symbol_String", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "Int", + "node": "KSort" + }, + "token": "27" + } + ], + "arity": 1, + "label": { + "name": "defId(_)_BODY_DefId_Int", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [], + "arity": 0, + "label": { + "name": "noBody_BODY_MaybeBody", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + } + ], + "arity": 3, + "label": { + "name": "MonoItemKind::MonoItemFn", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + "28": { + "args": [ + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "String", + "node": "KSort" + }, + "token": "\"_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..UpperHex$u20$for$u20$u32$GT$3fmt17hE\"" + } + ], + "arity": 1, + "label": { + "name": "symbol(_)_LIB_Symbol_String", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "Int", + "node": "KSort" + }, + "token": "28" + } + ], + "arity": 1, + "label": { + "name": "defId(_)_BODY_DefId_Int", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [], + "arity": 0, + "label": { + "name": "noBody_BODY_MaybeBody", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + } + ], + "arity": 3, + "label": { + "name": "MonoItemKind::MonoItemFn", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + "29": { + "args": [ + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "String", + "node": "KSort" + }, + "token": "\"_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$u32$GT$3fmt17hE\"" + } + ], + "arity": 1, + "label": { + "name": "symbol(_)_LIB_Symbol_String", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "Int", + "node": "KSort" + }, + "token": "29" + } + ], + "arity": 1, + "label": { + "name": "defId(_)_BODY_DefId_Int", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [], + "arity": 0, + "label": { + "name": "noBody_BODY_MaybeBody", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + } + ], + "arity": 3, + "label": { + "name": "MonoItemKind::MonoItemFn", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, "30": { "args": [ { @@ -27986,6 +28238,69 @@ "node": "KApply", "variable": false }, + "36": { + "args": [ + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "String", + "node": "KSort" + }, + "token": "\"_ZN4core9panicking19assert_failed_inner17hE\"" + } + ], + "arity": 1, + "label": { + "name": "symbol(_)_LIB_Symbol_String", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [ + { + "node": "KToken", + "sort": { + "name": "Int", + "node": "KSort" + }, + "token": "36" + } + ], + "arity": 1, + "label": { + "name": "defId(_)_BODY_DefId_Int", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, + { + "args": [], + "arity": 0, + "label": { + "name": "noBody_BODY_MaybeBody", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + } + ], + "arity": 3, + "label": { + "name": "MonoItemKind::MonoItemFn", + "node": "KLabel", + "params": [] + }, + "node": "KApply", + "variable": false + }, "43": { "args": [ { diff --git a/kmir/src/tests/integration/data/prove-rs/show/assume-cheatcode-conflict-fail.check_assume_conflict.expected b/kmir/src/tests/integration/data/prove-rs/show/assume-cheatcode-conflict-fail.check_assume_conflict.expected index 700446f6e..470f41c25 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/assume-cheatcode-conflict-fail.check_assume_conflict.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/assume-cheatcode-conflict-fail.check_assume_conflict.expected @@ -5,7 +5,7 @@ │ │ (61 steps) └─ 3 (stuck, leaf) - #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , + #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17h span: 32 diff --git a/kmir/src/tests/integration/data/prove-rs/show/interior-mut-fail.main.expected b/kmir/src/tests/integration/data/prove-rs/show/interior-mut-fail.main.expected index 44581b4f2..4c165cf72 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/interior-mut-fail.main.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/interior-mut-fail.main.expected @@ -5,7 +5,7 @@ │ │ (877 steps) └─ 3 (stuck, leaf) - #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , + #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core4cell22panic_already span: 32 diff --git a/kmir/src/tests/integration/data/prove-rs/show/pointer-cast-length-test-fail.array_cast_test.expected b/kmir/src/tests/integration/data/prove-rs/show/pointer-cast-length-test-fail.array_cast_test.expected index 1a3c758c5..9198552ba 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/pointer-cast-length-test-fail.array_cast_test.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/pointer-cast-length-test-fail.array_cast_test.expected @@ -17,7 +17,7 @@ ┃ │ ┃ │ (6 steps) ┃ └─ 6 (stuck, leaf) -┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , +┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17h ┃ span: 32 ┃ ┗━━┓ subst: .Subst diff --git a/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.cli-stats-leaves.expected b/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.cli-stats-leaves.expected index 84737597a..e4e10b74c 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.cli-stats-leaves.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.cli-stats-leaves.expected @@ -5,7 +5,7 @@ │ │ (566 steps) └─ 3 (stuck, leaf) - #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , + #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17h span: no-location:0 @@ -32,7 +32,7 @@ Leaf paths from init: LEAF CELLS --------------- Node 3: - #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , id: defId ( 38 ) , body: noBody ) , operandConstant ( constOperand ( ... span: span ( 32 ) , userTy: noUserTypeAnnotationIndex , const: mirConst ( ... kind: constantKindAllocated ( allocation ( ... bytes: b"\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x00\x00\x00\x00" , provenance: provenanceMap ( ... ptrs: provenanceMapEntry ( ... offset: 0 , allocId: allocId ( 1 ) ) .ProvenanceMapEntries ) , align: align ( 8 ) , mutability: mutabilityMut ) ) , ty: ty ( 39 ) , id: mirConstId ( 25 ) ) ) ) .Operands , span ( 117 ) ) ~> .K - >> function: core::panicking::panic::h941160ead03e2d54 + #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17hE" ) , id: defId ( 38 ) , body: noBody ) , operandConstant ( constOperand ( ... span: span ( 32 ) , userTy: noUserTypeAnnotationIndex , const: mirConst ( ... kind: constantKindAllocated ( allocation ( ... bytes: b"\x00\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x00\x00\x00\x00" , provenance: provenanceMap ( ... ptrs: provenanceMapEntry ( ... offset: 0 , allocId: allocId ( 1 ) ) .ProvenanceMapEntries ) , align: align ( 8 ) , mutability: mutabilityMut ) ) , ty: ty ( 39 ) , id: mirConstId ( 25 ) ) ) ) .Operands , span ( 117 ) ) ~> .K + >> function: core::panicking::panic::h >> call span: /kmir/src/tests/integration/data/prove-rs/symbolic-args-fail.rs:53:5 >> message: 'assertion failed: false' \ No newline at end of file diff --git a/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.expected b/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.expected index ef7740af7..e5b630195 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/symbolic-args-fail.main.expected @@ -5,7 +5,7 @@ │ │ (566 steps) └─ 3 (stuck, leaf) - #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , + #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17h span: 32 diff --git a/kmir/src/tests/integration/data/prove-rs/show/symbolic-structs-fail.eats_struct_args.expected b/kmir/src/tests/integration/data/prove-rs/show/symbolic-structs-fail.eats_struct_args.expected index a3abc238d..673eb0193 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/symbolic-structs-fail.eats_struct_args.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/symbolic-structs-fail.eats_struct_args.expected @@ -29,7 +29,7 @@ ┃ ┃ │ ┃ ┃ │ (6 steps) ┃ ┃ └─ 10 (stuck, leaf) -┃ ┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , +┃ ┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17h ┃ ┃ span: 32 ┃ ┃ ┃ ┗━━┓ subst: .Subst diff --git a/kmir/src/tests/integration/data/prove-rs/show/test_offset_from-fail.testing.expected b/kmir/src/tests/integration/data/prove-rs/show/test_offset_from-fail.testing.expected index a0ac24cae..8fd816dd8 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/test_offset_from-fail.testing.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/test_offset_from-fail.testing.expected @@ -114,7 +114,7 @@ ┃ │ ┃ │ (70 steps) ┃ └─ 22 (stuck, leaf) - ┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "** UNKNOWN FUNCTION **" ) , + ┃ #setUpCalleeData ( monoItemFn ( ... name: symbol ( "_ZN4core9panicking5panic17h ┃ span: 32 ┃ ┗━━┓ subst: .Subst diff --git a/kmir/src/tests/unit/test_fixtures.py b/kmir/src/tests/unit/test_fixtures.py new file mode 100644 index 000000000..b9a3de5b5 --- /dev/null +++ b/kmir/src/tests/unit/test_fixtures.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import pytest + +from kmir.testing.fixtures import _normalize_symbol_hashes + + +@pytest.mark.parametrize( + ('raw', 'expected'), + [ + ( + '_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hb987357f13dc6cc8E', + '_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hE', + ), + ( + '_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17hb987357f13dc6cc8', + '_ZN4core3fmt3num53_$LT$impl$u20$core..fmt..LowerHex$u20$for$u20$u32$GT$3fmt17h', + ), + ( + 'core::panicking::panic::h941160ead03e2d54', + 'core::panicking::panic::h', + ), + ], + ids=['generic-mangled-full', 'generic-mangled-truncated', 'demangled'], +) +def test_normalize_symbol_hashes(raw: str, expected: str) -> None: + assert _normalize_symbol_hashes(raw) == expected From 871e1554cdf03670c96ffdb551dc270fdac82626 Mon Sep 17 00:00:00 2001 From: JianHong Zhao Date: Thu, 5 Mar 2026 16:41:06 +0800 Subject: [PATCH 2/6] fix(rt): generalize direct-tag enum decoding to any variant count and discriminant (#955) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replace the specialized \`#decodeOptionTag01\` rule (which only handled exactly 2 variants with discriminants 0/1 and the Option-like 0+1 field pattern) with a general \`#decodeEnumDirect\` decoder that handles: - Any number of variants (not just two) - Any discriminant values (not just 0 and 1) - Any unsigned integer tag width (u8, u16, etc.) - Any number of fields per variant New K helper functions (all total with `owise` fallbacks): - `#decodeEnumDirectFields`: given variant index, decode its fields using per-variant offsets - `#decodeEnumDirectTag`: read tag bytes as unsigned little-endian integer - `#nthTys`: index into per-variant field type lists - `#nthVariantOffsets`: extract field offsets from nth variant layout ## Context Before this change, direct-tag enum decoding was specialized to `discriminants: 0, 1` and single-byte tag checks (modeled after `Option`). Any enum with different discriminant values or more than two variants would produce `UnableToDecode`. ## Changes - `kmir/src/kmir/kdist/mir-semantics/rt/decoding.md`: replace `#decodeOptionTag01` with general `#decodeEnumDirect` rules - `kmir/src/tests/integration/data/prove-rs/enum-direct-tag-decode.rs`: add Rust test covering repr(u8) 0/1, repr(u8) 2/5, repr(u16) 0/256, and 3-variant enums - `kmir/src/tests/integration/test_integration.py`: register the new prove-rs test ## Testing - `make build` ✅ - `uv --directory kmir run pytest kmir/src/tests/integration/test_integration.py -k 'enum-direct-tag-decode' -vv` ✅ --------- Co-authored-by: Daniel Cumming <124537596+dkcumming@users.noreply.github.com> --- .../kmir/kdist/mir-semantics/rt/decoding.md | 84 +++++++++++++------ .../data/prove-rs/enum-direct-tag-decode.rs | 72 ++++++++++++++++ 2 files changed, 132 insertions(+), 24 deletions(-) create mode 100644 kmir/src/tests/integration/data/prove-rs/enum-direct-tag-decode.rs diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md b/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md index 0ac8cff08..98b522012 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/decoding.md @@ -297,27 +297,26 @@ If there are no fields, the enum can be decoded by using their data as the discr requires TAG =/=Int DISCRIMINANT ``` -#### Enums with two variants +#### Enums with direct tag encoding and fields -Having two variants with possibly zero or one field each is a very common case, -it includes a number of standard library types such as `Option` and `Result`. - -The following rules are specialised to the case of encoding an `Option`. -An important distinction here is whether or not the tag is niche-encoded. -If the option holds data that has all-zero as a possible value, a separate tag is used, usually as the first field. -In both cases we expect the tag to be in the single shared field, and the discriminant to be just 0 and 1. +Decodes any enum with `tagEncodingDirect` and a `primitiveInt` tag, +with arbitrary variant counts, discriminant values, and field counts per variant. +The tag is read as an unsigned little-endian integer, matched against the discriminant +list to find the variant index, and then the variant's fields are decoded using the +per-variant layout offsets. ```k + // General entry rule: direct-tag enum with at least one field somewhere. rule #decodeValue( BYTES , typeInfoEnumType(... name: _ , adtDef: _ - , discriminants: discriminant(0) discriminant(1) .Discriminants - , fields: (.Tys : (FIELD_TYPE .Tys) : .Tyss) + , discriminants: DISCRIMINANTS + , fields: FIELD_TYPESS , layout: someLayoutShape(layoutShape(... - fields: fieldsShapeArbitrary(mk(... offsets: machineSize(0) .MachineSizes)) + fields: _FIELDS , variants: variantsShapeMultiple( mk(... @@ -329,7 +328,7 @@ In both cases we expect the tag to be in the single shared field, and the discri ) , tagEncoding: tagEncodingDirect , tagField: 0 - , variants: _VARIANTS + , variants: VARIANT_LAYOUTS ) ) , abi: _ABI @@ -338,22 +337,59 @@ In both cases we expect the tag to be in the single shared field, and the discri )) ) #as ENUM_TYPE ) - => #decodeOptionTag01(BYTES, TAG_WIDTH, FIELD_TYPE, ENUM_TYPE) - - syntax Evaluation ::= #decodeOptionTag01 ( Bytes , IntegerLength , Ty , TypeInfo ) [function, total] - // -------------------------------------------------------------------------------------- - rule #decodeOptionTag01(BYTES, _LEN, _TY, _ENUM_TYPE) - => Aggregate(variantIdx(0), .List) - requires 0 ==Int BYTES[0] // expect only 0 or 1 as tags, so higher bytes do not matter - [preserves-definedness] - rule #decodeOptionTag01(BYTES, LEN, TY, _ENUM_TYPE) - => Aggregate(variantIdx(1), ListItem(#decodeValue(substrBytes(BYTES, #byteLength(LEN), lengthBytes(BYTES)), lookupTy(TY)))) - requires 1 ==Int BYTES[0] // expect only 0 or 1 as tags, so higher bytes do not matter + => #decodeEnumDirectFields( + BYTES, + #findVariantIdx(#decodeEnumDirectTag(BYTES, TAG_WIDTH), DISCRIMINANTS), + FIELD_TYPESS, + VARIANT_LAYOUTS, + ENUM_TYPE + ) + requires notBool #noFields(FIELD_TYPESS) + + // --------------------------------------------------------------------------- + // #decodeEnumDirectFields: given the variant index, decode its fields + // --------------------------------------------------------------------------- + syntax Evaluation ::= #decodeEnumDirectFields ( Bytes , VariantIdx , Tyss , LayoutShapes , TypeInfo ) [function, total] + // -------------------------------------------------------------------------------------------------------------------------- + rule #decodeEnumDirectFields(BYTES, variantIdx(IDX), FIELD_TYPESS, VARIANT_LAYOUTS, _ENUM_TYPE) + => Aggregate( + variantIdx(IDX), + #decodeFieldsWithOffsets(BYTES, #nthTys(FIELD_TYPESS, IDX), #nthVariantOffsets(VARIANT_LAYOUTS, IDX)) + ) + requires 0 <=Int IDX [preserves-definedness] - rule #decodeOptionTag01(BYTES, _LEN, _TY, ENUM_TYPE) + + // Error cases: variant not found or other failure + rule #decodeEnumDirectFields(BYTES, _, _FIELD_TYPESS, _VARIANT_LAYOUTS, ENUM_TYPE) => UnableToDecode(BYTES, ENUM_TYPE) [owise] + // --------------------------------------------------------------------------- + // #decodeEnumDirectTag: read the tag bytes as an unsigned little-endian int + // --------------------------------------------------------------------------- + syntax Int ::= #decodeEnumDirectTag ( Bytes , IntegerLength ) [function, total] + rule #decodeEnumDirectTag(BYTES, TAG_WIDTH) + => Bytes2Int(substrBytes(BYTES, 0, #byteLength(TAG_WIDTH)), LE, Unsigned) + requires lengthBytes(BYTES) >=Int #byteLength(TAG_WIDTH) + [preserves-definedness] + rule #decodeEnumDirectTag(_, _) => -1 [owise] + + // --------------------------------------------------------------------------- + // List-indexing helpers + // --------------------------------------------------------------------------- + + // Index into a Tyss (list of per-variant field type lists) + syntax Tys ::= #nthTys ( Tyss , Int ) [function, total] + rule #nthTys(TYS : _REST, 0) => TYS + rule #nthTys(_TYS : REST, N) => #nthTys(REST, N -Int 1) requires N >Int 0 + rule #nthTys(_, _) => .Tys [owise] + + // Index into variant layouts and extract field offsets in one step (total) + syntax MachineSizes ::= #nthVariantOffsets ( LayoutShapes , Int ) [function, total] + rule #nthVariantOffsets(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _) _REST, 0) => OFFSETS + rule #nthVariantOffsets(_L REST, N) => #nthVariantOffsets(REST, N -Int 1) requires N >Int 0 + rule #nthVariantOffsets(_, _) => .MachineSizes [owise] + syntax Int ::= #byteLength ( IntegerLength ) [function, total] // ----------------------------------------------------------- rule #byteLength(integerLengthI8 ) => 1 diff --git a/kmir/src/tests/integration/data/prove-rs/enum-direct-tag-decode.rs b/kmir/src/tests/integration/data/prove-rs/enum-direct-tag-decode.rs new file mode 100644 index 000000000..30eef03d1 --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/enum-direct-tag-decode.rs @@ -0,0 +1,72 @@ +/// Comprehensive test for direct-tag enum decoding. +/// +/// Covers: +/// - `#[repr(u8)]` with discriminants 0/1 (baseline, like Option) +/// - `#[repr(u8)]` with discriminants 2/5 (non-zero start) +/// - `#[repr(u16)]` with discriminants 0/256 (wide tag exceeding u8 range) +/// - `#[repr(u8)]` with 3 variants (beyond two-variant restriction) + +// --- Baseline: repr(u8), disc 0 / 1 ------------------------------------------- + +#[repr(u8)] +enum TwoU8 { + A(bool) = 0, + B(bool) = 1, +} + +const TWO_U8_A: TwoU8 = TwoU8::A(true); +const TWO_U8_B: TwoU8 = TwoU8::B(false); + +// --- Non-zero discriminants: repr(u8), disc 2 / 5 ----------------------------- + +#[repr(u8)] +enum NonZero { + X(bool) = 2, + Y(bool) = 5, +} + +const NZ_X: NonZero = NonZero::X(false); +const NZ_Y: NonZero = NonZero::Y(true); + +// --- Wide tag: repr(u16), disc 0 / 256 ---------------------------------------- + +#[repr(u16)] +enum WideTag { + Lo(bool) = 0, + Hi(bool) = 256, +} + +const WIDE_LO: WideTag = WideTag::Lo(true); +const WIDE_HI: WideTag = WideTag::Hi(false); + +// --- Three variants: repr(u8), disc 0 / 1 / 2 --------------------------------- + +#[repr(u8)] +enum Three { + First(bool) = 0, + Second(bool) = 1, + Third(bool) = 2, +} + +const THREE_A: Three = Three::First(true); +const THREE_B: Three = Three::Second(false); +const THREE_C: Three = Three::Third(true); + +fn main() { + // Baseline + assert!(matches!(TWO_U8_A, TwoU8::A(true))); + assert!(matches!(TWO_U8_B, TwoU8::B(false))); + + // Non-zero discriminants + assert!(matches!(NZ_X, NonZero::X(false))); + assert!(matches!(NZ_Y, NonZero::Y(true))); + + // Wide tag + assert!(matches!(WIDE_LO, WideTag::Lo(true))); + assert!(matches!(WIDE_HI, WideTag::Hi(false))); + + // Three variants + assert!(matches!(THREE_A, Three::First(true))); + assert!(matches!(THREE_B, Three::Second(false))); + assert!(matches!(THREE_C, Three::Third(true))); +} From 022dafbb0a40cc99c5cd1f49d814313d8e3b577b Mon Sep 17 00:00:00 2001 From: JianHong Zhao Date: Fri, 6 Mar 2026 14:06:47 +0800 Subject: [PATCH 3/6] fix(rt): closure aggregate + #setTupleArgs fallback (#952) ## Summary Add three semantics fixes needed by the closure-call path, plus a regression test: - Add `aggregateKindClosure` reduction: - `ARGS:List ~> #mkAggregate(aggregateKindClosure(...)) => Aggregate(variantIdx(0), ARGS)` - Add `#setTupleArgs(IDX, VAL:Value) [owise]` fallback for unwrapped single-value arguments - Add typed-closure callee setup rule `setupCalleeClosure3` (with `isFunType` predicate) - Add `closure-no-capture.rs` regression test ## Context Stable MIR represents closure construction via [`Rvalue::Aggregate(AggregateKind::Closure(...))`](https://github.com/rust-lang/rust/blob/a2545fd6fc66b4323f555223a860c451885d1d2b/compiler/stable_mir/src/mir/body.rs#L633). The K semantics had rules for tuple/ADT aggregates, but no rule for `aggregateKindClosure`. For closure calls, argument placement goes through `#setTupleArgs`. Existing rules only handled tuple/list-shaped payloads. Also, once closure type metadata is present, closure env locals can be typed as `Ref`. Before this PR, closure setup rules only matched `VoidType`-based closure typing paths, so the typed closure path needed an explicit setup rule. ### Red vs Green for `aggregateKindClosure` **RED:** ``` ListItem(...) ~> #mkAggregate(aggregateKindClosure(...)) ``` No matching rule, execution is stuck. **GREEN:** `aggregateKindClosure` reduces to `Aggregate(variantIdx(0), ARGS)`, so closure aggregate construction proceeds. ### Red vs Green for `#setTupleArgs [owise]` After closure aggregate reduction is added, execution advances to the next blocker: **RED (next blocker):** ``` #setTupleArgs(2, Integer(41,8,false)) ``` No existing `#setTupleArgs` rule matches this plain `Value` shape. **GREEN:** The fallback ``` #setTupleArgs(IDX, VAL:Value) => #setLocalValue(..., #incrementRef(VAL)) [owise] ``` handles the unwrapped single-value case and allows argument setup to continue. ### Red vs Green for `setupCalleeClosure3` With typed closure metadata, closure env can appear as `Ref`. Existing closure setup rules were guarded for `VoidType` closure typing paths, so typed closure setup could miss those rules. **RED:** Typed closure env path does not satisfy the existing `VoidType` guard shape for closure setup, so closure-call setup does not reliably take the typed closure path. **GREEN:** `setupCalleeClosure3` adds the typed path explicitly: - guard uses `isFunType(#lookupMaybeTy(pointeeTy(...)))` - still performs the same tuple-arg unpacking via `#setTupleArgs(2, getValue(LOCALS, TUPLE))` This keeps closure-call argument placement consistent when closure env typing is known. ## Proof evidence **Without fix (RED):** `closure-no-capture.rs` gets stuck on closure aggregate, and after adding only the closure aggregate rule it gets stuck at `#setTupleArgs(2, Integer(41,8,false))`. **With fix (GREEN):** ``` test_prove_rs[closure-no-capture] PASSED ``` ## Test plan - [x] `test_prove_rs[closure-no-capture]` - [x] `make test-integration` regression --- kmir/src/kmir/kdist/mir-semantics/kmir.md | 5 +++++ kmir/src/kmir/kdist/mir-semantics/rt/data.md | 6 ++++++ .../tests/integration/data/prove-rs/closure-staged.rs | 11 +++++++++++ 3 files changed, 22 insertions(+) create mode 100644 kmir/src/tests/integration/data/prove-rs/closure-staged.rs diff --git a/kmir/src/kmir/kdist/mir-semantics/kmir.md b/kmir/src/kmir/kdist/mir-semantics/kmir.md index 5a2b48c65..b85ee0002 100644 --- a/kmir/src/kmir/kdist/mir-semantics/kmir.md +++ b/kmir/src/kmir/kdist/mir-semantics/kmir.md @@ -606,6 +606,11 @@ Therefore a heuristics is used here: // unpack tuple and set arguments individually rule #setTupleArgs(IDX, Aggregate(variantIdx(0), ARGS)) => #setTupleArgs(IDX, ARGS) ... + rule #setTupleArgs(IDX, VAL:Value) + => #setTupleArgs(IDX, ListItem(VAL)) + ... + [owise] + rule #setTupleArgs(_, .List ) => .K ... rule #setTupleArgs(IDX, ListItem(VAL) REST:List) diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/data.md b/kmir/src/kmir/kdist/mir-semantics/rt/data.md index 840ae6095..bcce7438b 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/data.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/data.md @@ -1051,6 +1051,12 @@ Literal arrays are also built using this RValue. ... + rule ARGS:List ~> #mkAggregate(aggregateKindClosure(_DEF, _TY_ARGS)) + => + Aggregate(variantIdx(0), ARGS) + ... + + // #readOperands accumulates a list of `TypedLocal` values from operands syntax KItem ::= #readOperands ( Operands ) diff --git a/kmir/src/tests/integration/data/prove-rs/closure-staged.rs b/kmir/src/tests/integration/data/prove-rs/closure-staged.rs new file mode 100644 index 000000000..98e256256 --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/closure-staged.rs @@ -0,0 +1,11 @@ +fn apply u8>(f: F, v: u8) -> u8 { + f(v) +} + +fn main() { + let delta: u8 = 1u8; + let _captured = |x: u8| x + delta; + + let f = |x: u8| x + 1u8; + assert_eq!(apply(f, 41u8), 42u8); +} From caa5994e42589bc3aee4ed17794f9b6de187cd37 Mon Sep 17 00:00:00 2001 From: Maria Kotsifakou Date: Sat, 7 Mar 2026 12:50:43 -0600 Subject: [PATCH 4/6] fix(rt): bug fix for `castKindPtrToPtr` (#974) This PR addresses an issue that was first identified in solana-token's `test_validate_owner_multisig.rs` proof. `iter_next_3.rs` is a minimized test that reproduces the issue. ### Problem When executing Rust's slice iterator (`core::slice::Iter`), the `Iter::new` function creates two pointers: a start pointer and an end pointer. The `next` method compares these pointers for equality to determine whether iteration is complete. During `Iter::new`, the end pointer undergoes a `PtrToPtr` cast (e.g., from `*const [u8; 32]` to `*mut [u8; 32]`) that changes its initially correct metadata to one that causes the issue. - Initially correct metadata: `metadata(staticSize(32), 3, dynamicSize(3))` - Resulting metadata: `metadata(staticSize(32), 3, staticSize(32))` - Applied `convertMetadata` rule: https://github.com/runtimeverification/mir-semantics/blob/8b331c654d225b13b410970599e559e95947edc5/kmir/src/kmir/kdist/mir-semantics/rt/data.md?plain=1#L1508 This rule sets the origin to `staticSize(32)` (matched with `ORIGIN_SIZE`), discarding the previous correct `dynamicSize(3)`. The start pointer does not undergo this additional cast, and maintains its metadata: `metadata(staticSize(32), 3, dynamicSize(3))`. This causes the two pointers to end up with different metadata: `metadata(staticSize(32), 3, dynamicSize(3))` vs `metadata(staticSize(32), 3, staticSize(32))`. When `Iter::next` compares the pointers for equality at the end of iteration, the comparison evaluates to `false` instead of `true`, causing the proof to take the wrong branch (not-at-the-end) and eventually get stuck on an out-of-bounds access. ### Solution We add a `#cast` rule for `castKindPtrToPtr`, with higher priority, that matches when the source and target pointer types have identical pointee types. This rule preserves the source pointer metadata. This works because when the pointee types are identical, the pointer representation does not need to change. The only reason such a cast exists in MIR is a mutability change, which does not affect the pointer's runtime representation or metadata. When the pointee types are not identical, the rule does not match, and we fallback to the existing rule and handle the cast as before. For casts where both the pointee type and mutability differ (e.g., `*const T1` to `*mut T2`: `T1=/=T2`), I am not sure how MIR is generated for these, but either case is handled correctly: - If it is a single cast, the pointee types differ, so the new rule does not match and the fallback rule handles it as before. Mutability is not involved in type projections or equality checks, so it does not interfere. - If it is two separate casts (one for mutability, one for the pointee type), the new rule matches the mutability cast and the fallback rule handles the pointee type cast. ### Why mutability information was not added It would be possible to thread mutability information through the semantics (from the smir.json, which now includes mutability on `PtrType` and `RefType`, PR [here](https://github.com/runtimeverification/stable-mir-json/pull/127)) and add an explicit check that the cast is indeed a mutability-only change. I chose not to include it, with the following reasoning. The new mutability information would only be used to confirm that the source and target pointer types differ in mutability in the new `#cast` rule. Any other place where the new mutability information would be threaded through would be `_`. At the same time, if the source and target pointer types have identical pointee types, the only possible difference that would have led to a cast is a difference in mutability. Propagating the mutability in the semantics for this check alone seems to add overhead for no benefit, especially considering that mutability is not checked/enforced anywhere else. So, it seems sufficient to just rely on the fact that because the pointee type is the same, we do not need to change the metadata of the pointer, so we simply do not. Mutability does not affect that. --- kmir/src/kmir/kdist/mir-semantics/rt/data.md | 14 ++++++++++++++ .../integration/data/prove-rs/iter_next_3.rs | 19 +++++++++++++++++++ .../prove-rs/show/iter_next_3.main.expected | 17 +++++++++++++++++ .../src/tests/integration/test_integration.py | 1 + 4 files changed, 51 insertions(+) create mode 100644 kmir/src/tests/integration/data/prove-rs/iter_next_3.rs create mode 100644 kmir/src/tests/integration/data/prove-rs/show/iter_next_3.main.expected diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/data.md b/kmir/src/kmir/kdist/mir-semantics/rt/data.md index bcce7438b..f76620160 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/data.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/data.md @@ -1440,6 +1440,20 @@ which have the same representation `Value::Range`. Also, casts to and from _transparent wrappers_ (newtypes that just forward field `0`, i.e. `struct Wrapper(T)`) are allowed, and supported by a special projection `WrapStruct`. +When the source and target types are pointer types with the same pointee type (i.e., differing only in mutability), +the cast preserves the source pointer and its metadata unchanged. + +```k + rule #cast(PtrLocal(OFFSET, PLACE, MUT, META), castKindPtrToPtr, TY_SOURCE, TY_TARGET) + => PtrLocal(OFFSET, PLACE, MUT, META) + ... + + requires pointeeTy(lookupTy(TY_SOURCE)) ==K pointeeTy(lookupTy(TY_TARGET)) + [priority(45), preserves-definedness] // valid map lookups checked +``` + +Otherwise, compute the type projection and convert metadata accordingly. + ```k rule #cast(PtrLocal(OFFSET, place(LOCAL, PROJS), MUT, META), castKindPtrToPtr, TY_SOURCE, TY_TARGET) => diff --git a/kmir/src/tests/integration/data/prove-rs/iter_next_3.rs b/kmir/src/tests/integration/data/prove-rs/iter_next_3.rs new file mode 100644 index 000000000..260b11cae --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/iter_next_3.rs @@ -0,0 +1,19 @@ +struct Wrapper { + tag: u8, + a: [[u8; 32]; 3], +} + +fn foo(c: &Wrapper) { + for elem in c.a.iter() { + assert!(elem[0] != 0); + } +} + +fn main() { + let c = Wrapper { + tag: 42, + a: [[1u8; 32], [2u8; 32], [3u8; 32]], + }; + + foo(&c); +} diff --git a/kmir/src/tests/integration/data/prove-rs/show/iter_next_3.main.expected b/kmir/src/tests/integration/data/prove-rs/show/iter_next_3.main.expected new file mode 100644 index 000000000..a4c5bdc3d --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/show/iter_next_3.main.expected @@ -0,0 +1,17 @@ + +┌─ 1 (root, init) +│ #execTerminator ( terminator ( ... kind: terminatorKindCall ( ... func: operandC +│ span: 0 +│ +│ (2028 steps) +├─ 3 (terminal) +│ #EndProgram ~> .K +│ function: main +│ +┊ constraint: true +┊ subst: ... +└─ 2 (leaf, target, terminal) + #EndProgram ~> .K + + + diff --git a/kmir/src/tests/integration/test_integration.py b/kmir/src/tests/integration/test_integration.py index 5f36b155d..3014e4b04 100644 --- a/kmir/src/tests/integration/test_integration.py +++ b/kmir/src/tests/integration/test_integration.py @@ -43,6 +43,7 @@ 'local-raw-fail', 'interior-mut-fail', 'interior-mut3-fail', + 'iter_next_3', 'assert_eq_exp', 'bitwise-not-shift', 'symbolic-args-fail', From c5be6499cad62eee6b3cc87905709e7dbc73d051 Mon Sep 17 00:00:00 2001 From: JianHong Zhao Date: Mon, 9 Mar 2026 23:31:04 +0800 Subject: [PATCH 5/6] fix(rt): repair closure callee setup for iter-eq repro (#957) ## Summary - adds `iter-eq-copied-take-dereftruncate` as a regression - fixes `setupCalleeClosure2` by initializing callee `_1` with the closure env before tuple unpacking - removes temporary show tracking after the repro reaches `#EndProgram` ## Why This Fix - Red commit frontier: after `1947` steps the new repro stopped at: - `#traverseProjection(toLocal(1), thunk(operandCopy(place(... local: local(1) ...))))` - Root cause: `setupCalleeClosure2` reserved `NEWLOCALS` and unpacked the tuple argument into `_2..`, but never wrote the closure receiver/env into `_1`. The std closure body later dereferenced `local(1)`, so execution walked a bad projection path instead of the closure env. - Fix: write `#incrementRef(getValue(LOCALS, CLOSURE))` into `local(1)` before `#setTupleArgs(2, ...)`. - Effect: the same repro moves from the stuck `1947`-step leaf to `#EndProgram` after `5601` steps. The cleanup commit then deletes only the temporary show artifact. --- kmir/src/kmir/kdist/mir-semantics/kmir.md | 3 +- .../iter-eq-copied-take-dereftruncate.rs | 32 +++++++++++++++++++ .../src/tests/integration/test_integration.py | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 kmir/src/tests/integration/data/prove-rs/iter-eq-copied-take-dereftruncate.rs diff --git a/kmir/src/kmir/kdist/mir-semantics/kmir.md b/kmir/src/kmir/kdist/mir-semantics/kmir.md index b85ee0002..ea3d2e418 100644 --- a/kmir/src/kmir/kdist/mir-semantics/kmir.md +++ b/kmir/src/kmir/kdist/mir-semantics/kmir.md @@ -562,7 +562,8 @@ Therefore a heuristics is used here: _SPAN ) => - #setTupleArgs(2, getValue(LOCALS, TUPLE)) ~> #execBlock(FIRST) + #setLocalValue(place(local(1), .ProjectionElems), #incrementRef(getValue(LOCALS, CLOSURE))) + ~> #setTupleArgs(2, getValue(LOCALS, TUPLE)) ~> #execBlock(FIRST) // arguments are tuple components, stored as _2 .. _n ... diff --git a/kmir/src/tests/integration/data/prove-rs/iter-eq-copied-take-dereftruncate.rs b/kmir/src/tests/integration/data/prove-rs/iter-eq-copied-take-dereftruncate.rs new file mode 100644 index 000000000..75e7c1b9e --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/iter-eq-copied-take-dereftruncate.rs @@ -0,0 +1,32 @@ +fn main() { + let _keep: fn() -> bool = repro; +} + +#[inline(never)] +#[no_mangle] +pub fn repro() -> bool { + let k0 = Pubkey([1; 32]); + let k1 = Pubkey([2; 32]); + let k2 = Pubkey([3; 32]); + let n = 2usize; + + let accounts = [ + AccountInfo { key: &k0 }, + AccountInfo { key: &k1 }, + AccountInfo { key: &k2 }, + ]; + let signers = [k1, k2, k0]; + + accounts[1..] + .iter() + .map(|signer| *signer.key) + .eq(signers.iter().take(n).copied()) +} + +#[derive(Clone)] +struct AccountInfo<'a> { + key: &'a Pubkey, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct Pubkey([u8; 32]); diff --git a/kmir/src/tests/integration/test_integration.py b/kmir/src/tests/integration/test_integration.py index 3014e4b04..945002873 100644 --- a/kmir/src/tests/integration/test_integration.py +++ b/kmir/src/tests/integration/test_integration.py @@ -38,6 +38,7 @@ 'assume-cheatcode-conflict-fail': ['check_assume_conflict'], 'transmute-bytes': ['bytes_to_u64', 'u64_to_bytes'], 'test_offset_from-fail': ['testing'], + 'iter-eq-copied-take-dereftruncate': ['repro'], } PROVE_RS_SHOW_SPECS = [ 'local-raw-fail', From 133ebf35504bf9b012db6658db62c725e56cf7e1 Mon Sep 17 00:00:00 2001 From: Stevengre Date: Tue, 10 Mar 2026 11:29:14 +0800 Subject: [PATCH 6/6] test(integration): update spl-multisig show expectation --- .../spl-multisig-iter-eq-copied-next-fail.repro.expected | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kmir/src/tests/integration/data/prove-rs/show/spl-multisig-iter-eq-copied-next-fail.repro.expected b/kmir/src/tests/integration/data/prove-rs/show/spl-multisig-iter-eq-copied-next-fail.repro.expected index 378034e6e..645835cf8 100644 --- a/kmir/src/tests/integration/data/prove-rs/show/spl-multisig-iter-eq-copied-next-fail.repro.expected +++ b/kmir/src/tests/integration/data/prove-rs/show/spl-multisig-iter-eq-copied-next-fail.repro.expected @@ -3,10 +3,10 @@ │ #execTerminator ( terminator ( ... kind: terminatorKindCall ( ... func: operandC │ span: 0 │ -│ (1077 steps) +│ (1837 steps) └─ 3 (stuck, leaf) - ListItem ( thunk ( #decodeConstant ( constantKindZeroSized , ty ( 89 ) , typeInf - span: 274 + #execTerminator ( terminator ( ... kind: terminatorKindCall ( ... func: operandM + span: 149 ┌─ 2 (root, leaf, target, terminal)