Skip to content

Commit f3d1557

Browse files
authored
Revert "ZJIT: Allow ccalls above 7 arguments"
This reverts commit 2f151e7. The SP decrement (push) before the call do not match up with the pops after the call, so registers were restored incorrectly. Code from: ./miniruby --zjit-call-threshold=1 --zjit-dump-disasm -e 'p Time.new(1992, 9, 23, 23, 0, 0, :std)' str x11, [sp, #-0x10]! str x12, [sp, #-0x10]! stur x7, [sp] # last argument mov x0, x20 mov x7, x6 mov x6, x5 mov x5, x4 mov x4, x3 mov x3, x2 mov x2, x1 ldur x1, [x29, #-0x20] mov x16, #0xccfc movk x16, #0x2e7, lsl #16 movk x16, #1, lsl #32 blr x16 ldr x12, [sp], #0x10 # supposed to match str x12, [sp, #-0x10]!, but got last argument ldr x11, [sp], #0x10
1 parent 4fb537b commit f3d1557

File tree

5 files changed

+99
-99
lines changed

5 files changed

+99
-99
lines changed

zjit/src/asm/arm64/opnd.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,6 @@ pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 };
9898
pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 };
9999
pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 };
100100
pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 };
101-
pub const X6_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 6 };
102-
pub const X7_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 7 };
103101

104102
// caller-save registers
105103
pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 };

zjit/src/backend/arm64/mod.rs

Lines changed: 65 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,13 @@ pub const EC: Opnd = Opnd::Reg(X20_REG);
2424
pub const SP: Opnd = Opnd::Reg(X21_REG);
2525

2626
// C argument registers on this platform
27-
pub const C_ARG_OPNDS: [Opnd; 8] = [
27+
pub const C_ARG_OPNDS: [Opnd; 6] = [
2828
Opnd::Reg(X0_REG),
2929
Opnd::Reg(X1_REG),
3030
Opnd::Reg(X2_REG),
3131
Opnd::Reg(X3_REG),
3232
Opnd::Reg(X4_REG),
33-
Opnd::Reg(X5_REG),
34-
Opnd::Reg(X6_REG),
35-
Opnd::Reg(X7_REG)
33+
Opnd::Reg(X5_REG)
3634
];
3735

3836
// C return value register on this platform
@@ -201,8 +199,6 @@ pub const ALLOC_REGS: &[Reg] = &[
201199
X3_REG,
202200
X4_REG,
203201
X5_REG,
204-
X6_REG,
205-
X7_REG,
206202
X11_REG,
207203
X12_REG,
208204
];
@@ -235,7 +231,7 @@ impl Assembler {
235231

236232
/// Get a list of all of the caller-saved registers
237233
pub fn get_caller_save_regs() -> Vec<Reg> {
238-
vec![X1_REG, X6_REG, X7_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
234+
vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
239235
}
240236

241237
/// How many bytes a call and a [Self::frame_setup] would change native SP
@@ -492,13 +488,31 @@ impl Assembler {
492488
}
493489
*/
494490
Insn::CCall { opnds, .. } => {
495-
opnds.iter_mut().for_each(|opnd| {
496-
*opnd = match opnd {
497-
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
498-
Opnd::Mem(_) => split_memory_address(asm, *opnd),
499-
_ => *opnd
500-
};
501-
});
491+
assert!(opnds.len() <= C_ARG_OPNDS.len());
492+
493+
// Load each operand into the corresponding argument
494+
// register.
495+
// Note: the iteration order is reversed to avoid corrupting x0,
496+
// which is both the return value and first argument register
497+
if !opnds.is_empty() {
498+
let mut args: Vec<(Opnd, Opnd)> = vec![];
499+
for (idx, opnd) in opnds.iter_mut().enumerate().rev() {
500+
// If the value that we're sending is 0, then we can use
501+
// the zero register, so in this case we'll just send
502+
// a UImm of 0 along as the argument to the move.
503+
let value = match opnd {
504+
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
505+
Opnd::Mem(_) => split_memory_address(asm, *opnd),
506+
_ => *opnd
507+
};
508+
args.push((C_ARG_OPNDS[idx], value));
509+
}
510+
asm.parallel_mov(args);
511+
}
512+
513+
// Now we push the CCall without any arguments so that it
514+
// just performs the call.
515+
*opnds = vec![];
502516
asm.push_insn(insn);
503517
},
504518
Insn::Cmp { left, right } => {
@@ -1843,19 +1857,17 @@ mod tests {
18431857

18441858
assert_disasm_snapshot!(cb.disasm(), @r"
18451859
0x0: str x1, [sp, #-0x10]!
1846-
0x4: str x6, [sp, #-0x10]!
1847-
0x8: str x7, [sp, #-0x10]!
1848-
0xc: str x9, [sp, #-0x10]!
1849-
0x10: str x10, [sp, #-0x10]!
1850-
0x14: str x11, [sp, #-0x10]!
1851-
0x18: str x12, [sp, #-0x10]!
1852-
0x1c: str x13, [sp, #-0x10]!
1853-
0x20: str x14, [sp, #-0x10]!
1854-
0x24: str x15, [sp, #-0x10]!
1855-
0x28: mrs x16, nzcv
1856-
0x2c: str x16, [sp, #-0x10]!
1860+
0x4: str x9, [sp, #-0x10]!
1861+
0x8: str x10, [sp, #-0x10]!
1862+
0xc: str x11, [sp, #-0x10]!
1863+
0x10: str x12, [sp, #-0x10]!
1864+
0x14: str x13, [sp, #-0x10]!
1865+
0x18: str x14, [sp, #-0x10]!
1866+
0x1c: str x15, [sp, #-0x10]!
1867+
0x20: mrs x16, nzcv
1868+
0x24: str x16, [sp, #-0x10]!
18571869
");
1858-
assert_snapshot!(cb.hexdump(), @"e10f1ff8e60f1ff8e70f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8");
1870+
assert_snapshot!(cb.hexdump(), @"e10f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8");
18591871
}
18601872

18611873
#[test]
@@ -1875,11 +1887,9 @@ mod tests {
18751887
0x18: ldr x11, [sp], #0x10
18761888
0x1c: ldr x10, [sp], #0x10
18771889
0x20: ldr x9, [sp], #0x10
1878-
0x24: ldr x7, [sp], #0x10
1879-
0x28: ldr x6, [sp], #0x10
1880-
0x2c: ldr x1, [sp], #0x10
1890+
0x24: ldr x1, [sp], #0x10
18811891
");
1882-
assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e70741f8e60741f8e10741f8");
1892+
assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e10741f8");
18831893
}
18841894

18851895
#[test]
@@ -2648,14 +2658,14 @@ mod tests {
26482658
]);
26492659
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
26502660

2651-
assert_disasm_snapshot!(cb.disasm(), @r"
2652-
0x0: mov x15, x1
2653-
0x4: mov x1, x0
2654-
0x8: mov x0, x15
2655-
0xc: mov x16, #0
2656-
0x10: blr x16
2661+
assert_disasm_snapshot!(cb.disasm(), @"
2662+
0x0: mov x15, x0
2663+
0x4: mov x0, x1
2664+
0x8: mov x1, x15
2665+
0xc: mov x16, #0
2666+
0x10: blr x16
26572667
");
2658-
assert_snapshot!(cb.hexdump(), @"ef0301aae10300aae0030faa100080d200023fd6");
2668+
assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6");
26592669
}
26602670

26612671
#[test]
@@ -2671,17 +2681,17 @@ mod tests {
26712681
]);
26722682
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
26732683

2674-
assert_disasm_snapshot!(cb.disasm(), @r"
2675-
0x0: mov x15, x1
2676-
0x4: mov x1, x0
2677-
0x8: mov x0, x15
2678-
0xc: mov x15, x3
2679-
0x10: mov x3, x2
2680-
0x14: mov x2, x15
2681-
0x18: mov x16, #0
2682-
0x1c: blr x16
2684+
assert_disasm_snapshot!(cb.disasm(), @"
2685+
0x0: mov x15, x2
2686+
0x4: mov x2, x3
2687+
0x8: mov x3, x15
2688+
0xc: mov x15, x0
2689+
0x10: mov x0, x1
2690+
0x14: mov x1, x15
2691+
0x18: mov x16, #0
2692+
0x1c: blr x16
26832693
");
2684-
assert_snapshot!(cb.hexdump(), @"ef0301aae10300aae0030faaef0303aae30302aae2030faa100080d200023fd6");
2694+
assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6");
26852695
}
26862696

26872697
#[test]
@@ -2696,15 +2706,15 @@ mod tests {
26962706
]);
26972707
asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());
26982708

2699-
assert_disasm_snapshot!(cb.disasm(), @r"
2700-
0x0: mov x15, x1
2701-
0x4: mov x1, x2
2702-
0x8: mov x2, x0
2703-
0xc: mov x0, x15
2704-
0x10: mov x16, #0
2705-
0x14: blr x16
2709+
assert_disasm_snapshot!(cb.disasm(), @"
2710+
0x0: mov x15, x0
2711+
0x4: mov x0, x1
2712+
0x8: mov x1, x2
2713+
0xc: mov x2, x15
2714+
0x10: mov x16, #0
2715+
0x14: blr x16
27062716
");
2707-
assert_snapshot!(cb.hexdump(), @"ef0301aae10302aae20300aae0030faa100080d200023fd6");
2717+
assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6");
27082718
}
27092719

27102720
#[test]

zjit/src/backend/lir.rs

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,8 +1559,9 @@ impl Assembler
15591559
frame_setup_idxs.push(asm.insns.len());
15601560
}
15611561

1562-
let before_ccall = match &insn {
1563-
Insn::CCall { .. } if !pool.is_empty() => {
1562+
let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) {
1563+
(Insn::ParallelMov { .. }, Some(Insn::CCall { .. })) |
1564+
(Insn::CCall { .. }, _) if !pool.is_empty() => {
15641565
// If C_RET_REG is in use, move it to another register.
15651566
// This must happen before last-use registers are deallocated.
15661567
if let Some(vreg_idx) = pool.vreg_for(&C_RET_REG) {
@@ -1611,25 +1612,6 @@ impl Assembler
16111612
if cfg!(target_arch = "x86_64") && saved_regs.len() % 2 == 1 {
16121613
asm.cpush(Opnd::Reg(saved_regs.last().unwrap().0));
16131614
}
1614-
if let Insn::ParallelMov { moves } = &insn {
1615-
if moves.len() > C_ARG_OPNDS.len() {
1616-
let difference = moves.len().saturating_sub(C_ARG_OPNDS.len());
1617-
1618-
#[cfg(target_arch = "x86_64")]
1619-
let offset = {
1620-
// double quadword alignment
1621-
((difference + 3) / 4) * 4
1622-
};
1623-
1624-
#[cfg(target_arch = "aarch64")]
1625-
let offset = {
1626-
// quadword alignment
1627-
if difference % 2 == 0 { difference } else { difference + 1 }
1628-
};
1629-
1630-
asm.sub_into(NATIVE_STACK_PTR, (offset * 8).into());
1631-
}
1632-
}
16331615
}
16341616

16351617
// Allocate a register for the output operand if it exists
@@ -1721,23 +1703,7 @@ impl Assembler
17211703
// Push instruction(s)
17221704
let is_ccall = matches!(insn, Insn::CCall { .. });
17231705
match insn {
1724-
Insn::CCall { opnds, fptr, start_marker, end_marker, out } => {
1725-
let mut moves: Vec<(Opnd, Opnd)> = vec![];
1726-
let num_reg_args = opnds.len().min(C_ARG_OPNDS.len());
1727-
let num_stack_args = opnds.len().saturating_sub(C_ARG_OPNDS.len());
1728-
1729-
if num_stack_args > 0 {
1730-
for (i, opnd) in opnds.iter().skip(num_reg_args).enumerate() {
1731-
moves.push((Opnd::mem(64, NATIVE_STACK_PTR, 8 * i as i32), *opnd));
1732-
}
1733-
}
1734-
1735-
if num_reg_args > 0 {
1736-
for (i, opnd) in opnds.iter().take(num_reg_args).enumerate() {
1737-
moves.push((C_ARG_OPNDS[i], *opnd));
1738-
}
1739-
}
1740-
1706+
Insn::ParallelMov { moves } => {
17411707
// For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg.
17421708
if let Some(moves) = Self::resolve_parallel_moves(&moves, None) {
17431709
for (dst, src) in moves {
@@ -1747,12 +1713,13 @@ impl Assembler
17471713
// If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it.
17481714
asm.push_insn(Insn::ParallelMov { moves });
17491715
}
1750-
1716+
}
1717+
Insn::CCall { opnds, fptr, start_marker, end_marker, out } => {
17511718
// Split start_marker and end_marker here to avoid inserting push/pop between them.
17521719
if let Some(start_marker) = start_marker {
17531720
asm.push_insn(Insn::PosMarker(start_marker));
17541721
}
1755-
asm.push_insn(Insn::CCall { opnds: vec![], fptr, start_marker: None, end_marker: None, out });
1722+
asm.push_insn(Insn::CCall { opnds, fptr, start_marker: None, end_marker: None, out });
17561723
if let Some(end_marker) = end_marker {
17571724
asm.push_insn(Insn::PosMarker(end_marker));
17581725
}

zjit/src/backend/x86_64/mod.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,21 @@ impl Assembler {
351351
};
352352
asm.push_insn(insn);
353353
},
354-
Insn::CCall { .. } => {
354+
Insn::CCall { opnds, .. } => {
355+
assert!(opnds.len() <= C_ARG_OPNDS.len());
356+
357+
// Load each operand into the corresponding argument register.
358+
if !opnds.is_empty() {
359+
let mut args: Vec<(Opnd, Opnd)> = vec![];
360+
for (idx, opnd) in opnds.iter_mut().enumerate() {
361+
args.push((C_ARG_OPNDS[idx], *opnd));
362+
}
363+
asm.parallel_mov(args);
364+
}
365+
366+
// Now we push the CCall without any arguments so that it
367+
// just performs the call.
368+
*opnds = vec![];
355369
asm.push_insn(insn);
356370
},
357371
Insn::Lea { .. } => {

zjit/src/codegen.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,12 +398,15 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
398398
&Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
399399
&Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
400400
&Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason),
401-
// Give up SendWithoutBlockDirect for 6+ args since the callee doesn't know how to read arguments from the stack.
401+
// Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it.
402402
Insn::SendWithoutBlockDirect { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self
403403
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::TooManyArgsForLir),
404404
Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)),
405405
&Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
406406
&Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason),
407+
// Ensure we have enough room fit ec, self, and arguments
408+
// TODO remove this check when we have stack args (we can use Time.new to test it)
409+
Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state),
407410
Insn::InvokeBuiltin { bf, leaf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)),
408411
&Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)),
409412
Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))),
@@ -450,6 +453,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
450453
&Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
451454
Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))),
452455
Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)),
456+
// Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
457+
// There's no test case for this because no core cfuncs have this many parameters. But C extensions could have such methods.
458+
Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() =>
459+
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
453460
Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, blockiseq, .. } =>
454461
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
455462
Insn::CCallVariadic { cfunc, recv, name, args, cme, state, blockiseq, return_type: _, elidable: _ } => {
@@ -715,6 +722,10 @@ fn gen_fixnum_bit_check(asm: &mut Assembler, val: Opnd, index: u8) -> Opnd {
715722
}
716723

717724
fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec<Opnd>) -> lir::Opnd {
725+
assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32,
726+
"gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}",
727+
unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() },
728+
bf.argc);
718729
if leaf {
719730
gen_prepare_leaf_call_with_gc(asm, state);
720731
} else {

0 commit comments

Comments
 (0)