Skip to content

Commit 7fa3e1a

Browse files
k0kubuntekknolagi
andauthored
ZJIT: Write a callee frame on JIT-to-JIT calls (#13579)
Co-authored-by: Max Bernstein <tekknolagi@gmail.com>
1 parent ef9301a commit 7fa3e1a

File tree

7 files changed

+176
-55
lines changed

7 files changed

+176
-55
lines changed

test/ruby/test_zjit.rb

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,39 @@ def test(a) = 1 + a
102102
}, call_threshold: 2
103103
end
104104

105+
def test_opt_plus_type_guard_exit_with_locals
106+
assert_compiles '[6, 6.0]', %q{
107+
def test(a)
108+
local = 3
109+
1 + a + local
110+
end
111+
test(1) # profile opt_plus
112+
[test(2), test(2.0)]
113+
}, call_threshold: 2
114+
end
115+
105116
def test_opt_plus_type_guard_nested_exit
106-
omit 'rewind_caller_frames is not implemented yet'
107-
assert_compiles '[3, 3.0]', %q{
117+
assert_compiles '[4, 4.0]', %q{
108118
def side_exit(n) = 1 + n
109119
def jit_frame(n) = 1 + side_exit(n)
110120
def entry(n) = jit_frame(n)
121+
entry(2) # profile send
122+
[entry(2), entry(2.0)]
123+
}, call_threshold: 2
124+
end
125+
126+
def test_opt_plus_type_guard_nested_exit_with_locals
127+
assert_compiles '[9, 9.0]', %q{
128+
def side_exit(n)
129+
local = 2
130+
1 + n + local
131+
end
132+
def jit_frame(n)
133+
local = 3
134+
1 + side_exit(n) + local
135+
end
136+
def entry(n) = jit_frame(n)
137+
entry(2) # profile send
111138
[entry(2), entry(2.0)]
112139
}, call_threshold: 2
113140
end
@@ -130,7 +157,6 @@ def test(a, b) = a * b
130157
end
131158

132159
def test_opt_mult_overflow
133-
omit 'side exits are not implemented yet'
134160
assert_compiles '[6, -6, 9671406556917033397649408, -9671406556917033397649408, 21267647932558653966460912964485513216]', %q{
135161
def test(a, b)
136162
a * b
@@ -610,6 +636,22 @@ def test() = @foo = 1
610636
}
611637
end
612638

639+
def test_send_backtrace
640+
backtrace = [
641+
"-e:2:in 'Object#jit_frame1'",
642+
"-e:3:in 'Object#entry'",
643+
"-e:5:in 'block in <main>'",
644+
"-e:6:in '<main>'",
645+
]
646+
assert_compiles backtrace.inspect, %q{
647+
def jit_frame2 = caller # 1
648+
def jit_frame1 = jit_frame2 # 2
649+
def entry = jit_frame1 # 3
650+
entry # profile send # 4
651+
entry # 5
652+
}, call_threshold: 2
653+
end
654+
613655
# tool/ruby_vm/views/*.erb relies on the zjit instructions a) being contiguous and
614656
# b) being reliably ordered after all the other instructions.
615657
def test_instruction_order
@@ -631,11 +673,7 @@ def assert_compiles(expected, test_script, insns: [], **opts)
631673
pipe_fd = 3
632674

633675
script = <<~RUBY
634-
_test_proc = -> {
635-
RubyVM::ZJIT.assert_compiles
636-
#{test_script}
637-
}
638-
ret_val = _test_proc.call
676+
ret_val = (_test_proc = -> { RubyVM::ZJIT.assert_compiles; #{test_script.lstrip} }).call
639677
result = {
640678
ret_val:,
641679
#{ unless insns.empty?

zjit/src/backend/arm64/mod.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,6 @@ impl Assembler
211211
vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
212212
}
213213

214-
/// Get the address that the current frame returns to
215-
pub fn return_addr_opnd() -> Opnd {
216-
Opnd::Reg(X30_REG)
217-
}
218-
219214
/// Split platform-specific instructions
220215
/// The transformations done here are meant to make our lives simpler in later
221216
/// stages of the compilation pipeline.

zjit/src/backend/lir.rs

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use std::collections::HashMap;
22
use std::fmt;
33
use std::mem::take;
4-
use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, VM_ENV_DATA_SIZE};
5-
use crate::state::ZJITState;
4+
use crate::codegen::local_size_and_idx_to_ep_offset;
5+
use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32};
66
use crate::{cruby::VALUE};
77
use crate::backend::current::*;
88
use crate::virtualmem::CodePtr;
@@ -1797,7 +1797,7 @@ impl Assembler
17971797
asm_comment!(self, "write locals: {locals:?}");
17981798
for (idx, &opnd) in locals.iter().enumerate() {
17991799
let opnd = split_store_source(self, opnd);
1800-
self.store(Opnd::mem(64, SP, (-(VM_ENV_DATA_SIZE as i32) - locals.len() as i32 + idx as i32) * SIZEOF_VALUE_I32), opnd);
1800+
self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd);
18011801
}
18021802

18031803
asm_comment!(self, "save cfp->pc");
@@ -1809,10 +1809,6 @@ impl Assembler
18091809
let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
18101810
self.store(cfp_sp, Opnd::Reg(Assembler::SCRATCH_REG));
18111811

1812-
asm_comment!(self, "rewind caller frames");
1813-
self.mov(C_ARG_OPNDS[0], Assembler::return_addr_opnd());
1814-
self.ccall(Self::rewind_caller_frames as *const u8, vec![]);
1815-
18161812
asm_comment!(self, "exit to the interpreter");
18171813
self.frame_teardown();
18181814
self.mov(C_RET_OPND, Opnd::UImm(Qundef.as_u64()));
@@ -1823,13 +1819,6 @@ impl Assembler
18231819
}
18241820
Some(())
18251821
}
1826-
1827-
#[unsafe(no_mangle)]
1828-
extern "C" fn rewind_caller_frames(addr: *const u8) {
1829-
if ZJITState::is_iseq_return_addr(addr) {
1830-
unimplemented!("Can't side-exit from JIT-JIT call: rewind_caller_frames is not implemented yet");
1831-
}
1832-
}
18331822
}
18341823

18351824
impl fmt::Debug for Assembler {

zjit/src/backend/x86_64/mod.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,6 @@ impl Assembler
109109
vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
110110
}
111111

112-
/// Get the address that the current frame returns to
113-
pub fn return_addr_opnd() -> Opnd {
114-
Opnd::mem(64, Opnd::Reg(RSP_REG), 0)
115-
}
116-
117112
// These are the callee-saved registers in the x86-64 SysV ABI
118113
// RBX, RSP, RBP, and R12–R15
119114

zjit/src/codegen.rs

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
258258
Insn::IfTrue { val, target } => return gen_if_true(jit, asm, opnd!(val), target),
259259
Insn::IfFalse { val, target } => return gen_if_false(jit, asm, opnd!(val), target),
260260
Insn::SendWithoutBlock { call_info, cd, state, self_val, args, .. } => gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), self_val, args)?,
261-
Insn::SendWithoutBlockDirect { iseq, self_val, args, .. } => gen_send_without_block_direct(cb, jit, asm, *iseq, opnd!(self_val), args)?,
261+
Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), args, &function.frame_state(*state))?,
262262
Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?),
263263
Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
264264
Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
@@ -484,8 +484,16 @@ fn gen_send_without_block(
484484
self_val: &InsnId,
485485
args: &Vec<InsnId>,
486486
) -> Option<lir::Opnd> {
487-
// Spill the receiver and the arguments onto the stack. They need to be marked by GC and may be caller-saved registers.
487+
// Spill locals onto the stack.
488+
// TODO: Don't spill locals eagerly; lazily reify frames
489+
asm_comment!(asm, "spill locals");
490+
for (idx, &insn_id) in state.locals().enumerate() {
491+
asm.mov(Opnd::mem(64, SP, (-local_idx_to_ep_offset(jit.iseq, idx) - 1) * SIZEOF_VALUE_I32), jit.get_opnd(insn_id)?);
492+
}
493+
// Spill the receiver and the arguments onto the stack.
494+
// They need to be on the interpreter stack to let the interpreter access them.
488495
// TODO: Avoid spilling operands that have been spilled before.
496+
asm_comment!(asm, "spill receiver and arguments");
489497
for (idx, &insn_id) in [*self_val].iter().chain(args.iter()).enumerate() {
490498
// Currently, we don't move the SP register. So it's equal to the base pointer.
491499
let stack_opnd = Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32);
@@ -515,10 +523,40 @@ fn gen_send_without_block_direct(
515523
cb: &mut CodeBlock,
516524
jit: &mut JITState,
517525
asm: &mut Assembler,
526+
cme: *const rb_callable_method_entry_t,
518527
iseq: IseqPtr,
519528
recv: Opnd,
520529
args: &Vec<InsnId>,
530+
state: &FrameState,
521531
) -> Option<lir::Opnd> {
532+
// Save cfp->pc and cfp->sp for the caller frame
533+
gen_save_pc(asm, state);
534+
gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver
535+
536+
// Spill the virtual stack and the locals of the caller onto the stack
537+
// TODO: Lazily materialize caller frames on side exits or when needed
538+
asm_comment!(asm, "spill locals and stack");
539+
for (idx, &insn_id) in state.locals().enumerate() {
540+
asm.mov(Opnd::mem(64, SP, (-local_idx_to_ep_offset(jit.iseq, idx) - 1) * SIZEOF_VALUE_I32), jit.get_opnd(insn_id)?);
541+
}
542+
for (idx, &insn_id) in state.stack().enumerate() {
543+
asm.mov(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), jit.get_opnd(insn_id)?);
544+
}
545+
546+
// Set up the new frame
547+
// TODO: Lazily materialize caller frames on side exits or when needed
548+
gen_push_frame(asm, args.len(), state, ControlFrame {
549+
recv,
550+
iseq,
551+
cme,
552+
frame_type: VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL,
553+
});
554+
555+
asm_comment!(asm, "switch to new SP register");
556+
let local_size = unsafe { get_iseq_body_local_table_size(iseq) } as usize;
557+
let new_sp = asm.add(SP, ((state.stack().len() + local_size - args.len() + VM_ENV_DATA_SIZE as usize) * SIZEOF_VALUE).into());
558+
asm.mov(SP, new_sp);
559+
522560
asm_comment!(asm, "switch to new CFP");
523561
let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
524562
asm.mov(CFP, new_cfp);
@@ -537,7 +575,15 @@ fn gen_send_without_block_direct(
537575
jit.branch_iseqs.push((branch.clone(), iseq));
538576
// TODO(max): Add a PatchPoint here that can side-exit the function if the callee messed with
539577
// the frame's locals
540-
Some(asm.ccall_with_branch(dummy_ptr, c_args, &branch))
578+
let ret = asm.ccall_with_branch(dummy_ptr, c_args, &branch);
579+
580+
// If a callee side-exits, i.e. returns Qundef, propagate the return value to the caller.
581+
// The caller will side-exit the callee into the interpreter.
582+
// TODO: Let side exit code pop all JIT frames to optimize away this cmp + je.
583+
asm.cmp(ret, Qundef.into());
584+
asm.je(ZJITState::get_exit_trampoline().into());
585+
586+
Some(ret)
541587
}
542588

543589
/// Compile an array duplication instruction
@@ -749,6 +795,45 @@ fn gen_save_sp(asm: &mut Assembler, stack_size: usize) {
749795
asm.mov(cfp_sp, sp_addr);
750796
}
751797

798+
/// Frame metadata written by gen_push_frame()
799+
struct ControlFrame {
800+
recv: Opnd,
801+
iseq: IseqPtr,
802+
cme: *const rb_callable_method_entry_t,
803+
frame_type: u32,
804+
}
805+
806+
/// Compile an interpreter frame
807+
fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: ControlFrame) {
808+
// Locals are written by the callee frame on side-exits or non-leaf calls
809+
810+
// See vm_push_frame() for details
811+
asm_comment!(asm, "push cme, specval, frame type");
812+
// ep[-2]: cref of cme
813+
let local_size = unsafe { get_iseq_body_local_table_size(frame.iseq) } as i32;
814+
let ep_offset = state.stack().len() as i32 + local_size - argc as i32 + VM_ENV_DATA_SIZE as i32 - 1;
815+
asm.store(Opnd::mem(64, SP, (ep_offset - 2) * SIZEOF_VALUE_I32), VALUE::from(frame.cme).into());
816+
// ep[-1]: block_handler or prev EP
817+
// block_handler is not supported for now
818+
asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), VM_BLOCK_HANDLER_NONE.into());
819+
// ep[0]: ENV_FLAGS
820+
asm.store(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32), frame.frame_type.into());
821+
822+
// Write to the callee CFP
823+
fn cfp_opnd(offset: i32) -> Opnd {
824+
Opnd::mem(64, CFP, offset - (RUBY_SIZEOF_CONTROL_FRAME as i32))
825+
}
826+
827+
asm_comment!(asm, "push callee control frame");
828+
// cfp_opnd(RUBY_OFFSET_CFP_PC): written by the callee frame on side-exits or non-leaf calls
829+
// cfp_opnd(RUBY_OFFSET_CFP_SP): written by the callee frame on side-exits or non-leaf calls
830+
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_ISEQ), VALUE::from(frame.iseq).into());
831+
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
832+
let ep = asm.lea(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32));
833+
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
834+
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
835+
}
836+
752837
/// Return a register we use for the basic block argument at a given index
753838
fn param_reg(idx: usize) -> Reg {
754839
// To simplify the implementation, allocate a fixed register for each basic block argument for now.
@@ -764,10 +849,13 @@ fn param_reg(idx: usize) -> Reg {
764849

765850
/// Inverse of ep_offset_to_local_idx(). See ep_offset_to_local_idx() for details.
766851
fn local_idx_to_ep_offset(iseq: IseqPtr, local_idx: usize) -> i32 {
767-
let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) }
768-
.try_into()
769-
.unwrap();
770-
local_table_size - local_idx as i32 - 1 + VM_ENV_DATA_SIZE as i32
852+
let local_size = unsafe { get_iseq_body_local_table_size(iseq) };
853+
local_size_and_idx_to_ep_offset(local_size as usize, local_idx)
854+
}
855+
856+
/// Convert the number of locals and a local index to an offset in the EP
857+
pub fn local_size_and_idx_to_ep_offset(local_size: usize, local_idx: usize) -> i32 {
858+
local_size as i32 - local_idx as i32 - 1 + VM_ENV_DATA_SIZE as i32
771859
}
772860

773861
/// Convert ISEQ into High-level IR
@@ -816,9 +904,8 @@ impl Assembler {
816904
move |code_ptr, _| {
817905
start_branch.start_addr.set(Some(code_ptr));
818906
},
819-
move |code_ptr, cb| {
907+
move |code_ptr, _| {
820908
end_branch.end_addr.set(Some(code_ptr));
821-
ZJITState::add_iseq_return_addr(code_ptr.raw_ptr(cb));
822909
},
823910
)
824911
}

zjit/src/hir.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,15 @@ pub enum Insn {
426426
/// Ignoring keyword arguments etc for now
427427
SendWithoutBlock { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, args: Vec<InsnId>, state: InsnId },
428428
Send { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec<InsnId>, state: InsnId },
429-
SendWithoutBlockDirect { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, iseq: IseqPtr, args: Vec<InsnId>, state: InsnId },
429+
SendWithoutBlockDirect {
430+
self_val: InsnId,
431+
call_info: CallInfo,
432+
cd: *const rb_call_data,
433+
cme: *const rb_callable_method_entry_t,
434+
iseq: IseqPtr,
435+
args: Vec<InsnId>,
436+
state: InsnId,
437+
},
430438

431439
/// Control flow instructions
432440
Return { val: InsnId },
@@ -957,10 +965,11 @@ impl Function {
957965
args: args.iter().map(|arg| find!(*arg)).collect(),
958966
state: *state,
959967
},
960-
SendWithoutBlockDirect { self_val, call_info, cd, iseq, args, state } => SendWithoutBlockDirect {
968+
SendWithoutBlockDirect { self_val, call_info, cd, cme, iseq, args, state } => SendWithoutBlockDirect {
961969
self_val: find!(*self_val),
962970
call_info: call_info.clone(),
963971
cd: *cd,
972+
cme: *cme,
964973
iseq: *iseq,
965974
args: args.iter().map(|arg| find!(*arg)).collect(),
966975
state: *state,
@@ -1261,7 +1270,7 @@ impl Function {
12611270
if let Some(expected) = guard_equal_to {
12621271
self_val = self.push_insn(block, Insn::GuardBitEquals { val: self_val, expected, state });
12631272
}
1264-
let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, call_info, cd, iseq, args, state });
1273+
let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, call_info, cd, cme, iseq, args, state });
12651274
self.make_equal_to(insn_id, send_direct);
12661275
}
12671276
Insn::GetConstantPath { ic } => {

0 commit comments

Comments
 (0)